You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
431 lines
15 KiB
431 lines
15 KiB
<?php
|
|
/**
|
|
* TablePress Table Import PHPSpreadsheet Class
|
|
*
|
|
* @package TablePress
|
|
* @subpackage Export/Import
|
|
* @author Tobias Bäthge
|
|
* @since 2.0.0
|
|
*/
|
|
|
|
// Prohibit direct script loading.
|
|
defined( 'ABSPATH' ) || die( 'No direct script access allowed!' );
|
|
|
|
/**
|
|
* TablePress Table Import PHPSpreadsheet Class
|
|
*
|
|
* @package TablePress
|
|
* @subpackage Export/Import
|
|
* @author Tobias Bäthge
|
|
* @since 2.0.0
|
|
*/
|
|
class TablePress_Import_PHPSpreadsheet extends TablePress_Import_Base {
|
|
|
|
/**
|
|
* Initializes the Import class.
|
|
*
|
|
* @since 2.0.0
|
|
*/
|
|
public function __construct() {
|
|
// Load PHPSpreadsheet via the Composer autoloading mechanism.
|
|
TablePress::load_file( 'autoload.php', 'libraries' );
|
|
}
|
|
|
|
/**
|
|
* Imports a table from a file.
|
|
*
|
|
* @since 2.0.0
|
|
*
|
|
* @param array<string, mixed> $file File to import.
|
|
* @return array<string, mixed>|WP_Error Table array on success, WP_Error on error.
|
|
*/
|
|
public function import_table( array $file ) /* : array|WP_Error */ {
|
|
$data = file_get_contents( $file['location'] );
|
|
if ( false === $data ) {
|
|
return new WP_Error( 'table_import_phpspreadsheet_data_read', '', $file['location'] );
|
|
}
|
|
|
|
// Remove a possible UTF-8 Byte-Order Mark (BOM).
|
|
$bom = pack( 'CCC', 0xef, 0xbb, 0xbf );
|
|
if ( str_starts_with( $data, $bom ) ) {
|
|
$data = substr( $data, 3 );
|
|
}
|
|
|
|
if ( '' === $data ) {
|
|
return new WP_Error( 'table_import_phpspreadsheet_data_empty', '', $file['location'] );
|
|
}
|
|
|
|
$table = $this->_maybe_import_json( $data );
|
|
if ( false !== $table ) {
|
|
return $table;
|
|
}
|
|
|
|
$table = $this->_maybe_import_html( $data );
|
|
if ( false !== $table ) {
|
|
return $table;
|
|
}
|
|
|
|
return $this->_import_phpspreadsheet( $file );
|
|
}
|
|
|
|
/**
|
|
* Tries to import a table with the JSON format.
|
|
*
|
|
* @since 2.0.0
|
|
*
|
|
* @param string $data Data to import.
|
|
* @return array<string, mixed>|false Table array on success, false if the file is not a JSON file.
|
|
*/
|
|
protected function _maybe_import_json( string $data ) /* : array|false */ {
|
|
$data = trim( $data );
|
|
|
|
// If the file does not begin / end with [ / ] or { / }, it's not a supported JSON file.
|
|
$first_character = $data[0];
|
|
$last_character = $data[-1];
|
|
if ( ! ( '[' === $first_character && ']' === $last_character ) && ! ( '{' === $first_character && '}' === $last_character ) ) {
|
|
return false;
|
|
}
|
|
|
|
$json_table = json_decode( $data, true );
|
|
|
|
// Check if JSON could be decoded. If not, this is probably not a JSON file.
|
|
if ( is_null( $json_table ) ) {
|
|
return false;
|
|
}
|
|
|
|
// Specifically cast to an array again.
|
|
$json_table = (array) $json_table;
|
|
|
|
if ( isset( $json_table['data'] ) ) {
|
|
// JSON data contained a full export.
|
|
$table = $json_table;
|
|
} else {
|
|
// JSON data contained only the data of a table, but no options.
|
|
$table = array( 'data' => array() );
|
|
foreach ( $json_table as $row ) {
|
|
// Turn row into indexed arrays with numeric keys.
|
|
$row = array_values( (array) $row );
|
|
|
|
// Remove entries of multi-dimensional arrays.
|
|
foreach ( $row as &$cell ) {
|
|
if ( is_array( $cell ) ) {
|
|
$cell = '';
|
|
}
|
|
}
|
|
unset( $cell ); // Unset use-by-reference parameter of foreach loop.
|
|
|
|
$table['data'][] = $row;
|
|
}
|
|
}
|
|
|
|
$this->pad_array_to_max_cols( $table['data'] );
|
|
return $table;
|
|
}
|
|
|
|
/**
|
|
* Tries to import a table with the HTML format.
|
|
*
|
|
* @since 2.0.0
|
|
*
|
|
* @param string $data Data to import.
|
|
* @return array<string, mixed>|false Table array on success, false if the file is not an HTML file.
|
|
*/
|
|
protected function _maybe_import_html( string $data ) /* : array|false */ {
|
|
TablePress::load_file( 'html-parser.class.php', 'libraries' );
|
|
$table = HTML_Parser::parse( $data );
|
|
|
|
// Check if the HTML code could be parsed. If not, this is probably not an HTML file.
|
|
if ( is_wp_error( $table ) ) {
|
|
return false;
|
|
}
|
|
|
|
$this->pad_array_to_max_cols( $table['data'] );
|
|
return $table;
|
|
}
|
|
|
|
/**
|
|
* Tries to import a table via PHPSpreadsheet.
|
|
*
|
|
* @since 2.0.0
|
|
*
|
|
* @param array<string, mixed> $file File to import.
|
|
* @return array<string, mixed>|WP_Error Table array on success, WP_Error on error.
|
|
*/
|
|
protected function _import_phpspreadsheet( array $file ) /* : array|WP_Error */ {
|
|
// Rename the temporary file, as PHPSpreadsheet tries to infer the format from the file's extension.
|
|
if ( '' !== $file['extension'] ) {
|
|
$temp_file = pathinfo( $file['location'] );
|
|
if ( ! isset( $temp_file['extension'] ) || $file['extension'] !== $temp_file['extension'] ) {
|
|
$new_location = "{$temp_file['dirname']}/{$temp_file['filename']}.{$file['extension']}"; // @phpstan-ignore-line
|
|
if ( rename( $file['location'], $new_location ) ) {
|
|
$file['location'] = $new_location;
|
|
}
|
|
}
|
|
}
|
|
|
|
try {
|
|
// Treat all cell values as strings, except for formulas (due to recognition of quoted/escaped formulas like `'=A2`).
|
|
\TablePress\PhpOffice\PhpSpreadsheet\Cell\Cell::setValueBinder( new \TablePress\PhpOffice\PhpSpreadsheet\Cell\StringValueBinder() );
|
|
\TablePress\PhpOffice\PhpSpreadsheet\Cell\Cell::getValueBinder()->setFormulaConversion( false ); // @phpstan-ignore-line
|
|
|
|
/*
|
|
* Try to detect a reader from the file extension and MIME type.
|
|
* Fall back to CSV if no reader could be determined.
|
|
*/
|
|
try {
|
|
$reader = \TablePress\PhpOffice\PhpSpreadsheet\IOFactory::createReaderForFile( $file['location'] );
|
|
} catch ( \TablePress\PhpOffice\PhpSpreadsheet\Reader\Exception $exception ) {
|
|
$reader = \TablePress\PhpOffice\PhpSpreadsheet\IOFactory::createReader( 'Csv' );
|
|
// Append .csv to the file name, so that \TablePress\PhpOffice\PhpSpreadsheet\Reader\Csv::canRead() returns true.
|
|
$new_location = $file['location'] . '.csv';
|
|
if ( rename( $file['location'], $new_location ) ) {
|
|
$file['location'] = $new_location;
|
|
}
|
|
}
|
|
|
|
$class_name = get_class( $reader );
|
|
$class_type = explode( '\\', $class_name );
|
|
$detected_format = strtolower( array_pop( $class_type ) );
|
|
|
|
if ( 'csv' === $detected_format ) {
|
|
$reader->setInputEncoding( \TablePress\PhpOffice\PhpSpreadsheet\Reader\Csv::GUESS_ENCODING ); // @phpstan-ignore-line
|
|
// @phpstan-ignore-next-line
|
|
$reader->setEscapeCharacter( ( PHP_VERSION_ID < 70400 ) ? "\x0" : '' ); // Disable the proprietary escape mechanism of PHP's fgetcsv() in PHP >= 7.4.
|
|
}
|
|
|
|
$reader->setIncludeCharts( false );
|
|
$reader->setReadEmptyCells( true );
|
|
|
|
// For non-Excel files, import only the data, but ignore formatting.
|
|
if ( ! in_array( $detected_format, array( 'xlsx', 'xls' ), true ) ) {
|
|
$reader->setReadDataOnly( true );
|
|
}
|
|
|
|
// For formats where it's supported, import only the first sheet.
|
|
if ( in_array( $detected_format, array( 'csv', 'html', 'slk' ), true ) ) {
|
|
$reader->setSheetIndex( 0 ); // @phpstan-ignore-line
|
|
}
|
|
|
|
$spreadsheet = $reader->load( $file['location'] );
|
|
$worksheet = $spreadsheet->getActiveSheet();
|
|
$cell_collection = $worksheet->getCellCollection();
|
|
$comments = $worksheet->getComments();
|
|
|
|
$table = array(
|
|
'data' => array(),
|
|
);
|
|
|
|
$min_col = 'A';
|
|
$min_row = 1;
|
|
$max_col = $worksheet->getHighestColumn();
|
|
$max_row = $worksheet->getHighestRow();
|
|
|
|
// Adapted from \TablePress\PhpOffice\PhpSpreadsheet\Worksheet\Worksheet::rangeToArray().
|
|
++$max_col; // Due to for-loop with characters for columns.
|
|
for ( $row = $min_row; $row <= $max_row; $row++ ) {
|
|
$row_data = array();
|
|
for ( $col = $min_col; $col !== $max_col; $col++ ) {
|
|
$cell_reference = $col . $row;
|
|
if ( ! $cell_collection->has( $cell_reference ) ) {
|
|
$row_data[] = '';
|
|
continue;
|
|
}
|
|
|
|
$cell = $cell_collection->get( $cell_reference );
|
|
$value = $cell->getValue();
|
|
if ( is_null( $value ) ) {
|
|
$row_data[] = '';
|
|
continue;
|
|
}
|
|
|
|
$cell_has_hyperlink = $worksheet->hyperlinkExists( $cell_reference ) && ! $worksheet->getHyperlink( $cell_reference )->isInternal();
|
|
|
|
if ( $value instanceof \TablePress\PhpOffice\PhpSpreadsheet\RichText\RichText ) {
|
|
$cell_data = $this->parse_rich_text( $value, $cell_has_hyperlink );
|
|
} else {
|
|
$cell_data = (string) $value;
|
|
}
|
|
|
|
// Apply data type formatting.
|
|
$style = $spreadsheet->getCellXfByIndex( $cell->getXfIndex() );
|
|
|
|
$format = $style->getNumberFormat()->getFormatCode() ?? \TablePress\PhpOffice\PhpSpreadsheet\Style\NumberFormat::FORMAT_GENERAL;
|
|
|
|
// Fix floating point precision issues with numbers in the "General" Excel .xlsx format.
|
|
if ( 'xlsx' === $detected_format && \TablePress\PhpOffice\PhpSpreadsheet\Style\NumberFormat::FORMAT_GENERAL === $format && is_numeric( $cell_data ) ) {
|
|
$cell_data = (string) (float) $cell_data; // Type-cast strings to float and back.
|
|
}
|
|
$cell_data = \TablePress\PhpOffice\PhpSpreadsheet\Style\NumberFormat::toFormattedString(
|
|
$cell_data,
|
|
$format,
|
|
array( $this, 'format_color' )
|
|
);
|
|
|
|
if ( strlen( $cell_data ) > 1 && '=' === $cell_data[0] ) {
|
|
if ( $style->getQuotePrefix() ) {
|
|
// Prepend a ' to quoted/escaped formulas (so that they are shown as text). This is currently not supported (at least) for the XLS format.
|
|
$cell_data = "'{$cell_data}";
|
|
} else {
|
|
// Bail early, to not add inline HTML styling around formulas, as they won't work anymore then.
|
|
$row_data[] = $cell_data;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
$font = $style->getFont();
|
|
|
|
if ( $font->getSuperscript() ) {
|
|
$cell_data = "<sup>{$cell_data}</sup>";
|
|
}
|
|
if ( $font->getSubscript() ) {
|
|
$cell_data = "<sub>{$cell_data}</sub>";
|
|
}
|
|
if ( $font->getStrikethrough() ) {
|
|
$cell_data = "<del>{$cell_data}</del>";
|
|
}
|
|
if ( $font->getUnderline() !== \TablePress\PhpOffice\PhpSpreadsheet\Style\Font::UNDERLINE_NONE && ! $cell_has_hyperlink ) {
|
|
$cell_data = "<u>{$cell_data}</u>";
|
|
}
|
|
if ( $font->getBold() ) {
|
|
$cell_data = "<strong>{$cell_data}</strong>";
|
|
}
|
|
if ( $font->getItalic() ) {
|
|
$cell_data = "<em>{$cell_data}</em>";
|
|
}
|
|
$color = $font->getColor()->getRGB();
|
|
if ( '' !== $color && '000000' !== $color && ! $cell_has_hyperlink ) {
|
|
// Don't add the span if the color is black, as that's the default, or if it's in a hyperlink.
|
|
$color_css = esc_attr( "color:#{$color};" );
|
|
$cell_data = "<span style=\"{$color_css}\">{$cell_data}</span>";
|
|
}
|
|
|
|
// Convert Hyperlinks to HTML code.
|
|
if ( $cell_has_hyperlink ) {
|
|
$url = $worksheet->getHyperlink( $cell_reference )->getUrl();
|
|
if ( '' !== $url ) {
|
|
$title = $worksheet->getHyperlink( $cell_reference )->getTooltip();
|
|
if ( '' !== $title ) {
|
|
$title = ' title="' . esc_attr( $title ) . '"';
|
|
}
|
|
$url = esc_url( $url );
|
|
$cell_data = "<a href=\"{$url}\"{$title}>{$cell_data}</a>";
|
|
}
|
|
}
|
|
|
|
// Add comments.
|
|
if ( isset( $comments[ $cell_reference ] ) ) {
|
|
$sanitized_comment = esc_html( $worksheet->getComment( $cell_reference )->getText()->getPlainText() );
|
|
if ( '' !== $sanitized_comment ) {
|
|
$cell_data .= '<div class="comment">' . $sanitized_comment . '</div>';
|
|
}
|
|
}
|
|
|
|
$row_data[] = $cell_data;
|
|
}
|
|
$table['data'][] = $row_data;
|
|
}
|
|
|
|
// Convert merged cells to trigger words.
|
|
$merged_cells = $worksheet->getMergeCells();
|
|
foreach ( $merged_cells as $merged_cells_range ) {
|
|
$cells = explode( ':', $merged_cells_range );
|
|
$first_cell = \TablePress\PhpOffice\PhpSpreadsheet\Cell\Coordinate::indexesFromString( $cells[0] );
|
|
$last_cell = \TablePress\PhpOffice\PhpSpreadsheet\Cell\Coordinate::indexesFromString( $cells[1] );
|
|
for ( $row_idx = $first_cell[1]; $row_idx <= $last_cell[1]; $row_idx++ ) {
|
|
for ( $column_idx = $first_cell[0]; $column_idx <= $last_cell[0]; $column_idx++ ) {
|
|
if ( $row_idx === $first_cell[1] && $column_idx === $first_cell[0] ) {
|
|
continue; // Keep value of first cell.
|
|
} elseif ( $row_idx === $first_cell[1] && $column_idx > $first_cell[0] ) {
|
|
$table['data'][ $row_idx - 1 ][ $column_idx - 1 ] = '#colspan#';
|
|
} elseif ( $row_idx > $first_cell[1] && $column_idx === $first_cell[0] ) {
|
|
$table['data'][ $row_idx - 1 ][ $column_idx - 1 ] = '#rowspan#';
|
|
} else {
|
|
$table['data'][ $row_idx - 1 ][ $column_idx - 1 ] = '#span#';
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Save PHP memory.
|
|
$spreadsheet->disconnectWorksheets();
|
|
unset( $comments, $cell_collection, $worksheet, $spreadsheet );
|
|
|
|
return $table;
|
|
} catch ( \TablePress\PhpOffice\PhpSpreadsheet\Reader\Exception $exception ) {
|
|
return new WP_Error( 'table_import_phpspreadsheet_failed', '', 'Exception: ' . $exception->getMessage() );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parses PHPSpreadsheet RichText elements and converts formatting to HTML tags.
|
|
*
|
|
* @param \TablePress\PhpOffice\PhpSpreadsheet\RichText\RichText $value RichText element.
|
|
* @param bool $cell_has_hyperlink Whether the cell has a hyperlink.
|
|
* @return string Cell value with HTML formatting.
|
|
*/
|
|
protected function parse_rich_text( \TablePress\PhpOffice\PhpSpreadsheet\RichText\RichText $value, bool $cell_has_hyperlink ): string {
|
|
$cell_data = '';
|
|
$elements = $value->getRichTextElements();
|
|
foreach ( $elements as $element ) {
|
|
$element_data = $element->getText();
|
|
|
|
// Rich text start?
|
|
if ( $element instanceof \TablePress\PhpOffice\PhpSpreadsheet\RichText\Run ) {
|
|
$font = $element->getFont();
|
|
|
|
if ( $font->getSuperscript() ) {
|
|
$element_data = "<sup>{$element_data}</sup>";
|
|
}
|
|
if ( $font->getSubscript() ) {
|
|
$element_data = "<sub>{$element_data}</sub>";
|
|
}
|
|
if ( $font->getStrikethrough() ) {
|
|
$element_data = "<del>{$element_data}</del>";
|
|
}
|
|
if ( $font->getUnderline() !== \TablePress\PhpOffice\PhpSpreadsheet\Style\Font::UNDERLINE_NONE ) {
|
|
$element_data = "<u>{$element_data}</u>";
|
|
}
|
|
if ( $font->getBold() ) {
|
|
$element_data = "<strong>{$element_data}</strong>";
|
|
}
|
|
if ( $font->getItalic() ) {
|
|
$element_data = "<em>{$element_data}</em>";
|
|
}
|
|
$color = $font->getColor()->getRGB();
|
|
if ( '' !== $color && '000000' !== $color && ! $cell_has_hyperlink ) {
|
|
// Don't add the span if the color is black, as that's the default, or if it's in a hyperlink.
|
|
$color_css = esc_attr( "color:#{$color};" );
|
|
$element_data = "<span style=\"{$color_css}\">{$element_data}</span>";
|
|
}
|
|
}
|
|
|
|
$cell_data .= $element_data;
|
|
}
|
|
return $cell_data;
|
|
}
|
|
|
|
/**
|
|
* Adds color to formatted string as inline style, e.g. from conditional formatting.
|
|
*
|
|
* @param string $value Plain formatted value without color.
|
|
* @param string $format_code Format code.
|
|
* @return string Value with color format applied.
|
|
*/
|
|
public function format_color( string $value, string $format_code ): string {
|
|
// Color information, e.g. [Red] is always at the beginning of the format code.
|
|
$color = '';
|
|
if ( 1 === preg_match( '/^\\[[a-zA-Z]+\\]/', $format_code, $matches ) ) {
|
|
$color = str_replace( array( '[', ']' ), '', $matches[0] );
|
|
$color = strtolower( $color );
|
|
}
|
|
|
|
if ( '' !== $color ) {
|
|
$color = esc_attr( "color:{$color};" );
|
|
$value = "<span style=\"{$color}\">{$value}</span>";
|
|
}
|
|
|
|
return $value;
|
|
}
|
|
|
|
} // class TablePress_Import_PHPSpreadsheet
|