$file File to import. * @return array|WP_Error Table array on success, WP_Error on error. */ public function import_table( array $file ) /* : array|WP_Error */ { $data = file_get_contents( $file['location'] ); if ( false === $data ) { return new WP_Error( 'table_import_phpspreadsheet_data_read', '', $file['location'] ); } // Remove a possible UTF-8 Byte-Order Mark (BOM). $bom = pack( 'CCC', 0xef, 0xbb, 0xbf ); if ( str_starts_with( $data, $bom ) ) { $data = substr( $data, 3 ); } if ( '' === $data ) { return new WP_Error( 'table_import_phpspreadsheet_data_empty', '', $file['location'] ); } $table = $this->_maybe_import_json( $data ); if ( false !== $table ) { return $table; } $table = $this->_maybe_import_html( $data ); if ( false !== $table ) { return $table; } return $this->_import_phpspreadsheet( $file ); } /** * Tries to import a table with the JSON format. * * @since 2.0.0 * * @param string $data Data to import. * @return array|false Table array on success, false if the file is not a JSON file. */ protected function _maybe_import_json( string $data ) /* : array|false */ { $data = trim( $data ); // If the file does not begin / end with [ / ] or { / }, it's not a supported JSON file. $first_character = $data[0]; $last_character = $data[-1]; if ( ! ( '[' === $first_character && ']' === $last_character ) && ! ( '{' === $first_character && '}' === $last_character ) ) { return false; } $json_table = json_decode( $data, true ); // Check if JSON could be decoded. If not, this is probably not a JSON file. if ( is_null( $json_table ) ) { return false; } // Specifically cast to an array again. $json_table = (array) $json_table; if ( isset( $json_table['data'] ) ) { // JSON data contained a full export. $table = $json_table; } else { // JSON data contained only the data of a table, but no options. $table = array( 'data' => array() ); foreach ( $json_table as $row ) { // Turn row into indexed arrays with numeric keys. $row = array_values( (array) $row ); // Remove entries of multi-dimensional arrays. foreach ( $row as &$cell ) { if ( is_array( $cell ) ) { $cell = ''; } } unset( $cell ); // Unset use-by-reference parameter of foreach loop. $table['data'][] = $row; } } $this->pad_array_to_max_cols( $table['data'] ); return $table; } /** * Tries to import a table with the HTML format. * * @since 2.0.0 * * @param string $data Data to import. * @return array|false Table array on success, false if the file is not an HTML file. */ protected function _maybe_import_html( string $data ) /* : array|false */ { TablePress::load_file( 'html-parser.class.php', 'libraries' ); $table = HTML_Parser::parse( $data ); // Check if the HTML code could be parsed. If not, this is probably not an HTML file. if ( is_wp_error( $table ) ) { return false; } $this->pad_array_to_max_cols( $table['data'] ); return $table; } /** * Tries to import a table via PHPSpreadsheet. * * @since 2.0.0 * * @param array $file File to import. * @return array|WP_Error Table array on success, WP_Error on error. */ protected function _import_phpspreadsheet( array $file ) /* : array|WP_Error */ { // Rename the temporary file, as PHPSpreadsheet tries to infer the format from the file's extension. if ( '' !== $file['extension'] ) { $temp_file = pathinfo( $file['location'] ); if ( ! isset( $temp_file['extension'] ) || $file['extension'] !== $temp_file['extension'] ) { $new_location = "{$temp_file['dirname']}/{$temp_file['filename']}.{$file['extension']}"; // @phpstan-ignore-line if ( rename( $file['location'], $new_location ) ) { $file['location'] = $new_location; } } } try { // Treat all cell values as strings, except for formulas (due to recognition of quoted/escaped formulas like `'=A2`). \TablePress\PhpOffice\PhpSpreadsheet\Cell\Cell::setValueBinder( new \TablePress\PhpOffice\PhpSpreadsheet\Cell\StringValueBinder() ); \TablePress\PhpOffice\PhpSpreadsheet\Cell\Cell::getValueBinder()->setFormulaConversion( false ); // @phpstan-ignore-line /* * Try to detect a reader from the file extension and MIME type. * Fall back to CSV if no reader could be determined. */ try { $reader = \TablePress\PhpOffice\PhpSpreadsheet\IOFactory::createReaderForFile( $file['location'] ); } catch ( \TablePress\PhpOffice\PhpSpreadsheet\Reader\Exception $exception ) { $reader = \TablePress\PhpOffice\PhpSpreadsheet\IOFactory::createReader( 'Csv' ); // Append .csv to the file name, so that \TablePress\PhpOffice\PhpSpreadsheet\Reader\Csv::canRead() returns true. $new_location = $file['location'] . '.csv'; if ( rename( $file['location'], $new_location ) ) { $file['location'] = $new_location; } } $class_name = get_class( $reader ); $class_type = explode( '\\', $class_name ); $detected_format = strtolower( array_pop( $class_type ) ); if ( 'csv' === $detected_format ) { $reader->setInputEncoding( \TablePress\PhpOffice\PhpSpreadsheet\Reader\Csv::GUESS_ENCODING ); // @phpstan-ignore-line // @phpstan-ignore-next-line $reader->setEscapeCharacter( ( PHP_VERSION_ID < 70400 ) ? "\x0" : '' ); // Disable the proprietary escape mechanism of PHP's fgetcsv() in PHP >= 7.4. } $reader->setIncludeCharts( false ); $reader->setReadEmptyCells( true ); // For non-Excel files, import only the data, but ignore formatting. if ( ! in_array( $detected_format, array( 'xlsx', 'xls' ), true ) ) { $reader->setReadDataOnly( true ); } // For formats where it's supported, import only the first sheet. if ( in_array( $detected_format, array( 'csv', 'html', 'slk' ), true ) ) { $reader->setSheetIndex( 0 ); // @phpstan-ignore-line } $spreadsheet = $reader->load( $file['location'] ); $worksheet = $spreadsheet->getActiveSheet(); $cell_collection = $worksheet->getCellCollection(); $comments = $worksheet->getComments(); $table = array( 'data' => array(), ); $min_col = 'A'; $min_row = 1; $max_col = $worksheet->getHighestColumn(); $max_row = $worksheet->getHighestRow(); // Adapted from \TablePress\PhpOffice\PhpSpreadsheet\Worksheet\Worksheet::rangeToArray(). ++$max_col; // Due to for-loop with characters for columns. for ( $row = $min_row; $row <= $max_row; $row++ ) { $row_data = array(); for ( $col = $min_col; $col !== $max_col; $col++ ) { $cell_reference = $col . $row; if ( ! $cell_collection->has( $cell_reference ) ) { $row_data[] = ''; continue; } $cell = $cell_collection->get( $cell_reference ); $value = $cell->getValue(); if ( is_null( $value ) ) { $row_data[] = ''; continue; } $cell_has_hyperlink = $worksheet->hyperlinkExists( $cell_reference ) && ! $worksheet->getHyperlink( $cell_reference )->isInternal(); if ( $value instanceof \TablePress\PhpOffice\PhpSpreadsheet\RichText\RichText ) { $cell_data = $this->parse_rich_text( $value, $cell_has_hyperlink ); } else { $cell_data = (string) $value; } // Apply data type formatting. $style = $spreadsheet->getCellXfByIndex( $cell->getXfIndex() ); $format = $style->getNumberFormat()->getFormatCode() ?? \TablePress\PhpOffice\PhpSpreadsheet\Style\NumberFormat::FORMAT_GENERAL; // Fix floating point precision issues with numbers in the "General" Excel .xlsx format. if ( 'xlsx' === $detected_format && \TablePress\PhpOffice\PhpSpreadsheet\Style\NumberFormat::FORMAT_GENERAL === $format && is_numeric( $cell_data ) ) { $cell_data = (string) (float) $cell_data; // Type-cast strings to float and back. } $cell_data = \TablePress\PhpOffice\PhpSpreadsheet\Style\NumberFormat::toFormattedString( $cell_data, $format, array( $this, 'format_color' ) ); if ( strlen( $cell_data ) > 1 && '=' === $cell_data[0] ) { if ( $style->getQuotePrefix() ) { // Prepend a ' to quoted/escaped formulas (so that they are shown as text). This is currently not supported (at least) for the XLS format. $cell_data = "'{$cell_data}"; } else { // Bail early, to not add inline HTML styling around formulas, as they won't work anymore then. $row_data[] = $cell_data; continue; } } $font = $style->getFont(); if ( $font->getSuperscript() ) { $cell_data = "{$cell_data}"; } if ( $font->getSubscript() ) { $cell_data = "{$cell_data}"; } if ( $font->getStrikethrough() ) { $cell_data = "{$cell_data}"; } if ( $font->getUnderline() !== \TablePress\PhpOffice\PhpSpreadsheet\Style\Font::UNDERLINE_NONE && ! $cell_has_hyperlink ) { $cell_data = "{$cell_data}"; } if ( $font->getBold() ) { $cell_data = "{$cell_data}"; } if ( $font->getItalic() ) { $cell_data = "{$cell_data}"; } $color = $font->getColor()->getRGB(); if ( '' !== $color && '000000' !== $color && ! $cell_has_hyperlink ) { // Don't add the span if the color is black, as that's the default, or if it's in a hyperlink. $color_css = esc_attr( "color:#{$color};" ); $cell_data = "{$cell_data}"; } // Convert Hyperlinks to HTML code. if ( $cell_has_hyperlink ) { $url = $worksheet->getHyperlink( $cell_reference )->getUrl(); if ( '' !== $url ) { $title = $worksheet->getHyperlink( $cell_reference )->getTooltip(); if ( '' !== $title ) { $title = ' title="' . esc_attr( $title ) . '"'; } $url = esc_url( $url ); $cell_data = "{$cell_data}"; } } // Add comments. if ( isset( $comments[ $cell_reference ] ) ) { $sanitized_comment = esc_html( $worksheet->getComment( $cell_reference )->getText()->getPlainText() ); if ( '' !== $sanitized_comment ) { $cell_data .= '
' . $sanitized_comment . '
'; } } $row_data[] = $cell_data; } $table['data'][] = $row_data; } // Convert merged cells to trigger words. $merged_cells = $worksheet->getMergeCells(); foreach ( $merged_cells as $merged_cells_range ) { $cells = explode( ':', $merged_cells_range ); $first_cell = \TablePress\PhpOffice\PhpSpreadsheet\Cell\Coordinate::indexesFromString( $cells[0] ); $last_cell = \TablePress\PhpOffice\PhpSpreadsheet\Cell\Coordinate::indexesFromString( $cells[1] ); for ( $row_idx = $first_cell[1]; $row_idx <= $last_cell[1]; $row_idx++ ) { for ( $column_idx = $first_cell[0]; $column_idx <= $last_cell[0]; $column_idx++ ) { if ( $row_idx === $first_cell[1] && $column_idx === $first_cell[0] ) { continue; // Keep value of first cell. } elseif ( $row_idx === $first_cell[1] && $column_idx > $first_cell[0] ) { $table['data'][ $row_idx - 1 ][ $column_idx - 1 ] = '#colspan#'; } elseif ( $row_idx > $first_cell[1] && $column_idx === $first_cell[0] ) { $table['data'][ $row_idx - 1 ][ $column_idx - 1 ] = '#rowspan#'; } else { $table['data'][ $row_idx - 1 ][ $column_idx - 1 ] = '#span#'; } } } } // Save PHP memory. $spreadsheet->disconnectWorksheets(); unset( $comments, $cell_collection, $worksheet, $spreadsheet ); return $table; } catch ( \TablePress\PhpOffice\PhpSpreadsheet\Reader\Exception $exception ) { return new WP_Error( 'table_import_phpspreadsheet_failed', '', 'Exception: ' . $exception->getMessage() ); } } /** * Parses PHPSpreadsheet RichText elements and converts formatting to HTML tags. * * @param \TablePress\PhpOffice\PhpSpreadsheet\RichText\RichText $value RichText element. * @param bool $cell_has_hyperlink Whether the cell has a hyperlink. * @return string Cell value with HTML formatting. */ protected function parse_rich_text( \TablePress\PhpOffice\PhpSpreadsheet\RichText\RichText $value, bool $cell_has_hyperlink ): string { $cell_data = ''; $elements = $value->getRichTextElements(); foreach ( $elements as $element ) { $element_data = $element->getText(); // Rich text start? if ( $element instanceof \TablePress\PhpOffice\PhpSpreadsheet\RichText\Run ) { $font = $element->getFont(); if ( $font->getSuperscript() ) { $element_data = "{$element_data}"; } if ( $font->getSubscript() ) { $element_data = "{$element_data}"; } if ( $font->getStrikethrough() ) { $element_data = "{$element_data}"; } if ( $font->getUnderline() !== \TablePress\PhpOffice\PhpSpreadsheet\Style\Font::UNDERLINE_NONE ) { $element_data = "{$element_data}"; } if ( $font->getBold() ) { $element_data = "{$element_data}"; } if ( $font->getItalic() ) { $element_data = "{$element_data}"; } $color = $font->getColor()->getRGB(); if ( '' !== $color && '000000' !== $color && ! $cell_has_hyperlink ) { // Don't add the span if the color is black, as that's the default, or if it's in a hyperlink. $color_css = esc_attr( "color:#{$color};" ); $element_data = "{$element_data}"; } } $cell_data .= $element_data; } return $cell_data; } /** * Adds color to formatted string as inline style, e.g. from conditional formatting. * * @param string $value Plain formatted value without color. * @param string $format_code Format code. * @return string Value with color format applied. */ public function format_color( string $value, string $format_code ): string { // Color information, e.g. [Red] is always at the beginning of the format code. $color = ''; if ( 1 === preg_match( '/^\\[[a-zA-Z]+\\]/', $format_code, $matches ) ) { $color = str_replace( array( '[', ']' ), '', $matches[0] ); $color = strtolower( $color ); } if ( '' !== $color ) { $color = esc_attr( "color:{$color};" ); $value = "{$value}"; } return $value; } } // class TablePress_Import_PHPSpreadsheet