> */
	public $error_info = array();

	/**
	 * Class Constructor.
	 *
	 * @since 1.0.0
	 */
	public function __construct() {
		// Unused.
	}

	/**
	 * Load CSV data that shall be parsed.
	 *
	 * A trailing line break is appended if it is missing, because parse() only
	 * completes a row when it reaches a line break.
	 *
	 * @since 1.0.0
	 *
	 * @param string $data Data to be parsed.
	 */
	public function load_data( string $data ): void {
		// Check for mandatory trailing line break.
		if ( ! str_ends_with( $data, "\n" ) ) {
			$data .= "\n";
		}

		$this->import_data = $data;
	}

	/**
	 * Detect the CSV delimiter, by analyzing some rows to determine the most probable delimiter character.
	 *
	 * Every non-enclosed character that does not match the non-delimiter character list is counted
	 * per line, for at most $this->delimiter_search_max_lines lines. Characters with a consistent
	 * count in every analyzed line are ranked via _check_delimiter_count(). If none qualifies, the
	 * character that was found in the most lines is used, and a comma is the final fallback.
	 *
	 * @since 1.0.0
	 *
	 * @return string Most probable delimiter character.
	 */
	public function find_delimiter(): string {
		$data = $this->import_data;

		$delimiter_count = array();
		$enclosed = false;
		$current_line = 0;

		// The pattern is the same for every character, so it is built only once, outside of the loop.
		$non_delimiter_pattern = '#[' . $this->non_delimiter_chars . ']#i';

		// Walk through each character in the CSV string (up to $this->delimiter_search_max_lines) and search potential delimiter characters.
		$data_length = strlen( $data );
		for ( $i = 0; $i < $data_length; $i++ ) {
			$prev_char = ( $i - 1 >= 0 ) ? $data[ $i - 1 ] : '';
			$curr_char = $data[ $i ];
			$next_char = ( $i + 1 < $data_length ) ? $data[ $i + 1 ] : '';

			if ( $curr_char === $this->enclosure ) {
				// Open and closing quotes.
				if ( ! $enclosed || $next_char !== $this->enclosure ) {
					$enclosed = ! $enclosed; // Flip bool.
				} else {
					// Doubled enclosure character inside an enclosed cell (an escaped quote).
					++$i; // Skip next character.
				}
			} elseif ( ( ( "\n" === $curr_char && "\r" !== $prev_char ) || "\r" === $curr_char ) && ! $enclosed ) {
				// Reached end of a line. The "\n" of a "\r\n" pair is not counted a second time.
				++$current_line;
				if ( $current_line >= $this->delimiter_search_max_lines ) {
					break;
				}
			} elseif ( ! $enclosed ) {
				// At this point, $curr_char seems to be used as a delimiter, as it is not enclosed.
				// Count $curr_char if it is not in the $this->non_delimiter_chars list.
				if ( 0 === preg_match( $non_delimiter_pattern, $curr_char ) ) {
					if ( ! isset( $delimiter_count[ $curr_char ][ $current_line ] ) ) {
						$delimiter_count[ $curr_char ][ $current_line ] = 0; // Initialize empty.
					}
					++$delimiter_count[ $curr_char ][ $current_line ];
				}
			}
		}

		// Find most probable delimiter, by sorting their counts.
		$potential_delimiters = array();
		foreach ( $delimiter_count as $char => $line_counts ) {
			// PHP turns numeric string array keys into integers, so the character is cast back to a string.
			$char = (string) $char;
			$is_possible_delimiter = $this->_check_delimiter_count( $char, $line_counts, $current_line );
			if ( false !== $is_possible_delimiter ) {
				$potential_delimiters[ $is_possible_delimiter ] = $char;
			}
		}
		ksort( $potential_delimiters );

		// If no valid delimiter was found, use the character that was found in most rows.
		if ( empty( $potential_delimiters ) ) {
			$delimiter_counts = array_map( 'count', $delimiter_count );
			arsort( $delimiter_counts, SORT_NUMERIC );
			$potential_delimiters = array_keys( $delimiter_counts );
		}

		// If still no delimiter was found, fall back to a comma.
		if ( empty( $potential_delimiters ) ) {
			$potential_delimiters = array( ',' );
		}

		// Return first array element, as that is the best candidate. The cast guards against integer array keys.
		return (string) array_shift( $potential_delimiters );
	}

	/**
	 * Check if passed character can be a delimiter, by checking counts in each line.
	 *
	 * The returned sort key is built from the position of the character in the preferred delimiter
	 * character list (9 if it is not in the list), a match quality digit (1 for identical counts,
	 * 2 if a line had one occurrence more than the first line), and 99999 minus the count of the first line.
	 *
	 * @since 1.0.0
	 *
	 * @param string $char         Character to check.
	 * @param int[]  $line_counts  Counts for the characters in the lines.
	 * @param int    $number_lines Number of lines.
	 * @return bool|string False if delimiter is not possible, string to be used as a sort key if character could be a delimiter.
	 */
	protected function _check_delimiter_count( string $char, array $line_counts, int $number_lines ) /* : bool|string */ {
		// Was the potential delimiter found in every line?
		if ( count( $line_counts ) !== $number_lines ) {
			return false;
		}

		// Check if the count in every line is the same (or one higher for an "almost").
		$first = null;
		$equal = null;
		$almost = false;
		foreach ( $line_counts as $count ) {
			if ( is_null( $first ) ) {
				// The count of the first line is the reference for all other lines.
				$first = $count;
			} elseif ( $count === $first && false !== $equal ) {
				$equal = true;
			} elseif ( $count === $first + 1 && false !== $equal ) {
				$equal = true;
				$almost = true;
			} else {
				// Once a line deviates, $equal stays false for all remaining lines.
				$equal = false;
			}
		}

		// Check equality only if there's more than one line.
		if ( $number_lines > 1 && ! $equal ) {
			return false;
		}

		// At this point, count is equal in all lines, so determine a string to sort priority.
		$match = ( $almost ) ? 2 : 1;

		// There must not be more than 9 characters in the preferred delimiter character list.
		$pref = strpos( $this->preferred_delimiter_chars, $char );
		if ( false === $pref ) {
			$pref = 9;
		}

		return $pref . $match . '.' . ( 99999 - $first );
	}

	/**
	 * Parse CSV string into a two-dimensional array.
	 *
	 * Syntax errors do not stop the parsing. They are recorded in $this->error_info (keyed by
	 * "{row}-{column}", both 1-based) and $this->error is raised: type 2 for an enclosure character
	 * inside a non-enclosed cell, type 1 for a single (unescaped) enclosure character inside an enclosed cell.
	 *
	 * @since 1.0.0
	 *
	 * @param string $delimiter Delimiter character for the CSV parsing.
	 * @return array<int, array<int, string>> Two-dimensional array with the data from the CSV string.
	 */
	public function parse( string $delimiter ): array {
		$data = $this->import_data;

		// Filter delimiter from the list, if it is a whitespace character.
		$white_spaces = str_replace( $delimiter, '', " \t\x0B\0" );

		$rows = array(); // Complete rows.
		$row = array(); // Row that is currently built.
		$column = 0; // Current column index.
		$cell_content = ''; // Content of the currently processed cell.
		$enclosed = false;
		$was_enclosed = false; // Whether the current cell was enclosed. Only non-enclosed cells are trimmed of whitespace.

		// Walk through each character in the CSV string.
		$data_length = strlen( $data );
		for ( $i = 0; $i < $data_length; $i++ ) {
			$curr_char = $data[ $i ];
			$next_char = ( $i + 1 < $data_length ) ? $data[ $i + 1 ] : '';

			if ( $curr_char === $this->enclosure ) {
				// Open/close quotes, and inline quotes.
				if ( ! $enclosed ) {
					if ( '' === ltrim( $cell_content, $white_spaces ) ) {
						// Only whitespace precedes the enclosure character, so it opens an enclosed cell.
						$enclosed = true;
						$was_enclosed = true;
					} else {
						$this->error = 2;
						$error_line = count( $rows ) + 1;
						$error_column = $column + 1;
						if ( ! isset( $this->error_info[ "{$error_line}-{$error_column}" ] ) ) {
							$this->error_info[ "{$error_line}-{$error_column}" ] = array(
								'type'   => 2,
								'info'   => "Syntax error found in line {$error_line}. Non-enclosed fields can not contain double-quotes.",
								'line'   => $error_line,
								'column' => $error_column,
							);
						}
						$cell_content .= $curr_char;
					}
				} elseif ( $next_char === $this->enclosure ) {
					// Enclosure character within enclosed cell (" encoded as "").
					$cell_content .= $curr_char;
					++$i; // Skip next character.
				} elseif ( $next_char !== $delimiter && "\r" !== $next_char && "\n" !== $next_char ) {
					// for-loop (instead of while-loop) that skips whitespace.
					for ( $x = ( $i + 1 ); isset( $data[ $x ] ) && '' === ltrim( $data[ $x ], $white_spaces ); $x++ ) { // phpcs:ignore Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed,Generic.CodeAnalysis.EmptyStatement.DetectedFor
						// Action is in iterator check.
					}
					// $x can point past the end of the data (trailing whitespace without a final line break), so the offset is checked first.
					if ( isset( $data[ $x ] ) && $data[ $x ] === $delimiter ) {
						// Only whitespace between the closing enclosure and the delimiter: continue at the delimiter.
						$enclosed = false;
						$i = $x;
					} else {
						if ( $this->error < 1 ) {
							$this->error = 1;
						}
						$error_line = count( $rows ) + 1;
						$error_column = $column + 1;
						if ( ! isset( $this->error_info[ "{$error_line}-{$error_column}" ] ) ) {
							$this->error_info[ "{$error_line}-{$error_column}" ] = array(
								'type'   => 1,
								'info'   => "Syntax error found in line {$error_line}. A single double-quote was found within an enclosed string. Enclosed double-quotes must be escaped with a second double-quote.",
								'line'   => $error_line,
								'column' => $error_column,
							);
						}
						$cell_content .= $curr_char;
						$enclosed = false;
					}
				} else {
					// The " was the closing one for the cell.
					$enclosed = false;
				}
			} elseif ( ( $curr_char === $delimiter || "\n" === $curr_char || "\r" === $curr_char ) && ! $enclosed ) {
				// End of cell (by $delimiter), or end of line (by line break, and not enclosed!).
				$row[ $column ] = ( $was_enclosed ) ? $cell_content : trim( $cell_content );
				$cell_content = '';
				$was_enclosed = false;
				++$column;

				// End of line.
				if ( "\n" === $curr_char || "\r" === $curr_char ) {
					// Append completed row.
					$rows[] = $row;
					$row = array();
					$column = 0;
					if ( "\r" === $curr_char && "\n" === $next_char ) {
						// Skip next character in \r\n line breaks.
						++$i;
					}
				}
			} else {
				// Append character to current cell.
				$cell_content .= $curr_char;
			}
		}

		return $rows;
	}
} // class CSV_Parser