summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorV Narayanan <v.narayanan@sun.com>2009-12-03 17:18:43 +0530
committerV Narayanan <v.narayanan@sun.com>2009-12-03 17:18:43 +0530
commita5aa3b3c919ab53cdcaa657a6446051348371245 (patch)
tree93acd035717949e4567c97f47e5ade78fe2f641c
parent40ec012c905be0262ba5c36bbccfa0db0105e31f (diff)
downloadmariadb-git-a5aa3b3c919ab53cdcaa657a6446051348371245.tar.gz
Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
When a .CSV file for a table in the CSV engine contains \X characters as part of unquoted fields, e.g. 2,naraya\nan, the \n is not interpreted as a newline (it is, however, interpreted as a newline in a quoted field). The old algorithm copied the entire value for an unquoted field without parsing the \X characters. The new algorithm adds the capability to handle \X characters in the unquoted fields of a .CSV file. mysql-test/r/csv.result: Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields Contains additional test output corresponding to the new tests added. mysql-test/t/csv.test: Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields Contains additional tests for testing the behaviour of the CSV storage engine when the fields are not enclosed in quotes and contain \X characters. storage/csv/ha_tina.cc: Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields Changes the parsing logic of the rows in a CSV file, to parse \X characters that might be present in the unquoted fields.
-rw-r--r--mysql-test/r/csv.result56
-rw-r--r--mysql-test/t/csv.test80
-rw-r--r--storage/csv/ha_tina.cc73
3 files changed, 201 insertions, 8 deletions
diff --git a/mysql-test/r/csv.result b/mysql-test/r/csv.result
index 4b96f5a5ed0..97996b484bb 100644
--- a/mysql-test/r/csv.result
+++ b/mysql-test/r/csv.result
@@ -5407,4 +5407,60 @@ test.t1 repair status OK
select * from t1 limit 1;
a
drop table t1;
+#
+# Test for the following cases
+# 1) integers and strings enclosed in quotes
+# 2) integers and strings not enclosed in quotes
+# 3) \X characters with quotes
+# 4) \X characters outside quotes
+#
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+c1 c2
+1 integer sans quotes
+1 string sans quotes
+1 quotes"in between" strings
+1 Integer with quote and string with no quote
+1 escape sequence
+ " \ \a within quotes
+1 escape sequence
+ " \ \a without quotes
+DROP TABLE t1;
+# Test for the case when a field begins with a quote, but does not end in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"string only at the beginning quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
+# Test for the case when a field ends with a quote, but does not begin in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,string with only ending quotes"
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
End of 5.1 tests
diff --git a/mysql-test/t/csv.test b/mysql-test/t/csv.test
index cdf274190dd..ea949f463c9 100644
--- a/mysql-test/t/csv.test
+++ b/mysql-test/t/csv.test
@@ -1819,4 +1819,84 @@ repair table t1;
select * from t1 limit 1;
drop table t1;
+#
+# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields
+#
+
+--echo #
+--echo # Test for the following cases
+--echo # 1) integers and strings enclosed in quotes
+--echo # 2) integers and strings not enclosed in quotes
+--echo # 3) \X characters with quotes
+--echo # 4) \X characters outside quotes
+--echo #
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLD_DATADIR/test/t1.CSV
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+EOF
+--cat_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+--echo # Test for the case when a field begins with a quote, but does not end in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLD_DATADIR/test/t1.CSV
+1,"string only at the beginning quotes
+EOF
+--cat_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+--echo # Test for the case when a field ends with a quote, but does not begin in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLD_DATADIR/test/t1.CSV
+1,string with only ending quotes"
+EOF
+--cat_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
--echo End of 5.1 tests
diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc
index 9cc0f1e607b..fac78986563 100644
--- a/storage/csv/ha_tina.cc
+++ b/storage/csv/ha_tina.cc
@@ -614,6 +614,33 @@ int ha_tina::find_current_row(uchar *buf)
memset(buf, 0, table->s->null_bytes);
+ /*
+ Parse the line obtained using the following algorithm
+
+ BEGIN
+ 1) Store the EOL (end of line) for the current row
+ 2) While not all the fields in the current query have been
+ filled
+ 2.1) If the current character is a quote
+ 2.1.1) While EOL has not been reached
+ a) If end of current field is reached, move
+ to next field and jump to step 2.3
+ b) If current character is a \\ handle
+ \\n, \\r, \\, \\"
+ c) else append the current character into the buffer
+ before checking that EOL has not been reached.
+ 2.2) If the current character is not a quote
+ 2.2.1) While EOL has not been reached
+ a) If the end of field has been reached move to the
+ next field and jump to step 2.3
+ b) If current character is a \\ handle
+ \\n, \\r, \\, \\"
+ c) else append the current character into the buffer
+ before checking that EOL has not been reached.
+ 2.3) Store the current field value and jump to 2)
+ TERMINATE
+ */
+
for (Field **field=table->field ; *field ; field++)
{
char curr_char;
@@ -622,19 +649,23 @@ int ha_tina::find_current_row(uchar *buf)
if (curr_offset >= end_offset)
goto err;
curr_char= file_buff->get_value(curr_offset);
+ /* Handle the case where the first character is a quote */
if (curr_char == '"')
{
- curr_offset++; // Incrementpast the first quote
+ /* Increment past the first quote */
+ curr_offset++;
- for(; curr_offset < end_offset; curr_offset++)
+ /* Loop through the row to extract the values for the current field */
+ for ( ; curr_offset < end_offset; curr_offset++)
{
curr_char= file_buff->get_value(curr_offset);
- // Need to convert line feeds!
+ /* check for end of the current field */
if (curr_char == '"' &&
(curr_offset == end_offset - 1 ||
file_buff->get_value(curr_offset + 1) == ','))
{
- curr_offset+= 2; // Move past the , and the "
+ /* Move past the , and the " */
+ curr_offset+= 2;
break;
}
if (curr_char == '\\' && curr_offset != (end_offset - 1))
@@ -656,7 +687,7 @@ int ha_tina::find_current_row(uchar *buf)
else // ordinary symbol
{
/*
- We are at final symbol and no last quote was found =>
+ If we are at final symbol and no last quote was found =>
we are working with a damaged file.
*/
if (curr_offset == end_offset - 1)
@@ -667,15 +698,41 @@ int ha_tina::find_current_row(uchar *buf)
}
else
{
- for(; curr_offset < end_offset; curr_offset++)
+ for ( ; curr_offset < end_offset; curr_offset++)
{
curr_char= file_buff->get_value(curr_offset);
+ /* Move past the ,*/
if (curr_char == ',')
{
- curr_offset++; // Skip the ,
+ curr_offset++;
break;
}
- buffer.append(curr_char);
+ if (curr_char == '\\' && curr_offset != (end_offset - 1))
+ {
+ curr_offset++;
+ curr_char= file_buff->get_value(curr_offset);
+ if (curr_char == 'r')
+ buffer.append('\r');
+ else if (curr_char == 'n' )
+ buffer.append('\n');
+ else if (curr_char == '\\' || curr_char == '"')
+ buffer.append(curr_char);
+ else /* This could only happen with an externally created file */
+ {
+ buffer.append('\\');
+ buffer.append(curr_char);
+ }
+ }
+ else
+ {
+ /*
+ We are at the final symbol and a quote was found at the end of
+ an unquoted field => we are working with a damaged file.
+ */
+ if (curr_offset == end_offset - 1 && curr_char == '"')
+ goto err;
+ buffer.append(curr_char);
+ }
}
}