Bug#39616: Missing quotes from .CSV crashes server

When a CSV file contained comma-separated elements that were not enclosed in quotes, the MySQL server crashed. The old algorithm that parsed the content of a row in MySQL 5.0 assumed that the values of the fields in a .CSV file would always be enclosed in quotes and separated by commas. This caused the old algorithm to fail when the content of the file resembled the following:

    3,"sans quotes"

The CSV engine that is part of MySQL 5.0 expected the above to be:

    "3","sans quotes"

The above is just one example of where the engine failed for what would otherwise be recognized as valid .CSV file content. The proposed fix changes the old algorithm used to parse rows from the .CSV file so that it handles two separate cases:

1) When the current field of the row is enclosed in quotes.
2) When the current field of the row is not enclosed in quotes.

mysql-test/r/csv.result: Contains additional test output corresponding to the new tests added.
mysql-test/t/csv.test: Contains additional tests for testing the behaviour of the CSV storage engine when the fields are not enclosed in quotes.
sql/examples/ha_tina.cc: Changes the parsing logic of the rows in a CSV file to account for fields that are not enclosed in quotes.
author: V Narayanan <v.narayanan@sun.com> 2008-11-18 11:31:03 +0530
committer: V Narayanan <v.narayanan@sun.com> 2008-11-18 11:31:03 +0530
commit: 065321f0be594e5229409996205e6a271477c255 (patch)
tree: c8d0482c7146c7e1c02180e6d153040e4d4429f1 /sql
parent: 88a88636271aee2b89000a99c18d551813f0dcc5 (diff)
download: mariadb-git-065321f0be594e5229409996205e6a271477c255.tar.gz
1 files changed, 80 insertions, 21 deletions
diff --git a/sql/examples/ha_tina.cc b/sql/examples/ha_tina.cc
index 0b57fe86e62..aaaa3b8ffb4 100644
--- a/sql/examples/ha_tina.cc
+++ b/sql/examples/ha_tina.cc
@@ -416,37 +416,96 @@ int ha_tina::find_current_row(byte *buf)
   if ((end_ptr=  find_eoln(share->mapped_file, current_position, share->file_stat.st_size)) == 0)
     DBUG_RETURN(HA_ERR_END_OF_FILE);
 
+  /*
+    Parse the line obtained using the following algorithm
+   
+    BEGIN
+      1) Store the EOL (end of line) for the current row
+      2) Until all the fields in the current query have not been 
+         filled
+         2.1) If the current character begins with a quote
+              2.1.1) Until EOL has not been reached
+                     a) If end of current field is reached, move
+                        to next field and jump to step 2.3
+                     b) If current character begins with \\ handle
+                        \\n, \\r, \\, \\"
+                     c) else append the current character into the buffer
+                        before checking that EOL has not been reached.
+          2.2) If the current character does not begin with a quote
+               2.2.1) Until EOL has not been reached
+                      a) If the end of field has been reached move to the
+                         next field and jump to step 2.3
+                      b) append the current character into the buffer
+          2.3) Store the current field value and jump to 2)
+    TERMINATE
+   */
+
   for (Field **field=table->field ; *field ; field++)
   {
     buffer.length(0);
-    mapped_ptr++; // Increment past the first quote
-    for(;mapped_ptr != end_ptr; mapped_ptr++)
+    /* Handle the case where the first character begins with a quote */
+    if (*mapped_ptr == '"')
     {
-      //Need to convert line feeds!
-      if (*mapped_ptr == '"' && 
-          (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) || (mapped_ptr == end_ptr -1 )))
+      /* Increment past the first quote */
+      mapped_ptr++;
+      /* Loop through the row to extract the values for the current field */
+      for(; mapped_ptr != end_ptr; mapped_ptr++)
       {
-        mapped_ptr += 2; // Move past the , and the "
-        break;
-      } 
-      if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) 
-      {
-        mapped_ptr++;
-        if (*mapped_ptr == 'r')
-          buffer.append('\r');
-        else if (*mapped_ptr == 'n' )
-          buffer.append('\n');
-        else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"'))
-          buffer.append(*mapped_ptr);
-        else  /* This could only happed with an externally created file */
+        /* check for end of the current field */
+        if (*mapped_ptr == '"' && 
+            (mapped_ptr[1] == ',' || mapped_ptr == end_ptr -1 ))
         {
-          buffer.append('\\');
+          /* Move past the , and the " */
+          mapped_ptr += 2;
+          break;
+        } 
+        if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) 
+        {
+          mapped_ptr++;
+          if (*mapped_ptr == 'r')
+            buffer.append('\r');
+          else if (*mapped_ptr == 'n' )
+            buffer.append('\n');
+          else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"'))
+            buffer.append(*mapped_ptr);
+          else  /* This could only happed with an externally created file */
+          {
+            buffer.append('\\');
+            buffer.append(*mapped_ptr);
+          }
+        } 
+        else
+        {
+          /*
+           If no last quote was found, but the end of row has been reached
+           it implies that there has been error.
+          */
+          if (mapped_ptr == end_ptr -1)
+            DBUG_RETURN(HA_ERR_END_OF_FILE);
+          /* Store current character in the buffer for the field */
           buffer.append(*mapped_ptr);
         }
-      } 
-      else
+      }
+    }
+    else
+    {
+      /* Handle the case where the current row does not start with quotes */
+        
+      /* Loop through the row to extract the values for the current field */
+      for (; mapped_ptr != end_ptr; mapped_ptr++)
+      {
+        /* check for end of current field */
+        if (*mapped_ptr == ',')
+        {
+          /* Increment past the current comma */
+          mapped_ptr++;
+          break;
+        }
+        /* store the current character in the buffer for the field */
         buffer.append(*mapped_ptr);
+      }
     }
+    /* Store the field value from the buffer */
     (*field)->store(buffer.ptr(), buffer.length(), buffer.charset());
   }
   next_position= (end_ptr - share->mapped_file)+1;
author	V Narayanan <v.narayanan@sun.com>	2008-11-18 11:31:03 +0530
committer	V Narayanan <v.narayanan@sun.com>	2008-11-18 11:31:03 +0530
commit	065321f0be594e5229409996205e6a271477c255 (patch)
tree	c8d0482c7146c7e1c02180e6d153040e4d4429f1 /sql
parent	88a88636271aee2b89000a99c18d551813f0dcc5 (diff)
download	mariadb-git-065321f0be594e5229409996205e6a271477c255.tar.gz