summaryrefslogtreecommitdiff
path: root/paste/webkit/FakeWebware/MiscUtils/CSVParser.py
diff options
context:
space:
mode:
Diffstat (limited to 'paste/webkit/FakeWebware/MiscUtils/CSVParser.py')
-rw-r--r--paste/webkit/FakeWebware/MiscUtils/CSVParser.py265
1 files changed, 0 insertions, 265 deletions
diff --git a/paste/webkit/FakeWebware/MiscUtils/CSVParser.py b/paste/webkit/FakeWebware/MiscUtils/CSVParser.py
deleted file mode 100644
index 047b3b9..0000000
--- a/paste/webkit/FakeWebware/MiscUtils/CSVParser.py
+++ /dev/null
@@ -1,265 +0,0 @@
-# The states of the parser
-StartRecord = 0
-StartField = 1
-InField = 2
-QuoteInField = 3
-InQuotedField = 4
-QuoteInQuotedField = 5
-EndQuotedField = 6
-
-# State handlers can return Finished to terminate parsing early
-Finished = 10
-
-
-class ParseError(Exception):
- pass
-
-
-class CSVParser:
- """
- Parses CSV files including all subtleties such as:
- * commas in fields
- * double quotes in fields
- * embedded newlines in fields
- - Examples of programs that produce such beasts include
- MySQL and Excel
-
- For a higher-level, friendlier CSV class with many conveniences,
- see DataTable (which uses this class for its parsing).
-
- Example:
- records = []
- parse = CSVParser().parse
- for line in lines:
- results = parse(line)
- if results is not None:
- records.append(results)
-
- CREDIT
-
- The algorithm was taken directly from the open source Python
- C-extension, csv:
- http://www.object-craft.com.au/projects/csv/
-
- It would be nice to use the csv module when present, since it is
- substantially faster. Before that can be done, it needs to support
- allowComments and stripWhitespace, and pass the TestCSVParser.py
- test suite.
- """
-
- def __init__(self, allowComments=1, stripWhitespace=1, fieldSep=',', autoReset=1, doubleQuote=1):
- """
- @@ document
- """
- # settings
- self._allowComments = allowComments
- self._stripWhitespace = stripWhitespace
- self._doubleQuote = doubleQuote
- self._fieldSep = fieldSep
- self._autoReset = autoReset
-
- # Other
- self._state = StartRecord
- self._fields = []
- self._hadParseError = 0
- self._field = [] # a list of chars for the cur field
- self.addChar = self._field.append
-
- # The handlers for the various states
- self._handlers = [
- self.startRecord,
- self.startField,
- self.inField,
- self.quoteInField,
- self.inQuotedField,
- self.quoteInQuotedField,
- self.endQuotedField,
- ]
-
-
- ## Parse ##
-
- def parse(self, line):
- """
- Parse the single line and return a list or string fields, or
- None if the CSV record contains embedded newlines and the
- record is not yet complete.
- """
- if self._autoReset and self._hadParseError:
- self.reset()
- handlers = self._handlers
-
- i = 0
- lineLen = len(line)
- while i<lineLen:
- c = line[i]
- if c=='\r':
- i += 1
- if i==lineLen:
- break # mac end of line
- c = line[i]
- if c=='\n':
- i += 1
- if i==lineLen:
- break # dos end of line
-
- self._hadParseError = 1
- raise ParseError('Newline inside string')
-
- elif c=='\n':
- i += 1
- if i==lineLen:
- break # unix end of line
-
- self._hadParseError = 1
- raise ParseError('Newline inside string')
-
- else:
- if handlers[self._state](c)==Finished: # process a character
- break
-
- i += 1
-
- handlers[self._state]('\0') # signal the end of the input
-
- if self._state==StartRecord:
- fields = self._fields
- self._fields = []
- if self._stripWhitespace:
- fields = [field.strip() for field in fields]
- return fields
- else:
- return None # indicates multi-line record; eg not finished
-
-
- ## Reset ##
-
- def reset(self):
- """
- Resets the parser to a fresh state in order to recover from
- exceptions. But if autoReset is true (the default), this is
- done automatically.
- """
- self._fields = []
- self._state = StartRecord
- self.hasParseError = 0
-
-
- ## State Handlers ##
-
- def startRecord(self, c):
- if c!='\0': # not empty line
- if c=='#' and self._allowComments:
- return Finished
- else:
- self._state = StartField
- self.startField(c)
-
- def startField(self, c):
- if c=='"':
- self._state = InQuotedField # start quoted field
- elif c==self._fieldSep:
- self.saveField() # save empty field
- elif c==' ' and self._stripWhitespace:
- pass # skip over preceding whitespace
- elif c=='\0':
- self.saveField() # save empty field
- self._state = StartRecord
- else:
- self.addChar(c) # begin new unquoted field
- self._state = InField
-
- def inField(self, c):
- # in unquoted field
- if c==self._fieldSep:
- self.saveField()
- self._state = StartField
- elif c=='\0':
- self.saveField() # end of line
- self._state = StartRecord
- elif c=='"' and self._doubleQuote:
- self._state = QuoteInField
- else:
- self.addChar(c) # normal character
-
- def quoteInField(self, c):
- self.addChar('"')
- if c=='"':
- self._state = InField # save "" as "
- elif c=='\0':
- self.saveField() # end of line
- self._state = StartRecord
- elif c==self._fieldSep:
- self.saveField()
- self._state = StartField
- else:
- self.addChar(c) # normal character
- self._state = InField
-
- def inQuotedField(self, c):
- if c=='"':
- if self._doubleQuote:
- self._state = QuoteInQuotedField
- else:
- self.saveField() # end of field
- self._state = EndQuotedField
- elif c=='\0':
- self.addChar('\n') # end of line
- else:
- self.addChar(c) # normal character
-
- def quoteInQuotedField(self, c):
- if c=='"':
- self.addChar('"') # save "" as "
- self._state = InQuotedField
- elif c==self._fieldSep:
- self.saveField()
- self._state = StartField
- elif c==' ' and self._stripWhitespace:
- pass # skip it
- elif c=='\0':
- self.saveField() # end of line
- self._state = StartRecord
- else:
- self._hadParseError = 1 # illegal
- raise ParseError, '%s expected after "' % self._fieldSep
-
- def endQuotedField(self, c):
- if c==self._fieldSep: # seen closing " on quoted field
- self._state = StartField # wait for new field
- elif c=='\0':
- self._state = StartRecord # end of line
- else:
- self._hadParseError = 1
- raise ParseError, '%s expected after "' % self._fieldSep
-
- def saveField(self):
- self._fields.append(''.join(self._field))
- self._field = []
- self.addChar = self._field.append
-
-
-# Call the global function parse() if you like the default settings of the CSVParser
-_parser = CSVParser()
-parse = _parser.parse
-
-
-import types
-def joinCSVFields(fields):
- """
- Returns a CSV record (eg a string) from a sequence of fields.
- Fields containing commands (,) or double quotes (") are quoted
- and double quotes are escaped (""). The terminating newline is
- NOT included.
- """
- newFields = []
- for field in fields:
- assert type(field) is types.StringType
- if field.find('"')!=-1:
- newField = '"' + field.replace('"', '""') + '"'
- elif field.find(',')!=-1:
- newField = '"' + field + '"'
- else:
- newField = field
- newFields.append(newField)
- return ','.join(newFields)