diff options
author | Ian Bicking <ian@ianbicking.org> | 2005-08-22 23:29:31 +0000 |
---|---|---|
committer | Ian Bicking <ian@ianbicking.org> | 2005-08-22 23:29:31 +0000 |
commit | 82d331036f311709b77a1e3a90a9b191c9e49c61 (patch) | |
tree | 2c91bcd0181be8babe4146c3f448429e4840f29f /paste/webkit/FakeWebware/MiscUtils/CSVParser.py | |
parent | 36d1c63f5e12f1dfd6d5187359093a96a2fd2de4 (diff) | |
download | paste-git-82d331036f311709b77a1e3a90a9b191c9e49c61.tar.gz |
Moved webkit to separate project
Diffstat (limited to 'paste/webkit/FakeWebware/MiscUtils/CSVParser.py')
-rw-r--r-- | paste/webkit/FakeWebware/MiscUtils/CSVParser.py | 265 |
1 files changed, 0 insertions, 265 deletions
# Contents of paste/webkit/FakeWebware/MiscUtils/CSVParser.py (blob 047b3b9,
# 265 lines), which this commit deleted ("deleted file mode 100644") when
# webkit was moved to a separate project.  Shown here as plain source.

import types

# The states of the parser.  Each value is also the index of the matching
# handler in CSVParser._handlers.
StartRecord = 0
StartField = 1
InField = 2
QuoteInField = 3
InQuotedField = 4
QuoteInQuotedField = 5
EndQuotedField = 6

# State handlers can return Finished to terminate parsing early
Finished = 10

# String types accepted by joinCSVFields(): (str, unicode) on Python 2,
# plain str where the types module no longer provides StringTypes.
_StringTypes = getattr(types, 'StringTypes', (str,))


class ParseError(Exception):
    """Raised when a line cannot be parsed as CSV."""
    pass


class CSVParser:
    """
    Parses CSV files including all subtleties such as:
      * commas in fields
      * double quotes in fields
      * embedded newlines in fields

    Examples of programs that produce such beasts include
    MySQL and Excel

    For a higher-level, friendlier CSV class with many conveniences,
    see DataTable (which uses this class for its parsing).

    Example:
        records = []
        parse = CSVParser().parse
        for line in lines:
            results = parse(line)
            if results is not None:
                records.append(results)

    CREDIT

    The algorithm was taken directly from the open source Python
    C-extension, csv:
        http://www.object-craft.com.au/projects/csv/

    It would be nice to use the csv module when present, since it is
    substantially faster. Before that can be done, it needs to support
    allowComments and stripWhitespace, and pass the TestCSVParser.py
    test suite.
    """

    def __init__(self, allowComments=1, stripWhitespace=1, fieldSep=',',
                 autoReset=1, doubleQuote=1):
        """
        Create a parser.

        allowComments -- if true, a record whose first character is '#'
            is skipped; parse() returns an empty list for it.
        stripWhitespace -- if true, spaces before a field and after a
            closing quote are skipped, and every returned field is
            stripped of surrounding whitespace.
        fieldSep -- the character that separates fields.
        autoReset -- if true, parse() calls reset() by itself on the
            first call after a ParseError.
        doubleQuote -- if true, a quote met inside a quoted field is
            checked for a following quote ("" is stored as "); if false,
            the first quote ends the quoted field.
        """
        # settings
        self._allowComments = allowComments
        self._stripWhitespace = stripWhitespace
        self._doubleQuote = doubleQuote
        self._fieldSep = fieldSep
        self._autoReset = autoReset

        # Other
        self._state = StartRecord
        self._fields = []
        self._hadParseError = False
        self._field = []  # a list of chars for the cur field
        self.addChar = self._field.append

        # The handlers for the various states, indexed by state value
        self._handlers = [
            self.startRecord,
            self.startField,
            self.inField,
            self.quoteInField,
            self.inQuotedField,
            self.quoteInQuotedField,
            self.endQuotedField,
        ]


    ## Parse ##

    def parse(self, line):
        """
        Parse the single line and return a list of string fields, or
        None if the CSV record contains embedded newlines and the
        record is not yet complete.

        A single trailing line terminator (CR, LF or CRLF) is accepted.
        Raises ParseError if a line terminator is followed by more text,
        or if a state handler rejects a character.
        """
        if self._autoReset and self._hadParseError:
            self.reset()
        handlers = self._handlers

        i = 0
        lineLen = len(line)
        while i < lineLen:
            c = line[i]
            if c == '\r':
                i += 1
                if i == lineLen:
                    break  # mac end of line
                c = line[i]
                if c == '\n':
                    i += 1
                    if i == lineLen:
                        break  # dos end of line

                self._hadParseError = True
                raise ParseError('Newline inside string')

            elif c == '\n':
                i += 1
                if i == lineLen:
                    break  # unix end of line

                self._hadParseError = True
                raise ParseError('Newline inside string')

            else:
                if handlers[self._state](c) == Finished:  # process a character
                    break

            i += 1

        handlers[self._state]('\0')  # signal the end of the input

        if self._state == StartRecord:
            fields = self._fields
            self._fields = []
            if self._stripWhitespace:
                fields = [field.strip() for field in fields]
            return fields
        else:
            return None  # indicates multi-line record; eg not finished


    ## Reset ##

    def reset(self):
        """
        Resets the parser to a fresh state in order to recover from
        exceptions. But if autoReset is true (the default), this is
        done automatically.
        """
        self._fields = []
        # Drop characters buffered for the field that was being read when
        # the error occurred, so they cannot leak into the next record.
        self._field = []
        self.addChar = self._field.append
        self._state = StartRecord
        # Clear the same flag that parse() tests; otherwise every later
        # parse() call would reset again and break multi-line records.
        self._hadParseError = False


    ## State Handlers ##

    # Each handler receives one character; '\0' signals the end of the line.

    def startRecord(self, c):
        if c != '\0':  # not empty line
            if c == '#' and self._allowComments:
                return Finished
            else:
                self._state = StartField
                self.startField(c)

    def startField(self, c):
        if c == '"':
            self._state = InQuotedField  # start quoted field
        elif c == self._fieldSep:
            self.saveField()  # save empty field
        elif c == ' ' and self._stripWhitespace:
            pass  # skip over preceding whitespace
        elif c == '\0':
            self.saveField()  # save empty field
            self._state = StartRecord
        else:
            self.addChar(c)  # begin new unquoted field
            self._state = InField

    def inField(self, c):
        # in unquoted field
        if c == self._fieldSep:
            self.saveField()
            self._state = StartField
        elif c == '\0':
            self.saveField()  # end of line
            self._state = StartRecord
        elif c == '"' and self._doubleQuote:
            self._state = QuoteInField
        else:
            self.addChar(c)  # normal character

    def quoteInField(self, c):
        self.addChar('"')
        if c == '"':
            self._state = InField  # save "" as "
        elif c == '\0':
            self.saveField()  # end of line
            self._state = StartRecord
        elif c == self._fieldSep:
            self.saveField()
            self._state = StartField
        else:
            self.addChar(c)  # normal character
            self._state = InField

    def inQuotedField(self, c):
        if c == '"':
            if self._doubleQuote:
                self._state = QuoteInQuotedField
            else:
                self.saveField()  # end of field
                self._state = EndQuotedField
        elif c == '\0':
            self.addChar('\n')  # end of line
        else:
            self.addChar(c)  # normal character

    def quoteInQuotedField(self, c):
        if c == '"':
            self.addChar('"')  # save "" as "
            self._state = InQuotedField
        elif c == self._fieldSep:
            self.saveField()
            self._state = StartField
        elif c == ' ' and self._stripWhitespace:
            pass  # skip it
        elif c == '\0':
            self.saveField()  # end of line
            self._state = StartRecord
        else:
            self._hadParseError = True  # illegal
            raise ParseError('%s expected after "' % self._fieldSep)

    def endQuotedField(self, c):
        if c == self._fieldSep:  # seen closing " on quoted field
            self._state = StartField  # wait for new field
        elif c == '\0':
            self._state = StartRecord  # end of line
        else:
            self._hadParseError = True
            raise ParseError('%s expected after "' % self._fieldSep)

    def saveField(self):
        """Append the buffered characters as a field and start a new buffer."""
        self._fields.append(''.join(self._field))
        self._field = []
        self.addChar = self._field.append


# Call the global function parse() if you like the default settings of the CSVParser
_parser = CSVParser()
parse = _parser.parse


def joinCSVFields(fields):
    """
    Returns a CSV record (eg a string) from a sequence of fields.
    Fields containing commas (,) or double quotes (") are quoted
    and double quotes are escaped (""). The terminating newline is
    NOT included.
    """
    newFields = []
    for field in fields:
        assert isinstance(field, _StringTypes), \
            'CSV fields must be strings, not %r' % (field,)
        if '"' in field:
            newField = '"' + field.replace('"', '""') + '"'
        elif ',' in field:
            newField = '"' + field + '"'
        else:
            newField = field
        newFields.append(newField)
    return ','.join(newFields)