From 3332228c3626b3d06ede6d4cac1a78c65eae1b76 Mon Sep 17 00:00:00 2001 From: marco Date: Mon, 1 May 2017 20:22:18 +0000 Subject: --- Merging r34871 into '.': A packages/fcl-base/examples/databom.txt A packages/fcl-base/examples/csvbom.pp U packages/fcl-base/examples/README.txt U packages/fcl-base/src/csvreadwrite.pp --- Recording mergeinfo for merge of r34871 into '.': U . # revisions: 34871 git-svn-id: https://svn.freepascal.org/svn/fpc/branches/fixes_3_0@36049 3ad0048d-3df7-0310-abae-a5850022a9f2 --- packages/fcl-base/examples/README.txt | 1 + packages/fcl-base/examples/csvbom.pp | 53 ++++++++++++++++++++++++++++++++++ packages/fcl-base/examples/databom.txt | 2 ++ packages/fcl-base/src/csvreadwrite.pp | 31 ++++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 packages/fcl-base/examples/csvbom.pp create mode 100644 packages/fcl-base/examples/databom.txt diff --git a/packages/fcl-base/examples/README.txt b/packages/fcl-base/examples/README.txt index e6fdb1e2a6..4a89f33250 100644 --- a/packages/fcl-base/examples/README.txt +++ b/packages/fcl-base/examples/README.txt @@ -75,3 +75,4 @@ daemon.pp Test for daemonapp (MVC) testtimer.pp Test for TFPTimer (MVC) testini.pp Test/Demo for inifiles, ReadSectionValues. contit.pp Test/Demo for iterators in contnr.pp +csvbom.pp Test/Demo for BOM detection in CSV document. 
(needs databom.txt)
diff --git a/packages/fcl-base/examples/csvbom.pp b/packages/fcl-base/examples/csvbom.pp
new file mode 100644
index 0000000000..a16adf582a
--- /dev/null
+++ b/packages/fcl-base/examples/csvbom.pp
@@ -0,0 +1,65 @@
+program csvbom;
+
+{ Demo for BOM detection in TCSVParser: parses the comma-separated file
+  databom.txt (date, number, text per row) with DetectBOM enabled and
+  prints the parsed records. }
+
+{$APPTYPE Console}
+{$mode objfpc}{$H+}
+
+uses
+  sysutils, classes, dateutils, csvreadwrite;
+
+type
+  // One parsed row of the input file.
+  TDataRec = record
+    FDate: TDate;
+    FNumber: Integer;
+    FText: String;
+  end;
+
+const
+  FILENAME = 'databom.txt';
+
+var
+  csv: TCSVParser;
+  src: TFileStream;
+  rows: array of TDataRec;
+  cell: String;
+  last, idx: Integer;
+begin
+  csv := TCSVParser.Create;
+  try
+    csv.Delimiter := ',';
+    // Needs a csvreadwrite version that provides the DetectBOM property.
+    csv.DetectBOM := true;
+    src := TFileStream.Create(FILENAME, fmOpenRead);
+    try
+      csv.SetSource(src);
+      SetLength(rows, 0);
+      while csv.ParseNextCell do
+      begin
+        // Grow the array when the parser reports a row beyond the current end.
+        if High(rows) < csv.CurrentRow then
+          SetLength(rows, csv.CurrentRow + 1);
+        last := High(rows);
+        cell := csv.CurrentCellText;
+        case csv.CurrentCol of
+          0: rows[last].FDate := ScanDateTime('yyyy-mm-dd', cell);
+          1: rows[last].FNumber := StrToInt(cell);
+          2: rows[last].FText := cell;
+        end;
+      end;
+
+      for idx := 0 to High(rows) do
+        WriteLn(DateToStr(rows[idx].FDate), '; ', rows[idx].FNumber, '; ', rows[idx].FText);
+      Writeln('Press enter to quit program');
+      Readln;
+    finally
+      src.Free;
+    end;
+  finally
+    csv.Free;
+  end;
+end.
+ diff --git a/packages/fcl-base/examples/databom.txt b/packages/fcl-base/examples/databom.txt new file mode 100644 index 0000000000..4316a46865 --- /dev/null +++ b/packages/fcl-base/examples/databom.txt @@ -0,0 +1,2 @@ +2016-01-01,100,ABC +2016-01-02,110,DEF diff --git a/packages/fcl-base/src/csvreadwrite.pp b/packages/fcl-base/src/csvreadwrite.pp index 4c68f151ff..32f195d411 100644 --- a/packages/fcl-base/src/csvreadwrite.pp +++ b/packages/fcl-base/src/csvreadwrite.pp @@ -92,12 +92,16 @@ Type { TCSVParser } + TCSVByteOrderMark = (bomNone, bomUTF8, bomUTF16LE, bomUTF16BE); + TCSVParser = class(TCSVHandler) private FFreeStream: Boolean; // fields FSourceStream: TStream; FStrStreamWrapper: TStringStream; + FBOM: TCSVByteOrderMark; + FDetectBOM: Boolean; // parser state EndOfFile: Boolean; EndOfLine: Boolean; @@ -140,6 +144,10 @@ Type property MaxColCount: Integer read FMaxColCount; // Does the parser own the stream ? If true, a previous stream is freed when set or when parser is destroyed. Property FreeStream : Boolean Read FFreeStream Write FFreeStream; + // Return BOM found in file + property BOM: TCSVByteOrderMark read FBOM; + // Detect whether a BOM marker is present. If set to True, then BOM can be used to see what BOM marker there was. 
+    property DetectBOM: Boolean read FDetectBOM write FDetectBOM default false;
   end;
 
   // Sequential output to CSV stream
@@ -441,9 +449,37 @@ begin
 end;
 
 procedure TCSVParser.ResetParser;
+{ Clears the output, rewinds the source stream and calls NextChar.
+  When DetectBOM is set, a leading UTF-8 / UTF-16 byte order mark is recorded
+  in BOM and the stream is positioned just past it before NextChar runs. }
+var
+  b: packed array[0..2] of byte;
+  got, skip: Integer;
 begin
   ClearOutput;
   FSourceStream.Seek(0, soFromBeginning);
+  if FDetectBOM then
+  begin
+    FBOM := bomNone;
+    skip := 0;
+    // Read, not ReadBuffer: ReadBuffer raises EReadError when the source
+    // holds fewer than 3 bytes (empty or very short input).
+    got := FSourceStream.Read(b[0], SizeOf(b));
+    if (got >= 3) and (b[0] = $EF) and (b[1] = $BB) and (b[2] = $BF) then begin
+      FBOM := bomUTF8;
+      skip := 3;
+    end else
+    if (got >= 2) and (b[0] = $FE) and (b[1] = $FF) then begin
+      FBOM := bomUTF16BE;
+      skip := 2;
+    end else
+    if (got >= 2) and (b[0] = $FF) and (b[1] = $FE) then begin
+      FBOM := bomUTF16LE;
+      skip := 2;
+    end;
+    // Reposition just past the BOM (or back to the start if none was found).
+    FSourceStream.Seek(skip, soFromBeginning);
+  end;
   EndOfFile := False;
   NextChar;
 end;
-- 
cgit v1.2.1