diff options
author | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2008-08-30 18:42:37 +0000 |
---|---|---|
committer | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2008-08-30 18:42:37 +0000 |
commit | 29aa69f6ce7e9d378226c73dcb33c7001fd059dc (patch) | |
tree | f7276b1eafdd63cd2b917a4a500aaaaa4550b8f0 | |
parent | f871415b717d7330d8435300c14a52de4c7b9ff9 (diff) | |
download | fpc-29aa69f6ce7e9d378226c73dcb33c7001fd059dc.tar.gz |
* more unicodestring stuff fixed, test results on win32 are already good
git-svn-id: http://svn.freepascal.org/svn/fpc/branches/unicodestring@11667 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r-- | compiler/ccharset.pas | 254 | ||||
-rw-r--r-- | compiler/cp1251.pas | 2 | ||||
-rw-r--r-- | compiler/cp437.pas | 2 | ||||
-rw-r--r-- | compiler/cp850.pas | 2 | ||||
-rw-r--r-- | compiler/cp866.pas | 2 | ||||
-rw-r--r-- | compiler/cp8859_1.pas | 2 | ||||
-rw-r--r-- | compiler/cp8859_5.pas | 2 | ||||
-rw-r--r-- | compiler/ncgcon.pas | 10 | ||||
-rw-r--r-- | compiler/options.pas | 2 | ||||
-rw-r--r-- | compiler/widestr.pas | 3 | ||||
-rw-r--r-- | rtl/win32/system.pp | 61 | ||||
-rw-r--r-- | tests/test/tunistr5.pp | 16 | ||||
-rw-r--r-- | tests/test/tunistr6.pp | 397 | ||||
-rw-r--r-- | tests/test/tunistr7.pp | 47 |
14 files changed, 775 insertions, 27 deletions
diff --git a/compiler/ccharset.pas b/compiler/ccharset.pas new file mode 100644 index 0000000000..258a0359d2 --- /dev/null +++ b/compiler/ccharset.pas @@ -0,0 +1,254 @@ +{ + This file is part of the Free Pascal run time library. + Copyright (c) 2000 by Florian Klaempfl + member of the Free Pascal development team. + + This unit implements several classes for charset conversions + + See the file COPYING.FPC, included in this distribution, + for details about the copyright. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + **********************************************************************} + +{ this unit is included temporarily for 2.2 bootstrapping and can be + removed after the next release after 2.2.2 } +{$mode objfpc} +unit ccharset; + + interface + + type + tunicodechar = word; + tunicodestring = ^tunicodechar; + + tcsconvert = class + // !!!!!!1constructor create; + end; + + tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined, + umf_unused); + + punicodecharmapping = ^tunicodecharmapping; + tunicodecharmapping = record + unicode : tunicodechar; + flag : tunicodecharmappingflag; + reserved : byte; + end; + + punicodemap = ^tunicodemap; + tunicodemap = record + cpname : string[20]; + map : punicodecharmapping; + lastchar : longint; + next : punicodemap; + internalmap : boolean; + end; + + tcp2unicode = class(tcsconvert) + end; + + function loadunicodemapping(const cpname,f : string) : punicodemap; + procedure registermapping(p : punicodemap); + function getmap(const s : string) : punicodemap; + function mappingavailable(const s : string) : boolean; + function getunicode(c : char;p : punicodemap) : tunicodechar; + function getascii(c : tunicodechar;p : punicodemap) : string; + + implementation + + var + mappings : punicodemap; + + function loadunicodemapping(const cpname,f : string) : punicodemap; + + var + data : punicodecharmapping; + datasize : longint; + t : text; + s,hs : string; + scanpos,charpos,unicodevalue : longint; + code : word; + flag : tunicodecharmappingflag; + p : punicodemap; + lastchar : longint; + + begin + lastchar:=-1; + loadunicodemapping:=nil; + datasize:=256; + getmem(data,sizeof(tunicodecharmapping)*datasize); + assign(t,f); + {$I-} + reset(t); + {$I+} + if ioresult<>0 then + begin + freemem(data,sizeof(tunicodecharmapping)*datasize); + exit; + end; + while not(eof(t)) do + begin + readln(t,s); + if (s[1]='0') and (s[2]='x') then + begin + flag:=umf_unused; + scanpos:=3; + hs:='$'; + while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do + begin + hs:=hs+s[scanpos]; + inc(scanpos); + end; + val(hs,charpos,code); + if code<>0 then + begin + freemem(data,sizeof(tunicodecharmapping)*datasize); + close(t); + exit; + end; + while not(s[scanpos] in ['0','#']) do + inc(scanpos); + if s[scanpos]='#' then + begin + { special char } + unicodevalue:=$ffff; + hs:=copy(s,scanpos,length(s)-scanpos+1); + if hs='#DBCS LEAD BYTE' then + flag:=umf_leadbyte; + end + else + begin + { C hex prefix } + inc(scanpos,2); + hs:='$'; + while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do + begin + hs:=hs+s[scanpos]; + inc(scanpos); + end; + val(hs,unicodevalue,code); + if code<>0 then + begin + freemem(data,sizeof(tunicodecharmapping)*datasize); + close(t); + exit; + end; + if charpos>datasize then + begin + { allocate 1024 bytes more because } + { if we need more than 256 entries it's } + { probably a mbcs with a lot of } + { entries } + datasize:=charpos+1024; + reallocmem(data,sizeof(tunicodecharmapping)*datasize); + end; + flag:=umf_noinfo; + end; + data[charpos].flag:=flag; + data[charpos].unicode:=unicodevalue; + if charpos>lastchar then + lastchar:=charpos; + end; + end; + close(t); + new(p); + p^.lastchar:=lastchar; + p^.cpname:=cpname; + p^.internalmap:=false; + p^.next:=nil; + p^.map:=data; + loadunicodemapping:=p; + end; + + procedure registermapping(p : punicodemap); + + begin + p^.next:=mappings; + mappings:=p; + end; + + function getmap(const s : string) : punicodemap; + + var + hp : punicodemap; + + const + mapcache : string = ''; + mapcachep : punicodemap = nil; + + begin + if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then + begin + getmap:=mapcachep; + exit; + end; + hp:=mappings; + while assigned(hp) do + begin + if hp^.cpname=s then + begin + getmap:=hp; + mapcache:=s; + mapcachep:=hp; + exit; + end; + hp:=hp^.next; + end; + getmap:=nil; + end; + + function mappingavailable(const s : string) : boolean; + + begin + mappingavailable:=getmap(s)<>nil; + end; + + function getunicode(c : char;p : punicodemap) : tunicodechar; + + begin + if ord(c)<=p^.lastchar then + getunicode:=p^.map[ord(c)].unicode + else + getunicode:=0; + end; + + function getascii(c : tunicodechar;p : punicodemap) : string; + + var + i : longint; + + begin + { at least map to space } + getascii:=#32; + for i:=0 to p^.lastchar do + if p^.map[i].unicode=c then + begin + if i<256 then + getascii:=chr(i) + else + getascii:=chr(i div 256)+chr(i mod 256); + exit; + end; + end; + + var + hp : punicodemap; + +initialization + mappings:=nil; +finalization + while assigned(mappings) do + begin + hp:=mappings^.next; + if not(mappings^.internalmap) then + begin + freemem(mappings^.map); + dispose(mappings); + end; + mappings:=hp; + end; +end. diff --git a/compiler/cp1251.pas b/compiler/cp1251.pas index a2534fb531..b5f99074ea 100644 --- a/compiler/cp1251.pas +++ b/compiler/cp1251.pas @@ -6,7 +6,7 @@ unit cp1251; implementation uses - charset; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}; const map : array[0..255] of tunicodecharmapping = ( diff --git a/compiler/cp437.pas b/compiler/cp437.pas index a8ca25d3d3..144c854fcd 100644 --- a/compiler/cp437.pas +++ b/compiler/cp437.pas @@ -6,7 +6,7 @@ unit cp437; implementation uses - charset; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}; const map : array[0..255] of tunicodecharmapping = ( diff --git a/compiler/cp850.pas b/compiler/cp850.pas index 0a47f68268..51b3a3d1c8 100644 --- a/compiler/cp850.pas +++ b/compiler/cp850.pas @@ -6,7 +6,7 @@ unit cp850; implementation uses - charset; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}; const map : array[0..255] of tunicodecharmapping = ( diff --git a/compiler/cp866.pas b/compiler/cp866.pas index 435528b133..1279ae51a1 100644 --- a/compiler/cp866.pas +++ b/compiler/cp866.pas @@ -6,7 +6,7 @@ unit cp866; implementation uses - charset; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}; const map : array[0..255] of tunicodecharmapping = ( diff --git a/compiler/cp8859_1.pas b/compiler/cp8859_1.pas index 5b01d1664b..ae36fe3c18 100644 --- a/compiler/cp8859_1.pas +++ b/compiler/cp8859_1.pas @@ -6,7 +6,7 @@ unit cp8859_1; implementation uses - charset; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}; const map : array[0..255] of tunicodecharmapping = ( diff --git a/compiler/cp8859_5.pas b/compiler/cp8859_5.pas index e859ff0963..cc8f0c27f7 100644 --- a/compiler/cp8859_5.pas +++ b/compiler/cp8859_5.pas @@ -6,7 +6,7 @@ unit cp8859_5; implementation uses - charset; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}; const map : array[0..255] of tunicodecharmapping = ( diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas index ffcea0bd2e..11bc914709 100644 --- a/compiler/ncgcon.pas +++ b/compiler/ncgcon.pas @@ -270,7 +270,7 @@ implementation pooltype: TConstPoolType; pool: THashSet; entry: PHashSetItem; - + const PoolMap: array[tconststringtype] of TConstPoolType = ( sp_conststr, @@ -295,7 +295,7 @@ implementation if current_asmdata.ConstPools[pooltype] = nil then current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False); pool := current_asmdata.ConstPools[pooltype]; - + if cst_type in [cst_widestring, cst_unicodestring] then entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size) else @@ -402,7 +402,7 @@ implementation end; end; end; - if cst_type in [cst_ansistring, cst_widestring] then + if cst_type in [cst_ansistring, cst_widestring, cst_unicodestring] then begin location_reset(location, LOC_REGISTER, OS_ADDR); reference_reset_symbol(href, lab_str, 0); @@ -412,8 +412,8 @@ implementation else begin location_reset(location, LOC_CREFERENCE, def_cgsize(resultdef)); - location.reference.symbol:=lab_str; - end; + location.reference.symbol:=lab_str; + end; end; diff --git a/compiler/options.pas b/compiler/options.pas index b1f6eef45b..ae804c51cb 100644 --- a/compiler/options.pas +++ b/compiler/options.pas @@ -71,7 +71,7 @@ implementation uses widestr, - charset, + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2}, SysUtils, version, cutils,cmsgs, diff --git a/compiler/widestr.pas b/compiler/widestr.pas index 1c0c0d634d..4747e655b6 100644 --- a/compiler/widestr.pas +++ b/compiler/widestr.pas @@ -28,8 +28,7 @@ unit widestr; interface uses - charset,globtype - ; + {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2},globtype; type diff --git a/rtl/win32/system.pp b/rtl/win32/system.pp index 15cd5a8574..c60c63c5f4 100644 --- a/rtl/win32/system.pp +++ b/rtl/win32/system.pp @@ -899,10 +899,6 @@ end; {$endif Set_i386_Exception_handler} -{**************************************************************************** - OS dependend widestrings -****************************************************************************} - const { MultiByteToWideChar } MB_PRECOMPOSED = 1; @@ -918,6 +914,9 @@ function CharUpperBuff(lpsz:LPWSTR; cchLength:DWORD):DWORD; function CharLowerBuff(lpsz:LPWSTR; cchLength:DWORD):DWORD; stdcall; external 'user32' name 'CharLowerBuffW'; +{****************************************************************************** + Widestring + ******************************************************************************} procedure Win32Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt); var @@ -947,7 +946,6 @@ procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt); function Win32WideUpper(const s : WideString) : WideString; begin result:=s; - UniqueString(result); if length(result)>0 then CharUpperBuff(LPWSTR(result),length(result)); end; @@ -956,6 +954,51 @@ function Win32WideUpper(const s : WideString) : WideString; function Win32WideLower(const s : WideString) : WideString; begin result:=s; + if length(result)>0 then + CharLowerBuff(LPWSTR(result),length(result)); + end; + +{****************************************************************************** + Unicode + ******************************************************************************} + +procedure Win32Unicode2AnsiMove(source:punicodechar;var dest:ansistring;len:SizeInt); + var + destlen: SizeInt; + begin + // retrieve length including trailing #0 + // not anymore, because this must also be usable for single characters + destlen:=WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, source, len, nil, 0, nil, nil); + // this will null-terminate + setlength(dest, destlen); + WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, source, len, @dest[1], destlen, nil, nil); + end; + +procedure Win32Ansi2UnicodeMove(source:pchar;var dest:UnicodeString;len:SizeInt); + var + destlen: SizeInt; + begin + // retrieve length including trailing #0 + // not anymore, because this must also be usable for single characters + destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, nil, 0); + // this will null-terminate + setlength(dest, destlen); + MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, @dest[1], destlen); + end; + + +function Win32UnicodeUpper(const s : UnicodeString) : UnicodeString; + begin + result:=s; + UniqueString(result); + if length(result)>0 then + CharUpperBuff(LPWSTR(result),length(result)); + end; + + +function Win32UnicodeLower(const s : UnicodeString) : UnicodeString; + begin + result:=s; UniqueString(result); if length(result)>0 then CharLowerBuff(LPWSTR(result),length(result)); @@ -966,10 +1009,17 @@ function Win32WideLower(const s : WideString) : WideString; are only relevant for the sysutils units } procedure InitWin32Widestrings; begin + { Widestring } widestringmanager.Wide2AnsiMoveProc:=@Win32Wide2AnsiMove; widestringmanager.Ansi2WideMoveProc:=@Win32Ansi2WideMove; widestringmanager.UpperWideStringProc:=@Win32WideUpper; widestringmanager.LowerWideStringProc:=@Win32WideLower; + + { Unicode } + widestringmanager.Unicode2AnsiMoveProc:=@Win32Unicode2AnsiMove; + widestringmanager.Ansi2UnicodeMoveProc:=@Win32Ansi2UnicodeMove; + widestringmanager.UpperUnicodeStringProc:=@Win32UnicodeUpper; + widestringmanager.LowerUnicodeStringProc:=@Win32UnicodeLower; end; @@ -1198,3 +1248,4 @@ begin InitWin32Widestrings; DispCallByIDProc:=@DoDispCallByIDError; end. + diff --git a/tests/test/tunistr5.pp b/tests/test/tunistr5.pp index 0a62ed5484..b09a333a54 100644 --- a/tests/test/tunistr5.pp +++ b/tests/test/tunistr5.pp @@ -20,18 +20,18 @@ begin (ws[7]<>#$d87e) or (ws[8]<>#$dc04) then halt(1); - us:=WideStringToUCS4String(ws); + us:=UnicodeStringToUCS4String(ws); if (length(us)<>8) or - (us[0]<>UCS4Char(widechar('é'))) or - (us[1]<>UCS4Char(widechar('ł'))) or - (us[2]<>UCS4Char(widechar('Ł'))) or - (us[3]<>UCS4Char(widechar('ć'))) or - (us[4]<>UCS4Char(widechar('ç'))) or - (us[5]<>UCS4Char(widechar('Ź'))) or + (us[0]<>UCS4Char(unicodechar('é'))) or + (us[1]<>UCS4Char(unicodechar('ł'))) or + (us[2]<>UCS4Char(unicodechar('Ł'))) or + (us[3]<>UCS4Char(unicodechar('ć'))) or + (us[4]<>UCS4Char(unicodechar('ç'))) or + (us[5]<>UCS4Char(unicodechar('Ź'))) or (us[6]<>UCS4Char($2F804)) or (us[7]<>UCS4Char(0)) then halt(2); - ws:=UCS4StringToWideString(us); + ws:=UCS4StringToUnicodeString(us); if (length(ws)<>8) or (ws[1]<>'é') or (ws[2]<>'ł') or diff --git a/tests/test/tunistr6.pp b/tests/test/tunistr6.pp new file mode 100644 index 0000000000..033892d172 --- /dev/null +++ b/tests/test/tunistr6.pp @@ -0,0 +1,397 @@ +{%skiptarget=wince} +{$codepage utf-8} +uses + {$ifdef unix} + cwstring, + {$endif} + sysutils; + +procedure doerror(i : integer); + begin + writeln('Error: ',i); + halt(i); + end; + + +{ normal upper case testing } +procedure testupper; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + w1:='aé'#0'èàł'#$d87e#$dc04; + w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original upper: ',w2); +{$endif print} + s:=w1; +{$ifdef print} + writeln('ansi: ',s); +{$endif print} + w3:=s; + w4:=AnsiUpperCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeUpperCase(w1); +{$ifdef print} + writeln('unicodeupper: ',w1); + writeln('original upper: ',w2); + writeln('ansiupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(1); + if (w4 <> w2) then + doerror(2); + + w1:='aéèàł'#$d87e#$dc04; + w2:='AÉÈÀŁ'#$d87e#$dc04; + s:=w1; + w3:=s; + w4:=AnsiStrUpper(pchar(s)); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeUpperCase(w1); +{$ifdef print} + writeln('unicodeupper: ',w1); + writeln('ansistrupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(21); + if (w4 <> w2) then + doerror(22); + +end; + + +{ normal lower case testing } +procedure testlower; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04; + w2:='aé'#0'èàł'#$d87e#$dc04; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original lower: ',w2); +{$endif print} + s:=w1; + w3:=s; + w4:=AnsiLowerCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeLowerCase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansilower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(3); + if (w4 <> w2) then + doerror(4); + + + w1:='AÉÈÀŁ'#$d87e#$dc04; + w2:='aéèàł'#$d87e#$dc04; + s:=w1; + w3:=s; + w4:=AnsiStrLower(pchar(s)); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeLowerCase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansistrlower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(3); + if (w4 <> w2) then + doerror(4); +end; + + + +{ upper case testing with a missing utf-16 pair at the end } +procedure testupperinvalid; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + { missing utf-16 pair at end } + w1:='aé'#0'èàł'#$d87e; + w2:='AÉ'#0'ÈÀŁ'#$d87e; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original upper: ',w2); +{$endif print} + s:=w1; + w3:=s; + w4:=AnsiUpperCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeUpperCase(w1); +{$ifdef print} + writeln('unicodeupper: ',w1); + writeln('ansiupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(5); + if (w4 <> w2) then + doerror(6); +end; + + +{ lower case testing with a missing utf-16 pair at the end } +procedure testlowerinvalid; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + { missing utf-16 pair at end} + w1:='AÉ'#0'ÈÀŁ'#$d87e; + w2:='aé'#0'èàł'#$d87e; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original lower: ',w2); +{$endif print} + s:=w1; + w3:=s; + w4:=AnsiLowerCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeLowerCase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansilower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(7); + if (w4 <> w2) then + doerror(8); +end; + + + +{ upper case testing with a missing utf-16 pair at the end, followed by a normal char } +procedure testupperinvalid1; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + { missing utf-16 pair at end with char after it} + w1:='aé'#0'èàł'#$d87e'j'; + w2:='AÉ'#0'ÈÀŁ'#$d87e'J'; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original upper: ',w2); +{$endif print} + s:=w1; + w3:=s; + w4:=AnsiUpperCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeUpperCase(w1); +{$ifdef print} + writeln('unicodeupper: ',w1); + writeln('ansiupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(9); + if (w4 <> w2) then + doerror(10); +end; + + +{ lower case testing with a missing utf-16 pair at the end, followed by a normal char } +procedure testlowerinvalid1; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + { missing utf-16 pair at end with char after it} + w1:='AÉ'#0'ÈÀŁ'#$d87e'J'; + w2:='aé'#0'èàł'#$d87e'j'; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original lower: ',w2); +{$endif print} + s:=w1; + w3:=s; + w4:=AnsiLowerCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeLowerCase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansilower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(11); + if (w4 <> w2) then + doerror(12); +end; + + +{ upper case testing with corrupting the utf-8 string after conversion } +procedure testupperinvalid2; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + w1:='aé'#0'èàł'#$d87e#$dc04'ö'; + w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö'; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original upper: ',w2); +{$endif print} + s:=w1; + { truncate the last utf-8 character } + setlength(s,length(s)-1); + w3:=s; + { adjust checking values for new length due to corruption } + if length(w3)<>length(w2) then + begin + setlength(w2,length(w3)); + setlength(w1,length(w3)); + end; + w4:=AnsiUpperCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeUpperCase(w1); +{$ifdef print} + writeln('unicodeupper: ',w1); + writeln('ansiupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(13); + if (w4 <> w2) then + doerror(14); +end; + + +{ lower case testing with corrupting the utf-8 string after conversion } +procedure testlowerinvalid2; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö'; + w2:='aé'#0'èàł'#$d87e#$dc04'ö'; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original lower: ',w2); +{$endif print} + s:=w1; + { truncate the last utf-8 character } + setlength(s,length(s)-1); + w3:=s; + { adjust checking values for new length due to corruption } + if length(w3)<>length(w2) then + begin + setlength(w2,length(w3)); + setlength(w1,length(w3)); + end; + w4:=AnsiLowerCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=UnicodeLowerCase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansilower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(15); + if (w4 <> w2) then + doerror(16); +end; + + + +begin + testupper; + writeln; + testlower; + writeln; + writeln; + testupperinvalid; + writeln; + testlowerinvalid; + writeln; + writeln; + testupperinvalid1; + writeln; + testlowerinvalid1; + writeln; + writeln; + testupperinvalid2; + writeln; + testlowerinvalid2; + writeln('ok'); +end. diff --git a/tests/test/tunistr7.pp b/tests/test/tunistr7.pp new file mode 100644 index 0000000000..c7623793d8 --- /dev/null +++ b/tests/test/tunistr7.pp @@ -0,0 +1,47 @@ +{$codepage utf-8} + +uses +{$ifdef unix} + cwstring, +{$endif unix} + sysutils; + +procedure testwcmp; +var + w1,w2: unicodestring; + s: ansistring; +begin + w1:='aécde'; + { filter unsupported characters } + s:=w1; + w1:=s; + w2:=w1; + + if (w1<>w2) then + halt(1); + w1[2]:='f'; + if (w1=w2) or + WideSameStr(w1,w2) or + (WideCompareText(w1,w2)=0) or + (WideCompareStr(w1,w2)<0) or + (WideCompareStr(w2,w1)>0) then + halt(2); + w1[2]:=#0; + w2[2]:=#0; + if (w1<>w2) or + not WideSameStr(w1,w2) or + (WideCompareStr(w1,w2)<>0) or + (WideCompareText(w1,w2)<>0) then + halt(3); + w1[3]:='m'; + if WideSameStr(w1,w2) or + (WideCompareText(w1,w2)=0) or + (WideCompareStr(w1,w2)<0) or + (WideCompareStr(w2,w1)>0) then + halt(4); +end; + + +begin + testwcmp; +end. |