summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorflorian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>2008-08-30 18:42:37 +0000
committerflorian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>2008-08-30 18:42:37 +0000
commit29aa69f6ce7e9d378226c73dcb33c7001fd059dc (patch)
treef7276b1eafdd63cd2b917a4a500aaaaa4550b8f0
parentf871415b717d7330d8435300c14a52de4c7b9ff9 (diff)
downloadfpc-29aa69f6ce7e9d378226c73dcb33c7001fd059dc.tar.gz
* more unicodestring stuff fixed, test results on win32 are already good
git-svn-id: http://svn.freepascal.org/svn/fpc/branches/unicodestring@11667 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r--compiler/ccharset.pas254
-rw-r--r--compiler/cp1251.pas2
-rw-r--r--compiler/cp437.pas2
-rw-r--r--compiler/cp850.pas2
-rw-r--r--compiler/cp866.pas2
-rw-r--r--compiler/cp8859_1.pas2
-rw-r--r--compiler/cp8859_5.pas2
-rw-r--r--compiler/ncgcon.pas10
-rw-r--r--compiler/options.pas2
-rw-r--r--compiler/widestr.pas3
-rw-r--r--rtl/win32/system.pp61
-rw-r--r--tests/test/tunistr5.pp16
-rw-r--r--tests/test/tunistr6.pp397
-rw-r--r--tests/test/tunistr7.pp47
14 files changed, 775 insertions, 27 deletions
diff --git a/compiler/ccharset.pas b/compiler/ccharset.pas
new file mode 100644
index 0000000000..258a0359d2
--- /dev/null
+++ b/compiler/ccharset.pas
@@ -0,0 +1,254 @@
+{
+ This file is part of the Free Pascal run time library.
+ Copyright (c) 2000 by Florian Klaempfl
+ member of the Free Pascal development team.
+
+ This unit implements several classes for charset conversions
+
+ See the file COPYING.FPC, included in this distribution,
+ for details about the copyright.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+{ this unit is included temporarily for 2.2 bootstrapping and can be
+ removed after the next release after 2.2.2 }
+{$mode objfpc}
+unit ccharset;
+
+ interface
+
+ type
+ tunicodechar = word;
+ tunicodestring = ^tunicodechar;
+
+ tcsconvert = class
+ // !!!!!!1constructor create;
+ end;
+
+ tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
+ umf_unused);
+
+ punicodecharmapping = ^tunicodecharmapping;
+ tunicodecharmapping = record
+ unicode : tunicodechar;
+ flag : tunicodecharmappingflag;
+ reserved : byte;
+ end;
+
+ punicodemap = ^tunicodemap;
+ tunicodemap = record
+ cpname : string[20];
+ map : punicodecharmapping;
+ lastchar : longint;
+ next : punicodemap;
+ internalmap : boolean;
+ end;
+
+ tcp2unicode = class(tcsconvert)
+ end;
+
+ function loadunicodemapping(const cpname,f : string) : punicodemap;
+ procedure registermapping(p : punicodemap);
+ function getmap(const s : string) : punicodemap;
+ function mappingavailable(const s : string) : boolean;
+ function getunicode(c : char;p : punicodemap) : tunicodechar;
+ function getascii(c : tunicodechar;p : punicodemap) : string;
+
+ implementation
+
+ var
+ mappings : punicodemap;
+
+ function loadunicodemapping(const cpname,f : string) : punicodemap;
+
+ var
+ data : punicodecharmapping;
+ datasize : longint;
+ t : text;
+ s,hs : string;
+ scanpos,charpos,unicodevalue : longint;
+ code : word;
+ flag : tunicodecharmappingflag;
+ p : punicodemap;
+ lastchar : longint;
+
+ begin
+ lastchar:=-1;
+ loadunicodemapping:=nil;
+ datasize:=256;
+ getmem(data,sizeof(tunicodecharmapping)*datasize);
+ assign(t,f);
+ {$I-}
+ reset(t);
+ {$I+}
+ if ioresult<>0 then
+ begin
+ freemem(data,sizeof(tunicodecharmapping)*datasize);
+ exit;
+ end;
+ while not(eof(t)) do
+ begin
+ readln(t,s);
+ if (s[1]='0') and (s[2]='x') then
+ begin
+ flag:=umf_unused;
+ scanpos:=3;
+ hs:='$';
+ while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
+ begin
+ hs:=hs+s[scanpos];
+ inc(scanpos);
+ end;
+ val(hs,charpos,code);
+ if code<>0 then
+ begin
+ freemem(data,sizeof(tunicodecharmapping)*datasize);
+ close(t);
+ exit;
+ end;
+ while not(s[scanpos] in ['0','#']) do
+ inc(scanpos);
+ if s[scanpos]='#' then
+ begin
+ { special char }
+ unicodevalue:=$ffff;
+ hs:=copy(s,scanpos,length(s)-scanpos+1);
+ if hs='#DBCS LEAD BYTE' then
+ flag:=umf_leadbyte;
+ end
+ else
+ begin
+ { C hex prefix }
+ inc(scanpos,2);
+ hs:='$';
+ while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
+ begin
+ hs:=hs+s[scanpos];
+ inc(scanpos);
+ end;
+ val(hs,unicodevalue,code);
+ if code<>0 then
+ begin
+ freemem(data,sizeof(tunicodecharmapping)*datasize);
+ close(t);
+ exit;
+ end;
+ if charpos>datasize then
+ begin
+ { allocate 1024 bytes more because }
+ { if we need more than 256 entries it's }
+ { probably a mbcs with a lot of }
+ { entries }
+ datasize:=charpos+1024;
+ reallocmem(data,sizeof(tunicodecharmapping)*datasize);
+ end;
+ flag:=umf_noinfo;
+ end;
+ data[charpos].flag:=flag;
+ data[charpos].unicode:=unicodevalue;
+ if charpos>lastchar then
+ lastchar:=charpos;
+ end;
+ end;
+ close(t);
+ new(p);
+ p^.lastchar:=lastchar;
+ p^.cpname:=cpname;
+ p^.internalmap:=false;
+ p^.next:=nil;
+ p^.map:=data;
+ loadunicodemapping:=p;
+ end;
+
+ procedure registermapping(p : punicodemap);
+
+ begin
+ p^.next:=mappings;
+ mappings:=p;
+ end;
+
+ function getmap(const s : string) : punicodemap;
+
+ var
+ hp : punicodemap;
+
+ const
+ mapcache : string = '';
+ mapcachep : punicodemap = nil;
+
+ begin
+ if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
+ begin
+ getmap:=mapcachep;
+ exit;
+ end;
+ hp:=mappings;
+ while assigned(hp) do
+ begin
+ if hp^.cpname=s then
+ begin
+ getmap:=hp;
+ mapcache:=s;
+ mapcachep:=hp;
+ exit;
+ end;
+ hp:=hp^.next;
+ end;
+ getmap:=nil;
+ end;
+
+ function mappingavailable(const s : string) : boolean;
+
+ begin
+ mappingavailable:=getmap(s)<>nil;
+ end;
+
+ function getunicode(c : char;p : punicodemap) : tunicodechar;
+
+ begin
+ if ord(c)<=p^.lastchar then
+ getunicode:=p^.map[ord(c)].unicode
+ else
+ getunicode:=0;
+ end;
+
+ function getascii(c : tunicodechar;p : punicodemap) : string;
+
+ var
+ i : longint;
+
+ begin
+ { at least map to space }
+ getascii:=#32;
+ for i:=0 to p^.lastchar do
+ if p^.map[i].unicode=c then
+ begin
+ if i<256 then
+ getascii:=chr(i)
+ else
+ getascii:=chr(i div 256)+chr(i mod 256);
+ exit;
+ end;
+ end;
+
+ var
+ hp : punicodemap;
+
+initialization
+ mappings:=nil;
+finalization
+ while assigned(mappings) do
+ begin
+ hp:=mappings^.next;
+ if not(mappings^.internalmap) then
+ begin
+ freemem(mappings^.map);
+ dispose(mappings);
+ end;
+ mappings:=hp;
+ end;
+end.
diff --git a/compiler/cp1251.pas b/compiler/cp1251.pas
index a2534fb531..b5f99074ea 100644
--- a/compiler/cp1251.pas
+++ b/compiler/cp1251.pas
@@ -6,7 +6,7 @@ unit cp1251;
implementation
uses
- charset;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
const
map : array[0..255] of tunicodecharmapping = (
diff --git a/compiler/cp437.pas b/compiler/cp437.pas
index a8ca25d3d3..144c854fcd 100644
--- a/compiler/cp437.pas
+++ b/compiler/cp437.pas
@@ -6,7 +6,7 @@ unit cp437;
implementation
uses
- charset;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
const
map : array[0..255] of tunicodecharmapping = (
diff --git a/compiler/cp850.pas b/compiler/cp850.pas
index 0a47f68268..51b3a3d1c8 100644
--- a/compiler/cp850.pas
+++ b/compiler/cp850.pas
@@ -6,7 +6,7 @@ unit cp850;
implementation
uses
- charset;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
const
map : array[0..255] of tunicodecharmapping = (
diff --git a/compiler/cp866.pas b/compiler/cp866.pas
index 435528b133..1279ae51a1 100644
--- a/compiler/cp866.pas
+++ b/compiler/cp866.pas
@@ -6,7 +6,7 @@ unit cp866;
implementation
uses
- charset;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
const
map : array[0..255] of tunicodecharmapping = (
diff --git a/compiler/cp8859_1.pas b/compiler/cp8859_1.pas
index 5b01d1664b..ae36fe3c18 100644
--- a/compiler/cp8859_1.pas
+++ b/compiler/cp8859_1.pas
@@ -6,7 +6,7 @@ unit cp8859_1;
implementation
uses
- charset;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
const
map : array[0..255] of tunicodecharmapping = (
diff --git a/compiler/cp8859_5.pas b/compiler/cp8859_5.pas
index e859ff0963..cc8f0c27f7 100644
--- a/compiler/cp8859_5.pas
+++ b/compiler/cp8859_5.pas
@@ -6,7 +6,7 @@ unit cp8859_5;
implementation
uses
- charset;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2};
const
map : array[0..255] of tunicodecharmapping = (
diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas
index ffcea0bd2e..11bc914709 100644
--- a/compiler/ncgcon.pas
+++ b/compiler/ncgcon.pas
@@ -270,7 +270,7 @@ implementation
pooltype: TConstPoolType;
pool: THashSet;
entry: PHashSetItem;
-
+
const
PoolMap: array[tconststringtype] of TConstPoolType = (
sp_conststr,
@@ -295,7 +295,7 @@ implementation
if current_asmdata.ConstPools[pooltype] = nil then
current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False);
pool := current_asmdata.ConstPools[pooltype];
-
+
if cst_type in [cst_widestring, cst_unicodestring] then
entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size)
else
@@ -402,7 +402,7 @@ implementation
end;
end;
end;
- if cst_type in [cst_ansistring, cst_widestring] then
+ if cst_type in [cst_ansistring, cst_widestring, cst_unicodestring] then
begin
location_reset(location, LOC_REGISTER, OS_ADDR);
reference_reset_symbol(href, lab_str, 0);
@@ -412,8 +412,8 @@ implementation
else
begin
location_reset(location, LOC_CREFERENCE, def_cgsize(resultdef));
- location.reference.symbol:=lab_str;
- end;
+ location.reference.symbol:=lab_str;
+ end;
end;
diff --git a/compiler/options.pas b/compiler/options.pas
index b1f6eef45b..ae804c51cb 100644
--- a/compiler/options.pas
+++ b/compiler/options.pas
@@ -71,7 +71,7 @@ implementation
uses
widestr,
- charset,
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2},
SysUtils,
version,
cutils,cmsgs,
diff --git a/compiler/widestr.pas b/compiler/widestr.pas
index 1c0c0d634d..4747e655b6 100644
--- a/compiler/widestr.pas
+++ b/compiler/widestr.pas
@@ -28,8 +28,7 @@ unit widestr;
interface
uses
- charset,globtype
- ;
+ {$ifdef VER2_2}ccharset{$else VER2_2}charset{$endif VER2_2},globtype;
type
diff --git a/rtl/win32/system.pp b/rtl/win32/system.pp
index 15cd5a8574..c60c63c5f4 100644
--- a/rtl/win32/system.pp
+++ b/rtl/win32/system.pp
@@ -899,10 +899,6 @@ end;
{$endif Set_i386_Exception_handler}
-{****************************************************************************
- OS dependend widestrings
-****************************************************************************}
-
const
{ MultiByteToWideChar }
MB_PRECOMPOSED = 1;
@@ -918,6 +914,9 @@ function CharUpperBuff(lpsz:LPWSTR; cchLength:DWORD):DWORD;
function CharLowerBuff(lpsz:LPWSTR; cchLength:DWORD):DWORD;
stdcall; external 'user32' name 'CharLowerBuffW';
+{******************************************************************************
+ Widestring
+ ******************************************************************************}
procedure Win32Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
var
@@ -947,7 +946,6 @@ procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
function Win32WideUpper(const s : WideString) : WideString;
begin
result:=s;
- UniqueString(result);
if length(result)>0 then
CharUpperBuff(LPWSTR(result),length(result));
end;
@@ -956,6 +954,51 @@ function Win32WideUpper(const s : WideString) : WideString;
function Win32WideLower(const s : WideString) : WideString;
begin
result:=s;
+ if length(result)>0 then
+ CharLowerBuff(LPWSTR(result),length(result));
+ end;
+
+{******************************************************************************
+ Unicode
+ ******************************************************************************}
+
+procedure Win32Unicode2AnsiMove(source:punicodechar;var dest:ansistring;len:SizeInt);
+ var
+ destlen: SizeInt;
+ begin
+ // retrieve length including trailing #0
+ // not anymore, because this must also be usable for single characters
+ destlen:=WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, source, len, nil, 0, nil, nil);
+ // this will null-terminate
+ setlength(dest, destlen);
+ WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, source, len, @dest[1], destlen, nil, nil);
+ end;
+
+procedure Win32Ansi2UnicodeMove(source:pchar;var dest:UnicodeString;len:SizeInt);
+ var
+ destlen: SizeInt;
+ begin
+ // retrieve length including trailing #0
+ // not anymore, because this must also be usable for single characters
+ destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, nil, 0);
+ // this will null-terminate
+ setlength(dest, destlen);
+ MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, @dest[1], destlen);
+ end;
+
+
+function Win32UnicodeUpper(const s : UnicodeString) : UnicodeString;
+ begin
+ result:=s;
+ UniqueString(result);
+ if length(result)>0 then
+ CharUpperBuff(LPWSTR(result),length(result));
+ end;
+
+
+function Win32UnicodeLower(const s : UnicodeString) : UnicodeString;
+ begin
+ result:=s;
UniqueString(result);
if length(result)>0 then
CharLowerBuff(LPWSTR(result),length(result));
@@ -966,10 +1009,17 @@ function Win32WideLower(const s : WideString) : WideString;
are only relevant for the sysutils units }
procedure InitWin32Widestrings;
begin
+ { Widestring }
widestringmanager.Wide2AnsiMoveProc:=@Win32Wide2AnsiMove;
widestringmanager.Ansi2WideMoveProc:=@Win32Ansi2WideMove;
widestringmanager.UpperWideStringProc:=@Win32WideUpper;
widestringmanager.LowerWideStringProc:=@Win32WideLower;
+
+ { Unicode }
+ widestringmanager.Unicode2AnsiMoveProc:=@Win32Unicode2AnsiMove;
+ widestringmanager.Ansi2UnicodeMoveProc:=@Win32Ansi2UnicodeMove;
+ widestringmanager.UpperUnicodeStringProc:=@Win32UnicodeUpper;
+ widestringmanager.LowerUnicodeStringProc:=@Win32UnicodeLower;
end;
@@ -1198,3 +1248,4 @@ begin
InitWin32Widestrings;
DispCallByIDProc:=@DoDispCallByIDError;
end.
+
diff --git a/tests/test/tunistr5.pp b/tests/test/tunistr5.pp
index 0a62ed5484..b09a333a54 100644
--- a/tests/test/tunistr5.pp
+++ b/tests/test/tunistr5.pp
@@ -20,18 +20,18 @@ begin
(ws[7]<>#$d87e) or
(ws[8]<>#$dc04) then
halt(1);
- us:=WideStringToUCS4String(ws);
+ us:=UnicodeStringToUCS4String(ws);
if (length(us)<>8) or
- (us[0]<>UCS4Char(widechar('é'))) or
- (us[1]<>UCS4Char(widechar('ł'))) or
- (us[2]<>UCS4Char(widechar('Ł'))) or
- (us[3]<>UCS4Char(widechar('ć'))) or
- (us[4]<>UCS4Char(widechar('ç'))) or
- (us[5]<>UCS4Char(widechar('Ź'))) or
+ (us[0]<>UCS4Char(unicodechar('é'))) or
+ (us[1]<>UCS4Char(unicodechar('ł'))) or
+ (us[2]<>UCS4Char(unicodechar('Ł'))) or
+ (us[3]<>UCS4Char(unicodechar('ć'))) or
+ (us[4]<>UCS4Char(unicodechar('ç'))) or
+ (us[5]<>UCS4Char(unicodechar('Ź'))) or
(us[6]<>UCS4Char($2F804)) or
(us[7]<>UCS4Char(0)) then
halt(2);
- ws:=UCS4StringToWideString(us);
+ ws:=UCS4StringToUnicodeString(us);
if (length(ws)<>8) or
(ws[1]<>'é') or
(ws[2]<>'ł') or
diff --git a/tests/test/tunistr6.pp b/tests/test/tunistr6.pp
new file mode 100644
index 0000000000..033892d172
--- /dev/null
+++ b/tests/test/tunistr6.pp
@@ -0,0 +1,397 @@
+{%skiptarget=wince}
+{$codepage utf-8}
+uses
+ {$ifdef unix}
+ cwstring,
+ {$endif}
+ sysutils;
+
+procedure doerror(i : integer);
+ begin
+ writeln('Error: ',i);
+ halt(i);
+ end;
+
+
+{ normal upper case testing }
+procedure testupper;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ w1:='aé'#0'èàł'#$d87e#$dc04;
+ w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original upper: ',w2);
+{$endif print}
+ s:=w1;
+{$ifdef print}
+ writeln('ansi: ',s);
+{$endif print}
+ w3:=s;
+ w4:=AnsiUpperCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeUpperCase(w1);
+{$ifdef print}
+ writeln('unicodeupper: ',w1);
+ writeln('original upper: ',w2);
+ writeln('ansiupper: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(1);
+ if (w4 <> w2) then
+ doerror(2);
+
+ w1:='aéèàł'#$d87e#$dc04;
+ w2:='AÉÈÀŁ'#$d87e#$dc04;
+ s:=w1;
+ w3:=s;
+ w4:=AnsiStrUpper(pchar(s));
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeUpperCase(w1);
+{$ifdef print}
+ writeln('unicodeupper: ',w1);
+ writeln('ansistrupper: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(21);
+ if (w4 <> w2) then
+ doerror(22);
+
+end;
+
+
+{ normal lower case testing }
+procedure testlower;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
+ w2:='aé'#0'èàł'#$d87e#$dc04;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original lower: ',w2);
+{$endif print}
+ s:=w1;
+ w3:=s;
+ w4:=AnsiLowerCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeLowerCase(w1);
+{$ifdef print}
+ writeln('unicodelower: ',w1);
+ writeln('ansilower: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(3);
+ if (w4 <> w2) then
+ doerror(4);
+
+
+ w1:='AÉÈÀŁ'#$d87e#$dc04;
+ w2:='aéèàł'#$d87e#$dc04;
+ s:=w1;
+ w3:=s;
+ w4:=AnsiStrLower(pchar(s));
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeLowerCase(w1);
+{$ifdef print}
+ writeln('unicodelower: ',w1);
+ writeln('ansistrlower: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(3);
+ if (w4 <> w2) then
+ doerror(4);
+end;
+
+
+
+{ upper case testing with a missing utf-16 pair at the end }
+procedure testupperinvalid;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ { missing utf-16 pair at end }
+ w1:='aé'#0'èàł'#$d87e;
+ w2:='AÉ'#0'ÈÀŁ'#$d87e;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original upper: ',w2);
+{$endif print}
+ s:=w1;
+ w3:=s;
+ w4:=AnsiUpperCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeUpperCase(w1);
+{$ifdef print}
+ writeln('unicodeupper: ',w1);
+ writeln('ansiupper: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(5);
+ if (w4 <> w2) then
+ doerror(6);
+end;
+
+
+{ lower case testing with a missing utf-16 pair at the end }
+procedure testlowerinvalid;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ { missing utf-16 pair at end}
+ w1:='AÉ'#0'ÈÀŁ'#$d87e;
+ w2:='aé'#0'èàł'#$d87e;
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original lower: ',w2);
+{$endif print}
+ s:=w1;
+ w3:=s;
+ w4:=AnsiLowerCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeLowerCase(w1);
+{$ifdef print}
+ writeln('unicodelower: ',w1);
+ writeln('ansilower: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(7);
+ if (w4 <> w2) then
+ doerror(8);
+end;
+
+
+
+{ upper case testing with a missing utf-16 pair at the end, followed by a normal char }
+procedure testupperinvalid1;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ { missing utf-16 pair at end with char after it}
+ w1:='aé'#0'èàł'#$d87e'j';
+ w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original upper: ',w2);
+{$endif print}
+ s:=w1;
+ w3:=s;
+ w4:=AnsiUpperCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeUpperCase(w1);
+{$ifdef print}
+ writeln('unicodeupper: ',w1);
+ writeln('ansiupper: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(9);
+ if (w4 <> w2) then
+ doerror(10);
+end;
+
+
+{ lower case testing with a missing utf-16 pair at the end, followed by a normal char }
+procedure testlowerinvalid1;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ { missing utf-16 pair at end with char after it}
+ w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
+ w2:='aé'#0'èàł'#$d87e'j';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original lower: ',w2);
+{$endif print}
+ s:=w1;
+ w3:=s;
+ w4:=AnsiLowerCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeLowerCase(w1);
+{$ifdef print}
+ writeln('unicodelower: ',w1);
+ writeln('ansilower: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(11);
+ if (w4 <> w2) then
+ doerror(12);
+end;
+
+
+{ upper case testing with corrupting the utf-8 string after conversion }
+procedure testupperinvalid2;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ w1:='aé'#0'èàł'#$d87e#$dc04'ö';
+ w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original upper: ',w2);
+{$endif print}
+ s:=w1;
+ { truncate the last utf-8 character }
+ setlength(s,length(s)-1);
+ w3:=s;
+ { adjust checking values for new length due to corruption }
+ if length(w3)<>length(w2) then
+ begin
+ setlength(w2,length(w3));
+ setlength(w1,length(w3));
+ end;
+ w4:=AnsiUpperCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeUpperCase(w1);
+{$ifdef print}
+ writeln('unicodeupper: ',w1);
+ writeln('ansiupper: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(13);
+ if (w4 <> w2) then
+ doerror(14);
+end;
+
+
+{ lower case testing with corrupting the utf-8 string after conversion }
+procedure testlowerinvalid2;
+var
+ s: ansistring;
+ w1,w2,w3,w4: unicodestring;
+ i: longint;
+begin
+ w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
+ w2:='aé'#0'èàł'#$d87e#$dc04'ö';
+{$ifdef print}
+// the utf-8 output can confuse the testsuite parser
+ writeln('original: ',w1);
+ writeln('original lower: ',w2);
+{$endif print}
+ s:=w1;
+ { truncate the last utf-8 character }
+ setlength(s,length(s)-1);
+ w3:=s;
+ { adjust checking values for new length due to corruption }
+ if length(w3)<>length(w2) then
+ begin
+ setlength(w2,length(w3));
+ setlength(w1,length(w3));
+ end;
+ w4:=AnsiLowerCase(s);
+ { filter out unsupported characters }
+ for i:=1 to length(w3) do
+ if w3[i]='?' then
+ begin
+ w2[i]:='?';
+ w1[i]:='?';
+ end;
+ w1:=UnicodeLowerCase(w1);
+{$ifdef print}
+ writeln('unicodelower: ',w1);
+ writeln('ansilower: ',w4);
+{$endif print}
+ if (w1 <> w2) then
+ doerror(15);
+ if (w4 <> w2) then
+ doerror(16);
+end;
+
+
+
+begin
+ testupper;
+ writeln;
+ testlower;
+ writeln;
+ writeln;
+ testupperinvalid;
+ writeln;
+ testlowerinvalid;
+ writeln;
+ writeln;
+ testupperinvalid1;
+ writeln;
+ testlowerinvalid1;
+ writeln;
+ writeln;
+ testupperinvalid2;
+ writeln;
+ testlowerinvalid2;
+ writeln('ok');
+end.
diff --git a/tests/test/tunistr7.pp b/tests/test/tunistr7.pp
new file mode 100644
index 0000000000..c7623793d8
--- /dev/null
+++ b/tests/test/tunistr7.pp
@@ -0,0 +1,47 @@
+{$codepage utf-8}
+
+uses
+{$ifdef unix}
+ cwstring,
+{$endif unix}
+ sysutils;
+
+procedure testwcmp;
+var
+ w1,w2: unicodestring;
+ s: ansistring;
+begin
+ w1:='aécde';
+ { filter unsupported characters }
+ s:=w1;
+ w1:=s;
+ w2:=w1;
+
+ if (w1<>w2) then
+ halt(1);
+ w1[2]:='f';
+ if (w1=w2) or
+ WideSameStr(w1,w2) or
+ (WideCompareText(w1,w2)=0) or
+ (WideCompareStr(w1,w2)<0) or
+ (WideCompareStr(w2,w1)>0) then
+ halt(2);
+ w1[2]:=#0;
+ w2[2]:=#0;
+ if (w1<>w2) or
+ not WideSameStr(w1,w2) or
+ (WideCompareStr(w1,w2)<>0) or
+ (WideCompareText(w1,w2)<>0) then
+ halt(3);
+ w1[3]:='m';
+ if WideSameStr(w1,w2) or
+ (WideCompareText(w1,w2)=0) or
+ (WideCompareStr(w1,w2)<0) or
+ (WideCompareStr(w2,w1)>0) then
+ halt(4);
+end;
+
+
+begin
+ testwcmp;
+end.