diff options
author | hajny <hajny@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2015-02-28 22:54:48 +0000 |
---|---|---|
committer | hajny <hajny@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2015-02-28 22:54:48 +0000 |
commit | d412f77bd508d9a50cdafcc1bef484f69454f565 (patch) | |
tree | c3f03e22114797c117a6868a80b31cdf163c4d22 /compiler/widestr.pas | |
parent | cde4ffabd5e6e70887af620ec8ae8cd7bdb3a147 (diff) | |
download | fpc-d412f77bd508d9a50cdafcc1bef484f69454f565.tar.gz |
* fix determination of help line length with codepage set to UTF-8 (the string conversion routine in widestr.pas doesn't work for UTF-8 :-((( )
git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@30039 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'compiler/widestr.pas')
-rw-r--r-- | compiler/widestr.pas | 54 |
1 files changed, 49 insertions, 5 deletions
diff --git a/compiler/widestr.pas b/compiler/widestr.pas index 4c9e576841..51fdd2eb01 100644 --- a/compiler/widestr.pas +++ b/compiler/widestr.pas @@ -345,21 +345,65 @@ unit widestr; end; function CharLength (P: PChar; L: SizeInt): SizeInt; + + function UTF8CodePointLength(firstbyte: byte): SizeInt; + var + firstzerobit: SizeInt; + begin + result:=1; + { bsr searches for the leftmost 1 bit. We are interested in the + leftmost 0 bit, so first invert the value + } + firstzerobit:=BsrByte(not(firstbyte)); + { if there is no zero bit or the first zero bit is the rightmost bit + (bit 0), this is an invalid UTF-8 byte ($ff cannot appear in an + UTF-8-encoded string, and in the worst case bit 1 has to be zero) + } + if (firstzerobit=0) or (firstzerobit=7) then + exit; + { the number of bytes belonging to this code point is + 7-(pos first 0-bit). + } + result:=7-firstzerobit; + end; + var P2: PChar; + I, J, K: SizeInt; begin +{$IFDEF FPC_HAS_CPSTRING} if L = 0 then begin Result := 0; Exit; end; - GetMem (P2, Succ (L)); { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1) should be equal to the amount of characters in the source string. } - ChangeCodePage (P, L, DefaultSystemCodepage, P2, 28591); - P2 [L] := #0; - Result := StrLen (P2); - FreeMem (P2, Succ (L)); + if DefaultSystemCodepage = CP_UTF8 then +{ ChangeCodePage does not work for UTF-8 apparently... :-( } + begin + I := 1; + J := 0; + while I <= L do + begin + K := Utf8CodePointLength (byte (P^)); + Inc (I, K); + Inc (P, K); + Inc (J); + end; + Result := J; + end + else + begin + GetMem (P2, Succ (L)); + FillChar (P2^, Succ (L), 0); + ChangeCodePage (P, L, DefaultSystemCodepage, P2, 28591); + Result := StrLen (P2); + FreeMem (P2, Succ (L)); + end; +{$ELSE FPC_HAS_CPSTRING} + Result := L; +{$ENDIF FPC_HAS_CPSTRING} end; function CharLength (const S: string): SizeInt; |