diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-03-04 09:14:25 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-08-29 09:56:00 -0600 |
commit | 8c4a053775b71d5f962f7a584bc3d44b0092d9b2 (patch) | |
tree | 4d3e7106ab8e7c950861785723d36e061bd87a17 /toke.c | |
parent | 5f0aa340d4f4312ffa23535d638d891e4f3b2f7a (diff) | |
download | perl-8c4a053775b71d5f962f7a584bc3d44b0092d9b2.tar.gz |
toke.c: Remove character set dependency
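Instead of hard-coding the byte values that make up the Byte Order
Mark in the UTF-8 or UTF-EBCDIC encodings, use the generated constants
(BOM_UTF8_FIRST_BYTE and BOM_UTF8_TAIL) for the current platform.
As a result, the separate EBCDIC-only `case 0xDD` branch in S_swallow_bom is no longer needed and is removed.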
Diffstat (limited to 'toke.c')
-rw-r--r-- | toke.c | 24 |
1 files changed, 9 insertions, 15 deletions
@@ -5264,7 +5264,7 @@ Perl_yylex(pTHX) * check if it in fact is. */ if (bof && PL_rsfp && (*s == 0 || - *(U8*)s == 0xEF || + *(U8*)s == BOM_UTF8_FIRST_BYTE || *(U8*)s >= 0xFE || s[1] == 0)) { Off_t offset = (IV)PerlIO_tell(PL_rsfp); @@ -11539,12 +11539,14 @@ S_swallow_bom(pTHX_ U8 *s) #endif } break; - case 0xEF: - if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); - s += 3; /* UTF-8 */ - } - break; + case BOM_UTF8_FIRST_BYTE: { + const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ + if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); + s += len + 1; /* UTF-8 */ + } + break; + } case 0: if (slen > 3) { if (s[1] == 0) { @@ -11567,14 +11569,6 @@ S_swallow_bom(pTHX_ U8 *s) #endif } } -#ifdef EBCDIC - case 0xDD: - if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) { - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); - s += 4; /* UTF-8 */ - } - break; -#endif default: if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { |