diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 04:57:42 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 04:57:42 +0000 |
commit | 7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4 (patch) | |
tree | de43e349e9f70e27ef30b2a0de9de2df628cc1c3 /utf8.h | |
parent | 8004f2ac219abdd8660c02a4a46ed97695dc379d (diff) | |
download | perl-7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4.tar.gz |
runtime now looks at the SVf_UTF8 bit on the SV to decide
whether to use widechar semantics; lexer and RE engine continue
to need "use utf8" to enable unicode awareness in literals
and patterns (TODO: this needs to be fixed); $1 et al are marked
SvUTF8 if the pattern was compiled for utf8 (TODO: propagating
it from the data is probably better)
p4raw-id: //depot/perl@4930
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 16 |
1 files changed, 16 insertions, 0 deletions
/*
 * utf8.h -- region touched by this change (hunk @@ -28,5 +28,21 @@, located
 * after END_EXTERN_C).  IN_UTF8, IN_BYTE and UTF8SKIP were already present;
 * DO_UTF8 and the is*_lazy* macros are the additions.
 */

/* Nonzero when the HINT_UTF8 bit is set in PL_curcop->op_private. */
#define IN_UTF8 (PL_curcop->op_private & HINT_UTF8)
/* Nonzero when the HINT_BYTE bit is set in PL_curcop->op_private. */
#define IN_BYTE (PL_curcop->op_private & HINT_BYTE)
/* True when sv is flagged SvUTF8 and IN_BYTE is not in effect. */
#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTE)

/*
 * Index PL_utf8skip[] by the byte that s points at.  The argument is
 * parenthesized so that a compound expression such as (s + 1) selects the
 * byte at that address instead of adding to the byte value at s.
 */
#define UTF8SKIP(s) PL_utf8skip[*(U8*)(s)]

/*
 * Note: we try to be careful never to call the isXXX_utf8() functions
 * unless we're pretty sure we've seen the beginning of a UTF-8 character
 * (that is, the two high bits are set).  Otherwise we risk loading in the
 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
 *
 * isXXX_lazy_if(p,c): when c is false, or the byte at p is below 0xc0, use
 * the plain isXXX() test on *p; otherwise use isXXX_utf8() on p.
 *
 * Both parameters are fully parenthesized, so a compound condition such as
 * (a || b) is negated as a whole and a compound pointer expression is cast
 * as a whole.  p is still expanded twice (once in the test, once in the
 * selected branch), so do not pass an argument with side effects.
 */
#define isIDFIRST_lazy_if(p,c) ((!(c) || (*((U8*)(p)) < 0xc0)) \
				? isIDFIRST(*(p)) \
				: isIDFIRST_utf8((U8*)(p)))
#define isALNUM_lazy_if(p,c)   ((!(c) || (*((U8*)(p)) < 0xc0)) \
				? isALNUM(*(p)) \
				: isALNUM_utf8((U8*)(p)))

/* Unconditional forms: always allow the UTF-8 path. */
#define isIDFIRST_lazy(p)	isIDFIRST_lazy_if(p,1)
#define isALNUM_lazy(p)		isALNUM_lazy_if(p,1)