diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-04-07 14:25:28 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-04-07 14:25:28 +0000 |
commit | dd36d13c89140c2d9d7954b9f1de583003154c13 (patch) | |
tree | adaef7a5e5beeb3683ba8880a94fc181ad7f00d4 /ext/File/Glob | |
parent | 525c8498a83d993a86ed1c5080d595040c6663f5 (diff) | |
download | perl-dd36d13c89140c2d9d7954b9f1de583003154c13.tar.gz |
There was no nice way of getting in UTF-8 filenames:
now one can use in the (new) three-arg form of readdir()
and in File::Glob import a ":utf8" to transparently accept
the filenames as Unicode. Note that only :utf8 is supported,
not fancier stuff like :encoding(foobar)
p4raw-id: //depot/perl@15776
Diffstat (limited to 'ext/File/Glob')
-rw-r--r-- | ext/File/Glob/Glob.pm | 27 | ||||
-rw-r--r-- | ext/File/Glob/Glob.xs | 2 | ||||
-rw-r--r-- | ext/File/Glob/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/File/Glob/bsd_glob.h | 1 | ||||
-rw-r--r-- | ext/File/Glob/t/utf8.t | 17 |
5 files changed, 44 insertions, 5 deletions
diff --git a/ext/File/Glob/Glob.pm b/ext/File/Glob/Glob.pm index a704b567bb..4c34d380b0 100644 --- a/ext/File/Glob/Glob.pm +++ b/ext/File/Glob/Glob.pm @@ -31,6 +31,7 @@ use XSLoader (); GLOB_NOSPACE GLOB_QUOTE GLOB_TILDE + GLOB_UTF8 ); %EXPORT_TAGS = ( @@ -51,6 +52,7 @@ use XSLoader (); GLOB_NOSPACE GLOB_QUOTE GLOB_TILDE + GLOB_UTF8 glob bsd_glob ) ], @@ -62,10 +64,11 @@ sub import { require Exporter; my $i = 1; while ($i < @_) { - if ($_[$i] =~ /^:(case|nocase|globally)$/) { + if ($_[$i] =~ /^:(case|nocase|globally|utf8)$/) { splice(@_, $i, 1); $DEFAULT_FLAGS &= ~GLOB_NOCASE() if $1 eq 'case'; $DEFAULT_FLAGS |= GLOB_NOCASE() if $1 eq 'nocase'; + $DEFAULT_FLAGS |= GLOB_UTF8() if $1 eq 'utf8'; if ($1 eq 'globally') { local $^W; *CORE::GLOBAL::glob = \&File::Glob::csh_glob; @@ -203,15 +206,19 @@ File::Glob - Perl extension for BSD glob routine ## override the core glob (CORE::glob() does this automatically ## by default anyway, since v5.6.0) use File::Glob ':globally'; - my @sources = <*.{c,h,y}> + my @sources = <*.{c,h,y}>; ## override the core glob, forcing case sensitivity use File::Glob qw(:globally :case); - my @sources = <*.{c,h,y}> + my @sources = <*.{c,h,y}>; ## override the core glob forcing case insensitivity use File::Glob qw(:globally :nocase); - my @sources = <*.{c,h,y}> + my @sources = <*.{c,h,y}>; + + ## override the core glob forcing UTF-8 names + use File::Glob qw(:globally :utf8); + my @sources = <*.{c,h,y}>; =head1 DESCRIPTION @@ -321,6 +328,18 @@ order (case does not matter) rather than in ASCII order. =back +The following flag has been added in the Perl implementation for +Unicode compatibility: + +=over 4 + +=item C<GLOB_UTF8> + +The filenames returned will be marked as being in UTF-8 encoding of +Unicode. Note that it is your responsibility to ascertain that the +filesystem you are globbing in returns valid UTF-8 filenames. +The encoding pragma affects this feature, see L<encoding>. + =head1 DIAGNOSTICS bsd_glob() returns a list of matching paths, possibly zero length. If an diff --git a/ext/File/Glob/Glob.xs b/ext/File/Glob/Glob.xs index bc58b6a934..5d95666137 100644 --- a/ext/File/Glob/Glob.xs +++ b/ext/File/Glob/Glob.xs @@ -63,6 +63,8 @@ PPCODE: strlen(pglob.gl_pathv[i]))); TAINT; SvTAINT(tmp); + if (pglob.gl_flags & GLOB_UTF8) + sv_utf8_upgrade(tmp); PUSHs(tmp); } diff --git a/ext/File/Glob/Makefile.PL b/ext/File/Glob/Makefile.PL index 0d4267a25b..a24b6635e7 100644 --- a/ext/File/Glob/Makefile.PL +++ b/ext/File/Glob/Makefile.PL @@ -26,7 +26,7 @@ WriteConstants( NAME => 'File::Glob', NAMES => [qw(GLOB_ABEND GLOB_ALPHASORT GLOB_ALTDIRFUNC GLOB_BRACE GLOB_ERR GLOB_LIMIT GLOB_MARK GLOB_NOCASE GLOB_NOCHECK GLOB_NOMAGIC - GLOB_NOSORT GLOB_NOSPACE GLOB_QUOTE GLOB_TILDE), + GLOB_NOSORT GLOB_NOSPACE GLOB_QUOTE GLOB_TILDE GLOB_UTF8), {name=>"GLOB_ERROR", macro=>["#ifdef GLOB_ERROR\n\tdMY_CXT;\n\n","#endif\n"]}], BREAKOUT_AT => 8, ); diff --git a/ext/File/Glob/bsd_glob.h b/ext/File/Glob/bsd_glob.h index af92c04ea2..ef46c001e0 100644 --- a/ext/File/Glob/bsd_glob.h +++ b/ext/File/Glob/bsd_glob.h @@ -76,6 +76,7 @@ typedef struct { #define GLOB_ALPHASORT 0x2000 /* Alphabetic, not ASCII sort, like csh. */ #define GLOB_LIMIT 0x4000 /* Limit pattern match output to ARG_MAX (usually from limits.h). */ +#define GLOB_UTF8 0x8000 /* Return UTF-8. (Perl extension.) */ #define GLOB_NOSPACE (-1) /* Malloc call failed. */ #define GLOB_ABEND (-2) /* Unignored error. */ diff --git a/ext/File/Glob/t/utf8.t b/ext/File/Glob/t/utf8.t new file mode 100644 index 0000000000..97d0b2c6bf --- /dev/null +++ b/ext/File/Glob/t/utf8.t @@ -0,0 +1,17 @@ +use File::Glob qw(:globally :utf8); + +# Can't really depend on Tru64 UTF-8 filenames being so must just see +# that things don't crash and that *if* UTF-8 were to be received, it's +# valid. (Maybe later add checks that are run if we are on NTFS/HFS+.) +# (see also t/op/readdir.t) + +print "1..2\n"; + +my $a = <*>; + +print utf8::valid($a) ? "ok 1\n" : "not ok 1\n"; + +my @a=<*>; + +print utf8::valid($a[0]) ? "ok 2\n" : "not ok 2\n"; + |