summaryrefslogtreecommitdiff
path: root/ext/File/Glob
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-04-07 14:25:28 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-04-07 14:25:28 +0000
commitdd36d13c89140c2d9d7954b9f1de583003154c13 (patch)
treeadaef7a5e5beeb3683ba8880a94fc181ad7f00d4 /ext/File/Glob
parent525c8498a83d993a86ed1c5080d595040c6663f5 (diff)
downloadperl-dd36d13c89140c2d9d7954b9f1de583003154c13.tar.gz
There was no nice way of getting in UTF-8 filenames:
now one can use in the (new) three-arg form of readdir() and in File::Glob import a ":utf8" to transparently accept the filenames as Unicode. Note that only :utf8 is supported, not fancier stuff like :encoding(foobar) p4raw-id: //depot/perl@15776
Diffstat (limited to 'ext/File/Glob')
-rw-r--r--ext/File/Glob/Glob.pm27
-rw-r--r--ext/File/Glob/Glob.xs2
-rw-r--r--ext/File/Glob/Makefile.PL2
-rw-r--r--ext/File/Glob/bsd_glob.h1
-rw-r--r--ext/File/Glob/t/utf8.t17
5 files changed, 44 insertions, 5 deletions
diff --git a/ext/File/Glob/Glob.pm b/ext/File/Glob/Glob.pm
index a704b567bb..4c34d380b0 100644
--- a/ext/File/Glob/Glob.pm
+++ b/ext/File/Glob/Glob.pm
@@ -31,6 +31,7 @@ use XSLoader ();
GLOB_NOSPACE
GLOB_QUOTE
GLOB_TILDE
+ GLOB_UTF8
);
%EXPORT_TAGS = (
@@ -51,6 +52,7 @@ use XSLoader ();
GLOB_NOSPACE
GLOB_QUOTE
GLOB_TILDE
+ GLOB_UTF8
glob
bsd_glob
) ],
@@ -62,10 +64,11 @@ sub import {
require Exporter;
my $i = 1;
while ($i < @_) {
- if ($_[$i] =~ /^:(case|nocase|globally)$/) {
+ if ($_[$i] =~ /^:(case|nocase|globally|utf8)$/) {
splice(@_, $i, 1);
$DEFAULT_FLAGS &= ~GLOB_NOCASE() if $1 eq 'case';
$DEFAULT_FLAGS |= GLOB_NOCASE() if $1 eq 'nocase';
+ $DEFAULT_FLAGS |= GLOB_UTF8() if $1 eq 'utf8';
if ($1 eq 'globally') {
local $^W;
*CORE::GLOBAL::glob = \&File::Glob::csh_glob;
@@ -203,15 +206,19 @@ File::Glob - Perl extension for BSD glob routine
## override the core glob (CORE::glob() does this automatically
## by default anyway, since v5.6.0)
use File::Glob ':globally';
- my @sources = <*.{c,h,y}>
+ my @sources = <*.{c,h,y}>;
## override the core glob, forcing case sensitivity
use File::Glob qw(:globally :case);
- my @sources = <*.{c,h,y}>
+ my @sources = <*.{c,h,y}>;
## override the core glob forcing case insensitivity
use File::Glob qw(:globally :nocase);
- my @sources = <*.{c,h,y}>
+ my @sources = <*.{c,h,y}>;
+
+ ## override the core glob forcing UTF-8 names
+ use File::Glob qw(:globally :utf8);
+ my @sources = <*.{c,h,y}>;
=head1 DESCRIPTION
@@ -321,6 +328,18 @@ order (case does not matter) rather than in ASCII order.
=back
+The following flag has been added in the Perl implementation for
+Unicode compatibility:
+
+=over 4
+
+=item C<GLOB_UTF8>
+
+The filenames returned will be marked as being in UTF-8 encoding of
+Unicode. Note that it is your responsibility to ascertain that the
+filesystem you are globbing in returns valid UTF-8 filenames.
+The encoding pragma affects this feature, see L<encoding>.
+
=head1 DIAGNOSTICS
bsd_glob() returns a list of matching paths, possibly zero length. If an
diff --git a/ext/File/Glob/Glob.xs b/ext/File/Glob/Glob.xs
index bc58b6a934..5d95666137 100644
--- a/ext/File/Glob/Glob.xs
+++ b/ext/File/Glob/Glob.xs
@@ -63,6 +63,8 @@ PPCODE:
strlen(pglob.gl_pathv[i])));
TAINT;
SvTAINT(tmp);
+ if (pglob.gl_flags & GLOB_UTF8)
+ sv_utf8_upgrade(tmp);
PUSHs(tmp);
}
diff --git a/ext/File/Glob/Makefile.PL b/ext/File/Glob/Makefile.PL
index 0d4267a25b..a24b6635e7 100644
--- a/ext/File/Glob/Makefile.PL
+++ b/ext/File/Glob/Makefile.PL
@@ -26,7 +26,7 @@ WriteConstants(
NAME => 'File::Glob',
NAMES => [qw(GLOB_ABEND GLOB_ALPHASORT GLOB_ALTDIRFUNC GLOB_BRACE GLOB_ERR
GLOB_LIMIT GLOB_MARK GLOB_NOCASE GLOB_NOCHECK GLOB_NOMAGIC
- GLOB_NOSORT GLOB_NOSPACE GLOB_QUOTE GLOB_TILDE),
+ GLOB_NOSORT GLOB_NOSPACE GLOB_QUOTE GLOB_TILDE GLOB_UTF8),
{name=>"GLOB_ERROR", macro=>["#ifdef GLOB_ERROR\n\tdMY_CXT;\n\n","#endif\n"]}],
BREAKOUT_AT => 8,
);
diff --git a/ext/File/Glob/bsd_glob.h b/ext/File/Glob/bsd_glob.h
index af92c04ea2..ef46c001e0 100644
--- a/ext/File/Glob/bsd_glob.h
+++ b/ext/File/Glob/bsd_glob.h
@@ -76,6 +76,7 @@ typedef struct {
#define GLOB_ALPHASORT 0x2000 /* Alphabetic, not ASCII sort, like csh. */
#define GLOB_LIMIT 0x4000 /* Limit pattern match output to ARG_MAX
(usually from limits.h). */
+#define GLOB_UTF8 0x8000 /* Return UTF-8. (Perl extension.) */
#define GLOB_NOSPACE (-1) /* Malloc call failed. */
#define GLOB_ABEND (-2) /* Unignored error. */
diff --git a/ext/File/Glob/t/utf8.t b/ext/File/Glob/t/utf8.t
new file mode 100644
index 0000000000..97d0b2c6bf
--- /dev/null
+++ b/ext/File/Glob/t/utf8.t
@@ -0,0 +1,17 @@
+use File::Glob qw(:globally :utf8);
+
+# Can't really depend on Tru64 UTF-8 filenames being so must just see
+# that things don't crash and that *if* UTF-8 were to be received, it's
+# valid. (Maybe later add checks that are run if we are on NTFS/HFS+.)
+# (see also t/op/readdir.t)
+
+print "1..2\n";
+
+my $a = <*>;
+
+print utf8::valid($a) ? "ok 1\n" : "not ok 1\n";
+
+my @a=<*>;
+
+print utf8::valid($a[0]) ? "ok 2\n" : "not ok 2\n";
+