summary | refs | log | tree | commit | diff
path: root/regen/regcharclass.pl
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-09-02 16:48:14 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-09-13 21:14:02 -0600
commit: 5e6c6c1e0c9ebf171541d4b160a124670d0d46d2 (patch)
tree: 6916ef652abc796ff806fb5ce73d4c03a64af177 /regen/regcharclass.pl
parent: 765ec46cc8c8533a1aed8a26490b879042d7fff3 (diff)
download: perl-5e6c6c1e0c9ebf171541d4b160a124670d0d46d2.tar.gz
regen/regcharclass.pl: Allow comments in input
Lines whose first non-blank character is a '#' are now considered to be comments, and ignored. This allows the moving of some lines that have been commented out back to after the __DATA__ where they really belong.
Diffstat (limited to 'regen/regcharclass.pl')
-rwxr-xr-x regen/regcharclass.pl 16
1 files changed, 8 insertions, 8 deletions
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 6225697c3e..e870df9409 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -303,10 +303,8 @@ sub new {
}
}
next;
- } elsif ( /\S/ ) {
- die "Unparsable line: $txt\n";
} else {
- next;
+ die "Unparsable line: $txt\n";
}
my ( $cp, $low, $latin1, $utf8 )= __uni_latin1( $str );
my $UTF8= $low || $utf8;
@@ -708,7 +706,7 @@ if ( !caller ) {
};
while ( <DATA> ) {
- s/^\s*#//;
+ s/^ \s* (?: \# .* ) ? $ //x; # squeeze out comment and blanks
next unless /\S/;
chomp;
if ( /^([A-Z]+)/ ) {
@@ -744,12 +742,16 @@ if ( !caller ) {
# Accepts a single Unicode code point per line, prefaced by '0x'
# or a range of two code points separated by a minus (and optional space)
# or a single \p{} per line.
+# A blank line or one whose first non-blank character is '#' is a comment
#
# If run on a non-ASCII platform will automatically convert the Unicode input
# to native
#
-# This is no longer used, but retained in case it is needed some day. Put the
-# lines below under __DATA__
+
+1; # in the unlikely case we are being used as a module
+
+__DATA__
+# This is no longer used, but retained in case it is needed some day.
# TRICKYFOLD: Problematic fold case letters. When adding to this list, also should add them to regcomp.c and fold_grind.t
# => generic cp generic-cp generic-both :fast safe
# 0x00DF # LATIN SMALL LETTER SHARP S
@@ -759,9 +761,7 @@ if ( !caller ) {
# 0x1FD3 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA; maps same as 0390
# 0x1FE3 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA; maps same as 03B0
-1; # in the unlikely case we are being used as a module
-__DATA__
LNBREAK: Line Break: \R
=> generic UTF8 LATIN1 :fast safe
"\x0D\x0A" # CRLF - Network (Windows) line ending