summary refs log tree commit diff
diff options
context:
space:
mode:
author Nick Ing-Simmons <nik@tiuk.ti.com> 2001-04-29 15:43:22 +0000
committer Nick Ing-Simmons <nik@tiuk.ti.com> 2001-04-29 15:43:22 +0000
commit a2f19d56607c6631ca34ae920e740e4fa02ba501 (patch)
tree ab6f086c6285918ec41aed0d8afd4931a040dffe
parent a5b5a15b6f3539d598c74676dd07c037780e6604 (diff)
parent aa58aa353209e3416c78e241b039154fdfd9415b (diff)
download perl-a2f19d56607c6631ca34ae920e740e4fa02ba501.tar.gz
Integrate mainline.
p4raw-id: //depot/perlio@9904
-rw-r--r--INSTALL4
-rw-r--r--MANIFEST186
-rw-r--r--doop.c8
-rw-r--r--embed.h2
-rwxr-xr-xembed.pl2
-rw-r--r--ext/B/B/Deparse.pm19
-rw-r--r--ext/IO/lib/IO/Seekable.pm4
-rw-r--r--hints/hpux.sh90
-rw-r--r--lib/unicode/Blocks.pl203
-rw-r--r--lib/unicode/In.pl101
-rw-r--r--lib/unicode/In/0.pl (renamed from lib/unicode/In/BasicLatin.pl)0
-rw-r--r--lib/unicode/In/1.pl (renamed from lib/unicode/In/Latin-1Supplement.pl)0
-rw-r--r--lib/unicode/In/10.pl (renamed from lib/unicode/In/Hebrew.pl)0
-rw-r--r--lib/unicode/In/11.pl (renamed from lib/unicode/In/Arabic.pl)0
-rw-r--r--lib/unicode/In/12.pl (renamed from lib/unicode/In/Syriac.pl)0
-rw-r--r--lib/unicode/In/13.pl (renamed from lib/unicode/In/Thaana.pl)0
-rw-r--r--lib/unicode/In/14.pl (renamed from lib/unicode/In/Devanagari.pl)0
-rw-r--r--lib/unicode/In/15.pl (renamed from lib/unicode/In/Bengali.pl)0
-rw-r--r--lib/unicode/In/16.pl (renamed from lib/unicode/In/Gurmukhi.pl)0
-rw-r--r--lib/unicode/In/17.pl (renamed from lib/unicode/In/Gujarati.pl)0
-rw-r--r--lib/unicode/In/18.pl (renamed from lib/unicode/In/Oriya.pl)0
-rw-r--r--lib/unicode/In/19.pl (renamed from lib/unicode/In/Tamil.pl)0
-rw-r--r--lib/unicode/In/2.pl (renamed from lib/unicode/In/LatinExtended-A.pl)0
-rw-r--r--lib/unicode/In/20.pl (renamed from lib/unicode/In/Telugu.pl)0
-rw-r--r--lib/unicode/In/21.pl (renamed from lib/unicode/In/Kannada.pl)0
-rw-r--r--lib/unicode/In/22.pl (renamed from lib/unicode/In/Malayalam.pl)0
-rw-r--r--lib/unicode/In/23.pl (renamed from lib/unicode/In/Sinhala.pl)0
-rw-r--r--lib/unicode/In/24.pl (renamed from lib/unicode/In/Thai.pl)0
-rw-r--r--lib/unicode/In/25.pl (renamed from lib/unicode/In/Lao.pl)0
-rw-r--r--lib/unicode/In/26.pl (renamed from lib/unicode/In/Tibetan.pl)0
-rw-r--r--lib/unicode/In/27.pl (renamed from lib/unicode/In/Myanmar.pl)0
-rw-r--r--lib/unicode/In/28.pl (renamed from lib/unicode/In/Georgian.pl)0
-rw-r--r--lib/unicode/In/29.pl (renamed from lib/unicode/In/HangulJamo.pl)0
-rw-r--r--lib/unicode/In/3.pl (renamed from lib/unicode/In/LatinExtended-B.pl)0
-rw-r--r--lib/unicode/In/30.pl (renamed from lib/unicode/In/Ethiopic.pl)0
-rw-r--r--lib/unicode/In/31.pl (renamed from lib/unicode/In/Cherokee.pl)0
-rw-r--r--lib/unicode/In/32.pl (renamed from lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl)0
-rw-r--r--lib/unicode/In/33.pl (renamed from lib/unicode/In/Ogham.pl)0
-rw-r--r--lib/unicode/In/34.pl (renamed from lib/unicode/In/Runic.pl)0
-rw-r--r--lib/unicode/In/35.pl (renamed from lib/unicode/In/Khmer.pl)0
-rw-r--r--lib/unicode/In/36.pl (renamed from lib/unicode/In/Mongolian.pl)0
-rw-r--r--lib/unicode/In/37.pl (renamed from lib/unicode/In/LatinExtendedAdditional.pl)0
-rw-r--r--lib/unicode/In/38.pl (renamed from lib/unicode/In/GreekExtended.pl)0
-rw-r--r--lib/unicode/In/39.pl (renamed from lib/unicode/In/GeneralPunctuation.pl)0
-rw-r--r--lib/unicode/In/4.pl (renamed from lib/unicode/In/IPAExtensions.pl)0
-rw-r--r--lib/unicode/In/40.pl (renamed from lib/unicode/In/SuperscriptsandSubscripts.pl)0
-rw-r--r--lib/unicode/In/41.pl (renamed from lib/unicode/In/CurrencySymbols.pl)0
-rw-r--r--lib/unicode/In/42.pl (renamed from lib/unicode/In/CombiningMarksforSymbols.pl)0
-rw-r--r--lib/unicode/In/43.pl (renamed from lib/unicode/In/LetterlikeSymbols.pl)0
-rw-r--r--lib/unicode/In/44.pl (renamed from lib/unicode/In/NumberForms.pl)0
-rw-r--r--lib/unicode/In/45.pl (renamed from lib/unicode/In/Arrows.pl)0
-rw-r--r--lib/unicode/In/46.pl (renamed from lib/unicode/In/MathematicalOperators.pl)0
-rw-r--r--lib/unicode/In/47.pl (renamed from lib/unicode/In/MiscellaneousTechnical.pl)0
-rw-r--r--lib/unicode/In/48.pl (renamed from lib/unicode/In/ControlPictures.pl)0
-rw-r--r--lib/unicode/In/49.pl (renamed from lib/unicode/In/OpticalCharacterRecognition.pl)0
-rw-r--r--lib/unicode/In/5.pl (renamed from lib/unicode/In/SpacingModifierLetters.pl)0
-rw-r--r--lib/unicode/In/50.pl (renamed from lib/unicode/In/EnclosedAlphanumerics.pl)0
-rw-r--r--lib/unicode/In/51.pl (renamed from lib/unicode/In/BoxDrawing.pl)0
-rw-r--r--lib/unicode/In/52.pl (renamed from lib/unicode/In/BlockElements.pl)0
-rw-r--r--lib/unicode/In/53.pl (renamed from lib/unicode/In/GeometricShapes.pl)0
-rw-r--r--lib/unicode/In/54.pl (renamed from lib/unicode/In/MiscellaneousSymbols.pl)0
-rw-r--r--lib/unicode/In/55.pl (renamed from lib/unicode/In/Dingbats.pl)0
-rw-r--r--lib/unicode/In/56.pl (renamed from lib/unicode/In/BraillePatterns.pl)0
-rw-r--r--lib/unicode/In/57.pl (renamed from lib/unicode/In/CJKRadicalsSupplement.pl)0
-rw-r--r--lib/unicode/In/58.pl (renamed from lib/unicode/In/KangxiRadicals.pl)0
-rw-r--r--lib/unicode/In/59.pl (renamed from lib/unicode/In/IdeographicDescriptionCharacters.pl)0
-rw-r--r--lib/unicode/In/6.pl (renamed from lib/unicode/In/CombiningDiacriticalMarks.pl)0
-rw-r--r--lib/unicode/In/60.pl (renamed from lib/unicode/In/CJKSymbolsandPunctuation.pl)0
-rw-r--r--lib/unicode/In/61.pl (renamed from lib/unicode/In/Hiragana.pl)0
-rw-r--r--lib/unicode/In/62.pl (renamed from lib/unicode/In/Katakana.pl)0
-rw-r--r--lib/unicode/In/63.pl (renamed from lib/unicode/In/Bopomofo.pl)0
-rw-r--r--lib/unicode/In/64.pl (renamed from lib/unicode/In/HangulCompatibilityJamo.pl)0
-rw-r--r--lib/unicode/In/65.pl (renamed from lib/unicode/In/Kanbun.pl)0
-rw-r--r--lib/unicode/In/66.pl (renamed from lib/unicode/In/BopomofoExtended.pl)0
-rw-r--r--lib/unicode/In/67.pl (renamed from lib/unicode/In/EnclosedCJKLettersandMonths.pl)0
-rw-r--r--lib/unicode/In/68.pl (renamed from lib/unicode/In/CJKCompatibility.pl)0
-rw-r--r--lib/unicode/In/69.pl (renamed from lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl)0
-rw-r--r--lib/unicode/In/7.pl (renamed from lib/unicode/In/Greek.pl)0
-rw-r--r--lib/unicode/In/70.pl (renamed from lib/unicode/In/CJKUnifiedIdeographs.pl)0
-rw-r--r--lib/unicode/In/71.pl (renamed from lib/unicode/In/YiSyllables.pl)0
-rw-r--r--lib/unicode/In/72.pl (renamed from lib/unicode/In/YiRadicals.pl)0
-rw-r--r--lib/unicode/In/73.pl (renamed from lib/unicode/In/HangulSyllables.pl)0
-rw-r--r--lib/unicode/In/74.pl (renamed from lib/unicode/In/HighSurrogates.pl)0
-rw-r--r--lib/unicode/In/75.pl (renamed from lib/unicode/In/HighPrivateUseSurrogates.pl)0
-rw-r--r--lib/unicode/In/76.pl (renamed from lib/unicode/In/LowSurrogates.pl)0
-rw-r--r--lib/unicode/In/77.pl6
-rw-r--r--lib/unicode/In/78.pl (renamed from lib/unicode/In/CJKCompatibilityIdeographs.pl)0
-rw-r--r--lib/unicode/In/79.pl (renamed from lib/unicode/In/AlphabeticPresentationForms.pl)0
-rw-r--r--lib/unicode/In/8.pl (renamed from lib/unicode/In/Cyrillic.pl)0
-rw-r--r--lib/unicode/In/80.pl (renamed from lib/unicode/In/ArabicPresentationForms-A.pl)0
-rw-r--r--lib/unicode/In/81.pl (renamed from lib/unicode/In/CombiningHalfMarks.pl)0
-rw-r--r--lib/unicode/In/82.pl (renamed from lib/unicode/In/CJKCompatibilityForms.pl)0
-rw-r--r--lib/unicode/In/83.pl (renamed from lib/unicode/In/SmallFormVariants.pl)0
-rw-r--r--lib/unicode/In/84.pl (renamed from lib/unicode/In/ArabicPresentationForms-B.pl)0
-rw-r--r--lib/unicode/In/85.pl (renamed from lib/unicode/In/Specials.pl)0
-rw-r--r--lib/unicode/In/86.pl (renamed from lib/unicode/In/HalfwidthandFullwidthForms.pl)0
-rw-r--r--lib/unicode/In/87.pl (renamed from lib/unicode/Block.pl)1
-rw-r--r--lib/unicode/In/88.pl6
-rw-r--r--lib/unicode/In/89.pl6
-rw-r--r--lib/unicode/In/9.pl (renamed from lib/unicode/In/Armenian.pl)0
-rw-r--r--lib/unicode/In/90.pl6
-rw-r--r--lib/unicode/In/91.pl6
-rw-r--r--lib/unicode/In/92.pl6
-rw-r--r--lib/unicode/In/93.pl6
-rw-r--r--lib/unicode/In/94.pl6
-rw-r--r--lib/unicode/In/95.pl6
-rw-r--r--lib/unicode/In/PrivateUse.pl6
-rwxr-xr-xlib/unicode/mktables.PL51
-rw-r--r--lib/utf8_heavy.pl11
-rw-r--r--objXSUB.h12
-rw-r--r--perl.h6
-rw-r--r--pod/perldiag.pod11
-rw-r--r--pod/perlunicode.pod9
-rw-r--r--proto.h2
-rw-r--r--regcomp.c103
-rw-r--r--regexec.c65
-rw-r--r--t/lib/b-deparse.t4
-rw-r--r--t/lib/selfstubber.t4
-rwxr-xr-xt/op/pat.t114
-rw-r--r--utf8.c71
-rw-r--r--win32/Makefile87
121 files changed, 921 insertions, 309 deletions
diff --git a/INSTALL b/INSTALL
index 4f5c039861..39d28d6341 100644
--- a/INSTALL
+++ b/INSTALL
@@ -896,8 +896,8 @@ You can elect to build a shared libperl by
To build a shared libperl, the environment variable controlling shared
library search (LD_LIBRARY_PATH in most systems, DYLD_LIBRARY_PATH for
-NeXTSTEP/OPENSTEP/Darwin, LIBRARY_PATH for BeOS, SHLIB_PATH for
-HP-UX, LIBPATH for AIX, PATH for Cygwin) must be set up to include
+NeXTSTEP/OPENSTEP/Darwin, LIBRARY_PATH for BeOS, LD_LIBRARY_PATH/SHLIB_PATH
+for HP-UX, LIBPATH for AIX, PATH for Cygwin) must be set up to include
the Perl build directory because that's where the shared libperl will
be created. Configure arranges makefile to have the correct shared
library search settings.
diff --git a/MANIFEST b/MANIFEST
index 33e69ebb30..988302e628 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -888,7 +888,7 @@ lib/unicode/ArabLnkGrp.pl Unicode character database
lib/unicode/ArabShap.txt Unicode character database
lib/unicode/BidiMirr.txt Unicode character database
lib/unicode/Bidirectional.pl Unicode character database
-lib/unicode/Block.pl Unicode character database
+lib/unicode/Blocks.pl Unicode character database
lib/unicode/Blocks.txt Unicode character database
lib/unicode/CaseFold.txt Unicode character database
lib/unicode/Category.pl Unicode character database
@@ -896,93 +896,103 @@ lib/unicode/CombiningClass.pl Unicode character database
lib/unicode/CompExcl.txt Unicode character database
lib/unicode/Decomposition.pl Unicode character database
lib/unicode/EAWidth.txt Unicode character database
-lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database
-lib/unicode/In/Arabic.pl Unicode character database
-lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database
-lib/unicode/In/ArabicPresentationForms-B.pl Unicode character database
-lib/unicode/In/Armenian.pl Unicode character database
-lib/unicode/In/Arrows.pl Unicode character database
-lib/unicode/In/BasicLatin.pl Unicode character database
-lib/unicode/In/Bengali.pl Unicode character database
-lib/unicode/In/BlockElements.pl Unicode character database
-lib/unicode/In/Bopomofo.pl Unicode character database
-lib/unicode/In/BopomofoExtended.pl Unicode character database
-lib/unicode/In/BoxDrawing.pl Unicode character database
-lib/unicode/In/BraillePatterns.pl Unicode character database
-lib/unicode/In/CJKCompatibility.pl Unicode character database
-lib/unicode/In/CJKCompatibilityForms.pl Unicode character database
-lib/unicode/In/CJKCompatibilityIdeographs.pl Unicode character database
-lib/unicode/In/CJKRadicalsSupplement.pl Unicode character database
-lib/unicode/In/CJKSymbolsandPunctuation.pl Unicode character database
-lib/unicode/In/CJKUnifiedIdeographs.pl Unicode character database
-lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl Unicode character database
-lib/unicode/In/Cherokee.pl Unicode character database
-lib/unicode/In/CombiningDiacriticalMarks.pl Unicode character database
-lib/unicode/In/CombiningHalfMarks.pl Unicode character database
-lib/unicode/In/CombiningMarksforSymbols.pl Unicode character database
-lib/unicode/In/ControlPictures.pl Unicode character database
-lib/unicode/In/CurrencySymbols.pl Unicode character database
-lib/unicode/In/Cyrillic.pl Unicode character database
-lib/unicode/In/Devanagari.pl Unicode character database
-lib/unicode/In/Dingbats.pl Unicode character database
-lib/unicode/In/EnclosedAlphanumerics.pl Unicode character database
-lib/unicode/In/EnclosedCJKLettersandMonths.pl Unicode character database
-lib/unicode/In/Ethiopic.pl Unicode character database
-lib/unicode/In/GeneralPunctuation.pl Unicode character database
-lib/unicode/In/GeometricShapes.pl Unicode character database
-lib/unicode/In/Georgian.pl Unicode character database
-lib/unicode/In/Greek.pl Unicode character database
-lib/unicode/In/GreekExtended.pl Unicode character database
-lib/unicode/In/Gujarati.pl Unicode character database
-lib/unicode/In/Gurmukhi.pl Unicode character database
-lib/unicode/In/HalfwidthandFullwidthForms.pl Unicode character database
-lib/unicode/In/HangulCompatibilityJamo.pl Unicode character database
-lib/unicode/In/HangulJamo.pl Unicode character database
-lib/unicode/In/HangulSyllables.pl Unicode character database
-lib/unicode/In/Hebrew.pl Unicode character database
-lib/unicode/In/HighPrivateUseSurrogates.pl Unicode character database
-lib/unicode/In/HighSurrogates.pl Unicode character database
-lib/unicode/In/Hiragana.pl Unicode character database
-lib/unicode/In/IPAExtensions.pl Unicode character database
-lib/unicode/In/IdeographicDescriptionCharacters.pl Unicode character database
-lib/unicode/In/Kanbun.pl Unicode character database
-lib/unicode/In/KangxiRadicals.pl Unicode character database
-lib/unicode/In/Kannada.pl Unicode character database
-lib/unicode/In/Katakana.pl Unicode character database
-lib/unicode/In/Khmer.pl Unicode character database
-lib/unicode/In/Lao.pl Unicode character database
-lib/unicode/In/Latin-1Supplement.pl Unicode character database
-lib/unicode/In/LatinExtended-A.pl Unicode character database
-lib/unicode/In/LatinExtended-B.pl Unicode character database
-lib/unicode/In/LatinExtendedAdditional.pl Unicode character database
-lib/unicode/In/LetterlikeSymbols.pl Unicode character database
-lib/unicode/In/LowSurrogates.pl Unicode character database
-lib/unicode/In/Malayalam.pl Unicode character database
-lib/unicode/In/MathematicalOperators.pl Unicode character database
-lib/unicode/In/MiscellaneousSymbols.pl Unicode character database
-lib/unicode/In/MiscellaneousTechnical.pl Unicode character database
-lib/unicode/In/Mongolian.pl Unicode character database
-lib/unicode/In/Myanmar.pl Unicode character database
-lib/unicode/In/NumberForms.pl Unicode character database
-lib/unicode/In/Ogham.pl Unicode character database
-lib/unicode/In/OpticalCharacterRecognition.pl Unicode character database
-lib/unicode/In/Oriya.pl Unicode character database
-lib/unicode/In/PrivateUse.pl Unicode character database
-lib/unicode/In/Runic.pl Unicode character database
-lib/unicode/In/Sinhala.pl Unicode character database
-lib/unicode/In/SmallFormVariants.pl Unicode character database
-lib/unicode/In/SpacingModifierLetters.pl Unicode character database
-lib/unicode/In/Specials.pl Unicode character database
-lib/unicode/In/SuperscriptsandSubscripts.pl Unicode character database
-lib/unicode/In/Syriac.pl Unicode character database
-lib/unicode/In/Tamil.pl Unicode character database
-lib/unicode/In/Telugu.pl Unicode character database
-lib/unicode/In/Thaana.pl Unicode character database
-lib/unicode/In/Thai.pl Unicode character database
-lib/unicode/In/Tibetan.pl Unicode character database
-lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl Unicode character database
-lib/unicode/In/YiRadicals.pl Unicode character database
-lib/unicode/In/YiSyllables.pl Unicode character database
+lib/unicode/In.pl Unicode character database
+lib/unicode/In/0.pl Unicode character database
+lib/unicode/In/1.pl Unicode character database
+lib/unicode/In/2.pl Unicode character database
+lib/unicode/In/3.pl Unicode character database
+lib/unicode/In/4.pl Unicode character database
+lib/unicode/In/5.pl Unicode character database
+lib/unicode/In/6.pl Unicode character database
+lib/unicode/In/7.pl Unicode character database
+lib/unicode/In/8.pl Unicode character database
+lib/unicode/In/9.pl Unicode character database
+lib/unicode/In/10.pl Unicode character database
+lib/unicode/In/11.pl Unicode character database
+lib/unicode/In/12.pl Unicode character database
+lib/unicode/In/13.pl Unicode character database
+lib/unicode/In/14.pl Unicode character database
+lib/unicode/In/15.pl Unicode character database
+lib/unicode/In/16.pl Unicode character database
+lib/unicode/In/17.pl Unicode character database
+lib/unicode/In/18.pl Unicode character database
+lib/unicode/In/19.pl Unicode character database
+lib/unicode/In/20.pl Unicode character database
+lib/unicode/In/21.pl Unicode character database
+lib/unicode/In/22.pl Unicode character database
+lib/unicode/In/23.pl Unicode character database
+lib/unicode/In/24.pl Unicode character database
+lib/unicode/In/25.pl Unicode character database
+lib/unicode/In/26.pl Unicode character database
+lib/unicode/In/27.pl Unicode character database
+lib/unicode/In/28.pl Unicode character database
+lib/unicode/In/29.pl Unicode character database
+lib/unicode/In/30.pl Unicode character database
+lib/unicode/In/31.pl Unicode character database
+lib/unicode/In/32.pl Unicode character database
+lib/unicode/In/33.pl Unicode character database
+lib/unicode/In/34.pl Unicode character database
+lib/unicode/In/35.pl Unicode character database
+lib/unicode/In/36.pl Unicode character database
+lib/unicode/In/37.pl Unicode character database
+lib/unicode/In/38.pl Unicode character database
+lib/unicode/In/39.pl Unicode character database
+lib/unicode/In/40.pl Unicode character database
+lib/unicode/In/41.pl Unicode character database
+lib/unicode/In/42.pl Unicode character database
+lib/unicode/In/43.pl Unicode character database
+lib/unicode/In/44.pl Unicode character database
+lib/unicode/In/45.pl Unicode character database
+lib/unicode/In/46.pl Unicode character database
+lib/unicode/In/47.pl Unicode character database
+lib/unicode/In/48.pl Unicode character database
+lib/unicode/In/49.pl Unicode character database
+lib/unicode/In/50.pl Unicode character database
+lib/unicode/In/51.pl Unicode character database
+lib/unicode/In/52.pl Unicode character database
+lib/unicode/In/53.pl Unicode character database
+lib/unicode/In/54.pl Unicode character database
+lib/unicode/In/55.pl Unicode character database
+lib/unicode/In/56.pl Unicode character database
+lib/unicode/In/57.pl Unicode character database
+lib/unicode/In/58.pl Unicode character database
+lib/unicode/In/59.pl Unicode character database
+lib/unicode/In/60.pl Unicode character database
+lib/unicode/In/61.pl Unicode character database
+lib/unicode/In/62.pl Unicode character database
+lib/unicode/In/63.pl Unicode character database
+lib/unicode/In/64.pl Unicode character database
+lib/unicode/In/65.pl Unicode character database
+lib/unicode/In/66.pl Unicode character database
+lib/unicode/In/67.pl Unicode character database
+lib/unicode/In/68.pl Unicode character database
+lib/unicode/In/69.pl Unicode character database
+lib/unicode/In/70.pl Unicode character database
+lib/unicode/In/71.pl Unicode character database
+lib/unicode/In/72.pl Unicode character database
+lib/unicode/In/73.pl Unicode character database
+lib/unicode/In/74.pl Unicode character database
+lib/unicode/In/75.pl Unicode character database
+lib/unicode/In/76.pl Unicode character database
+lib/unicode/In/77.pl Unicode character database
+lib/unicode/In/78.pl Unicode character database
+lib/unicode/In/79.pl Unicode character database
+lib/unicode/In/80.pl Unicode character database
+lib/unicode/In/81.pl Unicode character database
+lib/unicode/In/82.pl Unicode character database
+lib/unicode/In/83.pl Unicode character database
+lib/unicode/In/84.pl Unicode character database
+lib/unicode/In/85.pl Unicode character database
+lib/unicode/In/86.pl Unicode character database
+lib/unicode/In/87.pl Unicode character database
+lib/unicode/In/88.pl Unicode character database
+lib/unicode/In/89.pl Unicode character database
+lib/unicode/In/90.pl Unicode character database
+lib/unicode/In/91.pl Unicode character database
+lib/unicode/In/92.pl Unicode character database
+lib/unicode/In/93.pl Unicode character database
+lib/unicode/In/94.pl Unicode character database
+lib/unicode/In/95.pl Unicode character database
lib/unicode/Index.txt Unicode character database
lib/unicode/Is/ASCII.pl Unicode character database
lib/unicode/Is/Alnum.pl Unicode character database
diff --git a/doop.c b/doop.c
index d7baecc792..755cbfd16a 100644
--- a/doop.c
+++ b/doop.c
@@ -344,7 +344,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
}
while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none) {
+ if ((uv = swash_fetch(rv, s, TRUE)) < none) {
s += UTF8SKIP(s);
matches++;
d = uvuni_to_utf8(d, uv);
@@ -423,7 +423,7 @@ S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
send = s + len;
while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none || uv == extra)
+ if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra)
matches++;
s += UTF8SKIP(s);
}
@@ -491,7 +491,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
if (squash) {
UV puv = 0xfeedface;
while (s < send) {
- uv = swash_fetch(rv, s);
+ uv = swash_fetch(rv, s, TRUE);
if (d > dend) {
STRLEN clen = d - dstart;
@@ -546,7 +546,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
}
else {
while (s < send) {
- uv = swash_fetch(rv, s);
+ uv = swash_fetch(rv, s, TRUE);
if (d > dend) {
STRLEN clen = d - dstart;
STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
diff --git a/embed.h b/embed.h
index 887e9eb66d..dd5d658969 100644
--- a/embed.h
+++ b/embed.h
@@ -2216,7 +2216,7 @@
#define sv_vsetpvfn(a,b,c,d,e,f,g) Perl_sv_vsetpvfn(aTHX_ a,b,c,d,e,f,g)
#define str_to_version(a) Perl_str_to_version(aTHX_ a)
#define swash_init(a,b,c,d,e) Perl_swash_init(aTHX_ a,b,c,d,e)
-#define swash_fetch(a,b) Perl_swash_fetch(aTHX_ a,b)
+#define swash_fetch(a,b,c) Perl_swash_fetch(aTHX_ a,b,c)
#define taint_env() Perl_taint_env(aTHX)
#define taint_proper(a,b) Perl_taint_proper(aTHX_ a,b)
#define to_utf8_lower(a) Perl_to_utf8_lower(aTHX_ a)
diff --git a/embed.pl b/embed.pl
index 9e272b8572..fcaaaed90a 100755
--- a/embed.pl
+++ b/embed.pl
@@ -2089,7 +2089,7 @@ Apd |void |sv_vsetpvfn |SV* sv|const char* pat|STRLEN patlen \
Ap |NV |str_to_version |SV *sv
Ap |SV* |swash_init |char* pkg|char* name|SV* listsv \
|I32 minbits|I32 none
-Ap |UV |swash_fetch |SV *sv|U8 *ptr
+Ap |UV |swash_fetch |SV *sv|U8 *ptr|bool do_utf8
Ap |void |taint_env
Ap |void |taint_proper |const char* f|const char* s
Ap |UV |to_utf8_lower |U8 *p
diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm
index e8ebb39774..7e57a58b51 100644
--- a/ext/B/B/Deparse.pm
+++ b/ext/B/B/Deparse.pm
@@ -1024,7 +1024,22 @@ sub scopeop {
}
}
-sub pp_scope { scopeop(0, @_); }
+sub invoker {
+ my $caller = (caller(2))[3];
+ if ($caller eq "B::Deparse::deparse") {
+ return (caller(3))[3];
+ }
+ else {
+ return $caller;
+ }
+}
+
+sub pp_scope {
+ my ($self, $op, $cx) = @_;
+ my $body = scopeop(0, @_);
+ return $body if $cx > 0 || invoker() ne "B::Deparse::lineseq";
+ return "do {\n\t$body\n\b};";
+}
sub pp_lineseq { scopeop(0, @_); }
sub pp_leave { scopeop(1, @_); }
@@ -2347,6 +2362,8 @@ sub pp_null {
return $self->maybe_parens($self->deparse($op->first, 20) . " =~ "
. $self->deparse($op->first->sibling, 20),
$cx, 20);
+ } elsif ($op->flags & OPf_SPECIAL && $cx == 0 && !$op->targ) {
+ return "do {\n\t". $self->deparse($op->first, $cx) ."\n\b};";
} else {
return $self->deparse($op->first, $cx);
}
diff --git a/ext/IO/lib/IO/Seekable.pm b/ext/IO/lib/IO/Seekable.pm
index d3dfa1e697..95dd4d0c36 100644
--- a/ext/IO/lib/IO/Seekable.pm
+++ b/ext/IO/lib/IO/Seekable.pm
@@ -41,7 +41,7 @@ corresponding built-in functions:
=over 4
-=item $io->setpos ( POS, WHENCE )
+=item $io->seek ( POS, WHENCE )
Seek the IO::File to position POS, relative to WHENCE:
@@ -55,7 +55,7 @@ POS is absolute position. (Seek relative to the start of the file)
POS is an offset from the current position. (Seek relative to current)
-=item WHENCE=1 (SEEK_END)
+=item WHENCE=2 (SEEK_END)
POS is an offset from the end of the file. (Seek relative to end)
diff --git a/hints/hpux.sh b/hints/hpux.sh
index 8623715654..da481dac38 100644
--- a/hints/hpux.sh
+++ b/hints/hpux.sh
@@ -37,24 +37,12 @@ echo "Archname is $archname"
### HP-UX OS specific behaviour
-case "$ccflags" in
-'') cc_cppflags='' ;;
-*) set `echo " $ccflags " | sed -e 's/ -A[ea] / /g' -e 's/ -D_HPUX_SOURCE / /'`
- cc_cppflags="$* -D_HPUX_SOURCE"
- ;;
-esac
-ccflags="-Ae $cc_cppflags"
-cppflags="-Aa -D__STDC_EXT__ $cc_cppflags"
-
-case "$prefix" in
- "") prefix='/opt/perl5' ;;
- esac
-
# -ldbm is obsolete and should not be used
# -lBSD contains BSD-style duplicates of SVR4 routines that cause confusion
# -lPW is obsolete and should not be used
# The libraries crypt, malloc, ndir, and net are empty.
-set `echo " $libswanted " | sed -e 's/ ld / /' -e 's/ dbm / /' -e 's/ BSD / /' -e 's/ PW / /'`
+set `echo "X $libswanted " | sed -e 's/ ld / /' -e 's/ dbm / /' -e 's/ BSD / /' -e 's/ PW / /'`
+shift
libswanted="$*"
# By setting the deferred flag below, this means that if you run perl
@@ -71,17 +59,30 @@ libswanted="$*"
ccdlflags="-Wl,-E -Wl,-B,deferred $ccdlflags"
cc=${cc:-cc}
+ar=/usr/bin/ar # Yes, truly override. We do not want the GNU ar.
+full_ar=$ar # I repeat, no GNU ar. arrr.
-ar=/usr/bin/ar # Yes, truly override. We do not want the GNU ar.
-full_ar=$ar # I repeat, no GNU ar. arrr.
+set `echo "X $ccflags " | sed -e 's/ -A[ea] / /' -e 's/ -D_HPUX_SOURCE / /'`
+shift
+ cc_cppflags="$* -D_HPUX_SOURCE"
+cppflags="-Aa -D__STDC_EXT__ $cc_cppflags"
+
+case "$prefix" in
+ "") prefix='/opt/perl5' ;;
+ esac
case `$cc -v 2>&1`"" in
- *gcc*) ccisgcc="$define" ;;
- *) ccisgcc=''
- ccversion=`which cc | xargs what | awk '/Compiler/{print $2}'`
+ *gcc*) ccisgcc="$define"
+ ccflags="$cc_cppflags"
case "`getconf KERNEL_BITS 2>/dev/null`" in
- *64*) ldflags="$ldflags -Wl,+vnocompatwarnings" ;;
+ *64*) ldflags="$ldflags -Wl,+vnocompatwarnings"
+ ccflags="$ccflags -Wl,+vnocompatwarnings -Wa,+DA2.0"
+ ;;
esac
+ ;;
+ *) ccisgcc=''
+ ccversion=`which cc | xargs what | awk '/Compiler/{print $2}'`
+ ccflags="-Ae $cc_cppflags"
case "$d_casti32" in
"") d_casti32='undef' ;;
esac
@@ -175,37 +176,54 @@ case "$ccisgcc" in
$define|true|[Yy])
case "$optimize" in
- "") optimize="-g -O" ;;
+ "") optimize="-g -O" ;;
+ *O[3456789]*) optimize=`echo "$optimize" | sed -e 's/O[3-9]/O2/'` ;;
esac
- ld="$cc"
+ #ld="$cc"
+ ld="/usr/bin/ld"
cccdlflags='-fPIC'
- lddlflags='-shared'
+ #lddlflags='-shared'
+ lddlflags='-b +vnocompatwarnings'
+ case "$optimize" in
+ *-g*-O*|*-O*-g*)
+ # gcc without gas will not accept -g
+ echo "main(){}">try.c
+ case "`$cc $optimize -c try.c 2>&1`" in
+ *"-g option disabled"*)
+ set `echo "X $optimize " | sed -e 's/ -g / /'`
+ shift
+ optimize="$*"
+ ;;
+ esac
+ ;;
+ esac
;;
*) # HP's compiler cannot combine -g and -O
case "$optimize" in
- "") optimize="-O" ;;
+ "") optimize="-O" ;;
+ *O[3456789]*) optimize=`echo "$optimize" | sed -e 's/O[3-9]/O2/'` ;;
esac
ld=/usr/bin/ld
cccdlflags='+Z'
- lddlflags='-b'
+ lddlflags='-b +vnocompatwarnings'
;;
esac
## LARGEFILES
-case "$uselargefiles-$ccisgcc" in
- "$define-$define"|'-define')
- cat <<EOM >&4
-
-*** I'm ignoring large files for this build because
-*** I don't know how to do use large files in HP-UX using gcc.
-
-EOM
- uselargefiles="$undef"
- ;;
- esac
+#case "$uselargefiles-$ccisgcc" in
+# "$define-$define"|'-define')
+# cat <<EOM >&4
+#
+#*** I'm ignoring large files for this build because
+#*** I don't know how to do use large files in HP-UX using gcc.
+#
+#EOM
+# uselargefiles="$undef"
+# ;;
+# esac
cat >UU/uselargefiles.cbu <<'EOCBU'
# This script UU/uselargefiles.cbu will get 'called-back' by Configure
diff --git a/lib/unicode/Blocks.pl b/lib/unicode/Blocks.pl
new file mode 100644
index 0000000000..ef60058ba3
--- /dev/null
+++ b/lib/unicode/Blocks.pl
@@ -0,0 +1,203 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+0000 007F Basic Latin
+# In/0.pl BasicLatin
+0080 00FF Latin-1 Supplement
+# In/1.pl Latin1Supplement
+0100 017F Latin Extended-A
+# In/2.pl LatinExtendedA
+0180 024F Latin Extended-B
+# In/3.pl LatinExtendedB
+0250 02AF IPA Extensions
+# In/4.pl IPAExtensions
+02B0 02FF Spacing Modifier Letters
+# In/5.pl SpacingModifierLetters
+0300 036F Combining Diacritical Marks
+# In/6.pl CombiningDiacriticalMarks
+0370 03FF Greek
+# In/7.pl Greek
+0400 04FF Cyrillic
+# In/8.pl Cyrillic
+0530 058F Armenian
+# In/9.pl Armenian
+0590 05FF Hebrew
+# In/10.pl Hebrew
+0600 06FF Arabic
+# In/11.pl Arabic
+0700 074F Syriac
+# In/12.pl Syriac
+0780 07BF Thaana
+# In/13.pl Thaana
+0900 097F Devanagari
+# In/14.pl Devanagari
+0980 09FF Bengali
+# In/15.pl Bengali
+0A00 0A7F Gurmukhi
+# In/16.pl Gurmukhi
+0A80 0AFF Gujarati
+# In/17.pl Gujarati
+0B00 0B7F Oriya
+# In/18.pl Oriya
+0B80 0BFF Tamil
+# In/19.pl Tamil
+0C00 0C7F Telugu
+# In/20.pl Telugu
+0C80 0CFF Kannada
+# In/21.pl Kannada
+0D00 0D7F Malayalam
+# In/22.pl Malayalam
+0D80 0DFF Sinhala
+# In/23.pl Sinhala
+0E00 0E7F Thai
+# In/24.pl Thai
+0E80 0EFF Lao
+# In/25.pl Lao
+0F00 0FFF Tibetan
+# In/26.pl Tibetan
+1000 109F Myanmar
+# In/27.pl Myanmar
+10A0 10FF Georgian
+# In/28.pl Georgian
+1100 11FF Hangul Jamo
+# In/29.pl HangulJamo
+1200 137F Ethiopic
+# In/30.pl Ethiopic
+13A0 13FF Cherokee
+# In/31.pl Cherokee
+1400 167F Unified Canadian Aboriginal Syllabics
+# In/32.pl UnifiedCanadianAboriginalSyllabics
+1680 169F Ogham
+# In/33.pl Ogham
+16A0 16FF Runic
+# In/34.pl Runic
+1780 17FF Khmer
+# In/35.pl Khmer
+1800 18AF Mongolian
+# In/36.pl Mongolian
+1E00 1EFF Latin Extended Additional
+# In/37.pl LatinExtendedAdditional
+1F00 1FFF Greek Extended
+# In/38.pl GreekExtended
+2000 206F General Punctuation
+# In/39.pl GeneralPunctuation
+2070 209F Superscripts and Subscripts
+# In/40.pl SuperscriptsandSubscripts
+20A0 20CF Currency Symbols
+# In/41.pl CurrencySymbols
+20D0 20FF Combining Marks for Symbols
+# In/42.pl CombiningMarksforSymbols
+2100 214F Letterlike Symbols
+# In/43.pl LetterlikeSymbols
+2150 218F Number Forms
+# In/44.pl NumberForms
+2190 21FF Arrows
+# In/45.pl Arrows
+2200 22FF Mathematical Operators
+# In/46.pl MathematicalOperators
+2300 23FF Miscellaneous Technical
+# In/47.pl MiscellaneousTechnical
+2400 243F Control Pictures
+# In/48.pl ControlPictures
+2440 245F Optical Character Recognition
+# In/49.pl OpticalCharacterRecognition
+2460 24FF Enclosed Alphanumerics
+# In/50.pl EnclosedAlphanumerics
+2500 257F Box Drawing
+# In/51.pl BoxDrawing
+2580 259F Block Elements
+# In/52.pl BlockElements
+25A0 25FF Geometric Shapes
+# In/53.pl GeometricShapes
+2600 26FF Miscellaneous Symbols
+# In/54.pl MiscellaneousSymbols
+2700 27BF Dingbats
+# In/55.pl Dingbats
+2800 28FF Braille Patterns
+# In/56.pl BraillePatterns
+2E80 2EFF CJK Radicals Supplement
+# In/57.pl CJKRadicalsSupplement
+2F00 2FDF Kangxi Radicals
+# In/58.pl KangxiRadicals
+2FF0 2FFF Ideographic Description Characters
+# In/59.pl IdeographicDescriptionCharacters
+3000 303F CJK Symbols and Punctuation
+# In/60.pl CJKSymbolsandPunctuation
+3040 309F Hiragana
+# In/61.pl Hiragana
+30A0 30FF Katakana
+# In/62.pl Katakana
+3100 312F Bopomofo
+# In/63.pl Bopomofo
+3130 318F Hangul Compatibility Jamo
+# In/64.pl HangulCompatibilityJamo
+3190 319F Kanbun
+# In/65.pl Kanbun
+31A0 31BF Bopomofo Extended
+# In/66.pl BopomofoExtended
+3200 32FF Enclosed CJK Letters and Months
+# In/67.pl EnclosedCJKLettersandMonths
+3300 33FF CJK Compatibility
+# In/68.pl CJKCompatibility
+3400 4DB5 CJK Unified Ideographs Extension A
+# In/69.pl CJKUnifiedIdeographsExtensionA
+4E00 9FFF CJK Unified Ideographs
+# In/70.pl CJKUnifiedIdeographs
+A000 A48F Yi Syllables
+# In/71.pl YiSyllables
+A490 A4CF Yi Radicals
+# In/72.pl YiRadicals
+AC00 D7A3 Hangul Syllables
+# In/73.pl HangulSyllables
+D800 DB7F High Surrogates
+# In/74.pl HighSurrogates
+DB80 DBFF High Private Use Surrogates
+# In/75.pl HighPrivateUseSurrogates
+DC00 DFFF Low Surrogates
+# In/76.pl LowSurrogates
+E000 F8FF Private Use
+# In/77.pl PrivateUse
+F900 FAFF CJK Compatibility Ideographs
+# In/78.pl CJKCompatibilityIdeographs
+FB00 FB4F Alphabetic Presentation Forms
+# In/79.pl AlphabeticPresentationForms
+FB50 FDFF Arabic Presentation Forms-A
+# In/80.pl ArabicPresentationFormsA
+FE20 FE2F Combining Half Marks
+# In/81.pl CombiningHalfMarks
+FE30 FE4F CJK Compatibility Forms
+# In/82.pl CJKCompatibilityForms
+FE50 FE6F Small Form Variants
+# In/83.pl SmallFormVariants
+FE70 FEFE Arabic Presentation Forms-B
+# In/84.pl ArabicPresentationFormsB
+FEFF FEFF Specials
+# In/85.pl Specials
+FF00 FFEF Halfwidth and Fullwidth Forms
+# In/86.pl HalfwidthandFullwidthForms
+FFF0 FFFD Specials
+# In/85.pl Specials
+10300 1032F Old Italic
+# In/87.pl OldItalic
+10330 1034F Gothic
+# In/88.pl Gothic
+10400 1044F Deseret
+# In/89.pl Deseret
+1D000 1D0FF Byzantine Musical Symbols
+# In/90.pl ByzantineMusicalSymbols
+1D100 1D1FF Musical Symbols
+# In/91.pl MusicalSymbols
+1D400 1D7FF Mathematical Alphanumeric Symbols
+# In/92.pl MathematicalAlphanumericSymbols
+20000 2A6D6 CJK Unified Ideographs Extension B
+# In/93.pl CJKUnifiedIdeographsExtensionB
+2F800 2FA1F CJK Compatibility Ideographs Supplement
+# In/94.pl CJKCompatibilityIdeographsSupplement
+E0000 E007F Tags
+# In/95.pl Tags
+F0000 FFFFD Private Use
+# In/77.pl PrivateUse
+100000 10FFFD Private Use
+# In/77.pl PrivateUse
+END
diff --git a/lib/unicode/In.pl b/lib/unicode/In.pl
new file mode 100644
index 0000000000..e0b7a5a5f8
--- /dev/null
+++ b/lib/unicode/In.pl
@@ -0,0 +1,101 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+%utf8::In = (
+'BasicLatin' => 0,
+'Latin1Supplement' => 1,
+'LatinExtendedA' => 2,
+'LatinExtendedB' => 3,
+'IPAExtensions' => 4,
+'SpacingModifierLetters' => 5,
+'CombiningDiacriticalMarks' => 6,
+'Greek' => 7,
+'Cyrillic' => 8,
+'Armenian' => 9,
+'Hebrew' => 10,
+'Arabic' => 11,
+'Syriac' => 12,
+'Thaana' => 13,
+'Devanagari' => 14,
+'Bengali' => 15,
+'Gurmukhi' => 16,
+'Gujarati' => 17,
+'Oriya' => 18,
+'Tamil' => 19,
+'Telugu' => 20,
+'Kannada' => 21,
+'Malayalam' => 22,
+'Sinhala' => 23,
+'Thai' => 24,
+'Lao' => 25,
+'Tibetan' => 26,
+'Myanmar' => 27,
+'Georgian' => 28,
+'HangulJamo' => 29,
+'Ethiopic' => 30,
+'Cherokee' => 31,
+'UnifiedCanadianAboriginalSyllabics' => 32,
+'Ogham' => 33,
+'Runic' => 34,
+'Khmer' => 35,
+'Mongolian' => 36,
+'LatinExtendedAdditional' => 37,
+'GreekExtended' => 38,
+'GeneralPunctuation' => 39,
+'SuperscriptsandSubscripts' => 40,
+'CurrencySymbols' => 41,
+'CombiningMarksforSymbols' => 42,
+'LetterlikeSymbols' => 43,
+'NumberForms' => 44,
+'Arrows' => 45,
+'MathematicalOperators' => 46,
+'MiscellaneousTechnical' => 47,
+'ControlPictures' => 48,
+'OpticalCharacterRecognition' => 49,
+'EnclosedAlphanumerics' => 50,
+'BoxDrawing' => 51,
+'BlockElements' => 52,
+'GeometricShapes' => 53,
+'MiscellaneousSymbols' => 54,
+'Dingbats' => 55,
+'BraillePatterns' => 56,
+'CJKRadicalsSupplement' => 57,
+'KangxiRadicals' => 58,
+'IdeographicDescriptionCharacters' => 59,
+'CJKSymbolsandPunctuation' => 60,
+'Hiragana' => 61,
+'Katakana' => 62,
+'Bopomofo' => 63,
+'HangulCompatibilityJamo' => 64,
+'Kanbun' => 65,
+'BopomofoExtended' => 66,
+'EnclosedCJKLettersandMonths' => 67,
+'CJKCompatibility' => 68,
+'CJKUnifiedIdeographsExtensionA' => 69,
+'CJKUnifiedIdeographs' => 70,
+'YiSyllables' => 71,
+'YiRadicals' => 72,
+'HangulSyllables' => 73,
+'HighSurrogates' => 74,
+'HighPrivateUseSurrogates' => 75,
+'LowSurrogates' => 76,
+'PrivateUse' => 77,
+'CJKCompatibilityIdeographs' => 78,
+'AlphabeticPresentationForms' => 79,
+'ArabicPresentationFormsA' => 80,
+'CombiningHalfMarks' => 81,
+'CJKCompatibilityForms' => 82,
+'SmallFormVariants' => 83,
+'ArabicPresentationFormsB' => 84,
+'Specials' => 85,
+'HalfwidthandFullwidthForms' => 86,
+'OldItalic' => 87,
+'Gothic' => 88,
+'Deseret' => 89,
+'ByzantineMusicalSymbols' => 90,
+'MusicalSymbols' => 91,
+'MathematicalAlphanumericSymbols' => 92,
+'CJKUnifiedIdeographsExtensionB' => 93,
+'CJKCompatibilityIdeographsSupplement' => 94,
+'Tags' => 95,
+);
diff --git a/lib/unicode/In/BasicLatin.pl b/lib/unicode/In/0.pl
index 475c1dfed0..475c1dfed0 100644
--- a/lib/unicode/In/BasicLatin.pl
+++ b/lib/unicode/In/0.pl
diff --git a/lib/unicode/In/Latin-1Supplement.pl b/lib/unicode/In/1.pl
index 5a5aa0e18c..5a5aa0e18c 100644
--- a/lib/unicode/In/Latin-1Supplement.pl
+++ b/lib/unicode/In/1.pl
diff --git a/lib/unicode/In/Hebrew.pl b/lib/unicode/In/10.pl
index f1d866c049..f1d866c049 100644
--- a/lib/unicode/In/Hebrew.pl
+++ b/lib/unicode/In/10.pl
diff --git a/lib/unicode/In/Arabic.pl b/lib/unicode/In/11.pl
index 7546a743b5..7546a743b5 100644
--- a/lib/unicode/In/Arabic.pl
+++ b/lib/unicode/In/11.pl
diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/12.pl
index e5247ad937..e5247ad937 100644
--- a/lib/unicode/In/Syriac.pl
+++ b/lib/unicode/In/12.pl
diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/13.pl
index 5bda401f7b..5bda401f7b 100644
--- a/lib/unicode/In/Thaana.pl
+++ b/lib/unicode/In/13.pl
diff --git a/lib/unicode/In/Devanagari.pl b/lib/unicode/In/14.pl
index a20b68d031..a20b68d031 100644
--- a/lib/unicode/In/Devanagari.pl
+++ b/lib/unicode/In/14.pl
diff --git a/lib/unicode/In/Bengali.pl b/lib/unicode/In/15.pl
index 306f653dbc..306f653dbc 100644
--- a/lib/unicode/In/Bengali.pl
+++ b/lib/unicode/In/15.pl
diff --git a/lib/unicode/In/Gurmukhi.pl b/lib/unicode/In/16.pl
index d37d4849ca..d37d4849ca 100644
--- a/lib/unicode/In/Gurmukhi.pl
+++ b/lib/unicode/In/16.pl
diff --git a/lib/unicode/In/Gujarati.pl b/lib/unicode/In/17.pl
index 65d853b314..65d853b314 100644
--- a/lib/unicode/In/Gujarati.pl
+++ b/lib/unicode/In/17.pl
diff --git a/lib/unicode/In/Oriya.pl b/lib/unicode/In/18.pl
index 14e1027fb6..14e1027fb6 100644
--- a/lib/unicode/In/Oriya.pl
+++ b/lib/unicode/In/18.pl
diff --git a/lib/unicode/In/Tamil.pl b/lib/unicode/In/19.pl
index a28ba3d909..a28ba3d909 100644
--- a/lib/unicode/In/Tamil.pl
+++ b/lib/unicode/In/19.pl
diff --git a/lib/unicode/In/LatinExtended-A.pl b/lib/unicode/In/2.pl
index 0f6acf9853..0f6acf9853 100644
--- a/lib/unicode/In/LatinExtended-A.pl
+++ b/lib/unicode/In/2.pl
diff --git a/lib/unicode/In/Telugu.pl b/lib/unicode/In/20.pl
index aff6cc93f4..aff6cc93f4 100644
--- a/lib/unicode/In/Telugu.pl
+++ b/lib/unicode/In/20.pl
diff --git a/lib/unicode/In/Kannada.pl b/lib/unicode/In/21.pl
index 41e05bdc3b..41e05bdc3b 100644
--- a/lib/unicode/In/Kannada.pl
+++ b/lib/unicode/In/21.pl
diff --git a/lib/unicode/In/Malayalam.pl b/lib/unicode/In/22.pl
index b42bbeea8d..b42bbeea8d 100644
--- a/lib/unicode/In/Malayalam.pl
+++ b/lib/unicode/In/22.pl
diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/23.pl
index 00da6d144f..00da6d144f 100644
--- a/lib/unicode/In/Sinhala.pl
+++ b/lib/unicode/In/23.pl
diff --git a/lib/unicode/In/Thai.pl b/lib/unicode/In/24.pl
index 2fa00eb135..2fa00eb135 100644
--- a/lib/unicode/In/Thai.pl
+++ b/lib/unicode/In/24.pl
diff --git a/lib/unicode/In/Lao.pl b/lib/unicode/In/25.pl
index 5fd607c08f..5fd607c08f 100644
--- a/lib/unicode/In/Lao.pl
+++ b/lib/unicode/In/25.pl
diff --git a/lib/unicode/In/Tibetan.pl b/lib/unicode/In/26.pl
index 3ae5e6248d..3ae5e6248d 100644
--- a/lib/unicode/In/Tibetan.pl
+++ b/lib/unicode/In/26.pl
diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/27.pl
index ecc3448361..ecc3448361 100644
--- a/lib/unicode/In/Myanmar.pl
+++ b/lib/unicode/In/27.pl
diff --git a/lib/unicode/In/Georgian.pl b/lib/unicode/In/28.pl
index 73a8818c71..73a8818c71 100644
--- a/lib/unicode/In/Georgian.pl
+++ b/lib/unicode/In/28.pl
diff --git a/lib/unicode/In/HangulJamo.pl b/lib/unicode/In/29.pl
index 692be7d813..692be7d813 100644
--- a/lib/unicode/In/HangulJamo.pl
+++ b/lib/unicode/In/29.pl
diff --git a/lib/unicode/In/LatinExtended-B.pl b/lib/unicode/In/3.pl
index 68f093234e..68f093234e 100644
--- a/lib/unicode/In/LatinExtended-B.pl
+++ b/lib/unicode/In/3.pl
diff --git a/lib/unicode/In/Ethiopic.pl b/lib/unicode/In/30.pl
index ad4776df99..ad4776df99 100644
--- a/lib/unicode/In/Ethiopic.pl
+++ b/lib/unicode/In/30.pl
diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/31.pl
index f40dfa2be0..f40dfa2be0 100644
--- a/lib/unicode/In/Cherokee.pl
+++ b/lib/unicode/In/31.pl
diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/32.pl
index 7318008076..7318008076 100644
--- a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl
+++ b/lib/unicode/In/32.pl
diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/33.pl
index 5d7bd970e8..5d7bd970e8 100644
--- a/lib/unicode/In/Ogham.pl
+++ b/lib/unicode/In/33.pl
diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/34.pl
index d404cb6cfb..d404cb6cfb 100644
--- a/lib/unicode/In/Runic.pl
+++ b/lib/unicode/In/34.pl
diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/35.pl
index 2b0b198216..2b0b198216 100644
--- a/lib/unicode/In/Khmer.pl
+++ b/lib/unicode/In/35.pl
diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/36.pl
index 06526c64d9..06526c64d9 100644
--- a/lib/unicode/In/Mongolian.pl
+++ b/lib/unicode/In/36.pl
diff --git a/lib/unicode/In/LatinExtendedAdditional.pl b/lib/unicode/In/37.pl
index c288810ca2..c288810ca2 100644
--- a/lib/unicode/In/LatinExtendedAdditional.pl
+++ b/lib/unicode/In/37.pl
diff --git a/lib/unicode/In/GreekExtended.pl b/lib/unicode/In/38.pl
index 74cd2c88e0..74cd2c88e0 100644
--- a/lib/unicode/In/GreekExtended.pl
+++ b/lib/unicode/In/38.pl
diff --git a/lib/unicode/In/GeneralPunctuation.pl b/lib/unicode/In/39.pl
index b9b0e7efaa..b9b0e7efaa 100644
--- a/lib/unicode/In/GeneralPunctuation.pl
+++ b/lib/unicode/In/39.pl
diff --git a/lib/unicode/In/IPAExtensions.pl b/lib/unicode/In/4.pl
index f6e9454fe0..f6e9454fe0 100644
--- a/lib/unicode/In/IPAExtensions.pl
+++ b/lib/unicode/In/4.pl
diff --git a/lib/unicode/In/SuperscriptsandSubscripts.pl b/lib/unicode/In/40.pl
index 2e36ac331c..2e36ac331c 100644
--- a/lib/unicode/In/SuperscriptsandSubscripts.pl
+++ b/lib/unicode/In/40.pl
diff --git a/lib/unicode/In/CurrencySymbols.pl b/lib/unicode/In/41.pl
index 12c67371cc..12c67371cc 100644
--- a/lib/unicode/In/CurrencySymbols.pl
+++ b/lib/unicode/In/41.pl
diff --git a/lib/unicode/In/CombiningMarksforSymbols.pl b/lib/unicode/In/42.pl
index 2d58a56712..2d58a56712 100644
--- a/lib/unicode/In/CombiningMarksforSymbols.pl
+++ b/lib/unicode/In/42.pl
diff --git a/lib/unicode/In/LetterlikeSymbols.pl b/lib/unicode/In/43.pl
index c735821edc..c735821edc 100644
--- a/lib/unicode/In/LetterlikeSymbols.pl
+++ b/lib/unicode/In/43.pl
diff --git a/lib/unicode/In/NumberForms.pl b/lib/unicode/In/44.pl
index a1949a194d..a1949a194d 100644
--- a/lib/unicode/In/NumberForms.pl
+++ b/lib/unicode/In/44.pl
diff --git a/lib/unicode/In/Arrows.pl b/lib/unicode/In/45.pl
index 799f739085..799f739085 100644
--- a/lib/unicode/In/Arrows.pl
+++ b/lib/unicode/In/45.pl
diff --git a/lib/unicode/In/MathematicalOperators.pl b/lib/unicode/In/46.pl
index 8bc8295cc5..8bc8295cc5 100644
--- a/lib/unicode/In/MathematicalOperators.pl
+++ b/lib/unicode/In/46.pl
diff --git a/lib/unicode/In/MiscellaneousTechnical.pl b/lib/unicode/In/47.pl
index 67867951d6..67867951d6 100644
--- a/lib/unicode/In/MiscellaneousTechnical.pl
+++ b/lib/unicode/In/47.pl
diff --git a/lib/unicode/In/ControlPictures.pl b/lib/unicode/In/48.pl
index 7aad2fcacf..7aad2fcacf 100644
--- a/lib/unicode/In/ControlPictures.pl
+++ b/lib/unicode/In/48.pl
diff --git a/lib/unicode/In/OpticalCharacterRecognition.pl b/lib/unicode/In/49.pl
index c7cecd02da..c7cecd02da 100644
--- a/lib/unicode/In/OpticalCharacterRecognition.pl
+++ b/lib/unicode/In/49.pl
diff --git a/lib/unicode/In/SpacingModifierLetters.pl b/lib/unicode/In/5.pl
index a242e0207a..a242e0207a 100644
--- a/lib/unicode/In/SpacingModifierLetters.pl
+++ b/lib/unicode/In/5.pl
diff --git a/lib/unicode/In/EnclosedAlphanumerics.pl b/lib/unicode/In/50.pl
index 7b1b778af0..7b1b778af0 100644
--- a/lib/unicode/In/EnclosedAlphanumerics.pl
+++ b/lib/unicode/In/50.pl
diff --git a/lib/unicode/In/BoxDrawing.pl b/lib/unicode/In/51.pl
index 4d446863fe..4d446863fe 100644
--- a/lib/unicode/In/BoxDrawing.pl
+++ b/lib/unicode/In/51.pl
diff --git a/lib/unicode/In/BlockElements.pl b/lib/unicode/In/52.pl
index 6135c93e90..6135c93e90 100644
--- a/lib/unicode/In/BlockElements.pl
+++ b/lib/unicode/In/52.pl
diff --git a/lib/unicode/In/GeometricShapes.pl b/lib/unicode/In/53.pl
index 855d98ebff..855d98ebff 100644
--- a/lib/unicode/In/GeometricShapes.pl
+++ b/lib/unicode/In/53.pl
diff --git a/lib/unicode/In/MiscellaneousSymbols.pl b/lib/unicode/In/54.pl
index 0949bc2b55..0949bc2b55 100644
--- a/lib/unicode/In/MiscellaneousSymbols.pl
+++ b/lib/unicode/In/54.pl
diff --git a/lib/unicode/In/Dingbats.pl b/lib/unicode/In/55.pl
index 3013f73c75..3013f73c75 100644
--- a/lib/unicode/In/Dingbats.pl
+++ b/lib/unicode/In/55.pl
diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/56.pl
index d785c31676..d785c31676 100644
--- a/lib/unicode/In/BraillePatterns.pl
+++ b/lib/unicode/In/56.pl
diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/57.pl
index 2bf56517d1..2bf56517d1 100644
--- a/lib/unicode/In/CJKRadicalsSupplement.pl
+++ b/lib/unicode/In/57.pl
diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/58.pl
index 3903f15c4c..3903f15c4c 100644
--- a/lib/unicode/In/KangxiRadicals.pl
+++ b/lib/unicode/In/58.pl
diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/59.pl
index 07799e6941..07799e6941 100644
--- a/lib/unicode/In/IdeographicDescriptionCharacters.pl
+++ b/lib/unicode/In/59.pl
diff --git a/lib/unicode/In/CombiningDiacriticalMarks.pl b/lib/unicode/In/6.pl
index cf9bb94991..cf9bb94991 100644
--- a/lib/unicode/In/CombiningDiacriticalMarks.pl
+++ b/lib/unicode/In/6.pl
diff --git a/lib/unicode/In/CJKSymbolsandPunctuation.pl b/lib/unicode/In/60.pl
index 0c66f051a4..0c66f051a4 100644
--- a/lib/unicode/In/CJKSymbolsandPunctuation.pl
+++ b/lib/unicode/In/60.pl
diff --git a/lib/unicode/In/Hiragana.pl b/lib/unicode/In/61.pl
index 49b4e4976a..49b4e4976a 100644
--- a/lib/unicode/In/Hiragana.pl
+++ b/lib/unicode/In/61.pl
diff --git a/lib/unicode/In/Katakana.pl b/lib/unicode/In/62.pl
index e5568a283a..e5568a283a 100644
--- a/lib/unicode/In/Katakana.pl
+++ b/lib/unicode/In/62.pl
diff --git a/lib/unicode/In/Bopomofo.pl b/lib/unicode/In/63.pl
index 4f9b5f46b2..4f9b5f46b2 100644
--- a/lib/unicode/In/Bopomofo.pl
+++ b/lib/unicode/In/63.pl
diff --git a/lib/unicode/In/HangulCompatibilityJamo.pl b/lib/unicode/In/64.pl
index b15c4cc760..b15c4cc760 100644
--- a/lib/unicode/In/HangulCompatibilityJamo.pl
+++ b/lib/unicode/In/64.pl
diff --git a/lib/unicode/In/Kanbun.pl b/lib/unicode/In/65.pl
index d78c2088c0..d78c2088c0 100644
--- a/lib/unicode/In/Kanbun.pl
+++ b/lib/unicode/In/65.pl
diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/66.pl
index 96150b4f3f..96150b4f3f 100644
--- a/lib/unicode/In/BopomofoExtended.pl
+++ b/lib/unicode/In/66.pl
diff --git a/lib/unicode/In/EnclosedCJKLettersandMonths.pl b/lib/unicode/In/67.pl
index 2708fec7e3..2708fec7e3 100644
--- a/lib/unicode/In/EnclosedCJKLettersandMonths.pl
+++ b/lib/unicode/In/67.pl
diff --git a/lib/unicode/In/CJKCompatibility.pl b/lib/unicode/In/68.pl
index d504529398..d504529398 100644
--- a/lib/unicode/In/CJKCompatibility.pl
+++ b/lib/unicode/In/68.pl
diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/69.pl
index 83adb815d7..83adb815d7 100644
--- a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl
+++ b/lib/unicode/In/69.pl
diff --git a/lib/unicode/In/Greek.pl b/lib/unicode/In/7.pl
index 8d89b7176b..8d89b7176b 100644
--- a/lib/unicode/In/Greek.pl
+++ b/lib/unicode/In/7.pl
diff --git a/lib/unicode/In/CJKUnifiedIdeographs.pl b/lib/unicode/In/70.pl
index f74552e661..f74552e661 100644
--- a/lib/unicode/In/CJKUnifiedIdeographs.pl
+++ b/lib/unicode/In/70.pl
diff --git a/lib/unicode/In/YiSyllables.pl b/lib/unicode/In/71.pl
index 0636a82e56..0636a82e56 100644
--- a/lib/unicode/In/YiSyllables.pl
+++ b/lib/unicode/In/71.pl
diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/72.pl
index 56404c5fb5..56404c5fb5 100644
--- a/lib/unicode/In/YiRadicals.pl
+++ b/lib/unicode/In/72.pl
diff --git a/lib/unicode/In/HangulSyllables.pl b/lib/unicode/In/73.pl
index e1e26945e5..e1e26945e5 100644
--- a/lib/unicode/In/HangulSyllables.pl
+++ b/lib/unicode/In/73.pl
diff --git a/lib/unicode/In/HighSurrogates.pl b/lib/unicode/In/74.pl
index 0f4eb5727b..0f4eb5727b 100644
--- a/lib/unicode/In/HighSurrogates.pl
+++ b/lib/unicode/In/74.pl
diff --git a/lib/unicode/In/HighPrivateUseSurrogates.pl b/lib/unicode/In/75.pl
index ec4ca07885..ec4ca07885 100644
--- a/lib/unicode/In/HighPrivateUseSurrogates.pl
+++ b/lib/unicode/In/75.pl
diff --git a/lib/unicode/In/LowSurrogates.pl b/lib/unicode/In/76.pl
index d056168c66..d056168c66 100644
--- a/lib/unicode/In/LowSurrogates.pl
+++ b/lib/unicode/In/76.pl
diff --git a/lib/unicode/In/77.pl b/lib/unicode/In/77.pl
new file mode 100644
index 0000000000..530166da95
--- /dev/null
+++ b/lib/unicode/In/77.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+100000 10FFFD
+END
diff --git a/lib/unicode/In/CJKCompatibilityIdeographs.pl b/lib/unicode/In/78.pl
index 0c553d6ee2..0c553d6ee2 100644
--- a/lib/unicode/In/CJKCompatibilityIdeographs.pl
+++ b/lib/unicode/In/78.pl
diff --git a/lib/unicode/In/AlphabeticPresentationForms.pl b/lib/unicode/In/79.pl
index 42cc1ca029..42cc1ca029 100644
--- a/lib/unicode/In/AlphabeticPresentationForms.pl
+++ b/lib/unicode/In/79.pl
diff --git a/lib/unicode/In/Cyrillic.pl b/lib/unicode/In/8.pl
index 0075ce1ddf..0075ce1ddf 100644
--- a/lib/unicode/In/Cyrillic.pl
+++ b/lib/unicode/In/8.pl
diff --git a/lib/unicode/In/ArabicPresentationForms-A.pl b/lib/unicode/In/80.pl
index ffb4f1eb3a..ffb4f1eb3a 100644
--- a/lib/unicode/In/ArabicPresentationForms-A.pl
+++ b/lib/unicode/In/80.pl
diff --git a/lib/unicode/In/CombiningHalfMarks.pl b/lib/unicode/In/81.pl
index cc8a4a21b1..cc8a4a21b1 100644
--- a/lib/unicode/In/CombiningHalfMarks.pl
+++ b/lib/unicode/In/81.pl
diff --git a/lib/unicode/In/CJKCompatibilityForms.pl b/lib/unicode/In/82.pl
index 4e462b8402..4e462b8402 100644
--- a/lib/unicode/In/CJKCompatibilityForms.pl
+++ b/lib/unicode/In/82.pl
diff --git a/lib/unicode/In/SmallFormVariants.pl b/lib/unicode/In/83.pl
index 4eff1ea01e..4eff1ea01e 100644
--- a/lib/unicode/In/SmallFormVariants.pl
+++ b/lib/unicode/In/83.pl
diff --git a/lib/unicode/In/ArabicPresentationForms-B.pl b/lib/unicode/In/84.pl
index dc5a32e4b1..dc5a32e4b1 100644
--- a/lib/unicode/In/ArabicPresentationForms-B.pl
+++ b/lib/unicode/In/84.pl
diff --git a/lib/unicode/In/Specials.pl b/lib/unicode/In/85.pl
index 931fc5b902..931fc5b902 100644
--- a/lib/unicode/In/Specials.pl
+++ b/lib/unicode/In/85.pl
diff --git a/lib/unicode/In/HalfwidthandFullwidthForms.pl b/lib/unicode/In/86.pl
index 03e85154fb..03e85154fb 100644
--- a/lib/unicode/In/HalfwidthandFullwidthForms.pl
+++ b/lib/unicode/In/86.pl
diff --git a/lib/unicode/Block.pl b/lib/unicode/In/87.pl
index 272f63fc9f..44a5e47510 100644
--- a/lib/unicode/Block.pl
+++ b/lib/unicode/In/87.pl
@@ -2,4 +2,5 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+10300 1032F
END
diff --git a/lib/unicode/In/88.pl b/lib/unicode/In/88.pl
new file mode 100644
index 0000000000..803041101c
--- /dev/null
+++ b/lib/unicode/In/88.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10330 1034F
+END
diff --git a/lib/unicode/In/89.pl b/lib/unicode/In/89.pl
new file mode 100644
index 0000000000..d2c50bbcad
--- /dev/null
+++ b/lib/unicode/In/89.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10400 1044F
+END
diff --git a/lib/unicode/In/Armenian.pl b/lib/unicode/In/9.pl
index a6d50e3be5..a6d50e3be5 100644
--- a/lib/unicode/In/Armenian.pl
+++ b/lib/unicode/In/9.pl
diff --git a/lib/unicode/In/90.pl b/lib/unicode/In/90.pl
new file mode 100644
index 0000000000..f1073c7392
--- /dev/null
+++ b/lib/unicode/In/90.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D000 1D0FF
+END
diff --git a/lib/unicode/In/91.pl b/lib/unicode/In/91.pl
new file mode 100644
index 0000000000..7435889d7c
--- /dev/null
+++ b/lib/unicode/In/91.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D100 1D1FF
+END
diff --git a/lib/unicode/In/92.pl b/lib/unicode/In/92.pl
new file mode 100644
index 0000000000..7e40edc3ed
--- /dev/null
+++ b/lib/unicode/In/92.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D400 1D7FF
+END
diff --git a/lib/unicode/In/93.pl b/lib/unicode/In/93.pl
new file mode 100644
index 0000000000..931aec3891
--- /dev/null
+++ b/lib/unicode/In/93.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+20000 2A6D6
+END
diff --git a/lib/unicode/In/94.pl b/lib/unicode/In/94.pl
new file mode 100644
index 0000000000..c025148c04
--- /dev/null
+++ b/lib/unicode/In/94.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2F800 2FA1F
+END
diff --git a/lib/unicode/In/95.pl b/lib/unicode/In/95.pl
new file mode 100644
index 0000000000..495d2d581d
--- /dev/null
+++ b/lib/unicode/In/95.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+E0000 E007F
+END
diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl
deleted file mode 100644
index c81b567a74..0000000000
--- a/lib/unicode/In/PrivateUse.pl
+++ /dev/null
@@ -1,6 +0,0 @@
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-E000 F8FF
-END
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index 818785452b..637050a3fc 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -231,11 +231,24 @@ mkdir "To", 0755;
# This is not written for speed...
+my %InId;
+my $InId = 0;
+
foreach $file (@todo) {
my ($table, $wanted, $val) = @$file;
next if @ARGV and not grep { $_ eq $table } @ARGV;
- print $table,"\n";
- if ($table =~ /^(Is|In|To)(.*)/) {
+ print $table, "\n";
+ $table =~ s/\W+//g;
+ if ($table =~ /^In(.+)/) {
+ my $id;
+ unless (exists $InId{$1}) {
+ $InId{$1} = $InId++;
+ }
+ $id = $InId{$1};
+ open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+ print OUT "# In/$id.pl $1\n";
+ }
+ elsif ($table =~ /^(Is|To)(.+)/) {
open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
}
else {
@@ -257,9 +270,9 @@ END
# Must treat blocks specially.
exit if @ARGV and not grep { $_ eq Block } @ARGV;
-print "Block\n";
+print "Blocks\n";
open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";
-open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n";
+open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n";
print OUT <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
@@ -273,11 +286,17 @@ while (<UD>) {
next if /^#/;
next if /^$/;
chomp;
- ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]); (.+)/i;
+ ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
if ($name) {
print OUT "$code $last $name\n";
- $name =~ s/\s+//g;
- open(BLOCK, ">In/$name.pl");
+ $name =~ s/\W+//g;
+ my $id;
+ unless (exists $InId{$name}) {
+ $InId{$name} = $InId++;
+ }
+ $id = $InId{$name};
+ open(BLOCK, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+ print OUT "# In/$id.pl $name\n";
print BLOCK <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
@@ -295,6 +314,24 @@ END2
print OUT "END\n";
close OUT;
+open(INID, ">In.pl") or die "Can't create In.pl: $!\n";
+
+print INID <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+%utf8::In = (
+EOH
+
+# Order doesn't matter but let's prettyprint anyway.
+foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
+ printf INID "%-40s => %3d,\n", "'$in'", $InId{$in};
+}
+
+print INID ");\n";
+
+close(INID);
+
##################################################
sub proplist {
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 8649e9e07e..ab2e15ddec 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -26,12 +26,19 @@ sub SWASHNEW {
while (($caller = caller($i)) eq __PACKAGE__) { $i++ }
my $encoding = $enc{$caller} || "unicode";
(my $file = $type) =~ s!::!/!g;
- $file =~ s#^(I[sn]|To)([A-Z].*)#$1/$2#;
+ if ($file =~ /^In(.+)/) {
+ defined %utf8::In || do "$encoding/In.pl";
+ if (exists $utf8::In{$1}) {
+ $file = "$encoding/In/$utf8::In{$1}";
+ }
+ } else {
+ $file =~ s#^(Is|To)([A-Z].*)#$1/$2#;
+ }
$list ||= eval { $caller->$type(); }
|| do "$file.pl"
|| do "$encoding/$file.pl"
|| do "$encoding/Is/${type}.pl"
- || croak("Can't find $encoding character property definition via $caller->$type or $file.pl");
+ || croak("Can't find $encoding character property \"$type\"");
$| = 1;
diff --git a/objXSUB.h b/objXSUB.h
index 99d9a3ea26..28bed782f5 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -579,6 +579,10 @@
#define Perl_init_stacks pPerl->Perl_init_stacks
#undef init_stacks
#define init_stacks Perl_init_stacks
+#undef Perl_init_tm
+#define Perl_init_tm pPerl->Perl_init_tm
+#undef init_tm
+#define init_tm Perl_init_tm
#undef Perl_instr
#define Perl_instr pPerl->Perl_instr
#undef instr
@@ -857,6 +861,10 @@
#define Perl_mg_size pPerl->Perl_mg_size
#undef mg_size
#define mg_size Perl_mg_size
+#undef Perl_mini_mktime
+#define Perl_mini_mktime pPerl->Perl_mini_mktime
+#undef mini_mktime
+#define mini_mktime Perl_mini_mktime
#undef Perl_moreswitches
#define Perl_moreswitches pPerl->Perl_moreswitches
#undef moreswitches
@@ -927,6 +935,10 @@
#define Perl_my_stat pPerl->Perl_my_stat
#undef my_stat
#define my_stat Perl_my_stat
+#undef Perl_my_strftime
+#define Perl_my_strftime pPerl->Perl_my_strftime
+#undef my_strftime
+#define my_strftime Perl_my_strftime
#if defined(MYSWAP)
#undef Perl_my_swap
#define Perl_my_swap pPerl->Perl_my_swap
diff --git a/perl.h b/perl.h
index 01a6d3fcb9..57afb3e37c 100644
--- a/perl.h
+++ b/perl.h
@@ -1842,10 +1842,12 @@ typedef pthread_key_t perl_key;
#endif
/* This defines a way to flush all output buffers. This may be a
- * performance issue, so we allow people to disable it.
+ * performance issue, so we allow people to disable it. Also, if
+ * we are using stdio, there are broken implementations of fflush(NULL)
+ * out there, Solaris being the most prominent.
*/
#ifndef PERL_FLUSHALL_FOR_CHILD
-# if defined(FFLUSH_NULL) || defined(USE_SFIO)
+# if defined(USE_PERLIO) || defined(FFLUSH_NULL) || defined(USE_SFIO)
# define PERL_FLUSHALL_FOR_CHILD PerlIO_flush((PerlIO*)NULL)
# else
# ifdef FFLUSH_ALL
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 6f7ed5fd39..a2614c121e 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -628,6 +628,13 @@ found in the PATH did not have correct permissions.
(F) A string of a form C<CORE::word> was given to prototype(), but there
is no builtin with the name C<word>.
+=item Can't find %s character property "%s"
+
+(F) You used C<\p{}> or C<\P{}> but the character property by that name
+could not be found. Maybe you misspelled the name of the property
+(remember that the names of character properties consist only of
+alphanumeric characters), or maybe you forgot the C<Is> or C<In> prefix?
+
=item Can't find label %s
(F) You said to goto a label that isn't mentioned anywhere that it's
@@ -1707,7 +1714,9 @@ L<perlfunc/sprintf>.
=item invalid [] range "%s" in regexp
(F) The range specified in a character class had a minimum character
-greater than the maximum character. See L<perlre>.
+greater than the maximum character. One possibility is that you
+forgot the C<{}> from your ending C<\x{}> - C<\x> without the curly
+braces can go only up to C<ff>. See L<perlre>.
=item invalid [] range "%s" in transliteration operator
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 8ddcdd2b06..12bee5c7a3 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -158,9 +158,12 @@ Named Unicode properties and block ranges make be used as character
classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't
match property) constructs. For instance, C<\p{Lu}> matches any
character with the Unicode uppercase property, while C<\p{M}> matches
-any mark character. Single letter properties may omit the brackets, so
-that can be written C<\pM> also. Many predefined character classes are
-available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.
+any mark character. Single letter properties may omit the brackets,
+so that can be written C<\pM> also. Many predefined character classes
+are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The
+names of the C<In> classes are the official Unicode block names but
+with all non-alphanumeric characters removed, for example the block
+name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
=item *
diff --git a/proto.h b/proto.h
index 63fc5187f2..cc4050d86e 100644
--- a/proto.h
+++ b/proto.h
@@ -808,7 +808,7 @@ PERL_CALLCONV void Perl_sv_vcatpvfn(pTHX_ SV* sv, const char* pat, STRLEN patlen
PERL_CALLCONV void Perl_sv_vsetpvfn(pTHX_ SV* sv, const char* pat, STRLEN patlen, va_list* args, SV** svargs, I32 svmax, bool *maybe_tainted);
PERL_CALLCONV NV Perl_str_to_version(pTHX_ SV *sv);
PERL_CALLCONV SV* Perl_swash_init(pTHX_ char* pkg, char* name, SV* listsv, I32 minbits, I32 none);
-PERL_CALLCONV UV Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr);
+PERL_CALLCONV UV Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8);
PERL_CALLCONV void Perl_taint_env(pTHX);
PERL_CALLCONV void Perl_taint_proper(pTHX_ const char* f, const char* s);
PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ U8 *p);
diff --git a/regcomp.c b/regcomp.c
index 1cc3a984e1..20388f1350 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2799,11 +2799,12 @@ tryagain:
break;
case 'p':
case 'P':
- { /* a lovely hack--pretend we saw [\pX] instead */
+ {
char* oldregxend = RExC_end;
char* parse_start = RExC_parse;
if (RExC_parse[1] == '{') {
+ /* a lovely hack--pretend we saw [\pX] instead */
RExC_end = strchr(RExC_parse, '}');
if (!RExC_end) {
RExC_parse += 2;
@@ -3259,7 +3260,7 @@ STATIC regnode *
S_regclass(pTHX_ RExC_state_t *pRExC_state)
{
register UV value;
- register IV lastvalue = OOB_UNICODE;
+ register IV prevvalue = OOB_UNICODE;
register IV range = 0;
register regnode *ret;
STRLEN numlen;
@@ -3270,7 +3271,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
register char *e;
char *parse_start = RExC_parse; /* MJD */
UV n;
- bool dont_optimize_invert = FALSE;
+ bool optimize_invert = TRUE;
ret = reganode(pRExC_state, ANYOF, 0);
@@ -3312,8 +3313,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
rangebegin = RExC_parse;
if (UTF) {
value = utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, 0);
+ RExC_end - RExC_parse,
+ &numlen, 0);
RExC_parse += numlen;
}
else
@@ -3423,14 +3424,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
RExC_parse - rangebegin,
RExC_parse - rangebegin,
rangebegin);
- if (lastvalue < 256) {
- ANYOF_BITMAP_SET(ret, lastvalue);
+ if (prevvalue < 256) {
+ ANYOF_BITMAP_SET(ret, prevvalue);
ANYOF_BITMAP_SET(ret, '-');
}
else {
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
Perl_sv_catpvf(aTHX_ listsv,
- "%04"UVxf"\n%04"UVxf"\n", (UV)lastvalue, (UV) '-');
+ "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
}
}
@@ -3438,6 +3439,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
}
if (!SIZE_ONLY) {
+ if (namedclass > OOB_NAMEDCLASS)
+ optimize_invert = FALSE;
/* Possible truncation here but in some 64-bit environments
* the compiler gets heartburn about switch on 64-bit values.
* A similar issue a little earlier when switching on value.
@@ -3451,7 +3454,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isALNUM(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");
break;
case ANYOF_NALNUM:
@@ -3462,7 +3464,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isALNUM(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
break;
case ANYOF_ALNUMC:
@@ -3473,7 +3474,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isALNUMC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
break;
case ANYOF_NALNUMC:
@@ -3484,7 +3484,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isALNUMC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
break;
case ANYOF_ALPHA:
@@ -3495,7 +3494,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isALPHA(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
break;
case ANYOF_NALPHA:
@@ -3506,7 +3504,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isALPHA(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
break;
case ANYOF_ASCII:
@@ -3529,7 +3526,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
}
#endif /* EBCDIC */
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
break;
case ANYOF_NASCII:
@@ -3552,7 +3548,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
}
#endif /* EBCDIC */
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
break;
case ANYOF_BLANK:
@@ -3563,7 +3558,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isBLANK(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
break;
case ANYOF_NBLANK:
@@ -3574,7 +3568,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isBLANK(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
break;
case ANYOF_CNTRL:
@@ -3585,7 +3578,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isCNTRL(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
break;
case ANYOF_NCNTRL:
@@ -3596,7 +3588,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isCNTRL(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
break;
case ANYOF_DIGIT:
@@ -3607,7 +3598,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
for (value = '0'; value <= '9'; value++)
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
break;
case ANYOF_NDIGIT:
@@ -3620,7 +3610,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
for (value = '9' + 1; value < 256; value++)
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
break;
case ANYOF_GRAPH:
@@ -3631,7 +3620,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isGRAPH(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
break;
case ANYOF_NGRAPH:
@@ -3642,7 +3630,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isGRAPH(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
break;
case ANYOF_LOWER:
@@ -3653,7 +3640,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isLOWER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
break;
case ANYOF_NLOWER:
@@ -3664,7 +3650,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isLOWER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
break;
case ANYOF_PRINT:
@@ -3675,7 +3660,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isPRINT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
break;
case ANYOF_NPRINT:
@@ -3686,7 +3670,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isPRINT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
break;
case ANYOF_PSXSPC:
@@ -3697,7 +3680,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isPSXSPC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
break;
case ANYOF_NPSXSPC:
@@ -3708,7 +3690,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isPSXSPC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
break;
case ANYOF_PUNCT:
@@ -3719,7 +3700,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isPUNCT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
break;
case ANYOF_NPUNCT:
@@ -3730,7 +3710,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isPUNCT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
break;
case ANYOF_SPACE:
@@ -3741,7 +3720,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isSPACE(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
break;
case ANYOF_NSPACE:
@@ -3752,7 +3730,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isSPACE(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
break;
case ANYOF_UPPER:
@@ -3763,7 +3740,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isUPPER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
break;
case ANYOF_NUPPER:
@@ -3774,7 +3750,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isUPPER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
break;
case ANYOF_XDIGIT:
@@ -3785,7 +3760,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (isXDIGIT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
break;
case ANYOF_NXDIGIT:
@@ -3796,7 +3770,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!isXDIGIT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
break;
default:
@@ -3810,17 +3783,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
} /* end of namedclass \blah */
if (range) {
- if (((lastvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
- ((NATIVE_TO_UNI(lastvalue) > NATIVE_TO_UNI(value)) && (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
+ if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
+ ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) &&
+ (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
Simple_vFAIL4("Invalid [] range \"%*.*s\"",
RExC_parse - rangebegin,
RExC_parse - rangebegin,
rangebegin);
+ range = 0; /* not a valid range */
}
- range = 0; /* not a true range */
}
else {
- lastvalue = value; /* save the beginning of the range */
+ prevvalue = value; /* save the beginning of the range */
if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
RExC_parse[1] != ']') {
RExC_parse++;
@@ -3843,42 +3817,45 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
/* now is the next time */
if (!SIZE_ONLY) {
- if (lastvalue < 256 && value < 256) {
-#ifdef EBCDIC /* EBCDIC, for example. */
- if (PL_hints & HINT_RE_ASCIIR) {
- IV i;
+ IV i;
+
+ if (prevvalue < 256) {
+ IV ceilvalue = value < 256 ? value : 255;
+
+#ifdef EBCDIC
/* New style scheme for ranges:
- * after :
* use re 'asciir';
* do ranges in ASCII/Unicode space
*/
- for (i = NATIVE_TO_ASCII(lastvalue) ; i <= NATIVE_TO_ASCII(value); i++)
- ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
+ for (i = NATIVE_TO_ASCII(prevvalue);
+ i <= NATIVE_TO_ASCII(ceilvalue);
+ i++)
+ ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
}
- else if ((isLOWER(lastvalue) && isLOWER(value)) ||
- (isUPPER(lastvalue) && isUPPER(value)))
+ else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+ (isUPPER(prevvalue) && isUPPER(ceilvalue)))
{
- IV i;
- if (isLOWER(lastvalue)) {
- for (i = lastvalue; i <= value; i++)
+ if (isLOWER(prevvalue)) {
+ for (i = prevvalue; i <= ceilvalue; i++)
if (isLOWER(i))
ANYOF_BITMAP_SET(ret, i);
} else {
- for (i = lastvalue; i <= value; i++)
+ for (i = prevvalue; i <= ceilvalue; i++)
if (isUPPER(i))
ANYOF_BITMAP_SET(ret, i);
}
}
else
#endif
- for ( ; lastvalue <= value; lastvalue++)
- ANYOF_BITMAP_SET(ret, lastvalue);
- } else {
+ for (i = prevvalue; i <= ceilvalue; i++)
+ ANYOF_BITMAP_SET(ret, i);
+ }
+ if (value > 255) {
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
- if (lastvalue < value)
+ if (prevvalue < value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
- (UV)lastvalue, (UV)value);
- else
+ (UV)prevvalue, (UV)value);
+ else if (prevvalue == value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
(UV)value);
}
@@ -3912,7 +3889,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
}
/* optimize inverted simple patterns (e.g. [^a-z]) */
- if (!SIZE_ONLY && !dont_optimize_invert &&
+ if (!SIZE_ONLY && optimize_invert &&
/* If the only flag is inversion. */
(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
@@ -4448,7 +4425,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
for (i = 0; i <= 256; i++) { /* just the first 256 */
U8 *e = uvchr_to_utf8(s, i);
- if (i < 256 && swash_fetch(sw, s)) {
+ if (i < 256 && swash_fetch(sw, s, TRUE)) {
if (rangestart == -1)
rangestart = i;
} else if (rangestart != -1) {
diff --git a/regexec.c b/regexec.c
index e358d63d5f..c9096f0597 100644
--- a/regexec.c
+++ b/regexec.c
@@ -958,7 +958,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (tmp == !(OP(c) == BOUND ?
- swash_fetch(PL_utf8_alnum, (U8*)s) :
+ swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
@@ -1001,7 +1001,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ?
- swash_fetch(PL_utf8_alnum, (U8*)s) :
+ swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
else if ((norun || regtry(prog, s)))
@@ -1029,7 +1029,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
- if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
+ if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
@@ -1087,7 +1087,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
- if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
+ if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
@@ -1145,7 +1145,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
while (s < strend) {
- if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
+ if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
@@ -1203,7 +1203,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
while (s < strend) {
- if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
+ if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
@@ -1261,7 +1261,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
while (s < strend) {
- if (swash_fetch(PL_utf8_digit,(U8*)s)) {
+ if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
@@ -1319,7 +1319,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
while (s < strend) {
- if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
+ if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
@@ -2214,7 +2214,7 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
if (do_utf8) {
if (!(OP(scan) == ALNUM
- ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+ ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput)))
{
sayNO;
@@ -2237,7 +2237,7 @@ S_regmatch(pTHX_ regnode *prog)
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
if (OP(scan) == NALNUM
- ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+ ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput))
{
sayNO;
@@ -2269,7 +2269,7 @@ S_regmatch(pTHX_ regnode *prog)
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM_uni(ln);
LOAD_UTF8_CHARCLASS(alnum,"a");
- n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
+ n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
}
else {
ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
@@ -2302,7 +2302,7 @@ S_regmatch(pTHX_ regnode *prog)
if (UTF8_IS_CONTINUED(nextchr)) {
LOAD_UTF8_CHARCLASS(space," ");
if (!(OP(scan) == SPACE
- ? swash_fetch(PL_utf8_space, (U8*)locinput)
+ ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput)))
{
sayNO;
@@ -2332,7 +2332,7 @@ S_regmatch(pTHX_ regnode *prog)
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
if (OP(scan) == NSPACE
- ? swash_fetch(PL_utf8_space, (U8*)locinput)
+ ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput))
{
sayNO;
@@ -2355,7 +2355,7 @@ S_regmatch(pTHX_ regnode *prog)
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
if (!(OP(scan) == DIGIT
- ? swash_fetch(PL_utf8_digit, (U8*)locinput)
+ ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput)))
{
sayNO;
@@ -2378,7 +2378,7 @@ S_regmatch(pTHX_ regnode *prog)
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
if (OP(scan) == NDIGIT
- ? swash_fetch(PL_utf8_digit, (U8*)locinput)
+ ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput))
{
sayNO;
@@ -2394,10 +2394,12 @@ S_regmatch(pTHX_ regnode *prog)
break;
case CLUMP:
LOAD_UTF8_CHARCLASS(mark,"~");
- if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
+ if (locinput >= PL_regeol ||
+ swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
sayNO;
locinput += PL_utf8skip[nextchr];
- while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
+ while (locinput < PL_regeol &&
+ swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
locinput += UTF8SKIP(locinput);
if (locinput > PL_regeol)
sayNO;
@@ -3623,7 +3625,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (hardcount < max && scan < loceol &&
- swash_fetch(PL_utf8_alnum, (U8*)scan)) {
+ swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -3651,7 +3653,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (hardcount < max && scan < loceol &&
- !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
+ !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -3679,7 +3681,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(space," ");
while (hardcount < max && scan < loceol &&
- (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
+ (*scan == ' ' ||
+ swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -3707,7 +3710,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(space," ");
while (hardcount < max && scan < loceol &&
- !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
+ !(*scan == ' ' ||
+ swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -3735,7 +3739,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(digit,"0");
while (hardcount < max && scan < loceol &&
- swash_fetch(PL_utf8_digit,(U8*)scan)) {
+ swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -3749,7 +3753,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(digit,"0");
while (hardcount < max && scan < loceol &&
- !swash_fetch(PL_utf8_digit,(U8*)scan)) {
+ !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -3879,25 +3883,22 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
char flags = ANYOF_FLAGS(n);
bool match = FALSE;
UV c;
- STRLEN len;
+ STRLEN len = 0;
- if (do_utf8)
- c = utf8_to_uvchr(p, &len);
- else
- c = *p;
+ c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
if (do_utf8 || (flags & ANYOF_UNICODE)) {
if (do_utf8 && !ANYOF_RUNTIME(n)) {
if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
match = TRUE;
}
- if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
+ if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
match = TRUE;
if (!match) {
SV *sw = regclass_swash(n, TRUE, 0);
if (sw) {
- if (swash_fetch(sw, p))
+ if (swash_fetch(sw, p, do_utf8))
match = TRUE;
else if (flags & ANYOF_FOLD) {
U8 tmpbuf[UTF8_MAXLEN+1];
@@ -3908,7 +3909,7 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
}
else
uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
- if (swash_fetch(sw, tmpbuf))
+ if (swash_fetch(sw, tmpbuf, do_utf8))
match = TRUE;
}
}
@@ -3918,7 +3919,7 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
- I32 f;
+ I32 f;
if (flags & ANYOF_LOCALE) {
PL_reg_flags |= RF_tainted;
diff --git a/t/lib/b-deparse.t b/t/lib/b-deparse.t
index 1f9bdb75b0..24ff3279b1 100644
--- a/t/lib/b-deparse.t
+++ b/t/lib/b-deparse.t
@@ -124,8 +124,8 @@ my $foo = $deparse->coderef2text(sub { { 234; }});
print "not " unless $foo =~ /{.*{.*234;.*}.*}/sm;
ok;
$foo = $deparse->coderef2text(sub { { 234; } continue { 123; } });
-unless ($foo =~ /{\s*{\s*234;\s*}\s*continue\s*{\s*123;\s*}/sm) {
- print "# [$foo]\n\# vs expected\n# [sub { { 234; } continue { 123; } }]\n";
+unless ($foo =~ /{\s*{\s*do\s*{\s*234;\s*};\s*}\s*continue\s*{\s*123;\s*}\s*}/sm) {
+ print "# [$foo]\n\# vs expected\n# [{ { do { 234; }; } continue { 123; } }]\n";
print "not ";
}
ok;
diff --git a/t/lib/selfstubber.t b/t/lib/selfstubber.t
index d338489599..fd0cf0640e 100644
--- a/t/lib/selfstubber.t
+++ b/t/lib/selfstubber.t
@@ -10,10 +10,6 @@ use Devel::SelfStubber;
my $runperl = "$^X \"-I../lib\"";
-# ensure correct output ordering for system() calls
-
-select STDERR; $| = 1; select STDOUT; $| = 1;
-
print "1..12\n";
my @cleanup;
diff --git a/t/op/pat.t b/t/op/pat.t
index 9130454dcb..1be72346f8 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -5,7 +5,8 @@
# that does fit that format, add it to op/re_tests, not here.
$| = 1;
-print "1..587\n";
+
+print "1..615\n";
BEGIN {
chdir 't' if -d 't';
@@ -1302,6 +1303,7 @@ print "ok 247\n";
{
# the second half of 20001028.003
+ my $X = '';
$X =~ s/^/chr(1488)/e;
print "not " unless length $X == 1 && ord($X) == 1488;
print "ok 260\n";
@@ -1353,10 +1355,11 @@ print "ok 247\n";
"\0" => 'Cc',
);
- for my $char (keys %s) {
+ for my $char (map { s/^\S+ //; $_ }
+ sort map { sprintf("%06x", ord($_))." $_" } keys %s) {
my $class = $s{$char};
- my $code = sprintf("%04x", ord($char));
- printf "# 0x$code\n";
+ my $code = sprintf("%06x", ord($char));
+ printf "#\n# 0x$code\n#\n";
print "# IsAlpha\n";
if ($class =~ /^[LM]/) {
print "not " unless $char =~ /\p{IsAlpha}/;
@@ -1382,7 +1385,7 @@ print "ok 247\n";
print "ok $test\n"; $test++;
}
print "# IsASCII\n";
- if ($code <= 127) {
+ if ($code le '00007f') {
print "not " unless $char =~ /\p{IsASCII}/;
print "ok $test\n"; $test++;
print "not " if $char =~ /\P{IsASCII}/;
@@ -1583,3 +1586,104 @@ EOT
print "not " unless ord($x) == 0x12345678 && length($x) == 1;
print "ok 587\n";
}
+
+{
+ my $x = "\x7f";
+
+ print "not " if $x =~ /[\x80-\xff]/;
+ print "ok 588\n";
+
+ print "not " if $x =~ /[\x80-\x{100}]/;
+ print "ok 589\n";
+
+ print "not " if $x =~ /[\x{100}]/;
+ print "ok 590\n";
+
+ print "not " if $x =~ /\p{InLatin1Supplement}/;
+ print "ok 591\n";
+
+ print "not " unless $x =~ /\P{InLatin1Supplement}/;
+ print "ok 592\n";
+
+ print "not " if $x =~ /\p{InLatinExtendedA}/;
+ print "ok 593\n";
+
+ print "not " unless $x =~ /\P{InLatinExtendedA}/;
+ print "ok 594\n";
+}
+
+{
+ my $x = "\x80";
+
+ print "not " unless $x =~ /[\x80-\xff]/;
+ print "ok 595\n";
+
+ print "not " unless $x =~ /[\x80-\x{100}]/;
+ print "ok 596\n";
+
+ print "not " if $x =~ /[\x{100}]/;
+ print "ok 597\n";
+
+ print "not " unless $x =~ /\p{InLatin1Supplement}/;
+ print "ok 598\n";
+
+ print "not " if $x =~ /\P{InLatin1Supplement}/;
+ print "ok 599\n";
+
+ print "not " if $x =~ /\p{InLatinExtendedA}/;
+ print "ok 600\n";
+
+ print "not " unless $x =~ /\P{InLatinExtendedA}/;
+ print "ok 601\n";
+}
+
+{
+ my $x = "\xff";
+
+ print "not " unless $x =~ /[\x80-\xff]/;
+ print "ok 602\n";
+
+ print "not " unless $x =~ /[\x80-\x{100}]/;
+ print "ok 603\n";
+
+ print "not " if $x =~ /[\x{100}]/;
+ print "ok 604\n";
+
+ print "not " unless $x =~ /\p{InLatin1Supplement}/;
+ print "ok 605\n";
+
+ print "not " if $x =~ /\P{InLatin1Supplement}/;
+ print "ok 606\n";
+
+ print "not " if $x =~ /\p{InLatinExtendedA}/;
+ print "ok 607\n";
+
+ print "not " unless $x =~ /\P{InLatinExtendedA}/;
+ print "ok 608\n";
+}
+
+{
+ my $x = "\x{100}";
+
+ print "not " if $x =~ /[\x80-\xff]/;
+ print "ok 609\n";
+
+ print "not " unless $x =~ /[\x80-\x{100}]/;
+ print "ok 610\n";
+
+ print "not " unless $x =~ /[\x{100}]/;
+ print "ok 611\n";
+
+ print "not " if $x =~ /\p{InLatin1Supplement}/;
+ print "ok 612\n";
+
+ print "not " unless $x =~ /\P{InLatin1Supplement}/;
+ print "ok 613\n";
+
+ print "not " unless $x =~ /\p{InLatinExtendedA}/;
+ print "ok 614\n";
+
+ print "not " if $x =~ /\P{InLatinExtendedA}/;
+ print "ok 615\n";
+}
+
diff --git a/utf8.c b/utf8.c
index fda9920933..b682cf65ca 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1045,13 +1045,13 @@ Perl_is_utf8_alnum(pTHX_ U8 *p)
* descendant of isalnum(3), in other words, it doesn't
* contain the '_'. --jhi */
PL_utf8_alnum = swash_init("utf8", "IsWord", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
/* return *p == '_' || is_utf8_alpha(p) || is_utf8_digit(p); */
#ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */
if (!PL_utf8_alnum)
PL_utf8_alnum = swash_init("utf8", "",
sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
#endif
}
@@ -1062,13 +1062,13 @@ Perl_is_utf8_alnumc(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_alnum)
PL_utf8_alnum = swash_init("utf8", "IsAlnumC", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
/* return is_utf8_alpha(p) || is_utf8_digit(p); */
#ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */
if (!PL_utf8_alnum)
PL_utf8_alnum = swash_init("utf8", "",
sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
#endif
}
@@ -1085,7 +1085,7 @@ Perl_is_utf8_alpha(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_alpha)
PL_utf8_alpha = swash_init("utf8", "IsAlpha", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alpha, p);
+ return swash_fetch(PL_utf8_alpha, p, TRUE);
}
bool
@@ -1095,7 +1095,7 @@ Perl_is_utf8_ascii(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_ascii)
PL_utf8_ascii = swash_init("utf8", "IsAscii", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_ascii, p);
+ return swash_fetch(PL_utf8_ascii, p, TRUE);
}
bool
@@ -1105,7 +1105,7 @@ Perl_is_utf8_space(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_space)
PL_utf8_space = swash_init("utf8", "IsSpacePerl", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_space, p);
+ return swash_fetch(PL_utf8_space, p, TRUE);
}
bool
@@ -1115,7 +1115,7 @@ Perl_is_utf8_digit(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_digit)
PL_utf8_digit = swash_init("utf8", "IsDigit", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_digit, p);
+ return swash_fetch(PL_utf8_digit, p, TRUE);
}
bool
@@ -1125,7 +1125,7 @@ Perl_is_utf8_upper(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_upper)
PL_utf8_upper = swash_init("utf8", "IsUpper", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_upper, p);
+ return swash_fetch(PL_utf8_upper, p, TRUE);
}
bool
@@ -1135,7 +1135,7 @@ Perl_is_utf8_lower(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_lower)
PL_utf8_lower = swash_init("utf8", "IsLower", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_lower, p);
+ return swash_fetch(PL_utf8_lower, p, TRUE);
}
bool
@@ -1145,7 +1145,7 @@ Perl_is_utf8_cntrl(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_cntrl)
PL_utf8_cntrl = swash_init("utf8", "IsCntrl", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_cntrl, p);
+ return swash_fetch(PL_utf8_cntrl, p, TRUE);
}
bool
@@ -1155,7 +1155,7 @@ Perl_is_utf8_graph(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_graph)
PL_utf8_graph = swash_init("utf8", "IsGraph", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_graph, p);
+ return swash_fetch(PL_utf8_graph, p, TRUE);
}
bool
@@ -1165,7 +1165,7 @@ Perl_is_utf8_print(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_print)
PL_utf8_print = swash_init("utf8", "IsPrint", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_print, p);
+ return swash_fetch(PL_utf8_print, p, TRUE);
}
bool
@@ -1175,7 +1175,7 @@ Perl_is_utf8_punct(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_punct)
PL_utf8_punct = swash_init("utf8", "IsPunct", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_punct, p);
+ return swash_fetch(PL_utf8_punct, p, TRUE);
}
bool
@@ -1185,7 +1185,7 @@ Perl_is_utf8_xdigit(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_xdigit)
PL_utf8_xdigit = swash_init("utf8", "IsXDigit", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_xdigit, p);
+ return swash_fetch(PL_utf8_xdigit, p, TRUE);
}
bool
@@ -1195,7 +1195,7 @@ Perl_is_utf8_mark(pTHX_ U8 *p)
return FALSE;
if (!PL_utf8_mark)
PL_utf8_mark = swash_init("utf8", "IsM", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_mark, p);
+ return swash_fetch(PL_utf8_mark, p, TRUE);
}
UV
@@ -1205,7 +1205,7 @@ Perl_to_utf8_upper(pTHX_ U8 *p)
if (!PL_utf8_toupper)
PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
- uv = swash_fetch(PL_utf8_toupper, p);
+ uv = swash_fetch(PL_utf8_toupper, p, TRUE);
return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
}
@@ -1216,7 +1216,7 @@ Perl_to_utf8_title(pTHX_ U8 *p)
if (!PL_utf8_totitle)
PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
- uv = swash_fetch(PL_utf8_totitle, p);
+ uv = swash_fetch(PL_utf8_totitle, p, TRUE);
return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
}
@@ -1227,7 +1227,7 @@ Perl_to_utf8_lower(pTHX_ U8 *p)
if (!PL_utf8_tolower)
PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
- uv = swash_fetch(PL_utf8_tolower, p);
+ uv = swash_fetch(PL_utf8_tolower, p, TRUE);
return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
}
@@ -1282,21 +1282,31 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none)
}
UV
-Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
+Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8)
{
HV* hv = (HV*)SvRV(sv);
- /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
- then the "swatch" is a vec() for al the chars which start
- with 0xAA..0xYY
- So the key in the hash is length of encoded char -1
- */
- U32 klen = UTF8SKIP(ptr) - 1;
- U32 off = ptr[klen];
+ U32 klen;
+ U32 off;
STRLEN slen;
STRLEN needents;
U8 *tmps;
U32 bit;
SV *retval;
+ U8 tmputf8[2];
+ UV c = NATIVE_TO_ASCII(*ptr);
+
+ if (!do_utf8 && !UNI_IS_INVARIANT(c)) {
+ tmputf8[0] = UTF8_EIGHT_BIT_HI(c);
+ tmputf8[1] = UTF8_EIGHT_BIT_LO(c);
+ ptr = tmputf8;
+ }
+ /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
+ * then the "swatch" is a vec() for all the chars which start
+ * with 0xAA..0xYY
+ * So the key in the hash (klen) is length of encoded char -1
+ */
+ klen = UTF8SKIP(ptr) - 1;
+ off = ptr[klen];
if (klen == 0)
{
@@ -1322,9 +1332,9 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
* NB: this code assumes that swatches are never modified, once generated!
*/
- if (hv == PL_last_swash_hv &&
+ if (hv == PL_last_swash_hv &&
klen == PL_last_swash_klen &&
- (!klen || memEQ((char *)ptr,(char *)PL_last_swash_key,klen)) )
+ (!klen || memEQ((char *)ptr, (char *)PL_last_swash_key, klen)) )
{
tmps = PL_last_swash_tmps;
slen = PL_last_swash_slen;
@@ -1348,7 +1358,8 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
EXTEND(SP,3);
PUSHs((SV*)sv);
/* On EBCDIC & ~(0xA0-1) isn't a useful thing to do */
- PUSHs(sv_2mortal(newSViv((klen) ? (code_point & ~(needents - 1)) : 0)));
+ PUSHs(sv_2mortal(newSViv((klen) ?
+ (code_point & ~(needents - 1)) : 0)));
PUSHs(sv_2mortal(newSViv(needents)));
PUTBACK;
if (call_method("SWASHGET", G_SCALAR))
diff --git a/win32/Makefile b/win32/Makefile
index 15aa5de183..ccab8b5af7 100644
--- a/win32/Makefile
+++ b/win32/Makefile
@@ -609,7 +609,8 @@ SETARGV_OBJ = setargv$(o)
DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \
Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \
Sys/Hostname Storable Filter/Util/Call Encode Digest/MD5 \
- PerlIO/Scalar MIME/Base64 Time/HiRes Time/Piece
+ PerlIO/Scalar MIME/Base64 Time/HiRes Time/Piece \
+ Cwd List/Util PerlIO/Via XS/Typemap
STATIC_EXT = DynaLoader
NONXS_EXT = Errno
@@ -639,6 +640,13 @@ PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar
MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64
TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes
TIMEPIECE = $(EXTDIR)\Time\Piece\Piece
+CWD = $(EXTDIR)\Cwd\Cwd
+LISTUTIL = $(EXTDIR)\List\Util\Util
+PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via
+XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap
+
+# Help out FindExt::scan_ext() with a copy
+LISTUTIL_PM = $(LISTUTIL).pm
SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll
FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll
@@ -664,6 +672,10 @@ PERLIOSCALAR_DLL= $(AUTODIR)\PerlIO\Scalar\Scalar.dll
MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll
TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll
TIMEPIECE_DLL = $(AUTODIR)\Time\Piece\Piece.dll
+CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll
+LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll
+PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll
+XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll
ERRNO_PM = $(LIBDIR)\Errno.pm
@@ -691,7 +703,11 @@ EXTENSION_C = \
$(PERLIOSCALAR).c \
$(MIMEBASE64).c \
$(TIMEHIRES).c \
- $(TIMEPIECE).c
+ $(TIMEPIECE).c \
+ $(CWD).c \
+ $(LISTUTIL).c \
+ $(PERLIOVIA).c \
+ $(XSTYPEMAP).c
EXTENSION_DLL = \
$(SOCKET_DLL) \
@@ -717,7 +733,11 @@ EXTENSION_DLL = \
$(PERLIOSCALAR_DLL) \
$(MIMEBASE64_DLL) \
$(TIMEHIRES_DLL) \
- $(TIMEPIECE_DLL)
+ $(TIMEPIECE_DLL) \
+ $(CWD_DLL) \
+ $(LISTUTIL_DLL) \
+ $(PERLIOVIA_DLL) \
+ $(XSTYPEMAP_DLL)
EXTENSION_PM = \
$(ERRNO_PM)
@@ -780,7 +800,7 @@ config.w32 : $(CFGSH_TMPL)
-del /f config.h
copy $(CFGH_TMPL) config.h
-..\config.sh : config.w32 $(MINIPERL) config_sh.PL
+..\config.sh : config.w32 $(MINIPERL) $(LISTUTIL_PM) config_sh.PL
$(MINIPERL) -I..\lib config_sh.PL $(CFG_VARS) config.w32 > ..\config.sh
# this target is for when changes to the main config.sh happen
@@ -1020,35 +1040,64 @@ $(ENCODE_DLL): $(PERLEXE) $(ENCODE).xs
cd ..\..\win32
$(MD5_DLL): $(PERLEXE) $(MD5).xs
- cd $(EXTDIR)\$(*B)
- ..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
+ cd $(EXTDIR)\Digest\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
$(MAKE)
- cd ..\..\win32
+ cd ..\..\..\win32
$(PERLIOSCALAR_DLL): $(PERLEXE) $(PERLIOSCALAR).xs
- cd $(EXTDIR)\$(*B)
- ..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
+ cd $(EXTDIR)\PerlIO\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
$(MAKE)
- cd ..\..\win32
+ cd ..\..\..\win32
$(MIMEBASE64_DLL): $(PERLEXE) $(MIMEBASE64).xs
- cd $(EXTDIR)\$(*B)
- ..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
+ cd $(EXTDIR)\MIME\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
$(MAKE)
- cd ..\..\win32
+ cd ..\..\..\win32
$(TIMEHIRES_DLL): $(PERLEXE) $(TIMEHIRES).xs
- cd $(EXTDIR)\$(*B)
- ..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
+ cd $(EXTDIR)\Time\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
$(MAKE)
- cd ..\..\win32
+ cd ..\..\..\win32
$(TIMEPIECE_DLL): $(PERLEXE) $(TIMEPIECE).xs
+ cd $(EXTDIR)\Time\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
+ $(MAKE)
+ cd ..\..\..\win32
+
+$(CWD_DLL): $(PERLEXE) $(CWD).xs
cd $(EXTDIR)\$(*B)
..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
$(MAKE)
cd ..\..\win32
+$(LISTUTIL_PM): $(EXTDIR)\List\Util\lib\List\Util.pm
+ cd $(EXTDIR)
+ copy List\Util\lib\List\Util.pm List\Util\Util.pm
+ cd ..\win32
+
+$(LISTUTIL_DLL): $(LISTUTIL_PM) $(PERLEXE) $(LISTUTIL).xs
+ cd $(EXTDIR)\List\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
+ $(MAKE)
+ cd ..\..\..\win32
+
+$(PERLIOVIA_DLL): $(PERLEXE) $(PERLIOVIA).xs
+ cd $(EXTDIR)\PerlIO\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
+ $(MAKE)
+ cd ..\..\..\win32
+
+$(XSTYPEMAP_DLL): $(PERLEXE) $(XSTYPEMAP).xs
+ cd $(EXTDIR)\XS\$(*B)
+ ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl
+ $(MAKE)
+ cd ..\..\..\win32
+
$(ERRNO_PM): $(PERLEXE) $(ERRNO)_pm.PL
cd $(EXTDIR)\$(*B)
..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
@@ -1107,12 +1156,16 @@ distclean: clean
-del /f $(LIBDIR)\Filter\Util\Call.pm
-del /f $(LIBDIR)\Digest\MD5.pm
-del /f $(LIBDIR)\PerlIO\Scalar.pm
+ -del /f $(LIBDIR)\PerlIO\Via.pm
-del /f $(LIBDIR)\MIME\Base64.pm
-del /f $(LIBDIR)\MIME\QuotedPrint.pm
-del /f $(LIBDIR)\List\Util.pm
+ -del /f $(LISTUTIL_PM)
-del /f $(LIBDIR)\Scalar\Util.pm
-del /f $(LIBDIR)\Time\HiRes.pm
-del /f $(LIBDIR)\Time\Piece.pm
+ -del /f $(LIBDIR)\Cwd.pm
+ -del /f $(LIBDIR)\XS\Typemap.pm
-if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO
-rmdir /s $(LIBDIR)\IO
-if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread
@@ -1130,6 +1183,8 @@ distclean: clean
-rmdir /s $(LIBDIR)\List
-if exist $(LIBDIR)\Scalar rmdir /s /q $(LIBDIR)\Scalar
-rmdir /s $(LIBDIR)\Scalar
+ -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS
+ -rmdir /s $(LIBDIR)\XS
cd $(PODDIR)
-del /f *.html *.bat checkpods \
perlaix.pod perlamiga.pod perlbs2000.pod perlcygwin.pod \