diff options
345 files changed, 16208 insertions, 5308 deletions
@@ -15,7 +15,7 @@ -- A. C. Yardley <yardley@tanet.net> A. Sinan Unur <nanis@cpan.org> -Aaron Crane <perl@aaroncrane.co.uk> +Aaron Crane <arc@cpan.org> Aaron B. Dossett <aaron@iglou.com> Aaron J. Mackey <ajm6q@virginia.edu> Abe Timmerman <abe@ztreet.demon.nl> @@ -848,6 +848,7 @@ Pavel Kaňkovský <kan@dcit.cz> Pavel Zakouril <Pavel.Zakouril@mff.cuni.cz> Pedro Felipe Horrillo Guerra <pancho@pancho.name> Per Einar Ellefsen <per.einar@skynet.be> +Perlover <perlover@perlover.com> Peter BARABAS Pete Peterson <petersonp@genrad.com> Peter Chines <pchines@nhgri.nih.gov> @@ -939,6 +940,7 @@ Robin Houston <robin@cpan.org> Rocco Caputo <troc@netrus.net> Roderick Schertler <roderick@argon.org> Rodger Anderson <rodger@boi.hp.com> +Rodolfo Carvalho <rhcarvalho@gmail.com> Ronald F. Guilmette <rfg@monkeys.com> Ronald J. Kimball <rjk@linguist.dartmouth.edu> Ronald Schmidt <RonaldWS@aol.com> diff --git a/Cross/Makefile-cross-SH b/Cross/Makefile-cross-SH index bb6752f9d4..4e72adcf8a 100644 --- a/Cross/Makefile-cross-SH +++ b/Cross/Makefile-cross-SH @@ -334,6 +334,7 @@ addedbyconf = UU $(shextract) lib/lib.pm pstruct # Unicode data files generated by mktables unidatafiles = lib/unicore/Decomposition.pl lib/unicore/TestProp.pl \ lib/unicore/CombiningClass.pl lib/unicore/Name.pl \ + lib/unicore/UCD.pl lib/unicore/Name.pm \ lib/unicore/Heavy.pl lib/unicore/mktables.lst # Directories of Unicode data files generated by mktables diff --git a/Cross/config.sh-arm-linux b/Cross/config.sh-arm-linux index fbdedb831f..9dbb4d4f36 100644 --- a/Cross/config.sh-arm-linux +++ b/Cross/config.sh-arm-linux @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/hostname' api_revision='5' -api_subversion='4' +api_subversion='5' api_version='15' -api_versionstring='5.15.4' +api_versionstring='5.15.5' ar='ar' -archlib='/usr/lib/perl5/5.15.4/armv4l-linux' -archlibexp='/usr/lib/perl5/5.15.4/armv4l-linux' +archlib='/usr/lib/perl5/5.15.5/armv4l-linux' +archlibexp='/usr/lib/perl5/5.15.5/armv4l-linux' archname64='' archname='armv4l-linux' archobjs='' @@ -55,7 +55,7 @@ castflags='0' cat='cat' cc='cc' cccdlflags='-fpic' -ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.15.4/armv4l-linux/CORE' +ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.15.5/armv4l-linux/CORE' ccflags='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccflags_uselargefiles='-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccname='arm-linux-gcc' @@ -725,7 +725,7 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='./install_me_here/usr/lib/perl5/5.15.4/armv4l-linux' +installarchlib='./install_me_here/usr/lib/perl5/5.15.5/armv4l-linux' installbin='./install_me_here/usr/bin' installhtml1dir='' installhtml3dir='' @@ -733,13 +733,13 @@ installman1dir='./install_me_here/usr/share/man/man1' installman3dir='./install_me_here/usr/share/man/man3' installprefix='./install_me_here/usr' installprefixexp='./install_me_here/usr' -installprivlib='./install_me_here/usr/lib/perl5/5.15.4' +installprivlib='./install_me_here/usr/lib/perl5/5.15.5' installscript='./install_me_here/usr/bin' -installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.15.4/armv4l-linux' +installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.15.5/armv4l-linux' installsitebin='./install_me_here/usr/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.15.4' +installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.15.5' installsiteman1dir='./install_me_here/usr/share/man/man1' installsiteman3dir='./install_me_here/usr/share/man/man3' installsitescript='./install_me_here/usr/bin' @@ -868,8 +868,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/usr/lib/perl5/5.15.4' -privlibexp='/usr/lib/perl5/5.15.4' +privlib='/usr/lib/perl5/5.15.5' +privlibexp='/usr/lib/perl5/5.15.5' procselfexe='"/proc/self/exe"' prototype='define' ptrsize='4' @@ -934,17 +934,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 6, 17, 29, 31, 0' sig_size='68' signal_t='void' -sitearch='/usr/lib/perl5/site_perl/5.15.4/armv4l-linux' -sitearchexp='/usr/lib/perl5/site_perl/5.15.4/armv4l-linux' +sitearch='/usr/lib/perl5/site_perl/5.15.5/armv4l-linux' +sitearchexp='/usr/lib/perl5/site_perl/5.15.5/armv4l-linux' sitebin='/usr/bin' sitebinexp='/usr/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/usr/lib/perl5/site_perl/5.15.4' +sitelib='/usr/lib/perl5/site_perl/5.15.5' sitelib_stem='/usr/lib/perl5/site_perl' -sitelibexp='/usr/lib/perl5/site_perl/5.15.4' +sitelibexp='/usr/lib/perl5/site_perl/5.15.5' siteman1dir='/usr/share/man/man1' siteman1direxp='/usr/share/man/man1' siteman3dir='/usr/share/man/man3' @@ -983,7 +983,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='4' +subversion='5' sysman='/usr/share/man/man1' tail='' tar='' @@ -1073,8 +1073,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.15.4' -version_patchlevel_string='version 15 subversion 4' +version='5.15.5' +version_patchlevel_string='version 15 subversion 5' versiononly='undef' vi='' voidflags='15' @@ -1089,9 +1089,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=15 -PERL_SUBVERSION=4 +PERL_SUBVERSION=5 PERL_API_REVISION=5 PERL_API_VERSION=15 -PERL_API_SUBVERSION=4 +PERL_API_SUBVERSION=5 PERL_PATCHLEVEL= PERL_CONFIG_SH=true diff --git a/Cross/config.sh-arm-linux-n770 b/Cross/config.sh-arm-linux-n770 index 32df9686f8..404eaabe6b 100644 --- a/Cross/config.sh-arm-linux-n770 +++ b/Cross/config.sh-arm-linux-n770 @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/hostname' api_revision='5' -api_subversion='4' +api_subversion='5' api_version='15' -api_versionstring='5.15.4' +api_versionstring='5.15.5' ar='ar' -archlib='/usr/lib/perl5/5.15.4/armv4l-linux' -archlibexp='/usr/lib/perl5/5.15.4/armv4l-linux' +archlib='/usr/lib/perl5/5.15.5/armv4l-linux' +archlibexp='/usr/lib/perl5/5.15.5/armv4l-linux' archname64='' archname='armv4l-linux' archobjs='' @@ -55,7 +55,7 @@ castflags='0' cat='cat' cc='arm-none-linux-gnueabi-gcc' cccdlflags='-fpic' -ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.15.4/armv4l-linux/CORE' +ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.15.5/armv4l-linux/CORE' ccflags='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccflags_uselargefiles='-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccname='arm-linux-gcc' @@ -703,7 +703,7 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='./install_me_here/usr/lib/perl5/5.15.4/armv4l-linux' +installarchlib='./install_me_here/usr/lib/perl5/5.15.5/armv4l-linux' installbin='./install_me_here/usr/bin' installhtml1dir='' installhtml3dir='' @@ -711,13 +711,13 @@ installman1dir='./install_me_here/usr/share/man/man1' installman3dir='./install_me_here/usr/share/man/man3' installprefix='./install_me_here/usr' installprefixexp='./install_me_here/usr' -installprivlib='./install_me_here/usr/lib/perl5/5.15.4' +installprivlib='./install_me_here/usr/lib/perl5/5.15.5' installscript='./install_me_here/usr/bin' -installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.15.4/armv4l-linux' +installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.15.5/armv4l-linux' installsitebin='./install_me_here/usr/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.15.4' +installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.15.5' installsiteman1dir='./install_me_here/usr/share/man/man1' installsiteman3dir='./install_me_here/usr/share/man/man3' installsitescript='./install_me_here/usr/bin' @@ -845,8 +845,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/usr/lib/perl5/5.15.4' -privlibexp='/usr/lib/perl5/5.15.4' +privlib='/usr/lib/perl5/5.15.5' +privlibexp='/usr/lib/perl5/5.15.5' procselfexe='"/proc/self/exe"' prototype='define' ptrsize='4' @@ -907,17 +907,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 6, 17, 29, 31, 0' sig_size='68' signal_t='void' -sitearch='/usr/lib/perl5/site_perl/5.15.4/armv4l-linux' -sitearchexp='/usr/lib/perl5/site_perl/5.15.4/armv4l-linux' +sitearch='/usr/lib/perl5/site_perl/5.15.5/armv4l-linux' +sitearchexp='/usr/lib/perl5/site_perl/5.15.5/armv4l-linux' sitebin='/usr/bin' sitebinexp='/usr/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/usr/lib/perl5/site_perl/5.15.4' +sitelib='/usr/lib/perl5/site_perl/5.15.5' sitelib_stem='/usr/lib/perl5/site_perl' -sitelibexp='/usr/lib/perl5/site_perl/5.15.4' +sitelibexp='/usr/lib/perl5/site_perl/5.15.5' siteman1dir='/usr/share/man/man1' siteman1direxp='/usr/share/man/man1' siteman3dir='/usr/share/man/man3' @@ -954,7 +954,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='4' +subversion='5' sysman='/usr/share/man/man1' tail='' tar='' @@ -1040,8 +1040,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.15.4' -version_patchlevel_string='version 15 subversion 4' +version='5.15.5' +version_patchlevel_string='version 15 subversion 5' versiononly='undef' vi='' voidflags='15' @@ -1056,9 +1056,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=15 -PERL_SUBVERSION=4 +PERL_SUBVERSION=5 PERL_API_REVISION=5 PERL_API_VERSION=15 -PERL_API_SUBVERSION=4 +PERL_API_SUBVERSION=5 PERL_PATCHLEVEL= PERL_CONFIG_SH=true @@ -530,7 +530,7 @@ The directories set up by Configure fall into three broad categories. =item Directories for the perl distribution -By default, Configure will use the following directories for 5.15.4. +By default, Configure will use the following directories for 5.15.5. $version is the full perl version number, including subversion, e.g. 5.12.3, and $archname is a string like sun4-sunos, determined by Configure. The full definitions of all Configure @@ -841,7 +841,7 @@ must contain a space separated list of directories under the site_perl directory, and has to include architecture-dependent directories separately, eg. - sh Configure -Dinc_version_list="5.15.4/x86_64-linux 5.14.0" ... + sh Configure -Dinc_version_list="5.15.5/x86_64-linux 5.14.0" ... When using the newer perl, you can add these paths again in the PERL5LIB environment variable or with perl's -I runtime option. @@ -2373,9 +2373,9 @@ won't interfere with another version. (The defaults guarantee this for libraries after 5.6.0, but not for executables. TODO?) One convenient way to do this is by using a separate prefix for each version, such as - sh Configure -Dprefix=/opt/perl5.15.4 + sh Configure -Dprefix=/opt/perl5.15.5 -and adding /opt/perl5.15.4/bin to the shell PATH variable. Such users +and adding /opt/perl5.15.5/bin to the shell PATH variable. Such users may also wish to add a symbolic link /usr/local/bin/perl so that scripts can still start with #!/usr/local/bin/perl. @@ -2388,13 +2388,13 @@ seriously consider using a separate directory, since development subversions may not have all the compatibility wrinkles ironed out yet. -=head2 Upgrading from 5.15.3 or earlier +=head2 Upgrading from 5.15.4 or earlier -B<Perl 5.15.4 is binary incompatible with Perl 5.15.3 and any earlier +B<Perl 5.15.5 is binary incompatible with Perl 5.15.4 and any earlier Perl release.> Perl modules having binary parts (meaning that a C compiler is used) will have to be recompiled to be -used with 5.15.4. If you find you do need to rebuild an extension with -5.15.4, you may safely do so without disturbing the older +used with 5.15.5. If you find you do need to rebuild an extension with +5.15.5, you may safely do so without disturbing the older installations. (See L<"Coexistence with earlier versions of perl 5"> above.) @@ -1046,6 +1046,7 @@ cpan/ExtUtils-MakeMaker/MANIFEST ExtUtils::MakeMaker MANIFEST cpan/ExtUtils-MakeMaker/NOTES Notes about MakeMaker internals cpan/ExtUtils-MakeMaker/PATCHING Suggestions for patching MakeMaker cpan/ExtUtils-MakeMaker/README MakeMaker README +cpan/ExtUtils-MakeMaker/README.packaging MakeMaker packaging README cpan/ExtUtils-MakeMaker/t/00compile.t See if MakeMaker modules compile cpan/ExtUtils-MakeMaker/t/arch_check.t Test MakeMaker's arch_check() cpan/ExtUtils-MakeMaker/t/backwards.t Check MakeMaker's backwards compatibility @@ -2686,6 +2687,7 @@ cpan/Unicode-Collate/Collate/Locale/as.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/az.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/be.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/bg.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/bn.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/ca.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/cs.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/cy.pl Unicode::Collate @@ -2695,6 +2697,7 @@ cpan/Unicode-Collate/Collate/Locale/eo.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/es.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/es_trad.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/et.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/fa.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/fil.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/fi_phone.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/fi.pl Unicode::Collate @@ -2732,16 +2735,26 @@ cpan/Unicode-Collate/Collate/Locale/pl.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale.pm Unicode::Collate cpan/Unicode-Collate/Collate/Locale/ro.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/ru.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/sa.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/se.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/si_dict.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/si.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/sk.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/sl.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/sq.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/sr.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/sv.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/sv_refo.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/ta.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/te.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/th.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/tn.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/to.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/tr.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/uk.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/ur.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/vi.pl Unicode::Collate +cpan/Unicode-Collate/Collate/Locale/wae.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/wo.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/yo.pl Unicode::Collate cpan/Unicode-Collate/Collate/Locale/zh_big5.pl Unicode::Collate @@ -2779,8 +2792,10 @@ cpan/Unicode-Collate/t/loc_as.t Unicode::Collate cpan/Unicode-Collate/t/loc_az.t Unicode::Collate cpan/Unicode-Collate/t/loc_be.t Unicode::Collate cpan/Unicode-Collate/t/loc_bg.t Unicode::Collate +cpan/Unicode-Collate/t/loc_bn.t Unicode::Collate cpan/Unicode-Collate/t/loc_bs.t Unicode::Collate cpan/Unicode-Collate/t/loc_ca.t Unicode::Collate +cpan/Unicode-Collate/t/loc_cjk.t Unicode::Collate cpan/Unicode-Collate/t/loc_cs.t Unicode::Collate cpan/Unicode-Collate/t/loc_cyrl.t Unicode::Collate cpan/Unicode-Collate/t/loc_cy.t Unicode::Collate @@ -2791,6 +2806,7 @@ cpan/Unicode-Collate/t/loc_eo.t Unicode::Collate cpan/Unicode-Collate/t/loc_es.t Unicode::Collate cpan/Unicode-Collate/t/loc_estr.t Unicode::Collate cpan/Unicode-Collate/t/loc_et.t Unicode::Collate +cpan/Unicode-Collate/t/loc_fa.t Unicode::Collate cpan/Unicode-Collate/t/loc_fil.t Unicode::Collate cpan/Unicode-Collate/t/loc_fiph.t Unicode::Collate cpan/Unicode-Collate/t/loc_fi.t Unicode::Collate @@ -2829,19 +2845,29 @@ cpan/Unicode-Collate/t/loc_pa.t Unicode::Collate cpan/Unicode-Collate/t/loc_pl.t Unicode::Collate cpan/Unicode-Collate/t/loc_ro.t Unicode::Collate cpan/Unicode-Collate/t/loc_ru.t Unicode::Collate +cpan/Unicode-Collate/t/loc_sa.t Unicode::Collate cpan/Unicode-Collate/t/loc_se.t Unicode::Collate +cpan/Unicode-Collate/t/loc_sidt.t Unicode::Collate +cpan/Unicode-Collate/t/loc_si.t Unicode::Collate cpan/Unicode-Collate/t/loc_sk.t Unicode::Collate cpan/Unicode-Collate/t/loc_sl.t Unicode::Collate cpan/Unicode-Collate/t/loc_sq.t Unicode::Collate +cpan/Unicode-Collate/t/loc_srla.t Unicode::Collate cpan/Unicode-Collate/t/loc_sr.t Unicode::Collate +cpan/Unicode-Collate/t/loc_svrf.t Unicode::Collate cpan/Unicode-Collate/t/loc_sv.t Unicode::Collate cpan/Unicode-Collate/t/loc_sw.t Unicode::Collate +cpan/Unicode-Collate/t/loc_ta.t Unicode::Collate cpan/Unicode-Collate/t/loc_test.t Unicode::Collate +cpan/Unicode-Collate/t/loc_te.t Unicode::Collate +cpan/Unicode-Collate/t/loc_th.t Unicode::Collate cpan/Unicode-Collate/t/loc_tn.t Unicode::Collate cpan/Unicode-Collate/t/loc_to.t Unicode::Collate cpan/Unicode-Collate/t/loc_tr.t Unicode::Collate cpan/Unicode-Collate/t/loc_uk.t Unicode::Collate +cpan/Unicode-Collate/t/loc_ur.t Unicode::Collate cpan/Unicode-Collate/t/loc_vi.t Unicode::Collate +cpan/Unicode-Collate/t/loc_wae.t Unicode::Collate cpan/Unicode-Collate/t/loc_wo.t Unicode::Collate cpan/Unicode-Collate/t/loc_yo.t Unicode::Collate cpan/Unicode-Collate/t/loc_zhb5.t Unicode::Collate @@ -3424,6 +3450,7 @@ dist/Storable/t/overload.t See if Storable works dist/Storable/t/recurse.t See if Storable works dist/Storable/t/restrict.t See if Storable works dist/Storable/t/retrieve.t See if Storable works +dist/Storable/t/robust.t See if it survives mangled %INC dist/Storable/t/sig_die.t See if Storable works dist/Storable/t/st-dump.pl See if Storable works dist/Storable/t/store.t See if Storable works @@ -3679,6 +3706,7 @@ ext/NDBM_File/hints/dec_osf.pl Hint for NDBM_File for named architecture ext/NDBM_File/hints/dynixptx.pl Hint for NDBM_File for named architecture ext/NDBM_File/hints/gnukfreebsd.pl Hint for NDBM_File for named architecture ext/NDBM_File/hints/gnuknetbsd.pl Hint for NDBM_File for named architecture +ext/NDBM_File/hints/gnu.pl Hint for NDBM_File for named architecture ext/NDBM_File/hints/linux.pl Hint for NDBM_File for named architecture ext/NDBM_File/hints/sco.pl Hint for NDBM_File for named architecture ext/NDBM_File/hints/solaris.pl Hint for NDBM_File for named architecture @@ -3879,6 +3907,7 @@ ext/XS-APItest/t/Block.pm Helper for ./blockhooks.t ext/XS-APItest/t/call_checker.t test call checker plugin API ext/XS-APItest/t/caller.t XS::APItest: tests for caller_cx ext/XS-APItest/t/call.t XS::APItest extension +ext/XS-APItest/t/check_warnings.t test scope of "Too late for CHECK" ext/XS-APItest/t/cleanup.t test stack behaviour on unwinding ext/XS-APItest/t/clone-with-stack.t test clone with CLONEf_COPY_STACKS works ext/XS-APItest/t/cophh.t test COPHH API @@ -4540,6 +4569,7 @@ pod/perl5151delta.pod Perl changes in version 5.15.1 pod/perl5152delta.pod Perl changes in version 5.15.2 pod/perl5153delta.pod Perl changes in version 5.15.3 pod/perl5154delta.pod Perl changes in version 5.15.4 +pod/perl5155delta.pod Perl changes in version 5.15.5 pod/perl561delta.pod Perl changes in version 5.6.1 pod/perl56delta.pod Perl changes in version 5.6 pod/perl581delta.pod Perl changes in version 5.8.1 @@ -4690,9 +4720,12 @@ Porting/makerel Release making utility Porting/make_snapshot.pl Make a tgz snapshot of our tree with a .patch file in it Porting/manicheck Check against MANIFEST Porting/manisort Sort the MANIFEST +Porting/new-perldelta.pl Generate a new perldelta Porting/newtests-perldelta.pl Generate Perldelta stub for newly added tests Porting/perldelta_template.pod Template for creating new perldelta.pod files Porting/perlhist_calculate.pl Perform calculations to update perlhist +Porting/pod_lib.pl Code for handling pod.lst +Porting/pod_rules.pl generate lists of pod files for Makefiles Porting/podtidy Reformat pod using Pod::Tidy Porting/pumpkin.pod Guidelines and hints for Perl maintainers Porting/README.y2038 Perl notes for the 2038 fix @@ -5110,6 +5143,7 @@ t/op/dor.t See if defined-or (//) works t/op/do.t See if subroutines work t/op/each_array.t See if array iterators work t/op/each.t See if hash iterators work +t/op/evalbytes.t See if evalbytes operator works t/op/eval.t See if eval operator works t/op/exec.t See if exec, system and qx work t/op/exists_sub.t See if exists(&sub) works @@ -5252,7 +5286,6 @@ t/perl.supp Perl valgrind suppressions t/porting/args_assert.t Check that all PERL_ARGS_ASSERT* macros are used t/porting/authors.t Check that all authors have been acknowledged t/porting/bincompat.t Check that {non_,}bincompat_options are ordered -t/porting/buildtoc.t Check that various pod lists are consistent t/porting/checkcase.t Check whether we are case-insensitive-fs-friendly t/porting/checkcfgvar.t Check that all config.sh-like files are good t/porting/cmp_version.t Test whether all changed module files have their VERSION bumped @@ -5266,6 +5299,7 @@ t/porting/known_pod_issues.dat Data file for porting/podcheck.t t/porting/maintainers.t Test that Porting/Maintainers.pl is up to date t/porting/manifest.t Test that this MANIFEST file is well formed t/porting/podcheck.t Test the POD of shipped modules is well formed +t/porting/pod_rules.t Check that various pod lists are consistent t/porting/regen.t Check that regen.pl doesn't need running t/porting/test_bootstrap.t Test that the instructions for test bootstrapping aren't accidentally overlooked. t/README Instructions for regression tests @@ -5343,6 +5377,7 @@ t/run/switchx.aux Data for switchx.t t/run/switchx.t Test the -x switch t/TEST The regression tester t/test.pl Simple testing library +t/test_pl/can_isa_ok.t Tests for the simple testing library t/thread_it.pl Run regression tests in a new thread t/uni/attrs.t See if Unicode attributes work t/uni/bless.t See if Unicode bless works @@ -5352,6 +5387,7 @@ t/uni/case.pl See if Unicode casing works t/uni/chomp.t See if Unicode chomp works t/uni/chr.t See if Unicode chr works t/uni/class.t See if Unicode classes work (\p) +t/uni/eval.t See if Unicode hints don't affect eval() t/uni/fold.t See if Unicode folding works t/uni/goto.t See if Unicode goto &sub works t/uni/greek.t See if Unicode in greek works @@ -5441,7 +5477,6 @@ vos/Changes Changes made to port Perl to the VOS operating system vos/compile_full_perl.cm VOS command macro to build "full" Perl vos/configure_full_perl.sh VOS shell script to configure "full" perl before building vos/make_full_perl.sh VOS shell script to build and test "full" perl -vos/syslog.h syslog header for VOS vos/vos.c VOS emulations for missing POSIX functions vos/vosish.h VOS-specific header file warnings.h The warning numbers @@ -1,5 +1,5 @@ name: perl -version: 5.015004 +version: 5.015005 abstract: The Perl 5 language interpreter author: perl5-porters@perl.org license: perl diff --git a/Makefile.SH b/Makefile.SH index 22358882c6..318c5abfa0 100755 --- a/Makefile.SH +++ b/Makefile.SH @@ -447,6 +447,7 @@ addedbyconf = UU $(shextract) pstruct # Unicode data files generated by mktables unidatafiles = lib/unicore/Decomposition.pl lib/unicore/TestProp.pl \ lib/unicore/CombiningClass.pl lib/unicore/Name.pl \ + lib/unicore/UCD.pl lib/unicore/Name.pm \ lib/unicore/Heavy.pl lib/unicore/mktables.lst # Directories of Unicode data files generated by mktables @@ -478,7 +479,7 @@ mini_obj = $(minindt_obj) $(MINIDTRACE_O) ndt_obj = $(obj0) $(obj1) $(obj2) $(obj3) $(ARCHOBJS) obj = $(ndt_obj) $(DTRACE_O) -perltoc_pod_prereqs = extra.pods pod/perl5155delta.pod pod/perlapi.pod pod/perlintern.pod pod/perlmodlib.pod pod/perluniprops.pod +perltoc_pod_prereqs = extra.pods pod/perl5156delta.pod pod/perlapi.pod pod/perlintern.pod pod/perlmodlib.pod pod/perluniprops.pod generated_pods = pod/perltoc.pod $(perltoc_pod_prereqs) generated_headers = uudmap.h bitcount.h mg_data.h @@ -1044,7 +1045,7 @@ uni.data: $(MINIPERL_EXE) $(CONFIGPM) lib/unicore/mktables $(nonxs_ext) # But also this ensures that all extensions are built before we try to scan # them, which picks up Devel::PPPort's documentation. pod/perltoc.pod: $(perltoc_pod_prereqs) $(PERL_EXE) $(ext) pod/buildtoc - $(RUN_PERL) -f -Ilib pod/buildtoc --build-toc -q + $(RUN_PERL) -f -Ilib pod/buildtoc -q pod/perlapi.pod: pod/perlintern.pod @@ -1054,8 +1055,8 @@ pod/perlintern.pod: $(MINIPERL_EXE) autodoc.pl embed.fnc pod/perlmodlib.pod: $(MINIPERL_EXE) pod/perlmodlib.PL MANIFEST $(MINIPERL) $(Icwd) pod/perlmodlib.PL -q -pod/perl5155delta.pod: pod/perldelta.pod - $(LNS) perldelta.pod pod/perl5155delta.pod +pod/perl5156delta.pod: pod/perldelta.pod + $(LNS) perldelta.pod pod/perl5156delta.pod extra.pods: $(MINIPERL_EXE) -@test ! -f extra.pods || rm -f `cat extra.pods` diff --git a/NetWare/Makefile b/NetWare/Makefile index 9849836309..74d93b0c39 100644 --- a/NetWare/Makefile +++ b/NetWare/Makefile @@ -86,7 +86,7 @@ NLM_VERSION = 3,20,0 # Here comes the CW tools - TO BE FILLED TO BUILD WITH CW - -MODULE_DESC = "Perl 5.15.4 for NetWare" +MODULE_DESC = "Perl 5.15.5 for NetWare" CCTYPE = CodeWarrior C_COMPILER = mwccnlm -c CPP_COMPILER = mwccnlm @@ -462,7 +462,7 @@ INST_NW_TOP2 = $(INST_NW_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -INST_VER = \5.15.4 +INST_VER = \5.15.5 # # Comment this out if you DON'T want your perl installation to have diff --git a/NetWare/config_H.wc b/NetWare/config_H.wc index a029ddbfac..eba1af6593 100644 --- a/NetWare/config_H.wc +++ b/NetWare/config_H.wc @@ -1048,7 +1048,7 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define ARCHLIB "c:\\perl\\5.15.4\\lib\\NetWare-x86-multi-thread" /**/ +#define ARCHLIB "c:\\perl\\5.15.5\\lib\\NetWare-x86-multi-thread" /**/ /*#define ARCHLIB_EXP "" /**/ /* ARCHNAME: @@ -1079,8 +1079,8 @@ * This symbol is the filename expanded version of the BIN symbol, for * programs that do not want to deal with that at run-time. */ -#define BIN "c:\\perl\\5.15.4\\bin\\NetWare-x86-multi-thread" /**/ -#define BIN_EXP "c:\\perl\\5.15.4\\bin\\NetWare-x86-multi-thread" /**/ +#define BIN "c:\\perl\\5.15.5\\bin\\NetWare-x86-multi-thread" /**/ +#define BIN_EXP "c:\\perl\\5.15.5\\bin\\NetWare-x86-multi-thread" /**/ /* BYTEORDER: * This symbol holds the hexadecimal constant defined in byteorder, @@ -3063,7 +3063,7 @@ * This symbol contains the ~name expanded version of SITEARCH, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define SITEARCH "c:\\perl\\site\\5.15.4\\lib\\NetWare-x86-multi-thread" /**/ +#define SITEARCH "c:\\perl\\site\\5.15.5\\lib\\NetWare-x86-multi-thread" /**/ /*#define SITEARCH_EXP "" /**/ /* SITELIB: @@ -3086,7 +3086,7 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "c:\\perl\\site\\5.15.4\\lib" /**/ +#define SITELIB "c:\\perl\\site\\5.15.5\\lib" /**/ /*#define SITELIB_EXP "" /**/ #define SITELIB_STEM "" /**/ diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index 4735921c82..28016d1039 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -353,7 +353,7 @@ use File::Glob qw(:case); 'CGI' => { 'MAINTAINER' => 'lstein', - 'DISTRIBUTION' => 'MARKSTOS/CGI.pm-3.55.tar.gz', + 'DISTRIBUTION' => 'MARKSTOS/CGI.pm-3.58.tar.gz', 'FILES' => q[cpan/CGI], 'EXCLUDED' => [ qr{^t/lib/Test}, qw( cgi-lib_porting.html @@ -377,7 +377,7 @@ use File::Glob qw(:case); 'Compress::Raw::Bzip2' => { 'MAINTAINER' => 'pmqs', - 'DISTRIBUTION' => 'PMQS/Compress-Raw-Bzip2-2.037.tar.gz', + 'DISTRIBUTION' => 'PMQS/Compress-Raw-Bzip2-2.042.tar.gz', 'FILES' => q[cpan/Compress-Raw-Bzip2], 'EXCLUDED' => [ qr{^t/Test/}, qw( bzip2-src/bzip2-cpp.patch @@ -389,7 +389,7 @@ use File::Glob qw(:case); 'Compress::Raw::Zlib' => { 'MAINTAINER' => 'pmqs', - 'DISTRIBUTION' => 'PMQS/Compress-Raw-Zlib-2.037.tar.gz', + 'DISTRIBUTION' => 'PMQS/Compress-Raw-Zlib-2.042.tar.gz', 'FILES' => q[cpan/Compress-Raw-Zlib], 'EXCLUDED' => [ qr{^t/Test/}, @@ -462,7 +462,7 @@ use File::Glob qw(:case); 'CPANPLUS' => { 'MAINTAINER' => 'kane', - 'DISTRIBUTION' => 'BINGOS/CPANPLUS-0.9111.tar.gz', + 'DISTRIBUTION' => 'BINGOS/CPANPLUS-0.9112.tar.gz', 'FILES' => q[cpan/CPANPLUS], 'EXCLUDED' => [ qr{^inc/}, qr{^t/dummy-.*\.hidden$}, @@ -597,7 +597,7 @@ use File::Glob qw(:case); 'Digest::SHA' => { 'MAINTAINER' => 'mshelor', - 'DISTRIBUTION' => 'MSHELOR/Digest-SHA-5.62.tar.gz', + 'DISTRIBUTION' => 'MSHELOR/Digest-SHA-5.63.tar.gz', 'FILES' => q[cpan/Digest-SHA], 'EXCLUDED' => [ qw{t/pod.t t/podcover.t examples/dups} ], 'UPSTREAM' => 'cpan', @@ -734,7 +734,7 @@ use File::Glob qw(:case); 'ExtUtils::MakeMaker' => { 'MAINTAINER' => 'mschwern', - 'DISTRIBUTION' => 'MSCHWERN/ExtUtils-MakeMaker-6.63_01.tar.gz', + 'DISTRIBUTION' => 'MSCHWERN/ExtUtils-MakeMaker-6.63_02.tar.gz', 'FILES' => q[cpan/ExtUtils-MakeMaker], 'EXCLUDED' => [ qr{^t/lib/Test/}, qr{^(bundled|my)/}, @@ -1034,7 +1034,7 @@ use File::Glob qw(:case); 'IO-Compress' => { 'MAINTAINER' => 'pmqs', - 'DISTRIBUTION' => 'PMQS/IO-Compress-2.037.tar.gz', + 'DISTRIBUTION' => 'PMQS/IO-Compress-2.042.tar.gz', 'FILES' => q[cpan/IO-Compress], 'EXCLUDED' => [ qr{t/Test/} ], 'UPSTREAM' => 'cpan', @@ -1707,7 +1707,7 @@ use File::Glob qw(:case); 'Term::ANSIColor' => { 'MAINTAINER' => 'rra', - 'DISTRIBUTION' => 'RRA/ANSIColor-3.01.tar.gz', + 'DISTRIBUTION' => 'RRA/Term-ANSIColor-3.01.tar.gz', 'FILES' => q[cpan/Term-ANSIColor], 'EXCLUDED' => [ qr{^tests/}, qw(t/pod-spelling.t t/pod.t) ], 'UPSTREAM' => 'cpan', @@ -1970,7 +1970,7 @@ use File::Glob qw(:case); 'Unicode::Collate' => { 'MAINTAINER' => 'sadahiro', - 'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.81.tar.gz', + 'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.85.tar.gz', 'FILES' => q[cpan/Unicode-Collate], 'EXCLUDED' => [ qr{N$}, qr{^data/}, diff --git a/Porting/bisect-runner.pl b/Porting/bisect-runner.pl index 4af93dcf27..fea722b32f 100755 --- a/Porting/bisect-runner.pl +++ b/Porting/bisect-runner.pl @@ -53,7 +53,7 @@ my %defines = ( usedevel => '', optimize => '-g', - cc => 'ccache cc', + cc => (`ccache --version`, $?) ? 'cc' : 'ccache cc', ld => 'cc', ($linux64 ? (libpth => \@paths) : ()), ); @@ -62,7 +62,8 @@ unless(GetOptions(\%options, 'target=s', 'jobs|j=i', 'expect-pass=i', 'expect-fail' => sub { $options{'expect-pass'} = 0; }, 'clean!', 'one-liner|e=s', 'match=s', 'force-manifest', - 'test-build', 'check-args', 'A=s@', 'usage|help|?', + 'force-regen', 'test-build', 'A=s@', 'l', 'w', + 'check-args', 'check-shebang!', 'usage|help|?', 'D=s@' => sub { my (undef, $val) = @_; if ($val =~ /\A([^=]+)=(.*)/s) { @@ -83,6 +84,10 @@ my ($target, $j, $match) = @options{qw(target jobs match)}; pod2usage(exitval => 255, verbose => 1) if $options{usage}; pod2usage(exitval => 255, verbose => 1) unless @ARGV || $match || $options{'test-build'} || defined $options{'one-liner'}; +pod2usage(exitval => 255, verbose => 1) + if !$options{'one-liner'} && ($options{l} || $options{w}); + +check_shebang($ARGV[0]) if $options{'check-shebang'} && @ARGV; exit 0 if $options{'check-args'}; @@ -269,6 +274,27 @@ which interferes with detecting errors in the example code itself. =item * +-l + +Add C<-l> to the command line with C<-e> + +This will automatically append a newline to every output line of your testcase. +Note that you can't specify an argument to F<perl>'s C<-l> with this, as it's +not feasible to emulate F<perl>'s somewhat quirky switch parsing with +L<Getopt::Long>. If you need the full flexibility of C<-l>, you need to write +a full test case, instead of using C<bisect.pl>'s C<-e> shortcut. + +=item * + +-w + +Add C<-w> to the command line with C<-e> + +It's not valid to pass C<-l> or C<-w> to C<bisect.pl> unless you are also +using C<-e> + +=item * + --expect-fail The test case should fail for the I<start> revision, and pass for the I<end> @@ -359,6 +385,15 @@ F<MANIFEST> are missing. =item * +--force-regen + +Run C<make regen_headers> before building F<miniperl>. This may fix a build +that otherwise would skip because the generated headers at that revision +are stale. It's not the default because it conceals this error in the true +state of such revisions. + +=item * + --expect-pass [0|1] C<--expect-pass=0> is equivalent to C<--expect-fail>. I<1> is the default. @@ -375,12 +410,41 @@ Passing this to F<bisect.pl> will likely cause the bisect to fail badly. =item * +--validate + +Test that all stable revisions can be built. By default, attempts to build +I<blead>, I<v5.14.0> .. I<perl-5.002>. Stops at the first failure, without +cleaning the checkout. Use I<--start> to specify the earliest revision to +test, I<--end> to specify the most recent. Useful for validating a new +OS/CPU/compiler combination. For example + + ../perl/Porting/bisect.pl --validate -le 'print "Hello from $]"' + +If no testcase is specified, the default is to use F<t/TEST> to run +F<t/base/*.t> + +=item * + --check-args Validate the options and arguments, and exit silently if they are valid. =item * +--check-shebang + +Validate that the test case isn't an executable file with a +C<#!/usr/bin/perl> line (or similar). As F<bisect-runner.pl> does B<not> +prepend C<./perl> to the test case, a I<#!> line specifying an external +F<perl> binary will cause the test case to always run with I<that> F<perl>, +not the F<perl> built by the bisect runner. Likely this is not what you +wanted. If your test case is actually a wrapper script to run other +commands, you should run it with an explicit interpreter, to be clear. For +example, instead of C<../perl/Porting/bisect.pl ~/test/testcase.pl> you'd +run C<../perl/Porting/bisect.pl /usr/bin/perl ~/test/testcase.pl> + +=item * + --usage =item * @@ -478,6 +542,36 @@ sub checkout_file { and die "Could not extract $file at revision $commit"; } +sub check_shebang { + my $file = shift; + return unless -e $file; + if (!-x $file) { + die "$file is not executable. +system($file, ...) is always going to fail. + +Bailing out"; + } + my $fh = open_or_die($file); + my $line = <$fh>; + return unless $line =~ m{\A#!(/\S+/perl\S*)\s}; + die "$file will always be run by $1 +It won't be tested by the ./perl we build. +If you intended to run it with that perl binary, please change your +test case to + + $1 @ARGV + +If you intended to test it with the ./perl we build, please change your +test case to + + ./perl -Ilib @ARGV + +[You may also need to add -- before ./perl to prevent that -Ilib as being +parsed as an argument to bisect.pl] + +Bailing out"; +} + sub clean { if ($options{clean}) { # Needed, because files that are build products in this checked out @@ -560,12 +654,263 @@ my $major qr/^#define\s+(?:PERL_VERSION|PATCHLEVEL)\s+(\d+)\s/, 0); -if ($major < 1) { - if (extract_from_file('Configure', - qr/^ \*=\*\) echo "\$1" >> \$optdef;;$/)) { - # This is " Spaces now allowed in -D command line options.", - # part of commit ecfc54246c2a6f42 - apply_patch(<<'EOPATCH'); +patch_Configure(); +patch_hints(); + +# if Encode is not needed for the test, you can speed up the bisect by +# excluding it from the runs with -Dnoextensions=Encode +# ccache is an easy win. Remove it if it causes problems. +# Commit 1cfa4ec74d4933da adds ignore_versioned_solibs to Configure, and sets it +# to true in hints/linux.sh +# On dromedary, from that point on, Configure (by default) fails to find any +# libraries, because it scans /usr/local/lib /lib /usr/lib, which only contain +# versioned libraries. Without -lm, the build fails. +# Telling /usr/local/lib64 /lib64 /usr/lib64 works from that commit onwards, +# until commit faae14e6e968e1c0 adds it to the hints. +# However, prior to 1cfa4ec74d4933da telling Configure the truth doesn't work, +# because it will spot versioned libraries, pass them to the compiler, and then +# bail out pretty early on. Configure won't let us override libswanted, but it +# will let us override the entire libs list. + +unless (extract_from_file('Configure', 'ignore_versioned_solibs')) { + # Before 1cfa4ec74d4933da, so force the libs list. + + my @libs; + # This is the current libswanted list from Configure, less the libs removed + # by current hints/linux.sh + foreach my $lib (qw(sfio socket inet nsl nm ndbm gdbm dbm db malloc dl dld + ld sun m crypt sec util c cposix posix ucb BSD)) { + foreach my $dir (@paths) { + next unless -f "$dir/lib$lib.so"; + push @libs, "-l$lib"; + last; + } + } + $defines{libs} = \@libs unless exists $defines{libs}; +} + +$defines{usenm} = undef + if $major < 2 && !exists $defines{usenm}; + +my ($missing, $created_dirs); +($missing, $created_dirs) = force_manifest() + if $options{'force-manifest'}; + +my @ARGS = '-dEs'; +foreach my $key (sort keys %defines) { + my $val = $defines{$key}; + if (ref $val) { + push @ARGS, "-D$key=@$val"; + } elsif (!defined $val) { + push @ARGS, "-U$key"; + } elsif (!length $val) { + push @ARGS, "-D$key"; + } else { + $val = "" if $val eq "\0"; + push @ARGS, "-D$key=$val"; + } +} +push @ARGS, map {"-A$_"} @{$options{A}}; + +# </dev/null because it seems that some earlier versions of Configure can +# call commands in a way that now has them reading from stdin (and hanging) +my $pid = fork; +die "Can't fork: $!" unless defined $pid; +if (!$pid) { + open STDIN, '<', '/dev/null'; + # If a file in MANIFEST is missing, Configure asks if you want to + # continue (the default being 'n'). With stdin closed or /dev/null, + # it exits immediately and the check for config.sh below will skip. + exec './Configure', @ARGS; + die "Failed to start Configure: $!"; +} +waitpid $pid, 0 + or die "wait for Configure, pid $pid failed: $!"; + +patch_SH(); + +if (-f 'config.sh') { + # Emulate noextensions if Configure doesn't support it. + fake_noextensions() + if $major < 10 && $defines{noextensions}; + system './Configure -S </dev/null' and die; +} + +if ($target =~ /config\.s?h/) { + match_and_exit($target) if $match && -f $target; + report_and_exit(!-f $target, 'could build', 'could not build', $target) + if $options{'test-build'}; + + my $ret = system @ARGV; + report_and_exit($ret, 'zero exit from', 'non-zero exit from', "@ARGV"); +} elsif (!-f 'config.sh') { + # Skip if something went wrong with Configure + + skip('could not build config.sh'); +} + +force_manifest_cleanup($missing, $created_dirs) + if $missing; + +if($options{'force-regen'} + && extract_from_file('Makefile', qr/\bregen_headers\b/)) { + # regen_headers was added in e50aee73b3d4c555, patch.1m for perl5.001 + # It's not worth faking it for earlier revisions. + system "make regen_headers </dev/null" + and die; +} + +patch_C(); +patch_ext(); + +# Parallel build for miniperl is safe +system "make $j miniperl </dev/null"; + +my $expected = $target =~ /^test/ ? 't/perl' + : $target eq 'Fcntl' ? "lib/auto/Fcntl/Fcntl.$Config{so}" + : $target; +my $real_target = $target eq 'Fcntl' ? $expected : $target; + +if ($target ne 'miniperl') { + # Nearly all parallel build issues fixed by 5.10.0. Untrustworthy before that. + $j = '' if $major < 10; + + if ($real_target eq 'test_prep') { + if ($major < 8) { + # test-prep was added in 5.004_01, 3e3baf6d63945cb6. + # renamed to test_prep in 2001 in 5fe84fd29acaf55c. + # earlier than that, just make test. It will be fast enough. + $real_target = extract_from_file('Makefile.SH', + qr/^(test[-_]prep):/, + 'test'); + } + } + + system "make $j $real_target </dev/null"; +} + +my $missing_target = $expected =~ /perl$/ ? !-x $expected : !-r $expected; + +if ($options{'test-build'}) { + report_and_exit($missing_target, 'could build', 'could not build', + $real_target); +} elsif ($missing_target) { + skip("could not build $real_target"); +} + +match_and_exit($real_target) if $match; + +if (defined $options{'one-liner'}) { + my $exe = $target =~ /^(?:perl$|test)/ ? 'perl' : 'miniperl'; + unshift @ARGV, '-e', $options{'one-liner'}; + unshift @ARGV, '-l' if $options{l}; + unshift @ARGV, '-w' if $options{w}; + unshift @ARGV, "./$exe", '-Ilib'; +} + +# This is what we came here to run: + +if (exists $Config{ldlibpthname}) { + require Cwd; + my $varname = $Config{ldlibpthname}; + my $cwd = Cwd::getcwd(); + if (defined $ENV{$varname}) { + $ENV{$varname} = $cwd . $Config{path_sep} . $ENV{$varname}; + } else { + $ENV{$varname} = $cwd; + } +} + +my $ret = system @ARGV; + +report_and_exit($ret, 'zero exit from', 'non-zero exit from', "@ARGV"); + +############################################################################ +# +# Patching, editing and faking routines only below here. +# +############################################################################ + +sub fake_noextensions { + edit_file('config.sh', sub { + my @lines = split /\n/, shift; + my @ext = split /\s+/, $defines{noextensions}; + foreach (@lines) { + next unless /^extensions=/ || /^dynamic_ext/; + foreach my $ext (@ext) { + s/\b$ext( )?\b/$1/; + } + } + return join "\n", @lines; + }); +} + +sub force_manifest { + my (@missing, @created_dirs); + my $fh = open_or_die('MANIFEST'); + while (<$fh>) { + next unless /^(\S+)/; + # -d is special case needed (at least) between 27332437a2ed1941 and + # bf3d9ec563d25054^ inclusive, as manifest contains ext/Thread/Thread + push @missing, $1 + unless -f $1 || -d $1; + } + close_or_die($fh); + + foreach my $pathname (@missing) { + my @parts = split '/', $pathname; + my $leaf = pop @parts; + my $path = '.'; + while (@parts) { + $path .= '/' . shift @parts; + next if -d $path; + mkdir $path, 0700 or die "Can't create $path: $!"; + unshift @created_dirs, $path; + } + $fh = open_or_die($pathname, '>'); + close_or_die($fh); + chmod 0, $pathname or die "Can't chmod 0 $pathname: $!"; + } + return \@missing, \@created_dirs; +} + +sub force_manifest_cleanup { + my ($missing, $created_dirs) = @_; + # This is probably way too paranoid: + my @errors; + require Fcntl; + foreach my $file (@$missing) { + my (undef, undef, $mode, undef, undef, undef, undef, $size) + = stat $file; + if (!defined $mode) { + push @errors, "Added file $file has been deleted by Configure"; + next; + } + if (Fcntl::S_IMODE($mode) != 0) { + push @errors, + sprintf 'Added file %s had mode changed by Configure to %03o', + $file, $mode; + } + if ($size != 0) { + push @errors, + "Added file $file had sized changed by Configure to $size"; + } + unlink $file or die "Can't unlink $file: $!"; + } + foreach my $dir (@$created_dirs) { + rmdir $dir or die "Can't rmdir $dir: $!"; + } + skip("@errors") + if @errors; +} + +sub patch_Configure { + if ($major < 1) { + if (extract_from_file('Configure', + qr/^\t\t\*=\*\) echo "\$1" >> \$optdef;;$/)) { + # This is " Spaces now allowed in -D command line options.", + # part of commit ecfc54246c2a6f42 + apply_patch(<<'EOPATCH'); diff --git a/Configure b/Configure index 3d3b38d..78ffe16 100755 --- a/Configure @@ -581,16 +926,17 @@ index 3d3b38d..78ffe16 100755 esac shift EOPATCH - } - if (extract_from_file('Configure', qr/^if \$contains 'd_namlen' \$xinc\b/)) { - # Configure's original simple "grep" for d_namlen falls foul of the - # approach taken by the glibc headers: - # #ifdef _DIRENT_HAVE_D_NAMLEN - # # define _D_EXACT_NAMLEN(d) ((d)->d_namlen) - # - # where _DIRENT_HAVE_D_NAMLEN is not defined on Linux. - # This is also part of commit ecfc54246c2a6f42 - apply_patch(<<'EOPATCH'); + } + + if (extract_from_file('Configure', qr/^if \$contains 'd_namlen' \$xinc\b/)) { + # Configure's original simple "grep" for d_namlen falls foul of the + # approach taken by the glibc headers: + # #ifdef _DIRENT_HAVE_D_NAMLEN + # # define _D_EXACT_NAMLEN(d) ((d)->d_namlen) + # + # where _DIRENT_HAVE_D_NAMLEN is not defined on Linux. + # This is also part of commit ecfc54246c2a6f42 + apply_patch(<<'EOPATCH'); diff --git a/Configure b/Configure index 3d3b38d..78ffe16 100755 --- a/Configure @@ -606,19 +952,20 @@ index 3d3b38d..78ffe16 100755 val="$define" else EOPATCH + } } -} -if ($major < 2 - && !extract_from_file('Configure', - qr/Try to guess additional flags to pick up local libraries/)) { - my $mips = extract_from_file('Configure', - qr!(''\) if (?:\./)?mips; then)!); - # This is part of perl-5.001n. It's needed, to add -L/usr/local/lib to the - # ld flags if libraries are found there. It shifts the code to set up - # libpth earlier, and then adds the code to add libpth entries to ldflags - # mips was changed to ./mips in ecfc54246c2a6f42, perl5.000 patch.0g - apply_patch(sprintf <<'EOPATCH', $mips); + if ($major < 2 + && !extract_from_file('Configure', + qr/Try to guess additional flags to pick up local libraries/)) { + my $mips = extract_from_file('Configure', + qr!(''\) if (?:\./)?mips; then)!); + # This is part of perl-5.001n. It's needed, to add -L/usr/local/lib to + # theld flags if libraries are found there. It shifts the code to set up + # libpth earlier, and then adds the code to add libpth entries to + # ldflags + # mips was changed to ./mips in ecfc54246c2a6f42, perl5.000 patch.0g + apply_patch(sprintf <<'EOPATCH', $mips); diff --git a/Configure b/Configure index 53649d5..0635a6e 100755 --- a/Configure @@ -754,19 +1101,19 @@ index 53649d5..0635a6e 100755 case "$so" in '') EOPATCH -} + } -if ($major < 5 && extract_from_file('Configure', - qr!if \$cc \$ccflags try\.c -o try >/dev/null 2>&1; then!)) { - # Analogous to the more general fix of dfe9444ca7881e71 - # Without this flags such as -m64 may not be passed to this compile, which - # results in a byteorder of '1234' instead of '12345678', which can then - # cause crashes. - - if (extract_from_file('Configure', qr/xxx_prompt=y/)) { - # 8e07c86ebc651fe9 or later - # ("This is my patch patch.1n for perl5.001.") - apply_patch(<<'EOPATCH'); + if ($major < 5 && extract_from_file('Configure', + qr!if \$cc \$ccflags try\.c -o try >/dev/null 2>&1; then!)) { + # Analogous to the more general fix of dfe9444ca7881e71 + # Without this flags such as -m64 may not be passed to this compile, + # which results in a byteorder of '1234' instead of '12345678', which + # can then cause crashes. + + if (extract_from_file('Configure', qr/xxx_prompt=y/)) { + # 8e07c86ebc651fe9 or later + # ("This is my patch patch.1n for perl5.001.") + apply_patch(<<'EOPATCH'); diff --git a/Configure b/Configure index 62249dd..c5c384e 100755 --- a/Configure @@ -781,8 +1128,8 @@ index 62249dd..c5c384e 100755 case "$dflt" in [1-4][1-4][1-4][1-4]|12345678|87654321) EOPATCH - } else { - apply_patch(<<'EOPATCH'); + } else { + apply_patch(<<'EOPATCH'); diff --git a/Configure b/Configure index 53649d5..f1cd64a 100755 --- a/Configure @@ -797,34 +1144,35 @@ index 53649d5..f1cd64a 100755 case "$dflt" in ????|????????) echo "(The test program ran ok.)";; EOPATCH + } } -} -if ($major < 6 && !extract_from_file('Configure', - qr!^\t-A\)$!)) { - # This adds the -A option to Configure, which is incredibly useful - # Effectively this is commits 02e93a22d20fc9a5, 5f83a3e9d818c3ad, - # bde6b06b2c493fef, f7c3111703e46e0c and 2 lines of trailing whitespace - # removed by 613d6c3e99b9decc, but applied at slightly different locations - # to ensure a clean patch back to 5.000 - # Note, if considering patching to the intermediate revisions to fix bugs - # in -A handling, f7c3111703e46e0c is from 2002, and hence $major == 8 - - # To add to the fun, early patches add -K and -O options, and it's not - # trivial to get patch to put the C<. ./posthint.sh> in the right place - edit_file('Configure', sub { - my $code = shift; - $code =~ s/(optstr = ")([^"]+";\s*# getopt-style specification)/$1A:$2/ - or die "Substitution failed"; - $code =~ s!^(: who configured the system)! + if ($major < 6 && !extract_from_file('Configure', + qr!^\t-A\)$!)) { + # This adds the -A option to Configure, which is incredibly useful + # Effectively this is commits 02e93a22d20fc9a5, 5f83a3e9d818c3ad, + # bde6b06b2c493fef, f7c3111703e46e0c and 2 lines of trailing whitespace + # removed by 613d6c3e99b9decc, but applied at slightly different + # locations to ensure a clean patch back to 5.000 + # Note, if considering patching to the intermediate revisions to fix + # bugs in -A handling, f7c3111703e46e0c is from 2002, and hence + # $major == 8 + + # To add to the fun, early patches add -K and -O options, and it's not + # trivial to get patch to put the C<. ./posthint.sh> in the right place + edit_file('Configure', sub { + my $code = shift; + $code =~ s/(optstr = ")([^"]+";\s*# getopt-style specification)/$1A:$2/ + or die "Substitution failed"; + $code =~ s!^(: who configured the system)! touch posthint.sh . ./posthint.sh $1!ms - or die "Substitution failed"; - return $code; - }); - apply_patch(<<'EOPATCH'); + or die "Substitution failed"; + return $code; + }); + apply_patch(<<'EOPATCH'); diff --git a/Configure b/Configure index 4b55fa6..60c3c64 100755 --- a/Configure @@ -895,35 +1243,36 @@ index 4b55fa6..60c3c64 100755 shift cd .. EOPATCH -} + } -if ($major < 8 && !extract_from_file('Configure', - qr/^\t\tif test ! -t 0; then$/)) { - # Before dfe9444ca7881e71, Configure would refuse to run if stdin was not a - # tty. With that commit, the tty requirement was dropped for -de and -dE - # Commit aaeb8e512e8e9e14 dropped the tty requirement for -S - # For those older versions, it's probably easiest if we simply remove the - # sanity test. - edit_file('Configure', sub { - my $code = shift; - $code =~ s/test ! -t 0/test Perl = rules/; - return $code; - }); -} + if ($major < 8 && !extract_from_file('Configure', + qr/^\t\tif test ! -t 0; then$/)) { + # Before dfe9444ca7881e71, Configure would refuse to run if stdin was + # not a tty. With that commit, the tty requirement was dropped for -de + # and -dE + # Commit aaeb8e512e8e9e14 dropped the tty requirement for -S + # For those older versions, it's probably easiest if we simply remove + # the sanity test. + edit_file('Configure', sub { + my $code = shift; + $code =~ s/test ! -t 0/test Perl = rules/; + return $code; + }); + } -if ($major == 8 || $major == 9) { - # Fix symbol detection to that of commit 373dfab3839ca168 if it's any - # intermediate version 5129fff43c4fe08c or later, as the intermediate - # versions don't work correctly on (at least) Sparc Linux. - # 5129fff43c4fe08c adds the first mention of mistrustnm. - # 373dfab3839ca168 removes the last mention of lc="" - edit_file('Configure', sub { - my $code = shift; - return $code - if $code !~ /\btc="";/; # 373dfab3839ca168 or later - return $code - if $code !~ /\bmistrustnm\b/; # before 5129fff43c4fe08c - my $fixed = <<'EOC'; + if ($major == 8 || $major == 9) { + # Fix symbol detection to that of commit 373dfab3839ca168 if it's any + # intermediate version 5129fff43c4fe08c or later, as the intermediate + # versions don't work correctly on (at least) Sparc Linux. + # 5129fff43c4fe08c adds the first mention of mistrustnm. + # 373dfab3839ca168 removes the last mention of lc="" + edit_file('Configure', sub { + my $code = shift; + return $code + if $code !~ /\btc="";/; # 373dfab3839ca168 or later + return $code + if $code !~ /\bmistrustnm\b/; # before 5129fff43c4fe08c + my $fixed = <<'EOC'; : is a C symbol defined? csym='tlook=$1; @@ -965,17 +1314,18 @@ esac; eval "$2=$tval"' EOC - $code =~ s/\n: is a C symbol defined\?\n.*?\neval "\$2=\$tval"'\n\n/$fixed/sm - or die "substitution failed"; - return $code; - }); -} + $code =~ s/\n: is a C symbol defined\?\n.*?\neval "\$2=\$tval"'\n\n/$fixed/sm + or die "substitution failed"; + return $code; + }); + } -if ($major < 10 && extract_from_file('Configure', qr/^set malloc\.h i_malloc$/)) { - # This is commit 01d07975f7ef0e7d, trimmed, with $compile inlined as - # prior to bd9b35c97ad661cc Configure had the malloc.h test before the - # definition of $compile. - apply_patch(<<'EOPATCH'); + if ($major < 10 + && extract_from_file('Configure', qr/^set malloc\.h i_malloc$/)) { + # This is commit 01d07975f7ef0e7d, trimmed, with $compile inlined as + # prior to bd9b35c97ad661cc Configure had the malloc.h test before the + # definition of $compile. + apply_patch(<<'EOPATCH'); diff --git a/Configure b/Configure index 3d2e8b9..6ce7766 100755 --- a/Configure @@ -1006,203 +1356,35 @@ index 3d2e8b9..6ce7766 100755 +eval $setvar EOPATCH -} - -# Cwd.xs added in commit 0d2079faa739aaa9. Cwd.pm moved to ext/ 8 years later -# in commit 403f501d5b37ebf0 -if ($major > 0 && <*/Cwd/Cwd.xs>) { - if ($major < 10 && !extract_from_file('Makefile.SH', qr/^extra_dep=''$/)) { - # The Makefile.PL for Unicode::Normalize needs - # lib/unicore/CombiningClass.pl. Even without a parallel build, we need - # a dependency to ensure that it builds. This is a variant of commit - # 9f3ef600c170f61e. Putting this for earlier versions gives us a spot - # on which to hang the edits below - apply_patch(<<'EOPATCH'); -diff --git a/Makefile.SH b/Makefile.SH -index f61d0db..6097954 100644 ---- a/Makefile.SH -+++ b/Makefile.SH -@@ -155,10 +155,20 @@ esac - - : Prepare dependency lists for Makefile. - dynamic_list=' ' -+extra_dep='' - for f in $dynamic_ext; do - : the dependency named here will never exist - base=`echo "$f" | sed 's/.*\///'` -- dynamic_list="$dynamic_list lib/auto/$f/$base.$dlext" -+ this_target="lib/auto/$f/$base.$dlext" -+ dynamic_list="$dynamic_list $this_target" -+ -+ : Parallel makes reveal that we have some interdependencies -+ case $f in -+ Math/BigInt/FastCalc) extra_dep="$extra_dep -+$this_target: lib/auto/List/Util/Util.$dlext" ;; -+ Unicode/Normalize) extra_dep="$extra_dep -+$this_target: lib/unicore/CombiningClass.pl" ;; -+ esac - done - - static_list=' ' -@@ -987,2 +997,9 @@ n_dummy $(nonxs_ext): miniperl$(EXE_EXT) preplibrary $(DYNALOADER) FORCE - @$(LDLIBPTH) sh ext/util/make_ext nonxs $@ MAKE=$(MAKE) LIBPERL_A=$(LIBPERL) -+!NO!SUBS! -+ -+$spitshell >>Makefile <<EOF -+$extra_dep -+EOF -+ -+$spitshell >>Makefile <<'!NO!SUBS!' - -EOPATCH - } - if ($major < 14) { - # Commits dc0655f797469c47 and d11a62fe01f2ecb2 - edit_file('Makefile.SH', sub { - my $code = shift; - foreach my $ext (qw(Encode SDBM_File)) { - next if $code =~ /\b$ext\) extra_dep=/s; - $code =~ s!(\) extra_dep="\$extra_dep -\$this_target: .*?" ;;) -( esac -)!$1 - $ext) extra_dep="\$extra_dep -\$this_target: lib/auto/Cwd/Cwd.\$dlext" ;; -$2!; - } - return $code; - }); - } -} -if ($major == 7) { - # Remove commits 9fec149bb652b6e9 and 5bab1179608f81d8, which add/amend - # rules to automatically run regen scripts that rebuild C headers. These - # cause problems because a git checkout doesn't preserve relative file - # modification times, hence the regen scripts may fire. This will obscure - # whether the repository had the correct generated headers checked in. - # Also, the dependency rules for running the scripts were not correct, - # which could cause spurious re-builds on re-running make, and can cause - # complete build failures for a parallel make. - if (extract_from_file('Makefile.SH', - qr/Writing it this way gives make a big hint to always run opcode\.pl before/)) { - apply_commit('70c6e6715e8fec53'); - } elsif (extract_from_file('Makefile.SH', - qr/^opcode\.h opnames\.h pp_proto\.h pp\.sym: opcode\.pl$/)) { - revert_commit('9fec149bb652b6e9'); } } -# There was a bug in makedepend.SH which was fixed in version 96a8704c. -# Symptom was './makedepend: 1: Syntax error: Unterminated quoted string' -# Remove this if you're actually bisecting a problem related to makedepend.SH -# If you do this, you may need to add in code to correct the output of older -# makedepends, which don't correctly filter newer gcc output such as <built-in> -checkout_file('makedepend.SH'); - -if ($^O eq 'freebsd') { - # There are rather too many version-specific FreeBSD hints fixes to patch - # individually. Also, more than once the FreeBSD hints file has been - # written in what turned out to be a rather non-future-proof style, - # with case statements treating the most recent version as the exception, - # instead of treating previous versions' behaviour explicitly and changing - # the default to cater for the current behaviour. (As strangely, future - # versions inherit the current behaviour.) - checkout_file('hints/freebsd.sh'); -} elsif ($^O eq 'darwin') { - if ($major < 8) { - my $faking_it; - # We can't build on darwin without some of the data in the hints file. - foreach ('ext/DynaLoader/dl_dyld.xs', 'hints/darwin.sh') { - next if -f $_; - ++$faking_it; - # Probably less surprising to use the earliest version of +sub patch_hints { + if ($^O eq 'freebsd') { + # There are rather too many version-specific FreeBSD hints fixes to + # patch individually. Also, more than once the FreeBSD hints file has + # been written in what turned out to be a rather non-future-proof style, + # with case statements treating the most recent version as the + # exception, instead of treating previous versions' behaviour explicitly + # and changing the default to cater for the current behaviour. (As + # strangely, future versions inherit the current behaviour.) + checkout_file('hints/freebsd.sh'); + } elsif ($^O eq 'darwin') { + if ($major < 8) { + # We can't build on darwin without some of the data in the hints + # file. Probably less surprising to use the earliest version of # hints/darwin.sh and then edit in place just below, than use # blead's version, as that would create a discontinuity at # f556e5b971932902 - before it, hints bugs would be "fixed", after # it they'd resurface. This way, we should give the illusion of # monotonic bug fixing. - checkout_file($_, 'f556e5b971932902'); - } - if ($faking_it) { - apply_patch(<<'EOPATCH'); -diff -u a/ext/DynaLoader/dl_dyld.xs~ a/ext/DynaLoader/dl_dyld.xs ---- a/ext/DynaLoader/dl_dyld.xs~ 2011-10-11 21:41:27.000000000 +0100 -+++ b/ext/DynaLoader/dl_dyld.xs 2011-10-11 21:42:20.000000000 +0100 -@@ -41,6 +41,35 @@ - #include "perl.h" - #include "XSUB.h" - -+#ifndef pTHX -+# define pTHX void -+# define pTHX_ -+#endif -+#ifndef aTHX -+# define aTHX -+# define aTHX_ -+#endif -+#ifndef dTHX -+# define dTHXa(a) extern int Perl___notused(void) -+# define dTHX extern int Perl___notused(void) -+#endif -+ -+#ifndef Perl_form_nocontext -+# define Perl_form_nocontext form -+#endif -+ -+#ifndef Perl_warn_nocontext -+# define Perl_warn_nocontext warn -+#endif -+ -+#ifndef PTR2IV -+# define PTR2IV(p) (IV)(p) -+#endif -+ -+#ifndef get_av -+# define get_av perl_get_av -+#endif -+ - #define DL_LOADONCEONLY - - #include "dlutils.c" /* SaveError() etc */ -@@ -185,7 +191,7 @@ - CODE: - DLDEBUG(1,PerlIO_printf(Perl_debug_log, "dl_load_file(%s,%x):\n", filename,flags)); - if (flags & 0x01) -- Perl_warn(aTHX_ "Can't make loaded symbols global on this platform while loading %s",filename); -+ Perl_warn_nocontext("Can't make loaded symbols global on this platform while loading %s",filename); - RETVAL = dlopen(filename, mode) ; - DLDEBUG(2,PerlIO_printf(Perl_debug_log, " libref=%x\n", RETVAL)); - ST(0) = sv_newmortal() ; -EOPATCH - if ($major < 4 && !extract_from_file('util.c', qr/^form/m)) { - apply_patch(<<'EOPATCH'); -diff -u a/ext/DynaLoader/dl_dyld.xs~ a/ext/DynaLoader/dl_dyld.xs ---- a/ext/DynaLoader/dl_dyld.xs~ 2011-10-11 21:56:25.000000000 +0100 -+++ b/ext/DynaLoader/dl_dyld.xs 2011-10-11 22:00:00.000000000 +0100 -@@ -60,6 +60,18 @@ - # define get_av perl_get_av - #endif - -+static char * -+form(char *pat, ...) -+{ -+ char *retval; -+ va_list args; -+ va_start(args, pat); -+ vasprintf(&retval, pat, &args); -+ va_end(args); -+ SAVEFREEPV(retval); -+ return retval; -+} -+ - #define DL_LOADONCEONLY - - #include "dlutils.c" /* SaveError() etc */ -EOPATCH + my $faking_it; + if (!-f 'hints/darwin.sh') { + checkout_file('hints/darwin.sh', 'f556e5b971932902'); + ++$faking_it; } - } - edit_file('hints/darwin.sh', sub { + edit_file('hints/darwin.sh', sub { my $code = shift; # Part of commit 8f4f83badb7d1ba9, which mostly undoes # commit 0511a818910f476c. @@ -1223,13 +1405,13 @@ EOPATCH if $faking_it; return $code; }); - } -} elsif ($^O eq 'netbsd') { - if ($major < 6) { - # These are part of commit 099685bc64c7dbce - edit_file('hints/netbsd.sh', sub { - my $code = shift; - my $fixed = <<'EOC'; + } + } elsif ($^O eq 'netbsd') { + if ($major < 6) { + # These are part of commit 099685bc64c7dbce + edit_file('hints/netbsd.sh', sub { + my $code = shift; + my $fixed = <<'EOC'; case "$osvers" in 0.9|0.8*) usedl="$undef" @@ -1258,42 +1440,23 @@ case "$osvers" in ;; esac EOC - $code =~ s/^case "\$osvers" in\n0\.9\|0\.8.*?^esac\n/$fixed/ms; - return $code; - }); - if (!extract_from_file('unixish.h', - qr/defined\(NSIG\).*defined\(__NetBSD__\)/)) { - apply_patch(<<'EOPATCH') -diff --git a/unixish.h b/unixish.h -index 2a6cbcd..eab2de1 100644 ---- a/unixish.h -+++ b/unixish.h -@@ -89,7 +89,7 @@ - */ - /* #define ALTERNATE_SHEBANG "#!" / **/ - --#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX) -+#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX) || defined(__NetBSD__) - # include <signal.h> - #endif - -EOPATCH + $code =~ s/^case "\$osvers" in\n0\.9\|0\.8.*?^esac\n/$fixed/ms; + return $code; + }); } - } -} elsif ($^O eq 'openbsd') { - checkout_file('hints/openbsd.sh', '43051805d53a3e4c') - unless -f 'hints/openbsd.sh'; - - if ($major < 8) { - my $which = extract_from_file('hints/openbsd.sh', - qr/# from (2\.8|3\.1) onwards/, - ''); - if ($which eq '') { - my $was = extract_from_file('hints/openbsd.sh', - qr/(lddlflags="(?:-Bforcearchive )?-Bshareable)/); - # This is commit 154d43cbcf57271c and parts of 5c75dbfa77b0949c - # and 29b5585702e5e025 - apply_patch(sprintf <<'EOPATCH', $was); + } elsif ($^O eq 'openbsd') { + if ($major < 8) { + checkout_file('hints/openbsd.sh', '43051805d53a3e4c') + unless -f 'hints/openbsd.sh'; + my $which = extract_from_file('hints/openbsd.sh', + qr/# from (2\.8|3\.1) onwards/, + ''); + if ($which eq '') { + my $was = extract_from_file('hints/openbsd.sh', + qr/(lddlflags="(?:-Bforcearchive )?-Bshareable)/); + # This is commit 154d43cbcf57271c and parts of 5c75dbfa77b0949c + # and 29b5585702e5e025 + apply_patch(sprintf <<'EOPATCH', $was); diff --git a/hints/openbsd.sh b/hints/openbsd.sh index a7d8bf2..5b79709 100644 --- a/hints/openbsd.sh @@ -1326,13 +1489,13 @@ index a7d8bf2..5b79709 100644 esac EOPATCH - } elsif ($which eq '2.8') { - # This is parts of 5c75dbfa77b0949c and 29b5585702e5e025, and - # possibly eb9cd59d45ad2908 - my $was = extract_from_file('hints/openbsd.sh', - qr/lddlflags="(-shared(?: -fPIC)?) \$lddlflags"/); + } elsif ($which eq '2.8') { + # This is parts of 5c75dbfa77b0949c and 29b5585702e5e025, and + # possibly eb9cd59d45ad2908 + my $was = extract_from_file('hints/openbsd.sh', + qr/lddlflags="(-shared(?: -fPIC)?) \$lddlflags"/); - apply_patch(sprintf <<'EOPATCH', $was); + apply_patch(sprintf <<'EOPATCH', $was); --- a/hints/openbsd.sh 2011-10-21 17:25:20.000000000 +0200 +++ b/hints/openbsd.sh 2011-10-21 16:58:43.000000000 +0200 @@ -44,11 +44,21 @@ @@ -1360,11 +1523,11 @@ EOPATCH esac EOPATCH - } elsif ($which eq '3.1' - && !extract_from_file('hints/openbsd.sh', - qr/We need to force ld to export symbols on ELF platforms/)) { - # This is part of 29b5585702e5e025 - apply_patch(<<'EOPATCH'); + } elsif ($which eq '3.1' + && !extract_from_file('hints/openbsd.sh', + qr/We need to force ld to export symbols on ELF platforms/)) { + # This is part of 29b5585702e5e025 + apply_patch(<<'EOPATCH'); diff --git a/hints/openbsd.sh b/hints/openbsd.sh index c6b6bc9..4839d04 100644 --- a/hints/openbsd.sh @@ -1382,9 +1545,248 @@ index c6b6bc9..4839d04 100644 esac EOPATCH + } } + } elsif ($^O eq 'linux') { + if ($major < 1) { + # sparc linux seems to need the -Dbool=char -DHAS_BOOL part of + # perl5.000 patch.0n: [address Configure and build issues] + edit_file('hints/linux.sh', sub { + my $code = shift; + $code =~ s!-I/usr/include/bsd!-Dbool=char -DHAS_BOOL!g; + return $code; + }); + } + + if ($major <= 9) { + if (`uname -sm` =~ qr/^Linux sparc/) { + if (extract_from_file('hints/linux.sh', qr/sparc-linux/)) { + # Be sure to use -fPIC not -fpic on Linux/SPARC + apply_commit('f6527d0ef0c13ad4'); + } elsif(!extract_from_file('hints/linux.sh', + qr/^sparc-linux\)$/)) { + my $fh = open_or_die('hints/linux.sh', '>>'); + print $fh <<'EOT' or die $!; + +case "`uname -m`" in +sparc*) + case "$cccdlflags" in + *-fpic*) cccdlflags="`echo $cccdlflags|sed 's/-fpic/-fPIC/'`" ;; + *) cccdlflags="$cccdlflags -fPIC" ;; + esac + ;; +esac +EOT + close_or_die($fh); + } + } + } + } +} + +sub patch_SH { + # Cwd.xs added in commit 0d2079faa739aaa9. Cwd.pm moved to ext/ 8 years + # later in commit 403f501d5b37ebf0 + if ($major > 0 && <*/Cwd/Cwd.xs>) { + if ($major < 10 + && !extract_from_file('Makefile.SH', qr/^extra_dep=''$/)) { + # The Makefile.PL for Unicode::Normalize needs + # lib/unicore/CombiningClass.pl. Even without a parallel build, we + # need a dependency to ensure that it builds. This is a variant of + # commit 9f3ef600c170f61e. Putting this for earlier versions gives + # us a spot on which to hang the edits below + apply_patch(<<'EOPATCH'); +diff --git a/Makefile.SH b/Makefile.SH +index f61d0db..6097954 100644 +--- a/Makefile.SH ++++ b/Makefile.SH +@@ -155,10 +155,20 @@ esac + + : Prepare dependency lists for Makefile. + dynamic_list=' ' ++extra_dep='' + for f in $dynamic_ext; do + : the dependency named here will never exist + base=`echo "$f" | sed 's/.*\///'` +- dynamic_list="$dynamic_list lib/auto/$f/$base.$dlext" ++ this_target="lib/auto/$f/$base.$dlext" ++ dynamic_list="$dynamic_list $this_target" ++ ++ : Parallel makes reveal that we have some interdependencies ++ case $f in ++ Math/BigInt/FastCalc) extra_dep="$extra_dep ++$this_target: lib/auto/List/Util/Util.$dlext" ;; ++ Unicode/Normalize) extra_dep="$extra_dep ++$this_target: lib/unicore/CombiningClass.pl" ;; ++ esac + done + + static_list=' ' +@@ -987,2 +997,9 @@ n_dummy $(nonxs_ext): miniperl$(EXE_EXT) preplibrary $(DYNALOADER) FORCE + @$(LDLIBPTH) sh ext/util/make_ext nonxs $@ MAKE=$(MAKE) LIBPERL_A=$(LIBPERL) ++!NO!SUBS! ++ ++$spitshell >>Makefile <<EOF ++$extra_dep ++EOF ++ ++$spitshell >>Makefile <<'!NO!SUBS!' + +EOPATCH + } + if ($major < 14) { + # Commits dc0655f797469c47 and d11a62fe01f2ecb2 + edit_file('Makefile.SH', sub { + my $code = shift; + foreach my $ext (qw(Encode SDBM_File)) { + next if $code =~ /\b$ext\) extra_dep=/s; + $code =~ s!(\) extra_dep="\$extra_dep +\$this_target: .*?" ;;) +( esac +)!$1 + $ext) extra_dep="\$extra_dep +\$this_target: lib/auto/Cwd/Cwd.\$dlext" ;; +$2!; + } + return $code; + }); + } + } + + if ($major == 7) { + # Remove commits 9fec149bb652b6e9 and 5bab1179608f81d8, which add/amend + # rules to automatically run regen scripts that rebuild C headers. These + # cause problems because a git checkout doesn't preserve relative file + # modification times, hence the regen scripts may fire. This will + # obscure whether the repository had the correct generated headers + # checked in. + # Also, the dependency rules for running the scripts were not correct, + # which could cause spurious re-builds on re-running make, and can cause + # complete build failures for a parallel make. + if (extract_from_file('Makefile.SH', + qr/Writing it this way gives make a big hint to always run opcode\.pl before/)) { + apply_commit('70c6e6715e8fec53'); + } elsif (extract_from_file('Makefile.SH', + qr/^opcode\.h opnames\.h pp_proto\.h pp\.sym: opcode\.pl$/)) { + revert_commit('9fec149bb652b6e9'); + } + } + + # There was a bug in makedepend.SH which was fixed in version 96a8704c. + # Symptom was './makedepend: 1: Syntax error: Unterminated quoted string' + # Remove this if you're actually bisecting a problem related to + # makedepend.SH + # If you do this, you may need to add in code to correct the output of older + # makedepends, which don't correctly filter newer gcc output such as + # <built-in> + checkout_file('makedepend.SH'); + + if ($major < 4 && -f 'config.sh' + && !extract_from_file('config.sh', qr/^trnl=/)) { + # This seems to be necessary to avoid makedepend becoming confused, + # and hanging on stdin. Seems that the code after + # make shlist || ...here... is never run. + edit_file('makedepend.SH', sub { + my $code = shift; + $code =~ s/^trnl='\$trnl'$/trnl='\\n'/m; + return $code; + }); } - if ($major < 4) { +} + +sub patch_C { + # This is ordered by $major, as it's likely that different platforms may + # well want to share code. + + if ($major == 2 && extract_from_file('perl.c', qr/^\tfclose\(e_fp\);$/)) { + # need to patch perl.c to avoid calling fclose() twice on e_fp when + # using -e + # This diff is part of commit ab821d7fdc14a438. The second close was + # introduced with perl-5.002, commit a5f75d667838e8e7 + # Might want a6c477ed8d4864e6 too, for the corresponding change to + # pp_ctl.c (likely without this, eval will have "fun") + apply_patch(<<'EOPATCH'); +diff --git a/perl.c b/perl.c +index 03c4d48..3c814a2 100644 +--- a/perl.c ++++ b/perl.c +@@ -252,6 +252,7 @@ setuid perl scripts securely.\n"); + #ifndef VMS /* VMS doesn't have environ array */ + origenviron = environ; + #endif ++ e_tmpname = Nullch; + + if (do_undump) { + +@@ -405,6 +406,7 @@ setuid perl scripts securely.\n"); + if (e_fp) { + if (Fflush(e_fp) || ferror(e_fp) || fclose(e_fp)) + croak("Can't write to temp file for -e: %s", Strerror(errno)); ++ e_fp = Nullfp; + argc++,argv--; + scriptname = e_tmpname; + } +@@ -470,10 +472,10 @@ setuid perl scripts securely.\n"); + curcop->cop_line = 0; + curstash = defstash; + preprocess = FALSE; +- if (e_fp) { +- fclose(e_fp); +- e_fp = Nullfp; ++ if (e_tmpname) { + (void)UNLINK(e_tmpname); ++ Safefree(e_tmpname); ++ e_tmpname = Nullch; + } + + /* now that script is parsed, we can modify record separator */ +@@ -1369,7 +1371,7 @@ SV *sv; + scriptname = xfound; + } + +- origfilename = savepv(e_fp ? "-e" : scriptname); ++ origfilename = savepv(e_tmpname ? "-e" : scriptname); + curcop->cop_filegv = gv_fetchfile(origfilename); + if (strEQ(origfilename,"-")) + scriptname = ""; + +EOPATCH + } + + if ($major < 3 && $^O eq 'openbsd' + && !extract_from_file('pp_sys.c', qr/BSD_GETPGRP/)) { + # Part of commit c3293030fd1b7489 + apply_patch(<<'EOPATCH'); +diff --git a/pp_sys.c b/pp_sys.c +index 4608a2a..f0c9d1d 100644 +--- a/pp_sys.c ++++ b/pp_sys.c +@@ -2903,8 +2903,8 @@ PP(pp_getpgrp) + pid = 0; + else + pid = SvIVx(POPs); +-#ifdef USE_BSDPGRP +- value = (I32)getpgrp(pid); ++#ifdef BSD_GETPGRP ++ value = (I32)BSD_GETPGRP(pid); + #else + if (pid != 0) + DIE("POSIX getpgrp can't take an argument"); +@@ -2933,8 +2933,8 @@ PP(pp_setpgrp) + } + + TAINT_PROPER("setpgrp"); +-#ifdef USE_BSDPGRP +- SETi( setpgrp(pid, pgrp) >= 0 ); ++#ifdef BSD_SETPGRP ++ SETi( BSD_SETPGRP(pid, pgrp) >= 0 ); + #else + if ((pgrp != 0) || (pid != 0)) { + DIE("POSIX setpgrp can't take an argument"); +EOPATCH + } + + if ($major < 4 && $^O eq 'openbsd') { my $bad; # Need changes from commit a6e633defa583ad5. # Commits c07a80fdfe3926b5 and f82b3d4130164d5f changed the same part @@ -1533,340 +1935,25 @@ $bad--- 91,144 ---- EOPATCH } } - if ($major < 3 && !extract_from_file('pp_sys.c', qr/BSD_GETPGRP/)) { - # Part of commit c3293030fd1b7489 - apply_patch(<<'EOPATCH'); -diff --git a/pp_sys.c b/pp_sys.c -index 4608a2a..f0c9d1d 100644 ---- a/pp_sys.c -+++ b/pp_sys.c -@@ -2903,8 +2903,8 @@ PP(pp_getpgrp) - pid = 0; - else - pid = SvIVx(POPs); --#ifdef USE_BSDPGRP -- value = (I32)getpgrp(pid); -+#ifdef BSD_GETPGRP -+ value = (I32)BSD_GETPGRP(pid); - #else - if (pid != 0) - DIE("POSIX getpgrp can't take an argument"); -@@ -2933,8 +2933,8 @@ PP(pp_setpgrp) - } - - TAINT_PROPER("setpgrp"); --#ifdef USE_BSDPGRP -- SETi( setpgrp(pid, pgrp) >= 0 ); -+#ifdef BSD_SETPGRP -+ SETi( BSD_SETPGRP(pid, pgrp) >= 0 ); - #else - if ((pgrp != 0) || (pid != 0)) { - DIE("POSIX setpgrp can't take an argument"); -EOPATCH - } -} elsif ($^O eq 'linux') { - if ($major < 1) { - # sparc linux seems to need the -Dbool=char -DHAS_BOOL part of - # perl5.000 patch.0n: [address Configure and build issues] - edit_file('hints/linux.sh', sub { - my $code = shift; - $code =~ s!-I/usr/include/bsd!-Dbool=char -DHAS_BOOL!g; - return $code; - }); - } - - if ($major <= 9) { - if (`uname -sm` =~ qr/^Linux sparc/) { - if (extract_from_file('hints/linux.sh', qr/sparc-linux/)) { - # Be sure to use -fPIC not -fpic on Linux/SPARC - apply_commit('f6527d0ef0c13ad4'); - } elsif(!extract_from_file('hints/linux.sh', qr/^sparc-linux\)$/)) { - my $fh = open_or_die('hints/linux.sh', '>>'); - print $fh <<'EOT' or die $!; - -case "`uname -m`" in -sparc*) - case "$cccdlflags" in - *-fpic*) cccdlflags="`echo $cccdlflags|sed 's/-fpic/-fPIC/'`" ;; - *) cccdlflags="$cccdlflags -fPIC" ;; - esac - ;; -esac -EOT - close_or_die($fh); - } - } - } -} - -if ($major < 10) { - if (!extract_from_file('ext/DB_File/DB_File.xs', - qr!^#else /\* Berkeley DB Version > 2 \*/$!)) { - # This DB_File.xs is really too old to patch up. - # Skip DB_File, unless we're invoked with an explicit -Unoextensions - if (!exists $defines{noextensions}) { - $defines{noextensions} = 'DB_File'; - } elsif (defined $defines{noextensions}) { - $defines{noextensions} .= ' DB_File'; - } - } elsif (!extract_from_file('ext/DB_File/DB_File.xs', - qr/^#ifdef AT_LEAST_DB_4_1$/)) { - # This line is changed by commit 3245f0580c13b3ab - my $line = extract_from_file('ext/DB_File/DB_File.xs', - qr/^( status = \(?RETVAL->dbp->open\)?\(RETVAL->dbp, name, NULL, RETVAL->type, $)/); - apply_patch(<<"EOPATCH"); -diff --git a/ext/DB_File/DB_File.xs b/ext/DB_File/DB_File.xs -index 489ba96..fba8ded 100644 ---- a/ext/DB_File/DB_File.xs -+++ b/ext/DB_File/DB_File.xs -\@\@ -183,4 +187,8 \@\@ - #endif - -+#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) -+# define AT_LEAST_DB_4_1 -+#endif -+ - /* map version 2 features & constants onto their version 1 equivalent */ - -\@\@ -1334,7 +1419,12 \@\@ SV * sv ; - #endif - -+#ifdef AT_LEAST_DB_4_1 -+ status = (RETVAL->dbp->open)(RETVAL->dbp, NULL, name, NULL, RETVAL->type, -+ Flags, mode) ; -+#else - $line - Flags, mode) ; -+#endif - /* printf("open returned %d %s\\n", status, db_strerror(status)) ; */ - -EOPATCH - } -} - -# if Encode is not needed for the test, you can speed up the bisect by -# excluding it from the runs with -Dnoextensions=Encode -# ccache is an easy win. Remove it if it causes problems. -# Commit 1cfa4ec74d4933da adds ignore_versioned_solibs to Configure, and sets it -# to true in hints/linux.sh -# On dromedary, from that point on, Configure (by default) fails to find any -# libraries, because it scans /usr/local/lib /lib /usr/lib, which only contain -# versioned libraries. Without -lm, the build fails. -# Telling /usr/local/lib64 /lib64 /usr/lib64 works from that commit onwards, -# until commit faae14e6e968e1c0 adds it to the hints. -# However, prior to 1cfa4ec74d4933da telling Configure the truth doesn't work, -# because it will spot versioned libraries, pass them to the compiler, and then -# bail out pretty early on. Configure won't let us override libswanted, but it -# will let us override the entire libs list. - -unless (extract_from_file('Configure', 'ignore_versioned_solibs')) { - # Before 1cfa4ec74d4933da, so force the libs list. - - my @libs; - # This is the current libswanted list from Configure, less the libs removed - # by current hints/linux.sh - foreach my $lib (qw(sfio socket inet nsl nm ndbm gdbm dbm db malloc dl dld - ld sun m crypt sec util c cposix posix ucb BSD)) { - foreach my $dir (@paths) { - next unless -f "$dir/lib$lib.so"; - push @libs, "-l$lib"; - last; - } - } - $defines{libs} = \@libs unless exists $defines{libs}; -} -$defines{usenm} = undef - if $major < 2 && !exists $defines{usenm}; - -my (@missing, @created_dirs); - -if ($options{'force-manifest'}) { - my $fh = open_or_die('MANIFEST'); - while (<$fh>) { - next unless /^(\S+)/; - # -d is special case needed (at least) between 27332437a2ed1941 and - # bf3d9ec563d25054^ inclusive, as manifest contains ext/Thread/Thread - push @missing, $1 - unless -f $1 || -d $1; + if ($major == 4 && extract_from_file('scope.c', qr/\(SV\*\)SSPOPINT/)) { + # [PATCH] 5.004_04 +MAINT_TRIAL_1 broken when sizeof(int) != sizeof(void) + # Fixes a bug introduced in 161b7d1635bc830b + apply_commit('9002cb76ec83ef7f'); } - close_or_die($fh); - foreach my $pathname (@missing) { - my @parts = split '/', $pathname; - my $leaf = pop @parts; - my $path = '.'; - while (@parts) { - $path .= '/' . shift @parts; - next if -d $path; - mkdir $path, 0700 or die "Can't create $path: $!"; - unshift @created_dirs, $path; - } - $fh = open_or_die($pathname, '>'); - close_or_die($fh); - chmod 0, $pathname or die "Can't chmod 0 $pathname: $!"; + if ($major == 4 && extract_from_file('av.c', qr/AvARRAY\(av\) = 0;/)) { + # Fixes a bug introduced in 1393e20655efb4bc + apply_commit('e1c148c28bf3335b', 'av.c'); } -} -my @ARGS = '-dEs'; -foreach my $key (sort keys %defines) { - my $val = $defines{$key}; - if (ref $val) { - push @ARGS, "-D$key=@$val"; - } elsif (!defined $val) { - push @ARGS, "-U$key"; - } elsif (!length $val) { - push @ARGS, "-D$key"; - } else { - $val = "" if $val eq "\0"; - push @ARGS, "-D$key=$val"; - } -} -push @ARGS, map {"-A$_"} @{$options{A}}; - -# </dev/null because it seems that some earlier versions of Configure can -# call commands in a way that now has them reading from stdin (and hanging) -my $pid = fork; -die "Can't fork: $!" unless defined $pid; -if (!$pid) { - open STDIN, '<', '/dev/null'; - # If a file in MANIFEST is missing, Configure asks if you want to - # continue (the default being 'n'). With stdin closed or /dev/null, - # it exits immediately and the check for config.sh below will skip. - exec './Configure', @ARGS; - die "Failed to start Configure: $!"; -} -waitpid $pid, 0 - or die "wait for Configure, pid $pid failed: $!"; - -# Emulate noextensions if Configure doesn't support it. -if (-f 'config.sh') { - if ($major < 10 && $defines{noextensions}) { - edit_file('config.sh', sub { - my @lines = split /\n/, shift; - my @ext = split /\s+/, $defines{noextensions}; - foreach (@lines) { - next unless /^extensions=/ || /^dynamic_ext/; - foreach my $ext (@ext) { - s/\b$ext( )?\b/$1/; - } - } - return join "\n", @lines; - }); - } - if ($major < 4 && !extract_from_file('config.sh', qr/^trnl=/)) { - # This seems to be necessary to avoid makedepend becoming confused, - # and hanging on stdin. Seems that the code after - # make shlist || ...here... is never run. - edit_file('makedepend.SH', sub { - my $code = shift; - $code =~ s/^trnl='\$trnl'$/trnl='\\n'/m; - return $code; - }); - } - - system './Configure -S </dev/null' and die; -} - -if ($target =~ /config\.s?h/) { - match_and_exit($target) if $match && -f $target; - report_and_exit(!-f $target, 'could build', 'could not build', $target) - if $options{'test-build'}; - - my $ret = system @ARGV; - report_and_exit($ret, 'zero exit from', 'non-zero exit from', "@ARGV"); -} elsif (!-f 'config.sh') { - # Skip if something went wrong with Configure - - skip('could not build config.sh'); -} - -# This is probably way too paranoid: -if (@missing) { - my @errors; - require Fcntl; - foreach my $file (@missing) { - my (undef, undef, $mode, undef, undef, undef, undef, $size) - = stat $file; - if (!defined $mode) { - push @errors, "Added file $file has been deleted by Configure"; - next; - } - if (Fcntl::S_IMODE($mode) != 0) { - push @errors, - sprintf 'Added file %s had mode changed by Configure to %03o', - $file, $mode; - } - if ($size != 0) { - push @errors, - "Added file $file had sized changed by Configure to $size"; - } - unlink $file or die "Can't unlink $file: $!"; - } - foreach my $dir (@created_dirs) { - rmdir $dir or die "Can't rmdir $dir: $!"; - } - skip("@errors") - if @errors; -} - -if ($major == 2 && extract_from_file('perl.c', qr/^ fclose\(e_fp\);$/)) { - # need to patch perl.c to avoid calling fclose() twice on e_fp when using -e - # This diff is part of commit ab821d7fdc14a438. The second close was - # introduced with perl-5.002, commit a5f75d667838e8e7 - # Might want a6c477ed8d4864e6 too, for the corresponding change to pp_ctl.c - # (likely without this, eval will have "fun") - apply_patch(<<'EOPATCH'); -diff --git a/perl.c b/perl.c -index 03c4d48..3c814a2 100644 ---- a/perl.c -+++ b/perl.c -@@ -252,6 +252,7 @@ setuid perl scripts securely.\n"); - #ifndef VMS /* VMS doesn't have environ array */ - origenviron = environ; - #endif -+ e_tmpname = Nullch; - - if (do_undump) { - -@@ -405,6 +406,7 @@ setuid perl scripts securely.\n"); - if (e_fp) { - if (Fflush(e_fp) || ferror(e_fp) || fclose(e_fp)) - croak("Can't write to temp file for -e: %s", Strerror(errno)); -+ e_fp = Nullfp; - argc++,argv--; - scriptname = e_tmpname; - } -@@ -470,10 +472,10 @@ setuid perl scripts securely.\n"); - curcop->cop_line = 0; - curstash = defstash; - preprocess = FALSE; -- if (e_fp) { -- fclose(e_fp); -- e_fp = Nullfp; -+ if (e_tmpname) { - (void)UNLINK(e_tmpname); -+ Safefree(e_tmpname); -+ e_tmpname = Nullch; - } - - /* now that script is parsed, we can modify record separator */ -@@ -1369,7 +1371,7 @@ SV *sv; - scriptname = xfound; - } - -- origfilename = savepv(e_fp ? "-e" : scriptname); -+ origfilename = savepv(e_tmpname ? "-e" : scriptname); - curcop->cop_filegv = gv_fetchfile(origfilename); - if (strEQ(origfilename,"-")) - scriptname = ""; - -EOPATCH -} - -if ($major == 4 && !extract_from_file('perl.c', qr/delimcpy.*,$/)) { - # bug introduced in 2a92aaa05aa1acbf, fixed in 8490252049bf42d3 - apply_patch(<<'EOPATCH'); + if ($major == 4) { + my $rest = extract_from_file('perl.c', qr/delimcpy(.*)/); + if (defined $rest and $rest !~ /,$/) { + # delimcpy added in fc36a67e8855d031, perl.c refactored to use it. + # bug introduced in 2a92aaa05aa1acbf, fixed in 8490252049bf42d3 + # code then moved to util.c in commit 491527d0220de34e + apply_patch(<<'EOPATCH'); diff --git a/perl.c b/perl.c index 4eb69e3..54bbb00 100644 --- a/perl.c @@ -1881,122 +1968,35 @@ index 4eb69e3..54bbb00 100644 &len); #endif /* ! (atarist || DOSISH) */ EOPATCH -} - -if (($major >= 7 || $major <= 9) && $^O eq 'openbsd' - && `uname -m` eq "sparc64\n" - # added in 2000 by commit cb434fcc98ac25f5: - && extract_from_file('regexec.c', - qr!/\* No need to save/restore up to this paren \*/!) - # re-indented in 2006 by commit 95b2444054382532: - && extract_from_file('regexec.c', qr/^\t\tCURCUR cc;$/)) { - # Need to work around a bug in (at least) OpenBSD's 4.6's sparc64 compiler - # ["gcc (GCC) 3.3.5 (propolice)"]. Between commits 3ec562b0bffb8b8b (2002) - # and 1a4fad37125bac3e^ (2005) the darling thing fails to compile any code - # for the statement cc.oldcc = PL_regcc; - # If you refactor the code to "fix" that, or force the issue using set in - # the debugger, the stack smashing detection code fires on return from - # S_regmatch(). Turns out that the compiler doesn't allocate any (or at - # least enough) space for cc. - # Restore the "uninitialised" value for cc before function exit, and the - # stack smashing code is placated. - # "Fix" 3ec562b0bffb8b8b (which changes the size of auto variables used - # elsewhere in S_regmatch), and the crash is visible back to - # bc517b45fdfb539b (which also changes buffer sizes). "Unfix" - # 1a4fad37125bac3e and the crash is visible until 5b47454deb66294b. - # Problem goes away if you compile with -O, or hack the code as below. - # - # Hence this turns out to be a bug in (old) gcc. Not a security bug we - # still need to fix. - apply_patch(<<'EOPATCH'); -diff --git a/regexec.c b/regexec.c -index 900b491..6251a0b 100644 ---- a/regexec.c -+++ b/regexec.c -@@ -2958,7 +2958,11 @@ S_regmatch(pTHX_ regnode *prog) - I,I - *******************************************************************/ - case CURLYX: { -- CURCUR cc; -+ union { -+ CURCUR hack_cc; -+ char hack_buff[sizeof(CURCUR) + 1]; -+ } hack; -+#define cc hack.hack_cc - CHECKPOINT cp = PL_savestack_ix; - /* No need to save/restore up to this paren */ - I32 parenfloor = scan->flags; -@@ -2983,6 +2987,7 @@ S_regmatch(pTHX_ regnode *prog) - n = regmatch(PREVOPER(next)); /* start on the WHILEM */ - regcpblow(cp); - PL_regcc = cc.oldcc; -+#undef cc - saySAME(n); - } - /* NOT REACHED */ -EOPATCH -} - -if ($major < 8 && $^O eq 'openbsd' - && !extract_from_file('perl.h', qr/include <unistd\.h>/)) { - # This is part of commit 3f270f98f9305540, applied at a slightly different - # location in perl.h, where the context is stable back to 5.000 - apply_patch(<<'EOPATCH'); -diff --git a/perl.h b/perl.h -index 9418b52..b8b1a7c 100644 ---- a/perl.h -+++ b/perl.h -@@ -496,6 +496,10 @@ register struct op *Perl_op asm(stringify(OP_IN_REGISTER)); - # include <sys/param.h> - #endif - -+/* If this causes problems, set i_unistd=undef in the hint file. */ -+#ifdef I_UNISTD -+# include <unistd.h> -+#endif - - /* Use all the "standard" definitions? */ - #if defined(STANDARD_C) && defined(I_STDLIB) -EOPATCH -} - -if ($major == 4 && extract_from_file('scope.c', qr/\(SV\*\)SSPOPINT/)) { - # [PATCH] 5.004_04 +MAINT_TRIAL_1 broken when sizeof(int) != sizeof(void) - # Fixes a bug introduced in 161b7d1635bc830b - apply_commit('9002cb76ec83ef7f'); -} - -if ($major == 4 && extract_from_file('av.c', qr/AvARRAY\(av\) = 0;/)) { - # Fixes a bug introduced in 1393e20655efb4bc - apply_commit('e1c148c28bf3335b', 'av.c'); -} - -if ($major == 4 && $^O eq 'linux') { - # Whilst this is fixed properly in f0784f6a4c3e45e1 which provides the - # Configure probe, it's easier to back out the problematic changes made in - # these previous commits: - if (extract_from_file('doio.c', - qr!^/\* XXX REALLY need metaconfig test \*/$!)) { - revert_commit('4682965a1447ea44', 'doio.c'); - } - if (my $token = extract_from_file('doio.c', - qr!^#if (defined\(__sun(?:__)?\)) && defined\(__svr4__\) /\* XXX Need metaconfig test \*/$!)) { - my $patch = `git show -R 9b599b2a63d2324d doio.c`; - $patch =~ s/defined\(__sun__\)/$token/g; - apply_patch($patch); - } - if (extract_from_file('doio.c', - qr!^/\* linux \(and Solaris2\?\) uses :$!)) { - revert_commit('8490252049bf42d3', 'doio.c'); - } - if (extract_from_file('doio.c', - qr/^ unsemds.buf = &semds;$/)) { - revert_commit('8e591e46b4c6543e'); + } } - if (extract_from_file('doio.c', - qr!^#ifdef __linux__ /\* XXX Need metaconfig test \*/$!)) { - # Reverts part of commit 3e3baf6d63945cb6 - apply_patch(<<'EOPATCH'); + + if ($major == 4 && $^O eq 'linux') { + # Whilst this is fixed properly in f0784f6a4c3e45e1 which provides the + # Configure probe, it's easier to back out the problematic changes made + # in these previous commits: + if (extract_from_file('doio.c', + qr!^/\* XXX REALLY need metaconfig test \*/$!)) { + revert_commit('4682965a1447ea44', 'doio.c'); + } + if (my $token = extract_from_file('doio.c', + qr!^#if (defined\(__sun(?:__)?\)) && defined\(__svr4__\) /\* XXX Need metaconfig test \*/$!)) { + my $patch = `git show -R 9b599b2a63d2324d doio.c`; + $patch =~ s/defined\(__sun__\)/$token/g; + apply_patch($patch); + } + if (extract_from_file('doio.c', + qr!^/\* linux \(and Solaris2\?\) uses :$!)) { + revert_commit('8490252049bf42d3', 'doio.c'); + } + if (extract_from_file('doio.c', + qr/^ unsemds.buf = &semds;$/)) { + revert_commit('8e591e46b4c6543e'); + } + if (extract_from_file('doio.c', + qr!^#ifdef __linux__ /\* XXX Need metaconfig test \*/$!)) { + # Reverts part of commit 3e3baf6d63945cb6 + apply_patch(<<'EOPATCH'); diff --git b/doio.c a/doio.c index 62b7de9..0d57425 100644 --- b/doio.c @@ -2055,25 +2055,263 @@ index 62b7de9..0d57425 100644 #endif #ifdef HAS_SHM EOPATCH + } + # Incorrect prototype added as part of 8ac853655d9b7447, fixed as part + # of commit dc45a647708b6c54, with at least one intermediate + # modification. Correct prototype for gethostbyaddr has socklen_t + # second. Linux has uint32_t first for getnetbyaddr. + # Easiest just to remove, instead of attempting more complex patching. + # Something similar may be needed on other platforms. + edit_file('pp_sys.c', sub { + my $code = shift; + $code =~ s/^ struct hostent \*(?:PerlSock_)?gethostbyaddr\([^)]+\);$//m; + $code =~ s/^ struct netent \*getnetbyaddr\([^)]+\);$//m; + return $code; + }); } - # Incorrect prototype added as part of 8ac853655d9b7447, fixed as part of - # commit dc45a647708b6c54, with at least one intermediate modification. - # Correct prototype for gethostbyaddr has socklen_t second. Linux has - # uint32_t first for getnetbyaddr. - # Easiest just to remove, instead of attempting more complex patching. - # Something similar may be needed on other platforms. - edit_file('pp_sys.c', sub { - my $code = shift; - $code =~ s/^ struct hostent \*(?:PerlSock_)?gethostbyaddr\([^)]+\);$//m; - $code =~ s/^ struct netent \*getnetbyaddr\([^)]+\);$//m; - return $code; - }); + + if ($major < 6 && $^O eq 'netbsd' + && !extract_from_file('unixish.h', + qr/defined\(NSIG\).*defined\(__NetBSD__\)/)) { + apply_patch(<<'EOPATCH') +diff --git a/unixish.h b/unixish.h +index 2a6cbcd..eab2de1 100644 +--- a/unixish.h ++++ b/unixish.h +@@ -89,7 +89,7 @@ + */ + /* #define ALTERNATE_SHEBANG "#!" / **/ + +-#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX) ++#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX) || defined(__NetBSD__) + # include <signal.h> + #endif + +EOPATCH + } + + if (($major >= 7 || $major <= 9) && $^O eq 'openbsd' + && `uname -m` eq "sparc64\n" + # added in 2000 by commit cb434fcc98ac25f5: + && extract_from_file('regexec.c', + qr!/\* No need to save/restore up to this paren \*/!) + # re-indented in 2006 by commit 95b2444054382532: + && extract_from_file('regexec.c', qr/^\t\tCURCUR cc;$/)) { + # Need to work around a bug in (at least) OpenBSD's 4.6's sparc64 # + # compiler ["gcc (GCC) 3.3.5 (propolice)"]. Between commits + # 3ec562b0bffb8b8b (2002) and 1a4fad37125bac3e^ (2005) the darling thing + # fails to compile any code for the statement cc.oldcc = PL_regcc; + # + # If you refactor the code to "fix" that, or force the issue using set + # in the debugger, the stack smashing detection code fires on return + # from S_regmatch(). Turns out that the compiler doesn't allocate any + # (or at least enough) space for cc. + # + # Restore the "uninitialised" value for cc before function exit, and the + # stack smashing code is placated. "Fix" 3ec562b0bffb8b8b (which + # changes the size of auto variables used elsewhere in S_regmatch), and + # the crash is visible back to bc517b45fdfb539b (which also changes + # buffer sizes). "Unfix" 1a4fad37125bac3e and the crash is visible until + # 5b47454deb66294b. Problem goes away if you compile with -O, or hack + # the code as below. + # + # Hence this turns out to be a bug in (old) gcc. Not a security bug we + # still need to fix. + apply_patch(<<'EOPATCH'); +diff --git a/regexec.c b/regexec.c +index 900b491..6251a0b 100644 +--- a/regexec.c ++++ b/regexec.c +@@ -2958,7 +2958,11 @@ S_regmatch(pTHX_ regnode *prog) + I,I + *******************************************************************/ + case CURLYX: { +- CURCUR cc; ++ union { ++ CURCUR hack_cc; ++ char hack_buff[sizeof(CURCUR) + 1]; ++ } hack; ++#define cc hack.hack_cc + CHECKPOINT cp = PL_savestack_ix; + /* No need to save/restore up to this paren */ + I32 parenfloor = scan->flags; +@@ -2983,6 +2987,7 @@ S_regmatch(pTHX_ regnode *prog) + n = regmatch(PREVOPER(next)); /* start on the WHILEM */ + regcpblow(cp); + PL_regcc = cc.oldcc; ++#undef cc + saySAME(n); + } + /* NOT REACHED */ +EOPATCH } -if ($major < 10 and -f 'ext/IPC/SysV/SysV.xs') { - edit_file('ext/IPC/SysV/SysV.xs', sub { - my $xs = shift; - my $fixed = <<'EOFIX'; + if ($major < 8 && $^O eq 'openbsd' + && !extract_from_file('perl.h', qr/include <unistd\.h>/)) { + # This is part of commit 3f270f98f9305540, applied at a slightly + # different location in perl.h, where the context is stable back to + # 5.000 + apply_patch(<<'EOPATCH'); +diff --git a/perl.h b/perl.h +index 9418b52..b8b1a7c 100644 +--- a/perl.h ++++ b/perl.h +@@ -496,6 +496,10 @@ register struct op *Perl_op asm(stringify(OP_IN_REGISTER)); + # include <sys/param.h> + #endif + ++/* If this causes problems, set i_unistd=undef in the hint file. */ ++#ifdef I_UNISTD ++# include <unistd.h> ++#endif + + /* Use all the "standard" definitions? */ + #if defined(STANDARD_C) && defined(I_STDLIB) +EOPATCH + } +} + +sub patch_ext { + if (-f 'ext/POSIX/Makefile.PL' + && extract_from_file('ext/POSIX/Makefile.PL', + qr/Explicitly avoid including/)) { + # commit 6695a346c41138df, which effectively reverts 170888cff5e2ffb7 + + # PERL5LIB is populated by make_ext.pl with paths to the modules we need + # to run, don't override this with "../../lib" since that may not have + # been populated yet in a parallel build. + apply_commit('6695a346c41138df'); + } + + if ($major < 8 && $^O eq 'darwin' && !-f 'ext/DynaLoader/dl_dyld.xs') { + checkout_file('ext/DynaLoader/dl_dyld.xs', 'f556e5b971932902'); + apply_patch(<<'EOPATCH'); +diff -u a/ext/DynaLoader/dl_dyld.xs~ a/ext/DynaLoader/dl_dyld.xs +--- a/ext/DynaLoader/dl_dyld.xs~ 2011-10-11 21:41:27.000000000 +0100 ++++ b/ext/DynaLoader/dl_dyld.xs 2011-10-11 21:42:20.000000000 +0100 +@@ -41,6 +41,35 @@ + #include "perl.h" + #include "XSUB.h" + ++#ifndef pTHX ++# define pTHX void ++# define pTHX_ ++#endif ++#ifndef aTHX ++# define aTHX ++# define aTHX_ ++#endif ++#ifndef dTHX ++# define dTHXa(a) extern int Perl___notused(void) ++# define dTHX extern int Perl___notused(void) ++#endif ++ ++#ifndef Perl_form_nocontext ++# define Perl_form_nocontext form ++#endif ++ ++#ifndef Perl_warn_nocontext ++# define Perl_warn_nocontext warn ++#endif ++ ++#ifndef PTR2IV ++# define PTR2IV(p) (IV)(p) ++#endif ++ ++#ifndef get_av ++# define get_av perl_get_av ++#endif ++ + #define DL_LOADONCEONLY + + #include "dlutils.c" /* SaveError() etc */ +@@ -185,7 +191,7 @@ + CODE: + DLDEBUG(1,PerlIO_printf(Perl_debug_log, "dl_load_file(%s,%x):\n", filename,flags)); + if (flags & 0x01) +- Perl_warn(aTHX_ "Can't make loaded symbols global on this platform while loading %s",filename); ++ Perl_warn_nocontext("Can't make loaded symbols global on this platform while loading %s",filename); + RETVAL = dlopen(filename, mode) ; + DLDEBUG(2,PerlIO_printf(Perl_debug_log, " libref=%x\n", RETVAL)); + ST(0) = sv_newmortal() ; +EOPATCH + if ($major < 4 && !extract_from_file('util.c', qr/^form/m)) { + apply_patch(<<'EOPATCH'); +diff -u a/ext/DynaLoader/dl_dyld.xs~ a/ext/DynaLoader/dl_dyld.xs +--- a/ext/DynaLoader/dl_dyld.xs~ 2011-10-11 21:56:25.000000000 +0100 ++++ b/ext/DynaLoader/dl_dyld.xs 2011-10-11 22:00:00.000000000 +0100 +@@ -60,6 +60,18 @@ + # define get_av perl_get_av + #endif + ++static char * ++form(char *pat, ...) ++{ ++ char *retval; ++ va_list args; ++ va_start(args, pat); ++ vasprintf(&retval, pat, &args); ++ va_end(args); ++ SAVEFREEPV(retval); ++ return retval; ++} ++ + #define DL_LOADONCEONLY + + #include "dlutils.c" /* SaveError() etc */ +EOPATCH + } + } + + if ($major < 10) { + if (!extract_from_file('ext/DB_File/DB_File.xs', + qr!^#else /\* Berkeley DB Version > 2 \*/$!)) { + # This DB_File.xs is really too old to patch up. + # Skip DB_File, unless we're invoked with an explicit -Unoextensions + if (!exists $defines{noextensions}) { + $defines{noextensions} = 'DB_File'; + } elsif (defined $defines{noextensions}) { + $defines{noextensions} .= ' DB_File'; + } + } elsif (!extract_from_file('ext/DB_File/DB_File.xs', + qr/^#ifdef AT_LEAST_DB_4_1$/)) { + # This line is changed by commit 3245f0580c13b3ab + my $line = extract_from_file('ext/DB_File/DB_File.xs', + qr/^( status = \(?RETVAL->dbp->open\)?\(RETVAL->dbp, name, NULL, RETVAL->type, $)/); + apply_patch(<<"EOPATCH"); +diff --git a/ext/DB_File/DB_File.xs b/ext/DB_File/DB_File.xs +index 489ba96..fba8ded 100644 +--- a/ext/DB_File/DB_File.xs ++++ b/ext/DB_File/DB_File.xs +\@\@ -183,4 +187,8 \@\@ + #endif + ++#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) ++# define AT_LEAST_DB_4_1 ++#endif ++ + /* map version 2 features & constants onto their version 1 equivalent */ + +\@\@ -1334,7 +1419,12 \@\@ SV * sv ; + #endif + ++#ifdef AT_LEAST_DB_4_1 ++ status = (RETVAL->dbp->open)(RETVAL->dbp, NULL, name, NULL, RETVAL->type, ++ Flags, mode) ; ++#else + $line + Flags, mode) ; ++#endif + /* printf("open returned %d %s\\n", status, db_strerror(status)) ; */ + +EOPATCH + } + } + + if ($major < 10 and -f 'ext/IPC/SysV/SysV.xs') { + edit_file('ext/IPC/SysV/SysV.xs', sub { + my $xs = shift; + my $fixed = <<'EOFIX'; #include <sys/types.h> #if defined(HAS_MSG) || defined(HAS_SEM) || defined(HAS_SHM) @@ -2101,84 +2339,15 @@ if ($major < 10 and -f 'ext/IPC/SysV/SysV.xs') { # endif #endif EOFIX - $xs =~ s! + $xs =~ s! #include <sys/types\.h> .* (#ifdef newCONSTSUB|/\* Required)!$fixed$1!ms; - return $xs; - }); -} - -if (-f 'ext/POSIX/Makefile.PL' - && extract_from_file('ext/POSIX/Makefile.PL', - qr/Explicitly avoid including/)) { - # commit 6695a346c41138df, which effectively reverts 170888cff5e2ffb7 - - # PERL5LIB is populated by make_ext.pl with paths to the modules we need - # to run, don't override this with "../../lib" since that may not have - # been populated yet in a parallel build. - apply_commit('6695a346c41138df'); -} - -# Parallel build for miniperl is safe -system "make $j miniperl </dev/null"; - -my $expected = $target =~ /^test/ ? 't/perl' - : $target eq 'Fcntl' ? "lib/auto/Fcntl/Fcntl.$Config{so}" - : $target; -my $real_target = $target eq 'Fcntl' ? $expected : $target; - -if ($target ne 'miniperl') { - # Nearly all parallel build issues fixed by 5.10.0. Untrustworthy before that. - $j = '' if $major < 10; - - if ($real_target eq 'test_prep') { - if ($major < 8) { - # test-prep was added in 5.004_01, 3e3baf6d63945cb6. - # renamed to test_prep in 2001 in 5fe84fd29acaf55c. - # earlier than that, just make test. It will be fast enough. - $real_target = extract_from_file('Makefile.SH', - qr/^(test[-_]prep):/, - 'test'); - } - } - - system "make $j $real_target </dev/null"; -} - -my $missing_target = $expected =~ /perl$/ ? !-x $expected : !-r $expected; - -if ($options{'test-build'}) { - report_and_exit($missing_target, 'could build', 'could not build', - $real_target); -} elsif ($missing_target) { - skip("could not build $real_target"); -} - -match_and_exit($real_target) if $match; - -if (defined $options{'one-liner'}) { - my $exe = $target =~ /^(?:perl$|test)/ ? 'perl' : 'miniperl'; - unshift @ARGV, "./$exe", '-Ilib', '-e', $options{'one-liner'}; -} - -# This is what we came here to run: - -if (exists $Config{ldlibpthname}) { - require Cwd; - my $varname = $Config{ldlibpthname}; - my $cwd = Cwd::getcwd(); - if (defined $ENV{$varname}) { - $ENV{$varname} = $cwd . $Config{path_sep} . $ENV{$varname}; - } else { - $ENV{$varname} = $cwd; + return $xs; + }); } } -my $ret = system @ARGV; - -report_and_exit($ret, 'zero exit from', 'non-zero exit from', "@ARGV"); - # Local variables: # cperl-indent-level: 4 # indent-tabs-mode: nil diff --git a/Porting/bisect.pl b/Porting/bisect.pl index 292a3ef50d..d28e1c549c 100755 --- a/Porting/bisect.pl +++ b/Porting/bisect.pl @@ -1,15 +1,25 @@ #!/usr/bin/perl -w use strict; +=for comment + +Documentation for this is in bisect-runner.pl + +=cut + my $start_time = time; # The default, auto_abbrev will treat -e as an abbreviation of --end # Which isn't what we want. use Getopt::Long qw(:config pass_through no_auto_abbrev); -my ($start, $end); +my ($start, $end, $validate); unshift @ARGV, '--help' unless GetOptions('start=s' => \$start, - 'end=s' => \$end); + 'end=s' => \$end, + validate => \$validate); + +@ARGV = ('--', 'sh', '-c', 'cd t && ./perl TEST base/*.t') + if $validate && !@ARGV; my $runner = $0; $runner =~ s/bisect\.pl/bisect-runner.pl/; @@ -24,7 +34,7 @@ die "Can't find bisect runner $runner" unless -f $runner; if defined $dev1 && $dev0 == $dev1 && $ino0 == $ino1; } -system $^X, $runner, '--check-args', @ARGV and exit 255; +system $^X, $runner, '--check-args', '--check-shebang', @ARGV and exit 255; # We try these in this order for the start revision if none is specified. my @stable = qw(perl-5.002 perl-5.003 perl-5.004 perl-5.005 perl-5.6.0 @@ -46,6 +56,36 @@ my $modified = () = `git ls-files --modified --deleted --others`; die "This checkout is not clean - $modified modified or untracked file(s)" if $modified; +sub validate { + my $commit = shift; + if (defined $start && `git rev-list -n1 $commit ^$start^` eq "") { + print "Skipping $commit, as it is earlier than $start\n"; + return; + } + if (defined $end && `git rev-list -n1 $end ^$commit^` eq "") { + print "Skipping $commit, as it is more recent than $end\n"; + return; + } + print "Testing $commit...\n"; + system "git checkout $commit </dev/null" and die; + my $ret = system $^X, $runner, '--no-clean', @ARGV; + die "Runner returned $ret, not 0 for revision $commit" if $ret; + system 'git clean -dxf </dev/null' and die; + system 'git reset --hard HEAD </dev/null' and die; + return $commit; +} + +if ($validate) { + require Text::Wrap; + my @built = map {validate $_} 'blead', reverse @stable; + if (@built) { + print Text::Wrap::wrap("", "", "Successfully validated @built\n"); + exit 0; + } + print "Did not validate anything\n"; + exit 1; +} + my $git_version = `git --version`; if (defined $git_version && $git_version =~ /\Agit version (\d+\.\d+\.\d+)(.*)/) { @@ -97,6 +137,12 @@ END { if defined $start_time; } +=for comment + +Documentation for this is in bisect-runner.pl + +=cut + # Local variables: # cperl-indent-level: 4 # indent-tabs-mode: nil diff --git a/Porting/config.sh b/Porting/config.sh index cebc7badc9..977ca2dddd 100644 --- a/Porting/config.sh +++ b/Porting/config.sh @@ -45,12 +45,12 @@ alignbytes='4' ansi2knr='' aphostname='' api_revision='5' -api_subversion='4' +api_subversion='5' api_version='15' -api_versionstring='5.15.4' +api_versionstring='5.15.5' ar='ar' -archlib='/opt/perl/lib/5.15.4/i686-linux-64int' -archlibexp='/opt/perl/lib/5.15.4/i686-linux-64int' +archlib='/opt/perl/lib/5.15.5/i686-linux-64int' +archlibexp='/opt/perl/lib/5.15.5/i686-linux-64int' archname64='64int' archname='i686-linux-64int' archobjs='' @@ -743,7 +743,7 @@ inc_version_list_init='0' incpath='' inews='' initialinstalllocation='/opt/perl/bin' -installarchlib='/opt/perl/lib/5.15.4/i686-linux-64int' +installarchlib='/opt/perl/lib/5.15.5/i686-linux-64int' installbin='/opt/perl/bin' installhtml1dir='' installhtml3dir='' @@ -751,13 +751,13 @@ installman1dir='/opt/perl/man/man1' installman3dir='/opt/perl/man/man3' installprefix='/opt/perl' installprefixexp='/opt/perl' -installprivlib='/opt/perl/lib/5.15.4' +installprivlib='/opt/perl/lib/5.15.5' installscript='/opt/perl/bin' -installsitearch='/opt/perl/lib/site_perl/5.15.4/i686-linux-64int' +installsitearch='/opt/perl/lib/site_perl/5.15.5/i686-linux-64int' installsitebin='/opt/perl/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='/opt/perl/lib/site_perl/5.15.4' +installsitelib='/opt/perl/lib/site_perl/5.15.5' installsiteman1dir='/opt/perl/man/man1' installsiteman3dir='/opt/perl/man/man3' installsitescript='/opt/perl/bin' @@ -880,7 +880,7 @@ perl_patchlevel='34948' perl_static_inline='static __inline__' perladmin='yourname@yourhost.yourplace.com' perllibs='-lnsl -ldl -lm -lcrypt -lutil -lc' -perlpath='/opt/perl/bin/perl5.15.4' +perlpath='/opt/perl/bin/perl5.15.5' pg='pg' phostname='' pidtype='pid_t' @@ -889,8 +889,8 @@ pmake='' pr='' prefix='/opt/perl' prefixexp='/opt/perl' -privlib='/opt/perl/lib/5.15.4' -privlibexp='/opt/perl/lib/5.15.4' +privlib='/opt/perl/lib/5.15.5' +privlibexp='/opt/perl/lib/5.15.5' procselfexe='"/proc/self/exe"' prototype='define' ptrsize='4' @@ -956,17 +956,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 6, 17, 29, 31, 0' sig_size='69' signal_t='void' -sitearch='/opt/perl/lib/site_perl/5.15.4/i686-linux-64int' -sitearchexp='/opt/perl/lib/site_perl/5.15.4/i686-linux-64int' +sitearch='/opt/perl/lib/site_perl/5.15.5/i686-linux-64int' +sitearchexp='/opt/perl/lib/site_perl/5.15.5/i686-linux-64int' sitebin='/opt/perl/bin' sitebinexp='/opt/perl/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/opt/perl/lib/site_perl/5.15.4' +sitelib='/opt/perl/lib/site_perl/5.15.5' sitelib_stem='/opt/perl/lib/site_perl' -sitelibexp='/opt/perl/lib/site_perl/5.15.4' +sitelibexp='/opt/perl/lib/site_perl/5.15.5' siteman1dir='/opt/perl/man/man1' siteman1direxp='/opt/perl/man/man1' siteman3dir='/opt/perl/man/man3' @@ -992,7 +992,7 @@ src='.' ssizetype='ssize_t' st_ino_sign='1' st_ino_size='4' -startperl='#!/opt/perl/bin/perl5.15.4' +startperl='#!/opt/perl/bin/perl5.15.5' startsh='#!/bin/sh' static_ext=' ' stdchar='char' @@ -1005,7 +1005,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='4' +subversion='5' sysman='/usr/share/man/man1' tail='' tar='' @@ -1095,8 +1095,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.15.4' -version_patchlevel_string='version 15 subversion 4' +version='5.15.5' +version_patchlevel_string='version 15 subversion 5' versiononly='define' vi='' voidflags='15' @@ -1120,10 +1120,10 @@ config_arg8='-Dusedevel' config_arg9='-dE' PERL_REVISION=5 PERL_VERSION=15 -PERL_SUBVERSION=4 +PERL_SUBVERSION=5 PERL_API_REVISION=5 PERL_API_VERSION=15 -PERL_API_SUBVERSION=4 +PERL_API_SUBVERSION=5 PERL_PATCHLEVEL=34948 PERL_CONFIG_SH=true : Variables propagated from previous config.sh file. diff --git a/Porting/config_H b/Porting/config_H index e8338d0084..1df8aa0041 100644 --- a/Porting/config_H +++ b/Porting/config_H @@ -966,8 +966,8 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define ARCHLIB "/pro/lib/perl5/5.15.4/i686-linux-64int-ld" /**/ -#define ARCHLIB_EXP "/pro/lib/perl5/5.15.4/i686-linux-64int-ld" /**/ +#define ARCHLIB "/pro/lib/perl5/5.15.5/i686-linux-64int-ld" /**/ +#define ARCHLIB_EXP "/pro/lib/perl5/5.15.5/i686-linux-64int-ld" /**/ /* ARCHNAME: * This symbol holds a string representing the architecture name. @@ -2074,8 +2074,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/pro/lib/perl5/5.15.4" /**/ -#define PRIVLIB_EXP "/pro/lib/perl5/5.15.4" /**/ +#define PRIVLIB "/pro/lib/perl5/5.15.5" /**/ +#define PRIVLIB_EXP "/pro/lib/perl5/5.15.5" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -2125,8 +2125,8 @@ * This symbol contains the ~name expanded version of SITEARCH, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define SITEARCH "/pro/lib/perl5/site_perl/5.15.4/i686-linux-64int-ld" /**/ -#define SITEARCH_EXP "/pro/lib/perl5/site_perl/5.15.4/i686-linux-64int-ld" /**/ +#define SITEARCH "/pro/lib/perl5/site_perl/5.15.5/i686-linux-64int-ld" /**/ +#define SITEARCH_EXP "/pro/lib/perl5/site_perl/5.15.5/i686-linux-64int-ld" /**/ /* SITELIB: * This symbol contains the name of the private library for this package. @@ -2148,8 +2148,8 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "/pro/lib/perl5/site_perl/5.15.4" /**/ -#define SITELIB_EXP "/pro/lib/perl5/site_perl/5.15.4" /**/ +#define SITELIB "/pro/lib/perl5/site_perl/5.15.5" /**/ +#define SITELIB_EXP "/pro/lib/perl5/site_perl/5.15.5" /**/ #define SITELIB_STEM "/pro/lib/perl5/site_perl" /**/ /* SSize_t: @@ -4369,7 +4369,7 @@ * script to make sure (one hopes) that it runs with perl and not * some shell. */ -#define STARTPERL "#!/pro/bin/perl5.15.4" /**/ +#define STARTPERL "#!/pro/bin/perl5.15.5" /**/ /* HAS_STDIO_STREAM_ARRAY: * This symbol, if defined, tells that there is an array diff --git a/Porting/epigraphs.pod b/Porting/epigraphs.pod index 2b19990f19..e360cdfac1 100644 --- a/Porting/epigraphs.pod +++ b/Porting/epigraphs.pod @@ -17,6 +17,36 @@ Consult your favorite dictionary for details. =head1 EPIGRAPHS +=head2 v5.15.5 - Nikolai Gogol, The Diary of a Madman + +L<Announced on 2011-11-20 by Steve +Hay|http://www.nntp.perl.org/group/perl.perl5.porters/2011/11/msg179588.html> + +This day - is a day of the greatest solemnity! Spain has a king. He has +been found. I am that king. Only this very day did I learn of it. I +confess, it came to me suddenly in a flash of lightning. I don't understand +how I could have thought and imagined that I was a titular councillor. How +could such a wild notion enter my head? It's a good thing no one thought of +putting me in an insane asylum. Now everything is laid open before me. Now +I see everything as on the palm of my hand. And before, I don't understand, +before everything around me was in some sort of fog. And all this happens, I +think, because people imagine that the human brain is in the head. Not at +all: it is brought by a wind from the direction of the Caspian Sea. First +off, I announced to Mavra who I am. When she heard that the king of Spain +was standing before her, she clasped her hands and nearly died of fright. +The stupid woman had never seen a king of Spain before. However, I +endeavoured to calm her down and assured her in gracious words of my +benevolence and that I was not at all angry that she sometimes polished my +boots poorly. They're benighted folk. It's impossible to tell them about +lofty matters. She got frightened because she's convinced that all kings of +Spain are like Philip II. But I explained to her that there was no +resemblance between me and Philip II, and that I didn't have a single +Capuchin . . . I didn't go to the office . . . To hell with it! No friends, +you won't lure me there now; I'm not going to copy your vile papers! + + -- Nikolai Gogol, The Diary of a Madman, + trans. Richard Pevear and Larissa Volokhonsky + =head2 v5.15.4 - Steve Jobs L<Announced on 2011-10-20 by Florian diff --git a/Porting/new-perldelta.pl b/Porting/new-perldelta.pl new file mode 100644 index 0000000000..4fa7fd13f2 --- /dev/null +++ b/Porting/new-perldelta.pl @@ -0,0 +1,108 @@ +#!/usr/bin/perl -w +use strict; + +# This needs to be able to run from a clean checkout, hence assume only system +# perl, which may be too old to have autodie + +require 'Porting/pod_lib.pl'; + +my $state = get_pod_metadata(1); +my (undef, $old_major, $old_minor) = @{$state->{delta_version}}; +# For now, hard code it for the simple ones... +my $new_major = $old_major; +my $new_minor = $old_minor + 1; +# These two are just for "If you are upgrading from earlier releases..." in +# the perldelta template. +my $was_major = $old_major; +my $was_minor = $old_minor - 1; +# I may have missed some corner cases here: +if ($was_minor < 0) { + $was_minor = 0; + --$was_major; +} +my $newdelta_filename = "perl5$new_major${new_minor}delta.pod"; + +{ + # For now, just tell the user what to add, as it's safer. + my %add; + + sub git_add_new { + push @{$add{new}}, shift; + } + + sub git_add_modified { + push @{$add{modified}}, shift; + } + + sub notify_success { + return unless %add; + print "Please run:\n"; + foreach (qw(new modified)) { + print " git add @{$add{$_}}\n" if $add{$_}; + } + print "\nBefore committing please check that the build works and make test_porting passes\n"; + } +} + +my $filename = 'pod/.gitignore'; +my $gitignore = slurp_or_die($filename); + +$gitignore =~ s{^/$state->{delta_target}$} + {/$newdelta_filename}m + or die "Can't find /$state->{delta_target} in $filename"; + +write_or_die($filename, $gitignore); +git_add_modified($filename); + +my $olddelta = slurp_or_die('pod/perldelta.pod'); + +$olddelta =~ s{^(perl)(delta - what is new for perl v5.$old_major.$old_minor)$} + {$1 . "5$old_major$old_minor" . $2}me + or die "Can't find expected NAME contents in $olddelta"; + +my $olddeltaname = "pod/perl5$old_major${old_minor}delta.pod"; +write_or_die($olddeltaname, $olddelta); +git_add_new($olddeltaname); + +$filename = 'Porting/perldelta_template.pod'; +my $newdelta = slurp_or_die($filename); + +foreach([rXXX => $was_major], + [sXXX => $old_major], + [tXXX => $new_major], + [aXXX => $was_minor], + [bXXX => $old_minor], + [cXXX => $new_minor], + ['5XXX' => 5 . $old_major . $old_minor]) { + my ($token, $value) = @$_; + $newdelta =~ s/$token/$value/g + or die "Can't find '$token' in $filename"; +} + +write_or_die('pod/perldelta.pod', $newdelta); +git_add_modified('pod/perldelta.pod'); + +$filename = 'pod.lst'; +my $pod_master = slurp_or_die($filename); + +$pod_master =~ s{^(\s*perl5)($old_major$old_minor)(delta\s+Perl changes in version )(5\.\d+\.\d+)(.*)} + {$1 . $new_major . $new_minor .$3 . "5.$new_major.$new_minor" . $5 . "\n" . + "$1$2$3$4$5"}me + or die "Can't find perldelta line in $filename"; + +write_or_die($filename, $pod_master); +git_add_modified($filename); + +my $command = "$^X Porting/pod_rules.pl"; +system $command + and die "Could not run '$command', \$? = $?"; +git_add_modified(map {chomp $_; $_} `$^X Porting/pod_rules.pl --showfiles`); + +notify_success(); + +# Local variables: +# cperl-indent-level: 4 +# indent-tabs-mode: nil +# End: +# +# ex: set ts=8 sts=4 sw=4 et: diff --git a/Porting/perldelta_template.pod b/Porting/perldelta_template.pod index 84064385c8..8adf0aeaab 100644 --- a/Porting/perldelta_template.pod +++ b/Porting/perldelta_template.pod @@ -5,16 +5,16 @@ [ this is a template for a new perldelta file. Any text flagged as XXX needs to be processed before release. ] -perldelta - what is new for perl v5.XXX.XXX +perldelta - what is new for perl v5.tXXX.cXXX =head1 DESCRIPTION -This document describes differences between the 5.XXX.XXX release and -the 5.XXX.XXX release. +This document describes differences between the 5.sXXX.bXXX release and +the 5.tXXX.cXXX release. -If you are upgrading from an earlier release such as 5.YYY.YYY, first read -L<perl5YYYdelta>, which describes differences between 5.ZZZ.ZZZ and -5.YYY.YYY. +If you are upgrading from an earlier release such as 5.rXXX.aXXX, first read +L<perl5XXXdelta>, which describes differences between 5.rXXX.aXXX and +5.sXXX.bXXX. =head1 Notice @@ -355,7 +355,7 @@ here. XXX Generate this with: - perl Porting/acknowledgements.pl v5.15.4..HEAD + perl Porting/acknowledgements.pl v5.15.5..HEAD =head1 Reporting Bugs diff --git a/Porting/pod_lib.pl b/Porting/pod_lib.pl new file mode 100644 index 0000000000..25e33d58bc --- /dev/null +++ b/Porting/pod_lib.pl @@ -0,0 +1,239 @@ +#!/usr/bin/perl -w + +use strict; + +# make it clearer when we haven't run to completion, as we can be quite +# noisy when things are working ok + +sub my_die { + print STDERR "$0: ", @_; + print STDERR "\n" unless $_[-1] =~ /\n\z/; + print STDERR "ABORTED\n"; + exit 255; +} + +sub open_or_die { + my $filename = shift; + open my $fh, '<', $filename or my_die "Can't open $filename: $!"; + return $fh; +} + +sub slurp_or_die { + my $filename = shift; + my $fh = open_or_die($filename); + binmode $fh; + local $/; + my $contents = <$fh>; + die "Can't read $filename: $!" unless defined $contents and close $fh; + return $contents; +} + +sub write_or_die { + my ($filename, $contents) = @_; + open my $fh, '>', $filename or die "Can't open $filename for writing: $!"; + binmode $fh; + print $fh $contents or die "Can't write to $filename: $!"; + close $fh or die "Can't close $filename: $!"; +} + +sub get_pod_metadata { + # Do we expect to find generated pods on disk? + my $permit_missing_generated = shift; + my %BuildFiles; + + foreach my $path (@_) { + $path =~ m!([^/]+)$!; + ++$BuildFiles{$1}; + } + + my %state = + ( + # Don't copy these top level READMEs + ignore => + { + micro => 1, + # vms => 1, + }, + ); + + my $source = 'perldelta.pod'; + my $filename = "pod/$source"; + my $fh = open_or_die($filename); + my $contents = do {local $/; <$fh>}; + my @want = + $contents =~ /perldelta - what is new for perl v(5)\.(\d+)\.(\d+)\n/; + die "Can't extract version from $filename" unless @want; + $state{delta_target} = join '', 'perl', @want, 'delta.pod'; + $state{delta_version} = \@want; + + # This way round so that keys can act as a MANIFEST skip list + # Targets will always be in the pod directory. Currently we can only cope + # with sources being in the same directory. + $state{copies}{$state{delta_target}} = $source; + + + # process pod.lst + my %Readmepods; + my $master = open_or_die('pod.lst'); + + foreach (<$master>) { + next if /^\#/; + + # At least one upper case letter somewhere in the first group + if (/^(\S+)\s(.*)/ && $1 =~ tr/h//) { + # it's a heading + my $flags = $1; + $flags =~ tr/h//d; + my %flags = (header => 1); + $flags{toc_omit} = 1 if $flags =~ tr/o//d; + $flags{aux} = 1 if $flags =~ tr/a//d; + my_die "Unknown flag found in heading line: $_" if length $flags; + + push @{$state{master}}, [\%flags, $2]; + } elsif (/^(\S*)\s+(\S+)\s+(.*)/) { + # it's a section + my ($flags, $podname, $desc) = ($1, $2, $3); + my $filename = "${podname}.pod"; + $filename = "pod/${filename}" if $filename !~ m{/}; + + my %flags = (indent => 0); + $flags{indent} = $1 if $flags =~ s/(\d+)//; + $flags{toc_omit} = 1 if $flags =~ tr/o//d; + $flags{aux} = 1 if $flags =~ tr/a//d; + $flags{perlpod_omit} = "$podname.pod" eq $state{delta_target}; + + $state{generated}{"$podname.pod"}++ if $flags =~ tr/g//d; + + if ($flags =~ tr/r//d) { + my $readme = $podname; + $readme =~ s/^perl//; + $Readmepods{$podname} = $state{readmes}{$readme} = $desc; + $flags{readme} = 1; + } elsif ($flags{aux}) { + $state{aux}{$podname} = $desc; + } else { + $state{pods}{$podname} = $desc; + } + my_die "Unknown flag found in section line: $_" if length $flags; + my ($leafname) = $podname =~ m!([^/]+)$!; + push @{$state{master}}, + [\%flags, $podname, $filename, $desc, $leafname]; + } elsif (/^$/) { + push @{$state{master}}, undef; + } else { + my_die "Malformed line: $_" if $1 =~ tr/A-Z//; + } + } + close $master or my_die "close pod.lst: $!"; + + # Sanity cross check + + my (%disk_pods, %manipods, %manireadmes, %perlpods); + my (%cpanpods, %cpanpods_leaf); + my (%our_pods); + + # These are stub files for deleted documents. We don't want them to show up + # in perl.pod, they just exist so that if someone types "perldoc perltoot" + # they get some sort of pointer to the new docs. + my %ignoredpods + = map { ( "$_.pod" => 1 ) } qw( perlboot perlbot perltooc perltoot ); + + # Convert these to a list of filenames. + foreach (keys %{$state{pods}}, keys %Readmepods) { + $our_pods{"$_.pod"}++; + } + + opendir my $dh, 'pod'; + while (defined ($_ = readdir $dh)) { + next unless /\.pod\z/; + ++$disk_pods{$_}; + } + + # Things we copy from won't be in perl.pod + # Things we copy to won't be in MANIFEST + + my $mani = open_or_die('MANIFEST'); + while (<$mani>) { + chomp; + s/\s+.*$//; + if (m!^pod/([^.]+\.pod)!i) { + ++$manipods{$1}; + } elsif (m!^README\.(\S+)!i) { + next if $state{ignore}{$1}; + ++$manireadmes{"perl$1.pod"}; + } elsif (exists $our_pods{$_}) { + ++$cpanpods{$_}; + m!([^/]+)$!; + ++$cpanpods_leaf{$1}; + $disk_pods{$_}++ + if -e $_; + } + } + close $mani or my_die "close MANIFEST: $!\n"; + + my $perlpod = open_or_die('pod/perl.pod'); + while (<$perlpod>) { + if (/^For ease of access, /../^\(If you're intending /) { + if (/^\s+(perl\S*)\s+\w/) { + ++$perlpods{"$1.pod"}; + } + } + } + close $perlpod or my_die "close perlpod: $!\n"; + my_die "could not find the pod listing of perl.pod\n" + unless %perlpods; + + # Are we running before known generated files have been generated? + # (eg in a clean checkout) + my %not_yet_there; + if ($permit_missing_generated) { + # If so, don't complain if these files aren't yet in place + %not_yet_there = (%manireadmes, %{$state{generated}}, %{$state{copies}}) + } + + my @inconsistent; + foreach my $i (sort keys %disk_pods) { + push @inconsistent, "$0: $i exists but is unknown by buildtoc\n" + unless $our_pods{$i}; + push @inconsistent, "$0: $i exists but is unknown by MANIFEST\n" + if !$BuildFiles{'MANIFEST'} # Ignore if we're rebuilding MANIFEST + && !$manipods{$i} && !$manireadmes{$i} && !$state{copies}{$i} + && !$state{generated}{$i} && !$cpanpods{$i}; + push @inconsistent, "$0: $i exists but is unknown by perl.pod\n" + if !$BuildFiles{'perl.pod'} # Ignore if we're rebuilding perl.pod + && !$perlpods{$i} && !exists $state{copies}{$i} + && !$cpanpods{$i} && !$ignoredpods{$i}; + } + foreach my $i (sort keys %our_pods) { + push @inconsistent, "$0: $i is known by buildtoc but does not exist\n" + unless $disk_pods{$i} or $BuildFiles{$i} or $not_yet_there{$i}; + } + unless ($BuildFiles{'MANIFEST'}) { + # Again, ignore these if we're about to rebuild MANIFEST + foreach my $i (sort keys %manipods) { + push @inconsistent, "$0: $i is known by MANIFEST but does not exist\n" + unless $disk_pods{$i}; + push @inconsistent, "$0: $i is known by MANIFEST but is marked as generated\n" + if $state{generated}{$i}; + } + } + unless ($BuildFiles{'perl.pod'}) { + # Again, ignore these if we're about to rebuild perl.pod + foreach my $i (sort keys %perlpods) { + push @inconsistent, "$0: $i is known by perl.pod but does not exist\n" + unless $disk_pods{$i} or $BuildFiles{$i} or $cpanpods_leaf{$i} + or $not_yet_there{$i}; + } + } + $state{inconsistent} = \@inconsistent; + return \%state; +} + +1; + +# Local variables: +# cperl-indent-level: 4 +# indent-tabs-mode: nil +# End: +# +# ex: set ts=8 sts=4 sw=4 et: diff --git a/Porting/pod_rules.pl b/Porting/pod_rules.pl new file mode 100644 index 0000000000..acea2d22f5 --- /dev/null +++ b/Porting/pod_rules.pl @@ -0,0 +1,308 @@ +#!/usr/bin/perl -w + +use strict; +use vars qw(%Build %Targets $Verbose $Test); +use Text::Tabs; +use Text::Wrap; +use Getopt::Long; +use Carp; + +# Generate the sections of files listed in %Targets from pod.lst +# Mostly these are rules in Makefiles +# +# --verbose gives slightly more output +# --build-all tries to build everything +# --build-foo updates foo as follows +# --showfiles shows the files to be changed +# --test exit if perl.pod, pod.lst, MANIFEST are consistent, and regenerated +# files are up to date, die otherwise. + +%Targets = ( + manifest => 'MANIFEST', + perlpod => 'pod/perl.pod', + vms => 'vms/descrip_mms.template', + nmake => 'win32/Makefile', + dmake => 'win32/makefile.mk', + podmak => 'win32/pod.mak', + unix => 'Makefile.SH', + # plan9 => 'plan9/mkfile', + ); + +require 'Porting/pod_lib.pl'; +sub my_die; + +# process command-line switches +{ + my @files = keys %Targets; + my $filesopts = join(" | ", map { "--build-$_" } "all", sort @files); + my $showfiles; + my %build_these; + die "$0: Usage: $0 [--verbose] [--showfiles] [$filesopts]\n" + unless GetOptions (verbose => \$Verbose, + showfiles => \$showfiles, + tap => \$Test, + map {+"build-$_", \$build_these{$_}} @files, 'all') + && !@ARGV; + if ($build_these{all}) { + %Build = %Targets; + } else { + while (my ($file, $want) = each %build_these) { + $Build{$file} = $Targets{$file} if $want; + } + # Default to --build-all if no targets given. + %Build = %Targets if !%Build; + } + if ($showfiles) { + print join(" ", sort { lc $a cmp lc $b } values %Build), "\n"; + exit(0); + } +} + +if ($Verbose) { + print "I will be building $_\n" foreach keys %Build; +} + +# For testing, generated files must be present and we're rebuilding nothing. +# For normal rebuilding, generated files may not be present, and we mute +# warnings about inconsistencies in any file we're about to rebuild. +my $state = get_pod_metadata($Test ? () : (1, values %Build)); + +my $test = 1; +if ($Test) { + printf "1..%d\n", 1 + scalar keys %Build; + if (@{$state->{inconsistent}}) { + print "not ok $test\n"; + die @{$state->{inconsistent}}; + } + print "ok $test\n"; +} +else { + warn @{$state->{inconsistent}} if @{$state->{inconsistent}}; +} + +sub generate_perlpod { + my @output; + my $maxlength = 0; + foreach (@{$state->{master}}) { + my $flags = $_->[0]; + next if $flags->{aux}; + next if $flags->{perlpod_omit}; + + if (@$_ == 2) { + # Heading + push @output, "=head2 $_->[1]\n"; + } elsif (@$_ == 5) { + # Section + my $start = " " x (4 + $flags->{indent}) . $_->[4]; + $maxlength = length $start if length ($start) > $maxlength; + push @output, [$start, $_->[3]]; + } elsif (@$_ == 0) { + # blank line + push @output, "\n"; + } else { + my_die "Illegal length " . scalar @$_; + } + } + # want at least 2 spaces padding + $maxlength += 2; + $maxlength = ($maxlength + 3) & ~3; + # sprintf gives $1.....$2 where ... are spaces: + return unexpand (map {ref $_ ? sprintf "%-${maxlength}s%s\n", @$_ : $_} + @output); +} + +sub generate_manifest { + # Annoyingly, unexpand doesn't consider it good form to replace a single + # space before a tab with a tab + # Annoyingly (2) it returns read only values. + my @temp = unexpand (map {sprintf "%-32s%s", @$_} @_); + map {s/ \t/\t\t/g; $_} @temp; +} + +sub generate_manifest_pod { + generate_manifest map {["pod/$_.pod", $state->{pods}{$_}]} + sort grep { + !$state->{copies}{"$_.pod"} + && !$state->{generated}{"$_.pod"} + && !-e "$_.pod" + } keys %{$state->{pods}}; +} + +sub generate_manifest_readme { + generate_manifest sort {$a->[0] cmp $b->[0]} + ["README.vms", "Notes about installing the VMS port"], + map {["README.$_", $state->{readmes}{$_}]} keys %{$state->{readmes}}; +} + +sub generate_nmake_1 { + # XXX Fix this with File::Spec + (map {sprintf "\tcopy ..\\README.%-8s ..\\pod\\perl$_.pod\n", $_} + sort keys %{$state->{readmes}}), + (map {"\tcopy ..\\pod\\$state->{copies}{$_} ..\\pod\\$_\n"} + sort keys %{$state->{copies}}); +} + +# This doesn't have a trailing newline +sub generate_nmake_2 { + # Spot the special case + local $Text::Wrap::columns = 76; + my $line = wrap ("\t ", "\t ", + join " ", sort(keys %{$state->{copies}}, + keys %{$state->{generated}}, + map {"perl$_.pod"} keys %{$state->{readmes}})); + $line =~ s/$/ \\/mg; + $line =~ s/ \\$//; + $line; +} + +sub generate_pod_mak { + my $variable = shift; + my @lines; + my $line = "\U$variable = " . join "\t\\\n\t", + map {"$_.$variable"} sort grep { $_ !~ m{/} } keys %{$state->{pods}}; + # Special case + $line =~ s/.*perltoc.html.*\n//m; + $line; +} + +sub verify_contiguous { + my ($name, $content, $what) = @_; + my $sections = () = $content =~ m/\0+/g; + croak("$0: $name contains no $what") if $sections < 1; + croak("$0: $name contains discontiguous $what") if $sections > 1; +} + +sub do_manifest { + my ($name, $prev) = @_; + my @manifest = + grep {! m!^pod/[^.]+\.pod.*!} + grep {! m!^README\.(\S+)! || $state->{ignore}{$1}} split "\n", $prev; + join "\n", ( + # Dictionary order - fold and handle non-word chars as nothing + map { $_->[0] } + sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] } + map { my $f = lc $_; $f =~ s/[^a-z0-9\s]//g; [ $_, $f ] } + @manifest, + &generate_manifest_pod(), + &generate_manifest_readme()), ''; +} + +sub do_nmake { + my ($name, $makefile) = @_; + $makefile =~ s/^\tcopy \.\.\\README.*\n/\0/gm; + verify_contiguous($name, $makefile, 'README copies'); + # Now remove the other copies that follow + 1 while $makefile =~ s/\0\tcopy .*\n/\0/gm; + $makefile =~ s/\0+/join ("", &generate_nmake_1)/se; + + $makefile =~ s{(-cd \$\(PODDIR\) && del /f[^\n]+).*?(-cd \.\.\\utils && del /f)} + {"$1\n" . &generate_nmake_2."\n\t$2"}se; + $makefile; +} + +# shut up used only once warning +*do_dmake = *do_dmake = \&do_nmake; + +sub do_perlpod { + my ($name, $pod) = @_; + + unless ($pod =~ s{(For\ ease\ of\ access,\ .*\n) + (?:\s+[a-z]{4,}.*\n # fooo + |=head.*\n # =head foo + |\s*\n # blank line + )+ + } + {$1 . join "", &generate_perlpod}mxe) { + my_die "Failed to insert amendments in do_perlpod"; + } + $pod; +} + +sub do_podmak { + my ($name, $body) = @_; + foreach my $variable (qw(pod man html tex)) { + my_die "could not find $variable in $name" + unless $body =~ s{\n\U$variable\E = (?:[^\n]*\\\n)*[^\n]*} + {"\n" . generate_pod_mak ($variable)}se; + } + $body; +} + +sub do_vms { + my ($name, $makefile) = @_; + + # Looking for the macro defining the current perldelta: + #PERLDELTA_CURRENT = [.pod]perl5139delta.pod + + $makefile =~ s{\nPERLDELTA_CURRENT\s+=\s+\Q[.pod]perl\E\d+delta\.pod\n} + {\0}sx; + verify_contiguous($name, $makefile, 'current perldelta macro'); + $makefile =~ s/\0+/join "\n", '', "PERLDELTA_CURRENT = [.pod]$state->{delta_target}", ''/se; + + $makefile; +} + +sub do_unix { + my ($name, $makefile_SH) = @_; + + $makefile_SH =~ s{^(perltoc_pod_prereqs = extra.pods).*} + {join ' ', $1, map "pod/$_", + sort(keys %{$state->{copies}}, + grep {!/perltoc/} keys %{$state->{generated}}) + }mge; + + # pod/perl511delta.pod: pod/perldelta.pod + # cd pod && $(LNS) perldelta.pod perl511delta.pod + + $makefile_SH =~ s!( +pod/perl[a-z0-9_]+\.pod: pod/perl[a-z0-9_]+\.pod + \$\(LNS\) perl[a-z0-9_]+\.pod pod/perl[a-z0-9_]+\.pod +)+!\0!gm; + + verify_contiguous($name, $makefile_SH, 'copy rules'); + + my @copy_rules = map " +pod/$_: pod/$state->{copies}{$_} + \$(LNS) $state->{copies}{$_} pod/$_ +", keys %{$state->{copies}}; + + $makefile_SH =~ s/\0+/join '', @copy_rules/se; + $makefile_SH; +} + +# Do stuff +while (my ($target, $name) = each %Targets) { + print "Now processing $name\n" if $Verbose; + + my $orig = slurp_or_die($name); + my_die "$name contains NUL bytes" if $orig =~ /\0/; + + my $new = do { + no strict 'refs'; + &{"do_$target"}($target, $orig); + }; + + if ($Test) { + printf "%s %d # $name is up to date\n", + $new eq $orig ? 'ok' : 'not ok', + ++$test; + next; + } elsif ($new eq $orig) { + print "Was not modified\n" + if $Verbose; + next; + } + + my $mode = (stat $name)[2] // my_die "Can't stat $name: $!"; + rename $name, "$name.old" or my_die "Can't rename $name to $name.old: $!"; + + write_or_die($name, $new); + chmod $mode & 0777, $name or my_die "can't chmod $mode $name: $!"; +} + +# Local variables: +# cperl-indent-level: 4 +# indent-tabs-mode: nil +# End: +# +# ex: set ts=8 sts=4 sw=4 et: diff --git a/Porting/release_managers_guide.pod b/Porting/release_managers_guide.pod index 42ecf44e89..54fd7b80b2 100644 --- a/Porting/release_managers_guide.pod +++ b/Porting/release_managers_guide.pod @@ -863,71 +863,40 @@ I<You MUST SKIP this step for RC> Create a new perldelta. -B<Note>: currently, the buildtoc below must be run in a I<built> perl source -directory, as at least one of the pod files it expects to find is -autogenerated: perluniprops.pod. But you can't build perl if you've added -the new perldelta file and not updated toc. So, make sure you have a built -perl (with a pod/perluniprops.pod file) now, I<before> continuing. - -First, update the F<pod/.gitignore> file to ignore the next -release's generated F<pod/perlNNNdelta.pod> file rather than this release's -one which we are about to set in stone (where NNN is the perl version number -without the dots. i.e. 5135 for 5.13.5). - - $ (edit pod/.gitignore ) - $ git add pod/.gitignore - -Then, move the existing F<pod/perldelta.pod> to F<pod/perlNNNdelta.pod>, -and edit the moved delta file to change the C<NAME> from C<perldelta> to -C<perlNNNdelta>. For example, assuming you just released 5.10.1, and are -about to create the 5.10.2 perldelta: - - $ rm pod/perl5101delta.pod # remove the auto-generated file, if any - $ git mv pod/perldelta.pod pod/perl5101delta.pod - $ (edit pod/perl5101delta.pod to retitle) - $ git add pod/perl5101delta.pod +=over 4 + +=item * + +Confirm that you have a clean checkout with no local changes. -Then create a new empty perldelta.pod file for the new release; see -F<Porting/how_to_write_a_perldelta.pod>. You should be able to do this by -just copying in a skeleton template and then doing a quick fix up of the -version numbers. Then commit the move and the new file. +=item * - $ cp -i Porting/perldelta_template.pod pod/perldelta.pod - $ (edit pod/perldelta.pod) - $ git add pod/perldelta.pod - $ git commit -m 'create perldelta for 5.10.2' +Run F<Porting/new-perldelta.pl> -=head3 update perldelta TOC and references +=item * -Now you need to update various tables of contents related to perldelta, -most of which can be generated automatically. +Run the C<git add> commands it outputs to add new and modified files. -Edit F<pod.lst>: add the new entry for the perlNNNdelta file for the -current version (the file that will be symlinked to perldelta). +=item * -Manually create a temporary link to the new delta file; normally this is -done from the Makefile, but the Makefile is updated by buildtoc, and -buildtoc won't run without the file there: +Verify that the build still works, by running C<./Configure> and +C<make test_porting>. (On Win32, run C<nmake> and +C<nmake test TEST_FILES="porting\*.t ..\lib\diagnostics.t">.) - $ ln -s pod/perldelta.pod pod/perl5102delta.pod +=item * -Run C<perl pod/buildtoc --build-all> to update the F<perldelta> version in -the following files: +If F<t/porting/podcheck.t> spots errors in the new F<pod/perldelta.pod>, +run C<./perl -MTestInit t/porting/podcheck.t | less> for more detail. +Skip to the end of its test output to see the options it offers you. - MANIFEST - Makefile.SH - pod/perl.pod - vms/descrip_mms.template - win32/Makefile - win32/makefile.mk - win32/pod.mak +=item * -Finally, commit: +When C<make test_porting> passes, commit the new perldelta. - $ git commit -a -m 'update TOC for perlNNNdelta' +=back At this point you may want to compare the commit with a previous bump to -see if they look similar. See commit dd885b5 for an example of a +see if they look similar. See commit e3c71926d3 for an example of a previous version bump. diff --git a/README.haiku b/README.haiku index b09b1108e1..09d21599cc 100644 --- a/README.haiku +++ b/README.haiku @@ -22,9 +22,9 @@ The build procedure is completely standard: Make perl executable and create a symlink for libperl: chmod a+x /boot/common/bin/perl - cd /boot/common/lib; ln -s perl5/5.15.4/BePC-haiku/CORE/libperl.so . + cd /boot/common/lib; ln -s perl5/5.15.5/BePC-haiku/CORE/libperl.so . -Replace C<5.15.4> with your respective version of Perl. +Replace C<5.15.5> with your respective version of Perl. =head1 KNOWN PROBLEMS diff --git a/README.os2 b/README.os2 index d973b82748..a5f837f1ca 100644 --- a/README.os2 +++ b/README.os2 @@ -618,7 +618,7 @@ C<set PERLLIB_PREFIX> in F<Config.sys>, see L<"PERLLIB_PREFIX">. =item Additional Perl modules - unzip perl_ste.zip -d f:/perllib/lib/site_perl/5.15.4/ + unzip perl_ste.zip -d f:/perllib/lib/site_perl/5.15.5/ Same remark as above applies. Additionally, if this directory is not one of directories on @INC (and @INC is influenced by C<PERLLIB_PREFIX>), you diff --git a/README.vms b/README.vms index 383185cfd6..0fa3a30cbc 100644 --- a/README.vms +++ b/README.vms @@ -154,12 +154,12 @@ recent versions of vmstar (e.g. V3.4 or later). Contrary to advice provided with previous versions of Perl, do I<not> use the ODS-2 compatibility qualifier. Instead, use a command like the following: - vmstar -xvf perl-5^.15^.4.tar + vmstar -xvf perl-5^.15^.5.tar Then rename the top-level source directory like so: - set security/protection=(o:rwed) perl-5^.15^.4.dir - rename perl-5^.15^.4.dir perl-5_15_4.dir + set security/protection=(o:rwed) perl-5^.15^.5.dir + rename perl-5^.15^.5.dir perl-5_15_5.dir The reason for this last step is that while filenames with multiple dots are generally supported by Perl on VMS, I<directory> names with multiple diff --git a/cpan/CGI/Changes b/cpan/CGI/Changes index 7c1b734fa6..1c1b9c9435 100644 --- a/cpan/CGI/Changes +++ b/cpan/CGI/Changes @@ -1,3 +1,29 @@ +Version 3.58 Nov 11th, 2011 + + [DOCUMENTATION] + - Clarify that using query_string() only has defined behavior when using the GET method. (RT#60813) + +Version 3.57 Nov 9th, 2011 + [INTERNALS] + - test failure in t/fast.t introduced in 3.56 is fixed. (Thanks to zefram and chansen). + - Test::More requirement has been bumped to 0.98 + +Version 3.56 Nov 8th, 2011 + + [SECURITY] + Use public and documented FCGI.pm API in CGI::Fast + CGI::Fast was using an FCGI API that was deprecated and removed from + documentation more than ten years ago. Usage of this deprecated API with + FCGI >= 0.70 or FCGI <= 0.73 introduces a security issue. + <https://rt.cpan.org/Public/Bug/Display.html?id=68380> + <http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2011-2766> + (Thanks to chansen) + + [INTERNALS] + - tmp files are now cleaned up on VMS ( RT#69210, thanks to cberry@cpan.org ) + - Fixed test failure: done_testing() added to url.t (Thanks to Ryan Jendoubi) + - Clarify preferred bug submission location in docs, and note that Mark Stosberg + is the current maintainer. Version 3.55 June 3rd, 2011 diff --git a/cpan/CGI/lib/CGI.pm b/cpan/CGI/lib/CGI.pm index 1d2ed8bc10..65fdb59c66 100644 --- a/cpan/CGI/lib/CGI.pm +++ b/cpan/CGI/lib/CGI.pm @@ -20,7 +20,7 @@ use Carp 'croak'; # The revision is no longer being updated since moving to git. $CGI::revision = '$Id: CGI.pm,v 1.266 2009/07/30 16:32:34 lstein Exp $'; -$CGI::VERSION='3.55'; +$CGI::VERSION='3.58'; # HARD-CODED LOCATION FOR FILE UPLOAD TEMPORARY FILES. # UNCOMMENT THIS ONLY IF YOU KNOW WHAT YOU'RE DOING. @@ -386,7 +386,7 @@ sub new { # user is still holding any reference to them as well. sub DESTROY { my $self = shift; - if ($OS eq 'WINDOWS') { + if ($OS eq 'WINDOWS' || $OS eq 'VMS') { for my $href (values %{$self->{'.tmpfiles'}}) { $href->{hndl}->DESTROY if defined $href->{hndl}; $href->{name}->DESTROY if defined $href->{name}; @@ -5565,13 +5565,13 @@ place to put HTML extensions, such as colors and wallpaper patterns. =head2 ENDING THE HTML DOCUMENT: - print end_html + print $q->end_html; This ends an HTML document by printing the </body></html> tags. =head2 CREATING A SELF-REFERENCING URL THAT PRESERVES STATE INFORMATION: - $myself = self_url; + $myself = $q->self_url; print q(<a href="$myself">I'm talking to myself.</a>); self_url() will return a URL, that, when selected, will reinvoke @@ -5580,7 +5580,7 @@ useful when you want to jump around within the document using internal anchors but you don't want to disrupt the current contents of the form(s). Something like this will do the trick. - $myself = self_url; + $myself = $q->self_url; print "<a href=\"$myself#table1\">See table 1</a>"; print "<a href=\"$myself#table2\">See table 2</a>"; print "<a href=\"$myself#yourself\">See for yourself</a>"; @@ -5590,7 +5590,10 @@ method instead. You can also retrieve the unprocessed query string with query_string(): - $the_string = query_string; + $the_string = $q->query_string(); + +The behavior of calling query_string is currently undefined when the HTTP method is +something other than GET. =head2 OBTAINING THE SCRIPT'S URL @@ -7988,15 +7991,15 @@ available for your use: =head1 AUTHOR INFORMATION -The CGI.pm distribution is copyright 1995-2007, Lincoln D. Stein. It is -distributed under GPL and the Artistic License 2.0. +The CGI.pm distribution is copyright 1995-2007, Lincoln D. Stein. It is +distributed under GPL and the Artistic License 2.0. It is currently +maintained by Mark Stosberg with help from many contributors. -Address bug reports and comments to: lstein@cshl.org. When sending -bug reports, please provide the version of CGI.pm, the version of -Perl, the name and version of your Web server, and the name and -version of the operating system you are using. If the problem is even -remotely browser dependent, please provide information about the -affected browsers as well. +Address bug reports and comments to: https://rt.cpan.org/Public/Dist/Display.html?Queue=CGI.pm +When sending bug reports, please provide the version of CGI.pm, the version of +Perl, the name and version of your Web server, and the name and version of the +operating system you are using. If the problem is even remotely browser +dependent, please provide information about the affected browsers as well. =head1 CREDITS diff --git a/cpan/CGI/lib/CGI/Fast.pm b/cpan/CGI/lib/CGI/Fast.pm index e31dac3f50..288d854a9d 100644 --- a/cpan/CGI/lib/CGI/Fast.pm +++ b/cpan/CGI/lib/CGI/Fast.pm @@ -19,7 +19,7 @@ local $^W = 1; # wish, but if you redistribute a modified version, please attach a note # listing the modifications you have made. -$CGI::Fast::VERSION='1.08'; +$CGI::Fast::VERSION='1.09'; use CGI; use FCGI; @@ -43,27 +43,23 @@ sub save_request { # in this package variable. use vars qw($Ext_Request); BEGIN { - # If ENV{FCGI_SOCKET_PATH} is given, explicitly open the socket, - # and keep the request handle around from which to call Accept(). - if ($ENV{FCGI_SOCKET_PATH}) { - my $path = $ENV{FCGI_SOCKET_PATH}; - my $backlog = $ENV{FCGI_LISTEN_QUEUE} || 100; - my $socket = FCGI::OpenSocket( $path, $backlog ); - $Ext_Request = FCGI::Request( \*STDIN, \*STDOUT, \*STDERR, - \%ENV, $socket, 1 ); - } + # If ENV{FCGI_SOCKET_PATH} is given, explicitly open the socket. + if ($ENV{FCGI_SOCKET_PATH}) { + my $path = $ENV{FCGI_SOCKET_PATH}; + my $backlog = $ENV{FCGI_LISTEN_QUEUE} || 100; + my $socket = FCGI::OpenSocket( $path, $backlog ); + $Ext_Request = FCGI::Request( \*STDIN, \*STDOUT, \*STDERR, + \%ENV, $socket, 1 ); + } + else { + $Ext_Request = FCGI::Request(); + } } -# New is slightly different in that it calls FCGI's -# accept() method. sub new { my ($self, $initializer, @param) = @_; unless (defined $initializer) { - if ($Ext_Request) { - return undef unless $Ext_Request->Accept() >= 0; - } else { - return undef unless FCGI::accept() >= 0; - } + return undef unless $Ext_Request->Accept() >= 0; } CGI->_reset_globals; $self->_setup_symbols(@CGI::SAVED_SYMBOLS) if @CGI::SAVED_SYMBOLS; diff --git a/cpan/CGI/t/tmpdir.t b/cpan/CGI/t/tmpdir.t index 1407356a2f..6e3fcbd87d 100644 --- a/cpan/CGI/t/tmpdir.t +++ b/cpan/CGI/t/tmpdir.t @@ -37,4 +37,4 @@ isnt($CGITempFile::TMPDIRECTORY, $testdir, "unwritable \$ENV{TMPDIR} not overridden with an unwritable \$CGITempFile::TMPDIRECTORY"); } -END { for ($testdir, $testdir2) { chmod 0700, $_; rmdir; } } +END { rmdir for ($testdir, $testdir2) } diff --git a/cpan/CGI/t/url.t b/cpan/CGI/t/url.t index 9af1c0c055..6ca229801d 100644 --- a/cpan/CGI/t/url.t +++ b/cpan/CGI/t/url.t @@ -63,6 +63,7 @@ subtest 'rewrite_interactions' => sub { '$q->url(-rewrite=>1,-path=>1), with rewriting detected' ); is( $q->url(-rewrite=>0,-path=>0), 'http://example.com/real/cgi-bin/dispatch.cgi', '$q->url(-rewrite=>0,-path=>1), with rewriting detected' ); + done_testing(); }; diff --git a/cpan/CPANPLUS/lib/CPANPLUS.pm b/cpan/CPANPLUS/lib/CPANPLUS.pm index 6a37717d05..e5b04fd45d 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS.pm @@ -13,7 +13,7 @@ BEGIN { use vars qw( @EXPORT @ISA $VERSION ); @EXPORT = qw( shell fetch get install ); @ISA = qw( Exporter ); - $VERSION = "0.9111"; #have to hardcode or cpan.org gets unhappy + $VERSION = "0.9112"; #have to hardcode or cpan.org gets unhappy } ### purely for backward compatibility, so we can call it from the commandline: diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Config.pm b/cpan/CPANPLUS/lib/CPANPLUS/Config.pm index 321e0659f2..ead0ae7a2b 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Config.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Config.pm @@ -18,6 +18,7 @@ use File::Basename qw[dirname]; use IPC::Cmd qw[can_run]; use Locale::Maketext::Simple Class => 'CPANPLUS', Style => 'gettext'; use Module::Load::Conditional qw[check_install]; +use version; =pod @@ -350,7 +351,7 @@ and L<CPANPLUS::Dist::Build> are available. $Conf->{'conf'}->{'prefer_makefile'} = ( $] >= 5.010001 or ( check_install( module => 'Module::Build', version => '0.32' ) and - check_install( module => INSTALLER_BUILD, version => '0.24' ) ) + check_install( module => INSTALLER_BUILD, version => '0.60' ) ) ? 0 : 1 ); =item prereqs @@ -589,6 +590,8 @@ remains empty if you do not require super user permissions to install. =item perlwrapper +B<DEPRECATED> + A string holding the path to the C<cpanp-run-perl> utility bundled with CPANPLUS, which is used to enable autoflushing in spawned processes. @@ -675,6 +678,12 @@ with CPANPLUS, which is used to enable autoflushing in spawned processes. ### we should have a $path by now ideally, if so return it return $path if defined $path; + ### CPANPLUS::Dist::MM doesn't require this anymore + ### but CPANPLUS::Dist::Build might if it is less than 0.60 + my $cpdb = check_install( module => INSTALLER_BUILD ); + return '' unless + $cpdb and eval { version->parse($cpdb->{version}) < version->parse('0.60') }; + ### if not, warn about it and give sensible default. ### XXX try to be a no-op instead then.. ### cross your fingers... diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Dist/MM.pm b/cpan/CPANPLUS/lib/CPANPLUS/Dist/MM.pm index de3d138235..4ef9fc1933 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Dist/MM.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Dist/MM.pm @@ -353,20 +353,8 @@ sub prepare { ### in @INC, stopping us from resolving dependencies on CPANPLUS ### at bootstrap time properly. - ### XXX this fails under ipc::run due to the extra quotes, - ### but it works in ipc::open3. however, ipc::open3 doesn't work - ### on win32/cygwin. XXX TODO get a windows box and sort this out - # my $cmd = qq[$perl -MEnglish -le ] . - # QUOTE_PERL_ONE_LINER->( - # qq[\$OUTPUT_AUTOFLUSH++,do(q($makefile_pl))] - # ) - # . $mmflags; - - # my $flush = OPT_AUTOFLUSH; - # my $cmd = "$perl $flush $makefile_pl $mmflags"; - - my $run_perl = $conf->get_program('perlwrapper'); - my $cmd = [$perl, $run_perl, $makefile_pl, @mmflags]; + my @run_perl = ( '-e', PERL_WRAPPER ); + my $cmd = [$perl, @run_perl, $makefile_pl, @mmflags]; ### set ENV var to tell underlying code this is what we're ### executing. diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Internals.pm b/cpan/CPANPLUS/lib/CPANPLUS/Internals.pm index f070b1452d..b52cbf9204 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Internals.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Internals.pm @@ -42,7 +42,7 @@ use vars qw[@ISA $VERSION]; CPANPLUS::Internals::Report ]; -$VERSION = "0.9111"; +$VERSION = "0.9112"; =pod diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Internals/Constants.pm b/cpan/CPANPLUS/lib/CPANPLUS/Internals/Constants.pm index 556fb349df..bd48a1db8c 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Internals/Constants.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Internals/Constants.pm @@ -332,6 +332,7 @@ use constant CALLING_FUNCTION return join '::', (caller(2+$lvl))[3] }; use constant PERL_CORE => 'perl'; +use constant PERL_WRAPPER => 'use strict; BEGIN { my $old = select STDERR; $|++; select $old; $|++; $0 = shift(@ARGV); my $rv = do($0); die $@ if $@; }'; use constant STORABLE_EXT => '.stored'; use constant GET_XS_FILES => sub { my $dir = $_[0] or return; diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Module.pm b/cpan/CPANPLUS/lib/CPANPLUS/Module.pm index bdfe1ae5fa..b9ddf408b5 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Module.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Module.pm @@ -688,7 +688,7 @@ sub get_installer_type { if( $type and $type eq INSTALLER_BUILD and ( not CPANPLUS::Dist->has_dist_type( INSTALLER_BUILD ) or not $cb->module_tree( INSTALLER_BUILD ) - ->is_uptodate( version => '0.24' ) + ->is_uptodate( version => '0.60' ) ) ) { ### XXX this is for recording purposes only. We *have* to install @@ -696,7 +696,7 @@ sub get_installer_type { ### saying 'no such dist type'; ### XXX duplicated from CPANPLUS::Selfupdate. fix somehow? my $href = $self->status->configure_requires || {}; - my $deps = { INSTALLER_BUILD, '0.24', %$href }; + my $deps = { INSTALLER_BUILD, '0.60', %$href }; $self->status->configure_requires( $deps ); diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Selfupdate.pm b/cpan/CPANPLUS/lib/CPANPLUS/Selfupdate.pm index e27f98b09c..654f48fc35 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Selfupdate.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Selfupdate.pm @@ -69,7 +69,7 @@ CPANPLUS::Selfupdate 'Parse::CPAN::Meta' => '1.4200', # config_requires support 'ExtUtils::Install' => '1.42', # uninstall outside @INC ( check_install( module => 'CPANPLUS::Dist::Build' ) - ? ( 'CPANPLUS::Dist::Build' => '0.24' ) : () ), + ? ( 'CPANPLUS::Dist::Build' => '0.60' ) : () ), }, features => { @@ -82,7 +82,7 @@ CPANPLUS::Selfupdate my $cb = shift; $cb->configure_object->get_conf('prefer_makefile') ? { } - : { 'CPANPLUS::Dist::Build' => '0.24' }; + : { 'CPANPLUS::Dist::Build' => '0.60' }; }, sub { return 1 }, # always enabled ], diff --git a/cpan/CPANPLUS/lib/CPANPLUS/Shell/Default.pm b/cpan/CPANPLUS/lib/CPANPLUS/Shell/Default.pm index 2350615c21..ef9a927d45 100644 --- a/cpan/CPANPLUS/lib/CPANPLUS/Shell/Default.pm +++ b/cpan/CPANPLUS/lib/CPANPLUS/Shell/Default.pm @@ -26,7 +26,7 @@ local $Data::Dumper::Indent = 1; # for dumpering from ! BEGIN { use vars qw[ $VERSION @ISA ]; @ISA = qw[ CPANPLUS::Shell::_Base::ReadLine ]; - $VERSION = "0.9111"; + $VERSION = "0.9112"; } load CPANPLUS::Shell; diff --git a/cpan/Compress-Raw-Bzip2/Changes b/cpan/Compress-Raw-Bzip2/Changes index 3206ef24d9..06eb11bbda 100644 --- a/cpan/Compress-Raw-Bzip2/Changes +++ b/cpan/Compress-Raw-Bzip2/Changes @@ -1,6 +1,19 @@ CHANGES ------- + 2.042 17 November 2011 + + * No Changes + + 2.040 28 October 2011 + + * No Changes + + 2.039 28 October 2011 + + * croak if attempt to freeze/thaw compression object + [RT #69985] + 2.037 22 June 2011 * No Changes diff --git a/cpan/Compress-Raw-Bzip2/README b/cpan/Compress-Raw-Bzip2/README index 83c0824a15..fe09adaebd 100644 --- a/cpan/Compress-Raw-Bzip2/README +++ b/cpan/Compress-Raw-Bzip2/README @@ -1,9 +1,9 @@ Compress-Raw-Bzip2 - Version 2.037 + Version 2.042 - 22nd June 2011 + 17th November 2011 Copyright (c) 2005-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it @@ -164,7 +164,7 @@ To help me help you, I need all of the following information: If you haven't installed Compress-Raw-Bzip2 then search Compress::Raw::Bzip2.pm for a line like this: - $VERSION = "2.037" ; + $VERSION = "2.042" ; c. The version of bzip2 you have used. If you have successfully installed Compress-Raw-Bzip2, this one-liner diff --git a/cpan/Compress-Raw-Bzip2/lib/Compress/Raw/Bzip2.pm b/cpan/Compress-Raw-Bzip2/lib/Compress/Raw/Bzip2.pm index 14e225e8fb..807ab83bcd 100644 --- a/cpan/Compress-Raw-Bzip2/lib/Compress/Raw/Bzip2.pm +++ b/cpan/Compress-Raw-Bzip2/lib/Compress/Raw/Bzip2.pm @@ -12,7 +12,7 @@ use Carp ; use bytes ; our ($VERSION, $XS_VERSION, @ISA, @EXPORT, $AUTOLOAD); -$VERSION = '2.037'; +$VERSION = '2.042'; $XS_VERSION = $VERSION; $VERSION = eval $VERSION; @@ -91,6 +91,31 @@ or do { # return wantarray ? ($obj, $status) : $obj; #} +sub Compress::Raw::Bzip2::STORABLE_freeze +{ + my $type = ref shift; + croak "Cannot freeze $type object\n"; +} + +sub Compress::Raw::Bzip2::STORABLE_thaw +{ + my $type = ref shift; + croak "Cannot thaw $type object\n"; +} + +sub Compress::Raw::Bunzip2::STORABLE_freeze +{ + my $type = ref shift; + croak "Cannot freeze $type object\n"; +} + +sub Compress::Raw::Bunzip2::STORABLE_thaw +{ + my $type = ref shift; + croak "Cannot thaw $type object\n"; +} + + package Compress::Raw::Bzip2; 1; diff --git a/cpan/Compress-Raw-Bzip2/t/000prereq.t b/cpan/Compress-Raw-Bzip2/t/000prereq.t index 8122a5e8d6..dc265c9229 100644 --- a/cpan/Compress-Raw-Bzip2/t/000prereq.t +++ b/cpan/Compress-Raw-Bzip2/t/000prereq.t @@ -19,7 +19,7 @@ BEGIN if eval { require Test::NoWarnings ; import Test::NoWarnings; 1 }; - my $VERSION = '2.037'; + my $VERSION = '2.042'; my @NAMES = qw( ); diff --git a/cpan/Compress-Raw-Zlib/Changes b/cpan/Compress-Raw-Zlib/Changes index 566b064905..6e86f18693 100644 --- a/cpan/Compress-Raw-Zlib/Changes +++ b/cpan/Compress-Raw-Zlib/Changes @@ -1,6 +1,19 @@ CHANGES ------- + 2.042 17 November 2011 + + * No Changes + + 2.040 28 October 2011 + + * No Changes + + 2.039 28 October 2011 + + * croak if attempt to freeze/thaw compression object + [RT #69985] + 2.037 22 June 2011 * No Changes diff --git a/cpan/Compress-Raw-Zlib/README b/cpan/Compress-Raw-Zlib/README index db2043cc5f..1e7c4aef5d 100644 --- a/cpan/Compress-Raw-Zlib/README +++ b/cpan/Compress-Raw-Zlib/README @@ -1,9 +1,9 @@ Compress-Raw-Zlib - Version 2.037 + Version 2.042 - 22nd June 2011 + 17th November 2011 Copyright (c) 2005-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it @@ -355,7 +355,7 @@ To help me help you, I need all of the following information: If you haven't installed Compress-Raw-Zlib then search Compress::Raw::Zlib.pm for a line like this: - $VERSION = "2.037" ; + $VERSION = "2.042" ; c. The version of zlib you have used. If you have successfully installed Compress-Raw-Zlib, this one-liner diff --git a/cpan/Compress-Raw-Zlib/lib/Compress/Raw/Zlib.pm b/cpan/Compress-Raw-Zlib/lib/Compress/Raw/Zlib.pm index 759254523f..853e2ed691 100644 --- a/cpan/Compress-Raw-Zlib/lib/Compress/Raw/Zlib.pm +++ b/cpan/Compress-Raw-Zlib/lib/Compress/Raw/Zlib.pm @@ -13,7 +13,7 @@ use warnings ; use bytes ; our ($VERSION, $XS_VERSION, @ISA, @EXPORT, $AUTOLOAD); -$VERSION = '2.037'; +$VERSION = '2.042'; $XS_VERSION = $VERSION; $VERSION = eval $VERSION; @@ -383,6 +383,19 @@ sub Compress::Raw::Zlib::Deflate::new } +sub Compress::Raw::Zlib::deflateStream::STORABLE_freeze +{ + my $type = ref shift; + croak "Cannot freeze $type object\n"; +} + +sub Compress::Raw::Zlib::deflateStream::STORABLE_thaw +{ + my $type = ref shift; + croak "Cannot thaw $type object\n"; +} + + sub Compress::Raw::Zlib::Inflate::new { my $pkg = shift ; @@ -420,6 +433,18 @@ sub Compress::Raw::Zlib::Inflate::new $got->value('Dictionary')) ; } +sub Compress::Raw::Zlib::inflateStream::STORABLE_freeze +{ + my $type = ref shift; + croak "Cannot freeze $type object\n"; +} + +sub Compress::Raw::Zlib::inflateStream::STORABLE_thaw +{ + my $type = ref shift; + croak "Cannot thaw $type object\n"; +} + sub Compress::Raw::Zlib::InflateScan::new { my $pkg = shift ; diff --git a/cpan/Digest-SHA/Changes b/cpan/Digest-SHA/Changes index 80529d3c2a..b6c056f0ac 100644 --- a/cpan/Digest-SHA/Changes +++ b/cpan/Digest-SHA/Changes @@ -1,5 +1,14 @@ Revision history for Perl extension Digest::SHA. +5.63 Tue Nov 8 02:36:42 MST 2011 + - added code to allow very large data inputs all at once + -- previously limited to several hundred MB at a time + -- many thanks to Thomas Drugeon for his elegant patch + - removed outdated reference URLs from several test scripts + -- these URLs aren't essential, and often go stale + -- thanks to Leon Brocard for spotting this + -- ref. rt.cpan.org #68740 + 5.62 Sat May 14 04:00:34 MST 2011 - removed unnecessary loading of MIME::Base64 module -- thanks to dolmen for pointing this out diff --git a/cpan/Digest-SHA/README b/cpan/Digest-SHA/README index e83311f859..8498bb34fb 100644 --- a/cpan/Digest-SHA/README +++ b/cpan/Digest-SHA/README @@ -1,4 +1,4 @@ -Digest::SHA version 5.62 +Digest::SHA version 5.63 ======================== Digest::SHA is a complete implementation of the NIST Secure Hash diff --git a/cpan/Digest-SHA/SHA.xs b/cpan/Digest-SHA/SHA.xs index 7088a33bf6..8124a5de70 100644 --- a/cpan/Digest-SHA/SHA.xs +++ b/cpan/Digest-SHA/SHA.xs @@ -20,6 +20,8 @@ PROTOTYPES: ENABLE #define INT2PTR(p, i) (p) (i) #endif +#define MAX_WRITE_SIZE 16384 + int shaclose(s) SHA * s @@ -86,6 +88,11 @@ PPCODE: XSRETURN_UNDEF; for (i = 0; i < items; i++) { data = (unsigned char *) (SvPV(ST(i), len)); + while (len > MAX_WRITE_SIZE) { + shawrite(data, MAX_WRITE_SIZE << 3, state); + data += MAX_WRITE_SIZE; + len -= MAX_WRITE_SIZE; + } shawrite(data, len << 3, state); } shafinish(state); @@ -139,6 +146,11 @@ PPCODE: XSRETURN_UNDEF; for (i = 0; i < items - 1; i++) { data = (unsigned char *) (SvPV(ST(i), len)); + while (len > MAX_WRITE_SIZE) { + hmacwrite(data, MAX_WRITE_SIZE << 3, state); + data += MAX_WRITE_SIZE; + len -= MAX_WRITE_SIZE; + } hmacwrite(data, len << 3, state); } hmacfinish(state); @@ -182,6 +194,11 @@ PPCODE: state = INT2PTR(SHA *, SvIV(SvRV(SvRV(self)))); for (i = 1; i < items; i++) { data = (unsigned char *) (SvPV(ST(i), len)); + while (len > MAX_WRITE_SIZE) { + shawrite(data, MAX_WRITE_SIZE << 3, state); + data += MAX_WRITE_SIZE; + len -= MAX_WRITE_SIZE; + } shawrite(data, len << 3, state); } XSRETURN(1); diff --git a/cpan/Digest-SHA/lib/Digest/SHA.pm b/cpan/Digest-SHA/lib/Digest/SHA.pm index 0c649c087c..ce652b63d1 100644 --- a/cpan/Digest-SHA/lib/Digest/SHA.pm +++ b/cpan/Digest-SHA/lib/Digest/SHA.pm @@ -7,7 +7,7 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); use Fcntl; use integer; -$VERSION = '5.62'; +$VERSION = '5.63'; require Exporter; require DynaLoader; @@ -669,6 +669,7 @@ The author is particularly grateful to Chris Carey Alexandr Ciornii Jim Doble + Thomas Drugeon Julius Duque Jeffrey Friedl Robert Gilmour diff --git a/cpan/Digest-SHA/shasum b/cpan/Digest-SHA/shasum index 9f3ca4f22c..4dd2572fca 100644 --- a/cpan/Digest-SHA/shasum +++ b/cpan/Digest-SHA/shasum @@ -4,8 +4,8 @@ ## ## Copyright (C) 2003-2011 Mark Shelor, All Rights Reserved ## - ## Version: 5.62 - ## Sat May 14 04:00:34 MST 2011 + ## Version: 5.63 + ## Tue Nov 8 02:36:42 MST 2011 ## shasum SYNOPSIS adapted from GNU Coreutils sha1sum. ## Include an "-a" option for algorithm selection, and a @@ -85,7 +85,7 @@ use strict; use Fcntl; use Getopt::Long; -my $VERSION = "5.62"; +my $VERSION = "5.63"; ## Try to use Digest::SHA. If not installed, use the slower diff --git a/cpan/Digest-SHA/src/hmac.c b/cpan/Digest-SHA/src/hmac.c index 05c8a371c4..35fd887f76 100644 --- a/cpan/Digest-SHA/src/hmac.c +++ b/cpan/Digest-SHA/src/hmac.c @@ -5,8 +5,8 @@ * * Copyright (C) 2003-2011 Mark Shelor, All Rights Reserved * - * Version: 5.62 - * Sat May 14 04:00:34 MST 2011 + * Version: 5.63 + * Tue Nov 8 02:36:42 MST 2011 * */ diff --git a/cpan/Digest-SHA/src/hmac.h b/cpan/Digest-SHA/src/hmac.h index 626be6e968..d08bd9a45c 100644 --- a/cpan/Digest-SHA/src/hmac.h +++ b/cpan/Digest-SHA/src/hmac.h @@ -5,8 +5,8 @@ * * Copyright (C) 2003-2011 Mark Shelor, All Rights Reserved * - * Version: 5.62 - * Sat May 14 04:00:34 MST 2011 + * Version: 5.63 + * Tue Nov 8 02:36:42 MST 2011 * */ diff --git a/cpan/Digest-SHA/src/sha.c b/cpan/Digest-SHA/src/sha.c index 7020c2ee02..2cd0fa319e 100644 --- a/cpan/Digest-SHA/src/sha.c +++ b/cpan/Digest-SHA/src/sha.c @@ -5,8 +5,8 @@ * * Copyright (C) 2003-2011 Mark Shelor, All Rights Reserved * - * Version: 5.62 - * Sat May 14 04:00:34 MST 2011 + * Version: 5.63 + * Tue Nov 8 02:36:42 MST 2011 * */ diff --git a/cpan/Digest-SHA/src/sha.h b/cpan/Digest-SHA/src/sha.h index fc074325c0..ed260c6cfe 100644 --- a/cpan/Digest-SHA/src/sha.h +++ b/cpan/Digest-SHA/src/sha.h @@ -5,8 +5,8 @@ * * Copyright (C) 2003-2011 Mark Shelor, All Rights Reserved * - * Version: 5.62 - * Sat May 14 04:00:34 MST 2011 + * Version: 5.63 + * Tue Nov 8 02:36:42 MST 2011 * */ diff --git a/cpan/Digest-SHA/t/gg.t b/cpan/Digest-SHA/t/gg.t index 7f973ef661..6ca8f7fb9c 100644 --- a/cpan/Digest-SHA/t/gg.t +++ b/cpan/Digest-SHA/t/gg.t @@ -1,6 +1,4 @@ # Test against short bitwise vectors from Jim Gillogly and Francois Grieu -# -# http://www.chiark.greenend.org.uk/pipermail/ukcrypto/1999-February/003538.html use strict; diff --git a/cpan/Digest-SHA/t/gglong.t b/cpan/Digest-SHA/t/gglong.t index 571048535e..12f7e5df81 100644 --- a/cpan/Digest-SHA/t/gglong.t +++ b/cpan/Digest-SHA/t/gglong.t @@ -1,6 +1,4 @@ # Test against long bitwise vectors from Jim Gillogly and Francois Grieu -# -# http://www.chiark.greenend.org.uk/pipermail/ukcrypto/1999-February/003538.html use strict; use FileHandle; diff --git a/cpan/Digest-SHA/t/nistbit.t b/cpan/Digest-SHA/t/nistbit.t index 9068815290..91dc2804d9 100644 --- a/cpan/Digest-SHA/t/nistbit.t +++ b/cpan/Digest-SHA/t/nistbit.t @@ -1,6 +1,4 @@ # Test against SHA-1 Sample Vectors from NIST -# -# ref: http://www.nsrl.nist.gov/testdata/ use strict; diff --git a/cpan/Digest-SHA/t/nistbyte.t b/cpan/Digest-SHA/t/nistbyte.t index 6d0c9ef070..ce133d6467 100644 --- a/cpan/Digest-SHA/t/nistbyte.t +++ b/cpan/Digest-SHA/t/nistbyte.t @@ -1,6 +1,4 @@ # Test against SHA-1 Sample Vectors from NIST -# -# ref: http://www.nsrl.nist.gov/testdata/ use strict; diff --git a/cpan/ExtUtils-MakeMaker/Changes b/cpan/ExtUtils-MakeMaker/Changes index 27910e6e66..d30b8580af 100644 --- a/cpan/ExtUtils-MakeMaker/Changes +++ b/cpan/ExtUtils-MakeMaker/Changes @@ -1,3 +1,29 @@ +6.63_02 Tue Nov 1 17:02:40 PDT 2011 + Test Fixes + * Inform BSDPAN (on FreeBSD) to not register modules installed while + testing. [rt.cpan.org 70232] + * Fix t/echo.t test on Win32. [rt.cpan.org 72097] + * Fix t/echo.t test on VMS. (Craig A. Berry) + * Fix t/Win32.t to handle the change to init_others(). [rt.cpan.org 72096] + + Bug Fixes + * Fix VMS's echo() for the interface changes. (Craig A. Berry) + * Fix VMS's dollar sign escaping. (Craig A. Berry) + + Misc + * my::bundle::copy_bundles() will only load File::Copy::Recursive if + it needs it, allowing vendors packaging MakeMaker to just delete + bundled/* + * Bundling can be overridden by setting the BUILDING_AS_PACKAGE + environment variable. This makes life easier for vendor packagers. + * Stripped the Windows newlines off Parse::CPAN::Meta to avoid + confusing old versions of Module::Signature. + + Docs + * README.packaging explains how to package MakeMaker + * bundled/README explains what the bundled directory is about. + + 6.63_01 Sun Oct 23 16:57:24 PDT 2011 Bug Fixes * Stray $ in the PPD and meta files (for example, from the ABSTRACT) diff --git a/cpan/ExtUtils-MakeMaker/MANIFEST b/cpan/ExtUtils-MakeMaker/MANIFEST index e42011b183..fe47e55b18 100644 --- a/cpan/ExtUtils-MakeMaker/MANIFEST +++ b/cpan/ExtUtils-MakeMaker/MANIFEST @@ -19,6 +19,7 @@ bundled/JSON-PP-Compat5006/JSON/PP/Compat5006.pm bundled/JSON-PP/JSON/PP.pm bundled/JSON-PP/JSON/PP/Boolean.pm bundled/Parse-CPAN-Meta/Parse/CPAN/Meta.pm +bundled/README bundled/Scalar-List-Utils/List/Util.pm bundled/Scalar-List-Utils/List/Util/PP.pm bundled/Scalar-List-Utils/Scalar/Util.pm @@ -65,6 +66,7 @@ my/bundles.pm NOTES PATCHING README +README.packaging t/00compile.t t/arch_check.t t/backwards.t diff --git a/cpan/ExtUtils-MakeMaker/README.packaging b/cpan/ExtUtils-MakeMaker/README.packaging new file mode 100644 index 0000000000..2e2d2952b8 --- /dev/null +++ b/cpan/ExtUtils-MakeMaker/README.packaging @@ -0,0 +1,23 @@ +If you wish to package MakeMaker in a binary package, here's some tips. + +tl;dr version: + +1a) Set the BUILDING_AS_PACKAGE environment variable to a true value. +OR +1b) Set the $BUILDING_AS_PACKAGE variable in the Makefile.PL to true. +2) Package normally, but watch out for dependency loops. + +MakeMaker cannot have any dependencies, everything depends on it and +that would be a dependency loop. It instead bundles pre-built copies +of all its non-core dependencies in the bundled/ directory. It adds +them to itself if they're not already installed. + +This can confuse packagers, it makes it look like MakeMaker contains a +lot more modules than it really does and causes conflicts. + +You can tell MakeMaker not to use it's bundles and instead declare the +dependencies normally. This is done either by setting the +BUILDING_AS_PACKAGE environment variable to true or by patching the +Makefile.PL and setting $BUILDING_AS_PACKAGE to true. On the down +side, there will be dependency loops which your packaging system will +have to resolve. diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Command/MM.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Command/MM.pm index b38bfc3dd3..8d82ee49b1 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Command/MM.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Command/MM.pm @@ -10,7 +10,7 @@ our @ISA = qw(Exporter); our @EXPORT = qw(test_harness pod2man perllocal_install uninstall warn_if_old_packlist); -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; my $Is_VMS = $^O eq 'VMS'; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist.pm index b8a6c9114a..a2ef4268d1 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist.pm @@ -2,7 +2,7 @@ package ExtUtils::Liblist; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; use File::Spec; require ExtUtils::Liblist::Kid; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist/Kid.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist/Kid.pm index 6b7d9fe735..eddcc72c74 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist/Kid.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Liblist/Kid.pm @@ -11,7 +11,7 @@ use 5.006; use strict; use warnings; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; use ExtUtils::MakeMaker::Config; use Cwd 'cwd'; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM.pm index 83fbe1c8a3..418eb190dd 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM.pm @@ -3,7 +3,7 @@ package ExtUtils::MM; use strict; use ExtUtils::MakeMaker::Config; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::Liblist; require ExtUtils::MakeMaker; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_AIX.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_AIX.pm index c9cf24ff45..0b0547ff52 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_AIX.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_AIX.pm @@ -1,7 +1,7 @@ package ExtUtils::MM_AIX; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Unix; our @ISA = qw(ExtUtils::MM_Unix); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Any.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Any.pm index 7791107b3d..fd856129af 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Any.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Any.pm @@ -1,7 +1,7 @@ package ExtUtils::MM_Any; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; use Carp; use File::Spec; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_BeOS.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_BeOS.pm index 65250c30c1..a31cf2320c 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_BeOS.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_BeOS.pm @@ -26,7 +26,7 @@ require ExtUtils::MM_Any; require ExtUtils::MM_Unix; our @ISA = qw( ExtUtils::MM_Any ExtUtils::MM_Unix ); -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; =item os_flavor diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Cygwin.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Cygwin.pm index 6586d823f1..de6f13085e 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Cygwin.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Cygwin.pm @@ -9,7 +9,7 @@ require ExtUtils::MM_Unix; require ExtUtils::MM_Win32; our @ISA = qw( ExtUtils::MM_Unix ); -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; =head1 NAME diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_DOS.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_DOS.pm index 105023cbfa..d902038412 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_DOS.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_DOS.pm @@ -2,7 +2,7 @@ package ExtUtils::MM_DOS; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Any; require ExtUtils::MM_Unix; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Darwin.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Darwin.pm index ab41435ea2..6d78157fe5 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Darwin.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Darwin.pm @@ -7,7 +7,7 @@ BEGIN { our @ISA = qw( ExtUtils::MM_Unix ); } -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; =head1 NAME diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_MacOS.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_MacOS.pm index dbb26af4b7..17bcde790e 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_MacOS.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_MacOS.pm @@ -2,7 +2,7 @@ package ExtUtils::MM_MacOS; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; sub new { die <<'UNSUPPORTED'; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_NW5.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_NW5.pm index e225096586..be1e155557 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_NW5.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_NW5.pm @@ -22,7 +22,7 @@ use strict; use ExtUtils::MakeMaker::Config; use File::Basename; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Win32; our @ISA = qw(ExtUtils::MM_Win32); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_OS2.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_OS2.pm index a07a4fb5db..f4ebd1e738 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_OS2.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_OS2.pm @@ -5,7 +5,7 @@ use strict; use ExtUtils::MakeMaker qw(neatvalue); use File::Spec; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Any; require ExtUtils::MM_Unix; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_QNX.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_QNX.pm index 0d314eb391..9d1f29223d 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_QNX.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_QNX.pm @@ -1,7 +1,7 @@ package ExtUtils::MM_QNX; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Unix; our @ISA = qw(ExtUtils::MM_Unix); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_UWIN.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_UWIN.pm index d2932f1816..d4b9370bd1 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_UWIN.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_UWIN.pm @@ -1,7 +1,7 @@ package ExtUtils::MM_UWIN; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Unix; our @ISA = qw(ExtUtils::MM_Unix); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Unix.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Unix.pm index a44582895b..b537393d3e 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Unix.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Unix.pm @@ -15,7 +15,7 @@ use ExtUtils::MakeMaker qw($Verbose neatvalue); # If we make $VERSION an our variable parse_version() breaks use vars qw($VERSION); -$VERSION = '6.63_01'; +$VERSION = '6.63_02'; $VERSION = eval $VERSION; require ExtUtils::MM_Any; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VMS.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VMS.pm index a63a2a1855..c204abcb2d 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VMS.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VMS.pm @@ -15,7 +15,7 @@ BEGIN { use File::Basename; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Any; require ExtUtils::MM_Unix; @@ -1771,13 +1771,21 @@ native Write command instead. Besides, its faster. =cut sub echo { - my($self, $text, $file, $appending) = @_; - $appending ||= 0; + my($self, $text, $file, $opts) = @_; - my $opencmd = $appending ? 'Open/Append' : 'Open/Write'; + # Compatibility with old options + if( !ref $opts ) { + my $append = $opts; + $opts = { append => $append || 0 }; + } + my $opencmd = $opts->{append} ? 'Open/Append' : 'Open/Write'; + + $opts->{allow_variables} = 0 unless defined $opts->{allow_variables}; + + my $ql_opts = { allow_variables => $opts->{allow_variables} }; my @cmds = ("\$(NOECHO) $opencmd MMECHOFILE $file "); - push @cmds, map { '$(NOECHO) Write MMECHOFILE '.$self->quote_literal($_) } + push @cmds, map { '$(NOECHO) Write MMECHOFILE '.$self->quote_literal($_, $ql_opts) } split /\n/, $text; push @cmds, '$(NOECHO) Close MMECHOFILE'; return @cmds; @@ -1801,6 +1809,37 @@ sub quote_literal { return qq{"$text"}; } +=item escape_dollarsigns + +Quote, don't escape. + +=cut + +sub escape_dollarsigns { + my($self, $text) = @_; + + # Quote dollar signs which are not starting a variable + $text =~ s{\$ (?!\() }{"\$"}gx; + + return $text; +} + + +=item escape_all_dollarsigns + +Quote, don't escape. + +=cut + +sub escape_all_dollarsigns { + my($self, $text) = @_; + + # Quote dollar signs + $text =~ s{\$}{"\$\"}gx; + + return $text; +} + =item escape_newlines =cut diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VOS.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VOS.pm index 7afc0db0cc..cdc6659b7f 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VOS.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VOS.pm @@ -1,7 +1,7 @@ package ExtUtils::MM_VOS; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Unix; our @ISA = qw(ExtUtils::MM_Unix); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win32.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win32.pm index 4592b1e833..43ae9a4dd5 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win32.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win32.pm @@ -27,7 +27,7 @@ use ExtUtils::MakeMaker qw( neatvalue ); require ExtUtils::MM_Any; require ExtUtils::MM_Unix; our @ISA = qw( ExtUtils::MM_Any ExtUtils::MM_Unix ); -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; $ENV{EMXSHELL} = 'sh'; # to run `commands` diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win95.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win95.pm index 67ca842798..694655c1ba 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win95.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_Win95.pm @@ -2,7 +2,7 @@ package ExtUtils::MM_Win95; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require ExtUtils::MM_Win32; our @ISA = qw(ExtUtils::MM_Win32); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MY.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MY.pm index 7051e3757f..92d4da0a4b 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MY.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MY.pm @@ -3,7 +3,7 @@ package ExtUtils::MY; use strict; require ExtUtils::MM; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; our @ISA = qw(ExtUtils::MM); { diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker.pm index 4f45b4b6a5..3885ab00f1 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker.pm @@ -18,7 +18,7 @@ our @Overridable; my @Prepend_parent; my %Recognized_Att_Keys; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; $VERSION = eval $VERSION; # Emulate something resembling CVS $Revision$ diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Config.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Config.pm index b35472798a..872f8c181c 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Config.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Config.pm @@ -2,7 +2,7 @@ package ExtUtils::MakeMaker::Config; use strict; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; use Config (); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/FAQ.pod b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/FAQ.pod index 7b51bf7725..675c95fc11 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/FAQ.pod +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/FAQ.pod @@ -1,6 +1,6 @@ package ExtUtils::MakeMaker::FAQ; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; 1; __END__ diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Tutorial.pod b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Tutorial.pod index 72884a648b..5120271979 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Tutorial.pod +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker/Tutorial.pod @@ -1,6 +1,6 @@ package ExtUtils::MakeMaker::Tutorial; -our $VERSION = 6.63_01; +our $VERSION = 6.63_02; =head1 NAME diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mkbootstrap.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mkbootstrap.pm index 1b8fc20baf..b5f19ea95c 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mkbootstrap.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mkbootstrap.pm @@ -3,7 +3,7 @@ package ExtUtils::Mkbootstrap; # There's just too much Dynaloader incest here to turn on strict vars. use strict 'refs'; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; require Exporter; our @ISA = ('Exporter'); diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mksymlists.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mksymlists.pm index 495424eca5..f086085592 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mksymlists.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/Mksymlists.pm @@ -10,7 +10,7 @@ use Config; our @ISA = qw(Exporter); our @EXPORT = qw(&Mksymlists); -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; sub Mksymlists { my(%spec) = @_; diff --git a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/testlib.pm b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/testlib.pm index 3b48232dd1..44832ee353 100644 --- a/cpan/ExtUtils-MakeMaker/lib/ExtUtils/testlib.pm +++ b/cpan/ExtUtils-MakeMaker/lib/ExtUtils/testlib.pm @@ -3,7 +3,7 @@ package ExtUtils::testlib; use strict; use warnings; -our $VERSION = '6.63_01'; +our $VERSION = '6.63_02'; use Cwd; use File::Spec; diff --git a/cpan/ExtUtils-MakeMaker/t/MM_Win32.t b/cpan/ExtUtils-MakeMaker/t/MM_Win32.t index 44a01e3ba7..4bc030d902 100644 --- a/cpan/ExtUtils-MakeMaker/t/MM_Win32.t +++ b/cpan/ExtUtils-MakeMaker/t/MM_Win32.t @@ -9,11 +9,9 @@ use strict; use Test::More; BEGIN { - if ($^O =~ /MSWin32/i) { - plan tests => 61; - } else { - plan skip_all => 'This is not Win32'; - } + if ($^O !~ /MSWin32/i) { + plan skip_all => 'This is not Win32'; + } } use Config; @@ -104,13 +102,20 @@ delete $ENV{PATHEXT} unless $had_pathext; 'catfile() eq File::Spec->catfile()' ); } -# init_others(): check if all keys are created and set? -# qw( TOUCH CHMOD CP RM_F RM_RF MV NOOP TEST_F LD AR LDLOADLIBS DEV_NUL ) -{ +# init_tools(): check if all keys are created and set? +note "init_tools creates expected keys"; { + my $mm_w32 = bless( { BASEEXT => 'Foo', MAKE => $Config{make} }, 'MM' ); + $mm_w32->init_tools(); + my @keys = qw( TOUCH CHMOD CP RM_F RM_RF MV NOOP NOECHO ECHO ECHO_N TEST_F DEV_NULL ); + for my $key ( @keys ) { + ok( $mm_w32->{ $key }, "init_tools: $key" ); + } +} + +note "init_others creates expected keys"; { my $mm_w32 = bless( { BASEEXT => 'Foo', MAKE => $Config{make} }, 'MM' ); $mm_w32->init_others(); - my @keys = qw( TOUCH CHMOD CP RM_F RM_RF MV NOOP - TEST_F LD AR LDLOADLIBS DEV_NULL ); + my @keys = qw( LD AR LDLOADLIBS ); for my $key ( @keys ) { ok( $mm_w32->{ $key }, "init_others: $key" ); } @@ -185,7 +190,7 @@ delete $ENV{PATHEXT} unless $had_pathext; { my $path = 'c:\\Program Files/SomeApp\\Progje.exe'; is( $MM->canonpath( $path ), File::Spec->canonpath( $path ), - 'canonpath() eq File::Spec->canonpath' ); + 'canonpath() eq File::Spec->canonpath' ); } # perl_script() @@ -235,28 +240,28 @@ unlink "${script_name}$script_ext" if -f "${script_name}$script_ext"; SKIP: { skip("Not using 'nmake'", 2) unless $Config{make} eq 'nmake'; ok( $MM->is_make_type('nmake'), '->is_make_type(nmake) true' ); - ok( ! $MM->is_make_type('dmake'), '->is_make_type(dmake) false' ); + ok( ! $MM->is_make_type('dmake'), '->is_make_type(dmake) false' ); } # Check for literal nmake SKIP: { skip("Not using /nmake/", 2) unless $Config{make} =~ /nmake/; ok( $MM->is_make_type('nmake'), '->is_make_type(nmake) true' ); - ok( ! $MM->is_make_type('dmake'), '->is_make_type(dmake) false' ); + ok( ! $MM->is_make_type('dmake'), '->is_make_type(dmake) false' ); } # Check for literal dmake SKIP: { skip("Not using 'dmake'", 2) unless $Config{make} eq 'dmake'; ok( $MM->is_make_type('dmake'), '->is_make_type(dmake) true' ); - ok( ! $MM->is_make_type('nmake'), '->is_make_type(nmake) false' ); + ok( ! $MM->is_make_type('nmake'), '->is_make_type(nmake) false' ); } # Check for literal dmake SKIP: { skip("Not using /dmake/", 2) unless $Config{make} =~ /dmake/; ok( $MM->is_make_type('dmake'), '->is_make_type(dmake) true' ); - ok( ! $MM->is_make_type('nmake'), '->is_make_type(nmake) false' ); + ok( ! $MM->is_make_type('nmake'), '->is_make_type(nmake) false' ); } } @@ -275,128 +280,108 @@ unlink "${script_name}$script_ext" if -f "${script_name}$script_ext"; # _identify_compiler_environment() { - sub _run_cc_id { - my ( $config ) = @_; - - $config->{cc} ||= ''; - - my @cc_env = ExtUtils::MM_Win32::_identify_compiler_environment( $config ); - - my %cc_env = ( BORLAND => $cc_env[0], GCC => $cc_env[1], DLLTOOL => $cc_env[2] ); - - return \%cc_env; - } - - sub _check_cc_id_value { - my ( $test ) = @_; - - my $res = _run_cc_id( $test->{config} ); - - fail( "unknown key '$test->{key}'" ) if !exists $res->{$test->{key}}; - my $val = $res->{$test->{key}}; - - is( $val, $test->{expect}, $test->{desc} ); - - return; - } - - my @tests = ( - { - config => {}, - key => 'DLLTOOL', expect => 'dlltool', - desc => 'empty dlltool defaults to "dlltool"', - }, - { - config => { dlltool => 'test' }, - key => 'DLLTOOL', expect => 'test', - desc => 'dlltool value is taken over verbatim from %Config, if set', - }, - { - config => {}, - key => 'GCC', expect => 0, - desc => 'empty cc is not recognized as gcc', - }, - { - config => { cc => 'gcc' }, - key => 'GCC', expect => 1, - desc => 'plain "gcc" is recognized', - }, - { - config => { cc => 'C:/MinGW/bin/gcc.exe' }, - key => 'GCC', expect => 1, - desc => 'fully qualified "gcc" is recognized', - }, - { - config => { cc => 'C:/MinGW/bin/gcc-1.exe' }, - key => 'GCC', expect => 1, - desc => 'dash-extended gcc is recognized', - }, - { - config => { cc => 'C:/MinGW/bin/gcc_1.exe' }, - key => 'GCC', expect => 0, - desc => 'underscore-extended gcc is not recognized', - }, - { - config => {}, - key => 'BORLAND', expect => 0, - desc => 'empty cc is not recognized as borland', - }, - { - config => { cc => 'bcc' }, - key => 'BORLAND', expect => 1, - desc => 'plain "bcc" is recognized', - }, - { - config => { cc => 'C:/Borland/bin/bcc.exe' }, - key => 'BORLAND', expect => 0, - desc => 'fully qualified borland cc is not recognized', - }, - { - config => { cc => 'bcc-1.exe' }, - key => 'BORLAND', expect => 1, - desc => 'dash-extended borland cc is recognized', - }, - { - config => { cc => 'bcc_1.exe' }, - key => 'BORLAND', expect => 1, - desc => 'underscore-extended borland cc is recognized', - }, - ); - - _check_cc_id_value($_) for @tests; + sub _run_cc_id { + my ( $config ) = @_; -} + $config->{cc} ||= ''; -package FakeOut; + my @cc_env = ExtUtils::MM_Win32::_identify_compiler_environment( $config ); -sub TIEHANDLE { - bless(\(my $scalar), $_[0]); -} + my %cc_env = ( BORLAND => $cc_env[0], GCC => $cc_env[1], DLLTOOL => $cc_env[2] ); -sub PRINT { - my $self = shift; - $$self .= shift; -} + return \%cc_env; + } -__END__ + sub _check_cc_id_value { + my ( $test ) = @_; -=head1 NAME + my $res = _run_cc_id( $test->{config} ); -MM_Win32.t - Tests for ExtUtils::MM_Win32 + fail( "unknown key '$test->{key}'" ) if !exists $res->{$test->{key}}; + my $val = $res->{$test->{key}}; -=head1 TODO + is( $val, $test->{expect}, $test->{desc} ); - - Methods to still be checked: - # static_lib() should look into that - # dynamic_bs() should look into that - # dynamic_lib() should look into that - # xs_o() should look into that - # top_targets() should look into that - # dist_ci() should look into that - # dist_core() should look into that + return; + } -=head1 AUTHOR + my @tests = ( + { + config => {}, + key => 'DLLTOOL', expect => 'dlltool', + desc => 'empty dlltool defaults to "dlltool"', + }, + { + config => { dlltool => 'test' }, + key => 'DLLTOOL', expect => 'test', + desc => 'dlltool value is taken over verbatim from %Config, if set', + }, + { + config => {}, + key => 'GCC', expect => 0, + desc => 'empty cc is not recognized as gcc', + }, + { + config => { cc => 'gcc' }, + key => 'GCC', expect => 1, + desc => 'plain "gcc" is recognized', + }, + { + config => { cc => 'C:/MinGW/bin/gcc.exe' }, + key => 'GCC', expect => 1, + desc => 'fully qualified "gcc" is recognized', + }, + { + config => { cc => 'C:/MinGW/bin/gcc-1.exe' }, + key => 'GCC', expect => 1, + desc => 'dash-extended gcc is recognized', + }, + { + config => { cc => 'C:/MinGW/bin/gcc_1.exe' }, + key => 'GCC', expect => 0, + desc => 'underscore-extended gcc is not recognized', + }, + { + config => {}, + key => 'BORLAND', expect => 0, + desc => 'empty cc is not recognized as borland', + }, + { + config => { cc => 'bcc' }, + key => 'BORLAND', expect => 1, + desc => 'plain "bcc" is recognized', + }, + { + config => { cc => 'C:/Borland/bin/bcc.exe' }, + key => 'BORLAND', expect => 0, + desc => 'fully qualified borland cc is not recognized', + }, + { + config => { cc => 'bcc-1.exe' }, + key => 'BORLAND', expect => 1, + desc => 'dash-extended borland cc is recognized', + }, + { + config => { cc => 'bcc_1.exe' }, + key => 'BORLAND', expect => 1, + desc => 'underscore-extended borland cc is recognized', + }, + ); + + _check_cc_id_value($_) for @tests; +} -20011228 Abe Timmerman <abe@ztreet.demon.nl> -=cut +done_testing; + + +package FakeOut; + +sub TIEHANDLE { + bless(\(my $scalar), $_[0]); +} + +sub PRINT { + my $self = shift; + $$self .= shift; +} diff --git a/cpan/ExtUtils-MakeMaker/t/echo.t b/cpan/ExtUtils-MakeMaker/t/echo.t index a4ef9c7fe4..04fcc4322d 100644 --- a/cpan/ExtUtils-MakeMaker/t/echo.t +++ b/cpan/ExtUtils-MakeMaker/t/echo.t @@ -22,9 +22,13 @@ use Test::More; my $cwd = abs_path; my $perl = which_perl; my $make = make_run(); -my $mm = bless { NAME => "Foo", MAKE => $Config{make} }, "MM"; +my $mm = bless { NAME => "Foo", MAKE => $Config{make}, PARENT_NAME => '' }, "MM"; +$mm->init_INST; # *PERLRUN needs INIT_* +$mm->init_PERL; # generic ECHO needs ABSPERLRUN $mm->init_tools; # need ECHO +# Run Perl with the currently installing MakeMaker +$mm->{$_} .= q[ "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"] for qw( PERLRUN FULLPERLRUN ABSPERLRUN ); #------------------- Testing functions @@ -42,8 +46,12 @@ sub test_for_echo { { open my $makefh, ">", "Makefile" or croak "Can't open Makefile: $!"; print $makefh "FOO=42\n"; # a variable to test with - print $makefh "ECHO=$mm->{ECHO}\n\n"; - print $makefh "all:\n"; + + for my $key (qw(INST_ARCHLIB INST_LIB PERL ABSPERL ABSPERLRUN ECHO)) { + print $makefh "$key=$mm->{$key}\n"; + } + + print $makefh "all :\n"; for my $args (@$calls) { print $makefh map { "\t$_\n" } $mm->echo(@$args); } diff --git a/cpan/ExtUtils-MakeMaker/t/lib/MakeMaker/Test/Utils.pm b/cpan/ExtUtils-MakeMaker/t/lib/MakeMaker/Test/Utils.pm index b4e450407f..9a6ab60e35 100644 --- a/cpan/ExtUtils-MakeMaker/t/lib/MakeMaker/Test/Utils.pm +++ b/cpan/ExtUtils-MakeMaker/t/lib/MakeMaker/Test/Utils.pm @@ -7,8 +7,9 @@ use Config; require Exporter; our @ISA = qw(Exporter); -our $Is_VMS = $^O eq 'VMS'; -our $Is_MacOS = $^O eq 'MacOS'; +our $Is_VMS = $^O eq 'VMS'; +our $Is_MacOS = $^O eq 'MacOS'; +our $Is_FreeBSD = $^O eq 'freebsd'; our @EXPORT = qw(which_perl perl_lib makefile_name makefile_backup make make_run run make_macro calibrate_mtime @@ -32,11 +33,20 @@ our @EXPORT = qw(which_perl perl_lib makefile_name makefile_backup MAKEFLAGS ); + my %default_env_keys; + + # Inform the BSDPAN hacks not to register modules installed for testing. + $default_env_keys{PORTOBJFORMAT} = 1 if $Is_FreeBSD; + # Remember the ENV values because on VMS %ENV is global # to the user, not the process. my %restore_env_keys; sub clean_env { + for my $key (keys %default_env_keys) { + $ENV{$key} = $default_env_keys{$key} unless $ENV{$key}; + } + for my $key (@delete_env_keys) { if( exists $ENV{$key} ) { $restore_env_keys{$key} = delete $ENV{$key}; diff --git a/cpan/IO-Compress/Changes b/cpan/IO-Compress/Changes index 675689c9cd..1f0e5b4dbb 100644 --- a/cpan/IO-Compress/Changes +++ b/cpan/IO-Compress/Changes @@ -1,6 +1,41 @@ CHANGES ------- + 2.042 17 November 2011 + + * IO::Compress::Zip + - Added exUnixN option to allow creation of the "ux" extra field. + This allows 32-bit UID/GID to be stored. + - In one-shot mode use exUnixN rather than exUnix2 for the UID/GID. + + * IO::Compress::Zlib::Extra::parseExtraField + - Fixed bad test for length of ID field + [RT# 72329 & #72505] + + 2.040 28 October 2011 + + * t/105oneshot-zip-only.t + - CanonicalName test failure on Windows + [RT# 68926] + + * IO::Compress::Zip + - ExtAttr now populates MSDOS attributes + + 2.039 28 October 2011 + + * IO::Compress::Zip + - Added CanonicalName option. + Note this option is set to true by default. + - Added FilterName option + + * IO::Unompress::Base + - Fixed issue where setting $\ would corrupt the uncompressed data. + Thanks to Steffen Goeldner for reporting the issue. + + * t/050interop-*.t + - Handle case when external command contains a whitespace + RT #71335 + 2.037 22 June 2011 * IO::Uncompress diff --git a/cpan/IO-Compress/Makefile.PL b/cpan/IO-Compress/Makefile.PL index 3cb62725a8..b8725a3df4 100644 --- a/cpan/IO-Compress/Makefile.PL +++ b/cpan/IO-Compress/Makefile.PL @@ -3,7 +3,7 @@ use strict ; require 5.004 ; -$::VERSION = '2.037' ; +$::VERSION = '2.042' ; use private::MakeUtil; use ExtUtils::MakeMaker 5.16 ; diff --git a/cpan/IO-Compress/README b/cpan/IO-Compress/README index 6f51a805b8..368234d7aa 100644 --- a/cpan/IO-Compress/README +++ b/cpan/IO-Compress/README @@ -1,9 +1,9 @@ IO-Compress - Version 2.037 + Version 2.042 - 22nd June 2011 + 17th November 2011 Copyright (c) 1995-2011 Paul Marquess. All rights reserved. This program is free software; you can redistribute it @@ -89,7 +89,7 @@ To help me help you, I need all of the following information: If you haven't installed IO-Compress then search IO::Compress::Gzip.pm for a line like this: - $VERSION = "2.037" ; + $VERSION = "2.042" ; 2. If you are having problems building IO-Compress, send me a complete log of what happened. Start by unpacking the IO-Compress diff --git a/cpan/IO-Compress/lib/Compress/Zlib.pm b/cpan/IO-Compress/lib/Compress/Zlib.pm index 5c19ccb523..212c6e0747 100644 --- a/cpan/IO-Compress/lib/Compress/Zlib.pm +++ b/cpan/IO-Compress/lib/Compress/Zlib.pm @@ -7,17 +7,17 @@ use Carp ; use IO::Handle ; use Scalar::Util qw(dualvar); -use IO::Compress::Base::Common 2.037 ; -use Compress::Raw::Zlib 2.037 ; -use IO::Compress::Gzip 2.037 ; -use IO::Uncompress::Gunzip 2.037 ; +use IO::Compress::Base::Common 2.042 ; +use Compress::Raw::Zlib 2.042 ; +use IO::Compress::Gzip 2.042 ; +use IO::Uncompress::Gunzip 2.042 ; use strict ; use warnings ; use bytes ; our ($VERSION, $XS_VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); -$VERSION = '2.037'; +$VERSION = '2.042'; $XS_VERSION = $VERSION; $VERSION = eval $VERSION; @@ -453,7 +453,7 @@ sub inflate package Compress::Zlib ; -use IO::Compress::Gzip::Constants 2.037 ; +use IO::Compress::Gzip::Constants 2.042 ; sub memGzip($) { diff --git a/cpan/IO-Compress/lib/IO/Compress/Adapter/Bzip2.pm b/cpan/IO-Compress/lib/IO/Compress/Adapter/Bzip2.pm index ca351e2b96..c2d3e131c0 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Adapter/Bzip2.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Adapter/Bzip2.pm @@ -4,13 +4,13 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status); +use IO::Compress::Base::Common 2.042 qw(:Status); #use Compress::Bzip2 ; -use Compress::Raw::Bzip2 2.037 ; +use Compress::Raw::Bzip2 2.042 ; our ($VERSION); -$VERSION = '2.037'; +$VERSION = '2.042'; sub mkCompObject { diff --git a/cpan/IO-Compress/lib/IO/Compress/Adapter/Deflate.pm b/cpan/IO-Compress/lib/IO/Compress/Adapter/Deflate.pm index 91e5843e9b..74dead0ef4 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Adapter/Deflate.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Adapter/Deflate.pm @@ -4,12 +4,12 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status); +use IO::Compress::Base::Common 2.042 qw(:Status); -use Compress::Raw::Zlib 2.037 qw(Z_OK Z_FINISH MAX_WBITS) ; +use Compress::Raw::Zlib 2.042 qw(Z_OK Z_FINISH MAX_WBITS) ; our ($VERSION); -$VERSION = '2.037'; +$VERSION = '2.042'; sub mkCompObject { diff --git a/cpan/IO-Compress/lib/IO/Compress/Adapter/Identity.pm b/cpan/IO-Compress/lib/IO/Compress/Adapter/Identity.pm index c48c8883a2..8a97740bab 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Adapter/Identity.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Adapter/Identity.pm @@ -4,10 +4,10 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status); +use IO::Compress::Base::Common 2.042 qw(:Status); our ($VERSION); -$VERSION = '2.037'; +$VERSION = '2.042'; sub mkCompObject { diff --git a/cpan/IO-Compress/lib/IO/Compress/Base.pm b/cpan/IO-Compress/lib/IO/Compress/Base.pm index 6f491f668e..464f401956 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Base.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Base.pm @@ -6,7 +6,7 @@ require 5.004 ; use strict ; use warnings; -use IO::Compress::Base::Common 2.037 ; +use IO::Compress::Base::Common 2.042 ; use IO::File qw(SEEK_SET SEEK_END); ; use Scalar::Util qw(blessed readonly); @@ -20,7 +20,7 @@ use bytes; our (@ISA, $VERSION); @ISA = qw(Exporter IO::File); -$VERSION = '2.037'; +$VERSION = '2.042'; #Can't locate object method "SWASHNEW" via package "utf8" (perhaps you forgot to load "utf8"?) at .../ext/Compress-Zlib/Gzip/blib/lib/Compress/Zlib/Common.pm line 16. @@ -123,9 +123,9 @@ sub output return 1 if length $data == 0 && ! $last ; - if ( *$self->{FilterEnvelope} ) { + if ( *$self->{FilterContainer} ) { *_ = \$data; - &{ *$self->{FilterEnvelope} }(); + &{ *$self->{FilterContainer} }(); } if (length $data) { @@ -163,7 +163,7 @@ sub checkParams 'Append' => [1, 1, Parse_boolean, 0], 'BinModeIn' => [1, 1, Parse_boolean, 0], - 'FilterEnvelope' => [1, 1, Parse_any, undef], + 'FilterContainer' => [1, 1, Parse_code, undef], $self->getExtraParams(), *$self->{OneShot} ? $self->getOneShotParams() @@ -214,7 +214,7 @@ sub _create my $merge = $got->value('Merge') ; my $appendOutput = $got->value('Append') || $merge ; *$obj->{Append} = $appendOutput; - *$obj->{FilterEnvelope} = $got->value('FilterEnvelope') ; + *$obj->{FilterContainer} = $got->value('FilterContainer') ; if ($merge) { diff --git a/cpan/IO-Compress/lib/IO/Compress/Base/Common.pm b/cpan/IO-Compress/lib/IO/Compress/Base/Common.pm index b6d3342619..5778d4f429 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Base/Common.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Base/Common.pm @@ -11,7 +11,7 @@ use File::GlobMapper; require Exporter; our ($VERSION, @ISA, @EXPORT, %EXPORT_TAGS, $HAS_ENCODE); @ISA = qw(Exporter); -$VERSION = '2.037'; +$VERSION = '2.042'; @EXPORT = qw( isaFilehandle isaFilename whatIsInput whatIsOutput isaFileGlobString cleanFileGlobString oneTarget @@ -451,7 +451,8 @@ sub createSelfTiedObject $EXPORT_TAGS{Parse} = [qw( ParseParameters Parse_any Parse_unsigned Parse_signed - Parse_boolean Parse_custom Parse_string + Parse_boolean Parse_string + Parse_code Parse_multiple Parse_writable_scalar ) ]; @@ -463,7 +464,7 @@ use constant Parse_unsigned => 0x02; use constant Parse_signed => 0x04; use constant Parse_boolean => 0x08; use constant Parse_string => 0x10; -use constant Parse_custom => 0x12; +use constant Parse_code => 0x20; #use constant Parse_store_ref => 0x100 ; use constant Parse_multiple => 0x100 ; @@ -741,6 +742,13 @@ sub IO::Compress::Base::Parameters::_checkType $$output = defined $value ? $value != 0 : 0 ; return 1; } + elsif ($type & Parse_code) + { + return $self->setError("Parameter '$key' must be a code reference, got '$value'") + if $validate && (! defined $value || ref $value ne 'CODE') ; + $$output = defined $value ? $value : "" ; + return 1; + } elsif ($type & Parse_string) { $$output = defined $value ? $value : "" ; @@ -937,7 +945,7 @@ sub subtract if ($value > $self->[LOW]) { -- $self->[HIGH] ; - $self->[LOW] = MAX32 - $self->[LOW] ; + $self->[LOW] = MAX32 - $value + $self->[LOW] + 1 ; } else { $self->[LOW] -= $value; diff --git a/cpan/IO-Compress/lib/IO/Compress/Bzip2.pm b/cpan/IO-Compress/lib/IO/Compress/Bzip2.pm index e4bb983f2d..f6767875e9 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Bzip2.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Bzip2.pm @@ -5,16 +5,16 @@ use warnings; use bytes; require Exporter ; -use IO::Compress::Base 2.037 ; +use IO::Compress::Base 2.042 ; -use IO::Compress::Base::Common 2.037 qw(createSelfTiedObject); -use IO::Compress::Adapter::Bzip2 2.037 ; +use IO::Compress::Base::Common 2.042 qw(createSelfTiedObject); +use IO::Compress::Adapter::Bzip2 2.042 ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $Bzip2Error); -$VERSION = '2.037'; +$VERSION = '2.042'; $Bzip2Error = ''; @ISA = qw(Exporter IO::Compress::Base); @@ -51,7 +51,7 @@ sub getExtraParams { my $self = shift ; - use IO::Compress::Base::Common 2.037 qw(:Parse); + use IO::Compress::Base::Common 2.042 qw(:Parse); return ( 'BlockSize100K' => [0, 1, Parse_unsigned, 1], diff --git a/cpan/IO-Compress/lib/IO/Compress/Deflate.pm b/cpan/IO-Compress/lib/IO/Compress/Deflate.pm index f2f0ce97a1..c8b8f5e5c0 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Deflate.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Deflate.pm @@ -6,16 +6,16 @@ use bytes; require Exporter ; -use IO::Compress::RawDeflate 2.037 ; +use IO::Compress::RawDeflate 2.042 ; -use Compress::Raw::Zlib 2.037 ; -use IO::Compress::Zlib::Constants 2.037 ; -use IO::Compress::Base::Common 2.037 qw(createSelfTiedObject); +use Compress::Raw::Zlib 2.042 ; +use IO::Compress::Zlib::Constants 2.042 ; +use IO::Compress::Base::Common 2.042 qw(createSelfTiedObject); our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $DeflateError); -$VERSION = '2.037'; +$VERSION = '2.042'; $DeflateError = ''; @ISA = qw(Exporter IO::Compress::RawDeflate); diff --git a/cpan/IO-Compress/lib/IO/Compress/Gzip.pm b/cpan/IO-Compress/lib/IO/Compress/Gzip.pm index 343d87fbdd..b9d3149e6b 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Gzip.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Gzip.pm @@ -8,12 +8,12 @@ use warnings; use bytes; -use IO::Compress::RawDeflate 2.037 ; +use IO::Compress::RawDeflate 2.042 ; -use Compress::Raw::Zlib 2.037 ; -use IO::Compress::Base::Common 2.037 qw(:Status :Parse createSelfTiedObject); -use IO::Compress::Gzip::Constants 2.037 ; -use IO::Compress::Zlib::Extra 2.037 ; +use Compress::Raw::Zlib 2.042 ; +use IO::Compress::Base::Common 2.042 qw(:Status :Parse createSelfTiedObject); +use IO::Compress::Gzip::Constants 2.042 ; +use IO::Compress::Zlib::Extra 2.042 ; BEGIN { @@ -27,7 +27,7 @@ require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GzipError); -$VERSION = '2.037'; +$VERSION = '2.042'; $GzipError = '' ; @ISA = qw(Exporter IO::Compress::RawDeflate); diff --git a/cpan/IO-Compress/lib/IO/Compress/Gzip/Constants.pm b/cpan/IO-Compress/lib/IO/Compress/Gzip/Constants.pm index 918f3d21a3..6e87905932 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Gzip/Constants.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Gzip/Constants.pm @@ -9,7 +9,7 @@ require Exporter; our ($VERSION, @ISA, @EXPORT, %GZIP_OS_Names); our ($GZIP_FNAME_INVALID_CHAR_RE, $GZIP_FCOMMENT_INVALID_CHAR_RE); -$VERSION = '2.037'; +$VERSION = '2.042'; @ISA = qw(Exporter); diff --git a/cpan/IO-Compress/lib/IO/Compress/RawDeflate.pm b/cpan/IO-Compress/lib/IO/Compress/RawDeflate.pm index 4c7548882e..5d3a029f9a 100644 --- a/cpan/IO-Compress/lib/IO/Compress/RawDeflate.pm +++ b/cpan/IO-Compress/lib/IO/Compress/RawDeflate.pm @@ -7,16 +7,16 @@ use warnings; use bytes; -use IO::Compress::Base 2.037 ; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); -use IO::Compress::Adapter::Deflate 2.037 ; +use IO::Compress::Base 2.042 ; +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); +use IO::Compress::Adapter::Deflate 2.042 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %DEFLATE_CONSTANTS, %EXPORT_TAGS, $RawDeflateError); -$VERSION = '2.037'; +$VERSION = '2.042'; $RawDeflateError = ''; @ISA = qw(Exporter IO::Compress::Base); @@ -142,8 +142,8 @@ sub getZlibParams { my $self = shift ; - use IO::Compress::Base::Common 2.037 qw(:Parse); - use Compress::Raw::Zlib 2.037 qw(Z_DEFLATED Z_DEFAULT_COMPRESSION Z_DEFAULT_STRATEGY); + use IO::Compress::Base::Common 2.042 qw(:Parse); + use Compress::Raw::Zlib 2.042 qw(Z_DEFLATED Z_DEFAULT_COMPRESSION Z_DEFAULT_STRATEGY); return ( diff --git a/cpan/IO-Compress/lib/IO/Compress/Zip.pm b/cpan/IO-Compress/lib/IO/Compress/Zip.pm index 55588c0a34..dc36a5d6fc 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Zip.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Zip.pm @@ -4,26 +4,28 @@ use strict ; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); -use IO::Compress::RawDeflate 2.037 ; -use IO::Compress::Adapter::Deflate 2.037 ; -use IO::Compress::Adapter::Identity 2.037 ; -use IO::Compress::Zlib::Extra 2.037 ; -use IO::Compress::Zip::Constants 2.037 ; +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); +use IO::Compress::RawDeflate 2.042 ; +use IO::Compress::Adapter::Deflate 2.042 ; +use IO::Compress::Adapter::Identity 2.042 ; +use IO::Compress::Zlib::Extra 2.042 ; +use IO::Compress::Zip::Constants 2.042 ; +use File::Spec(); +use Config; -use Compress::Raw::Zlib 2.037 qw(crc32) ; +use Compress::Raw::Zlib 2.042 qw(crc32) ; BEGIN { eval { require IO::Compress::Adapter::Bzip2 ; - import IO::Compress::Adapter::Bzip2 2.037 ; + import IO::Compress::Adapter::Bzip2 2.042 ; require IO::Compress::Bzip2 ; - import IO::Compress::Bzip2 2.037 ; + import IO::Compress::Bzip2 2.042 ; } ; eval { require IO::Compress::Adapter::Lzma ; - import IO::Compress::Adapter::Lzma 2.036 ; + import IO::Compress::Adapter::Lzma 2.042 ; require IO::Compress::Lzma ; - import IO::Compress::Lzma 2.037 ; + import IO::Compress::Lzma 2.042 ; } ; } @@ -32,7 +34,7 @@ require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $ZipError); -$VERSION = '2.037'; +$VERSION = '2.042'; $ZipError = ''; @ISA = qw(Exporter IO::Compress::RawDeflate); @@ -155,6 +157,52 @@ sub filterUncompressed } } +sub canonicalName +{ + # This sub is derived from Archive::Zip::_asZipDirName + + # Return the normalized name as used in a zip file (path + # separators become slashes, etc.). + # Will translate internal slashes in path components (i.e. on Macs) to + # underscores. Discards volume names. + # When $forceDir is set, returns paths with trailing slashes + # + # input output + # . '.' + # ./a a + # ./a/b a/b + # ./a/b/ a/b + # a/b/ a/b + # /a/b/ a/b + # c:\a\b\c.doc a/b/c.doc # on Windows + # "i/o maps:whatever" i_o maps/whatever # on Macs + + my $name = shift; + my $forceDir = shift ; + + my ( $volume, $directories, $file ) = + File::Spec->splitpath( File::Spec->canonpath($name), $forceDir ); + + my @dirs = map { $_ =~ s{/}{_}g; $_ } + File::Spec->splitdir($directories); + + if ( @dirs > 0 ) { pop (@dirs) if $dirs[-1] eq '' } # remove empty component + push @dirs, defined($file) ? $file : '' ; + + my $normalised_path = join '/', @dirs; + + # Leading directory separators should not be stored in zip archives. + # Example: + # C:\a\b\c\ a/b/c + # C:\a\b\c.txt a/b/c.txt + # /a/b/c/ a/b/c + # /a/b/c.txt a/b/c.txt + $normalised_path =~ s{^/}{}; # remove leading separator + + return $normalised_path; +} + + sub mkHeader { my $self = shift; @@ -163,11 +211,27 @@ sub mkHeader *$self->{ZipData}{LocalHdrOffset} = U64::clone(*$self->{ZipData}{Offset}); + my $comment = ''; + $comment = $param->value('Comment') || ''; + my $filename = ''; $filename = $param->value('Name') || ''; - my $comment = ''; - $comment = $param->value('Comment') || ''; + $filename = canonicalName($filename) + if length $filename && $param->value('CanonicalName') ; + + if (defined *$self->{ZipData}{FilterName} ) { + local *_ = \$filename ; + &{ *$self->{ZipData}{FilterName} }() ; + } + +# if ( $param->value('UTF8') ) { +# require Encode ; +# $filename = Encode::encode_utf8($filename) +# if length $filename ; +# $comment = Encode::encode_utf8($filename) +# if length $comment ; +# } my $hdr = ''; @@ -202,10 +266,20 @@ sub mkHeader $ctlExtra .= mkExtendedTime($param->value('MTime')); } - if ( $param->value('UID') && $osCode == ZIP_OS_CODE_UNIX) + if ( $osCode == ZIP_OS_CODE_UNIX ) { - $extra .= mkUnix2Extra( $param->value('UID'), $param->value('GID')); - $ctlExtra .= mkUnix2Extra(); + if ( $param->value('want_exUnixN') ) + { + my $ux3 = mkUnixNExtra( @{ $param->value('want_exUnixN') }); + $extra .= $ux3; + $ctlExtra .= $ux3; + } + + if ( $param->value('exUnix2') ) + { + $extra .= mkUnix2Extra( @{ $param->value('exUnix2') }); + $ctlExtra .= mkUnix2Extra(); + } } $extFileAttr = $param->value('ExtAttr') @@ -226,6 +300,9 @@ sub mkHeader $gpFlag |= ZIP_GP_FLAG_LZMA_EOS_PRESENT if $method == ZIP_CM_LZMA ; + #$gpFlag |= ZIP_GP_FLAG_LANGUAGE_ENCODING + #if $param->value('UTF8') && length($filename) + length($comment); + my $version = $ZIP_CM_MIN_VERSIONS{$method}; $version = ZIP64_MIN_VERSION @@ -478,16 +555,20 @@ sub ckParams $got->value("CTime", $timeRef->[2]); } - # Unix2 Extended Attribute - if ($got->parsed('exUnix2') ) { - my $timeRef = $got->value('exUnix2'); - if ( defined $timeRef) { - return $self->saveErrorString(undef, "exUnix2 not a 2-element array ref") - if ref $timeRef ne 'ARRAY' || @$timeRef != 2; + # Unix2/3 Extended Attribute + for my $name (qw(exUnix2 exUnixN)) + { + if ($got->parsed($name) ) { + my $idRef = $got->value($name); + if ( defined $idRef) { + return $self->saveErrorString(undef, "$name not a 2-element array ref") + if ref $idRef ne 'ARRAY' || @$idRef != 2; + } + + $got->value("UID", $idRef->[0]); + $got->value("GID", $idRef->[1]); + $got->value("want_$name", $idRef); } - - $got->value("UID", $timeRef->[0]); - $got->value("GID", $timeRef->[1]); } *$self->{ZipData}{AnyZip64} = 1 @@ -532,6 +613,12 @@ sub ckParams *$self->{ZipData}{Method} = ZIP_CM_STORE; } + if ($got->parsed('FilterName')) { + my $v = $got->value('FilterName') ; + *$self->{ZipData}{FilterName} = $v + if ref $v eq 'CODE' ; + } + return 1 ; } @@ -554,8 +641,8 @@ sub getExtraParams { my $self = shift ; - use IO::Compress::Base::Common 2.037 qw(:Parse); - use Compress::Raw::Zlib 2.037 qw(Z_DEFLATED Z_DEFAULT_COMPRESSION Z_DEFAULT_STRATEGY); + use IO::Compress::Base::Common 2.042 qw(:Parse); + use Compress::Raw::Zlib 2.042 qw(Z_DEFLATED Z_DEFAULT_COMPRESSION Z_DEFAULT_STRATEGY); my @Bzip2 = (); @@ -577,9 +664,13 @@ sub getExtraParams 'Comment' => [0, 1, Parse_any, ''], 'ZipComment'=> [0, 1, Parse_any, ''], 'Name' => [0, 1, Parse_any, ''], + 'FilterName'=> [0, 1, Parse_code, undef], + 'CanonicalName'=> [0, 1, Parse_boolean, 1], + #'UTF8' => [0, 1, Parse_boolean, 0], 'Time' => [0, 1, Parse_any, undef], 'exTime' => [0, 1, Parse_any, undef], 'exUnix2' => [0, 1, Parse_any, undef], + 'exUnixN' => [0, 1, Parse_any, undef], 'ExtAttr' => [0, 1, Parse_any, $Compress::Raw::Zlib::gzip_os_code == 3 ? 0100644 << 16 @@ -631,9 +722,17 @@ sub getFileInfo } # NOTE - Unix specific code alert - $params->value('ExtAttr' => $mode << 16) - if ! $params->parsed('ExtAttr'); + if (! $params->parsed('ExtAttr')) + { + use Fcntl qw(:mode) ; + my $attr = $mode << 16; + $attr |= ZIP_A_RONLY if ($mode & S_IWRITE) == 0 ; + $attr |= ZIP_A_DIR if ($mode & S_IFMT ) == S_IFDIR ; + + $params->value('ExtAttr' => $attr); + } + $params->value('want_exUnixN', [$uid, $gid]); $params->value('UID' => $uid) ; $params->value('GID' => $gid) ; @@ -674,6 +773,23 @@ sub mkUnix2Extra $ids); } +sub mkUnixNExtra +{ + my $uid = shift; + my $gid = shift; + + # Assumes UID/GID are 32-bit + my $ids ; + $ids .= pack "C", 1; # version + $ids .= pack "C", $Config{uidsize}; + $ids .= pack "V", $uid; + $ids .= pack "C", $Config{gidsize}; + $ids .= pack "V", $gid; + + return IO::Compress::Zlib::Extra::mkSubField(ZIP_EXTRA_ID_INFO_ZIP_UNIXN, + $ids); +} + # from Archive::Zip sub _unixToDosTime # Archive::Zip::Member @@ -832,10 +948,10 @@ See L<File::GlobMapper|File::GlobMapper> for more details. If the C<$input> parameter is any other type, C<undef> will be returned. In addition, if C<$input> is a simple filename, the default values for -the C<Name>, C<Time>, C<ExtAttr> and C<exTime> options will be sourced from that file. +the C<Name>, C<Time>, C<ExtAttr>, C<exUnixN> and C<exTime> options will be sourced from that file. If you do not want to use these defaults they can be overridden by -explicitly setting the C<Name>, C<Time>, C<ExtAttr> and C<exTime> options or by setting the +explicitly setting the C<Name>, C<Time>, C<ExtAttr>, C<exUnixN> and C<exTime> options or by setting the C<Minimal> parameter. =head3 The C<$output> parameter @@ -1112,12 +1228,67 @@ This parameter defaults to 0. Stores the contents of C<$string> in the zip filename header field. -If C<Name> is not specified and the C<$input> parameter is a filename that -will be used for the zip filename header field. +If C<Name> is not specified and the C<$input> parameter is a filename, the +value of C<$input> will be used for the zip filename header field. If C<Name> is not specified and the C<$input> parameter is not a filename, no zip filename field will be created. +Note that both the C<CanonicalName> and C<FilterName> options +can modify the value used for the zip filename header field. + +=item C<< CanonicalName => 0|1 >> + +This option controls whether the filename field in the zip header is +I<normalized> into Unix format before being written to the zip file. + +It is recommended that you leave this option enabled unless you really need +to create a non-standard Zip file. + +This is what APPNOTE.TXT has to say on what should be stored in the zip +filename header field. + + The name of the file, with optional relative path. + The path stored should not contain a drive or + device letter, or a leading slash. All slashes + should be forward slashes '/' as opposed to + backwards slashes '\' for compatibility with Amiga + and UNIX file systems etc. + +This option defaults to B<true>. + +=item C<< FilterName => sub { ... } >> + +This option allow the filename field in the zip header to be modified +before it is written to the zip file. + +This option takes a parameter that must be a reference to a sub. On entry +to the sub the C<$_> variable will contain the name to be filtered. If no +filename is available C<$_> will contain an empty string. + +The value of C<$_> when the sub returns will be stored in the filename +header field. + +Note that if C<CanonicalName> is enabled (and it is by default), a +normalized filename will be passed to the sub. + +If you use C<FilterName> to modify the filename, it is your responsibility +to keep the filename in Unix format. + +Although this option can be used with the OO ointerface, it is of most use +with the one-shot interface. For example, the code below shows how +C<FilterName> can be used to remove the path component from a series of +filenames before they are stored in C<$zipfile>. + + sub compressTxtFiles + { + my $zipfile = shift ; + my $dir = shift ; + + zip [ <$dir/*.txt> ] => $zipfile, + FilterName => sub { s[^$dir/][] } ; + } + =item C<< Time => $number >> Sets the last modified time field in the zip header to $number. @@ -1163,18 +1334,37 @@ By default no extended time field is created. =item C<< exUnix2 => [$uid, $gid] >> This option expects an array reference with exactly two elements: C<$uid> -and C<$gid>. These values correspond to the numeric user ID and group ID -of the owner of the files respectively. +and C<$gid>. These values correspond to the numeric User ID (UID) and Group ID +(GID) of the owner of the files respectively. When the C<exUnix2> option is present it will trigger the creation of a -Unix2 extra field (ID is "Ux") in the local zip. This will be populated -with C<$uid> and C<$gid>. In addition an empty Unix2 extra field will also -be created in the central zip header +Unix2 extra field (ID is "Ux") in the local zip header. This will be populated +with C<$uid> and C<$gid>. An empty Unix2 extra field will also +be created in the central zip header. + +Note - The UID & GID are stored as 16-bit +integers in the "Ux" field. Use C<< exUnixN >> if your UID or GID are +32-bit. If the C<Minimal> option is set to true, this option will be ignored. By default no Unix2 extra field is created. +=item C<< exUnixN => [$uid, $gid] >> + +This option expects an array reference with exactly two elements: C<$uid> +and C<$gid>. These values correspond to the numeric User ID (UID) and Group ID +(GID) of the owner of the files respectively. + +When the C<exUnixN> option is present it will trigger the creation of a +UnixN extra field (ID is "ux") in bothe the local and central zip headers. +This will be populated with C<$uid> and C<$gid>. +The UID & GID are stored as 32-bit integers. + +If the C<Minimal> option is set to true, this option will be ignored. + +By default no UnixN extra field is created. + =item C<< Comment => $comment >> Stores the contents of C<$comment> in the Central File Header of @@ -1283,6 +1473,9 @@ Alternatively the list of subfields can by supplied as a scalar, thus ExtraField => $rawdata +In this case C<IO::Compress::Zip> will check that C<$rawdata> consists of +zero or more conformant sub-fields. + The Extended Time field (ID "UT"), set using the C<exTime> option, and the Unix2 extra field (ID "Ux), set using the C<exUnix2> option, are examples of extra fields. @@ -1295,7 +1488,8 @@ The maximum size of an extra field 65535 bytes. If specified, this option will disable the creation of all extra fields in the zip local and central headers. So the C<exTime>, C<exUnix2>, -C<ExtraFieldLocal> and C<ExtraFieldCentral> options will be ignored. +C<exUnixN>, C<ExtraFieldLocal> and C<ExtraFieldCentral> options will +be ignored. This parameter defaults to 0. diff --git a/cpan/IO-Compress/lib/IO/Compress/Zip/Constants.pm b/cpan/IO-Compress/lib/IO/Compress/Zip/Constants.pm index 47fd75117e..bdeb22e1f1 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Zip/Constants.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Zip/Constants.pm @@ -7,7 +7,7 @@ require Exporter; our ($VERSION, @ISA, @EXPORT, %ZIP_CM_MIN_VERSIONS); -$VERSION = '2.037'; +$VERSION = '2.042'; @ISA = qw(Exporter); @@ -38,7 +38,7 @@ $VERSION = '2.037'; ZIP_EXTRA_ID_ZIP64 ZIP_EXTRA_ID_EXT_TIMESTAMP ZIP_EXTRA_ID_INFO_ZIP_UNIX2 - ZIP_EXTRA_ID_INFO_ZIP_UNIXn + ZIP_EXTRA_ID_INFO_ZIP_UNIXN ZIP_EXTRA_ID_INFO_ZIP_Upath ZIP_EXTRA_ID_INFO_ZIP_Ucom ZIP_EXTRA_ID_JAVA_EXE @@ -51,6 +51,12 @@ $VERSION = '2.037'; %ZIP_CM_MIN_VERSIONS ZIP64_MIN_VERSION + ZIP_A_RONLY + ZIP_A_HIDDEN + ZIP_A_SYSTEM + ZIP_A_LABEL + ZIP_A_DIR + ZIP_A_ARCHIVE ); # Compression types supported @@ -89,18 +95,26 @@ use constant ZIP_OS_CODE_DEFAULT => 3; use constant ZIP_EXTRA_ID_ZIP64 => pack "v", 1; use constant ZIP_EXTRA_ID_EXT_TIMESTAMP => "UT"; use constant ZIP_EXTRA_ID_INFO_ZIP_UNIX2 => "Ux"; -use constant ZIP_EXTRA_ID_INFO_ZIP_UNIXn => "ux"; +use constant ZIP_EXTRA_ID_INFO_ZIP_UNIXN => "ux"; use constant ZIP_EXTRA_ID_INFO_ZIP_Upath => "up"; use constant ZIP_EXTRA_ID_INFO_ZIP_Ucom => "uc"; use constant ZIP_EXTRA_ID_JAVA_EXE => pack "v", 0xCAFE; +# DOS Attributes +use constant ZIP_A_RONLY => 0x01; +use constant ZIP_A_HIDDEN => 0x02; +use constant ZIP_A_SYSTEM => 0x04; +use constant ZIP_A_LABEL => 0x08; +use constant ZIP_A_DIR => 0x10; +use constant ZIP_A_ARCHIVE => 0x20; + use constant ZIP64_MIN_VERSION => 45; %ZIP_CM_MIN_VERSIONS = ( - ZIP_CM_STORE() => 20, - ZIP_CM_DEFLATE() => 20, - ZIP_CM_BZIP2() => 46, - ZIP_CM_LZMA() => 63, + ZIP_CM_STORE() => 20, + ZIP_CM_DEFLATE() => 20, + ZIP_CM_BZIP2() => 46, + ZIP_CM_LZMA() => 63, ); diff --git a/cpan/IO-Compress/lib/IO/Compress/Zlib/Constants.pm b/cpan/IO-Compress/lib/IO/Compress/Zlib/Constants.pm index 4eeab80c7f..be81dabc4b 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Zlib/Constants.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Zlib/Constants.pm @@ -9,7 +9,7 @@ require Exporter; our ($VERSION, @ISA, @EXPORT); -$VERSION = '2.037'; +$VERSION = '2.042'; @ISA = qw(Exporter); diff --git a/cpan/IO-Compress/lib/IO/Compress/Zlib/Extra.pm b/cpan/IO-Compress/lib/IO/Compress/Zlib/Extra.pm index 66d0a2e5d9..218a0d1ee9 100644 --- a/cpan/IO-Compress/lib/IO/Compress/Zlib/Extra.pm +++ b/cpan/IO-Compress/lib/IO/Compress/Zlib/Extra.pm @@ -8,9 +8,9 @@ use bytes; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS); -$VERSION = '2.037'; +$VERSION = '2.042'; -use IO::Compress::Gzip::Constants 2.037 ; +use IO::Compress::Gzip::Constants 2.042 ; sub ExtraFieldError { @@ -174,7 +174,6 @@ sub parseExtraField return parseRawExtra($dataRef, undef, 1, $gzipMode); } - #my $data = $$dataRef; my $data = $dataRef; my $out = '' ; @@ -195,7 +194,7 @@ sub parseExtraField return ExtraFieldError("Not even number of elements") unless @$data % 2 == 0; - for (my $ix = 0; $ix <= length(@$data) -1 ; $ix += 2) { + for (my $ix = 0; $ix <= @$data -1 ; $ix += 2) { my $bad = validateExtraFieldPair([$data->[$ix], $data->[$ix+1]], $strict, $gzipMode) ; diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Bunzip2.pm b/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Bunzip2.pm index 026627a18b..4a00fe2648 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Bunzip2.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Bunzip2.pm @@ -4,12 +4,12 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status); +use IO::Compress::Base::Common 2.042 qw(:Status); -use Compress::Raw::Bzip2 2.037 ; +use Compress::Raw::Bzip2 2.042 ; our ($VERSION, @ISA); -$VERSION = '2.037'; +$VERSION = '2.042'; sub mkUncompObject { diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Identity.pm b/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Identity.pm index 4d36999ba3..7d14534cd2 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Identity.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Identity.pm @@ -4,14 +4,14 @@ use warnings; use strict; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status); +use IO::Compress::Base::Common 2.042 qw(:Status); use IO::Compress::Zip::Constants ; our ($VERSION); -$VERSION = '2.037'; +$VERSION = '2.042'; -use Compress::Raw::Zlib 2.037 (); +use Compress::Raw::Zlib 2.042 (); sub mkUncompObject { diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Inflate.pm b/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Inflate.pm index ab7449608d..bd28bf49a7 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Inflate.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Adapter/Inflate.pm @@ -4,11 +4,11 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status); -use Compress::Raw::Zlib 2.037 qw(Z_OK Z_BUF_ERROR Z_STREAM_END Z_FINISH MAX_WBITS); +use IO::Compress::Base::Common 2.042 qw(:Status); +use Compress::Raw::Zlib 2.042 qw(Z_OK Z_BUF_ERROR Z_STREAM_END Z_FINISH MAX_WBITS); our ($VERSION); -$VERSION = '2.037'; +$VERSION = '2.042'; diff --git a/cpan/IO-Compress/lib/IO/Uncompress/AnyInflate.pm b/cpan/IO-Compress/lib/IO/Uncompress/AnyInflate.pm index 4184f0dca9..54e5733b13 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/AnyInflate.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/AnyInflate.pm @@ -6,22 +6,22 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(createSelfTiedObject); +use IO::Compress::Base::Common 2.042 qw(createSelfTiedObject); -use IO::Uncompress::Adapter::Inflate 2.037 (); +use IO::Uncompress::Adapter::Inflate 2.042 (); -use IO::Uncompress::Base 2.037 ; -use IO::Uncompress::Gunzip 2.037 ; -use IO::Uncompress::Inflate 2.037 ; -use IO::Uncompress::RawInflate 2.037 ; -use IO::Uncompress::Unzip 2.037 ; +use IO::Uncompress::Base 2.042 ; +use IO::Uncompress::Gunzip 2.042 ; +use IO::Uncompress::Inflate 2.042 ; +use IO::Uncompress::RawInflate 2.042 ; +use IO::Uncompress::Unzip 2.042 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyInflateError); -$VERSION = '2.037'; +$VERSION = '2.042'; $AnyInflateError = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @@ -48,7 +48,7 @@ sub anyinflate sub getExtraParams { - use IO::Compress::Base::Common 2.037 qw(:Parse); + use IO::Compress::Base::Common 2.042 qw(:Parse); return ( 'RawInflate' => [1, 1, Parse_boolean, 0] ) ; } diff --git a/cpan/IO-Compress/lib/IO/Uncompress/AnyUncompress.pm b/cpan/IO-Compress/lib/IO/Uncompress/AnyUncompress.pm index b7d8bf9330..979f892473 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/AnyUncompress.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/AnyUncompress.pm @@ -4,16 +4,16 @@ use strict; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(createSelfTiedObject); +use IO::Compress::Base::Common 2.042 qw(createSelfTiedObject); -use IO::Uncompress::Base 2.037 ; +use IO::Uncompress::Base 2.042 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyUncompressError); -$VERSION = '2.037'; +$VERSION = '2.042'; $AnyUncompressError = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @@ -27,22 +27,22 @@ Exporter::export_ok_tags('all'); BEGIN { - eval ' use IO::Uncompress::Adapter::Inflate 2.037 ;'; - eval ' use IO::Uncompress::Adapter::Bunzip2 2.037 ;'; - eval ' use IO::Uncompress::Adapter::LZO 2.037 ;'; - eval ' use IO::Uncompress::Adapter::Lzf 2.037 ;'; - eval ' use IO::Uncompress::Adapter::UnLzma 2.020 ;'; - eval ' use IO::Uncompress::Adapter::UnXz 2.020 ;'; - - eval ' use IO::Uncompress::Bunzip2 2.037 ;'; - eval ' use IO::Uncompress::UnLzop 2.037 ;'; - eval ' use IO::Uncompress::Gunzip 2.037 ;'; - eval ' use IO::Uncompress::Inflate 2.037 ;'; - eval ' use IO::Uncompress::RawInflate 2.037 ;'; - eval ' use IO::Uncompress::Unzip 2.037 ;'; - eval ' use IO::Uncompress::UnLzf 2.037 ;'; - eval ' use IO::Uncompress::UnLzma 2.037 ;'; - eval ' use IO::Uncompress::UnXz 2.037 ;'; + eval ' use IO::Uncompress::Adapter::Inflate 2.042 ;'; + eval ' use IO::Uncompress::Adapter::Bunzip2 2.042 ;'; + eval ' use IO::Uncompress::Adapter::LZO 2.042 ;'; + eval ' use IO::Uncompress::Adapter::Lzf 2.042 ;'; + eval ' use IO::Uncompress::Adapter::UnLzma 2.042 ;'; + eval ' use IO::Uncompress::Adapter::UnXz 2.042 ;'; + + eval ' use IO::Uncompress::Bunzip2 2.042 ;'; + eval ' use IO::Uncompress::UnLzop 2.042 ;'; + eval ' use IO::Uncompress::Gunzip 2.042 ;'; + eval ' use IO::Uncompress::Inflate 2.042 ;'; + eval ' use IO::Uncompress::RawInflate 2.042 ;'; + eval ' use IO::Uncompress::Unzip 2.042 ;'; + eval ' use IO::Uncompress::UnLzf 2.042 ;'; + eval ' use IO::Uncompress::UnLzma 2.042 ;'; + eval ' use IO::Uncompress::UnXz 2.042 ;'; } sub new @@ -60,7 +60,7 @@ sub anyuncompress sub getExtraParams { - use IO::Compress::Base::Common 2.037 qw(:Parse); + use IO::Compress::Base::Common 2.042 qw(:Parse); return ( 'RawInflate' => [1, 1, Parse_boolean, 0] , 'UnLzma' => [1, 1, Parse_boolean, 0] ) ; } diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Base.pm b/cpan/IO-Compress/lib/IO/Uncompress/Base.pm index 7d0747796a..7747bfeed2 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Base.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Base.pm @@ -9,12 +9,12 @@ our (@ISA, $VERSION, @EXPORT_OK, %EXPORT_TAGS); @ISA = qw(Exporter IO::File); -$VERSION = '2.037'; +$VERSION = '2.042'; use constant G_EOF => 0 ; use constant G_ERR => -1 ; -use IO::Compress::Base::Common 2.037 ; +use IO::Compress::Base::Common 2.042 ; use IO::File ; use Symbol; @@ -730,7 +730,7 @@ sub _rd2 while (($status = $z->read($x->{buff})) > 0) { if ($fh) { - print $fh ${ $x->{buff} } + syswrite $fh, ${ $x->{buff} } or return $z->saveErrorString(undef, "Error writing to output file: $!", $!); ${ $x->{buff} } = '' ; } diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Bunzip2.pm b/cpan/IO-Compress/lib/IO/Uncompress/Bunzip2.pm index b08ab47994..8b47ce3659 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Bunzip2.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Bunzip2.pm @@ -4,15 +4,15 @@ use strict ; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); -use IO::Uncompress::Base 2.037 ; -use IO::Uncompress::Adapter::Bunzip2 2.037 ; +use IO::Uncompress::Base 2.042 ; +use IO::Uncompress::Adapter::Bunzip2 2.042 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $Bunzip2Error); -$VERSION = '2.037'; +$VERSION = '2.042'; $Bunzip2Error = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @@ -40,7 +40,7 @@ sub getExtraParams { my $self = shift ; - use IO::Compress::Base::Common 2.037 qw(:Parse); + use IO::Compress::Base::Common 2.042 qw(:Parse); return ( 'Verbosity' => [1, 1, Parse_boolean, 0], diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Gunzip.pm b/cpan/IO-Compress/lib/IO/Uncompress/Gunzip.pm index e191ee6751..79542e5c39 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Gunzip.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Gunzip.pm @@ -9,12 +9,12 @@ use strict ; use warnings; use bytes; -use IO::Uncompress::RawInflate 2.037 ; +use IO::Uncompress::RawInflate 2.042 ; -use Compress::Raw::Zlib 2.037 qw( crc32 ) ; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); -use IO::Compress::Gzip::Constants 2.037 ; -use IO::Compress::Zlib::Extra 2.037 ; +use Compress::Raw::Zlib 2.042 qw( crc32 ) ; +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); +use IO::Compress::Gzip::Constants 2.042 ; +use IO::Compress::Zlib::Extra 2.042 ; require Exporter ; @@ -28,7 +28,7 @@ Exporter::export_ok_tags('all'); $GunzipError = ''; -$VERSION = '2.037'; +$VERSION = '2.042'; sub new { @@ -47,7 +47,7 @@ sub gunzip sub getExtraParams { - use IO::Compress::Base::Common 2.037 qw(:Parse); + use IO::Compress::Base::Common 2.042 qw(:Parse); return ( 'ParseExtra' => [1, 1, Parse_boolean, 0] ) ; } diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Inflate.pm b/cpan/IO-Compress/lib/IO/Uncompress/Inflate.pm index 7435de3d18..597cd247c8 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Inflate.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Inflate.pm @@ -5,15 +5,15 @@ use strict ; use warnings; use bytes; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); -use IO::Compress::Zlib::Constants 2.037 ; +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); +use IO::Compress::Zlib::Constants 2.042 ; -use IO::Uncompress::RawInflate 2.037 ; +use IO::Uncompress::RawInflate 2.042 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $InflateError); -$VERSION = '2.037'; +$VERSION = '2.042'; $InflateError = ''; @ISA = qw( Exporter IO::Uncompress::RawInflate ); diff --git a/cpan/IO-Compress/lib/IO/Uncompress/RawInflate.pm b/cpan/IO-Compress/lib/IO/Uncompress/RawInflate.pm index f628eef4d3..b091a6cbba 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/RawInflate.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/RawInflate.pm @@ -5,16 +5,16 @@ use strict ; use warnings; use bytes; -use Compress::Raw::Zlib 2.037 ; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); +use Compress::Raw::Zlib 2.042 ; +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); -use IO::Uncompress::Base 2.037 ; -use IO::Uncompress::Adapter::Inflate 2.037 ; +use IO::Uncompress::Base 2.042 ; +use IO::Uncompress::Adapter::Inflate 2.042 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, %DEFLATE_CONSTANTS, $RawInflateError); -$VERSION = '2.037'; +$VERSION = '2.042'; $RawInflateError = ''; @ISA = qw( Exporter IO::Uncompress::Base ); diff --git a/cpan/IO-Compress/lib/IO/Uncompress/Unzip.pm b/cpan/IO-Compress/lib/IO/Uncompress/Unzip.pm index 0ef881328f..a782b62627 100644 --- a/cpan/IO-Compress/lib/IO/Uncompress/Unzip.pm +++ b/cpan/IO-Compress/lib/IO/Uncompress/Unzip.pm @@ -9,14 +9,14 @@ use warnings; use bytes; use IO::File; -use IO::Uncompress::RawInflate 2.037 ; -use IO::Compress::Base::Common 2.037 qw(:Status createSelfTiedObject); -use IO::Uncompress::Adapter::Inflate 2.037 ; -use IO::Uncompress::Adapter::Identity 2.037 ; -use IO::Compress::Zlib::Extra 2.037 ; -use IO::Compress::Zip::Constants 2.037 ; +use IO::Uncompress::RawInflate 2.042 ; +use IO::Compress::Base::Common 2.042 qw(:Status createSelfTiedObject); +use IO::Uncompress::Adapter::Inflate 2.042 ; +use IO::Uncompress::Adapter::Identity 2.042 ; +use IO::Compress::Zlib::Extra 2.042 ; +use IO::Compress::Zip::Constants 2.042 ; -use Compress::Raw::Zlib 2.037 qw(crc32) ; +use Compress::Raw::Zlib 2.042 qw(crc32) ; BEGIN { @@ -31,7 +31,7 @@ require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup); -$VERSION = '2.037'; +$VERSION = '2.042'; $UnzipError = ''; @ISA = qw(Exporter IO::Uncompress::RawInflate); @@ -64,7 +64,7 @@ sub unzip sub getExtraParams { - use IO::Compress::Base::Common 2.037 qw(:Parse); + use IO::Compress::Base::Common 2.042 qw(:Parse); return ( diff --git a/cpan/IO-Compress/t/000prereq.t b/cpan/IO-Compress/t/000prereq.t index a8e5452131..13417aa66c 100644 --- a/cpan/IO-Compress/t/000prereq.t +++ b/cpan/IO-Compress/t/000prereq.t @@ -25,7 +25,7 @@ BEGIN if eval { require Test::NoWarnings ; import Test::NoWarnings; 1 }; - my $VERSION = '2.037'; + my $VERSION = '2.042'; my @NAMES = qw( Compress::Raw::Bzip2 Compress::Raw::Zlib diff --git a/cpan/IO-Compress/t/004gziphdr.t b/cpan/IO-Compress/t/004gziphdr.t index 2ef9459785..14071f9f20 100644 --- a/cpan/IO-Compress/t/004gziphdr.t +++ b/cpan/IO-Compress/t/004gziphdr.t @@ -20,7 +20,7 @@ BEGIN { if eval { require Test::NoWarnings ; import Test::NoWarnings; 1 }; - plan tests => 915 + $extra ; + plan tests => 918 + $extra ; use_ok('Compress::Raw::Zlib') ; use_ok('IO::Compress::Gzip::Constants') ; @@ -972,6 +972,20 @@ EOM } } + + { + # RT #72329 + my $error = 'Error with ExtraField Parameter: ' . + 'SubField ID not two chars long' ; + my $buffer ; + my $x ; + eval { $x = new IO::Compress::Gzip \$buffer, + -ExtraField => [ at => 'mouse', bad => 'dog'] ; + }; + like $@, mkErr("$error"); + like $GzipError, "/$error/"; + ok ! $x ; + } } diff --git a/cpan/IO-Compress/t/01misc.t b/cpan/IO-Compress/t/01misc.t index a37aac000c..76d6a7bdc2 100644 --- a/cpan/IO-Compress/t/01misc.t +++ b/cpan/IO-Compress/t/01misc.t @@ -19,7 +19,7 @@ BEGIN { $extra = 1 if eval { require Test::NoWarnings ; import Test::NoWarnings; 1 }; - plan tests => 132 + $extra ; + plan tests => 136 + $extra ; use_ok('Scalar::Util'); use_ok('IO::Compress::Base::Common'); @@ -63,6 +63,10 @@ sub My::testParseParameters() like $@, mkErr("Parameter 'Fred' must be a signed int, got 'abc'"), "wanted signed, got 'abc'"; + eval { ParseParameters(1, {'Fred' => [1, 1, Parse_code, undef]}, Fred => 'abc') ; }; + like $@, mkErr("Parameter 'Fred' must be a code reference, got 'abc'"), + "wanted code, got 'abc'"; + SKIP: { @@ -321,9 +325,17 @@ My::testParseParameters(); $x->subtract($y); is $x->getHigh, 0, " getHigh is 0"; - is $x->getLow, 0xFFFFFFFD, " getLow is 1"; + is $x->getLow, 0xFFFFFFFF, " getLow is 1"; ok ! $x->is64bit(), " ! is64bit"; + $x = new U64(0x01CADCE2, 0x4E815983); + $y = new U64(0x19DB1DE, 0xD53E8000); # NTFS to Unix time delta + + $x->subtract($y); + is $x->getHigh, 0x2D2B03, " getHigh is 2D2B03"; + is $x->getLow, 0x7942D983, " getLow is 7942D983"; + ok $x->is64bit(), " is64bit"; + title "U64 - equal" ; $x = new U64(0, 1); diff --git a/cpan/IO-Compress/t/050interop-gzip.t b/cpan/IO-Compress/t/050interop-gzip.t index 22be0646c8..27c1d7db8c 100644 --- a/cpan/IO-Compress/t/050interop-gzip.t +++ b/cpan/IO-Compress/t/050interop-gzip.t @@ -10,6 +10,7 @@ use strict; use warnings; use bytes; +use File::Spec ; use Test::More ; use CompTestUtils; @@ -91,10 +92,13 @@ BEGIN { for my $dir (reverse split $split, $ENV{PATH}) { - $GZIP = "$dir/$name" - if -x "$dir/$name" ; + $GZIP = File::Spec->catfile($dir,$name) + if -x File::Spec->catfile($dir,$name) } + # Handle spaces in path to gzip + $GZIP = "\"$GZIP\"" if defined $GZIP && $GZIP =~ /\s/; + plan(skip_all => "Cannot find $name") if ! $GZIP ; diff --git a/cpan/IO-Compress/t/105oneshot-zip-only.t b/cpan/IO-Compress/t/105oneshot-zip-only.t index 0da219e416..1f43c2c261 100644 --- a/cpan/IO-Compress/t/105oneshot-zip-only.t +++ b/cpan/IO-Compress/t/105oneshot-zip-only.t @@ -11,6 +11,7 @@ use warnings; use bytes; use Test::More ; +use File::Spec ; use CompTestUtils; BEGIN { @@ -23,13 +24,11 @@ BEGIN { $extra = 1 if eval { require Test::NoWarnings ; import Test::NoWarnings; 1 }; - plan tests => 170 + $extra ; + plan tests => 216 + $extra ; #use_ok('IO::Compress::Zip', qw(zip $ZipError :zip_method)) ; use_ok('IO::Compress::Zip', qw(:all)) ; use_ok('IO::Uncompress::Unzip', qw(unzip $UnzipError)) ; - - } @@ -133,6 +132,35 @@ sub zipGetHeader cmp_ok $hdr->{Time} >> 1, '<=', $after >> 1, " Time is ok"; } +{ + title "Check CanonicalName & FilterName"; + + my $lex = new LexFile my $file1; + + my $content = "hello" ; + writeFile($file1, $content); + my $hdr; + + my $abs = File::Spec->catfile("", "fred", "joe"); + $hdr = zipGetHeader($file1, $content, Name => $abs, CanonicalName => 1) ; + is $hdr->{Name}, "fred/joe", " Name is 'fred/joe'" ; + + $hdr = zipGetHeader($file1, $content, Name => $abs, CanonicalName => 0) ; + is $hdr->{Name}, File::Spec->catfile("", "fred", "joe"), " Name is '/fred/joe'" ; + + $hdr = zipGetHeader($file1, $content, FilterName => sub {$_ = "abcde"}); + is $hdr->{Name}, "abcde", " Name is 'abcde'" ; + + $hdr = zipGetHeader($file1, $content, Name => $abs, + FilterName => sub { s/joe/jim/ }); + is $hdr->{Name}, "fred/jim", " Name is 'fred/jim'" ; + + $hdr = zipGetHeader($file1, $content, Name => $abs, + CanonicalName => 0, + FilterName => sub { s/joe/jim/ }); + is $hdr->{Name}, File::Spec->catfile("", "fred", "jim"), " Name is '/fred/jim'" ; +} + for my $stream (0, 1) { for my $zip64 (0, 1) diff --git a/cpan/IO-Compress/t/compress/oneshot.pl b/cpan/IO-Compress/t/compress/oneshot.pl index 102f221da5..14309ab8c5 100644 --- a/cpan/IO-Compress/t/compress/oneshot.pl +++ b/cpan/IO-Compress/t/compress/oneshot.pl @@ -16,7 +16,7 @@ BEGIN { $extra = 1 if eval { require Test::NoWarnings ; import Test::NoWarnings; 1 }; - plan tests => 986 + $extra ; + plan tests => 989 + $extra ; use_ok('IO::Uncompress::AnyUncompress', qw(anyuncompress $AnyUncompressError)) ; @@ -1578,8 +1578,26 @@ sub run } } -} + { + # check setting $/ + + my $CompFunc = getTopFuncRef($CompressClass); + my $UncompFunc = getTopFuncRef($UncompressClass); + my $lex = new LexFile my $file ; + + local $\ = "\n" ; + my $input = "hello world"; + my $compressed ; + my $output; + ok &$CompFunc(\$input => \$compressed), ' Compressed ok' ; + ok &$UncompFunc(\$compressed => $file), ' UnCompressed ok' ; + my $content = readFile($file) ; + is $content, $input, "round trip ok" ; + + } + +} # TODO add more error cases 1; diff --git a/cpan/IO-Compress/t/cz-14gzopen.t b/cpan/IO-Compress/t/cz-14gzopen.t index 89b04ff1f6..5d0f1fbd08 100644 --- a/cpan/IO-Compress/t/cz-14gzopen.t +++ b/cpan/IO-Compress/t/cz-14gzopen.t @@ -491,7 +491,8 @@ foreach my $stdio ( ['-', '-'], [*STDIN, *STDOUT]) { my $lex = new LexFile my $name ; writeFile($name, "abc"); - chmod 0444, $name ; + chmod 0444, $name + or skip "Cannot create non-writable file", 3 ; skip "Cannot create non-writable file", 3 if -w $name ; diff --git a/cpan/Time-Piece/t/02core.t b/cpan/Time-Piece/t/02core.t index 8a0673e5f9..1e8d2649a6 100644 --- a/cpan/Time-Piece/t/02core.t +++ b/cpan/Time-Piece/t/02core.t @@ -2,6 +2,7 @@ use Test::More tests => 95; my $is_win32 = ($^O =~ /Win32/); my $is_qnx = ($^O eq 'qnx'); +my $is_vos = ($^O eq 'vos'); BEGIN { use_ok('Time::Piece'); } ok(1); @@ -113,7 +114,7 @@ SKIP: { cmp_ok($t->strftime('%U'), 'eq', '09'); # Sun cmp Mon SKIP: { - skip "can't strftime %V on Win32 or QNX", 1 if $is_win32 or $is_qnx; + skip "can't strftime %V on Win32 or QNX or VOS", 1 if $is_win32 or $is_qnx or $is_vos; # is this test really broken on Mac OS? -- rjbs, 2006-02-08 cmp_ok($t->strftime('%V'), 'eq', '09'); # Sun cmp Mon } diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes index 0779628217..67166f513b 100644 --- a/cpan/Unicode-Collate/Changes +++ b/cpan/Unicode-Collate/Changes @@ -1,5 +1,31 @@ Revision history for Perl module Unicode::Collate. +0.85 Sat Nov 19 20:01:57 2011 + - U::C::Locale newly supports locales: bn, sa. + - added loc_bn.t, loc_cjk.t, loc_sa.t in t. + - updated some locales to CLDR 2.0 : zh__pinyin, zh__stroke. + * supported compatibility decomposable characters and U+FDD0 indexes. + * updated CJK/Pinyin.pm and CJK/Stroke.pm. + +0.84 Sun Nov 6 14:44:51 2011 + - U::C::Locale supports script codes. + - U::C::Locale newly supports locales: fa, sr_Latn, ur. + - added loc_fa.t, loc_srla.t, loc_ur.t in t. + +0.83 Sun Oct 30 20:22:04 2011 + - mklocale: auto-generate equivalents for suppressed contractions. + * be.txt, bg.txt, kk.txt, mk.txt, ru.txt, sr.txt, uk.txt in data + are simplified. + * but no Locale/*.pl will be modified. + +0.82 Sun Oct 30 10:03:48 2011 + - U::C::Locale newly supports locales: si, si__dictionary, + sv__reformed, ta, te, th, wae. + - added loc_si.t, loc_sidt.t, loc_svrf.t, loc_ta.t, loc_te.t, + loc_th.t, loc_wae.t in t. + - updated some locales to CLDR 2.0 : sk, sr, sv, uk. + - updated CJK/Pinyin.pm according to CLDR 2.0. + 0.81 Sun Oct 23 21:32:36 2011 - U::C::Locale newly supports locales: ml, mr, or, pa. - added loc_ml.t, loc_mr.t, loc_or.t, loc_pa.t in t. @@ -129,7 +155,7 @@ Revision history for Perl module Unicode::Collate. 0.61 Sat Oct 2 11:41:29 2010 - U::C::Locale newly supports locales: hr, ig, sq. - added loc_hr.t, loc_ig.t, loc_sq.t in t. - - precomposites of e-dot-below, o-dot-below, o-tilde are tailored as well. + - precomposed e-dot-below, o-dot-below, o-tilde are tailored as well. (affected locales: et, yo) - Vietnamese (vi): added contractions for non-blocked decompositions * base + dot-below + mark such as a\x{323}\x{306}, \x{1EA1}\x{306} etc. @@ -143,14 +169,14 @@ Revision history for Perl module Unicode::Collate. - U::C::Locale newly supports locales: de__phonebook, nso, om, tn, vi. - added loc_de.t, loc_deph.t, loc_nso.t, loc_om.t, loc_tn.t, loc_vi.t in t. - - precomposites of a-breve, a-circ, e-circ, o-circ are tailored as well. + - precomposed a-breve, a-circ, e-circ, o-circ are tailored as well. (affected locales: ro, sk, sv) 0.59 Sun Sep 5 17:03:52 2010 - U::C::Locale newly supports locales: az, fil, ha, lt, mt, tr, wo, yo. - added loc_az.t, loc_fil.t, loc_ha.t, loc_lt.t, loc_mt.t, loc_tr.t, loc_wo.t, loc_yo.t in t. - - precomposites of a-uml, o-uml, and u-uml are tailored as well. + - precomposed a-uml, o-uml, and u-uml are tailored as well. (affected locales: da, et, fi, fo, is, kl, nb, nn, sk, sv) 0.58 Sun Aug 29 19:56:50 2010 @@ -287,7 +313,7 @@ Revision history for Perl module Unicode::Collate. 0.26 Sun Aug 03 22:23:17 2003 - fix: an expansion in which a CE is level 3 ignorable and others are not was wrongly made level 3 ignorable as a whole entry. - (In DUCET, some precomposites in Musical Symbols are so) + (In DUCET, some precomposed characters in Musical Symbols are so) 0.25 Mon Jun 06 23:20:17 2003 - fix Makefile.PL. diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm index 0ed7f2a4b8..d45e597d6d 100644 --- a/cpan/Unicode-Collate/Collate.pm +++ b/cpan/Unicode-Collate/Collate.pm @@ -14,7 +14,7 @@ use File::Spec; no warnings 'utf8'; -our $VERSION = '0.81'; +our $VERSION = '0.85'; our $PACKAGE = __PACKAGE__; ### begin XS only ### diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs index d9c14a6e3f..02269c93d9 100644 --- a/cpan/Unicode-Collate/Collate.xs +++ b/cpan/Unicode-Collate/Collate.xs @@ -86,6 +86,8 @@ static const UV max_div_16 = UV_MAX / 16; #define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */ #define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */ +#define CJK_CompIni (0xFA0E) +#define CJK_CompFin (0xFA29) static STDCHAR UnifiedCompat[] = { 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1 }; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */ @@ -303,8 +305,8 @@ _derivCE_9 (code) bool basic_unified = 0; PPCODE: if (CJK_UidIni <= code) { - if (codeRange(0xFA0E, 0xFA29)) - basic_unified = (bool)UnifiedCompat[code - 0xFA0E]; + if (codeRange(CJK_CompIni, CJK_CompFin)) + basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; else basic_unified = (ix >= 3 ? (code <= CJK_UidF52) : ix == 2 ? (code <= CJK_UidF51) : @@ -373,8 +375,8 @@ _isUIdeo (code, uca_vers) CODE: /* uca_vers = 0 for _uideoCE_8() */ if (CJK_UidIni <= code) { - if (codeRange(0xFA0E, 0xFA29)) - basic_unified = (bool)UnifiedCompat[code - 0xFA0E]; + if (codeRange(CJK_CompIni, CJK_CompFin)) + basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; else basic_unified = (uca_vers >= 20 ? (code <= CJK_UidF52) : uca_vers >= 18 ? (code <= CJK_UidF51) : diff --git a/cpan/Unicode-Collate/Collate/CJK/Pinyin.pm b/cpan/Unicode-Collate/Collate/CJK/Pinyin.pm index 3e0ca7696d..d3d9941237 100644 --- a/cpan/Unicode-Collate/Collate/CJK/Pinyin.pm +++ b/cpan/Unicode-Collate/Collate/CJK/Pinyin.pm @@ -3,7 +3,7 @@ package Unicode::Collate::CJK::Pinyin; use 5.006; use strict; -our $VERSION = '0.76'; +our $VERSION = '0.85'; my %u2p; my $wt = 0x8000; @@ -13,7 +13,7 @@ while (<DATA>) { my @c = split; for my $c (@c) { next if !$c; - $u2p{hex($c)} = $wt; + $u2p{hex($c)} = $wt if $c !~ /-/; $wt++; } } @@ -25,6 +25,7 @@ sub weightPinyin { 1; __DATA__ +FDD0-0041 963F 5475 9515 55C4 554A @@ -55,8 +56,10 @@ __DATA__ 5C99 6277 5773 5787 5CB0 50B2 5961 5965 5967 5AEF 6160 9A9C 96A9 58BA 5DB4 61CA 6FB3 64D9 93CA 9A41 7FF6 +FDD0-0042 516B 4EC8 6252 6733 7390 593F 5C9C 82AD 5CC7 67ED -75A4 54F5 634C 7C91 7F93 8686 91DB 91DF 8C5D 9C83 +75A4 54F5 5DFC 634C 7C91 7F93 8686 91DB 91DF 8C5D +9C83 53D0 72AE 629C 577A 59AD 62D4 8307 70A6 7679 80C8 83DD 8A59 8DCB 8EF7 98B0 9B43 58A2 9F25 628A 94AF 9200 9776 @@ -179,11 +182,11 @@ __DATA__ 535C 5575 8421 818A 5CEC 5EAF 900B 6661 923D 8AA7 9CEA 8F50 91AD -535F 8865 54FA 6355 88DC 9D4F +535F 8865 54FA 6355 55B8 88DC 9D4F 4E0D 5E03 4F48 5425 6B65 5498 6016 62AA 6B68 6B69 67E8 949A 52CF 57D4 57D7 6091 6357 8379 90E8 94B8 57E0 74FF 8500 8E04 90F6 9914 7BF0 9922 7C3F -517A +FDD0-0043 5693 64E6 6503 7924 906A 56C3 @@ -289,9 +292,10 @@ __DATA__ 5C3A 53FA 544E 4F88 5376 9F7F 5791 80E3 6065 7C8E 803B 8687 88B3 6B3C 6B6F 88B2 88ED 9279 892B 9F52 5F73 53F1 65A5 6758 707B 8D64 996C 62B6 52C5 605C -70BD 52D1 7FC4 7FC5 6555 70FE 75D3 557B 6E41 98ED -50BA 75F8 815F 8DEE 9253 96F4 618F 7608 7FE4 906B -9290 6157 761B 7FE8 71BE 61D8 8DA9 994E 9D92 9DD8 +70BD 52D1 7FC4 7FC5 6555 70FE 75D3 557B 6E41 7873 +98ED 50BA 75F8 815F 8DEE 9253 96F4 618F 7608 7FE4 +906B 9290 6157 761B 7FE8 71BE 61D8 8DA9 994E 9D92 +9DD8 599B 9EB6 5145 51B2 5FE1 6C96 833A 6D7A 73EB 7FC0 8202 5603 644F 5FB8 6183 61A7 885D 7F7F 825F 8E56 @@ -387,6 +391,7 @@ __DATA__ 811E 5249 5252 539D 590E 632B 839D 83A1 63AA 902A 65AE 68E4 9509 84CC 9519 6B75 92BC 932F +FDD0-0044 5491 54D2 8037 8345 7B1A 55D2 642D 8921 5660 6498 939D 8FBE 8FD6 547E 59B2 601B 6C93 709F 7F8D 8359 7557 @@ -458,8 +463,8 @@ __DATA__ 7538 6541 6382 508E 53A7 5D6E 6EC7 69C7 69D9 7628 98A0 8E4E 5DC5 985A 985B 766B 5DD3 5DD4 6527 7672 9F7B -5178 594C 70B9 5A70 655F 8DD5 7898 84A7 8547 8E2E -9EDE 56B8 +5178 594C 70B9 5A70 7320 655F 8DD5 7898 84A7 8547 +8E2E 9EDE 56B8 7535 4F43 963D 576B 5E97 57AB 6242 73B7 94BF 5A5D 60E6 6DC0 5960 7414 6BBF 8714 96FB 588A 58C2 6A42 6A5D 6FB1 975B 765C 7C1F 9A54 @@ -484,7 +489,7 @@ __DATA__ 914A 91D8 976A 5975 9876 9802 9F0E 5D7F 9F11 6FCE 85A1 9424 8BA2 5FCA 9964 77F4 5B9A 8A02 98E3 5576 94E4 6917 -815A 7887 952D 78A0 92CC 9320 78F8 9841 +815A 7887 952D 78A0 874A 92CC 9320 78F8 9841 8423 8062 4E1F 4E22 94E5 92A9 4E1C 51AC 549A 5CBD 6771 82F3 6638 6C21 5032 9E2B @@ -513,6 +518,7 @@ __DATA__ 77ED 6BB5 65AD 5845 7F0E 846E 6934 7145 7456 8176 78AB 953B 7DDE 6BC8 7C16 935B 65B7 8E96 7C6A +8968 5796 5806 5860 5D5F 75FD 78D3 9D2D 941C 9827 961F 5BF9 514A 514C 5151 5BFE 794B 603C 966E 968A @@ -534,6 +540,7 @@ __DATA__ 5815 8235 60F0 8DE2 8DE5 8DFA 98FF 58AE 5D9E 58AF 9D7D 6735 67A4 +FDD0-0045 59B8 59BF 5A3F 5A40 5C59 94B6 75FE 8BB9 542A 56EE 8FD7 4FC4 5A25 5CE8 5CE9 6D90 83AA 73F4 8A1B 7692 774B 920B 9507 9E45 86FE 78C0 8A90 @@ -553,13 +560,14 @@ __DATA__ 5CCE 6441 97A5 -800C 5150 4F95 9651 5CCF 6D0F 834B 682D 80F9 5532 -88BB 9E38 804F 8F00 9C95 96AD 9AF5 9B9E 9D2F 8F5C -5C12 5C13 5C14 8033 8FE9 6D31 9975 682E 6BE6 73E5 -94D2 723E 990C 99EC 85BE 9087 +513F 800C 5150 4F95 5152 9651 5CCF 6D0F 834B 682D +80F9 5532 88BB 9E38 7CAB 804F 8F00 9C95 96AD 9AF5 +9B9E 9D2F 8F5C +53BC 5C12 5C13 5C14 8033 8FE9 6D31 9975 682E 6BE6 +73E5 94D2 723E 990C 99EC 85BE 9087 8DB0 4E8C 5F0D 5F10 4F74 5235 54A1 8D30 8CAE 8848 8CB3 8A80 927A 6A32 -7CAB 8DB0 +FDD0-0046 53D1 6CB7 767A 50A0 767C 9166 5F42 91B1 4E4F 4F10 59C2 57A1 6D4C 75BA 7F5A 8337 9600 6830 781D 7B4F 7782 7F70 95A5 7F78 6A43 85C5 @@ -594,8 +602,9 @@ __DATA__ 4FF7 5255 539E 75BF 966B 5C5D 8409 5EC3 8CBB 75F1 9544 5EE2 66CA 7648 9F23 6FF7 6AE0 9BE1 9428 9745 5A54 6683 -5206 5429 5E09 7EB7 82AC 6610 6C1B 886F 7D1B 7FC2 -515D 68FB 8A1C 915A 9216 96F0 6706 71D3 9934 9959 +5206 5429 5E09 7EB7 82AC 6610 6C1B 54DB 886F 517A +7D1B 7FC2 515D 68FB 8A1C 915A 9216 96F0 6706 71D3 +9934 9959 575F 59A2 5C8E 6C7E 670C 678C 7083 80A6 7F92 86A0 86A1 68A4 68FC 711A 84B6 999A 96AB 58B3 5E69 6FC6 8561 9B75 6A68 71CC 8C6E 9F22 7FB5 9F16 8C76 8F52 @@ -607,8 +616,8 @@ __DATA__ 4E30 98CE 4EF9 51E8 51EC 59A6 6CA3 6CA8 51EE 67AB 5C01 75AF 76FD 781C 98A8 5CEF 5CF0 5051 687B 70FD 5D36 7326 8451 950B 6953 728E 8702 760B 78B8 50FC -7BC8 9137 92D2 6A92 8C50 93E0 9146 5BF7 7043 8634 -973B 882D 974A 98CC 9EB7 +7BC8 9137 92D2 6A92 95CF 8C50 93E0 9146 5BF7 7043 +8634 973B 882D 974A 98CC 9EB7 51AF 5906 6340 6D72 9022 5838 99AE 6453 6F28 7D98 8242 8BBD 8982 552A 8AF7 @@ -645,6 +654,7 @@ __DATA__ 8D59 7DEE 8567 875C 876E 8CE6 99D9 5B14 7E1B 8F39 9B92 8CFB 9351 9362 9CC6 8986 99A5 9C12 592B 752B 5490 88B1 915C 5085 6928 8984 79A3 9BB2 +FDD0-0047 65EE 5477 560E 5620 9486 5C1C 5676 9337 5C15 738D @@ -695,12 +705,11 @@ __DATA__ 9CA0 9ABE 9BC1 66F4 5829 6685 63B6 6929 -603E 5DE5 5F13 516C 53B7 529F 653B 675B 4F9B 739C 7CFC 80B1 5BAB 5BAE 606D 8EAC 9F9A 5311 5868 5E4A 6129 89E5 8EB3 7195 78BD 9AF8 89F5 9F8F 9F94 5EFE 5DE9 6C5E 62F1 62F2 6831 73D9 8F01 92DB 978F -5171 8D21 7FBE 551D 8CA2 +5171 8D21 7FBE 551D 8CA2 83BB 86A3 6150 52FE 4F5D 6C9F 94A9 88A7 7F11 920E 6E9D 9264 7DF1 8920 7BDD 97B2 97DD @@ -766,6 +775,7 @@ __DATA__ 873E 88F9 8F20 9301 991C 9439 8FC7 904E 556F +FDD0-0048 54C8 94EA 86E4 5964 @@ -780,7 +790,7 @@ __DATA__ 9097 542B 90AF 51FD 5481 80A3 51FE 8677 5505 5705 5A22 6D5B 5D21 6657 6892 6DB5 7113 7400 5BD2 5D45 97E9 751D 7B68 872C 6F8F 92E1 9B7D 97D3 -5388 7F55 6D6B 558A 850A 961A 8C43 9B2B +4E06 5388 7F55 6D6B 558A 850A 961A 8C43 9B2B 6C49 5C7D 6C57 95EC 65F1 5CBE 54FB 57BE 608D 634D 6D86 7302 839F 6658 6665 710A 83E1 91EC 9588 7694 7745 50BC 86FF 9894 99AF 6496 6F22 872D 8C8B 66B5 @@ -810,8 +820,8 @@ __DATA__ 7BD5 7FEE 879B 9B7A 7909 95D4 97A8 9F55 8988 9DA1 76AC 9449 9FA2 4F6B 578E 8D3A 8894 7103 8CC0 55C3 7142 788B 7187 -8910 8D6B 9E64 7FEF 58D1 764B 8B1E 7200 9DAE 9DB4 -974E 9E16 974F +8910 8D6B 9E64 7A52 7FEF 58D1 764B 8B1E 7200 9DAE +9DB4 974E 9E16 974F 7CAD 974D 9ED2 9ED1 563F 6F76 62EB 75D5 978E @@ -823,7 +833,6 @@ __DATA__ 583C 6DA5 9D46 5677 -4E4A 53FF 543D 544D 7074 8F70 54C4 8A07 70D8 8EE3 63C8 6E39 7122 7861 8C3E 85A8 8F37 569D 9367 8F5F 4EDC 5F18 5985 7EA2 5430 5B8F 6C6F 7392 7EAE 95F3 @@ -850,11 +859,11 @@ __DATA__ 7E20 879C 9190 9836 89F3 9378 992C 9D60 702B 9B0D 9C17 9D98 9DA6 4E55 6C7B 864E 6D52 4FFF 8400 7425 865D 6EF8 -4E92 5F16 6236 6237 6238 51B1 51B4 8290 5E0D 62A4 -6C8D 6CAA 5CB5 6019 623D 6608 6791 6018 795C 7B0F -5A5F 6248 74E0 695B 55C0 7D94 9120 96FD 5AED 5AEE -6462 6EEC 8530 69F4 71A9 9CF8 7C04 9359 569B 9E71 -8B77 9CE0 97C4 9800 9C6F 9E0C +4E65 4E92 5F16 6236 6237 6238 51B1 51B4 8290 5E0D +62A4 6C8D 6CAA 5CB5 6019 623D 6608 6791 6018 795C +7B0F 5A5F 6248 74E0 695B 55C0 7D94 9120 96FD 5AED +5AEE 6462 6EEC 8530 69F4 71A9 9CF8 7C04 9359 569B +9E71 8B77 9CE0 97C4 9800 9C6F 9E0C 4E4E 7C90 552C 7CCA 933F 9BF1 82B1 82B2 54D7 5629 848A 9335 534E 59E1 9A85 83EF 91EA 91EB 94E7 6ED1 733E 6433 @@ -919,6 +928,7 @@ __DATA__ 6FE9 7372 970D 6AB4 8B0B 77C6 7A6B 956C 56AF 7016 802F 8267 85FF 8816 56BF 66E4 81DB 7668 77D0 944A 9743 +FDD0-004A 4E0C 8BA5 51FB 5209 53FD 9965 4E69 520F 573E 673A 7391 808C 82A8 77F6 9E21 6785 54AD 59EB 8FF9 525E 5527 59EC 5C50 79EF 7B04 98E2 57FA 7EE9 559E 5D46 @@ -1004,8 +1014,8 @@ __DATA__ 6648 70C4 768E 77EB 811A 94F0 6405 6E6B 7D5E 527F 656B 6E6C 714D 8173 8CCB 50E5 6477 669E 8E0B 9278 9903 510C 528B 5FBA 649F 64B9 96A6 5FBC 61BF 657D -657F 7F34 66D2 74AC 77EF 76A6 87DC 7E73 8B51 5B42 -652A 705A 9C4E +657F 71DE 7F34 66D2 74AC 77EF 76A6 87DC 7E73 8B51 +5B42 652A 705A 9C4E 53EB 544C 5CE4 630D 8A06 73D3 7A8C 8F7F 8F83 654E 6559 7A96 6ED8 8F03 5602 5626 65A0 6F16 9175 564D 5DA0 6F50 566D 5B13 7365 85E0 8DAD 8F4E 91AE 8B65 @@ -1106,6 +1116,7 @@ __DATA__ 5441 4FCA 90E1 9656 57C8 5CFB 6343 6D5A 9982 9A8F 6659 710C 73FA 68DE 756F 7AE3 5101 7B98 7B9F 8720 5BEF 61CF 9915 71C7 6FEC 99FF 9D54 9D58 6508 651F +FDD0-004B 5494 5496 5580 8849 64D6 5361 4F67 80E9 9272 57B0 88C3 @@ -1194,6 +1205,7 @@ __DATA__ 5ED3 9822 9AFA 64F4 6FF6 95CA 979F 61D6 9729 97B9 9B20 97D5 +FDD0-004C 5783 62C9 67C6 7FCB 83C8 641A 908B 65EF 524C 782C 63E6 78D6 5587 85DE @@ -1211,8 +1223,8 @@ __DATA__ 7046 7C43 7E7F 862D 6595 6B04 7937 8974 56D2 7061 7C63 6B17 8B95 8E9D 9484 97CA 89C8 6D68 63FD 7F06 6984 6F24 7F71 9182 58C8 61D2 -89A7 64E5 5B3E 61F6 5B44 89BD 5B4F 652C 7060 6B16 -9872 7E9C +89A7 64E5 5B3E 61F6 5B44 89BD 5B4F 652C 7060 56D5 +6B16 9872 7E9C 70C2 6EE5 71D7 5682 6FEB 7201 721B 74D3 7224 946D 7CF7 7226 897D @@ -1300,7 +1312,7 @@ __DATA__ 5AFD 5BEE 5D9A 5D9B 6579 7360 7F2D 907C 66B8 71CE 7499 81AB 7642 9E69 5C6A 5EEB 7C1D 7E5A 87DF 8C42 8CFF 8E58 9410 9ACE 85D4 98C9 9DEF -948C 91D5 911D 84FC 61AD 77AD 957D 7212 +53FE 948C 91D5 911D 84FC 61AD 77AD 66E2 957D 7212 5C25 5C26 7093 6599 5C1E 5ED6 6482 7AB7 9563 720E 5217 52A3 51BD 52BD 59F4 6312 6D0C 8322 8FFE 54F7 @@ -1399,13 +1411,14 @@ __DATA__ 8BBA 6EA3 8AD6 78EE 7F57 5570 9831 56C9 -7321 8136 841D 903B 6924 8161 8999 9523 7BA9 9AA1 -9559 87BA 7F85 89B6 93CD 5138 89BC 9A3E 651E 7380 -863F 908F 6B0F 9A58 9E01 7C6E 947C 9960 +7F56 7321 8136 841D 903B 6924 8161 8999 9523 7BA9 +9AA1 9559 87BA 7F85 89B6 93CD 5138 89BC 9A3E 651E +7380 863F 908F 6B0F 9A58 9E01 7C6E 947C 9960 5246 502E 84CF 88F8 8EB6 7630 8803 81DD 66EA 7673 6CFA 5CC8 6D1B 7EDC 8366 9A86 6D1C 73DE 7866 7B3F 7D61 843D 55E0 645E 6F2F 7296 927B 96D2 99F1 9BA5 9D3C 9D45 6FFC 7E99 +FDD0-004D 5463 5988 5B56 5ABD 5B24 5B37 9EBB 75F2 8534 7298 87C7 @@ -1497,7 +1510,7 @@ __DATA__ 910D 5AC7 6E9F 733D 84C2 669D 69A0 9298 9CF4 7791 879F 89AD 4F72 59F3 51D5 614F 9169 -547D 8A7A +547D 6927 8A7A 63B5 8C2C 8B2C 6478 @@ -1506,11 +1519,11 @@ __DATA__ 5298 995D 62B9 61E1 672B 52B0 573D 59BA 5E13 6B7E 6B7F 6B81 6CAB 8309 -964C 5E1E 6629 67BA 768C 771C 773F 781E 79E3 8388 -83AB 773D 7C96 7D48 6E50 86E8 8C83 55FC 587B 5BDE -6F20 734F 84E6 8C8A 66AF 9286 977A 5AFC 9ED9 763C -7790 7799 9546 9B69 58A8 9ED8 700E 8B29 8C98 85E6 -87D4 93CC 7205 9A40 7933 7E86 8031 +964C 5E1E 6629 67BA 551C 768C 771C 773F 781E 79E3 +8388 83AB 773D 7C96 7D48 6E50 86E8 8C83 55FC 587B +5BDE 6F20 734F 84E6 8C8A 66AF 9286 977A 5AFC 9ED9 +763C 7790 7799 9546 9B69 58A8 9ED8 700E 8B29 8C98 +85E6 87D4 93CC 7205 9A40 7933 7E86 8031 5E85 603D 5C1B 9B79 9EBF 54DE 725F 4F94 52BA 6048 6D20 7738 8C0B 86D1 7F2A 8E0E @@ -1519,10 +1532,11 @@ __DATA__ 6BEA 6C01 58B2 6BCD 4EA9 7261 5776 59C6 5CD4 7273 7546 7552 80DF 755D 755E 782A 756E 9267 8E07 -6728 4EEB 76EE 6C90 72C7 7091 7267 82DC 6BE3 83AF -869E 94BC 52DF 96EE 5893 5E55 5E59 6154 6958 7766 -926C 6155 66AE 8252 9702 7A46 7E38 97AA +6728 4EEB 6730 76EE 6C90 72C7 7091 7267 82DC 6BE3 +83AF 869E 94BC 52DF 96EE 5893 5E55 5E59 6154 6958 +7766 926C 6155 66AE 8252 9702 7A46 7E38 97AA 51E9 62C7 +FDD0-004E 55EF 62CF 62FF 6310 55F1 954E 93BF 4E78 54EA 96EB @@ -1620,6 +1634,7 @@ __DATA__ 6A60 8BFA 558F 63BF 903D 611E 6426 9518 643B 6992 7A2C 8AFE 8E43 7CD1 61E6 61E7 7CE5 7A64 7CEF +FDD0-004F 5594 5662 54E6 7B7D @@ -1629,6 +1644,7 @@ __DATA__ 5418 5455 5076 8162 5614 8026 8545 85D5 6004 616A 85F2 +FDD0-0050 5991 7685 8DB4 8225 556A 8469 6777 722C 63B1 7436 7B62 6F56 5E0A 5E15 6015 8899 @@ -1640,7 +1656,7 @@ __DATA__ 723F 6D00 76D8 8DD8 5ABB 5E4B 84B0 642B 69C3 76E4 78D0 7E0F 78FB 8E52 700A 87E0 8E63 939C 97B6 51B8 5224 6C9C 62DA 6CEE 708D 53DB 7249 76FC 7554 -88A2 8A4A 6EBF 9816 92EC 897B 947B +8041 88A2 8A4A 6EBF 9816 92EC 897B 947B 9D65 4E53 6C97 80EE 96F1 6EC2 8196 9736 5390 5E9E 5396 9004 65C1 823D 5ACE 5FAC 8783 9CD1 @@ -1672,7 +1688,6 @@ __DATA__ 6367 6DCE 768F 527B 63BD 692A 78B0 8E2B 7BF7 -95CF 4E15 4F13 4F3E 6279 7EB0 90B3 576F 62AB 62B7 708B 72C9 7812 6082 79DB 79E0 7D15 94CD 65C7 7FCD 801A 8C7E 9208 921A 9239 925F 9294 5288 78C7 99D3 9AEC @@ -1720,7 +1735,6 @@ __DATA__ 5256 5A1D 6294 6299 634A 638A 88D2 7B81 9307 5485 54E3 5A44 7283 5ECD -54DB 4EC6 6534 6251 9660 5657 64B2 6F7D 64C8 9BC6 530D 8386 812F 83E9 83D0 8461 84B1 84B2 50D5 917A 58A3 735B 749E 6FEE 77A8 7A59 9564 8965 7E80 93F7 @@ -1728,6 +1742,7 @@ __DATA__ 6C06 6A8F 9568 8B5C 8E7C 9420 94FA 8216 8217 92EA 7011 66DD 5DEC 5DED 99C7 8D0C +FDD0-0051 4E03 8FC9 6C8F 59BB 67D2 501B 51C4 6816 6864 90EA 5A38 60BD 687C 6DD2 840B 6532 671F 68F2 6B3A 86E3 50DB 5601 617D 69BF 6F06 7DC0 617C 69ED 8AC6 8AFF @@ -1842,8 +1857,8 @@ __DATA__ 77BF 9F29 8627 5FC2 7048 6235 6B0B 6C0D 7C67 81DE 766F 8837 8862 8EA3 883C 947A 9E1C 53D6 7AD8 5A36 8A53 7AEC 877A 9F8B 9F72 -53BA 53BB 521E 547F 801D 9612 89D1 8DA3 95B4 9EAE -95C3 89B7 9F01 +53BA 53BB 521E 547F 551F 801D 9612 89D1 8DA3 95B4 +9EAE 95C3 89B7 9F01 8FF2 8850 5CD1 5F2E 606E 609B 5708 570F 68EC 99E9 9409 5168 6743 4F7A 8BE0 59FE 6CC9 6D24 8343 62F3 7277 @@ -1862,8 +1877,7 @@ __DATA__ 9E4A 9D72 590B 56F7 5CEE 9021 5BAD 5E2C 88D9 7FA3 7FA4 88E0 -513F 5152 -7F56 +FDD0-0052 5465 80B0 887B 8887 86A6 88A1 86BA 7136 9AE5 562B 9AEF 71C3 7E4E 5184 5189 59CC 82D2 67D3 73C3 5AA3 6A6A @@ -1922,10 +1936,11 @@ __DATA__ 53D2 82E5 504C 5F31 9100 6E03 712B 6949 84BB 7BAC 7BDB 7207 9C19 9C2F 9DB8 5D76 +FDD0-0053 4EE8 6331 6332 6492 6D12 8A2F 9778 6F75 7051 8EA0 5345 6CE7 98D2 810E 8428 9212 644B 99BA 98AF 85A9 -6AD2 +6AD2 8644 96A1 6BE2 6122 63CC 585E 6BF8 816E 567B 9CC3 984B 9C13 55EE 8D5B 50FF 8CFD 7C3A @@ -2003,8 +2018,8 @@ __DATA__ 613C 614E 6939 7606 7F67 8703 8704 6EF2 92E0 762E 5814 698A 9C30 5347 751F 9629 544F 58F0 6598 6607 6CE9 72CC 82FC -6B85 7272 73C4 965E 9679 7B19 6E66 713A 7525 924E -8072 9F2A 9D7F +680D 6B85 7272 73C4 965E 9679 7B19 6E66 713A 7525 +924E 8072 9F2A 9D7F 7EF3 61B4 7E69 8B5D 7701 771A 5057 6E3B 5723 80DC 6660 5270 76DB 5269 52DD 8CB9 5D4A 741E @@ -2123,8 +2138,9 @@ __DATA__ 7463 8928 7485 9388 938D 9396 93BB 93C1 9024 6EB9 8736 7411 55E6 +FDD0-0054 4ED6 5B83 5979 7260 7942 8DBF 94CA 584C 6999 6EBB -891F 5683 +891F 5683 95E7 8E79 5854 6E9A 5896 736D 9CCE 737A 9C28 4EA3 62D3 631E 72E7 95FC 5D09 6DBE 6428 8DF6 905D @@ -2253,6 +2269,7 @@ __DATA__ 9C16 67DD 6BE4 553E 841A 8DC5 6BFB 7BA8 8600 7C5C 9A7C 99DD +FDD0-0057 7A75 52B8 6316 6D3C 5A32 7556 7A8A 5AA7 55D7 86D9 6432 6E9B 6F25 7AAA 9F03 6528 5A03 @@ -2261,7 +2278,7 @@ __DATA__ 5C72 74F2 54C7 6B6A 558E 7AF5 5D34 -5916 9861 +5916 591E 9861 5F2F 525C 5A60 5E35 5846 6E7E 873F 6F6B 8C4C 5F4E 58EA 7063 4E38 5213 6C4D 7EA8 8284 5B8C 5C8F 628F 73A9 7D08 @@ -2288,12 +2305,12 @@ __DATA__ 84F6 912C 6F59 6F7F 78D1 9180 6FF0 934F 95C8 9BA0 7653 89B9 72A9 973A 6B08 5383 4F1F 4F2A 5C3E 7EAC 829B 82C7 59D4 709C 73AE -6D27 5A13 6D58 8371 8BFF 5049 507D 5D23 68B6 75CF -784A 9AA9 5D54 5FAB 6107 7325 8466 848D 9AAA 9AAB -6690 6932 7152 744B 75FF 8172 8249 97EA 50DE 64B1 -78C8 9C94 5BEA 7DEF 853F 8AC9 8E13 97D1 9820 85B3 -5130 6FFB 9361 9BAA 58DD 7022 97D9 98B9 97E1 8624 -6596 +6D27 5A13 5C57 6D58 8371 8BFF 5049 507D 5D23 68B6 +75CF 784A 9AA9 5D54 5FAB 6107 7325 8466 848D 9AAA +9AAB 6690 6932 7152 744B 75FF 8172 8249 97EA 50DE +64B1 78C8 9C94 5BEA 7DEF 853F 8AC9 8E13 97D1 9820 +85B3 5130 6FFB 9361 9BAA 58DD 7022 97D9 98B9 97E1 +8624 6596 536B 4E3A 672A 4F4D 5473 82FF 70BA 754F 80C3 53DE 8ECE 5C09 83CB 8C13 5582 5AA6 6E2D 7232 715F 78A8 851A 873C 6170 71AD 729A 7DED 885B 61C0 748F 7F7B @@ -2302,9 +2319,9 @@ __DATA__ 6364 7140 732C 589B 7E05 875F 5DB6 6637 586D 6E29 6985 6B9F 6EAB 7465 8F92 761F 8570 8C71 8F3C 8F40 9CC1 97B0 9C1B 9C2E -6587 5F63 7EB9 82A0 7086 739F 95FB 7D0B 8689 868A -73F3 960C 741D 96EF 7612 805E 99BC 9B70 9CFC 9D0D -87A1 95BA 95BF 87C1 95C5 9F24 95E6 +5301 6587 5F63 7EB9 82A0 7086 739F 95FB 7D0B 8689 +868A 73F3 960C 741D 96EF 7612 805E 99BC 9B70 9CFC +9D0D 87A1 95BA 95BF 87C1 95C5 9F24 95E6 520E 543B 5FDF 6286 5461 80B3 7D0A 687D 8117 7A33 7A4F 7A69 95EE 598F 6C76 83AC 554F 6E02 63FE 6435 9850 74BA @@ -2336,6 +2353,7 @@ __DATA__ 9E5C 907B 92C8 7AB9 971A 9F3F 9727 9F40 8601 9A16 9DA9 4E44 52A1 4F0D 52D9 933B +FDD0-0058 5915 516E 5438 5FDA 6271 6C50 8980 5E0C 6278 5365 6614 6790 7A78 80B8 80B9 4FD9 5F86 6038 6053 90D7 997B 550F 595A 5C56 6095 6C25 6D60 727A 72F6 8383 @@ -2362,8 +2380,8 @@ __DATA__ 856E 89A4 6231 9ED6 6232 78F6 8669 993C 9B29 7E6B 56B1 95DF 973C 5C6D 884B 897F 606F 6E13 6A72 72A0 7902 9BD1 -75A8 867E 8C3A 5084 9595 7146 7175 98AC 778E 8766 -9C15 +8672 75A8 867E 8C3A 5084 9595 7146 7175 98AC 778E +8766 9C15 5323 4FA0 72CE 4FE0 5CE1 67D9 70A0 72ED 965C 5CFD 70DA 72F9 73E8 796B 7856 7FC8 823A 967F 7864 9050 656E 6687 7455 7B6A 821D 78AC 8F96 78CD 7E00 8578 @@ -2477,8 +2495,8 @@ __DATA__ 8053 7D9A 84FF 5405 8F69 660D 5BA3 5F32 8ED2 688B 8C16 55A7 5847 5A97 6103 610B 63CE 8431 8432 6684 714A 7444 84D2 -777B 5107 79A4 7BAE 7FE7 8756 92D7 61C1 857F 8AE0 -8AFC 9379 99FD 77CE 7FFE 85FC 8610 8809 8B5E +777B 5107 79A4 7BAE 7E07 7FE7 8756 92D7 61C1 857F +8AE0 8AFC 9379 99FD 77CE 7FFE 85FC 8610 8809 8B5E 7384 73B9 75C3 60AC 65CB 7401 8701 5AD9 6F29 66B6 7487 6A88 74BF 61F8 54BA 9009 6645 70DC 9078 9848 7663 766C @@ -2503,6 +2521,7 @@ __DATA__ 900A 6B89 8A0A 8A19 595E 5DFD 6BBE 7A04 905C 613B 8CD0 5640 6F60 8548 9D55 720B 9868 9442 8BAD 8A13 5691 +FDD0-0059 4E2B 5727 538B 5416 5E98 62BC 6792 57AD 9E26 6860 9E2D 57E1 5B72 690F 9D09 930F 9D28 58D3 9D76 941A 7259 4F22 5391 5C88 82BD 5393 73A1 740A 7B0C 869C @@ -2572,10 +2591,10 @@ __DATA__ 9923 5688 64EB 66D7 77B8 9371 64EA 7217 790F 9391 9941 9D7A 9437 9768 9A5C 9E08 7237 4EAA 723A -4E00 5F0C 4F0A 8863 533B 541A 58F1 4F9D 794E 54BF -6D22 6098 7317 90FC 94F1 58F9 63D6 6B39 86DC 7995 -5ADB 6F2A 7A26 92A5 5B04 566B 5901 747F 9E65 7E44 -6AB9 6BC9 91AB 9EDF 8B69 9DD6 9EF3 +4E00 4E4A 5F0C 4F0A 8863 533B 541A 58F1 4F9D 794E +54BF 6D22 6098 7317 90FC 94F1 58F9 63D6 6B39 86DC +7995 5ADB 6F2A 7A26 92A5 5B04 566B 5901 747F 9E65 +7E44 6AB9 6BC9 91AB 9EDF 8B69 9DD6 9EF3 4E41 4EEA 531C 572F 5937 8FC6 519D 5B90 6C82 8BD2 4F87 6021 6CB6 72CB 886A 8FE4 9974 54A6 59E8 5CD3 605E 62F8 67C2 73C6 74F5 8D3B 8FFB 5BA7 5DF8 5F2C @@ -2595,20 +2614,20 @@ __DATA__ 9A7F 4FCB 5955 5E1F 5E20 5F08 67BB 6D02 6D42 73B4 75AB 7FBF 8875 8F76 5508 57BC 6092 6339 6359 6827 683A 6B2D 6D65 6D73 76CA 88A3 8C0A 966D 52DA 57F6 -57F8 60A5 639C 6BB9 7570 7F9B 7FCA 7FCC 8A32 8A33 -8C59 8C5B 9038 91F4 96BF 5E46 6561 6679 68ED 6B94 -6E59 7132 86E1 8A4D 8DC7 8EFC 9220 9AAE 4E84 517F -610F 6EA2 7348 75EC 776A 7AE9 7F22 7FA9 8084 88D4 -88DB 8A63 52E9 5AD5 5ED9 698F 6F69 7617 8189 84FA -8734 977E 99C5 5104 648E 69F8 6BC5 71A0 -71A4 71BC 761E 8ABC 9552 9E5D 9E62 9ED3 5293 571B -58BF 5B11 5B1F 5DA7 61B6 61CC 66C0 6BAA 6FBA 71DA -7631 7796 7A53 7E0A 8257 858F 87A0 8939 5BF1 6581 -66CE 6A8D 6B5D 71E1 71F1 7FF3 7FFC 81C6 8CF9 9BA8 -7654 85D9 85DD 8D00 93B0 9571 7E76 7E79 8C77 972C -9BE3 9D82 9D83 7037 8619 8B6F 8B70 91B3 91B7 9950 -56C8 943F 9DC1 9DCA 61FF 897C 9A5B 9DE7 8649 9DFE -8B9B 9F78 +57F8 60A5 639C 6BB9 7570 785B 7F9B 7FCA 7FCC 8A32 +8A33 8C59 8C5B 9038 91F4 96BF 5E46 6561 6679 68ED +6B94 6E59 7132 86E1 8A4D 8DC7 8EFC 9220 9AAE 4E84 +517F 610F 6EA2 7348 75EC 776A 7AE9 7F22 7FA9 8084 +88D4 88DB 8A63 52E9 5AD5 5ED9 698F 6F69 7617 8189 +84FA 8734 977E 99C5 5104 648E 69F8 6BC5 +71A0 71A4 71BC 761E 8ABC 9552 9E5D 9E62 9ED3 5293 +571B 58BF 5B11 5B1F 5DA7 61B6 61CC 66C0 6BAA 6FBA +71DA 7631 7796 7A53 7E0A 8257 858F 87A0 8939 5BF1 +6581 66CE 6A8D 6B5D 71E1 71F1 7FF3 7FFC 81C6 8CF9 +9BA8 7654 85D9 85DD 8D00 93B0 9571 7E76 7E79 8C77 +972C 9BE3 9D82 9D83 7037 8619 8B6F 8B70 91B3 91B7 +9950 56C8 943F 9DC1 9DCA 61FF 897C 9A5B 9DE7 8649 +9DFE 8B9B 9F78 8FB7 5307 8864 5B9C 7569 841F 692C 9D8D 7C4E 56D9 56E0 9625 9634 4F8C 5794 59FB 6D07 8335 836B 97F3 9A83 6836 6BB7 6C24 9670 51D0 79F5 88C0 94DF @@ -2672,11 +2691,11 @@ __DATA__ 7AFD 8201 8330 5A1B 5A2F 5A31 6859 72F3 8C00 9151 9980 6E14 8438 9685 96E9 9B5A 5823 582C 5D33 5D4E 5D5B 6109 63C4 6970 6E1D 6E61 756D 7862 8174 842E -903E 9AAC 611A 6961 6986 6B48 724F 745C 8245 865E -89CE 6F01 776E 7AAC 8206 8915 6B76 7FAD 854D 8753 -8ADB 96D3 9918 5B29 6F9E 89A6 8E30 6B5F 74B5 87B8 -8F3F 935D 8B23 9AC3 9BBD 65DF 7C45 9A1F 861B 9C05 -9DE0 9E06 +903E 9AAC 611A 65D5 6961 6986 6B48 724F 745C 8245 +865E 89CE 6F01 776E 7AAC 8206 8915 6B76 7FAD 854D +8753 8ADB 96D3 9918 5B29 6F9E 89A6 8E30 6B5F 74B5 +87B8 8F3F 935D 8B23 9AC3 9BBD 65DF 7C45 9A1F 861B +9C05 9DE0 9E06 4E0E 4E88 4F1B 5B87 5C7F 7FBD 96E8 4FC1 4FE3 79B9 8BED 5704 5CFF 7964 504A 532C 5709 5EBE 6554 9105 659E 842D 50B4 5BD9 6940 7440 7610 8207 8A9E 7AB3 @@ -2687,14 +2706,14 @@ __DATA__ 7821 94B0 9884 5590 57DF 5809 6086 60D0 6B32 6DE2 6DEF 8C15 9033 9608 5585 55A9 55BB 5A80 5BD3 5EBD 5FA1 68DB 68DC 68EB 7134 7419 77DE 7872 88D5 9047 -98EB 99AD 9E46 6108 6EEA 715C 7A22 7F6D 84AE 84E3 -8A89 923A 9810 5AD7 5D8E 622B 6BD3 7344 7609 7DCE -871F 872E 8F0D 9289 564A 617E 6F4F 7A36 84F9 8581 -8C6B 9079 92CA 9CFF 6FA6 71CF 71E0 8577 8AED 9325 -95BE 9D25 9D2A 5125 7907 79A6 9B4A 9E6C 7652 7916 -791C 7A65 7BFD 7E58 91A7 9D52 6AF2 9947 8B7D 8F5D -942D 9731 6B0E 9A48 9B3B 7C5E 9C4A 9DF8 9E12 6B1D -9FA5 8EC9 9B30 9B31 706A 7C72 7229 +98EB 99AD 9E46 6108 6EEA 715C 7A22 7F6D 8248 84AE +84E3 8A89 923A 9810 5AD7 5D8E 622B 6BD3 7344 7609 +7DCE 871F 872E 8F0D 9289 564A 617E 6F4F 7A36 84F9 +8581 8C6B 9079 92CA 9CFF 6FA6 71CF 71E0 8577 8AED +9325 95BE 9D25 9D2A 5125 7907 79A6 9B4A 9E6C 7652 +7916 791C 7A65 7BFD 7E58 91A7 9D52 6AF2 9947 8B7D +8F5D 942D 9731 6B0E 9A48 9B3B 7C5E 9C4A 9DF8 9E12 +6B1D 9FA5 8EC9 9B30 9B31 706A 7C72 7229 6327 8362 6F9A 9BF2 56E6 9E22 5248 51A4 6081 7722 9E33 5BC3 6E01 6E06 6E0A 6E15 60CC 6DF5 847E 68E9 84AC 870E 88F7 9E53 @@ -2727,6 +2746,7 @@ __DATA__ 904B 614D 816A 97EB 97F5 7185 71A8 7DF7 7DFC 8574 8580 9196 919E 992B 85F4 97D7 97DE 860A 97FB 62A3 7E67 +FDD0-005A 5E00 531D 6C9E 8FCA 5482 62F6 7D25 7D2E 9254 9B73 81DC 81E2 6742 7838 507A 5592 97F4 96D1 5DBB 78FC 894D 96DC @@ -2838,12 +2858,12 @@ __DATA__ 9D44 7E54 8635 9F05 6267 4F84 59B7 76F4 59EA 5024 503C 8040 91DE 57F4 57F7 6DD4 804C 8CAD 690D 6B96 7286 7983 7D77 8901 -8DD6 74E1 9244 588C 646D 99BD 5B02 6179 6F10 8E2F -6A34 81B1 5128 7E36 8077 87D9 8E60 8EC4 8E91 +8DD6 55ED 74E1 9244 588C 646D 99BD 5B02 6179 6F10 +8E2F 6A34 81B1 5128 7E36 8077 87D9 8E60 8EC4 8E91 5902 6B62 53EA 52A7 65E8 962F 5740 5741 5E0B 627A -6C66 6C9A 7EB8 82B7 62A7 7949 54AB 6049 6307 67B3 -6D14 780B 8879 8F75 6DFD 75BB 7D19 8A28 8DBE 8EF9 -9EF9 916F 85E2 8967 +6C66 6C9A 7EB8 82B7 603E 62A7 7949 54AB 6049 6307 +67B3 6D14 780B 8879 8F75 6DFD 75BB 7D19 8A28 8DBE +8EF9 9EF9 916F 85E2 8967 9624 81F3 8296 5FD7 5FEE 627B 8C78 5236 5394 5781 5E19 5E1C 6CBB 7099 8D28 8FE3 90C5 5CD9 5EA2 5EA4 6303 67E3 6809 6D37 7951 965F 5A21 5F8F 631A 664A @@ -2921,19 +2941,19 @@ __DATA__ 7C71 5285 7AA7 -5B5C 830A 5179 54A8 59D5 59FF 8332 6825 7386 7D0E -8D40 8D44 6DC4 79F6 7F01 8C18 55DE 5B73 5D6B 6914 -6E7D 6ECB 7CA2 8458 8F8E 9111 5B76 798C 89DC 8A3E -8CB2 8CC7 8D91 9531 7A35 7DC7 922D 9543 9F87 8F1C -9F12 6FAC 8AEE 8DA6 8F3A 9319 9AED 9CBB 937F 93A1 -74BE 983F 983E 9BD4 9D85 9F4D 9C26 +4E72 5B5C 830A 5179 54A8 59D5 59FF 8332 6825 7386 +7D0E 8D40 8D44 6DC4 79F6 7F01 8C18 55DE 5B73 5D6B +6914 6E7D 6ECB 7CA2 8458 8F8E 9111 5B76 798C 89DC +8A3E 8CB2 8CC7 8D91 9531 7A35 7DC7 922D 9543 9F87 +8F1C 9F12 6FAC 8AEE 8DA6 8F3A 9319 9AED 9CBB 937F +93A1 74BE 983F 983E 9BD4 9D85 9F4D 9C26 84FB 4ED4 5407 59C9 59CA 674D 77F7 79C4 80CF 5470 79ED 7C7D 8014 8678 7B2B 6893 91E8 5559 7D2B 6ED3 8A3F 699F 5B57 81EA 8293 8321 5033 525A 6063 7278 6E0D 7725 7726 80D4 80FE 6F2C -5B50 5D30 55ED 6A74 +5B50 5D30 6A74 5B97 5027 7EFC 9A94 582B 5D4F 5D55 60FE 68D5 7323 8159 847C 6721 6936 5D78 7A2F 7D9C 7DC3 71A7 7DF5 7FEA 876C 8E28 8E2A 78EB 9350 8C75 8E64 9A0C 9B03 diff --git a/cpan/Unicode-Collate/Collate/CJK/Stroke.pm b/cpan/Unicode-Collate/Collate/CJK/Stroke.pm index b96aaee3cf..eeb4c07bd0 100644 --- a/cpan/Unicode-Collate/Collate/CJK/Stroke.pm +++ b/cpan/Unicode-Collate/Collate/CJK/Stroke.pm @@ -3,7 +3,7 @@ package Unicode::Collate::CJK::Stroke; use 5.006; use strict; -our $VERSION = '0.76'; +our $VERSION = '0.85'; my %u2p; my $wt = 0x8000; @@ -13,7 +13,7 @@ while (<DATA>) { my @c = split; for my $c (@c) { next if !$c; - $u2p{hex($c)} = $wt; + $u2p{hex($c)} = $wt if $c !~ /-/; $wt++; } } @@ -25,15 +25,18 @@ sub weightStroke { 1; __DATA__ +FDD0-2801 4E00 4E28 4E36 4E3F 4E40 4E41 2E84 4E59 4E5A 4E5B 200CA 200CB 200CC 200CD 200D1 4E85 2010C 3006 3007 3021 3025 303B +FDD0-2802 4E01 4E02 4E03 4E04 4E05 4E06 4E29 4E37 4E42 4E43 4E44 20086 20087 2008A 4E5C 4E5D 4E86 2010E 4E8C 4EA0 4EBA 4EBB 513F 5165 516B 2E86 5182 5196 51AB 2E87 51E0 51F5 2E88 5200 5201 5202 529B 52F9 5315 531A 5338 5341 2E8A 535C 5369 5382 53B6 2E80 53C8 5DDC 8BA0 2ECF 2ED6 28E0F 3022 3024 3026 +FDD0-2803 4E07 4E08 4E09 4E0A 4E0B 4E0C 4E90 5344 3404 4E2A 4E2B 4E38 4E49 4E45 4E46 4E47 4E48 4E4A 4E5E 4E5F 4E60 4E87 4E8D 4E8E 4E8F 4EA1 4EBC 4EBD 4EBE 4EBF @@ -45,6 +48,7 @@ __DATA__ 5E7A 5E7F 5EF4 5EFE 5F0B 5F13 2E95 5F50 5F51 5F61 5F73 5FC4 624C 624D 6C35 72AD 7E9F 2EBE 8279 2ECC 95E8 961D 98DE 9963 9A6C 3005 3023 3027 +FDD0-2804 4E0D 4E0E 4E0F 4E10 4E11 4E12 4E13 4E2D 4E2E 4E2F 4E30 4E39 4E3A 4E4B 4E4C 5C39 4E63 4E64 4E65 4E66 4E88 4E91 4E92 4E93 4E94 4E95 4E96 4EA2 4EA3 4EC0 @@ -67,6 +71,7 @@ __DATA__ 725B 725C 72AC 738B 248E9 793B 25605 7F53 8002 8080 2EBC 89C1 8BA1 8BA2 8BA3 8BA4 8BA5 8D1D 8F66 2ECD 8FB6 95E9 97E6 98CE 3028 3029 +FDD0-2805 4E17 3400 4E14 4E15 4E16 4E18 4E19 4E1A 4E1B 4E1C 4E1D 4E31 4E3B 4E3C 4E4D 4E4E 4E4F 4E50 20094 4E67 4E97 3430 3431 3432 3433 3434 3435 3436 3437 4ED4 @@ -102,6 +107,7 @@ __DATA__ 8BAB 8BAC 8BAD 8BAE 8BAF 8BB0 8BB1 8F67 8FB7 9092 9093 9485 957F 95EA 961E 961F 9964 9965 9A6D 9E1F 9F99 +FDD0-2806 3401 4E1E 4E1F 4E20 4E21 4E22 4E51 4E52 4E53 4E54 4E68 4E69 4E6A 4E6B 4E6C 4E6D 4E6E 4E6F 4E70 4E89 4E98 4E99 4E9A 342B 4EA4 4EA5 4EA6 4EA7 3438 3439 @@ -161,6 +167,7 @@ __DATA__ 9098 9099 909A 909B 909C 909D 9486 9487 95EB 95EC 95ED 95EE 95EF 9620 9621 9622 9623 9624 9875 9966 9967 9A6E 9A6F 9A70 9F50 +FDD0-2807 4E23 4E24 4E25 4E3D 20021 4E32 20068 4E55 4E71 4E72 4E8A 20118 4E9C 4EA8 4EA9 4EAA 3446 4F2D 4F2E 4F2F 4F30 4F31 4F32 4F33 4F34 4F35 4F36 4F37 4F38 4F39 @@ -260,6 +267,7 @@ __DATA__ 962C 962D 962E 962F 9630 9631 9632 9633 9634 9635 9636 28E36 28E39 97E7 98CF 9968 9969 996A 996B 996C 996D 996E 9A71 9A72 9A73 9A74 9E20 9E21 9EA6 9F9F +FDD0-2808 4E26 4E27 4E33 4E56 4E73 4E74 4E75 4E76 4E77 4E78 200EE 4E8B 4E9B 4E9D 4E9E 4E9F 342D 4EAB 4EAC 4F4C 344C 3450 4F69 4F6A 4F6B 4F6C 4F6D 4F6E 4F6F 4F70 @@ -404,6 +412,7 @@ __DATA__ 9A75 9A76 9A77 9A78 9A79 9A7A 9A7B 9A7C 9A7D 9A7E 9A7F 9A80 9C7C 9E22 9E23 9E24 9EFE 9F21 9F7F +FDD0-2809 4E34 4E3E 4E57 3420 4E79 4E7A 4E7B 4E7C 4EAD 4EAE 4EAF 4EB0 4EB1 4EB2 4FAE 4FAF 4FB0 4FB1 4FB2 4FB3 4FB4 4FB5 4FB6 4FB7 4FB8 4FB9 4FBA 4FBB 4FBC 4FBD @@ -568,6 +577,7 @@ __DATA__ 9975 9976 9977 9978 9979 997A 997B 997C 9996 29810 9999 9A81 9A82 9A83 9A84 9A85 9A86 9A87 9A88 9A89 2EE3 9CEC 9E25 9E26 9E27 9E28 9E29 +FDD0-280A 2003E 4E35 4E58 4E7D 4EB3 3465 3466 4FEE 4FEF 4FF0 4FF1 4FF2 4FF3 4FF4 4FF5 4FF6 4FF7 4FF8 4FF9 4FFA 4FFB 4FFC 4FFD 4FFE 4FFF 5000 5001 5002 5003 5004 @@ -758,6 +768,7 @@ __DATA__ 9A8D 9A8E 9A8F 9AA8 9AD8 9ADF 9B25 9B2F 9B32 9B3C 9C7D 9E2A 9E2B 9E2C 9E2D 9E2E 9E2F 9E30 9E31 9E32 9E33 9E34 9E35 9E36 9F80 +FDD0-280B 3422 4E7E 4E7F 4E80 3464 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 504A 504B 504C 504D 504E 504F 5050 5051 5052 5053 5054 5055 5057 5058 5059 @@ -967,6 +978,7 @@ __DATA__ 9A95 9A96 9AD9 9B5A 9C7E 9CE5 9E37 9E38 9E39 9E3A 9E3B 9E3C 9E3D 9E3E 9E3F 9E75 9E7F 9EA5 9EB8 9EBB 9ED2 9F81 9F9A 9F9B +FDD0-280C 20046 4E81 4EB4 4EB5 5068 3473 347A 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 508A 508B 508C 508D 508E 508F 5090 5091 5092 5093 5094 5095 5096 @@ -1185,6 +1197,7 @@ __DATA__ 9A99 9A9A 9A9B 9AA9 9AE0 9C7F 9C80 9C81 9C82 9C83 9CE6 9E40 9E41 9E42 9E43 9E44 9E45 9E46 9E47 9E48 9EC3 9EC4 9ECD 9ED1 9EF9 9F0B 9F82 +FDD0-280D 4E82 4E83 4E84 4EB6 4EB7 347D 347E 50AA 50AB 50AC 50AD 50AE 50AF 50B0 50B1 50B2 50B3 50B4 50B5 50B6 50B7 50B8 50B9 50BA 50BB 50BC 50BD 50BE 50BF 50C0 @@ -1388,6 +1401,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9E4B 9E4C 9E4D 9E4E 9E4F 9E50 9E51 9E52 9E53 9E54 9E80 9E81 9E82 2A2FF 9EFD 9F0C 9F0E 9F13 9F14 9F20 9F83 9F84 9F85 9F86 +FDD0-280E 2004E 221A1 50CE 50CF 50D0 50D1 50D2 50D3 50D4 50D5 50D6 50D7 50D8 50D9 50DA 50DB 50DC 50DD 50DE 50DF 50E0 50E1 50E2 50E3 50E4 50E5 50E6 50E7 50E8 50E9 @@ -1572,6 +1586,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9C9D 9C9E 9C9F 9CF1 9CF2 9CF3 9CF4 9CF5 9CF6 9E55 9E56 9E57 9E59 9E5A 9E5B 9E5C 9EA7 9EBC 9EBD 9F3B 9F4A 9F87 9F88 +FDD0-280F 3493 3496 3498 50F5 50F6 50F8 50F9 50FA 50FB 50FC 50FD 50FE 50FF 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 510A 510B 510C 510D 510E 510F 203C9 @@ -1757,6 +1772,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9E5E 9E5F 9E60 9E61 9E62 9E63 9E64 9E76 9E83 9E84 2A293 9EA8 9EA9 9EAA 9EAB 9EB9 9EBE 9ECE 58A8 9ED3 9F0F 9F10 9F11 9F51 9F52 9F89 9F8A +FDD0-2810 4EB8 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 511A 511B 511C 511D 511E 512B 203F5 203FC 5163 204FC 5180 51AA 51DD 51DE 20615 5290 5291 5292 5293 @@ -1911,6 +1927,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9E7E 9E85 9E86 9E87 9E88 2A29F 4D34 9EAC 9EAD 9EAE 9EBA 9EC5 9EC6 9ED4 9ED5 9ED6 9ED7 9ED8 9EFA 9F12 9F3C 9F3D 9F53 9F8D 9F9C +FDD0-2811 511F 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 512A 512C 20413 20414 2041F 5132 51DF 20619 2061A 5295 3525 3526 52F4 52F5 52F6 5335 3553 53B3 20B8F @@ -2038,6 +2055,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9EC9 9ECF 9EDA 9EDB 9EDC 9EDD 9EDE 2A434 9EFB 9EFF 9F22 9F23 9F24 9F3E 9F3F 9F4B 2A5C6 9F54 9F62 9F8B 9F8C 9FA0 +FDD0-2812 512D 512E 512F 5131 34AF 204FE 5181 205A5 20AC2 53E2 3609 5694 5695 5696 5697 5698 5699 569A 569B 569C 569D 569E 569F 56A0 56A1 56A2 56A3 56A4 21096 2109D @@ -2133,6 +2151,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9E90 2A2B4 2A2B6 2A2BA 2A2BD 4D36 9EB1 9EB2 9EBF 9ECA 9ECB 9EDF 9EE0 9EE1 9F00 9F01 9F02 9F15 9F16 9F25 9F26 9F27 9F28 9F29 9F2A 9F2B 9F2C 9F4C 9F55 9F8E +FDD0-2813 3426 34A3 5133 5134 5135 5296 52F7 52F8 5336 53B4 58E1 56A5 56A6 56A7 56A8 56A9 56AA 56AB 56AC 56AD 56AF 56B0 210E4 210F4 210F5 210F6 58DA 58DB 58DC 58DD @@ -2217,6 +2236,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9E95 9E96 9E97 9EB3 9EB4 9EC0 4D4C 9EE2 9EE3 9EFC 9F03 9F04 9F17 9F2D 9F40 9F41 9F4D 9F56 9F57 9F58 9F8F 2A6A9 +FDD0-2814 34A5 5136 5337 56B1 56B2 56B3 56B4 56B5 56B6 56B7 56B8 56B9 2112F 56BC 58E3 58E4 58E5 214E8 3736 3737 3738 5B40 5B41 5B42 5B43 5B44 5B45 5B46 21910 21911 @@ -2281,6 +2301,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9EE5 9EE6 9EE7 9EE8 9EE9 9EEA 2A45B 9F0D 9F2E 9F2F 9F30 2A5CB 9F59 9F5A 9F5B 9F5D 9F5E 9F5F 9F60 9F61 9F63 9F91 +FDD0-2815 34A7 5137 5138 5139 513A 5164 5297 5298 2082C 535B 56BA 56BB 56BD 56BE 56BF 56C0 56C1 56C2 56C3 56C4 56CD 2113B 2113D 21145 21148 2114F 58E6 214FD 5914 3739 @@ -2326,6 +2347,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9E7B 9E9C 9E9D 4D4E 9EEB 9EEC 9EED 9EEE 9EEF 9F05 9F18 9F19 9F1A 9F1B 9F31 9F4E 9F5C 9F64 9F65 9F66 9F67 9F68 9F69 2A601 9F92 9F9D 9FA1 +FDD0-2816 4EB9 513B 513C 20465 20979 3618 56C5 56C6 56C7 56C8 56C9 56CA 56CB 56CE 571D 5971 373A 5B4A 5B4B 5B4C 21922 21927 5B7F 5DCE 5DD1 5DD2 5DD3 5DD4 5DD5 5DD7 @@ -2360,6 +2382,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 2A133 2A134 2A150 9DE9 9DF5 9E73 9E74 9E9E 2A2DF 9EB6 9ED0 9EF0 9EF1 9F32 9F33 9F34 9F35 9F42 4D9C 9F6A 9F6B 9F6C 9F93 9F94 9F95 9FA2 +FDD0-2817 513D 5299 529A 20ACD 361A 56CC 56CF 56D0 58E7 58E8 21582 5972 5B4D 5DD6 5DD8 5DDA 5F4F 6200 6201 6203 6204 6523 3A77 6525 6528 6529 652A 652B 6596 3B2E @@ -2387,6 +2410,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9EF2 9EF3 9EF4 9F06 9F07 9F1C 9F36 9F37 9F38 9F39 9F43 9F44 9F4F 9F6D 9F6E 9F6F 9F70 9F71 2A632 +FDD0-2818 513E 204D7 56D1 56D2 56D3 21180 3681 58E9 5B4E 5B4F 5C6D 5DD9 22980 652C 652D 66ED 66EE 6B13 6B14 6B15 705D 705E 705F 7060 7061 7223 74DB 74E5 7671 7672 @@ -2404,6 +2428,7 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 4D09 9DF9 9DFD 9DFE 9DFF 9E00 9E01 9E02 9E03 9E04 9E05 9E06 9E07 9E08 9E09 9E0A 2A1B4 9E7C 9E7D 9EA0 9F1E 9F45 9F46 9F72 9F73 9F74 9F75 9F76 9F77 +FDD0-2819 56D4 56D5 21187 58EA 5EF3 6205 6206 652E 65B8 3B2F 66EF 6B16 6B17 6B18 6B19 6B1A 6B1B 6B1D 7062 7063 7224 7225 7226 72AA 24D06 77D8 77D9 77E1 7939 7C69 @@ -2416,32 +2441,44 @@ FA13 2355A 23567 23595 23599 2359C 3C3C 3C3E 9C6C 9C6D 9C6E 9C6F 29F30 9E0B 9E0C 9E0D 9E0E 9E0F 9E10 9E11 9E12 2A1DF 9EA1 9ECC 9EF5 9F08 9F09 9F1D 9F1F 9F47 9F78 9F79 9F7A 9F7B 2A64A 9FA3 +FDD0-281A 3536 571E 373B 5F60 6B1C 6C0E 704E 7064 7066 24AE2 7673 77DA 7C6F 7C70 25E0E 7CF3 8644 866A 883C 8B9A 8B9B 27E4F 8DB2 8EA6 8EA7 91C3 91C4 9474 9475 9476 9477 9478 9479 947A 28C09 28C1C 28C1D 974A 97C9 4BBE 9A60 9A61 9A62 9A63 9A65 9AD7 9C71 9C72 9C73 9C74 9C75 9C76 9E13 9E14 2A1F5 9EF6 9F0A 2A65B 9FA4 9FA5 +FDD0-281B 7065 7067 7068 24177 2417A 72AB 7CF7 7E9C 7E9D 864A 883D 883E 883F 897D 8B9C 8B9D 8B9E 8C53 8C9C 8EA9 8EAA 8EC9 8F65 91C5 947B 947C 947D 947E 28C23 28C26 974B 974C 974D 974E 9873 9874 98CC 98CD 98DD 9960 9961 99AB 9A64 9A66 9A67 9B24 9B2E 9B30 9C77 9C78 9E15 9E16 9E17 9EF7 9F48 +FDD0-281C 56D6 6207 22EB3 2331F 6B1E 6B1F 7227 24AE9 3FDC 7674 27175 864C 8C54 8EA8 28207 947F 9480 9481 9482 28C2B 28C30 96E7 4BC0 9A68 9A69 9E18 9E19 9E1A 2A220 9EA2 9EF8 9F3A 9F7C 9F7D 9F9E +FDD0-281D 7228 7E9E 864B 8B9F 4979 9483 9484 974F 9A6A 9B31 9C79 9E1B 9E1C 9EB7 +FDD0-281E 53B5 7675 4190 7C71 4585 28C39 97CA 9962 9A6B 29C73 9C7A 9E1D 9E1E 2A233 4D91 +FDD0-281F 7069 275A3 4D10 9EA3 +FDD0-2820 706A 7C72 28C3B 9F96 +FDD0-2821 2193B 7229 9C7B 9EA4 9F97 +FDD0-2823 9F7E +FDD0-2824 9F49 +FDD0-2827 9750 +FDD0-2830 9F98 __END__ diff --git a/cpan/Unicode-Collate/Collate/Locale.pm b/cpan/Unicode-Collate/Collate/Locale.pm index 6e23bff526..67c972ba61 100644 --- a/cpan/Unicode-Collate/Collate/Locale.pm +++ b/cpan/Unicode-Collate/Collate/Locale.pm @@ -4,7 +4,7 @@ use strict; use Carp; use base qw(Unicode::Collate); -our $VERSION = '0.81'; +our $VERSION = '0.85'; use File::Spec; @@ -12,37 +12,65 @@ use File::Spec; my $PL_EXT = '.pl'; my %LocaleFile = map { ($_, $_) } qw( - af ar as az be bg ca cs cy da eo es et fi fil fo fr gu - ha haw hi hr hu hy ig is ja kk kl kn ko kok ln lt lv - mk ml mr mt nb nn nso om or pa pl ro ru se sk sl sq sv - tn to tr uk vi wo yo zh + af ar as az be bg bn ca cs cy da eo es et fa fi fil fo fr + gu ha haw hi hr hu hy ig is ja kk kl kn ko kok ln lt lv + mk ml mr mt nb nn nso om or pa pl ro ru sa se si sk sl sq + sr sv ta te th tn to tr uk ur vi wae wo yo zh ); $LocaleFile{'default'} = ''; # aliases - $LocaleFile{'bs'} = 'hr'; - $LocaleFile{'sr'} = 'ru'; + $LocaleFile{'bs'} = 'hr'; + $LocaleFile{'sr_Latn'} = 'hr'; # short file names $LocaleFile{'de__phonebook'} = 'de_phone'; $LocaleFile{'es__traditional'} = 'es_trad'; $LocaleFile{'fi__phonebook'} = 'fi_phone'; + $LocaleFile{'si__dictionary'} = 'si_dict'; + $LocaleFile{'sv__reformed'} = 'sv_refo'; $LocaleFile{'zh__big5han'} = 'zh_big5'; $LocaleFile{'zh__gb2312han'} = 'zh_gb'; $LocaleFile{'zh__pinyin'} = 'zh_pin'; $LocaleFile{'zh__stroke'} = 'zh_strk'; +my %TypeAlias = qw( + phone phonebook + phonebk phonebook + dict dictionary + reform reformed + trad traditional + big5 big5han + gb2312 gb2312han +); + sub _locale { my $locale = shift; if ($locale) { $locale = lc $locale; $locale =~ tr/\-\ \./_/; - $locale =~ s/_phone(?:bk)?\z/_phonebook/; - $locale =~ s/_trad\z/_traditional/; - $locale =~ s/_big5\z/_big5han/; - $locale =~ s/_gb2312\z/_gb2312han/; + $locale =~ s/_([0-9a-z]+)\z/$TypeAlias{$1} ? + "_$TypeAlias{$1}" : "_$1"/e; $LocaleFile{$locale} and return $locale; - my ($l,$t,$v) = split(/_/, $locale.'__'); - for my $loc ("${l}_${t}_$v", "${l}_$t", "${l}__$v", "${l}__$t", $l) { + my @code = split /_/, $locale; + my $lan = shift @code; + my $scr = @code && length $code[0] == 4 ? ucfirst shift @code : ''; + my $reg = @code && length $code[0] < 4 ? uc shift @code : ''; + my $var = @code ? shift @code : ''; + + my @list; + push @list, ( + "${lan}_${scr}_${reg}_$var", + "${lan}_${scr}__$var", # empty $scr should not be ${lan}__$var. + "${lan}_${reg}_$var", # empty $reg may be ${lan}__$var. + "${lan}__$var", + ) if $var ne ''; + push @list, ( + "${lan}_${scr}_${reg}", + "${lan}_${scr}", + "${lan}_${reg}", + ${lan}, + ); + for my $loc (@list) { $LocaleFile{$loc} and return $loc; } } @@ -93,10 +121,11 @@ locale based CLDR af 2.0 = 1.8.1 ar 2.0 as 2.0 = 1.8.1 -az 2.0 = 1.8.1 +az 2.0 = 1.8.1 (type="standard") be 2.0 bg 2.0 -bs 2.0 +bn 2.0.1 (type="standard") +bs 2.0 (alias source="hr") ca 2.0 = 1.8.1 (alt="proposed" type="standard") cs 2.0 = 1.8.1 (type="standard") cy 2.0 = 1.8.1 @@ -104,8 +133,9 @@ da 2.0 = 1.8.1 (type="standard") [modify aA to pass CLDR tests] de__phonebook 2.0 (type="phonebook") eo 2.0 = 1.8.1 es 2.0 (type="standard") -es__traditional 2.0 = 1.8.1 (type="traditional") +es__traditional 2.0 = 1.8.1 (type="traditional") et 2.0 = 1.8.1 +fa 2.0 = 1.8.1 fi 2.0 = 1.8.1 (type="standard" alt="proposed") fi__phonebook 2.0 = 1.8.1 (type="phonebook") fil 2.0 (type="standard") = 1.8.1 @@ -114,15 +144,15 @@ fr 2.0 (fr_CA, backwards="on") gu 2.0 (type="standard") ha 2.0 haw 2.0 = 1.8.1 -hi 2.0 -hr 2.0 +hi 2.0 (type="standard") +hr 2.0 (type="standard") hu 2.0 = 1.8.1 (alt="proposed" type="standard") hy 2.0 = 1.8.1 ig 2.0 = 1.8.1 -is 2.0 = 1.8.1 +is 2.0 = 1.8.1 (type="standard") ja 2.0 = 1.8.1 (type="standard") kk 2.0 -kl 2.0 = 1.8.1 +kl 2.0 = 1.8.1 (type="standard") kn 2.0 (type="standard") ko 2.0 = 1.8.1 (type="standard") kok 2.0 = 1.8.1 @@ -142,24 +172,34 @@ pa 2.0 = 1.8.1 pl 2.0 = 1.8.1 ro 2.0 (type="standard") ru 2.0 -se -sk -sl -sq -sr -sv -tn -to -tr -uk -vi -wo -yo -zh -zh__big5han -zh__gb2312han -zh__pinyin -zh__stroke +sa 1.8.1 (type="standard" alt="proposed") [currently in /seed] +se 2.0 = 1.8.1 (type="standard") +si 2.0 (type="standard") +si__dictionary 2.0 (type="dictionary") +sk 2.0 (type="standard") +sl 2.0 = 1.8.1 (type="standard" alt="proposed") +sq 2.0 = 1.8.1 (alt="proposed" type="standard") +sr 2.0 (type="standard") +sr_Latn 2.0 = 1.8.1 (alias source="hr") +sv 2.0 (type="standard") +sv__reformed 2.0 = 1.8.1 (type="reformed") +ta 2.0 +te 2.0 +th 2.0 (type="standard") +tn 2.0 = 1.8.1 +to 2.0 = 1.8.1 (type="standard" alt="proposed") +tr 2.0 = 1.8.1 (type="standard") +uk 2.0 +ur 2.0 +vi 2.0 = 1.8.1 +wae 2.0 +wo 1.8.1 [currently in /seed] +yo 2.0 = 1.8.1 +zh 2.0 = 1.8.1 (type="standard") +zh__big5han 2.0 = 1.8.1 (type="big5han") +zh__gb2312han 2.0 = 1.8.1 (type="gb2312han") +zh__pinyin 2.0 (type='pinyin' alt='short') +zh__stroke 2.0 = 1.9.1 (type='stroke' alt='short') ---------------------------------------------------------------------------- =head1 NAME @@ -197,27 +237,29 @@ The C<new> method returns a collator object. A parameter list for the constructor is a hash, which can include a special key C<locale> and its value (case-insensitive) standing -for a two-letter language code (ISO-639) like C<'en'> for English. +for a Unicode base language code (two or three-letter). For example, C<Unicode::Collate::Locale-E<gt>new(locale =E<gt> 'FR')> returns a collator tailored for French. -C<$locale_name> may be suffixed with a territory(country) -code or a variant code, which are separated with C<'_'>. +C<$locale_name> may be suffixed with a Unicode script code (four-letter), +a Unicode region code, a Unicode language variant code. These codes are +case-insensitive, and separated with C<'_'> or C<'-'>. E.g. C<en_US> for English in USA, -C<es_ES_traditional> for Spanish in Spain (Traditional), +C<az_Cyrl> for Azerbaijani in the Cyrillic script, +C<es_ES_traditional> for Spanish in Spain (Traditional). -If C<$localename> is not defined, +If C<$locale_name> is not available, fallback is selected in the following order: - 1. language_territory_variant - 2. language_territory - 3. language__variant + 1. language with a variant code + 2. language with a script code + 3. language with a region code 4. language 5. default Tailoring tags provided by C<Unicode::Collate> are allowed as long as they are not used for C<locale> support. Esp. the C<table> tag -is always untailorable since it is reserved for DUCET. +is always untailorable, since it is reserved for DUCET. E.g. a collator for French, which ignores diacritics and case difference (i.e. level 1), with reversed case ordering and no normalization. @@ -265,13 +307,14 @@ this method returns a string C<'default'> meaning no special tailoring. =head2 A list of tailorable locales locale name description - ---------------------------------------------------------- + -------------------------------------------------------------- af Afrikaans ar Arabic as Assamese az Azerbaijani (Azeri) be Belarusian bg Bulgarian + bn Bengali bs Bosnian ca Catalan cs Czech @@ -282,6 +325,7 @@ this method returns a string C<'default'> meaning no special tailoring. es Spanish es__traditional Spanish ('ch' and 'll' as a grapheme) et Estonian + fa Persian fi Finnish (v and w are primary equal) fi__phonebook Finnish (v and w as separate characters) fil Filipino @@ -318,17 +362,27 @@ this method returns a string C<'default'> meaning no special tailoring. pl Polish ro Romanian ru Russian + sa Sanskrit se Northern Sami + si Sinhala + si__dictionary Sinhala (U+0DA5 = U+0DA2,0DCA,0DA4) sk Slovak sl Slovenian sq Albanian sr Serbian - sv Swedish + sr_Latn Serbian in Latin (tailored as Croatian) + sv Swedish (v and w are primary equal) + sv__reformed Swedish (v and w as separate characters) + ta Tamil + te Telugu + th Thai tn Tswana to Tonga tr Turkish uk Ukrainian + ur Urdu vi Vietnamese + wae Walser wo Wolof yo Yoruba zh Chinese @@ -336,7 +390,7 @@ this method returns a string C<'default'> meaning no special tailoring. zh__gb2312han Chinese (ideographs: GB-2312 order) zh__pinyin Chinese (ideographs: pinyin order) zh__stroke Chinese (ideographs: stroke order) - ---------------------------------------------------------- + -------------------------------------------------------------- Locales according to the default UCA rules include chr (Cherokee), diff --git a/cpan/Unicode-Collate/Collate/Locale/bn.pl b/cpan/Unicode-Collate/Collate/Locale/bn.pl new file mode 100644 index 0000000000..5df56fb9d2 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/bn.pl @@ -0,0 +1,7 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0982 ; [.1FE8.0020.0002.0982][.FFF1.0000.0000.0000] # BENGALI SIGN ANUSVARA +0983 ; [.1FE8.0020.0002.0983][.FFF2.0000.0000.0000] # BENGALI SIGN VISARGA +0981 ; [.1FE8.0020.0002.0981][.FFF3.0000.0000.0000] # BENGALI SIGN CANDRABINDU +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/fa.pl b/cpan/Unicode-Collate/Collate/Locale/fa.pl new file mode 100644 index 0000000000..c413106150 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/fa.pl @@ -0,0 +1,49 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +064E ; [.0000.00A2.0002.064E] # ARABIC FATHA +0650 ; [.0000.00A3.0002.0650] # ARABIC KASRA +064F ; [.0000.00A4.0002.064F] # ARABIC DAMMA +064B ; [.0000.00A5.0002.064B] # ARABIC FATHATAN +064D ; [.0000.00A6.0002.064D] # ARABIC KASRATAN +064C ; [.0000.00A7.0002.064C] # ARABIC DAMMATAN +0622 ; [.1C45.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +0627 0653 ; [.1C45.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +0671 ; [.1C46.0021.0002.0671] # ARABIC LETTER ALEF WASLA +0621 ; [.1C46.0020.0002.0621][.FFF1.0000.0000.0000] # ARABIC LETTER HAMZA +0623 ; [.1C46.0021.0002.0623][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0627 0654 ; [.1C46.0021.0002.0623][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0672 ; [.1C46.0022.0002.0672][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE +0625 ; [.1C46.0023.0002.0625][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA BELOW +0627 0655 ; [.1C46.0023.0002.0625][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA BELOW +0673 ; [.1C46.0024.0002.0673][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0624 ; [.1C46.0025.0002.0624][.FFF1.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +0648 0654 ; [.1C46.0025.0002.0624][.FFF1.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +06CC 0654 ; [.1C46.0026.0002.06CC][.FFF1.0000.0000.0000] # <ARABIC LETTER FARSI YEH, ARABIC HAMZA ABOVE> +0649 0654 ; [.1C46.0026.0003.0649][.FFF1.0000.0000.0000] # <ARABIC LETTER ALEF MAKSURA, ARABIC HAMZA ABOVE> +0626 ; [.1C46.0026.0004.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +064A 0654 ; [.1C46.0026.0004.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +06AA ; [.1CB2.0021.0002.06AA] # ARABIC LETTER SWASH KAF +06AB ; [.1CB2.0022.0002.06AB] # ARABIC LETTER KAF WITH RING +0643 ; [.1CB2.0023.0002.0643] # ARABIC LETTER KAF +06AC ; [.1CB2.0024.0002.06AC] # ARABIC LETTER KAF WITH DOT ABOVE +06AD ; [.1CB2.0025.0002.06AD] # ARABIC LETTER NG +06AE ; [.1CB2.0026.0002.06AE] # ARABIC LETTER KAF WITH THREE DOTS BELOW +0647 ; [.1CE5.0020.0002.0647][.FFF1.0000.0000.0000] # ARABIC LETTER HEH +06D5 ; [.1CE5.0021.0002.06D5][.FFF1.0000.0000.0000] # ARABIC LETTER AE +06C1 ; [.1CE5.0022.0002.06C1][.FFF1.0000.0000.0000] # ARABIC LETTER HEH GOAL +06C2 ; [.1CE5.0022.0002.06C1][.FFF1.0000.0000.0000][.0000.00B1.0002.0654] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE +0629 ; [.1CE5.0023.0002.0629][.FFF1.0000.0000.0000] # ARABIC LETTER TEH MARBUTA +06C3 ; [.1CE5.0024.0002.06C3][.FFF1.0000.0000.0000] # ARABIC LETTER TEH MARBUTA GOAL +06C0 ; [.1CE5.0025.0002.06C0][.FFF1.0000.0000.0000] # ARABIC LETTER HEH WITH YEH ABOVE +06D5 0654 ; [.1CE5.0025.0002.06C0][.FFF1.0000.0000.0000] # ARABIC LETTER HEH WITH YEH ABOVE +06BE ; [.1CE5.0026.0002.06BE][.FFF1.0000.0000.0000] # ARABIC LETTER HEH DOACHASHMEE +0649 ; [.1CEA.0021.0002.0649] # ARABIC LETTER ALEF MAKSURA +06D2 ; [.1CEA.0022.0002.06D2] # ARABIC LETTER YEH BARREE +06D3 ; [.1CEA.0022.0002.06D2][.0000.00B1.0002.0654] # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +064A ; [.1CEA.0023.0002.064A] # ARABIC LETTER YEH +06D0 ; [.1CEA.0024.0002.06D0] # ARABIC LETTER E +06D1 ; [.1CEA.0025.0002.06D1] # ARABIC LETTER YEH WITH THREE DOTS BELOW +06CD ; [.1CEA.0026.0002.06CD] # ARABIC LETTER YEH WITH TAIL +06CE ; [.1CEA.0027.0002.06CE] # ARABIC LETTER YEH WITH SMALL V +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/sa.pl b/cpan/Unicode-Collate/Collate/Locale/sa.pl new file mode 100644 index 0000000000..c29ff4089c --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/sa.pl @@ -0,0 +1,11 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0902 ; [.1F7B.0020.0002.0902][.FFF1.0000.0000.0000] # DEVANAGARI SIGN ANUSVARA +0901 ; [.1F7B.0021.0002.0901][.FFF1.0000.0000.0901] # DEVANAGARI SIGN CANDRABINDU +0903 ; [.1F7B.0020.0002.0903][.FFF2.0000.0000.0000] # DEVANAGARI SIGN VISARGA +0933 ; [.1FBC.0020.0002.0933][.FFF1.0000.0000.0000] # DEVANAGARI LETTER LLA +0934 ; [.1FBC.0020.0002.0933][.FFF1.0000.0000.0000][.0000.00DD.0002.093C] # DEVANAGARI LETTER LLLA +0915 094D 0937 ; [.1FBC.0020.0002.0915][.FFF2.0000.0000.0000] # <DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA> +091C 094D 091E ; [.1FBC.0020.0002.091C][.FFF3.0000.0000.0000] # <DEVANAGARI LETTER JA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER NYA> +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/si.pl b/cpan/Unicode-Collate/Collate/Locale/si.pl new file mode 100644 index 0000000000..0e59bb931c --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/si.pl @@ -0,0 +1,8 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0D82 ; [.21F7.0020.0002.0D82][.FFF1.0000.0000.0000] # SINHALA SIGN ANUSVARAYA +0D83 ; [.21F7.0020.0002.0D83][.FFF2.0000.0000.0000] # SINHALA SIGN VISARGAYA +0DA5 ; [.2202.0020.0002.0DA5] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA +0DA4 ; [.2203.0020.0002.0DA4] # SINHALA LETTER TAALUJA NAASIKYAYA +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/si_dict.pl b/cpan/Unicode-Collate/Collate/Locale/si_dict.pl new file mode 100644 index 0000000000..6b1b36a631 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/si_dict.pl @@ -0,0 +1,7 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0D82 ; [.21F7.0020.0002.0D82][.FFF1.0000.0000.0000] # SINHALA SIGN ANUSVARAYA +0D83 ; [.21F7.0020.0002.0D83][.FFF2.0000.0000.0000] # SINHALA SIGN VISARGAYA +0DA5 ; [.2200.0020.0002.0DA2][.2232.0020.0002.0DCA][.2202.0021.0002.0DA5] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/sk.pl b/cpan/Unicode-Collate/Collate/Locale/sk.pl index b9b27ad3ee..73c2e46ac9 100644 --- a/cpan/Unicode-Collate/Collate/Locale/sk.pl +++ b/cpan/Unicode-Collate/Collate/Locale/sk.pl @@ -28,6 +28,10 @@ 1ED4 ; [.1725.0020.0008.00D4][.0000.0064.0002.0309] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 1ED9 ; [.1725.0020.0002.00F4][.0000.0070.0002.0323] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW 1ED8 ; [.1725.0020.0008.00D4][.0000.0070.0002.0323] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +0159 ; [.1771.0020.0002.0159] # LATIN SMALL LETTER R WITH CARON +0072 030C ; [.1771.0020.0002.0159] # LATIN SMALL LETTER R WITH CARON +0158 ; [.1771.0020.0008.0158] # LATIN CAPITAL LETTER R WITH CARON +0052 030C ; [.1771.0020.0008.0158] # LATIN CAPITAL LETTER R WITH CARON 0161 ; [.17A7.0020.0002.0161] # LATIN SMALL LETTER S WITH CARON 0073 030C ; [.17A7.0020.0002.0161] # LATIN SMALL LETTER S WITH CARON 0160 ; [.17A7.0020.0008.0160] # LATIN CAPITAL LETTER S WITH CARON diff --git a/cpan/Unicode-Collate/Collate/Locale/sl.pl b/cpan/Unicode-Collate/Collate/Locale/sl.pl index d1281feffe..d9a4ae4609 100644 --- a/cpan/Unicode-Collate/Collate/Locale/sl.pl +++ b/cpan/Unicode-Collate/Collate/Locale/sl.pl @@ -1,5 +1,4 @@ +{ -# c-acute not included entry => <<'ENTRY', # for DUCET v6.0.0 010D ; [.15D2.0020.0002.010D] # LATIN SMALL LETTER C WITH CARON 0063 030C ; [.15D2.0020.0002.010D] # LATIN SMALL LETTER C WITH CARON diff --git a/cpan/Unicode-Collate/Collate/Locale/sr.pl b/cpan/Unicode-Collate/Collate/Locale/sr.pl new file mode 100644 index 0000000000..db010969bb --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/sr.pl @@ -0,0 +1,48 @@ ++{ + suppress => [0x0410, 0x0430, 0x04D8, 0x04D9, 0x0413, 0x0433, 0x0415, 0x0435, + 0x0416, 0x0436, 0x0417, 0x0437, 0x0418, 0x0438, 0x0406, 0x0456, + 0x041E, 0x043E, 0x04E8, 0x04E9, 0x041A, 0x043A, 0x0423, 0x0443, + 0x0427, 0x0447, 0x042B, 0x044B, 0x042D, 0x044D, 0x0474, 0x0475], + entry => <<'ENTRY', # for DUCET v6.0.0 +04D1 ; [.1943.0020.0002.0430][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER A WITH BREVE +04D0 ; [.1943.0020.0008.0410][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER A WITH BREVE +04D3 ; [.1943.0020.0002.0430][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER A WITH DIAERESIS +04D2 ; [.1943.0020.0008.0410][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04DB ; [.194F.0020.0002.04D9][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DA ; [.194F.0020.0008.04D8][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +0453 ; [.1963.0020.0002.0433][.0000.0032.0002.0301] # CYRILLIC SMALL LETTER GJE +0403 ; [.1963.0020.0008.0413][.0000.0032.0002.0301] # CYRILLIC CAPITAL LETTER GJE +04D7 ; [.198B.0020.0002.0435][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER IE WITH BREVE +04D6 ; [.198B.0020.0008.0415][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER IE WITH BREVE +04DD ; [.1997.0020.0002.0436][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DC ; [.1997.0020.0008.0416][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DF ; [.19A4.0020.0002.0437][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04DE ; [.19A4.0020.0008.0417][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E5 ; [.19BC.0020.0002.0438][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER I WITH DIAERESIS +04E4 ; [.19BC.0020.0008.0418][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +0457 ; [.19C8.0020.0002.0456][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER YI +0407 ; [.19C8.0020.0008.0406][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER YI +0439 ; [.19BC.0020.0002.0438][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER SHORT I +0419 ; [.19BC.0020.0008.0418][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER SHORT I +04E7 ; [.1A29.0020.0002.043E][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER O WITH DIAERESIS +04E6 ; [.1A29.0020.0008.041E][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04EB ; [.1A31.0020.0002.04E9][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04EA ; [.1A31.0020.0008.04E8][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +045C ; [.19DA.0020.0002.043A][.0000.0032.0002.0301] # CYRILLIC SMALL LETTER KJE +040C ; [.19DA.0020.0008.041A][.0000.0032.0002.0301] # CYRILLIC CAPITAL LETTER KJE +045E ; [.1A6B.0020.0002.0443][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER SHORT U +040E ; [.1A6B.0020.0008.0423][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER SHORT U +04F1 ; [.1A6B.0020.0002.0443][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER U WITH DIAERESIS +04F0 ; [.1A6B.0020.0008.0423][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F3 ; [.1A6B.0020.0002.0443][.0000.004D.0002.030B] # CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F2 ; [.1A6B.0020.0008.0423][.0000.004D.0002.030B] # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F5 ; [.1ABE.0020.0002.0447][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F4 ; [.1ABE.0020.0008.0427][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F9 ; [.1AF1.0020.0002.044B][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04F8 ; [.1AF1.0020.0008.042B][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04ED ; [.1B06.0020.0002.044D][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER E WITH DIAERESIS +04EC ; [.1B06.0020.0008.042D][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +0477 ; [.1B3C.0020.0002.0475][.0000.0065.0002.030F] # CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0476 ; [.1B3C.0020.0008.0474][.0000.0065.0002.030F] # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/sv.pl b/cpan/Unicode-Collate/Collate/Locale/sv.pl index 1243167fc3..de95d5fae3 100644 --- a/cpan/Unicode-Collate/Collate/Locale/sv.pl +++ b/cpan/Unicode-Collate/Collate/Locale/sv.pl @@ -8,8 +8,8 @@ 00D0 ; [.15E4.0022.0008.00D0] # LATIN CAPITAL LETTER ETH 00FE ; [.17C9.0020.0003.00FE][.1667.0020.0003.00FE] # LATIN SMALL LETTER THORN 00DE ; [.17C9.0020.0009.00DE][.1667.0020.0009.00DE] # LATIN CAPITAL LETTER THORN -0077 ; [.1812.0020.0003.0077] # LATIN SMALL LETTER W -0057 ; [.1812.0020.0009.0057] # LATIN CAPITAL LETTER W +0077 ; [.1812.0021.0002.0077] # LATIN SMALL LETTER W +0057 ; [.1812.0021.0008.0057] # LATIN CAPITAL LETTER W 00FC ; [.1833.0021.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS 0075 0308 ; [.1833.0021.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS 00DC ; [.1833.0021.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS diff --git a/cpan/Unicode-Collate/Collate/Locale/sv_refo.pl b/cpan/Unicode-Collate/Collate/Locale/sv_refo.pl new file mode 100644 index 0000000000..f3a867fc33 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/sv_refo.pl @@ -0,0 +1,84 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0111 ; [.15E4.0021.0002.0111] # LATIN SMALL LETTER D WITH STROKE +0064 0335 ; [.15E4.0021.0002.0111] # LATIN SMALL LETTER D WITH STROKE +0110 ; [.15E4.0021.0008.0110] # LATIN CAPITAL LETTER D WITH STROKE +0044 0335 ; [.15E4.0021.0008.0110] # LATIN CAPITAL LETTER D WITH STROKE +00F0 ; [.15E4.0022.0002.00F0] # LATIN SMALL LETTER ETH +00D0 ; [.15E4.0022.0008.00D0] # LATIN CAPITAL LETTER ETH +00FE ; [.17C9.0020.0003.00FE][.1667.0020.0003.00FE] # LATIN SMALL LETTER THORN +00DE ; [.17C9.0020.0009.00DE][.1667.0020.0009.00DE] # LATIN CAPITAL LETTER THORN +00FC ; [.1833.0021.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS +0075 0308 ; [.1833.0021.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS +00DC ; [.1833.0021.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS +0055 0308 ; [.1833.0021.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS +01DC ; [.1833.0021.0002.00FC][.0000.0035.0002.0300] # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE +01DB ; [.1833.0021.0008.00DC][.0000.0035.0002.0300] # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01D8 ; [.1833.0021.0002.00FC][.0000.0032.0002.0301] # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01D7 ; [.1833.0021.0008.00DC][.0000.0032.0002.0301] # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D6 ; [.1833.0021.0002.00FC][.0000.005B.0002.0304] # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D5 ; [.1833.0021.0008.00DC][.0000.005B.0002.0304] # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01DA ; [.1833.0021.0002.00FC][.0000.0041.0002.030C] # LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01D9 ; [.1833.0021.0008.00DC][.0000.0041.0002.030C] # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +0171 ; [.1833.0022.0002.0171] # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0075 030B ; [.1833.0022.0002.0171] # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0170 ; [.1833.0022.0008.0170] # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0055 030B ; [.1833.0022.0008.0170] # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +00E5 ; [.18B9.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE +0061 030A ; [.18B9.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE +00C5 ; [.18B9.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE +0041 030A ; [.18B9.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE +212B ; [.18B9.0020.0008.00C5] # ANGSTROM SIGN +01FB ; [.18B9.0020.0002.00E5][.0000.0032.0002.0301] # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FA ; [.18B9.0020.0008.00C5][.0000.0032.0002.0301] # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +00E4 ; [.18BA.0020.0002.00E4] # LATIN SMALL LETTER A WITH DIAERESIS +0061 0308 ; [.18BA.0020.0002.00E4] # LATIN SMALL LETTER A WITH DIAERESIS +00C4 ; [.18BA.0020.0008.00C4] # LATIN CAPITAL LETTER A WITH DIAERESIS +0041 0308 ; [.18BA.0020.0008.00C4] # LATIN CAPITAL LETTER A WITH DIAERESIS +01DF ; [.18BA.0020.0002.00E4][.0000.005B.0002.0304] # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01DE ; [.18BA.0020.0008.00C4][.0000.005B.0002.0304] # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +00E6 ; [.18BA.0021.0002.00E6] # LATIN SMALL LETTER AE +00C6 ; [.18BA.0021.0008.00C6] # LATIN CAPITAL LETTER AE +1D2D ; [.18BA.0021.0014.1D2D] # MODIFIER LETTER CAPITAL AE +01FD ; [.18BA.0021.0002.00E6][.0000.0032.0002.0301] # LATIN SMALL LETTER AE WITH ACUTE +01FC ; [.18BA.0021.0008.00C6][.0000.0032.0002.0301] # LATIN CAPITAL LETTER AE WITH ACUTE +01E3 ; [.18BA.0021.0002.00E6][.0000.005B.0002.0304] # LATIN SMALL LETTER AE WITH MACRON +01E2 ; [.18BA.0021.0008.00C6][.0000.005B.0002.0304] # LATIN CAPITAL LETTER AE WITH MACRON +0119 ; [.18BA.0022.0002.0119] # LATIN SMALL LETTER E WITH OGONEK +0065 0328 ; [.18BA.0022.0002.0119] # LATIN SMALL LETTER E WITH OGONEK +0118 ; [.18BA.0022.0008.0118] # LATIN CAPITAL LETTER E WITH OGONEK +0045 0328 ; [.18BA.0022.0008.0118] # LATIN CAPITAL LETTER E WITH OGONEK +00F6 ; [.18BB.0020.0002.00F6] # LATIN SMALL LETTER O WITH DIAERESIS +006F 0308 ; [.18BB.0020.0002.00F6] # LATIN SMALL LETTER O WITH DIAERESIS +00D6 ; [.18BB.0020.0008.00D6] # LATIN CAPITAL LETTER O WITH DIAERESIS +004F 0308 ; [.18BB.0020.0008.00D6] # LATIN CAPITAL LETTER O WITH DIAERESIS +022B ; [.18BB.0020.0002.00F6][.0000.005B.0002.0304] # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022A ; [.18BB.0020.0008.00D6][.0000.005B.0002.0304] # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +00F8 ; [.18BB.0021.0002.00F8] # LATIN SMALL LETTER O WITH STROKE +006F 0338 ; [.18BB.0021.0002.00F8] # LATIN SMALL LETTER O WITH STROKE +00D8 ; [.18BB.0021.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE +004F 0338 ; [.18BB.0021.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE +01FF ; [.18BB.0021.0002.00F8][.0000.0032.0002.0301] # LATIN SMALL LETTER O WITH STROKE AND ACUTE +01FE ; [.18BB.0021.0008.00D8][.0000.0032.0002.0301] # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0151 ; [.18BB.0022.0002.0151] # LATIN SMALL LETTER O WITH DOUBLE ACUTE +006F 030B ; [.18BB.0022.0002.0151] # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0150 ; [.18BB.0022.0008.0150] # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +004F 030B ; [.18BB.0022.0008.0150] # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0153 ; [.18BB.0023.0002.0153] # LATIN SMALL LIGATURE OE +0152 ; [.18BB.0023.0008.0152] # LATIN CAPITAL LIGATURE OE +00F4 ; [.18BB.0024.0002.00F4] # LATIN SMALL LETTER O WITH CIRCUMFLEX +006F 0302 ; [.18BB.0024.0002.00F4] # LATIN SMALL LETTER O WITH CIRCUMFLEX +00D4 ; [.18BB.0024.0008.00D4] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +004F 0302 ; [.18BB.0024.0008.00D4] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +1ED3 ; [.18BB.0024.0002.00F4][.0000.0035.0002.0300] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED2 ; [.18BB.0024.0008.00D4][.0000.0035.0002.0300] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED1 ; [.18BB.0024.0002.00F4][.0000.0032.0002.0301] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED0 ; [.18BB.0024.0008.00D4][.0000.0032.0002.0301] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED7 ; [.18BB.0024.0002.00F4][.0000.004E.0002.0303] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED6 ; [.18BB.0024.0008.00D4][.0000.004E.0002.0303] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED5 ; [.18BB.0024.0002.00F4][.0000.0064.0002.0309] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED4 ; [.18BB.0024.0008.00D4][.0000.0064.0002.0309] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED9 ; [.18BB.0024.0002.00F4][.0000.0070.0002.0323] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1ED8 ; [.18BB.0024.0008.00D4][.0000.0070.0002.0323] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/ta.pl b/cpan/Unicode-Collate/Collate/Locale/ta.pl new file mode 100644 index 0000000000..ef5223e854 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/ta.pl @@ -0,0 +1,30 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0B82 ; [.20E7.0020.0002.0B82][.FFF1.0000.0000.0000] # TAMIL SIGN ANUSVARA +0B95 0BCD ; [.20E8.0020.0002.0B95][.FFF1.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA> +0B99 0BCD ; [.20E9.0020.0002.0B99][.FFF1.0000.0000.0000] # <TAMIL LETTER NGA, TAMIL SIGN VIRAMA> +0B9A 0BCD ; [.20EA.0020.0002.0B9A][.FFF1.0000.0000.0000] # <TAMIL LETTER CA, TAMIL SIGN VIRAMA> +0B9E 0BCD ; [.20EB.0020.0002.0B9E][.FFF1.0000.0000.0000] # <TAMIL LETTER NYA, TAMIL SIGN VIRAMA> +0B9F 0BCD ; [.20EC.0020.0002.0B9F][.FFF1.0000.0000.0000] # <TAMIL LETTER TTA, TAMIL SIGN VIRAMA> +0BA3 0BCD ; [.20ED.0020.0002.0BA3][.FFF1.0000.0000.0000] # <TAMIL LETTER NNA, TAMIL SIGN VIRAMA> +0BA4 0BCD ; [.20EE.0020.0002.0BA4][.FFF1.0000.0000.0000] # <TAMIL LETTER TA, TAMIL SIGN VIRAMA> +0BA8 0BCD ; [.20EF.0020.0002.0BA8][.FFF1.0000.0000.0000] # <TAMIL LETTER NA, TAMIL SIGN VIRAMA> +0BAA 0BCD ; [.20F0.0020.0002.0BAA][.FFF1.0000.0000.0000] # <TAMIL LETTER PA, TAMIL SIGN VIRAMA> +0BAE 0BCD ; [.20F1.0020.0002.0BAE][.FFF1.0000.0000.0000] # <TAMIL LETTER MA, TAMIL SIGN VIRAMA> +0BAF 0BCD ; [.20F2.0020.0002.0BAF][.FFF1.0000.0000.0000] # <TAMIL LETTER YA, TAMIL SIGN VIRAMA> +0BB0 0BCD ; [.20F3.0020.0002.0BB0][.FFF1.0000.0000.0000] # <TAMIL LETTER RA, TAMIL SIGN VIRAMA> +0BB2 0BCD ; [.20F4.0020.0002.0BB2][.FFF1.0000.0000.0000] # <TAMIL LETTER LA, TAMIL SIGN VIRAMA> +0BB5 0BCD ; [.20F5.0020.0002.0BB5][.FFF1.0000.0000.0000] # <TAMIL LETTER VA, TAMIL SIGN VIRAMA> +0BB4 0BCD ; [.20F6.0020.0002.0BB4][.FFF1.0000.0000.0000] # <TAMIL LETTER LLLA, TAMIL SIGN VIRAMA> +0BB3 0BCD ; [.20F7.0020.0002.0BB3][.FFF1.0000.0000.0000] # <TAMIL LETTER LLA, TAMIL SIGN VIRAMA> +0BB1 0BCD ; [.20F8.0020.0002.0BB1][.FFF1.0000.0000.0000] # <TAMIL LETTER RRA, TAMIL SIGN VIRAMA> +0BA9 0BCD ; [.20F9.0020.0002.0BA9][.FFF1.0000.0000.0000] # <TAMIL LETTER NNNA, TAMIL SIGN VIRAMA> +0B9C 0BCD ; [.20FA.0020.0002.0B9C][.FFF1.0000.0000.0000] # <TAMIL LETTER JA, TAMIL SIGN VIRAMA> +0BB6 0BCD ; [.20FB.0020.0002.0BB6][.FFF1.0000.0000.0000] # <TAMIL LETTER SHA, TAMIL SIGN VIRAMA> +0BB7 0BCD ; [.20FC.0020.0002.0BB7][.FFF1.0000.0000.0000] # <TAMIL LETTER SSA, TAMIL SIGN VIRAMA> +0BB8 0BCD ; [.20FD.0020.0002.0BB8][.FFF1.0000.0000.0000] # <TAMIL LETTER SA, TAMIL SIGN VIRAMA> +0BB9 0BCD ; [.20FE.0020.0002.0BB9][.FFF1.0000.0000.0000] # <TAMIL LETTER HA, TAMIL SIGN VIRAMA> +0B95 0BCD 0BB7 0BCD ; [.20FF.0020.0002.0B95][.FFF1.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA, TAMIL LETTER SSA, TAMIL SIGN VIRAMA> +0B95 0BCD 0BB7 ; [.20FF.0020.0002.0B95][.FFF2.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA, TAMIL LETTER SSA> +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/te.pl b/cpan/Unicode-Collate/Collate/Locale/te.pl new file mode 100644 index 0000000000..fd32bc6542 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/te.pl @@ -0,0 +1,7 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0C01 ; [.211C.0020.0002.0C01][.FFF1.0000.0000.0000] # TELUGU SIGN CANDRABINDU +0C02 ; [.211C.0020.0002.0C02][.FFF2.0000.0000.0000] # TELUGU SIGN ANUSVARA +0C03 ; [.211C.0020.0002.0C03][.FFF3.0000.0000.0000] # TELUGU SIGN VISARGA +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/th.pl b/cpan/Unicode-Collate/Collate/Locale/th.pl new file mode 100644 index 0000000000..6fe86cce69 --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/th.pl @@ -0,0 +1,15 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0E2F ; [*0480.0020.0002.0E2F][*FFF1.0000.0000.0000] # THAI CHARACTER PAIYANNOI +0E46 ; [*0480.0020.0002.0E46][*FFF2.0000.0000.0000] # THAI CHARACTER MAIYAMOK +0E4C ; [.0000.011D.0002.0E4C] # THAI CHARACTER THANTHAKHAT +0E47 ; [.0000.011E.0002.0E47] # THAI CHARACTER MAITAIKHU +0E48 ; [.0000.011F.0002.0E48] # THAI CHARACTER MAI EK +0E49 ; [.0000.0120.0002.0E49] # THAI CHARACTER MAI THO +0E4A ; [.0000.0121.0002.0E4A] # THAI CHARACTER MAI TRI +0E4B ; [.0000.0122.0002.0E4B] # THAI CHARACTER MAI CHATTAWA +0E4D ; [.23C3.0020.0002.0E4D] # THAI CHARACTER NIKHAHIT +0E45 ; [.23C6.0020.0003.0E45] # THAI CHARACTER LAKKHANGYAO +0E3A ; [.23D4.0020.0002.0E3A] # THAI CHARACTER PHINTHU +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/uk.pl b/cpan/Unicode-Collate/Collate/Locale/uk.pl index 9f24b3250e..3b3db69bf5 100644 --- a/cpan/Unicode-Collate/Collate/Locale/uk.pl +++ b/cpan/Unicode-Collate/Collate/Locale/uk.pl @@ -1,6 +1,46 @@ +{ + suppress => [0x0410, 0x0430, 0x04D8, 0x04D9, 0x0413, 0x0433, 0x0415, 0x0435, + 0x0416, 0x0436, 0x0417, 0x0437, 0x041E, 0x043E, 0x04E8, 0x04E9, + 0x041A, 0x043A, 0x0423, 0x0443, 0x0427, 0x0447, 0x042B, 0x044B, + 0x042D, 0x044D, 0x0474, 0x0475], entry => <<'ENTRY', # for DUCET v6.0.0 0491 ; [.1964.0020.0002.0491] # CYRILLIC SMALL LETTER GHE WITH UPTURN 0490 ; [.1964.0020.0008.0490] # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +044C ; [.1B15.0020.0002.044C] # CYRILLIC SMALL LETTER SOFT SIGN +042C ; [.1B15.0020.0008.042C] # CYRILLIC CAPITAL LETTER SOFT SIGN +04D1 ; [.1943.0020.0002.0430][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER A WITH BREVE +04D0 ; [.1943.0020.0008.0410][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER A WITH BREVE +04D3 ; [.1943.0020.0002.0430][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER A WITH DIAERESIS +04D2 ; [.1943.0020.0008.0410][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04DB ; [.194F.0020.0002.04D9][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DA ; [.194F.0020.0008.04D8][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +0453 ; [.1963.0020.0002.0433][.0000.0032.0002.0301] # CYRILLIC SMALL LETTER GJE +0403 ; [.1963.0020.0008.0413][.0000.0032.0002.0301] # CYRILLIC CAPITAL LETTER GJE +04D7 ; [.198B.0020.0002.0435][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER IE WITH BREVE +04D6 ; [.198B.0020.0008.0415][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER IE WITH BREVE +04DD ; [.1997.0020.0002.0436][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DC ; [.1997.0020.0008.0416][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DF ; [.19A4.0020.0002.0437][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04DE ; [.19A4.0020.0008.0417][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E7 ; [.1A29.0020.0002.043E][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER O WITH DIAERESIS +04E6 ; [.1A29.0020.0008.041E][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04EB ; [.1A31.0020.0002.04E9][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04EA ; [.1A31.0020.0008.04E8][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +045C ; [.19DA.0020.0002.043A][.0000.0032.0002.0301] # CYRILLIC SMALL LETTER KJE +040C ; [.19DA.0020.0008.041A][.0000.0032.0002.0301] # CYRILLIC CAPITAL LETTER KJE +045E ; [.1A6B.0020.0002.0443][.0000.0037.0002.0306] # CYRILLIC SMALL LETTER SHORT U +040E ; [.1A6B.0020.0008.0423][.0000.0037.0002.0306] # CYRILLIC CAPITAL LETTER SHORT U +04F1 ; [.1A6B.0020.0002.0443][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER U WITH DIAERESIS +04F0 ; [.1A6B.0020.0008.0423][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F3 ; [.1A6B.0020.0002.0443][.0000.004D.0002.030B] # CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F2 ; [.1A6B.0020.0008.0423][.0000.004D.0002.030B] # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F5 ; [.1ABE.0020.0002.0447][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F4 ; [.1ABE.0020.0008.0427][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F9 ; [.1AF1.0020.0002.044B][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04F8 ; [.1AF1.0020.0008.042B][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04ED ; [.1B06.0020.0002.044D][.0000.0047.0002.0308] # CYRILLIC SMALL LETTER E WITH DIAERESIS +04EC ; [.1B06.0020.0008.042D][.0000.0047.0002.0308] # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +0477 ; [.1B3C.0020.0002.0475][.0000.0065.0002.030F] # CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0476 ; [.1B3C.0020.0008.0474][.0000.0065.0002.030F] # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/ur.pl b/cpan/Unicode-Collate/Collate/Locale/ur.pl new file mode 100644 index 0000000000..5b4f72960f --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/ur.pl @@ -0,0 +1,88 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0627 ; [.1C43.0020.0002.0627] # ARABIC LETTER ALEF +0623 ; [.1C43.0021.0002.0623] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0627 0654 ; [.1C43.0021.0002.0623] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0622 ; [.1C43.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +0627 0653 ; [.1C43.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +0628 ; [.1C43.0020.0002.0628][.FFF2.0000.0000.0000] # ARABIC LETTER BEH +0628 06BE ; [.1C43.0020.0002.0628][.FFF3.0000.0000.0000] # <ARABIC LETTER BEH, ARABIC LETTER HEH DOACHASHMEE> +067E ; [.1C43.0020.0002.067E][.FFF4.0000.0000.0000] # ARABIC LETTER PEH +067E 06BE ; [.1C43.0020.0002.067E][.FFF5.0000.0000.0000] # <ARABIC LETTER PEH, ARABIC LETTER HEH DOACHASHMEE> +062A ; [.1C43.0020.0002.062A][.FFF6.0000.0000.0000] # ARABIC LETTER TEH +062A 06BE ; [.1C43.0020.0002.062A][.FFF7.0000.0000.0000] # <ARABIC LETTER TEH, ARABIC LETTER HEH DOACHASHMEE> +0679 ; [.1C43.0020.0002.0679][.FFF8.0000.0000.0000] # ARABIC LETTER TTEH +0679 06BE ; [.1C43.0020.0002.0679][.FFF9.0000.0000.0000] # <ARABIC LETTER TTEH, ARABIC LETTER HEH DOACHASHMEE> +062B ; [.1C43.0020.0002.062B][.FFFA.0000.0000.0000] # ARABIC LETTER THEH +062C ; [.1C43.0020.0002.062C][.FFFB.0000.0000.0000] # ARABIC LETTER JEEM +062C 06BE ; [.1C44.0020.0002.062C][.FFF1.0000.0000.0000] # <ARABIC LETTER JEEM, ARABIC LETTER HEH DOACHASHMEE> +0686 ; [.1C44.0020.0002.0686][.FFF2.0000.0000.0000] # ARABIC LETTER TCHEH +0686 06BE ; [.1C44.0020.0002.0686][.FFF3.0000.0000.0000] # <ARABIC LETTER TCHEH, ARABIC LETTER HEH DOACHASHMEE> +062D ; [.1C44.0020.0002.062D][.FFF4.0000.0000.0000] # ARABIC LETTER HAH +062E ; [.1C44.0020.0002.062E][.FFF5.0000.0000.0000] # ARABIC LETTER KHAH +062F ; [.1C44.0020.0002.062F][.FFF6.0000.0000.0000] # ARABIC LETTER DAL +062F 06BE ; [.1C44.0020.0002.062F][.FFF7.0000.0000.0000] # <ARABIC LETTER DAL, ARABIC LETTER HEH DOACHASHMEE> +0688 ; [.1C44.0020.0002.0688][.FFF8.0000.0000.0000] # ARABIC LETTER DDAL +0688 06BE ; [.1C44.0020.0002.0688][.FFF9.0000.0000.0000] # <ARABIC LETTER DDAL, ARABIC LETTER HEH DOACHASHMEE> +0630 ; [.1C44.0020.0002.0630][.FFFA.0000.0000.0000] # ARABIC LETTER THAL +0631 ; [.1C44.0020.0002.0631][.FFFB.0000.0000.0000] # ARABIC LETTER REH +0631 06BE ; [.1C45.0020.0002.0631][.FFF1.0000.0000.0000] # <ARABIC LETTER REH, ARABIC LETTER HEH DOACHASHMEE> +0691 ; [.1C45.0020.0002.0691][.FFF2.0000.0000.0000] # ARABIC LETTER RREH +0691 06BE ; [.1C45.0020.0002.0691][.FFF3.0000.0000.0000] # <ARABIC LETTER RREH, ARABIC LETTER HEH DOACHASHMEE> +0632 ; [.1C45.0020.0002.0632][.FFF4.0000.0000.0000] # ARABIC LETTER ZAIN +0698 ; [.1C45.0020.0002.0698][.FFF5.0000.0000.0000] # ARABIC LETTER JEH +0633 ; [.1C45.0020.0002.0633][.FFF6.0000.0000.0000] # ARABIC LETTER SEEN +0634 ; [.1C45.0020.0002.0634][.FFF7.0000.0000.0000] # ARABIC LETTER SHEEN +0635 ; [.1C45.0020.0002.0635][.FFF8.0000.0000.0000] # ARABIC LETTER SAD +0636 ; [.1C45.0020.0002.0636][.FFF9.0000.0000.0000] # ARABIC LETTER DAD +0637 ; [.1C45.0020.0002.0637][.FFFA.0000.0000.0000] # ARABIC LETTER TAH +0638 ; [.1C45.0020.0002.0638][.FFFB.0000.0000.0000] # ARABIC LETTER ZAH +0639 ; [.1C46.0020.0002.0639][.FFF1.0000.0000.0000] # ARABIC LETTER AIN +063A ; [.1C46.0020.0002.063A][.FFF2.0000.0000.0000] # ARABIC LETTER GHAIN +0641 ; [.1C46.0020.0002.0641][.FFF3.0000.0000.0000] # ARABIC LETTER FEH +0642 ; [.1C46.0020.0002.0642][.FFF4.0000.0000.0000] # ARABIC LETTER QAF +06A9 ; [.1C46.0020.0002.06A9][.FFF5.0000.0000.0000] # ARABIC LETTER KEHEH +06A9 06BE ; [.1C46.0020.0002.06A9][.FFF6.0000.0000.0000] # <ARABIC LETTER KEHEH, ARABIC LETTER HEH DOACHASHMEE> +06AF ; [.1C46.0020.0002.06AF][.FFF7.0000.0000.0000] # ARABIC LETTER GAF +06AF 06BE ; [.1C46.0020.0002.06AF][.FFF8.0000.0000.0000] # <ARABIC LETTER GAF, ARABIC LETTER HEH DOACHASHMEE> +0644 ; [.1C46.0020.0002.0644][.FFF9.0000.0000.0000] # ARABIC LETTER LAM +0644 06BE ; [.1C46.0020.0002.0644][.FFFA.0000.0000.0000] # <ARABIC LETTER LAM, ARABIC LETTER HEH DOACHASHMEE> +0645 ; [.1C46.0020.0002.0645][.FFFB.0000.0000.0000] # ARABIC LETTER MEEM +0645 06BE ; [.1C47.0020.0002.0645][.FFF1.0000.0000.0000] # <ARABIC LETTER MEEM, ARABIC LETTER HEH DOACHASHMEE> +0646 ; [.1C47.0020.0002.0646][.FFF2.0000.0000.0000] # ARABIC LETTER NOON +0646 06BE ; [.1C47.0020.0002.0646][.FFF3.0000.0000.0000] # <ARABIC LETTER NOON, ARABIC LETTER HEH DOACHASHMEE> +06BA ; [.1C47.0020.0002.06BA][.FFF4.0000.0000.0000] # ARABIC LETTER NOON GHUNNA +06BA 06BE ; [.1C47.0020.0002.06BA][.FFF5.0000.0000.0000] # <ARABIC LETTER NOON GHUNNA, ARABIC LETTER HEH DOACHASHMEE> +0648 ; [.1C47.0020.0002.0648][.FFF6.0000.0000.0000] # ARABIC LETTER WAW +0624 ; [.1C47.0021.0002.0624][.FFF6.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +0648 0654 ; [.1C47.0021.0002.0624][.FFF6.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +0648 06BE ; [.1C47.0020.0002.0648][.FFF7.0000.0000.0000] # <ARABIC LETTER WAW, ARABIC LETTER HEH DOACHASHMEE> +06C1 ; [.1C47.0020.0002.06C1][.FFF8.0000.0000.0000] # ARABIC LETTER HEH GOAL +06C2 ; [.1C47.0021.0002.06C2][.FFF8.0000.0000.0000] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE +06C1 0654 ; [.1C47.0021.0002.06C2][.FFF8.0000.0000.0000] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE +06BE ; [.1C47.0020.0002.06BE][.FFF9.0000.0000.0000] # ARABIC LETTER HEH DOACHASHMEE +06C3 ; [.1C47.0020.0002.06C3][.FFFA.0000.0000.0000] # ARABIC LETTER TEH MARBUTA GOAL +0621 ; [.1C47.0020.0002.0621][.FFFB.0000.0000.0000] # ARABIC LETTER HAMZA +06CC ; [.1C48.0020.0002.06CC][.FFF1.0000.0000.0000] # ARABIC LETTER FARSI YEH +0626 ; [.1C48.0021.0002.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +064A 0654 ; [.1C48.0021.0002.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +06CC 06BE ; [.1C48.0020.0002.06CC][.FFF2.0000.0000.0000] # <ARABIC LETTER FARSI YEH, ARABIC LETTER HEH DOACHASHMEE> +06D2 ; [.1C48.0020.0002.06D2][.FFF3.0000.0000.0000] # ARABIC LETTER YEH BARREE +06D3 ; [.1C48.0021.0002.06D3][.FFF3.0000.0000.0000] # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D2 0654 ; [.1C48.0021.0002.06D3][.FFF3.0000.0000.0000] # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +0652 ; [.0000.00A2.0002.0652] # ARABIC SUKUN +064E ; [.0000.00A3.0002.064E] # ARABIC FATHA +0650 ; [.0000.00A4.0002.0650] # ARABIC KASRA +064F ; [.0000.00A5.0002.064F] # ARABIC DAMMA +0670 ; [.0000.00A6.0002.0670] # ARABIC LETTER SUPERSCRIPT ALEF +0656 ; [.0000.00A7.0002.0656] # ARABIC SUBSCRIPT ALEF +0657 ; [.0000.00A8.0002.0657] # ARABIC INVERTED DAMMA +064B ; [.0000.00A9.0002.064B] # ARABIC FATHATAN +064D ; [.0000.00AA.0002.064D] # ARABIC KASRATAN +064C ; [.0000.00AB.0002.064C] # ARABIC DAMMATAN +0654 ; [.0000.00AC.0002.0654] # ARABIC HAMZA ABOVE +0651 ; [.0000.00AD.0002.0651] # ARABIC SHADDA +0658 ; [.0000.00AE.0002.0658] # ARABIC MARK NOON GHUNNA +0653 ; [.0000.00AF.0002.0653] # ARABIC MADDAH ABOVE +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/wae.pl b/cpan/Unicode-Collate/Collate/Locale/wae.pl new file mode 100644 index 0000000000..abada128dc --- /dev/null +++ b/cpan/Unicode-Collate/Collate/Locale/wae.pl @@ -0,0 +1,23 @@ ++{ + entry => <<'ENTRY', # for DUCET v6.0.0 +0061 0061 ; [.15A3.0020.0002.0061][.0000.0032.0002.0301] # <LATIN SMALL LETTER A, LATIN SMALL LETTER A> +00E4 00E4 ; [.15A3.0020.0002.0061][.0000.004E.0002.0303] # <LATIN SMALL LETTER A WITH DIAERESIS, LATIN SMALL LETTER A WITH DIAERESIS> +00E4 0061 0308 ; [.15A3.0020.0002.0061][.0000.004E.0002.0303] # <LATIN SMALL LETTER A WITH DIAERESIS, LATIN SMALL LETTER A, COMBINING DIAERESIS> +0061 0308 00E4 ; [.15A3.0020.0002.0061][.0000.004E.0002.0303] # <LATIN SMALL LETTER A, COMBINING DIAERESIS, LATIN SMALL LETTER A WITH DIAERESIS> +0061 0308 0061 0308 ; [.15A3.0020.0002.0061][.0000.004E.0002.0303] # <LATIN SMALL LETTER A, COMBINING DIAERESIS, LATIN SMALL LETTER A, COMBINING DIAERESIS> +0065 0065 ; [.15FF.0020.0002.0065][.0000.0032.0002.0301] # <LATIN SMALL LETTER E, LATIN SMALL LETTER E> +0069 0069 ; [.1680.0020.0002.0069][.0000.0032.0002.0301] # <LATIN SMALL LETTER I, LATIN SMALL LETTER I> +006F 006F ; [.1724.0020.0002.006F][.0000.0032.0002.0301] # <LATIN SMALL LETTER O, LATIN SMALL LETTER O> +00F6 00F6 ; [.1724.0020.0002.006F][.0000.004E.0002.0303] # <LATIN SMALL LETTER O WITH DIAERESIS, LATIN SMALL LETTER O WITH DIAERESIS> +00F6 006F 0308 ; [.1724.0020.0002.006F][.0000.004E.0002.0303] # <LATIN SMALL LETTER O WITH DIAERESIS, LATIN SMALL LETTER O, COMBINING DIAERESIS> +006F 0308 00F6 ; [.1724.0020.0002.006F][.0000.004E.0002.0303] # <LATIN SMALL LETTER O, COMBINING DIAERESIS, LATIN SMALL LETTER O WITH DIAERESIS> +006F 0308 006F 0308 ; [.1724.0020.0002.006F][.0000.004E.0002.0303] # <LATIN SMALL LETTER O, COMBINING DIAERESIS, LATIN SMALL LETTER O, COMBINING DIAERESIS> +0063 0068 ; [.15D1.0020.0002.0063][.0000.0041.0002.030C] # <LATIN SMALL LETTER C, LATIN SMALL LETTER H> +0073 0063 0068 ; [.17A6.0020.0002.0073][.0000.0041.0002.030C] # <LATIN SMALL LETTER S, LATIN SMALL LETTER C, LATIN SMALL LETTER H> +0075 0075 ; [.17E9.0020.0002.0075][.0000.0032.0002.0301] # <LATIN SMALL LETTER U, LATIN SMALL LETTER U> +00FC 00FC ; [.17E9.0020.0002.0075][.0000.004E.0002.0303] # <LATIN SMALL LETTER U WITH DIAERESIS, LATIN SMALL LETTER U WITH DIAERESIS> +00FC 0075 0308 ; [.17E9.0020.0002.0075][.0000.004E.0002.0303] # <LATIN SMALL LETTER U WITH DIAERESIS, LATIN SMALL LETTER U, COMBINING DIAERESIS> +0075 0308 00FC ; [.17E9.0020.0002.0075][.0000.004E.0002.0303] # <LATIN SMALL LETTER U, COMBINING DIAERESIS, LATIN SMALL LETTER U WITH DIAERESIS> +0075 0308 0075 0308 ; [.17E9.0020.0002.0075][.0000.004E.0002.0303] # <LATIN SMALL LETTER U, COMBINING DIAERESIS, LATIN SMALL LETTER U, COMBINING DIAERESIS> +ENTRY +}; diff --git a/cpan/Unicode-Collate/Collate/Locale/zh_pin.pl b/cpan/Unicode-Collate/Collate/Locale/zh_pin.pl index 480cc3fa93..b8270f1437 100644 --- a/cpan/Unicode-Collate/Collate/Locale/zh_pin.pl +++ b/cpan/Unicode-Collate/Collate/Locale/zh_pin.pl @@ -208,6 +208,477 @@ use Unicode::Collate::CJK::Pinyin; 0075 0308 ; [.17E9.0025.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS 00DC ; [.17E9.0025.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS 0055 0308 ; [.17E9.0025.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS -3007 ; [.A3E5.0020.0002.3007] # IDEOGRAPHIC NUMBER ZERO +FDD0 0041 ; [.8000.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER A> +FDD0 0042 ; [.80C5.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER B> +FDD0 0043 ; [.8441.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER C> +FDD0 0044 ; [.897D.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER D> +FDD0 0045 ; [.8D61.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER E> +FDD0 0046 ; [.8E1B.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER F> +FDD0 0047 ; [.9089.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER G> +FDD0 0048 ; [.93C4.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER H> +FDD0 004A ; [.97EC.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER J> +FDD0 004B ; [.9E32.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER K> +FDD0 004C ; [.A05D.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER L> +3007 ; [.A401.0020.0002.3007] # IDEOGRAPHIC NUMBER ZERO +FDD0 004D ; [.A680.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER M> +FDD0 004E ; [.A9A9.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER N> +FDD0 004F ; [.AB89.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER O> +FDD0 0050 ; [.ABAC.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER P> +FDD0 0051 ; [.AE02.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER Q> +FDD0 0052 ; [.B1F1.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER R> +FDD0 0053 ; [.B337.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER S> +FDD0 0054 ; [.B858.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER T> +FDD0 0057 ; [.BBD3.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER W> +FDD0 0058 ; [.BE4A.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER X> +FDD0 0059 ; [.C382.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER Y> +FDD0 005A ; [.CB22.0020.0002.FDD0] # <noncharacter-FDD0, LATIN CAPITAL LETTER Z> +3220 ; [*030E.0020.0004.3220][.C5CF.0020.0004.3220][*030F.0020.001F.3220] # PARENTHESIZED IDEOGRAPH ONE +3226 ; [*030E.0020.0004.3226][.AE03.0020.0004.3226][*030F.0020.001F.3226] # PARENTHESIZED IDEOGRAPH SEVEN +3222 ; [*030E.0020.0004.3222][.B35F.0020.0004.3222][*030F.0020.001F.3222] # PARENTHESIZED IDEOGRAPH THREE +3228 ; [*030E.0020.0004.3228][.9C92.0020.0004.3228][*030F.0020.001F.3228] # PARENTHESIZED IDEOGRAPH NINE +3221 ; [*030E.0020.0004.3221][.8E0E.0020.0004.3221][*030F.0020.001F.3221] # PARENTHESIZED IDEOGRAPH TWO +3224 ; [*030E.0020.0004.3224][.BDF1.0020.0004.3224][*030F.0020.001F.3224] # PARENTHESIZED IDEOGRAPH FIVE +3239 ; [*030E.0020.0004.3239][.89B6.0020.0004.3239][*030F.0020.001F.3239] # PARENTHESIZED IDEOGRAPH REPRESENT +323D ; [*030E.0020.0004.323D][.AE82.0020.0004.323D][*030F.0020.001F.323D] # PARENTHESIZED IDEOGRAPH ENTERPRISE +3241 ; [*030E.0020.0004.3241][.C1F9.0020.0004.3241][*030F.0020.001F.3241] # PARENTHESIZED IDEOGRAPH REST +3227 ; [*030E.0020.0004.3227][.80C6.0020.0004.3227][*030F.0020.001F.3227] # PARENTHESIZED IDEOGRAPH EIGHT +3225 ; [*030E.0020.0004.3225][.A49F.0020.0004.3225][*030F.0020.001F.3225] # PARENTHESIZED IDEOGRAPH SIX +3238 ; [*030E.0020.0004.3238][.A132.0020.0004.3238][*030F.0020.001F.3238] # PARENTHESIZED IDEOGRAPH LABOR +3229 ; [*030E.0020.0004.3229][.B57B.0020.0004.3229][*030F.0020.001F.3229] # PARENTHESIZED IDEOGRAPH TEN +323F ; [*030E.0020.0004.323F][.C10A.0020.0004.323F][*030F.0020.001F.323F] # PARENTHESIZED IDEOGRAPH ALLIANCE +3234 ; [*030E.0020.0004.3234][.A8EB.0020.0004.3234][*030F.0020.001F.3234] # PARENTHESIZED IDEOGRAPH NAME +323A ; [*030E.0020.0004.323A][.9589.0020.0004.323A][*030F.0020.001F.323A] # PARENTHESIZED IDEOGRAPH CALL +3223 ; [*030E.0020.0004.3223][.B715.0020.0004.3223][*030F.0020.001F.3223] # PARENTHESIZED IDEOGRAPH FOUR +322F ; [*030E.0020.0004.322F][.BB2D.0020.0004.322F][*030F.0020.001F.322F] # PARENTHESIZED IDEOGRAPH EARTH +323B ; [*030E.0020.0004.323B][.C304.0020.0004.323B][*030F.0020.001F.323B] # PARENTHESIZED IDEOGRAPH STUDY +3230 ; [*030E.0020.0004.3230][.B272.0020.0004.3230][*030F.0020.001F.3230] # PARENTHESIZED IDEOGRAPH SUN +322A ; [*030E.0020.0004.322A][.CA96.0020.0004.322A][*030F.0020.001F.322A] # PARENTHESIZED IDEOGRAPH MOON +3232 ; [*030E.0020.0004.3232][.C8D2.0020.0004.3232][*030F.0020.001F.3232] # PARENTHESIZED IDEOGRAPH HAVE +322D ; [*030E.0020.0004.322D][.A98A.0020.0004.322D][*030F.0020.001F.322D] # PARENTHESIZED IDEOGRAPH WOOD +3231 ; [*030E.0020.0004.3231][.CF58.0020.0004.3231][*030F.0020.001F.3231] # PARENTHESIZED IDEOGRAPH STOCK +322C ; [*030E.0020.0004.322C][.B6BA.0020.0004.322C][*030F.0020.001F.322C] # PARENTHESIZED IDEOGRAPH WATER +322B ; [*030E.0020.0004.322B][.97BC.0020.0004.322B][*030F.0020.001F.322B] # PARENTHESIZED IDEOGRAPH FIRE +3235 ; [*030E.0020.0004.3235][.B980.0020.0004.3235][*030F.0020.001F.3235] # PARENTHESIZED IDEOGRAPH SPECIAL +323C ; [*030E.0020.0004.323C][.999B.0020.0004.323C][*030F.0020.001F.323C] # PARENTHESIZED IDEOGRAPH SUPERVISE +3233 ; [*030E.0020.0004.3233][.B4AC.0020.0004.3233][*030F.0020.001F.3233] # PARENTHESIZED IDEOGRAPH SOCIETY +3237 ; [*030E.0020.0004.3237][.CFAD.0020.0004.3237][*030F.0020.001F.3237] # PARENTHESIZED IDEOGRAPH CONGRATULATION +3240 ; [*030E.0020.0004.3240][.98D6.0020.0004.3240][*030F.0020.001F.3240] # PARENTHESIZED IDEOGRAPH FESTIVAL +3242 ; [*030E.0020.0004.3242][.D0D4.0020.0004.3242][*030F.0020.001F.3242] # PARENTHESIZED IDEOGRAPH SELF +3243 ; [*030E.0020.0004.3243][.CE51.0020.0004.3243][*030F.0020.001F.3243] # PARENTHESIZED IDEOGRAPH REACH +3236 ; [*030E.0020.0004.3236][.844F.0020.0004.3236][*030F.0020.001F.3236] # PARENTHESIZED IDEOGRAPH FINANCIAL +323E ; [*030E.0020.0004.323E][.D0A3.0020.0004.323E][*030F.0020.001F.323E] # PARENTHESIZED IDEOGRAPH RESOURCE +322E ; [*030E.0020.0004.322E][.9B96.0020.0004.322E][*030F.0020.001F.322E] # PARENTHESIZED IDEOGRAPH METAL +3358 ; [.1599.0020.0004.3358][.8B60.0020.0004.3358] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO +33E9 ; [.159A.0020.0004.33E9][.1599.0020.0004.33E9][.B272.0020.001F.33E9] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TEN +32C9 ; [.159A.0020.0004.32C9][.1599.0020.0004.32C9][.CA96.0020.001F.32C9] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR OCTOBER +3362 ; [.159A.0020.0004.3362][.1599.0020.0004.3362][.8B60.0020.001F.3362] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TEN +33EA ; [.159A.0020.0004.33EA][.159A.0020.0004.33EA][.B272.0020.001F.33EA] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ELEVEN +32CA ; [.159A.0020.0004.32CA][.159A.0020.0004.32CA][.CA96.0020.001F.32CA] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR NOVEMBER +3363 ; [.159A.0020.0004.3363][.159A.0020.0004.3363][.8B60.0020.001F.3363] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ELEVEN +33EB ; [.159A.0020.0004.33EB][.159B.0020.0004.33EB][.B272.0020.001F.33EB] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWELVE +32CB ; [.159A.0020.0004.32CB][.159B.0020.0004.32CB][.CA96.0020.001F.32CB] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +3364 ; [.159A.0020.0004.3364][.159B.0020.0004.3364][.8B60.0020.001F.3364] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWELVE +33EC ; [.159A.0020.0004.33EC][.159C.0020.0004.33EC][.B272.0020.001F.33EC] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTEEN +3365 ; [.159A.0020.0004.3365][.159C.0020.0004.3365][.8B60.0020.001F.3365] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THIRTEEN +33ED ; [.159A.0020.0004.33ED][.159D.0020.0004.33ED][.B272.0020.001F.33ED] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOURTEEN +3366 ; [.159A.0020.0004.3366][.159D.0020.0004.3366][.8B60.0020.001F.3366] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOURTEEN +33EE ; [.159A.0020.0004.33EE][.159E.0020.0004.33EE][.B272.0020.001F.33EE] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIFTEEN +3367 ; [.159A.0020.0004.3367][.159E.0020.0004.3367][.8B60.0020.001F.3367] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIFTEEN +33EF ; [.159A.0020.0004.33EF][.159F.0020.0004.33EF][.B272.0020.001F.33EF] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIXTEEN +3368 ; [.159A.0020.0004.3368][.159F.0020.0004.3368][.8B60.0020.001F.3368] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIXTEEN +33F0 ; [.159A.0020.0004.33F0][.15A0.0020.0004.33F0][.B272.0020.001F.33F0] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVENTEEN +3369 ; [.159A.0020.0004.3369][.15A0.0020.0004.3369][.8B60.0020.001F.3369] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVENTEEN +33F1 ; [.159A.0020.0004.33F1][.15A1.0020.0004.33F1][.B272.0020.001F.33F1] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHTEEN +336A ; [.159A.0020.0004.336A][.15A1.0020.0004.336A][.8B60.0020.001F.336A] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHTEEN +33F2 ; [.159A.0020.0004.33F2][.15A2.0020.0004.33F2][.B272.0020.001F.33F2] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINETEEN +336B ; [.159A.0020.0004.336B][.15A2.0020.0004.336B][.8B60.0020.001F.336B] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINETEEN +33E0 ; [.159A.0020.0004.33E0][.B272.0020.0004.33E0] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE +32C0 ; [.159A.0020.0004.32C0][.CA96.0020.0004.32C0] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY +3359 ; [.159A.0020.0004.3359][.8B60.0020.0004.3359] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ONE +33F3 ; [.159B.0020.0004.33F3][.1599.0020.0004.33F3][.B272.0020.001F.33F3] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY +336C ; [.159B.0020.0004.336C][.1599.0020.0004.336C][.8B60.0020.001F.336C] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY +33F4 ; [.159B.0020.0004.33F4][.159A.0020.0004.33F4][.B272.0020.001F.33F4] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-ONE +336D ; [.159B.0020.0004.336D][.159A.0020.0004.336D][.8B60.0020.001F.336D] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-ONE +33F5 ; [.159B.0020.0004.33F5][.159B.0020.0004.33F5][.B272.0020.001F.33F5] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-TWO +336E ; [.159B.0020.0004.336E][.159B.0020.0004.336E][.8B60.0020.001F.336E] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-TWO +33F6 ; [.159B.0020.0004.33F6][.159C.0020.0004.33F6][.B272.0020.001F.33F6] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-THREE +336F ; [.159B.0020.0004.336F][.159C.0020.0004.336F][.8B60.0020.001F.336F] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-THREE +33F7 ; [.159B.0020.0004.33F7][.159D.0020.0004.33F7][.B272.0020.001F.33F7] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-FOUR +3370 ; [.159B.0020.0004.3370][.159D.0020.0004.3370][.8B60.0020.001F.3370] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR +33F8 ; [.159B.0020.0004.33F8][.159E.0020.0004.33F8][.B272.0020.001F.33F8] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-FIVE +33F9 ; [.159B.0020.0004.33F9][.159F.0020.0004.33F9][.B272.0020.001F.33F9] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-SIX +33FA ; [.159B.0020.0004.33FA][.15A0.0020.0004.33FA][.B272.0020.001F.33FA] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-SEVEN +33FB ; [.159B.0020.0004.33FB][.15A1.0020.0004.33FB][.B272.0020.001F.33FB] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-EIGHT +33FC ; [.159B.0020.0004.33FC][.15A2.0020.0004.33FC][.B272.0020.001F.33FC] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-NINE +33E1 ; [.159B.0020.0004.33E1][.B272.0020.0004.33E1] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWO +32C1 ; [.159B.0020.0004.32C1][.CA96.0020.0004.32C1] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR FEBRUARY +335A ; [.159B.0020.0004.335A][.8B60.0020.0004.335A] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO +33FD ; [.159C.0020.0004.33FD][.1599.0020.0004.33FD][.B272.0020.001F.33FD] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY +33FE ; [.159C.0020.0004.33FE][.159A.0020.0004.33FE][.B272.0020.001F.33FE] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +33E2 ; [.159C.0020.0004.33E2][.B272.0020.0004.33E2] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THREE +32C2 ; [.159C.0020.0004.32C2][.CA96.0020.0004.32C2] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR MARCH +335B ; [.159C.0020.0004.335B][.8B60.0020.0004.335B] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE +33E3 ; [.159D.0020.0004.33E3][.B272.0020.0004.33E3] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOUR +32C3 ; [.159D.0020.0004.32C3][.CA96.0020.0004.32C3] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR APRIL +335C ; [.159D.0020.0004.335C][.8B60.0020.0004.335C] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR +33E4 ; [.159E.0020.0004.33E4][.B272.0020.0004.33E4] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIVE +32C4 ; [.159E.0020.0004.32C4][.CA96.0020.0004.32C4] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR MAY +335D ; [.159E.0020.0004.335D][.8B60.0020.0004.335D] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE +33E5 ; [.159F.0020.0004.33E5][.B272.0020.0004.33E5] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIX +32C5 ; [.159F.0020.0004.32C5][.CA96.0020.0004.32C5] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JUNE +335E ; [.159F.0020.0004.335E][.8B60.0020.0004.335E] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX +33E6 ; [.15A0.0020.0004.33E6][.B272.0020.0004.33E6] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVEN +32C6 ; [.15A0.0020.0004.32C6][.CA96.0020.0004.32C6] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JULY +335F ; [.15A0.0020.0004.335F][.8B60.0020.0004.335F] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVEN +33E7 ; [.15A1.0020.0004.33E7][.B272.0020.0004.33E7] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHT +32C7 ; [.15A1.0020.0004.32C7][.CA96.0020.0004.32C7] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR AUGUST +3360 ; [.15A1.0020.0004.3360][.8B60.0020.0004.3360] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHT +33E8 ; [.15A2.0020.0004.33E8][.B272.0020.0004.33E8] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINE +32C8 ; [.15A2.0020.0004.32C8][.CA96.0020.0004.32C8] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR SEPTEMBER +3361 ; [.15A2.0020.0004.3361][.8B60.0020.0004.3361] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINE +1F241 ; [*0356.0020.0004.1F241][.B35F.0020.0004.1F241][*0357.0020.001F.1F241] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09 +1F242 ; [*0356.0020.0004.1F242][.8E0E.0020.0004.1F242][*0357.0020.001F.1F242] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E8C +1F247 ; [*0356.0020.0004.1F247][.B548.0020.0004.1F247][*0357.0020.001F.1F247] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-52DD +1F243 ; [*0356.0020.0004.1F243][.8049.0020.0004.1F243][*0357.0020.001F.1F243] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-5B89 +1F245 ; [*0356.0020.0004.1F245][.89A6.0020.0004.1F245][*0357.0020.001F.1F245] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6253 +1F248 ; [*0356.0020.0004.1F248][.811B.0020.0004.1F248][*0357.0020.001F.1F248] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F240 ; [*0356.0020.0004.1F240][.81FC.0020.0004.1F240][*0357.0020.001F.1F240] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C +1F244 ; [*0356.0020.0004.1F244][.8B60.0020.0004.1F244][*0357.0020.001F.1F244] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-70B9 +1F246 ; [*0356.0020.0004.1F246][.8A91.0020.0004.1F246][*0357.0020.001F.1F246] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-76D7 +2F00 ; [.C5CF.0020.0004.2F00] # KANGXI RADICAL ONE +3192 ; [.C5CF.0020.0014.3192] # IDEOGRAPHIC ANNOTATION ONE MARK +3280 ; [.C5CF.0020.0006.3280] # CIRCLED IDEOGRAPH ONE +1F229 ; [.C5CF.0020.001C.1F229] # SQUARED CJK UNIFIED IDEOGRAPH-4E00 +319C ; [.8BE6.0020.0014.319C] # IDEOGRAPHIC ANNOTATION FOURTH MARK +3286 ; [.AE03.0020.0006.3286] # CIRCLED IDEOGRAPH SEVEN +3194 ; [.B35F.0020.0014.3194] # IDEOGRAPHIC ANNOTATION THREE MARK +3282 ; [.B35F.0020.0006.3282] # CIRCLED IDEOGRAPH THREE +1F22A ; [.B35F.0020.001C.1F22A] # SQUARED CJK UNIFIED IDEOGRAPH-4E09 +3196 ; [.B46D.0020.0014.3196] # IDEOGRAPHIC ANNOTATION TOP MARK +32A4 ; [.B46D.0020.0006.32A4] # CIRCLED IDEOGRAPH HIGH +3198 ; [.BF75.0020.0014.3198] # IDEOGRAPHIC ANNOTATION BOTTOM MARK +32A6 ; [.BF75.0020.0006.32A6] # CIRCLED IDEOGRAPH LOW +319B ; [.8382.0020.0014.319B] # IDEOGRAPHIC ANNOTATION THIRD MARK +2F01 ; [.937A.0020.0004.2F01] # KANGXI RADICAL LINE +3197 ; [.CECC.0020.0014.3197] # IDEOGRAPHIC ANNOTATION MIDDLE MARK +32A5 ; [.CECC.0020.0006.32A5] # CIRCLED IDEOGRAPH CENTRE +1F22D ; [.CECC.0020.001C.1F22D] # SQUARED CJK UNIFIED IDEOGRAPH-4E2D +2F02 ; [.CF8B.0020.0004.2F02] # KANGXI RADICAL DOT +2F03 ; [.AD44.0020.0004.2F03] # KANGXI RADICAL SLASH +2F04 ; [.C643.0020.0004.2F04] # KANGXI RADICAL SECOND +319A ; [.C643.0020.0014.319A] # IDEOGRAPHIC ANNOTATION SECOND MARK +3288 ; [.9C92.0020.0006.3288] # CIRCLED IDEOGRAPH NINE +2F05 ; [.9D9D.0020.0004.2F05] # KANGXI RADICAL HOOK +2F06 ; [.8E0E.0020.0004.2F06] # KANGXI RADICAL TWO +3193 ; [.8E0E.0020.0014.3193] # IDEOGRAPHIC ANNOTATION TWO MARK +3281 ; [.8E0E.0020.0006.3281] # CIRCLED IDEOGRAPH TWO +1F214 ; [.8E0E.0020.001C.1F214] # SQUARED CJK UNIFIED IDEOGRAPH-4E8C +3284 ; [.BDF1.0020.0006.3284] # CIRCLED IDEOGRAPH FIVE +2F07 ; [.BAE9.0020.0004.2F07] # KANGXI RADICAL LID +1F218 ; [.9A8F.0020.001C.1F218] # SQUARED CJK UNIFIED IDEOGRAPH-4EA4 +2F08 ; [.B22F.0020.0004.2F08] # KANGXI RADICAL MAN +319F ; [.B22F.0020.0014.319F] # IDEOGRAPHIC ANNOTATION MAN MARK +32AD ; [.AE82.0020.0006.32AD] # CIRCLED IDEOGRAPH ENTERPRISE +32A1 ; [.C1F9.0020.0006.32A1] # CIRCLED IDEOGRAPH REST +329D ; [.C8A5.0020.0006.329D] # CIRCLED IDEOGRAPH EXCELLENT +2F09 ; [.8DE5.0020.0004.2F09] # KANGXI RADICAL LEGS +2F0A ; [.B2E8.0020.0004.2F0A] # KANGXI RADICAL ENTER +2F0B ; [.80C6.0020.0004.2F0B] # KANGXI RADICAL EIGHT +3287 ; [.80C6.0020.0006.3287] # CIRCLED IDEOGRAPH EIGHT +3285 ; [.A49F.0020.0006.3285] # CIRCLED IDEOGRAPH SIX +2F0C ; [.9C60.0020.0004.2F0C] # KANGXI RADICAL DOWN BOX +1F21E ; [.CB49.0020.001C.1F21E] # SQUARED CJK UNIFIED IDEOGRAPH-518D +2F0D ; [.A832.0020.0004.2F0D] # KANGXI RADICAL COVER +32A2 ; [.C132.0020.0006.32A2] # CIRCLED IDEOGRAPH COPY +2F0E ; [.837B.0020.0004.2F0E] # KANGXI RADICAL ICE +2F0F ; [.989A.0020.0004.2F0F] # KANGXI RADICAL TABLE +2F10 ; [.AF3D.0020.0004.2F10] # KANGXI RADICAL OPEN BOX +2F11 ; [.8A71.0020.0004.2F11] # KANGXI RADICAL KNIFE +1F220 ; [.8779.0020.001C.1F220] # SQUARED CJK UNIFIED IDEOGRAPH-521D +1F21C ; [.AF1F.0020.001C.1F21C] # SQUARED CJK UNIFIED IDEOGRAPH-524D +1F239 ; [.9162.0020.001C.1F239] # SQUARED CJK UNIFIED IDEOGRAPH-5272 +2F12 ; [.A237.0020.0004.2F12] # KANGXI RADICAL POWER +3298 ; [.A132.0020.0006.3298] # CIRCLED IDEOGRAPH LABOR +2F13 ; [.8175.0020.0004.2F13] # KANGXI RADICAL WRAP +2F14 ; [.8238.0020.0004.2F14] # KANGXI RADICAL SPOON +2F15 ; [.8E8F.0020.0004.2F15] # KANGXI RADICAL RIGHT OPEN BOX +2F16 ; [.BF01.0020.0004.2F16] # KANGXI RADICAL HIDING ENCLOSURE +32A9 ; [.C5D4.0020.0006.32A9] # CIRCLED IDEOGRAPH MEDICINE +2F17 ; [.B57B.0020.0004.2F17] # KANGXI RADICAL TEN +3038 ; [.B57B.0020.0004.3038] # HANGZHOU NUMERAL TEN +3289 ; [.B57B.0020.0006.3289] # CIRCLED IDEOGRAPH TEN +3039 ; [.AAA8.0020.0004.3039] # HANGZHOU NUMERAL TWENTY +303A ; [.B342.0020.0004.303A] # HANGZHOU NUMERAL THIRTY +32AF ; [.C10A.0020.0006.32AF] # CIRCLED IDEOGRAPH ALLIANCE +2F18 ; [.8410.0020.0004.2F18] # KANGXI RADICAL DIVINATION +2F19 ; [.9B2A.0020.0004.2F19] # KANGXI RADICAL SEAL +329E ; [.C7AF.0020.0006.329E] # CIRCLED IDEOGRAPH PRINT +2F1A ; [.85AC.0020.0004.2F1A] # KANGXI RADICAL CLIFF +2F1B ; [.B6E4.0020.0004.2F1B] # KANGXI RADICAL PRIVATE +2F1C ; [.C8E8.0020.0004.2F1C] # KANGXI RADICAL AGAIN +1F212 ; [.B6A1.0020.001C.1F212] # SQUARED CJK UNIFIED IDEOGRAPH-53CC +2F1D ; [.9F2E.0020.0004.2F1D] # KANGXI RADICAL MOUTH +1F251 ; [.9ED7.0020.0006.1F251] # CIRCLED IDEOGRAPH ACCEPT +32A8 ; [.C8E9.0020.0006.32A8] # CIRCLED IDEOGRAPH RIGHT +1F22E ; [.C8E9.0020.001C.1F22E] # SQUARED CJK UNIFIED IDEOGRAPH-53F3 +1F234 ; [.9499.0020.001C.1F234] # SQUARED CJK UNIFIED IDEOGRAPH-5408 +3294 ; [.A8EB.0020.0006.3294] # CIRCLED IDEOGRAPH NAME +1F225 ; [.8816.0020.001C.1F225] # SQUARED CJK UNIFIED IDEOGRAPH-5439 +3244 ; [.BD6F.0020.0006.3244] # CIRCLED IDEOGRAPH QUESTION +1F23A ; [.C804.0020.001C.1F23A] # SQUARED CJK UNIFIED IDEOGRAPH-55B6 +2F1E ; [.BC8F.0020.0004.2F1E] # KANGXI RADICAL ENCLOSURE +3195 ; [.B715.0020.0014.3195] # IDEOGRAPHIC ANNOTATION FOUR MARK +3283 ; [.B715.0020.0006.3283] # CIRCLED IDEOGRAPH FOUR +2F1F ; [.BB2D.0020.0004.2F1F] # KANGXI RADICAL EARTH +328F ; [.BB2D.0020.0006.328F] # CIRCLED IDEOGRAPH EARTH +319E ; [.8AAF.0020.0014.319E] # IDEOGRAPHIC ANNOTATION EARTH MARK +2F20 ; [.B5A9.0020.0004.2F20] # KANGXI RADICAL SCHOLAR +1F224 ; [.B526.0020.001C.1F224] # SQUARED CJK UNIFIED IDEOGRAPH-58F0 +2F21 ; [.CE2D.0020.0004.2F21] # KANGXI RADICAL GO +2F22 ; [.B7D9.0020.0004.2F22] # KANGXI RADICAL GO SLOWLY +2F23 ; [.BE4B.0020.0004.2F23] # KANGXI RADICAL EVENING +1F215 ; [.8D1C.0020.001C.1F215] # SQUARED CJK UNIFIED IDEOGRAPH-591A +32B0 ; [.C59A.0020.0006.32B0] # CIRCLED IDEOGRAPH NIGHT +2F24 ; [.89A7.0020.0004.2F24] # KANGXI RADICAL BIG +337D ; [.89A7.0020.001C.337D][.CDCB.0020.001F.337D] # SQUARE ERA NAME TAISYOU +319D ; [.B9F9.0020.0014.319D] # IDEOGRAPHIC ANNOTATION HEAVEN MARK +1F217 ; [.B9F9.0020.001C.1F217] # SQUARED CJK UNIFIED IDEOGRAPH-5929 +2F25 ; [.AB5E.0020.0004.2F25] # KANGXI RADICAL WOMAN +329B ; [.AB5E.0020.0006.329B] # CIRCLED IDEOGRAPH FEMALE +2F26 ; [.D0E1.0020.0004.2F26] # KANGXI RADICAL CHILD +1F211 ; [.D0D3.0020.001C.1F211] # SQUARED CJK UNIFIED IDEOGRAPH-5B57 +32AB ; [.C304.0020.0006.32AB] # CIRCLED IDEOGRAPH STUDY +2F27 ; [.A858.0020.0004.2F27] # KANGXI RADICAL ROOF +32AA ; [.D0E4.0020.0006.32AA] # CIRCLED IDEOGRAPH RELIGION +2F28 ; [.8955.0020.0004.2F28] # KANGXI RADICAL INCH +2F29 ; [.C0DE.0020.0004.2F29] # KANGXI RADICAL SMALL +2F2A ; [.C8AC.0020.0004.2F2A] # KANGXI RADICAL LAME +2F2B ; [.B554.0020.0004.2F2B] # KANGXI RADICAL CORPSE +2F2C ; [.85F7.0020.0004.2F2C] # KANGXI RADICAL SPROUT +2F2D ; [.B3FD.0020.0004.2F2D] # KANGXI RADICAL MOUNTAIN +2F2E ; [.87DC.0020.0004.2F2E] # KANGXI RADICAL RIVER +2F2F ; [.91CB.0020.0004.2F2F] # KANGXI RADICAL WORK +32A7 ; [.D19D.0020.0006.32A7] # CIRCLED IDEOGRAPH LEFT +1F22C ; [.D19D.0020.001C.1F22C] # SQUARED CJK UNIFIED IDEOGRAPH-5DE6 +2F30 ; [.989B.0020.0004.2F30] # KANGXI RADICAL ONESELF +2F31 ; [.9B91.0020.0004.2F31] # KANGXI RADICAL TURBAN +2F32 ; [.90EE.0020.0004.2F32] # KANGXI RADICAL DRY +337B ; [.AD6D.0020.001C.337B][.866D.0020.001F.337B] # SQUARE ERA NAME HEISEI +2F33 ; [.C50D.0020.0004.2F33] # KANGXI RADICAL SHORT THREAD +3245 ; [.C8EA.0020.0006.3245] # CIRCLED IDEOGRAPH KINDERGARTEN +2F34 ; [.9307.0020.0004.2F34] # KANGXI RADICAL DOTTED CLIFF +2F35 ; [.C792.0020.0004.2F35] # KANGXI RADICAL LONG STRIDE +2F36 ; [.91E7.0020.0004.2F36] # KANGXI RADICAL TWO HANDS +2F37 ; [.C66C.0020.0004.2F37] # KANGXI RADICAL SHOOT +2F38 ; [.91CC.0020.0004.2F38] # KANGXI RADICAL BOW +2F39 ; [.98AF.0020.0004.2F39] # KANGXI RADICAL SNOUT +2F3A ; [.B3FE.0020.0004.2F3A] # KANGXI RADICAL BRISTLE +2F3B ; [.86F1.0020.0004.2F3B] # KANGXI RADICAL STEP +1F21D ; [.957C.0020.001C.1F21D] # SQUARED CJK UNIFIED IDEOGRAPH-5F8C +1F250 ; [.8AB1.0020.0006.1F250] # CIRCLED IDEOGRAPH ADVANTAGE +2F3C ; [.C178.0020.0004.2F3C] # KANGXI RADICAL HEART +2F3D ; [.9153.0020.0004.2F3D] # KANGXI RADICAL HALBERD +2F3E ; [.95D9.0020.0004.2F3E] # KANGXI RADICAL DOOR +2F3F ; [.B60C.0020.0004.2F3F] # KANGXI RADICAL HAND +1F210 ; [.B60C.0020.001C.1F210] # SQUARED CJK UNIFIED IDEOGRAPH-624B +1F231 ; [.89A6.0020.001C.1F231] # SQUARED CJK UNIFIED IDEOGRAPH-6253 +1F227 ; [.BAEB.0020.001C.1F227] # SQUARED CJK UNIFIED IDEOGRAPH-6295 +1F22F ; [.CE40.0020.001C.1F22F] # SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F228 ; [.8420.0020.001C.1F228] # SQUARED CJK UNIFIED IDEOGRAPH-6355 +2F40 ; [.CDDA.0020.0004.2F40] # KANGXI RADICAL BRANCH +2F41 ; [.ADCC.0020.0004.2F41] # KANGXI RADICAL RAP +2F42 ; [.BD44.0020.0004.2F42] # KANGXI RADICAL SCRIPT +3246 ; [.BD44.0020.0006.3246] # CIRCLED IDEOGRAPH SCHOOL +2F43 ; [.8C5E.0020.0004.2F43] # KANGXI RADICAL DIPPER +1F21B ; [.A385.0020.001C.1F21B] # SQUARED CJK UNIFIED IDEOGRAPH-6599 +2F44 ; [.9B93.0020.0004.2F44] # KANGXI RADICAL AXE +1F21F ; [.C188.0020.001C.1F21F] # SQUARED CJK UNIFIED IDEOGRAPH-65B0 +2F45 ; [.8E90.0020.0004.2F45] # KANGXI RADICAL SQUARE +2F46 ; [.BDD2.0020.0004.2F46] # KANGXI RADICAL NOT +2F47 ; [.B272.0020.0004.2F47] # KANGXI RADICAL SUN +3290 ; [.B272.0020.0006.3290] # CIRCLED IDEOGRAPH SUN +337E ; [.A8EC.0020.001C.337E][.CE5C.0020.001F.337E] # SQUARE ERA NAME MEIZI +1F219 ; [.C83C.0020.001C.1F219] # SQUARED CJK UNIFIED IDEOGRAPH-6620 +337C ; [.CCD5.0020.001C.337C][.949E.0020.001F.337C] # SQUARE ERA NAME SYOUWA +2F48 ; [.CA8E.0020.0004.2F48] # KANGXI RADICAL SAY +2F49 ; [.CA96.0020.0004.2F49] # KANGXI RADICAL MOON +328A ; [.CA96.0020.0006.328A] # CIRCLED IDEOGRAPH MOON +1F237 ; [.CA96.0020.001C.1F237] # SQUARED CJK UNIFIED IDEOGRAPH-6708 +3292 ; [.C8D2.0020.0006.3292] # CIRCLED IDEOGRAPH HAVE +1F236 ; [.C8D2.0020.001C.1F236] # SQUARED CJK UNIFIED IDEOGRAPH-6709 +2F4A ; [.A98A.0020.0004.2F4A] # KANGXI RADICAL TREE +328D ; [.A98A.0020.0006.328D] # CIRCLED IDEOGRAPH WOOD +3291 ; [.CF58.0020.0006.3291] # CIRCLED IDEOGRAPH STOCK +337F ; [.CF58.0020.001C.337F][.B5B3.0020.001F.337F][.9737.0020.001F.337F][.B4AC.0020.001F.337F] # SQUARE CORPORATION +2F4B ; [.AF4C.0020.0004.2F4B] # KANGXI RADICAL LACK +2F4C ; [.CE2E.0020.0004.2F4C] # KANGXI RADICAL STOP +32A3 ; [.CDCB.0020.0006.32A3] # CIRCLED IDEOGRAPH CORRECT +2F4D ; [.89B3.0020.0004.2F4D] # KANGXI RADICAL DEATH +2F4E ; [.B623.0020.0004.2F4E] # KANGXI RADICAL WEAPON +2F4F ; [.BDD3.0020.0004.2F4F] # KANGXI RADICAL DO NOT +2E9F ; [.A97A.0020.0004.2E9F] # CJK RADICAL MOTHER +2F50 ; [.8239.0020.0004.2F50] # KANGXI RADICAL COMPARE +2F51 ; [.A71D.0020.0004.2F51] # KANGXI RADICAL FUR +2F52 ; [.B5AA.0020.0004.2F52] # KANGXI RADICAL CLAN +2F53 ; [.AE99.0020.0004.2F53] # KANGXI RADICAL STEAM +2F54 ; [.B6BA.0020.0004.2F54] # KANGXI RADICAL WATER +328C ; [.B6BA.0020.0006.328C] # CIRCLED IDEOGRAPH WATER +329F ; [.CFA5.0020.0006.329F] # CIRCLED IDEOGRAPH ATTENTION +1F235 ; [.A6DD.0020.001C.1F235] # SQUARED CJK UNIFIED IDEOGRAPH-6E80 +1F226 ; [.C445.0020.001C.1F226] # SQUARED CJK UNIFIED IDEOGRAPH-6F14 +2F55 ; [.97BC.0020.0004.2F55] # KANGXI RADICAL FIRE +328B ; [.97BC.0020.0006.328B] # CIRCLED IDEOGRAPH FIRE +1F21A ; [.BDE3.0020.001C.1F21A] # SQUARED CJK UNIFIED IDEOGRAPH-7121 +2F56 ; [.CCDE.0020.0004.2F56] # KANGXI RADICAL CLAW +2F57 ; [.9046.0020.0004.2F57] # KANGXI RADICAL FATHER +2F58 ; [.C51B.0020.0004.2F58] # KANGXI RADICAL DOUBLE X +2F59 ; [.ABD3.0020.0004.2F59] # KANGXI RADICAL HALF TREE TRUNK +2F5A ; [.AD1C.0020.0004.2F5A] # KANGXI RADICAL SLICE +2F5B ; [.C397.0020.0004.2F5B] # KANGXI RADICAL FANG +2F5C ; [.AB21.0020.0004.2F5C] # KANGXI RADICAL COW +3295 ; [.B980.0020.0006.3295] # CIRCLED IDEOGRAPH SPECIAL +2F5D ; [.B1B4.0020.0004.2F5D] # KANGXI RADICAL DOG +2F5E ; [.C2C8.0020.0004.2F5E] # KANGXI RADICAL PROFOUND +2F5F ; [.C99A.0020.0004.2F5F] # KANGXI RADICAL JADE +2F60 ; [.928E.0020.0004.2F60] # KANGXI RADICAL MELON +2F61 ; [.BBE5.0020.0004.2F61] # KANGXI RADICAL TILE +2F62 ; [.90BE.0020.0004.2F62] # KANGXI RADICAL SWEET +2F63 ; [.B523.0020.0004.2F63] # KANGXI RADICAL LIFE +1F222 ; [.B523.0020.001C.1F222] # SQUARED CJK UNIFIED IDEOGRAPH-751F +2F64 ; [.C894.0020.0004.2F64] # KANGXI RADICAL USE +2F65 ; [.BA01.0020.0004.2F65] # KANGXI RADICAL FIELD +3199 ; [.9955.0020.0014.3199] # IDEOGRAPHIC ANNOTATION FIRST MARK +1F238 ; [.B4C6.0020.001C.1F238] # SQUARED CJK UNIFIED IDEOGRAPH-7533 +329A ; [.A9E0.0020.0006.329A] # CIRCLED IDEOGRAPH MALE +2F66 ; [.ACE2.0020.0004.2F66] # KANGXI RADICAL BOLT OF CLOTH +2F67 ; [.AA2F.0020.0004.2F67] # KANGXI RADICAL SICKNESS +2F68 ; [.83AC.0020.0004.2F68] # KANGXI RADICAL DOTTED TENT +2F69 ; [.810B.0020.0004.2F69] # KANGXI RADICAL WHITE +2F6A ; [.ACBB.0020.0004.2F6A] # KANGXI RADICAL SKIN +2F6B ; [.A8CF.0020.0004.2F6B] # KANGXI RADICAL DISH +32AC ; [.999B.0020.0006.32AC] # CIRCLED IDEOGRAPH SUPERVISE +2F6C ; [.A98D.0020.0004.2F6C] # KANGXI RADICAL EYE +2F6D ; [.A71E.0020.0004.2F6D] # KANGXI RADICAL SPEAR +2F6E ; [.B59D.0020.0004.2F6E] # KANGXI RADICAL ARROW +2F6F ; [.B57D.0020.0004.2F6F] # KANGXI RADICAL STONE +2F70 ; [.B5B0.0020.0004.2F70] # KANGXI RADICAL SPIRIT +3293 ; [.B4AC.0020.0006.3293] # CIRCLED IDEOGRAPH SOCIETY +3297 ; [.CFAD.0020.0006.3297] # CIRCLED IDEOGRAPH CONGRATULATION +1F232 ; [.9BD6.0020.001C.1F232] # SQUARED CJK UNIFIED IDEOGRAPH-7981 +2F71 ; [.B2AC.0020.0004.2F71] # KANGXI RADICAL TRACK +2F72 ; [.9498.0020.0004.2F72] # KANGXI RADICAL GRAIN +3299 ; [.A83C.0020.0006.3299] # CIRCLED IDEOGRAPH SECRET +2F73 ; [.C301.0020.0004.2F73] # KANGXI RADICAL CAVE +1F233 ; [.9F18.0020.001C.1F233] # SQUARED CJK UNIFIED IDEOGRAPH-7A7A +2F74 ; [.A23B.0020.0004.2F74] # KANGXI RADICAL STAND +2F75 ; [.CF74.0020.0004.2F75] # KANGXI RADICAL BAMBOO +3247 ; [.CDBB.0020.0006.3247] # CIRCLED IDEOGRAPH KOTO +2F76 ; [.A821.0020.0004.2F76] # KANGXI RADICAL RICE +2F77 ; [.A833.0020.0004.2F77] # KANGXI RADICAL SILK +1F221 ; [.CEDC.0020.001C.1F221] # SQUARED CJK UNIFIED IDEOGRAPH-7D42 +2F78 ; [.8F9E.0020.0004.2F78] # KANGXI RADICAL JAR +2F79 ; [.BC56.0020.0004.2F79] # KANGXI RADICAL NET +2F7A ; [.C4C4.0020.0004.2F7A] # KANGXI RADICAL SHEEP +2F7B ; [.C975.0020.0004.2F7B] # KANGXI RADICAL FEATHER +2F7C ; [.A149.0020.0004.2F7C] # KANGXI RADICAL OLD +2F7D ; [.8DE6.0020.0004.2F7D] # KANGXI RADICAL AND +2F7E ; [.A19B.0020.0004.2F7E] # KANGXI RADICAL PLOW +2F7F ; [.8E00.0020.0004.2F7F] # KANGXI RADICAL EAR +2F80 ; [.C99D.0020.0004.2F80] # KANGXI RADICAL BRUSH +2F81 ; [.B2C1.0020.0004.2F81] # KANGXI RADICAL MEAT +2F82 ; [.8612.0020.0004.2F82] # KANGXI RADICAL MINISTER +2F83 ; [.D0D4.0020.0004.2F83] # KANGXI RADICAL SELF +2F84 ; [.CE51.0020.0004.2F84] # KANGXI RADICAL ARRIVE +2F85 ; [.9CA1.0020.0004.2F85] # KANGXI RADICAL MORTAR +2F86 ; [.B4A3.0020.0004.2F86] # KANGXI RADICAL TONGUE +2F87 ; [.87EF.0020.0004.2F87] # KANGXI RADICAL OPPOSE +2F88 ; [.CF09.0020.0004.2F88] # KANGXI RADICAL BOAT +2F89 ; [.91A3.0020.0004.2F89] # KANGXI RADICAL STOPPING +2F8A ; [.B3A0.0020.0004.2F8A] # KANGXI RADICAL COLOR +2F8B ; [.84B1.0020.0004.2F8B] # KANGXI RADICAL GRASS +2F8C ; [.9588.0020.0004.2F8C] # KANGXI RADICAL TIGER +2F8D ; [.872E.0020.0004.2F8D] # KANGXI RADICAL INSECT +2F8E ; [.C317.0020.0004.2F8E] # KANGXI RADICAL BLOOD +2F8F ; [.C1BA.0020.0004.2F8F] # KANGXI RADICAL WALK ENCLOSURE +2F90 ; [.C5D3.0020.0004.2F90] # KANGXI RADICAL CLOTHES +2F91 ; [.C3B8.0020.0004.2F91] # KANGXI RADICAL WEST +2F92 ; [.99FE.0020.0004.2F92] # KANGXI RADICAL SEE +2F93 ; [.9AB8.0020.0004.2F93] # KANGXI RADICAL HORN +1F216 ; [.9B6C.0020.001C.1F216] # SQUARED CJK UNIFIED IDEOGRAPH-89E3 +2F94 ; [.C3F5.0020.0004.2F94] # KANGXI RADICAL SPEECH +2F95 ; [.924B.0020.0004.2F95] # KANGXI RADICAL VALLEY +2F96 ; [.8C5F.0020.0004.2F96] # KANGXI RADICAL BEAN +2F97 ; [.B59F.0020.0004.2F97] # KANGXI RADICAL PIG +2F98 ; [.CE56.0020.0004.2F98] # KANGXI RADICAL BADGER +2F99 ; [.81CA.0020.0004.2F99] # KANGXI RADICAL SHELL +3296 ; [.844F.0020.0006.3296] # CIRCLED IDEOGRAPH FINANCIAL +1F223 ; [.8E87.0020.001C.1F223] # SQUARED CJK UNIFIED IDEOGRAPH-8CA9 +32AE ; [.D0A3.0020.0006.32AE] # CIRCLED IDEOGRAPH RESOURCE +2F9A ; [.86F6.0020.0004.2F9A] # KANGXI RADICAL RED +2F9B ; [.D138.0020.0004.2F9B] # KANGXI RADICAL RUN +1F230 ; [.D138.0020.001C.1F230] # SQUARED CJK UNIFIED IDEOGRAPH-8D70 +2F9C ; [.D141.0020.0004.2F9C] # KANGXI RADICAL FOOT +2F9D ; [.B4CA.0020.0004.2F9D] # KANGXI RADICAL BODY +2F9E ; [.85ED.0020.0004.2F9E] # KANGXI RADICAL CART +2F9F ; [.C17D.0020.0004.2F9F] # KANGXI RADICAL BITTER +2FA0 ; [.8616.0020.0004.2FA0] # KANGXI RADICAL MORNING +2FA1 ; [.8858.0020.0004.2FA1] # KANGXI RADICAL WALK +1F22B ; [.C8C4.0020.001C.1F22B] # SQUARED CJK UNIFIED IDEOGRAPH-904A +329C ; [.B5EC.0020.0006.329C] # CIRCLED IDEOGRAPH SUITABLE +2FA2 ; [.C682.0020.0004.2FA2] # KANGXI RADICAL CITY +2FA3 ; [.C8D6.0020.0004.2FA3] # KANGXI RADICAL WINE +2FA4 ; [.82F0.0020.0004.2FA4] # KANGXI RADICAL DISTINGUISH +2FA5 ; [.A21E.0020.0004.2FA5] # KANGXI RADICAL VILLAGE +2FA6 ; [.9B96.0020.0004.2FA6] # KANGXI RADICAL GOLD +328E ; [.9B96.0020.0006.328E] # CIRCLED IDEOGRAPH METAL +2FA7 ; [.CCB5.0020.0004.2FA7] # KANGXI RADICAL LONG +2FA8 ; [.A7B4.0020.0004.2FA8] # KANGXI RADICAL GATE +2FA9 ; [.904E.0020.0004.2FA9] # KANGXI RADICAL MOUND +2FAA ; [.A24C.0020.0004.2FAA] # KANGXI RADICAL SLAVE +2FAB ; [.D014.0020.0004.2FAB] # KANGXI RADICAL SHORT TAILED BIRD +2FAC ; [.C976.0020.0004.2FAC] # KANGXI RADICAL RAIN +2FAD ; [.B063.0020.0004.2FAD] # KANGXI RADICAL BLUE +2FAE ; [.8EB9.0020.0004.2FAE] # KANGXI RADICAL WRONG +2FAF ; [.A882.0020.0004.2FAF] # KANGXI RADICAL FACE +2FB0 ; [.9173.0020.0004.2FB0] # KANGXI RADICAL LEATHER +2FB1 ; [.BC9A.0020.0004.2FB1] # KANGXI RADICAL TANNED LEATHER +2FB2 ; [.9C9A.0020.0004.2FB2] # KANGXI RADICAL LEEK +2FB3 ; [.C745.0020.0004.2FB3] # KANGXI RADICAL SOUND +2FB4 ; [.C59E.0020.0004.2FB4] # KANGXI RADICAL LEAF +32A0 ; [.C084.0020.0006.32A0] # CIRCLED IDEOGRAPH ITEM +2FB5 ; [.8F57.0020.0004.2FB5] # KANGXI RADICAL WIND +2FB6 ; [.8EBA.0020.0004.2FB6] # KANGXI RADICAL FLY +2FB7 ; [.B58A.0020.0004.2FB7] # KANGXI RADICAL EAT +2FB8 ; [.B60F.0020.0004.2FB8] # KANGXI RADICAL HEAD +2FB9 ; [.C044.0020.0004.2FB9] # KANGXI RADICAL FRAGRANT +2FBA ; [.A690.0020.0004.2FBA] # KANGXI RADICAL HORSE +2FBB ; [.924E.0020.0004.2FBB] # KANGXI RADICAL BONE +2FBC ; [.9124.0020.0004.2FBC] # KANGXI RADICAL TALL +2FBD ; [.830C.0020.0004.2FBD] # KANGXI RADICAL HAIR +2FBE ; [.8C65.0020.0004.2FBE] # KANGXI RADICAL FIGHT +2FBF ; [.85BA.0020.0004.2FBF] # KANGXI RADICAL SACRIFICIAL WINE +2FC0 ; [.9176.0020.0004.2FC0] # KANGXI RADICAL CAULDRON +2FC1 ; [.934D.0020.0004.2FC1] # KANGXI RADICAL GHOST +2FC2 ; [.C936.0020.0004.2FC2] # KANGXI RADICAL FISH +2FC3 ; [.AABB.0020.0004.2FC3] # KANGXI RADICAL BIRD +2FC4 ; [.A552.0020.0004.2FC4] # KANGXI RADICAL SALT +2FC5 ; [.A57C.0020.0004.2FC5] # KANGXI RADICAL DEER +2FC6 ; [.A6C0.0020.0004.2FC6] # KANGXI RADICAL WHEAT +2FC7 ; [.A687.0020.0004.2FC7] # KANGXI RADICAL HEMP +2FC8 ; [.96C2.0020.0004.2FC8] # KANGXI RADICAL YELLOW +2FC9 ; [.B656.0020.0004.2FC9] # KANGXI RADICAL MILLET +2FCA ; [.94F2.0020.0004.2FCA] # KANGXI RADICAL BLACK +2FCB ; [.CE4C.0020.0004.2FCB] # KANGXI RADICAL EMBROIDERY +2FCC ; [.A87C.0020.0004.2FCC] # KANGXI RADICAL FROG +2FCD ; [.8BF6.0020.0004.2FCD] # KANGXI RADICAL TRIPOD +2FCE ; [.9261.0020.0004.2FCE] # KANGXI RADICAL DRUM +2FCF ; [.B659.0020.0004.2FCF] # KANGXI RADICAL RAT +2FD0 ; [.8237.0020.0004.2FD0] # KANGXI RADICAL NOSE +2FD1 ; [.AE64.0020.0004.2FD1] # KANGXI RADICAL EVEN +2FD2 ; [.86F0.0020.0004.2FD2] # KANGXI RADICAL TOOTH +2FD3 ; [.A4C6.0020.0004.2FD3] # KANGXI RADICAL DRAGON +2FD4 ; [.9336.0020.0004.2FD4] # KANGXI RADICAL TURTLE +2EF3 ; [.9317.0020.0004.2EF3] # CJK RADICAL C-SIMPLIFIED TURTLE +2FD5 ; [.CAB5.0020.0004.2FD5] # KANGXI RADICAL FLUTE ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/zh_strk.pl b/cpan/Unicode-Collate/Collate/Locale/zh_strk.pl index de277a18ac..d3340215c8 100644 --- a/cpan/Unicode-Collate/Collate/Locale/zh_strk.pl +++ b/cpan/Unicode-Collate/Collate/Locale/zh_strk.pl @@ -208,58 +208,543 @@ use Unicode::Collate::CJK::Stroke; 0075 0308 ; [.17E9.0025.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS 00DC ; [.17E9.0025.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS 0055 0308 ; [.17E9.0025.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS -2E84 ; [.8006.0020.0002.2E84] # CJK RADICAL SECOND THREE -3006 ; [.8011.0020.0002.3006] # IDEOGRAPHIC CLOSING MARK -3007 ; [.8012.0020.0002.3007] # IDEOGRAPHIC NUMBER ZERO -3021 ; [.8013.0020.0002.3021] # HANGZHOU NUMERAL ONE -3025 ; [.8014.0020.0002.3025] # HANGZHOU NUMERAL FIVE -303B ; [.8015.0020.0002.303B] # VERTICAL IDEOGRAPHIC ITERATION MARK -2E86 ; [.802F.0020.0002.2E86] # CJK RADICAL BOX -2E87 ; [.8033.0020.0002.2E87] # CJK RADICAL TABLE -2E88 ; [.8036.0020.0002.2E88] # CJK RADICAL KNIFE ONE -2E8A ; [.8040.0020.0002.2E8A] # CJK RADICAL DIVINATION -2E80 ; [.8045.0020.0002.2E80] # CJK RADICAL REPEAT -2ECF ; [.8049.0020.0002.2ECF] # CJK RADICAL CITY -2ED6 ; [.804A.0020.0002.2ED6] # CJK RADICAL MOUND TWO -3022 ; [.804C.0020.0002.3022] # HANGZHOU NUMERAL TWO -3024 ; [.804D.0020.0002.3024] # HANGZHOU NUMERAL FOUR -3026 ; [.804E.0020.0002.3026] # HANGZHOU NUMERAL SIX -2E8C ; [.808C.0020.0002.2E8C] # CJK RADICAL SMALL ONE -2E8D ; [.808D.0020.0002.2E8D] # CJK RADICAL SMALL TWO -2E95 ; [.80A5.0020.0002.2E95] # CJK RADICAL SNOUT TWO -2EBE ; [.80B0.0020.0002.2EBE] # CJK RADICAL GRASS ONE -2ECC ; [.80B2.0020.0002.2ECC] # CJK RADICAL SIMPLIFIED WALK -3005 ; [.80B8.0020.0002.3005] # IDEOGRAPHIC ITERATION MARK -3023 ; [.80B9.0020.0002.3023] # HANGZHOU NUMERAL THREE -3027 ; [.80BA.0020.0002.3027] # HANGZHOU NUMERAL SEVEN -2E9C ; [.8157.0020.0002.2E9C] # CJK RADICAL SUN -2E9D ; [.815A.0020.0002.2E9D] # CJK RADICAL MOON -2EA5 ; [.816C.0020.0002.2EA5] # CJK RADICAL PAW TWO -2EA7 ; [.8176.0020.0002.2EA7] # CJK RADICAL COW -2EBC ; [.8181.0020.0002.2EBC] # CJK RADICAL MEAT -2ECD ; [.818A.0020.0002.2ECD] # CJK RADICAL WALK ONE -3028 ; [.818F.0020.0002.3028] # HANGZHOU NUMERAL EIGHT -3029 ; [.8190.0020.0002.3029] # HANGZHOU NUMERAL NINE -2EAA ; [.82AE.0020.0002.2EAA] # CJK RADICAL BOLT OF CLOTH -2EAC ; [.82BA.0020.0002.2EAC] # CJK RADICAL SPIRIT ONE -2EAE ; [.84AC.0020.0002.2EAE] # CJK RADICAL BAMBOO -2EB6 ; [.84C0.0020.0002.2EB6] # CJK RADICAL SHEEP -2EC6 ; [.8873.0020.0002.2EC6] # CJK RADICAL SIMPLIFIED HORN -2ECA ; [.8893.0020.0002.2ECA] # CJK RADICAL FOOT -FA24 ; [.8E18.0020.0002.FA24] # CJK COMPATIBILITY IDEOGRAPH-FA24 -2ED7 ; [.8E53.0020.0002.2ED7] # CJK RADICAL RAIN -2EDE ; [.94A3.0020.0002.2EDE] # CJK RADICAL EAT TWO -2EE3 ; [.94BA.0020.0002.2EE3] # CJK RADICAL BONE -FA0F ; [.95E7.0020.0002.FA0F] # CJK COMPATIBILITY IDEOGRAPH-FA0F -FA21 ; [.A26E.0020.0002.FA21] # CJK COMPATIBILITY IDEOGRAPH-FA21 -FA23 ; [.A307.0020.0002.FA23] # CJK COMPATIBILITY IDEOGRAPH-FA23 -FA11 ; [.A597.0020.0002.FA11] # CJK COMPATIBILITY IDEOGRAPH-FA11 -FA0E ; [.ACC3.0020.0002.FA0E] # CJK COMPATIBILITY IDEOGRAPH-FA0E -FA13 ; [.AEEB.0020.0002.FA13] # CJK COMPATIBILITY IDEOGRAPH-FA13 -FA29 ; [.B3A7.0020.0002.FA29] # CJK COMPATIBILITY IDEOGRAPH-FA29 -FA14 ; [.B682.0020.0002.FA14] # CJK COMPATIBILITY IDEOGRAPH-FA14 -FA27 ; [.C164.0020.0002.FA27] # CJK COMPATIBILITY IDEOGRAPH-FA27 -FA28 ; [.C738.0020.0002.FA28] # CJK COMPATIBILITY IDEOGRAPH-FA28 -FA1F ; [.D22A.0020.0002.FA1F] # CJK COMPATIBILITY IDEOGRAPH-FA1F +FDD0 2801 ; [.8000.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-1> +2E84 ; [.8007.0020.0002.2E84] # CJK RADICAL SECOND THREE +3006 ; [.8012.0020.0002.3006] # IDEOGRAPHIC CLOSING MARK +3007 ; [.8013.0020.0002.3007] # IDEOGRAPHIC NUMBER ZERO +3021 ; [.8014.0020.0002.3021] # HANGZHOU NUMERAL ONE +3025 ; [.8015.0020.0002.3025] # HANGZHOU NUMERAL FIVE +303B ; [.8016.0020.0002.303B] # VERTICAL IDEOGRAPHIC ITERATION MARK +FDD0 2802 ; [.8017.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-2> +2E86 ; [.8031.0020.0002.2E86] # CJK RADICAL BOX +2E87 ; [.8035.0020.0002.2E87] # CJK RADICAL TABLE +2E88 ; [.8038.0020.0002.2E88] # CJK RADICAL KNIFE ONE +2E8A ; [.8042.0020.0002.2E8A] # CJK RADICAL DIVINATION +2E80 ; [.8047.0020.0002.2E80] # CJK RADICAL REPEAT +2ECF ; [.804B.0020.0002.2ECF] # CJK RADICAL CITY +2ED6 ; [.804C.0020.0002.2ED6] # CJK RADICAL MOUND TWO +3022 ; [.804E.0020.0002.3022] # HANGZHOU NUMERAL TWO +3024 ; [.804F.0020.0002.3024] # HANGZHOU NUMERAL FOUR +3026 ; [.8050.0020.0002.3026] # HANGZHOU NUMERAL SIX +FDD0 2803 ; [.8051.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-12> +2E8C ; [.808F.0020.0002.2E8C] # CJK RADICAL SMALL ONE +2E8D ; [.8090.0020.0002.2E8D] # CJK RADICAL SMALL TWO +2E95 ; [.80A8.0020.0002.2E95] # CJK RADICAL SNOUT TWO +2EBE ; [.80B3.0020.0002.2EBE] # CJK RADICAL GRASS ONE +2ECC ; [.80B5.0020.0002.2ECC] # CJK RADICAL SIMPLIFIED WALK +3005 ; [.80BB.0020.0002.3005] # IDEOGRAPHIC ITERATION MARK +3023 ; [.80BC.0020.0002.3023] # HANGZHOU NUMERAL THREE +3027 ; [.80BD.0020.0002.3027] # HANGZHOU NUMERAL SEVEN +FDD0 2804 ; [.80BE.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-3> +2E9C ; [.815B.0020.0002.2E9C] # CJK RADICAL SUN +2E9D ; [.815E.0020.0002.2E9D] # CJK RADICAL MOON +2EA5 ; [.8170.0020.0002.2EA5] # CJK RADICAL PAW TWO +2EA7 ; [.817A.0020.0002.2EA7] # CJK RADICAL COW +2EBC ; [.8185.0020.0002.2EBC] # CJK RADICAL MEAT +2ECD ; [.818E.0020.0002.2ECD] # CJK RADICAL WALK ONE +3028 ; [.8193.0020.0002.3028] # HANGZHOU NUMERAL EIGHT +3029 ; [.8194.0020.0002.3029] # HANGZHOU NUMERAL NINE +FDD0 2805 ; [.8195.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-13> +2EAA ; [.82B3.0020.0002.2EAA] # CJK RADICAL BOLT OF CLOTH +2EAC ; [.82BF.0020.0002.2EAC] # CJK RADICAL SPIRIT ONE +FDD0 2806 ; [.82E7.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-23> +2EAE ; [.84B2.0020.0002.2EAE] # CJK RADICAL BAMBOO +2EB6 ; [.84C6.0020.0002.2EB6] # CJK RADICAL SHEEP +FDD0 2807 ; [.8529.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-123> +2EC6 ; [.887A.0020.0002.2EC6] # CJK RADICAL SIMPLIFIED HORN +2ECA ; [.889A.0020.0002.2ECA] # CJK RADICAL FOOT +FDD0 2808 ; [.88FA.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-4> +FA24 ; [.8E20.0020.0002.FA24] # CJK COMPATIBILITY IDEOGRAPH-FA24 +2ED7 ; [.8E5B.0020.0002.2ED7] # CJK RADICAL RAIN +FDD0 2809 ; [.8E7C.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-14> +2EDE ; [.94AC.0020.0002.2EDE] # CJK RADICAL EAT TWO +2EE3 ; [.94C3.0020.0002.2EE3] # CJK RADICAL BONE +FDD0 280A ; [.94CA.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-24> +FA0F ; [.95F1.0020.0002.FA0F] # CJK COMPATIBILITY IDEOGRAPH-FA0F +FDD0 280B ; [.9C16.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-124> +FA21 ; [.A279.0020.0002.FA21] # CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23 ; [.A312.0020.0002.FA23] # CJK COMPATIBILITY IDEOGRAPH-FA23 +FDD0 280C ; [.A41D.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-34> +FA11 ; [.A5A3.0020.0002.FA11] # CJK COMPATIBILITY IDEOGRAPH-FA11 +FDD0 280D ; [.AC7F.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-134> +FA0E ; [.ACD0.0020.0002.FA0E] # CJK COMPATIBILITY IDEOGRAPH-FA0E +FA13 ; [.AEF8.0020.0002.FA13] # CJK COMPATIBILITY IDEOGRAPH-FA13 +FA29 ; [.B3B4.0020.0002.FA29] # CJK COMPATIBILITY IDEOGRAPH-FA29 +FDD0 280E ; [.B44A.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-234> +FA14 ; [.B690.0020.0002.FA14] # CJK COMPATIBILITY IDEOGRAPH-FA14 +FDD0 280F ; [.BB5A.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-1234> +FA27 ; [.C173.0020.0002.FA27] # CJK COMPATIBILITY IDEOGRAPH-FA27 +FDD0 2810 ; [.C276.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-5> +FA28 ; [.C748.0020.0002.FA28] # CJK COMPATIBILITY IDEOGRAPH-FA28 +FDD0 2811 ; [.C860.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-15> +FDD0 2812 ; [.CD3D.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-25> +FDD0 2813 ; [.D0E6.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-125> +FA1F ; [.D23D.0020.0002.FA1F] # CJK COMPATIBILITY IDEOGRAPH-FA1F +FDD0 2814 ; [.D41B.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-35> +FDD0 2815 ; [.D68C.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-135> +FDD0 2816 ; [.D846.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-235> +FDD0 2817 ; [.D993.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-1235> +FDD0 2818 ; [.DA95.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-45> +FDD0 2819 ; [.DB3D.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-145> +FDD0 281A ; [.DBB4.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-245> +FDD0 281B ; [.DBF1.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-1245> +FDD0 281C ; [.DC29.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-345> +FDD0 281D ; [.DC4D.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-1345> +FDD0 281E ; [.DC5C.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-2345> +FDD0 281F ; [.DC6C.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-12345> +FDD0 2820 ; [.DC71.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-6> +FDD0 2821 ; [.DC76.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-16> +FDD0 2823 ; [.DC7C.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-126> +FDD0 2824 ; [.DC7E.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-36> +FDD0 2827 ; [.DC80.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-1236> +FDD0 2830 ; [.DC82.0020.0002.FDD0] # <noncharacter-FDD0, BRAILLE PATTERN DOTS-56> +3220 ; [*030E.0020.0004.3220][.8001.0020.0004.3220][*030F.0020.001F.3220] # PARENTHESIZED IDEOGRAPH ONE +3226 ; [*030E.0020.0004.3226][.801A.0020.0004.3226][*030F.0020.001F.3226] # PARENTHESIZED IDEOGRAPH SEVEN +3222 ; [*030E.0020.0004.3222][.8054.0020.0004.3222][*030F.0020.001F.3222] # PARENTHESIZED IDEOGRAPH THREE +3228 ; [*030E.0020.0004.3228][.8027.0020.0004.3228][*030F.0020.001F.3228] # PARENTHESIZED IDEOGRAPH NINE +3221 ; [*030E.0020.0004.3221][.802A.0020.0004.3221][*030F.0020.001F.3221] # PARENTHESIZED IDEOGRAPH TWO +3224 ; [*030E.0020.0004.3224][.80D7.0020.0004.3224][*030F.0020.001F.3224] # PARENTHESIZED IDEOGRAPH FIVE +3239 ; [*030E.0020.0004.3239][.81C2.0020.0004.3239][*030F.0020.001F.3239] # PARENTHESIZED IDEOGRAPH REPRESENT +323D ; [*030E.0020.0004.323D][.831C.0020.0004.323D][*030F.0020.001F.323D] # PARENTHESIZED IDEOGRAPH ENTERPRISE +3241 ; [*030E.0020.0004.3241][.832C.0020.0004.3241][*030F.0020.001F.3241] # PARENTHESIZED IDEOGRAPH REST +3227 ; [*030E.0020.0004.3227][.8030.0020.0004.3227][*030F.0020.001F.3227] # PARENTHESIZED IDEOGRAPH EIGHT +3225 ; [*030E.0020.0004.3225][.80F5.0020.0004.3225][*030F.0020.001F.3225] # PARENTHESIZED IDEOGRAPH SIX +3238 ; [*030E.0020.0004.3238][.85A8.0020.0004.3238][*030F.0020.001F.3238] # PARENTHESIZED IDEOGRAPH LABOR +3229 ; [*030E.0020.0004.3229][.8041.0020.0004.3229][*030F.0020.001F.3229] # PARENTHESIZED IDEOGRAPH TEN +323F ; [*030E.0020.0004.323F][.899D.0020.0004.323F][*030F.0020.001F.323F] # PARENTHESIZED IDEOGRAPH ALLIANCE +3234 ; [*030E.0020.0004.3234][.839D.0020.0004.3234][*030F.0020.001F.3234] # PARENTHESIZED IDEOGRAPH NAME +323A ; [*030E.0020.0004.323A][.89D5.0020.0004.323A][*030F.0020.001F.323A] # PARENTHESIZED IDEOGRAPH CALL +3223 ; [*030E.0020.0004.3223][.822D.0020.0004.3223][*030F.0020.001F.3223] # PARENTHESIZED IDEOGRAPH FOUR +322F ; [*030E.0020.0004.322F][.8081.0020.0004.322F][*030F.0020.001F.322F] # PARENTHESIZED IDEOGRAPH EARTH +323B ; [*030E.0020.0004.323B][.8A8D.0020.0004.323B][*030F.0020.001F.323B] # PARENTHESIZED IDEOGRAPH STUDY +3230 ; [*030E.0020.0004.3230][.815C.0020.0004.3230][*030F.0020.001F.3230] # PARENTHESIZED IDEOGRAPH SUN +322A ; [*030E.0020.0004.322A][.815F.0020.0004.322A][*030F.0020.001F.322A] # PARENTHESIZED IDEOGRAPH MOON +3232 ; [*030E.0020.0004.3232][.845A.0020.0004.3232][*030F.0020.001F.3232] # PARENTHESIZED IDEOGRAPH HAVE +322D ; [*030E.0020.0004.322D][.8160.0020.0004.322D][*030F.0020.001F.322D] # PARENTHESIZED IDEOGRAPH WOOD +3231 ; [*030E.0020.0004.3231][.979F.0020.0004.3231][*030F.0020.001F.3231] # PARENTHESIZED IDEOGRAPH STOCK +322C ; [*030E.0020.0004.322C][.816D.0020.0004.322C][*030F.0020.001F.322C] # PARENTHESIZED IDEOGRAPH WATER +322B ; [*030E.0020.0004.322B][.816E.0020.0004.322B][*030F.0020.001F.322B] # PARENTHESIZED IDEOGRAPH FIRE +3235 ; [*030E.0020.0004.3235][.9895.0020.0004.3235][*030F.0020.001F.3235] # PARENTHESIZED IDEOGRAPH SPECIAL +323C ; [*030E.0020.0004.323C][.B79A.0020.0004.323C][*030F.0020.001F.323C] # PARENTHESIZED IDEOGRAPH SUPERVISE +3233 ; [*030E.0020.0004.3233][.8D2E.0020.0004.3233][*030F.0020.001F.3233] # PARENTHESIZED IDEOGRAPH SOCIETY +3237 ; [*030E.0020.0004.3237][.9970.0020.0004.3237][*030F.0020.001F.3237] # PARENTHESIZED IDEOGRAPH CONGRATULATION +3240 ; [*030E.0020.0004.3240][.A100.0020.0004.3240][*030F.0020.001F.3240] # PARENTHESIZED IDEOGRAPH FESTIVAL +3242 ; [*030E.0020.0004.3242][.84D8.0020.0004.3242][*030F.0020.001F.3242] # PARENTHESIZED IDEOGRAPH SELF +3243 ; [*030E.0020.0004.3243][.84D9.0020.0004.3243][*030F.0020.001F.3243] # PARENTHESIZED IDEOGRAPH REACH +3236 ; [*030E.0020.0004.3236][.9B3B.0020.0004.3236][*030F.0020.001F.3236] # PARENTHESIZED IDEOGRAPH FINANCIAL +323E ; [*030E.0020.0004.323E][.B2A5.0020.0004.323E][*030F.0020.001F.323E] # PARENTHESIZED IDEOGRAPH RESOURCE +322E ; [*030E.0020.0004.322E][.8E33.0020.0004.322E][*030F.0020.001F.322E] # PARENTHESIZED IDEOGRAPH METAL +3358 ; [.1599.0020.0004.3358][.9216.0020.0004.3358] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO +33E9 ; [.159A.0020.0004.33E9][.1599.0020.0004.33E9][.815C.0020.001F.33E9] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TEN +32C9 ; [.159A.0020.0004.32C9][.1599.0020.0004.32C9][.815F.0020.001F.32C9] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR OCTOBER +3362 ; [.159A.0020.0004.3362][.1599.0020.0004.3362][.9216.0020.001F.3362] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TEN +33EA ; [.159A.0020.0004.33EA][.159A.0020.0004.33EA][.815C.0020.001F.33EA] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ELEVEN +32CA ; [.159A.0020.0004.32CA][.159A.0020.0004.32CA][.815F.0020.001F.32CA] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR NOVEMBER +3363 ; [.159A.0020.0004.3363][.159A.0020.0004.3363][.9216.0020.001F.3363] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ELEVEN +33EB ; [.159A.0020.0004.33EB][.159B.0020.0004.33EB][.815C.0020.001F.33EB] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWELVE +32CB ; [.159A.0020.0004.32CB][.159B.0020.0004.32CB][.815F.0020.001F.32CB] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +3364 ; [.159A.0020.0004.3364][.159B.0020.0004.3364][.9216.0020.001F.3364] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWELVE +33EC ; [.159A.0020.0004.33EC][.159C.0020.0004.33EC][.815C.0020.001F.33EC] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTEEN +3365 ; [.159A.0020.0004.3365][.159C.0020.0004.3365][.9216.0020.001F.3365] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THIRTEEN +33ED ; [.159A.0020.0004.33ED][.159D.0020.0004.33ED][.815C.0020.001F.33ED] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOURTEEN +3366 ; [.159A.0020.0004.3366][.159D.0020.0004.3366][.9216.0020.001F.3366] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOURTEEN +33EE ; [.159A.0020.0004.33EE][.159E.0020.0004.33EE][.815C.0020.001F.33EE] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIFTEEN +3367 ; [.159A.0020.0004.3367][.159E.0020.0004.3367][.9216.0020.001F.3367] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIFTEEN +33EF ; [.159A.0020.0004.33EF][.159F.0020.0004.33EF][.815C.0020.001F.33EF] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIXTEEN +3368 ; [.159A.0020.0004.3368][.159F.0020.0004.3368][.9216.0020.001F.3368] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIXTEEN +33F0 ; [.159A.0020.0004.33F0][.15A0.0020.0004.33F0][.815C.0020.001F.33F0] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVENTEEN +3369 ; [.159A.0020.0004.3369][.15A0.0020.0004.3369][.9216.0020.001F.3369] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVENTEEN +33F1 ; [.159A.0020.0004.33F1][.15A1.0020.0004.33F1][.815C.0020.001F.33F1] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHTEEN +336A ; [.159A.0020.0004.336A][.15A1.0020.0004.336A][.9216.0020.001F.336A] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHTEEN +33F2 ; [.159A.0020.0004.33F2][.15A2.0020.0004.33F2][.815C.0020.001F.33F2] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINETEEN +336B ; [.159A.0020.0004.336B][.15A2.0020.0004.336B][.9216.0020.001F.336B] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINETEEN +33E0 ; [.159A.0020.0004.33E0][.815C.0020.0004.33E0] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE +32C0 ; [.159A.0020.0004.32C0][.815F.0020.0004.32C0] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY +3359 ; [.159A.0020.0004.3359][.9216.0020.0004.3359] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ONE +33F3 ; [.159B.0020.0004.33F3][.1599.0020.0004.33F3][.815C.0020.001F.33F3] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY +336C ; [.159B.0020.0004.336C][.1599.0020.0004.336C][.9216.0020.001F.336C] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY +33F4 ; [.159B.0020.0004.33F4][.159A.0020.0004.33F4][.815C.0020.001F.33F4] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-ONE +336D ; [.159B.0020.0004.336D][.159A.0020.0004.336D][.9216.0020.001F.336D] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-ONE +33F5 ; [.159B.0020.0004.33F5][.159B.0020.0004.33F5][.815C.0020.001F.33F5] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-TWO +336E ; [.159B.0020.0004.336E][.159B.0020.0004.336E][.9216.0020.001F.336E] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-TWO +33F6 ; [.159B.0020.0004.33F6][.159C.0020.0004.33F6][.815C.0020.001F.33F6] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-THREE +336F ; [.159B.0020.0004.336F][.159C.0020.0004.336F][.9216.0020.001F.336F] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-THREE +33F7 ; [.159B.0020.0004.33F7][.159D.0020.0004.33F7][.815C.0020.001F.33F7] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-FOUR +3370 ; [.159B.0020.0004.3370][.159D.0020.0004.3370][.9216.0020.001F.3370] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR +33F8 ; [.159B.0020.0004.33F8][.159E.0020.0004.33F8][.815C.0020.001F.33F8] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-FIVE +33F9 ; [.159B.0020.0004.33F9][.159F.0020.0004.33F9][.815C.0020.001F.33F9] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-SIX +33FA ; [.159B.0020.0004.33FA][.15A0.0020.0004.33FA][.815C.0020.001F.33FA] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-SEVEN +33FB ; [.159B.0020.0004.33FB][.15A1.0020.0004.33FB][.815C.0020.001F.33FB] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-EIGHT +33FC ; [.159B.0020.0004.33FC][.15A2.0020.0004.33FC][.815C.0020.001F.33FC] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-NINE +33E1 ; [.159B.0020.0004.33E1][.815C.0020.0004.33E1] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWO +32C1 ; [.159B.0020.0004.32C1][.815F.0020.0004.32C1] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR FEBRUARY +335A ; [.159B.0020.0004.335A][.9216.0020.0004.335A] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO +33FD ; [.159C.0020.0004.33FD][.1599.0020.0004.33FD][.815C.0020.001F.33FD] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY +33FE ; [.159C.0020.0004.33FE][.159A.0020.0004.33FE][.815C.0020.001F.33FE] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +33E2 ; [.159C.0020.0004.33E2][.815C.0020.0004.33E2] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THREE +32C2 ; [.159C.0020.0004.32C2][.815F.0020.0004.32C2] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR MARCH +335B ; [.159C.0020.0004.335B][.9216.0020.0004.335B] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE +33E3 ; [.159D.0020.0004.33E3][.815C.0020.0004.33E3] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOUR +32C3 ; [.159D.0020.0004.32C3][.815F.0020.0004.32C3] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR APRIL +335C ; [.159D.0020.0004.335C][.9216.0020.0004.335C] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR +33E4 ; [.159E.0020.0004.33E4][.815C.0020.0004.33E4] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIVE +32C4 ; [.159E.0020.0004.32C4][.815F.0020.0004.32C4] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR MAY +335D ; [.159E.0020.0004.335D][.9216.0020.0004.335D] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE +33E5 ; [.159F.0020.0004.33E5][.815C.0020.0004.33E5] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIX +32C5 ; [.159F.0020.0004.32C5][.815F.0020.0004.32C5] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JUNE +335E ; [.159F.0020.0004.335E][.9216.0020.0004.335E] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX +33E6 ; [.15A0.0020.0004.33E6][.815C.0020.0004.33E6] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVEN +32C6 ; [.15A0.0020.0004.32C6][.815F.0020.0004.32C6] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR JULY +335F ; [.15A0.0020.0004.335F][.9216.0020.0004.335F] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVEN +33E7 ; [.15A1.0020.0004.33E7][.815C.0020.0004.33E7] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHT +32C7 ; [.15A1.0020.0004.32C7][.815F.0020.0004.32C7] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR AUGUST +3360 ; [.15A1.0020.0004.3360][.9216.0020.0004.3360] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHT +33E8 ; [.15A2.0020.0004.33E8][.815C.0020.0004.33E8] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINE +32C8 ; [.15A2.0020.0004.32C8][.815F.0020.0004.32C8] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR SEPTEMBER +3361 ; [.15A2.0020.0004.3361][.9216.0020.0004.3361] # IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINE +1F241 ; [*0356.0020.0004.1F241][.8054.0020.0004.1F241][*0357.0020.001F.1F241] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09 +1F242 ; [*0356.0020.0004.1F242][.802A.0020.0004.1F242][*0357.0020.001F.1F242] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E8C +1F247 ; [*0356.0020.0004.1F247][.A465.0020.0004.1F247][*0357.0020.001F.1F247] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-52DD +1F243 ; [*0356.0020.0004.1F243][.83F0.0020.0004.1F243][*0357.0020.001F.1F243] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-5B89 +1F245 ; [*0356.0020.0004.1F245][.8277.0020.0004.1F245][*0357.0020.001F.1F245] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6253 +1F248 ; [*0356.0020.0004.1F248][.9EDA.0020.0004.1F248][*0357.0020.001F.1F248] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F240 ; [*0356.0020.0004.1F240][.8283.0020.0004.1F240][*0357.0020.001F.1F240] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C +1F244 ; [*0356.0020.0004.1F244][.9216.0020.0004.1F244][*0357.0020.001F.1F244] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-70B9 +1F246 ; [*0356.0020.0004.1F246][.A0BD.0020.0004.1F246][*0357.0020.001F.1F246] # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-76D7 +2F00 ; [.8001.0020.0004.2F00] # KANGXI RADICAL ONE +3192 ; [.8001.0020.0014.3192] # IDEOGRAPHIC ANNOTATION ONE MARK +3280 ; [.8001.0020.0006.3280] # CIRCLED IDEOGRAPH ONE +1F229 ; [.8001.0020.001C.1F229] # SQUARED CJK UNIFIED IDEOGRAPH-4E00 +319C ; [.8018.0020.0014.319C] # IDEOGRAPHIC ANNOTATION FOURTH MARK +3286 ; [.801A.0020.0006.3286] # CIRCLED IDEOGRAPH SEVEN +3194 ; [.8054.0020.0014.3194] # IDEOGRAPHIC ANNOTATION THREE MARK +3282 ; [.8054.0020.0006.3282] # CIRCLED IDEOGRAPH THREE +1F22A ; [.8054.0020.001C.1F22A] # SQUARED CJK UNIFIED IDEOGRAPH-4E09 +3196 ; [.8055.0020.0014.3196] # IDEOGRAPHIC ANNOTATION TOP MARK +32A4 ; [.8055.0020.0006.32A4] # CIRCLED IDEOGRAPH HIGH +3198 ; [.8056.0020.0014.3198] # IDEOGRAPHIC ANNOTATION BOTTOM MARK +32A6 ; [.8056.0020.0006.32A6] # CIRCLED IDEOGRAPH LOW +319B ; [.819C.0020.0014.319B] # IDEOGRAPHIC ANNOTATION THIRD MARK +2F01 ; [.8002.0020.0004.2F01] # KANGXI RADICAL LINE +3197 ; [.80C6.0020.0014.3197] # IDEOGRAPHIC ANNOTATION MIDDLE MARK +32A5 ; [.80C6.0020.0006.32A5] # CIRCLED IDEOGRAPH CENTRE +1F22D ; [.80C6.0020.001C.1F22D] # SQUARED CJK UNIFIED IDEOGRAPH-4E2D +2F02 ; [.8003.0020.0004.2F02] # KANGXI RADICAL DOT +2F03 ; [.8004.0020.0004.2F03] # KANGXI RADICAL SLASH +2F04 ; [.8008.0020.0004.2F04] # KANGXI RADICAL SECOND +319A ; [.8008.0020.0014.319A] # IDEOGRAPHIC ANNOTATION SECOND MARK +3288 ; [.8027.0020.0006.3288] # CIRCLED IDEOGRAPH NINE +2F05 ; [.8010.0020.0004.2F05] # KANGXI RADICAL HOOK +2F06 ; [.802A.0020.0004.2F06] # KANGXI RADICAL TWO +3193 ; [.802A.0020.0014.3193] # IDEOGRAPHIC ANNOTATION TWO MARK +3281 ; [.802A.0020.0006.3281] # CIRCLED IDEOGRAPH TWO +1F214 ; [.802A.0020.001C.1F214] # SQUARED CJK UNIFIED IDEOGRAPH-4E8C +3284 ; [.80D7.0020.0006.3284] # CIRCLED IDEOGRAPH FIVE +2F07 ; [.802B.0020.0004.2F07] # KANGXI RADICAL LID +1F218 ; [.8300.0020.001C.1F218] # SQUARED CJK UNIFIED IDEOGRAPH-4EA4 +2F08 ; [.802C.0020.0004.2F08] # KANGXI RADICAL MAN +319F ; [.802C.0020.0014.319F] # IDEOGRAPHIC ANNOTATION MAN MARK +32AD ; [.831C.0020.0006.32AD] # CIRCLED IDEOGRAPH ENTERPRISE +32A1 ; [.832C.0020.0006.32A1] # CIRCLED IDEOGRAPH REST +329D ; [.C86C.0020.0006.329D] # CIRCLED IDEOGRAPH EXCELLENT +2F09 ; [.802E.0020.0004.2F09] # KANGXI RADICAL LEGS +2F0A ; [.802F.0020.0004.2F0A] # KANGXI RADICAL ENTER +2F0B ; [.8030.0020.0004.2F0B] # KANGXI RADICAL EIGHT +3287 ; [.8030.0020.0006.3287] # CIRCLED IDEOGRAPH EIGHT +3285 ; [.80F5.0020.0006.3285] # CIRCLED IDEOGRAPH SIX +2F0C ; [.8032.0020.0004.2F0C] # KANGXI RADICAL DOWN BOX +1F21E ; [.8356.0020.001C.1F21E] # SQUARED CJK UNIFIED IDEOGRAPH-518D +2F0D ; [.8033.0020.0004.2F0D] # KANGXI RADICAL COVER +32A2 ; [.81D8.0020.0006.32A2] # CIRCLED IDEOGRAPH COPY +2F0E ; [.8034.0020.0004.2F0E] # KANGXI RADICAL ICE +2F0F ; [.8036.0020.0004.2F0F] # KANGXI RADICAL TABLE +2F10 ; [.8037.0020.0004.2F10] # KANGXI RADICAL OPEN BOX +2F11 ; [.8039.0020.0004.2F11] # KANGXI RADICAL KNIFE +1F220 ; [.858C.0020.001C.1F220] # SQUARED CJK UNIFIED IDEOGRAPH-521D +1F21C ; [.8EE7.0020.001C.1F21C] # SQUARED CJK UNIFIED IDEOGRAPH-524D +1F239 ; [.A45F.0020.001C.1F239] # SQUARED CJK UNIFIED IDEOGRAPH-5272 +2F12 ; [.803C.0020.0004.2F12] # KANGXI RADICAL POWER +3298 ; [.85A8.0020.0006.3298] # CIRCLED IDEOGRAPH LABOR +2F13 ; [.803D.0020.0004.2F13] # KANGXI RADICAL WRAP +2F14 ; [.803E.0020.0004.2F14] # KANGXI RADICAL SPOON +2F15 ; [.803F.0020.0004.2F15] # KANGXI RADICAL RIGHT OPEN BOX +2F16 ; [.8040.0020.0004.2F16] # KANGXI RADICAL HIDING ENCLOSURE +32A9 ; [.85B0.0020.0006.32A9] # CIRCLED IDEOGRAPH MEDICINE +2F17 ; [.8041.0020.0004.2F17] # KANGXI RADICAL TEN +3038 ; [.8041.0020.0004.3038] # HANGZHOU NUMERAL TEN +3289 ; [.8041.0020.0006.3289] # CIRCLED IDEOGRAPH TEN +3039 ; [.8059.0020.0004.3039] # HANGZHOU NUMERAL TWENTY +303A ; [.8118.0020.0004.303A] # HANGZHOU NUMERAL THIRTY +32AF ; [.899D.0020.0006.32AF] # CIRCLED IDEOGRAPH ALLIANCE +2F18 ; [.8043.0020.0004.2F18] # KANGXI RADICAL DIVINATION +2F19 ; [.8044.0020.0004.2F19] # KANGXI RADICAL SEAL +329E ; [.8384.0020.0006.329E] # CIRCLED IDEOGRAPH PRINT +2F1A ; [.8045.0020.0004.2F1A] # KANGXI RADICAL CLIFF +2F1B ; [.8046.0020.0004.2F1B] # KANGXI RADICAL PRIVATE +2F1C ; [.8048.0020.0004.2F1C] # KANGXI RADICAL AGAIN +1F212 ; [.8128.0020.001C.1F212] # SQUARED CJK UNIFIED IDEOGRAPH-53CC +2F1D ; [.807F.0020.0004.2F1D] # KANGXI RADICAL MOUTH +1F251 ; [.8218.0020.0006.1F251] # CIRCLED IDEOGRAPH ACCEPT +32A8 ; [.821C.0020.0006.32A8] # CIRCLED IDEOGRAPH RIGHT +1F22E ; [.821C.0020.001C.1F22E] # SQUARED CJK UNIFIED IDEOGRAPH-53F3 +1F234 ; [.8398.0020.001C.1F234] # SQUARED CJK UNIFIED IDEOGRAPH-5408 +3294 ; [.839D.0020.0006.3294] # CIRCLED IDEOGRAPH NAME +1F225 ; [.85E1.0020.001C.1F225] # SQUARED CJK UNIFIED IDEOGRAPH-5439 +3244 ; [.9CC0.0020.0006.3244] # CIRCLED IDEOGRAPH QUESTION +1F23A ; [.9CFD.0020.001C.1F23A] # SQUARED CJK UNIFIED IDEOGRAPH-55B6 +2F1E ; [.8080.0020.0004.2F1E] # KANGXI RADICAL ENCLOSURE +3195 ; [.822D.0020.0014.3195] # IDEOGRAPHIC ANNOTATION FOUR MARK +3283 ; [.822D.0020.0006.3283] # CIRCLED IDEOGRAPH FOUR +2F1F ; [.8081.0020.0004.2F1F] # KANGXI RADICAL EARTH +328F ; [.8081.0020.0006.328F] # CIRCLED IDEOGRAPH EARTH +319E ; [.83B9.0020.0014.319E] # IDEOGRAPHIC ANNOTATION EARTH MARK +2F20 ; [.8082.0020.0004.2F20] # KANGXI RADICAL SCHOLAR +1F224 ; [.8649.0020.001C.1F224] # SQUARED CJK UNIFIED IDEOGRAPH-58F0 +2F21 ; [.8083.0020.0004.2F21] # KANGXI RADICAL GO +2F22 ; [.8084.0020.0004.2F22] # KANGXI RADICAL GO SLOWLY +2F23 ; [.8085.0020.0004.2F23] # KANGXI RADICAL EVENING +1F215 ; [.83C8.0020.001C.1F215] # SQUARED CJK UNIFIED IDEOGRAPH-591A +32B0 ; [.8A3E.0020.0006.32B0] # CIRCLED IDEOGRAPH NIGHT +2F24 ; [.8086.0020.0004.2F24] # KANGXI RADICAL BIG +337D ; [.8086.0020.001C.337D][.8288.0020.001F.337D] # SQUARE ERA NAME TAISYOU +319D ; [.8130.0020.0014.319D] # IDEOGRAPHIC ANNOTATION HEAVEN MARK +1F217 ; [.8130.0020.001C.1F217] # SQUARED CJK UNIFIED IDEOGRAPH-5929 +2F25 ; [.8088.0020.0004.2F25] # KANGXI RADICAL WOMAN +329B ; [.8088.0020.0006.329B] # CIRCLED IDEOGRAPH FEMALE +2F26 ; [.8089.0020.0004.2F26] # KANGXI RADICAL CHILD +1F211 ; [.83E8.0020.001C.1F211] # SQUARED CJK UNIFIED IDEOGRAPH-5B57 +32AB ; [.8A8D.0020.0006.32AB] # CIRCLED IDEOGRAPH STUDY +2F27 ; [.808D.0020.0004.2F27] # KANGXI RADICAL ROOF +32AA ; [.8A94.0020.0006.32AA] # CIRCLED IDEOGRAPH RELIGION +2F28 ; [.808E.0020.0004.2F28] # KANGXI RADICAL INCH +2F29 ; [.8091.0020.0004.2F29] # KANGXI RADICAL SMALL +2F2A ; [.8092.0020.0004.2F2A] # KANGXI RADICAL LAME +2F2B ; [.8095.0020.0004.2F2B] # KANGXI RADICAL CORPSE +2F2C ; [.8096.0020.0004.2F2C] # KANGXI RADICAL SPROUT +2F2D ; [.8097.0020.0004.2F2D] # KANGXI RADICAL MOUNTAIN +2F2E ; [.8098.0020.0004.2F2E] # KANGXI RADICAL RIVER +2F2F ; [.809B.0020.0004.2F2F] # KANGXI RADICAL WORK +32A7 ; [.8256.0020.0006.32A7] # CIRCLED IDEOGRAPH LEFT +1F22C ; [.8256.0020.001C.1F22C] # SQUARED CJK UNIFIED IDEOGRAPH-5DE6 +2F30 ; [.809C.0020.0004.2F30] # KANGXI RADICAL ONESELF +2F31 ; [.809F.0020.0004.2F31] # KANGXI RADICAL TURBAN +2F32 ; [.80A0.0020.0004.2F32] # KANGXI RADICAL DRY +337B ; [.825F.0020.001C.337B][.8435.0020.001F.337B] # SQUARE ERA NAME HEISEI +2F33 ; [.80A2.0020.0004.2F33] # KANGXI RADICAL SHORT THREAD +3245 ; [.8260.0020.0006.3245] # CIRCLED IDEOGRAPH KINDERGARTEN +2F34 ; [.80A3.0020.0004.2F34] # KANGXI RADICAL DOTTED CLIFF +2F35 ; [.80A4.0020.0004.2F35] # KANGXI RADICAL LONG STRIDE +2F36 ; [.80A5.0020.0004.2F36] # KANGXI RADICAL TWO HANDS +2F37 ; [.80A6.0020.0004.2F37] # KANGXI RADICAL SHOOT +2F38 ; [.80A7.0020.0004.2F38] # KANGXI RADICAL BOW +2F39 ; [.80A9.0020.0004.2F39] # KANGXI RADICAL SNOUT +2F3A ; [.80AB.0020.0004.2F3A] # KANGXI RADICAL BRISTLE +2F3B ; [.80AC.0020.0004.2F3B] # KANGXI RADICAL STEP +1F21D ; [.9050.0020.001C.1F21D] # SQUARED CJK UNIFIED IDEOGRAPH-5F8C +1F250 ; [.9E0E.0020.0006.1F250] # CIRCLED IDEOGRAPH ADVANTAGE +2F3C ; [.8149.0020.0004.2F3C] # KANGXI RADICAL HEART +2F3D ; [.814B.0020.0004.2F3D] # KANGXI RADICAL HALBERD +2F3E ; [.814C.0020.0004.2F3E] # KANGXI RADICAL DOOR +2F3F ; [.814F.0020.0004.2F3F] # KANGXI RADICAL HAND +1F210 ; [.814F.0020.001C.1F210] # SQUARED CJK UNIFIED IDEOGRAPH-624B +1F231 ; [.8277.0020.001C.1F231] # SQUARED CJK UNIFIED IDEOGRAPH-6253 +1F227 ; [.873A.0020.001C.1F227] # SQUARED CJK UNIFIED IDEOGRAPH-6295 +1F22F ; [.90B6.0020.001C.1F22F] # SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F228 ; [.9724.0020.001C.1F228] # SQUARED CJK UNIFIED IDEOGRAPH-6355 +2F40 ; [.8152.0020.0004.2F40] # KANGXI RADICAL BRANCH +2F41 ; [.8153.0020.0004.2F41] # KANGXI RADICAL RAP +2F42 ; [.8155.0020.0004.2F42] # KANGXI RADICAL SCRIPT +3246 ; [.8155.0020.0006.3246] # CIRCLED IDEOGRAPH SCHOOL +2F43 ; [.8156.0020.0004.2F43] # KANGXI RADICAL DIPPER +1F21B ; [.9745.0020.001C.1F21B] # SQUARED CJK UNIFIED IDEOGRAPH-6599 +2F44 ; [.8157.0020.0004.2F44] # KANGXI RADICAL AXE +1F21F ; [.AE69.0020.001C.1F21F] # SQUARED CJK UNIFIED IDEOGRAPH-65B0 +2F45 ; [.8158.0020.0004.2F45] # KANGXI RADICAL SQUARE +2F46 ; [.8159.0020.0004.2F46] # KANGXI RADICAL NOT +2F47 ; [.815C.0020.0004.2F47] # KANGXI RADICAL SUN +3290 ; [.815C.0020.0006.3290] # CIRCLED IDEOGRAPH SUN +337E ; [.8BBF.0020.001C.337E][.8C54.0020.001F.337E] # SQUARE ERA NAME MEIZI +1F219 ; [.90F3.0020.001C.1F219] # SQUARED CJK UNIFIED IDEOGRAPH-6620 +337C ; [.9100.0020.001C.337C][.89E5.0020.001F.337C] # SQUARE ERA NAME SYOUWA +2F48 ; [.815D.0020.0004.2F48] # KANGXI RADICAL SAY +2F49 ; [.815F.0020.0004.2F49] # KANGXI RADICAL MOON +328A ; [.815F.0020.0006.328A] # CIRCLED IDEOGRAPH MOON +1F237 ; [.815F.0020.001C.1F237] # SQUARED CJK UNIFIED IDEOGRAPH-6708 +3292 ; [.845A.0020.0006.3292] # CIRCLED IDEOGRAPH HAVE +1F236 ; [.845A.0020.001C.1F236] # SQUARED CJK UNIFIED IDEOGRAPH-6709 +2F4A ; [.8160.0020.0004.2F4A] # KANGXI RADICAL TREE +328D ; [.8160.0020.0006.328D] # CIRCLED IDEOGRAPH WOOD +3291 ; [.979F.0020.0006.3291] # CIRCLED IDEOGRAPH STOCK +337F ; [.979F.0020.001C.337F][.841D.0020.001F.337F][.8335.0020.001F.337F][.8D2E.0020.001F.337F] # SQUARE CORPORATION +2F4B ; [.8163.0020.0004.2F4B] # KANGXI RADICAL LACK +2F4C ; [.8164.0020.0004.2F4C] # KANGXI RADICAL STOP +32A3 ; [.8288.0020.0006.32A3] # CIRCLED IDEOGRAPH CORRECT +2F4D ; [.8165.0020.0004.2F4D] # KANGXI RADICAL DEATH +2F4E ; [.8166.0020.0004.2F4E] # KANGXI RADICAL WEAPON +2F4F ; [.8167.0020.0004.2F4F] # KANGXI RADICAL DO NOT +2E9F ; [.828A.0020.0004.2E9F] # CJK RADICAL MOTHER +2F50 ; [.8169.0020.0004.2F50] # KANGXI RADICAL COMPARE +2F51 ; [.816A.0020.0004.2F51] # KANGXI RADICAL FUR +2F52 ; [.816B.0020.0004.2F52] # KANGXI RADICAL CLAN +2F53 ; [.816C.0020.0004.2F53] # KANGXI RADICAL STEAM +2F54 ; [.816D.0020.0004.2F54] # KANGXI RADICAL WATER +328C ; [.816D.0020.0006.328C] # CIRCLED IDEOGRAPH WATER +329F ; [.8C7F.0020.0006.329F] # CIRCLED IDEOGRAPH ATTENTION +1F235 ; [.A7C6.0020.001C.1F235] # SQUARED CJK UNIFIED IDEOGRAPH-6E80 +1F226 ; [.B6D2.0020.001C.1F226] # SQUARED CJK UNIFIED IDEOGRAPH-6F14 +2F55 ; [.816E.0020.0004.2F55] # KANGXI RADICAL FIRE +328B ; [.816E.0020.0006.328B] # CIRCLED IDEOGRAPH FIRE +1F21A ; [.A7EB.0020.001C.1F21A] # SQUARED CJK UNIFIED IDEOGRAPH-7121 +2F56 ; [.8171.0020.0004.2F56] # KANGXI RADICAL CLAW +2F57 ; [.8173.0020.0004.2F57] # KANGXI RADICAL FATHER +2F58 ; [.8174.0020.0004.2F58] # KANGXI RADICAL DOUBLE X +2F59 ; [.8176.0020.0004.2F59] # KANGXI RADICAL HALF TREE TRUNK +2F5A ; [.8177.0020.0004.2F5A] # KANGXI RADICAL SLICE +2F5B ; [.8179.0020.0004.2F5B] # KANGXI RADICAL FANG +2F5C ; [.817B.0020.0004.2F5C] # KANGXI RADICAL COW +3295 ; [.9895.0020.0006.3295] # CIRCLED IDEOGRAPH SPECIAL +2F5D ; [.817D.0020.0004.2F5D] # KANGXI RADICAL DOG +2F5E ; [.82A2.0020.0004.2F5E] # KANGXI RADICAL PROFOUND +2F5F ; [.82A3.0020.0004.2F5F] # KANGXI RADICAL JADE +2F60 ; [.82A7.0020.0004.2F60] # KANGXI RADICAL MELON +2F61 ; [.82A8.0020.0004.2F61] # KANGXI RADICAL TILE +2F62 ; [.82A9.0020.0004.2F62] # KANGXI RADICAL SWEET +2F63 ; [.82AA.0020.0004.2F63] # KANGXI RADICAL LIFE +1F222 ; [.82AA.0020.001C.1F222] # SQUARED CJK UNIFIED IDEOGRAPH-751F +2F64 ; [.82AB.0020.0004.2F64] # KANGXI RADICAL USE +2F65 ; [.82AD.0020.0004.2F65] # KANGXI RADICAL FIELD +3199 ; [.82AF.0020.0014.3199] # IDEOGRAPHIC ANNOTATION FIRST MARK +1F238 ; [.82B0.0020.001C.1F238] # SQUARED CJK UNIFIED IDEOGRAPH-7533 +329A ; [.8815.0020.0006.329A] # CIRCLED IDEOGRAPH MALE +2F66 ; [.82B4.0020.0004.2F66] # KANGXI RADICAL BOLT OF CLOTH +2F67 ; [.82B6.0020.0004.2F67] # KANGXI RADICAL SICKNESS +2F68 ; [.82B7.0020.0004.2F68] # KANGXI RADICAL DOTTED TENT +2F69 ; [.82B8.0020.0004.2F69] # KANGXI RADICAL WHITE +2F6A ; [.82B9.0020.0004.2F6A] # KANGXI RADICAL SKIN +2F6B ; [.82BA.0020.0004.2F6B] # KANGXI RADICAL DISH +32AC ; [.B79A.0020.0006.32AC] # CIRCLED IDEOGRAPH SUPERVISE +2F6C ; [.82BB.0020.0004.2F6C] # KANGXI RADICAL EYE +2F6D ; [.82BC.0020.0004.2F6D] # KANGXI RADICAL SPEAR +2F6E ; [.82BD.0020.0004.2F6E] # KANGXI RADICAL ARROW +2F6F ; [.82BE.0020.0004.2F6F] # KANGXI RADICAL STONE +2F70 ; [.82C0.0020.0004.2F70] # KANGXI RADICAL SPIRIT +3293 ; [.8D2E.0020.0006.3293] # CIRCLED IDEOGRAPH SOCIETY +3297 ; [.9970.0020.0006.3297] # CIRCLED IDEOGRAPH CONGRATULATION +1F232 ; [.B0A6.0020.001C.1F232] # SQUARED CJK UNIFIED IDEOGRAPH-7981 +2F71 ; [.82C1.0020.0004.2F71] # KANGXI RADICAL TRACK +2F72 ; [.82C2.0020.0004.2F72] # KANGXI RADICAL GRAIN +3299 ; [.9977.0020.0006.3299] # CIRCLED IDEOGRAPH SECRET +2F73 ; [.82C3.0020.0004.2F73] # KANGXI RADICAL CAVE +1F233 ; [.8D3E.0020.001C.1F233] # SQUARED CJK UNIFIED IDEOGRAPH-7A7A +2F74 ; [.82C4.0020.0004.2F74] # KANGXI RADICAL STAND +2F75 ; [.84B5.0020.0004.2F75] # KANGXI RADICAL BAMBOO +3247 ; [.B822.0020.0006.3247] # CIRCLED IDEOGRAPH KOTO +2F76 ; [.84B6.0020.0004.2F76] # KANGXI RADICAL RICE +2F77 ; [.84B7.0020.0004.2F77] # KANGXI RADICAL SILK +1F221 ; [.A172.0020.001C.1F221] # SQUARED CJK UNIFIED IDEOGRAPH-7D42 +2F78 ; [.84C4.0020.0004.2F78] # KANGXI RADICAL JAR +2F79 ; [.84C5.0020.0004.2F79] # KANGXI RADICAL NET +2F7A ; [.84C7.0020.0004.2F7A] # KANGXI RADICAL SHEEP +2F7B ; [.84CA.0020.0004.2F7B] # KANGXI RADICAL FEATHER +2F7C ; [.84CB.0020.0004.2F7C] # KANGXI RADICAL OLD +2F7D ; [.84CD.0020.0004.2F7D] # KANGXI RADICAL AND +2F7E ; [.84CE.0020.0004.2F7E] # KANGXI RADICAL PLOW +2F7F ; [.84CF.0020.0004.2F7F] # KANGXI RADICAL EAR +2F80 ; [.84D0.0020.0004.2F80] # KANGXI RADICAL BRUSH +2F81 ; [.84D2.0020.0004.2F81] # KANGXI RADICAL MEAT +2F82 ; [.84D7.0020.0004.2F82] # KANGXI RADICAL MINISTER +2F83 ; [.84D8.0020.0004.2F83] # KANGXI RADICAL SELF +2F84 ; [.84D9.0020.0004.2F84] # KANGXI RADICAL ARRIVE +2F85 ; [.84DA.0020.0004.2F85] # KANGXI RADICAL MORTAR +2F86 ; [.84DC.0020.0004.2F86] # KANGXI RADICAL TONGUE +2F87 ; [.84DD.0020.0004.2F87] # KANGXI RADICAL OPPOSE +2F88 ; [.84DE.0020.0004.2F88] # KANGXI RADICAL BOAT +2F89 ; [.84DF.0020.0004.2F89] # KANGXI RADICAL STOPPING +2F8A ; [.84E0.0020.0004.2F8A] # KANGXI RADICAL COLOR +2F8B ; [.84E1.0020.0004.2F8B] # KANGXI RADICAL GRASS +2F8C ; [.84EA.0020.0004.2F8C] # KANGXI RADICAL TIGER +2F8D ; [.84EB.0020.0004.2F8D] # KANGXI RADICAL INSECT +2F8E ; [.84EC.0020.0004.2F8E] # KANGXI RADICAL BLOOD +2F8F ; [.84ED.0020.0004.2F8F] # KANGXI RADICAL WALK ENCLOSURE +2F90 ; [.84EE.0020.0004.2F90] # KANGXI RADICAL CLOTHES +2F91 ; [.84EF.0020.0004.2F91] # KANGXI RADICAL WEST +2F92 ; [.8878.0020.0004.2F92] # KANGXI RADICAL SEE +2F93 ; [.887B.0020.0004.2F93] # KANGXI RADICAL HORN +1F216 ; [.B258.0020.001C.1F216] # SQUARED CJK UNIFIED IDEOGRAPH-89E3 +2F94 ; [.887D.0020.0004.2F94] # KANGXI RADICAL SPEECH +2F95 ; [.8891.0020.0004.2F95] # KANGXI RADICAL VALLEY +2F96 ; [.8892.0020.0004.2F96] # KANGXI RADICAL BEAN +2F97 ; [.8893.0020.0004.2F97] # KANGXI RADICAL PIG +2F98 ; [.8894.0020.0004.2F98] # KANGXI RADICAL BADGER +2F99 ; [.8895.0020.0004.2F99] # KANGXI RADICAL SHELL +3296 ; [.9B3B.0020.0006.3296] # CIRCLED IDEOGRAPH FINANCIAL +1F223 ; [.A2FD.0020.001C.1F223] # SQUARED CJK UNIFIED IDEOGRAPH-8CA9 +32AE ; [.B2A5.0020.0006.32AE] # CIRCLED IDEOGRAPH RESOURCE +2F9A ; [.8898.0020.0004.2F9A] # KANGXI RADICAL RED +2F9B ; [.8899.0020.0004.2F9B] # KANGXI RADICAL RUN +1F230 ; [.8899.0020.001C.1F230] # SQUARED CJK UNIFIED IDEOGRAPH-8D70 +2F9C ; [.889B.0020.0004.2F9C] # KANGXI RADICAL FOOT +2F9D ; [.889C.0020.0004.2F9D] # KANGXI RADICAL BODY +2F9E ; [.889D.0020.0004.2F9E] # KANGXI RADICAL CART +2F9F ; [.88A1.0020.0004.2F9F] # KANGXI RADICAL BITTER +2FA0 ; [.88A2.0020.0004.2FA0] # KANGXI RADICAL MORNING +2FA1 ; [.88A3.0020.0004.2FA1] # KANGXI RADICAL WALK +1F22B ; [.B30B.0020.001C.1F22B] # SQUARED CJK UNIFIED IDEOGRAPH-904A +329C ; [.C10A.0020.0006.329C] # CIRCLED IDEOGRAPH SUITABLE +2FA2 ; [.88B4.0020.0004.2FA2] # KANGXI RADICAL CITY +2FA3 ; [.88C5.0020.0004.2FA3] # KANGXI RADICAL WINE +2FA4 ; [.88C6.0020.0004.2FA4] # KANGXI RADICAL DISTINGUISH +2FA5 ; [.88C7.0020.0004.2FA5] # KANGXI RADICAL VILLAGE +2FA6 ; [.8E33.0020.0004.2FA6] # KANGXI RADICAL GOLD +328E ; [.8E33.0020.0006.328E] # CIRCLED IDEOGRAPH METAL +2FA7 ; [.8E40.0020.0004.2FA7] # KANGXI RADICAL LONG +2FA8 ; [.8E42.0020.0004.2FA8] # KANGXI RADICAL GATE +2FA9 ; [.8E45.0020.0004.2FA9] # KANGXI RADICAL MOUND +2FAA ; [.8E59.0020.0004.2FAA] # KANGXI RADICAL SLAVE +2FAB ; [.8E5A.0020.0004.2FAB] # KANGXI RADICAL SHORT TAILED BIRD +2FAC ; [.8E5C.0020.0004.2FAC] # KANGXI RADICAL RAIN +2FAD ; [.8E5D.0020.0004.2FAD] # KANGXI RADICAL BLUE +2FAE ; [.8E5F.0020.0004.2FAE] # KANGXI RADICAL WRONG +2FAF ; [.949C.0020.0004.2FAF] # KANGXI RADICAL FACE +2FB0 ; [.949D.0020.0004.2FB0] # KANGXI RADICAL LEATHER +2FB1 ; [.949E.0020.0004.2FB1] # KANGXI RADICAL TANNED LEATHER +2FB2 ; [.94A0.0020.0004.2FB2] # KANGXI RADICAL LEEK +2FB3 ; [.94A1.0020.0004.2FB3] # KANGXI RADICAL SOUND +2FB4 ; [.94A2.0020.0004.2FB4] # KANGXI RADICAL LEAF +32A0 ; [.AC44.0020.0006.32A0] # CIRCLED IDEOGRAPH ITEM +2FB5 ; [.94A7.0020.0004.2FB5] # KANGXI RADICAL WIND +2FB6 ; [.94AB.0020.0004.2FB6] # KANGXI RADICAL FLY +2FB7 ; [.94AD.0020.0004.2FB7] # KANGXI RADICAL EAT +2FB8 ; [.94B7.0020.0004.2FB8] # KANGXI RADICAL HEAD +2FB9 ; [.94B9.0020.0004.2FB9] # KANGXI RADICAL FRAGRANT +2FBA ; [.9BF9.0020.0004.2FBA] # KANGXI RADICAL HORSE +2FBB ; [.9C00.0020.0004.2FBB] # KANGXI RADICAL BONE +2FBC ; [.9C01.0020.0004.2FBC] # KANGXI RADICAL TALL +2FBD ; [.9C02.0020.0004.2FBD] # KANGXI RADICAL HAIR +2FBE ; [.9C03.0020.0004.2FBE] # KANGXI RADICAL FIGHT +2FBF ; [.9C04.0020.0004.2FBF] # KANGXI RADICAL SACRIFICIAL WINE +2FC0 ; [.9C05.0020.0004.2FC0] # KANGXI RADICAL CAULDRON +2FC1 ; [.9C06.0020.0004.2FC1] # KANGXI RADICAL GHOST +2FC2 ; [.A408.0020.0004.2FC2] # KANGXI RADICAL FISH +2FC3 ; [.A40A.0020.0004.2FC3] # KANGXI RADICAL BIRD +2FC4 ; [.A414.0020.0004.2FC4] # KANGXI RADICAL SALT +2FC5 ; [.A415.0020.0004.2FC5] # KANGXI RADICAL DEER +2FC6 ; [.A416.0020.0004.2FC6] # KANGXI RADICAL WHEAT +2FC7 ; [.A418.0020.0004.2FC7] # KANGXI RADICAL HEMP +2FC8 ; [.AC78.0020.0004.2FC8] # KANGXI RADICAL YELLOW +2FC9 ; [.AC7A.0020.0004.2FC9] # KANGXI RADICAL MILLET +2FCA ; [.AC7B.0020.0004.2FCA] # KANGXI RADICAL BLACK +2FCB ; [.AC7C.0020.0004.2FCB] # KANGXI RADICAL EMBROIDERY +2FCC ; [.B440.0020.0004.2FCC] # KANGXI RADICAL FROG +2FCD ; [.B442.0020.0004.2FCD] # KANGXI RADICAL TRIPOD +2FCE ; [.B443.0020.0004.2FCE] # KANGXI RADICAL DRUM +2FCF ; [.B445.0020.0004.2FCF] # KANGXI RADICAL RAT +2FD0 ; [.BB56.0020.0004.2FD0] # KANGXI RADICAL NOSE +2FD1 ; [.BB57.0020.0004.2FD1] # KANGXI RADICAL EVEN +2FD2 ; [.C273.0020.0004.2FD2] # KANGXI RADICAL TOOTH +2FD3 ; [.C85E.0020.0004.2FD3] # KANGXI RADICAL DRAGON +2FD4 ; [.C85F.0020.0004.2FD4] # KANGXI RADICAL TURTLE +2EF3 ; [.88F9.0020.0004.2EF3] # CJK RADICAL C-SIMPLIFIED TURTLE +2FD5 ; [.CD3C.0020.0004.2FD5] # KANGXI RADICAL FLUTE ENTRY }; diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README index 1218ec14d3..aafadace98 100644 --- a/cpan/Unicode-Collate/README +++ b/cpan/Unicode-Collate/README @@ -1,4 +1,4 @@ -Unicode/Collate version 0.81 +Unicode/Collate version 0.85 =============================== NAME diff --git a/cpan/Unicode-Collate/t/loc_be.t b/cpan/Unicode-Collate/t/loc_be.t index be085500d1..95f35b7fc3 100644 --- a/cpan/Unicode-Collate/t/loc_be.t +++ b/cpan/Unicode-Collate/t/loc_be.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 191 }; +BEGIN { plan tests => 189 }; use strict; use warnings; @@ -30,9 +30,9 @@ ok($objBe->getlocale, 'be'); $objBe->change(level => 1); ok($objBe->gt("\x{451}", "\x{435}")); -ok($objBe->gt("\x{401}", "\x{415}")); ok($objBe->lt("\x{451}", "\x{454}")); -ok($objBe->lt("\x{401}", "\x{404}")); + +# 4 ok($objBe->gt("\x{4E5}", "\x{438}")); # not suppressed ok($objBe->gt("\x{4E4}", "\x{418}")); # not suppressed @@ -45,7 +45,7 @@ ok($objBe->gt("\x{4F0}", "\x{423}")); # not suppressed ok($objBe->gt("\x{4F3}", "\x{443}")); # not suppressed ok($objBe->gt("\x{4F2}", "\x{423}")); # not suppressed -# 16 +# 14 ok($objBe->eq("\x{4D1}", "\x{430}")); ok($objBe->eq("\x{4D0}", "\x{410}")); @@ -88,12 +88,14 @@ ok($objBe->eq("\x{4EC}", "\x{42D}")); ok($objBe->eq("\x{477}", "\x{475}")); ok($objBe->eq("\x{476}", "\x{474}")); -# 56 +# 54 $objBe->change(level => 2); ok($objBe->eq("\x{451}", "\x{401}")); +# 55 + ok($objBe->gt("\x{4D1}", "\x{430}")); ok($objBe->gt("\x{4D0}", "\x{410}")); ok($objBe->gt("\x{4D3}", "\x{430}")); @@ -135,12 +137,14 @@ ok($objBe->gt("\x{4EC}", "\x{42D}")); ok($objBe->gt("\x{477}", "\x{475}")); ok($objBe->gt("\x{476}", "\x{474}")); -# 97 +# 95 $objBe->change(level => 3); ok($objBe->lt("\x{451}", "\x{401}")); +# 96 + ok($objBe->eq("\x{451}", "\x{435}\x{308}")); ok($objBe->eq("\x{401}", "\x{415}\x{308}")); ok($objBe->eq("\x{4E5}", "\x{438}\x{308}")); # not suppressed @@ -154,7 +158,7 @@ ok($objBe->eq("\x{4F0}", "\x{423}\x{308}")); # not suppressed ok($objBe->eq("\x{4F3}", "\x{443}\x{30B}")); # not suppressed ok($objBe->eq("\x{4F2}", "\x{423}\x{30B}")); # not suppressed -# 110 +# 108 for my $i ("", "\0") { ok($objBe->eq("\x{4D1}", "\x{430}$i\x{306}")); @@ -199,10 +203,10 @@ for my $i ("", "\0") { ok($objBe->eq("\x{476}", "\x{474}$i\x{30F}")); } -# 190 +# 188 $objBe->change(upper_before_lower => 1); ok($objBe->gt("\x{451}", "\x{401}")); -# 191 +# 189 diff --git a/cpan/Unicode-Collate/t/loc_bn.t b/cpan/Unicode-Collate/t/loc_bn.t new file mode 100644 index 0000000000..052040e3c4 --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_bn.t @@ -0,0 +1,36 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 6 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objBn = Unicode::Collate::Locale-> + new(locale => 'BN', normalization => undef); + +ok($objBn->getlocale, 'bn'); + +$objBn->change(level => 1); + +ok($objBn->lt("\x{994}", "\x{982}")); +ok($objBn->lt("\x{982}", "\x{983}")); +ok($objBn->lt("\x{983}", "\x{981}")); +ok($objBn->lt("\x{981}", "\x{995}")); + diff --git a/cpan/Unicode-Collate/t/loc_cjk.t b/cpan/Unicode-Collate/t/loc_cjk.t new file mode 100644 index 0000000000..e7bf40b922 --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_cjk.t @@ -0,0 +1,493 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 2692 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objDefault = Unicode::Collate::Locale-> + new(locale => 'DEFAULT', normalization => undef); + +ok($objDefault->getlocale, 'default'); + +my $objZhP = Unicode::Collate::Locale-> + new(locale => 'ZH__pinyin', normalization => undef); + +ok($objZhP->getlocale, 'zh__pinyin'); + +my $objZhS = Unicode::Collate::Locale-> + new(locale => 'ZH__stroke', normalization => undef); + +ok($objZhS->getlocale, 'zh__stroke'); + +for my $obj ($objDefault, $objZhP, $objZhS) { + for my $lev (2, 3) { + $obj->change(level => $lev); + my $r = $lev == 2 ? 0 : 1; + ok($obj->cmp("\x{3220}", "\(\x{4E00}\)"), $r); + ok($obj->cmp("\x{3226}", "\(\x{4E03}\)"), $r); + ok($obj->cmp("\x{3222}", "\(\x{4E09}\)"), $r); + ok($obj->cmp("\x{3228}", "\(\x{4E5D}\)"), $r); + ok($obj->cmp("\x{3221}", "\(\x{4E8C}\)"), $r); + ok($obj->cmp("\x{3224}", "\(\x{4E94}\)"), $r); + ok($obj->cmp("\x{3239}", "\(\x{4EE3}\)"), $r); + ok($obj->cmp("\x{323D}", "\(\x{4F01}\)"), $r); + ok($obj->cmp("\x{3241}", "\(\x{4F11}\)"), $r); + ok($obj->cmp("\x{3227}", "\(\x{516B}\)"), $r); + ok($obj->cmp("\x{3225}", "\(\x{516D}\)"), $r); + ok($obj->cmp("\x{3238}", "\(\x{52B4}\)"), $r); + ok($obj->cmp("\x{3229}", "\(\x{5341}\)"), $r); + ok($obj->cmp("\x{323F}", "\(\x{5354}\)"), $r); + ok($obj->cmp("\x{3234}", "\(\x{540D}\)"), $r); + ok($obj->cmp("\x{323A}", "\(\x{547C}\)"), $r); + ok($obj->cmp("\x{3223}", "\(\x{56DB}\)"), $r); + ok($obj->cmp("\x{322F}", "\(\x{571F}\)"), $r); + ok($obj->cmp("\x{323B}", "\(\x{5B66}\)"), $r); + ok($obj->cmp("\x{3230}", "\(\x{65E5}\)"), $r); + ok($obj->cmp("\x{322A}", "\(\x{6708}\)"), $r); + ok($obj->cmp("\x{3232}", "\(\x{6709}\)"), $r); + ok($obj->cmp("\x{322D}", "\(\x{6728}\)"), $r); + ok($obj->cmp("\x{3231}", "\(\x{682A}\)"), $r); + ok($obj->cmp("\x{322C}", "\(\x{6C34}\)"), $r); + ok($obj->cmp("\x{322B}", "\(\x{706B}\)"), $r); + ok($obj->cmp("\x{3235}", "\(\x{7279}\)"), $r); + ok($obj->cmp("\x{323C}", "\(\x{76E3}\)"), $r); + ok($obj->cmp("\x{3233}", "\(\x{793E}\)"), $r); + ok($obj->cmp("\x{3237}", "\(\x{795D}\)"), $r); + ok($obj->cmp("\x{3240}", "\(\x{796D}\)"), $r); + ok($obj->cmp("\x{3242}", "\(\x{81EA}\)"), $r); + ok($obj->cmp("\x{3243}", "\(\x{81F3}\)"), $r); + ok($obj->cmp("\x{3236}", "\(\x{8CA1}\)"), $r); + ok($obj->cmp("\x{323E}", "\(\x{8CC7}\)"), $r); + ok($obj->cmp("\x{322E}", "\(\x{91D1}\)"), $r); + ok($obj->cmp("\x{3358}", "0\x{70B9}"), $r); + ok($obj->cmp("\x{33E9}", "10\x{65E5}"), $r); + ok($obj->cmp("\x{32C9}", "10\x{6708}"), $r); + ok($obj->cmp("\x{3362}", "10\x{70B9}"), $r); + ok($obj->cmp("\x{33EA}", "11\x{65E5}"), $r); + ok($obj->cmp("\x{32CA}", "11\x{6708}"), $r); + ok($obj->cmp("\x{3363}", "11\x{70B9}"), $r); + ok($obj->cmp("\x{33EB}", "12\x{65E5}"), $r); + ok($obj->cmp("\x{32CB}", "12\x{6708}"), $r); + ok($obj->cmp("\x{3364}", "12\x{70B9}"), $r); + ok($obj->cmp("\x{33EC}", "13\x{65E5}"), $r); + ok($obj->cmp("\x{3365}", "13\x{70B9}"), $r); + ok($obj->cmp("\x{33ED}", "14\x{65E5}"), $r); + ok($obj->cmp("\x{3366}", "14\x{70B9}"), $r); + ok($obj->cmp("\x{33EE}", "15\x{65E5}"), $r); + ok($obj->cmp("\x{3367}", "15\x{70B9}"), $r); + ok($obj->cmp("\x{33EF}", "16\x{65E5}"), $r); + ok($obj->cmp("\x{3368}", "16\x{70B9}"), $r); + ok($obj->cmp("\x{33F0}", "17\x{65E5}"), $r); + ok($obj->cmp("\x{3369}", "17\x{70B9}"), $r); + ok($obj->cmp("\x{33F1}", "18\x{65E5}"), $r); + ok($obj->cmp("\x{336A}", "18\x{70B9}"), $r); + ok($obj->cmp("\x{33F2}", "19\x{65E5}"), $r); + ok($obj->cmp("\x{336B}", "19\x{70B9}"), $r); + ok($obj->cmp("\x{33E0}", "1\x{65E5}"), $r); + ok($obj->cmp("\x{32C0}", "1\x{6708}"), $r); + ok($obj->cmp("\x{3359}", "1\x{70B9}"), $r); + ok($obj->cmp("\x{33F3}", "20\x{65E5}"), $r); + ok($obj->cmp("\x{336C}", "20\x{70B9}"), $r); + ok($obj->cmp("\x{33F4}", "21\x{65E5}"), $r); + ok($obj->cmp("\x{336D}", "21\x{70B9}"), $r); + ok($obj->cmp("\x{33F5}", "22\x{65E5}"), $r); + ok($obj->cmp("\x{336E}", "22\x{70B9}"), $r); + ok($obj->cmp("\x{33F6}", "23\x{65E5}"), $r); + ok($obj->cmp("\x{336F}", "23\x{70B9}"), $r); + ok($obj->cmp("\x{33F7}", "24\x{65E5}"), $r); + ok($obj->cmp("\x{3370}", "24\x{70B9}"), $r); + ok($obj->cmp("\x{33F8}", "25\x{65E5}"), $r); + ok($obj->cmp("\x{33F9}", "26\x{65E5}"), $r); + ok($obj->cmp("\x{33FA}", "27\x{65E5}"), $r); + ok($obj->cmp("\x{33FB}", "28\x{65E5}"), $r); + ok($obj->cmp("\x{33FC}", "29\x{65E5}"), $r); + ok($obj->cmp("\x{33E1}", "2\x{65E5}"), $r); + ok($obj->cmp("\x{32C1}", "2\x{6708}"), $r); + ok($obj->cmp("\x{335A}", "2\x{70B9}"), $r); + ok($obj->cmp("\x{33FD}", "30\x{65E5}"), $r); + ok($obj->cmp("\x{33FE}", "31\x{65E5}"), $r); + ok($obj->cmp("\x{33E2}", "3\x{65E5}"), $r); + ok($obj->cmp("\x{32C2}", "3\x{6708}"), $r); + ok($obj->cmp("\x{335B}", "3\x{70B9}"), $r); + ok($obj->cmp("\x{33E3}", "4\x{65E5}"), $r); + ok($obj->cmp("\x{32C3}", "4\x{6708}"), $r); + ok($obj->cmp("\x{335C}", "4\x{70B9}"), $r); + ok($obj->cmp("\x{33E4}", "5\x{65E5}"), $r); + ok($obj->cmp("\x{32C4}", "5\x{6708}"), $r); + ok($obj->cmp("\x{335D}", "5\x{70B9}"), $r); + ok($obj->cmp("\x{33E5}", "6\x{65E5}"), $r); + ok($obj->cmp("\x{32C5}", "6\x{6708}"), $r); + ok($obj->cmp("\x{335E}", "6\x{70B9}"), $r); + ok($obj->cmp("\x{33E6}", "7\x{65E5}"), $r); + ok($obj->cmp("\x{32C6}", "7\x{6708}"), $r); + ok($obj->cmp("\x{335F}", "7\x{70B9}"), $r); + ok($obj->cmp("\x{33E7}", "8\x{65E5}"), $r); + ok($obj->cmp("\x{32C7}", "8\x{6708}"), $r); + ok($obj->cmp("\x{3360}", "8\x{70B9}"), $r); + ok($obj->cmp("\x{33E8}", "9\x{65E5}"), $r); + ok($obj->cmp("\x{32C8}", "9\x{6708}"), $r); + ok($obj->cmp("\x{3361}", "9\x{70B9}"), $r); + ok($obj->cmp("\x{1F241}", "\x{3014}\x{4E09}\x{3015}"), $r); + ok($obj->cmp("\x{1F242}", "\x{3014}\x{4E8C}\x{3015}"), $r); + ok($obj->cmp("\x{1F247}", "\x{3014}\x{52DD}\x{3015}"), $r); + ok($obj->cmp("\x{1F243}", "\x{3014}\x{5B89}\x{3015}"), $r); + ok($obj->cmp("\x{1F245}", "\x{3014}\x{6253}\x{3015}"), $r); + ok($obj->cmp("\x{1F248}", "\x{3014}\x{6557}\x{3015}"), $r); + ok($obj->cmp("\x{1F240}", "\x{3014}\x{672C}\x{3015}"), $r); + ok($obj->cmp("\x{1F244}", "\x{3014}\x{70B9}\x{3015}"), $r); + ok($obj->cmp("\x{1F246}", "\x{3014}\x{76D7}\x{3015}"), $r); + ok($obj->cmp("\x{2F00}", "\x{4E00}"), $r); + ok($obj->cmp("\x{3192}", "\x{4E00}"), $r); + ok($obj->cmp("\x{3280}", "\x{4E00}"), $r); + ok($obj->cmp("\x{1F229}", "\x{4E00}"), $r); + ok($obj->cmp("\x{319C}", "\x{4E01}"), $r); + ok($obj->cmp("\x{3286}", "\x{4E03}"), $r); + ok($obj->cmp("\x{3194}", "\x{4E09}"), $r); + ok($obj->cmp("\x{3282}", "\x{4E09}"), $r); + ok($obj->cmp("\x{1F22A}", "\x{4E09}"), $r); + ok($obj->cmp("\x{3196}", "\x{4E0A}"), $r); + ok($obj->cmp("\x{32A4}", "\x{4E0A}"), $r); + ok($obj->cmp("\x{3198}", "\x{4E0B}"), $r); + ok($obj->cmp("\x{32A6}", "\x{4E0B}"), $r); + ok($obj->cmp("\x{319B}", "\x{4E19}"), $r); + ok($obj->cmp("\x{2F01}", "\x{4E28}"), $r); + ok($obj->cmp("\x{3197}", "\x{4E2D}"), $r); + ok($obj->cmp("\x{32A5}", "\x{4E2D}"), $r); + ok($obj->cmp("\x{1F22D}", "\x{4E2D}"), $r); + ok($obj->cmp("\x{2F02}", "\x{4E36}"), $r); + ok($obj->cmp("\x{2F03}", "\x{4E3F}"), $r); + ok($obj->cmp("\x{2F04}", "\x{4E59}"), $r); + ok($obj->cmp("\x{319A}", "\x{4E59}"), $r); + ok($obj->cmp("\x{3288}", "\x{4E5D}"), $r); + ok($obj->cmp("\x{2F05}", "\x{4E85}"), $r); + ok($obj->cmp("\x{2F06}", "\x{4E8C}"), $r); + ok($obj->cmp("\x{3193}", "\x{4E8C}"), $r); + ok($obj->cmp("\x{3281}", "\x{4E8C}"), $r); + ok($obj->cmp("\x{1F214}", "\x{4E8C}"), $r); + ok($obj->cmp("\x{3284}", "\x{4E94}"), $r); + ok($obj->cmp("\x{2F07}", "\x{4EA0}"), $r); + ok($obj->cmp("\x{1F218}", "\x{4EA4}"), $r); + ok($obj->cmp("\x{2F08}", "\x{4EBA}"), $r); + ok($obj->cmp("\x{319F}", "\x{4EBA}"), $r); + ok($obj->cmp("\x{32AD}", "\x{4F01}"), $r); + ok($obj->cmp("\x{32A1}", "\x{4F11}"), $r); + ok($obj->cmp("\x{329D}", "\x{512A}"), $r); + ok($obj->cmp("\x{2F09}", "\x{513F}"), $r); + ok($obj->cmp("\x{2F0A}", "\x{5165}"), $r); + ok($obj->cmp("\x{2F0B}", "\x{516B}"), $r); + ok($obj->cmp("\x{3287}", "\x{516B}"), $r); + ok($obj->cmp("\x{3285}", "\x{516D}"), $r); + ok($obj->cmp("\x{2F0C}", "\x{5182}"), $r); + ok($obj->cmp("\x{1F21E}", "\x{518D}"), $r); + ok($obj->cmp("\x{2F0D}", "\x{5196}"), $r); + ok($obj->cmp("\x{32A2}", "\x{5199}"), $r); + ok($obj->cmp("\x{2F0E}", "\x{51AB}"), $r); + ok($obj->cmp("\x{2F0F}", "\x{51E0}"), $r); + ok($obj->cmp("\x{2F10}", "\x{51F5}"), $r); + ok($obj->cmp("\x{2F11}", "\x{5200}"), $r); + ok($obj->cmp("\x{1F220}", "\x{521D}"), $r); + ok($obj->cmp("\x{1F21C}", "\x{524D}"), $r); + ok($obj->cmp("\x{1F239}", "\x{5272}"), $r); + ok($obj->cmp("\x{2F12}", "\x{529B}"), $r); + ok($obj->cmp("\x{3298}", "\x{52B4}"), $r); + ok($obj->cmp("\x{2F13}", "\x{52F9}"), $r); + ok($obj->cmp("\x{2F14}", "\x{5315}"), $r); + ok($obj->cmp("\x{2F15}", "\x{531A}"), $r); + ok($obj->cmp("\x{2F16}", "\x{5338}"), $r); + ok($obj->cmp("\x{32A9}", "\x{533B}"), $r); + ok($obj->cmp("\x{2F17}", "\x{5341}"), $r); + ok($obj->cmp("\x{3038}", "\x{5341}"), $r); + ok($obj->cmp("\x{3289}", "\x{5341}"), $r); + ok($obj->cmp("\x{3039}", "\x{5344}"), $r); + ok($obj->cmp("\x{303A}", "\x{5345}"), $r); + ok($obj->cmp("\x{32AF}", "\x{5354}"), $r); + ok($obj->cmp("\x{2F18}", "\x{535C}"), $r); + ok($obj->cmp("\x{2F19}", "\x{5369}"), $r); + ok($obj->cmp("\x{329E}", "\x{5370}"), $r); + ok($obj->cmp("\x{2F1A}", "\x{5382}"), $r); + ok($obj->cmp("\x{2F1B}", "\x{53B6}"), $r); + ok($obj->cmp("\x{2F1C}", "\x{53C8}"), $r); + ok($obj->cmp("\x{1F212}", "\x{53CC}"), $r); + ok($obj->cmp("\x{2F1D}", "\x{53E3}"), $r); + ok($obj->cmp("\x{1F251}", "\x{53EF}"), $r); + ok($obj->cmp("\x{32A8}", "\x{53F3}"), $r); + ok($obj->cmp("\x{1F22E}", "\x{53F3}"), $r); + ok($obj->cmp("\x{1F234}", "\x{5408}"), $r); + ok($obj->cmp("\x{3294}", "\x{540D}"), $r); + ok($obj->cmp("\x{1F225}", "\x{5439}"), $r); + ok($obj->cmp("\x{3244}", "\x{554F}"), $r); + ok($obj->cmp("\x{1F23A}", "\x{55B6}"), $r); + ok($obj->cmp("\x{2F1E}", "\x{56D7}"), $r); + ok($obj->cmp("\x{3195}", "\x{56DB}"), $r); + ok($obj->cmp("\x{3283}", "\x{56DB}"), $r); + ok($obj->cmp("\x{2F1F}", "\x{571F}"), $r); + ok($obj->cmp("\x{328F}", "\x{571F}"), $r); + ok($obj->cmp("\x{319E}", "\x{5730}"), $r); + ok($obj->cmp("\x{2F20}", "\x{58EB}"), $r); + ok($obj->cmp("\x{1F224}", "\x{58F0}"), $r); + ok($obj->cmp("\x{2F21}", "\x{5902}"), $r); + ok($obj->cmp("\x{2F22}", "\x{590A}"), $r); + ok($obj->cmp("\x{2F23}", "\x{5915}"), $r); + ok($obj->cmp("\x{1F215}", "\x{591A}"), $r); + ok($obj->cmp("\x{32B0}", "\x{591C}"), $r); + ok($obj->cmp("\x{2F24}", "\x{5927}"), $r); + ok($obj->cmp("\x{337D}", "\x{5927}\x{6B63}"), $r); + ok($obj->cmp("\x{319D}", "\x{5929}"), $r); + ok($obj->cmp("\x{1F217}", "\x{5929}"), $r); + ok($obj->cmp("\x{2F25}", "\x{5973}"), $r); + ok($obj->cmp("\x{329B}", "\x{5973}"), $r); + ok($obj->cmp("\x{2F26}", "\x{5B50}"), $r); + ok($obj->cmp("\x{1F211}", "\x{5B57}"), $r); + ok($obj->cmp("\x{32AB}", "\x{5B66}"), $r); + ok($obj->cmp("\x{2F27}", "\x{5B80}"), $r); + ok($obj->cmp("\x{32AA}", "\x{5B97}"), $r); + ok($obj->cmp("\x{2F28}", "\x{5BF8}"), $r); + ok($obj->cmp("\x{2F29}", "\x{5C0F}"), $r); + ok($obj->cmp("\x{2F2A}", "\x{5C22}"), $r); + ok($obj->cmp("\x{2F2B}", "\x{5C38}"), $r); + ok($obj->cmp("\x{2F2C}", "\x{5C6E}"), $r); + ok($obj->cmp("\x{2F2D}", "\x{5C71}"), $r); + ok($obj->cmp("\x{2F2E}", "\x{5DDB}"), $r); + ok($obj->cmp("\x{2F2F}", "\x{5DE5}"), $r); + ok($obj->cmp("\x{32A7}", "\x{5DE6}"), $r); + ok($obj->cmp("\x{1F22C}", "\x{5DE6}"), $r); + ok($obj->cmp("\x{2F30}", "\x{5DF1}"), $r); + ok($obj->cmp("\x{2F31}", "\x{5DFE}"), $r); + ok($obj->cmp("\x{2F32}", "\x{5E72}"), $r); + ok($obj->cmp("\x{337B}", "\x{5E73}\x{6210}"), $r); + ok($obj->cmp("\x{2F33}", "\x{5E7A}"), $r); + ok($obj->cmp("\x{3245}", "\x{5E7C}"), $r); + ok($obj->cmp("\x{2F34}", "\x{5E7F}"), $r); + ok($obj->cmp("\x{2F35}", "\x{5EF4}"), $r); + ok($obj->cmp("\x{2F36}", "\x{5EFE}"), $r); + ok($obj->cmp("\x{2F37}", "\x{5F0B}"), $r); + ok($obj->cmp("\x{2F38}", "\x{5F13}"), $r); + ok($obj->cmp("\x{2F39}", "\x{5F50}"), $r); + ok($obj->cmp("\x{2F3A}", "\x{5F61}"), $r); + ok($obj->cmp("\x{2F3B}", "\x{5F73}"), $r); + ok($obj->cmp("\x{1F21D}", "\x{5F8C}"), $r); + ok($obj->cmp("\x{1F250}", "\x{5F97}"), $r); + ok($obj->cmp("\x{2F3C}", "\x{5FC3}"), $r); + ok($obj->cmp("\x{2F3D}", "\x{6208}"), $r); + ok($obj->cmp("\x{2F3E}", "\x{6236}"), $r); + ok($obj->cmp("\x{2F3F}", "\x{624B}"), $r); + ok($obj->cmp("\x{1F210}", "\x{624B}"), $r); + ok($obj->cmp("\x{1F231}", "\x{6253}"), $r); + ok($obj->cmp("\x{1F227}", "\x{6295}"), $r); + ok($obj->cmp("\x{1F22F}", "\x{6307}"), $r); + ok($obj->cmp("\x{1F228}", "\x{6355}"), $r); + ok($obj->cmp("\x{2F40}", "\x{652F}"), $r); + ok($obj->cmp("\x{2F41}", "\x{6534}"), $r); + ok($obj->cmp("\x{2F42}", "\x{6587}"), $r); + ok($obj->cmp("\x{3246}", "\x{6587}"), $r); + ok($obj->cmp("\x{2F43}", "\x{6597}"), $r); + ok($obj->cmp("\x{1F21B}", "\x{6599}"), $r); + ok($obj->cmp("\x{2F44}", "\x{65A4}"), $r); + ok($obj->cmp("\x{1F21F}", "\x{65B0}"), $r); + ok($obj->cmp("\x{2F45}", "\x{65B9}"), $r); + ok($obj->cmp("\x{2F46}", "\x{65E0}"), $r); + ok($obj->cmp("\x{2F47}", "\x{65E5}"), $r); + ok($obj->cmp("\x{3290}", "\x{65E5}"), $r); + ok($obj->cmp("\x{337E}", "\x{660E}\x{6CBB}"), $r); + ok($obj->cmp("\x{1F219}", "\x{6620}"), $r); + ok($obj->cmp("\x{337C}", "\x{662D}\x{548C}"), $r); + ok($obj->cmp("\x{2F48}", "\x{66F0}"), $r); + ok($obj->cmp("\x{2F49}", "\x{6708}"), $r); + ok($obj->cmp("\x{328A}", "\x{6708}"), $r); + ok($obj->cmp("\x{1F237}", "\x{6708}"), $r); + ok($obj->cmp("\x{3292}", "\x{6709}"), $r); + ok($obj->cmp("\x{1F236}", "\x{6709}"), $r); + ok($obj->cmp("\x{2F4A}", "\x{6728}"), $r); + ok($obj->cmp("\x{328D}", "\x{6728}"), $r); + ok($obj->cmp("\x{3291}", "\x{682A}"), $r); + ok($obj->cmp("\x{337F}", "\x{682A}\x{5F0F}\x{4F1A}\x{793E}"), $r); + ok($obj->cmp("\x{2F4B}", "\x{6B20}"), $r); + ok($obj->cmp("\x{2F4C}", "\x{6B62}"), $r); + ok($obj->cmp("\x{32A3}", "\x{6B63}"), $r); + ok($obj->cmp("\x{2F4D}", "\x{6B79}"), $r); + ok($obj->cmp("\x{2F4E}", "\x{6BB3}"), $r); + ok($obj->cmp("\x{2F4F}", "\x{6BCB}"), $r); + ok($obj->cmp("\x{2E9F}", "\x{6BCD}"), $r); + ok($obj->cmp("\x{2F50}", "\x{6BD4}"), $r); + ok($obj->cmp("\x{2F51}", "\x{6BDB}"), $r); + ok($obj->cmp("\x{2F52}", "\x{6C0F}"), $r); + ok($obj->cmp("\x{2F53}", "\x{6C14}"), $r); + ok($obj->cmp("\x{2F54}", "\x{6C34}"), $r); + ok($obj->cmp("\x{328C}", "\x{6C34}"), $r); + ok($obj->cmp("\x{329F}", "\x{6CE8}"), $r); + ok($obj->cmp("\x{1F235}", "\x{6E80}"), $r); + ok($obj->cmp("\x{1F226}", "\x{6F14}"), $r); + ok($obj->cmp("\x{2F55}", "\x{706B}"), $r); + ok($obj->cmp("\x{328B}", "\x{706B}"), $r); + ok($obj->cmp("\x{1F21A}", "\x{7121}"), $r); + ok($obj->cmp("\x{2F56}", "\x{722A}"), $r); + ok($obj->cmp("\x{2F57}", "\x{7236}"), $r); + ok($obj->cmp("\x{2F58}", "\x{723B}"), $r); + ok($obj->cmp("\x{2F59}", "\x{723F}"), $r); + ok($obj->cmp("\x{2F5A}", "\x{7247}"), $r); + ok($obj->cmp("\x{2F5B}", "\x{7259}"), $r); + ok($obj->cmp("\x{2F5C}", "\x{725B}"), $r); + ok($obj->cmp("\x{3295}", "\x{7279}"), $r); + ok($obj->cmp("\x{2F5D}", "\x{72AC}"), $r); + ok($obj->cmp("\x{2F5E}", "\x{7384}"), $r); + ok($obj->cmp("\x{2F5F}", "\x{7389}"), $r); + ok($obj->cmp("\x{2F60}", "\x{74DC}"), $r); + ok($obj->cmp("\x{2F61}", "\x{74E6}"), $r); + ok($obj->cmp("\x{2F62}", "\x{7518}"), $r); + ok($obj->cmp("\x{2F63}", "\x{751F}"), $r); + ok($obj->cmp("\x{1F222}", "\x{751F}"), $r); + ok($obj->cmp("\x{2F64}", "\x{7528}"), $r); + ok($obj->cmp("\x{2F65}", "\x{7530}"), $r); + ok($obj->cmp("\x{3199}", "\x{7532}"), $r); + ok($obj->cmp("\x{1F238}", "\x{7533}"), $r); + ok($obj->cmp("\x{329A}", "\x{7537}"), $r); + ok($obj->cmp("\x{2F66}", "\x{758B}"), $r); + ok($obj->cmp("\x{2F67}", "\x{7592}"), $r); + ok($obj->cmp("\x{2F68}", "\x{7676}"), $r); + ok($obj->cmp("\x{2F69}", "\x{767D}"), $r); + ok($obj->cmp("\x{2F6A}", "\x{76AE}"), $r); + ok($obj->cmp("\x{2F6B}", "\x{76BF}"), $r); + ok($obj->cmp("\x{32AC}", "\x{76E3}"), $r); + ok($obj->cmp("\x{2F6C}", "\x{76EE}"), $r); + ok($obj->cmp("\x{2F6D}", "\x{77DB}"), $r); + ok($obj->cmp("\x{2F6E}", "\x{77E2}"), $r); + ok($obj->cmp("\x{2F6F}", "\x{77F3}"), $r); + ok($obj->cmp("\x{2F70}", "\x{793A}"), $r); + ok($obj->cmp("\x{3293}", "\x{793E}"), $r); + ok($obj->cmp("\x{3297}", "\x{795D}"), $r); + ok($obj->cmp("\x{1F232}", "\x{7981}"), $r); + ok($obj->cmp("\x{2F71}", "\x{79B8}"), $r); + ok($obj->cmp("\x{2F72}", "\x{79BE}"), $r); + ok($obj->cmp("\x{3299}", "\x{79D8}"), $r); + ok($obj->cmp("\x{2F73}", "\x{7A74}"), $r); + ok($obj->cmp("\x{1F233}", "\x{7A7A}"), $r); + ok($obj->cmp("\x{2F74}", "\x{7ACB}"), $r); + ok($obj->cmp("\x{2F75}", "\x{7AF9}"), $r); + ok($obj->cmp("\x{3247}", "\x{7B8F}"), $r); + ok($obj->cmp("\x{2F76}", "\x{7C73}"), $r); + ok($obj->cmp("\x{2F77}", "\x{7CF8}"), $r); + ok($obj->cmp("\x{1F221}", "\x{7D42}"), $r); + ok($obj->cmp("\x{2F78}", "\x{7F36}"), $r); + ok($obj->cmp("\x{2F79}", "\x{7F51}"), $r); + ok($obj->cmp("\x{2F7A}", "\x{7F8A}"), $r); + ok($obj->cmp("\x{2F7B}", "\x{7FBD}"), $r); + ok($obj->cmp("\x{2F7C}", "\x{8001}"), $r); + ok($obj->cmp("\x{2F7D}", "\x{800C}"), $r); + ok($obj->cmp("\x{2F7E}", "\x{8012}"), $r); + ok($obj->cmp("\x{2F7F}", "\x{8033}"), $r); + ok($obj->cmp("\x{2F80}", "\x{807F}"), $r); + ok($obj->cmp("\x{2F81}", "\x{8089}"), $r); + ok($obj->cmp("\x{2F82}", "\x{81E3}"), $r); + ok($obj->cmp("\x{2F83}", "\x{81EA}"), $r); + ok($obj->cmp("\x{2F84}", "\x{81F3}"), $r); + ok($obj->cmp("\x{2F85}", "\x{81FC}"), $r); + ok($obj->cmp("\x{2F86}", "\x{820C}"), $r); + ok($obj->cmp("\x{2F87}", "\x{821B}"), $r); + ok($obj->cmp("\x{2F88}", "\x{821F}"), $r); + ok($obj->cmp("\x{2F89}", "\x{826E}"), $r); + ok($obj->cmp("\x{2F8A}", "\x{8272}"), $r); + ok($obj->cmp("\x{2F8B}", "\x{8278}"), $r); + ok($obj->cmp("\x{2F8C}", "\x{864D}"), $r); + ok($obj->cmp("\x{2F8D}", "\x{866B}"), $r); + ok($obj->cmp("\x{2F8E}", "\x{8840}"), $r); + ok($obj->cmp("\x{2F8F}", "\x{884C}"), $r); + ok($obj->cmp("\x{2F90}", "\x{8863}"), $r); + ok($obj->cmp("\x{2F91}", "\x{897E}"), $r); + ok($obj->cmp("\x{2F92}", "\x{898B}"), $r); + ok($obj->cmp("\x{2F93}", "\x{89D2}"), $r); + ok($obj->cmp("\x{1F216}", "\x{89E3}"), $r); + ok($obj->cmp("\x{2F94}", "\x{8A00}"), $r); + ok($obj->cmp("\x{2F95}", "\x{8C37}"), $r); + ok($obj->cmp("\x{2F96}", "\x{8C46}"), $r); + ok($obj->cmp("\x{2F97}", "\x{8C55}"), $r); + ok($obj->cmp("\x{2F98}", "\x{8C78}"), $r); + ok($obj->cmp("\x{2F99}", "\x{8C9D}"), $r); + ok($obj->cmp("\x{3296}", "\x{8CA1}"), $r); + ok($obj->cmp("\x{1F223}", "\x{8CA9}"), $r); + ok($obj->cmp("\x{32AE}", "\x{8CC7}"), $r); + ok($obj->cmp("\x{2F9A}", "\x{8D64}"), $r); + ok($obj->cmp("\x{2F9B}", "\x{8D70}"), $r); + ok($obj->cmp("\x{1F230}", "\x{8D70}"), $r); + ok($obj->cmp("\x{2F9C}", "\x{8DB3}"), $r); + ok($obj->cmp("\x{2F9D}", "\x{8EAB}"), $r); + ok($obj->cmp("\x{2F9E}", "\x{8ECA}"), $r); + ok($obj->cmp("\x{2F9F}", "\x{8F9B}"), $r); + ok($obj->cmp("\x{2FA0}", "\x{8FB0}"), $r); + ok($obj->cmp("\x{2FA1}", "\x{8FB5}"), $r); + ok($obj->cmp("\x{1F22B}", "\x{904A}"), $r); + ok($obj->cmp("\x{329C}", "\x{9069}"), $r); + ok($obj->cmp("\x{2FA2}", "\x{9091}"), $r); + ok($obj->cmp("\x{2FA3}", "\x{9149}"), $r); + ok($obj->cmp("\x{2FA4}", "\x{91C6}"), $r); + ok($obj->cmp("\x{2FA5}", "\x{91CC}"), $r); + ok($obj->cmp("\x{2FA6}", "\x{91D1}"), $r); + ok($obj->cmp("\x{328E}", "\x{91D1}"), $r); + ok($obj->cmp("\x{2FA7}", "\x{9577}"), $r); + ok($obj->cmp("\x{2FA8}", "\x{9580}"), $r); + ok($obj->cmp("\x{2FA9}", "\x{961C}"), $r); + ok($obj->cmp("\x{2FAA}", "\x{96B6}"), $r); + ok($obj->cmp("\x{2FAB}", "\x{96B9}"), $r); + ok($obj->cmp("\x{2FAC}", "\x{96E8}"), $r); + ok($obj->cmp("\x{2FAD}", "\x{9751}"), $r); + ok($obj->cmp("\x{2FAE}", "\x{975E}"), $r); + ok($obj->cmp("\x{2FAF}", "\x{9762}"), $r); + ok($obj->cmp("\x{2FB0}", "\x{9769}"), $r); + ok($obj->cmp("\x{2FB1}", "\x{97CB}"), $r); + ok($obj->cmp("\x{2FB2}", "\x{97ED}"), $r); + ok($obj->cmp("\x{2FB3}", "\x{97F3}"), $r); + ok($obj->cmp("\x{2FB4}", "\x{9801}"), $r); + ok($obj->cmp("\x{32A0}", "\x{9805}"), $r); + ok($obj->cmp("\x{2FB5}", "\x{98A8}"), $r); + ok($obj->cmp("\x{2FB6}", "\x{98DB}"), $r); + ok($obj->cmp("\x{2FB7}", "\x{98DF}"), $r); + ok($obj->cmp("\x{2FB8}", "\x{9996}"), $r); + ok($obj->cmp("\x{2FB9}", "\x{9999}"), $r); + ok($obj->cmp("\x{2FBA}", "\x{99AC}"), $r); + ok($obj->cmp("\x{2FBB}", "\x{9AA8}"), $r); + ok($obj->cmp("\x{2FBC}", "\x{9AD8}"), $r); + ok($obj->cmp("\x{2FBD}", "\x{9ADF}"), $r); + ok($obj->cmp("\x{2FBE}", "\x{9B25}"), $r); + ok($obj->cmp("\x{2FBF}", "\x{9B2F}"), $r); + ok($obj->cmp("\x{2FC0}", "\x{9B32}"), $r); + ok($obj->cmp("\x{2FC1}", "\x{9B3C}"), $r); + ok($obj->cmp("\x{2FC2}", "\x{9B5A}"), $r); + ok($obj->cmp("\x{2FC3}", "\x{9CE5}"), $r); + ok($obj->cmp("\x{2FC4}", "\x{9E75}"), $r); + ok($obj->cmp("\x{2FC5}", "\x{9E7F}"), $r); + ok($obj->cmp("\x{2FC6}", "\x{9EA5}"), $r); + ok($obj->cmp("\x{2FC7}", "\x{9EBB}"), $r); + ok($obj->cmp("\x{2FC8}", "\x{9EC3}"), $r); + ok($obj->cmp("\x{2FC9}", "\x{9ECD}"), $r); + ok($obj->cmp("\x{2FCA}", "\x{9ED1}"), $r); + ok($obj->cmp("\x{2FCB}", "\x{9EF9}"), $r); + ok($obj->cmp("\x{2FCC}", "\x{9EFD}"), $r); + ok($obj->cmp("\x{2FCD}", "\x{9F0E}"), $r); + ok($obj->cmp("\x{2FCE}", "\x{9F13}"), $r); + ok($obj->cmp("\x{2FCF}", "\x{9F20}"), $r); + ok($obj->cmp("\x{2FD0}", "\x{9F3B}"), $r); + ok($obj->cmp("\x{2FD1}", "\x{9F4A}"), $r); + ok($obj->cmp("\x{2FD2}", "\x{9F52}"), $r); + ok($obj->cmp("\x{2FD3}", "\x{9F8D}"), $r); + ok($obj->cmp("\x{2FD4}", "\x{9F9C}"), $r); + ok($obj->cmp("\x{2EF3}", "\x{9F9F}"), $r); + ok($obj->cmp("\x{2FD5}", "\x{9FA0}"), $r); + } +} diff --git a/cpan/Unicode-Collate/t/loc_cyrl.t b/cpan/Unicode-Collate/t/loc_cyrl.t index 02deb3869e..eee5639b14 100644 --- a/cpan/Unicode-Collate/t/loc_cyrl.t +++ b/cpan/Unicode-Collate/t/loc_cyrl.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 104 }; +BEGIN { plan tests => 130 }; use strict; use warnings; @@ -29,115 +29,139 @@ ok($objNoSuppress->getlocale, 'default'); $objNoSuppress->change(level => 1); -ok($objNoSuppress->ne("\x{4D1}", "\x{430}")); -ok($objNoSuppress->ne("\x{4D0}", "\x{410}")); -ok($objNoSuppress->ne("\x{4D3}", "\x{430}")); -ok($objNoSuppress->ne("\x{4D2}", "\x{410}")); -ok($objNoSuppress->ne("\x{453}", "\x{433}")); -ok($objNoSuppress->ne("\x{403}", "\x{413}")); -ok($objNoSuppress->ne("\x{4D7}", "\x{435}")); -ok($objNoSuppress->ne("\x{4D6}", "\x{415}")); -ok($objNoSuppress->ne("\x{4DD}", "\x{436}")); -ok($objNoSuppress->ne("\x{4DC}", "\x{416}")); -ok($objNoSuppress->ne("\x{4DF}", "\x{437}")); -ok($objNoSuppress->ne("\x{4DE}", "\x{417}")); -ok($objNoSuppress->ne("\x{4E5}", "\x{438}")); -ok($objNoSuppress->ne("\x{4E4}", "\x{418}")); -ok($objNoSuppress->ne("\x{457}", "\x{456}")); -ok($objNoSuppress->ne("\x{407}", "\x{406}")); -ok($objNoSuppress->ne("\x{439}", "\x{438}")); -ok($objNoSuppress->ne("\x{419}", "\x{418}")); -ok($objNoSuppress->ne("\x{4E7}", "\x{43E}")); -ok($objNoSuppress->ne("\x{4E6}", "\x{41E}")); -ok($objNoSuppress->ne("\x{45C}", "\x{43A}")); -ok($objNoSuppress->ne("\x{40C}", "\x{41A}")); -ok($objNoSuppress->ne("\x{45E}", "\x{443}")); -ok($objNoSuppress->ne("\x{40E}", "\x{423}")); -ok($objNoSuppress->ne("\x{4F1}", "\x{443}")); -ok($objNoSuppress->ne("\x{4F0}", "\x{423}")); -ok($objNoSuppress->ne("\x{4F3}", "\x{443}")); -ok($objNoSuppress->ne("\x{4F2}", "\x{423}")); -ok($objNoSuppress->ne("\x{4F5}", "\x{447}")); -ok($objNoSuppress->ne("\x{4F4}", "\x{427}")); -ok($objNoSuppress->ne("\x{4F9}", "\x{44B}")); -ok($objNoSuppress->ne("\x{4F8}", "\x{42B}")); -ok($objNoSuppress->ne("\x{4ED}", "\x{44D}")); -ok($objNoSuppress->ne("\x{4EC}", "\x{42D}")); - -# 36 +ok($objNoSuppress->gt("\x{4D1}", "\x{430}")); # not suppressed +ok($objNoSuppress->gt("\x{4D0}", "\x{410}")); # not suppressed +ok($objNoSuppress->gt("\x{4D3}", "\x{430}")); # not suppressed +ok($objNoSuppress->gt("\x{4D2}", "\x{410}")); # not suppressed +ok($objNoSuppress->gt("\x{4DB}", "\x{4D9}")); # not suppressed +ok($objNoSuppress->gt("\x{4DA}", "\x{4D8}")); # not suppressed +ok($objNoSuppress->gt("\x{453}", "\x{433}")); # not suppressed +ok($objNoSuppress->gt("\x{403}", "\x{413}")); # not suppressed +ok($objNoSuppress->gt("\x{4D7}", "\x{435}")); # not suppressed +ok($objNoSuppress->gt("\x{4D6}", "\x{415}")); # not suppressed +ok($objNoSuppress->gt("\x{4DD}", "\x{436}")); # not suppressed +ok($objNoSuppress->gt("\x{4DC}", "\x{416}")); # not suppressed +ok($objNoSuppress->gt("\x{4DF}", "\x{437}")); # not suppressed +ok($objNoSuppress->gt("\x{4DE}", "\x{417}")); # not suppressed +ok($objNoSuppress->gt("\x{4E5}", "\x{438}")); # not suppressed +ok($objNoSuppress->gt("\x{4E4}", "\x{418}")); # not suppressed +ok($objNoSuppress->gt("\x{457}", "\x{456}")); # not suppressed +ok($objNoSuppress->gt("\x{407}", "\x{406}")); # not suppressed +ok($objNoSuppress->gt("\x{439}", "\x{438}")); # not suppressed +ok($objNoSuppress->gt("\x{419}", "\x{418}")); # not suppressed +ok($objNoSuppress->gt("\x{4E7}", "\x{43E}")); # not suppressed +ok($objNoSuppress->gt("\x{4E6}", "\x{41E}")); # not suppressed +ok($objNoSuppress->gt("\x{4EB}", "\x{4E9}")); # not suppressed +ok($objNoSuppress->gt("\x{4EA}", "\x{4E8}")); # not suppressed +ok($objNoSuppress->gt("\x{45C}", "\x{43A}")); # not suppressed +ok($objNoSuppress->gt("\x{40C}", "\x{41A}")); # not suppressed +ok($objNoSuppress->gt("\x{45E}", "\x{443}")); # not suppressed +ok($objNoSuppress->gt("\x{40E}", "\x{423}")); # not suppressed +ok($objNoSuppress->gt("\x{4F1}", "\x{443}")); # not suppressed +ok($objNoSuppress->gt("\x{4F0}", "\x{423}")); # not suppressed +ok($objNoSuppress->gt("\x{4F3}", "\x{443}")); # not suppressed +ok($objNoSuppress->gt("\x{4F2}", "\x{423}")); # not suppressed +ok($objNoSuppress->gt("\x{4F5}", "\x{447}")); # not suppressed +ok($objNoSuppress->gt("\x{4F4}", "\x{427}")); # not suppressed +ok($objNoSuppress->gt("\x{4F9}", "\x{44B}")); # not suppressed +ok($objNoSuppress->gt("\x{4F8}", "\x{42B}")); # not suppressed +ok($objNoSuppress->gt("\x{4ED}", "\x{44D}")); # not suppressed +ok($objNoSuppress->gt("\x{4EC}", "\x{42D}")); # not suppressed +ok($objNoSuppress->gt("\x{477}", "\x{475}")); # not suppressed +ok($objNoSuppress->gt("\x{476}", "\x{474}")); # not suppressed + +# 42 + +ok($objNoSuppress->eq("\x{450}", "\x{435}")); # not contraction +ok($objNoSuppress->eq("\x{400}", "\x{415}")); # not contraction +ok($objNoSuppress->eq("\x{451}", "\x{435}")); # not contraction +ok($objNoSuppress->eq("\x{401}", "\x{415}")); # not contraction +ok($objNoSuppress->eq("\x{4C2}", "\x{436}")); # not contraction +ok($objNoSuppress->eq("\x{4C1}", "\x{416}")); # not contraction +ok($objNoSuppress->eq("\x{45D}", "\x{438}")); # not contraction +ok($objNoSuppress->eq("\x{40D}", "\x{418}")); # not contraction +ok($objNoSuppress->eq("\x{4E3}", "\x{438}")); # not contraction +ok($objNoSuppress->eq("\x{4E2}", "\x{418}")); # not contraction +ok($objNoSuppress->eq("\x{4EF}", "\x{443}")); # not contraction +ok($objNoSuppress->eq("\x{4EE}", "\x{423}")); # not contraction + +# 54 + +$objNoSuppress->change(level => 2); + +ok($objNoSuppress->gt("\x{450}", "\x{435}")); # not contraction +ok($objNoSuppress->gt("\x{400}", "\x{415}")); # not contraction +ok($objNoSuppress->gt("\x{451}", "\x{435}")); # not contraction +ok($objNoSuppress->gt("\x{401}", "\x{415}")); # not contraction +ok($objNoSuppress->gt("\x{4C2}", "\x{436}")); # not contraction +ok($objNoSuppress->gt("\x{4C1}", "\x{416}")); # not contraction +ok($objNoSuppress->gt("\x{45D}", "\x{438}")); # not contraction +ok($objNoSuppress->gt("\x{40D}", "\x{418}")); # not contraction +ok($objNoSuppress->gt("\x{4E3}", "\x{438}")); # not contraction +ok($objNoSuppress->gt("\x{4E2}", "\x{418}")); # not contraction +ok($objNoSuppress->gt("\x{4EF}", "\x{443}")); # not contraction +ok($objNoSuppress->gt("\x{4EE}", "\x{423}")); # not contraction + +# 66 $objNoSuppress->change(level => 3); -ok($objNoSuppress->eq("\x{4D1}", "\x{430}\x{306}")); -ok($objNoSuppress->eq("\x{4D0}", "\x{410}\x{306}")); -ok($objNoSuppress->eq("\x{4D3}", "\x{430}\x{308}")); -ok($objNoSuppress->eq("\x{4D2}", "\x{410}\x{308}")); -ok($objNoSuppress->eq("\x{453}", "\x{433}\x{301}")); -ok($objNoSuppress->eq("\x{403}", "\x{413}\x{301}")); -ok($objNoSuppress->eq("\x{4D7}", "\x{435}\x{306}")); -ok($objNoSuppress->eq("\x{4D6}", "\x{415}\x{306}")); -ok($objNoSuppress->eq("\x{4DD}", "\x{436}\x{308}")); -ok($objNoSuppress->eq("\x{4DC}", "\x{416}\x{308}")); -ok($objNoSuppress->eq("\x{4DF}", "\x{437}\x{308}")); -ok($objNoSuppress->eq("\x{4DE}", "\x{417}\x{308}")); -ok($objNoSuppress->eq("\x{4E5}", "\x{438}\x{308}")); -ok($objNoSuppress->eq("\x{4E4}", "\x{418}\x{308}")); -ok($objNoSuppress->eq("\x{457}", "\x{456}\x{308}")); -ok($objNoSuppress->eq("\x{407}", "\x{406}\x{308}")); -ok($objNoSuppress->eq("\x{439}", "\x{438}\x{306}")); -ok($objNoSuppress->eq("\x{419}", "\x{418}\x{306}")); -ok($objNoSuppress->eq("\x{4E7}", "\x{43E}\x{308}")); -ok($objNoSuppress->eq("\x{4E6}", "\x{41E}\x{308}")); -ok($objNoSuppress->eq("\x{45C}", "\x{43A}\x{301}")); -ok($objNoSuppress->eq("\x{40C}", "\x{41A}\x{301}")); -ok($objNoSuppress->eq("\x{45E}", "\x{443}\x{306}")); -ok($objNoSuppress->eq("\x{40E}", "\x{423}\x{306}")); -ok($objNoSuppress->eq("\x{4F1}", "\x{443}\x{308}")); -ok($objNoSuppress->eq("\x{4F0}", "\x{423}\x{308}")); -ok($objNoSuppress->eq("\x{4F3}", "\x{443}\x{30B}")); -ok($objNoSuppress->eq("\x{4F2}", "\x{423}\x{30B}")); -ok($objNoSuppress->eq("\x{4F5}", "\x{447}\x{308}")); -ok($objNoSuppress->eq("\x{4F4}", "\x{427}\x{308}")); -ok($objNoSuppress->eq("\x{4F9}", "\x{44B}\x{308}")); -ok($objNoSuppress->eq("\x{4F8}", "\x{42B}\x{308}")); -ok($objNoSuppress->eq("\x{4ED}", "\x{44D}\x{308}")); -ok($objNoSuppress->eq("\x{4EC}", "\x{42D}\x{308}")); - -# 70 - -ok($objNoSuppress->ne("\x{4D1}", "\x{430}\0\x{306}")); -ok($objNoSuppress->ne("\x{4D0}", "\x{410}\0\x{306}")); -ok($objNoSuppress->ne("\x{4D3}", "\x{430}\0\x{308}")); -ok($objNoSuppress->ne("\x{4D2}", "\x{410}\0\x{308}")); -ok($objNoSuppress->ne("\x{453}", "\x{433}\0\x{301}")); -ok($objNoSuppress->ne("\x{403}", "\x{413}\0\x{301}")); -ok($objNoSuppress->ne("\x{4D7}", "\x{435}\0\x{306}")); -ok($objNoSuppress->ne("\x{4D6}", "\x{415}\0\x{306}")); -ok($objNoSuppress->ne("\x{4DD}", "\x{436}\0\x{308}")); -ok($objNoSuppress->ne("\x{4DC}", "\x{416}\0\x{308}")); -ok($objNoSuppress->ne("\x{4DF}", "\x{437}\0\x{308}")); -ok($objNoSuppress->ne("\x{4DE}", "\x{417}\0\x{308}")); -ok($objNoSuppress->ne("\x{4E5}", "\x{438}\0\x{308}")); -ok($objNoSuppress->ne("\x{4E4}", "\x{418}\0\x{308}")); -ok($objNoSuppress->ne("\x{457}", "\x{456}\0\x{308}")); -ok($objNoSuppress->ne("\x{407}", "\x{406}\0\x{308}")); -ok($objNoSuppress->ne("\x{439}", "\x{438}\0\x{306}")); -ok($objNoSuppress->ne("\x{419}", "\x{418}\0\x{306}")); -ok($objNoSuppress->ne("\x{4E7}", "\x{43E}\0\x{308}")); -ok($objNoSuppress->ne("\x{4E6}", "\x{41E}\0\x{308}")); -ok($objNoSuppress->ne("\x{45C}", "\x{43A}\0\x{301}")); -ok($objNoSuppress->ne("\x{40C}", "\x{41A}\0\x{301}")); -ok($objNoSuppress->ne("\x{45E}", "\x{443}\0\x{306}")); -ok($objNoSuppress->ne("\x{40E}", "\x{423}\0\x{306}")); -ok($objNoSuppress->ne("\x{4F1}", "\x{443}\0\x{308}")); -ok($objNoSuppress->ne("\x{4F0}", "\x{423}\0\x{308}")); -ok($objNoSuppress->ne("\x{4F3}", "\x{443}\0\x{30B}")); -ok($objNoSuppress->ne("\x{4F2}", "\x{423}\0\x{30B}")); -ok($objNoSuppress->ne("\x{4F5}", "\x{447}\0\x{308}")); -ok($objNoSuppress->ne("\x{4F4}", "\x{427}\0\x{308}")); -ok($objNoSuppress->ne("\x{4F9}", "\x{44B}\0\x{308}")); -ok($objNoSuppress->ne("\x{4F8}", "\x{42B}\0\x{308}")); -ok($objNoSuppress->ne("\x{4ED}", "\x{44D}\0\x{308}")); -ok($objNoSuppress->ne("\x{4EC}", "\x{42D}\0\x{308}")); - -# 104 +ok($objNoSuppress->eq("\x{4D1}", "\x{430}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{4D0}", "\x{410}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{4D3}", "\x{430}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4D2}", "\x{410}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4DB}", "\x{4D9}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4DA}", "\x{4D8}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{453}", "\x{433}\x{301}")); # not suppressed +ok($objNoSuppress->eq("\x{403}", "\x{413}\x{301}")); # not suppressed +ok($objNoSuppress->eq("\x{4D7}", "\x{435}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{4D6}", "\x{415}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{4DD}", "\x{436}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4DC}", "\x{416}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4DF}", "\x{437}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4DE}", "\x{417}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4E5}", "\x{438}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4E4}", "\x{418}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{457}", "\x{456}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{407}", "\x{406}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{439}", "\x{438}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{419}", "\x{418}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{4E7}", "\x{43E}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4E6}", "\x{41E}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4EB}", "\x{4E9}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4EA}", "\x{4E8}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{45C}", "\x{43A}\x{301}")); # not suppressed +ok($objNoSuppress->eq("\x{40C}", "\x{41A}\x{301}")); # not suppressed +ok($objNoSuppress->eq("\x{45E}", "\x{443}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{40E}", "\x{423}\x{306}")); # not suppressed +ok($objNoSuppress->eq("\x{4F1}", "\x{443}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4F0}", "\x{423}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4F3}", "\x{443}\x{30B}")); # not suppressed +ok($objNoSuppress->eq("\x{4F2}", "\x{423}\x{30B}")); # not suppressed +ok($objNoSuppress->eq("\x{4F5}", "\x{447}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4F4}", "\x{427}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4F9}", "\x{44B}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4F8}", "\x{42B}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4ED}", "\x{44D}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{4EC}", "\x{42D}\x{308}")); # not suppressed +ok($objNoSuppress->eq("\x{477}", "\x{475}\x{30F}")); # not suppressed +ok($objNoSuppress->eq("\x{476}", "\x{474}\x{30F}")); # not suppressed + +# 106 + +for my $i ("", "\0") { + ok($objNoSuppress->eq("\x{450}", "\x{435}$i\x{300}")); # not contraction + ok($objNoSuppress->eq("\x{400}", "\x{415}$i\x{300}")); # not contraction + ok($objNoSuppress->eq("\x{451}", "\x{435}$i\x{308}")); # not contraction + ok($objNoSuppress->eq("\x{401}", "\x{415}$i\x{308}")); # not contraction + ok($objNoSuppress->eq("\x{4C2}", "\x{436}$i\x{306}")); # not contraction + ok($objNoSuppress->eq("\x{4C1}", "\x{416}$i\x{306}")); # not contraction + ok($objNoSuppress->eq("\x{45D}", "\x{438}$i\x{300}")); # not contraction + ok($objNoSuppress->eq("\x{40D}", "\x{418}$i\x{300}")); # not contraction + ok($objNoSuppress->eq("\x{4E3}", "\x{438}$i\x{304}")); # not contraction + ok($objNoSuppress->eq("\x{4E2}", "\x{418}$i\x{304}")); # not contraction + ok($objNoSuppress->eq("\x{4EF}", "\x{443}$i\x{304}")); # not contraction + ok($objNoSuppress->eq("\x{4EE}", "\x{423}$i\x{304}")); # not contraction +} + +# 130 diff --git a/cpan/Unicode-Collate/t/loc_fa.t b/cpan/Unicode-Collate/t/loc_fa.t new file mode 100644 index 0000000000..6f18df05db --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_fa.t @@ -0,0 +1,142 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 81 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objFa = Unicode::Collate::Locale-> + new(locale => 'FA', normalization => undef); + +ok($objFa->getlocale, 'fa'); + +$objFa->change(level => 1); + +ok($objFa->lt("\x{622}", "\x{627}")); +ok($objFa->lt("\x{627}", "\x{621}")); +ok($objFa->lt("\x{621}", "\x{66E}")); + +ok($objFa->lt("\x{6CF}", "\x{647}")); +ok($objFa->lt("\x{647}", "\x{778}")); + +# 7 + +ok($objFa->eq("\x{64E}", "\x{650}")); +ok($objFa->eq("\x{650}", "\x{64F}")); +ok($objFa->eq("\x{64F}", "\x{64B}")); +ok($objFa->eq("\x{64B}", "\x{64D}")); +ok($objFa->eq("\x{64D}", "\x{64C}")); + +ok($objFa->eq("\x{627}", "\x{671}")); + +ok($objFa->eq("\x{621}", "\x{623}")); +ok($objFa->eq("\x{623}", "\x{672}")); +ok($objFa->eq("\x{672}", "\x{625}")); +ok($objFa->eq("\x{625}", "\x{673}")); +ok($objFa->eq("\x{673}", "\x{624}")); +ok($objFa->eq("\x{624}", "\x{6CC}\x{654}")); + +ok($objFa->eq("\x{6A9}", "\x{6AA}")); +ok($objFa->eq("\x{6AA}", "\x{6AB}")); +ok($objFa->eq("\x{6AB}", "\x{643}")); +ok($objFa->eq("\x{643}", "\x{6AC}")); +ok($objFa->eq("\x{6AC}", "\x{6AD}")); +ok($objFa->eq("\x{6AD}", "\x{6AE}")); + +ok($objFa->eq("\x{647}", "\x{6D5}")); +ok($objFa->eq("\x{6D5}", "\x{6C1}")); +ok($objFa->eq("\x{6C1}", "\x{629}")); +ok($objFa->eq("\x{629}", "\x{6C3}")); +ok($objFa->eq("\x{6C3}", "\x{6C0}")); +ok($objFa->eq("\x{6C0}", "\x{6BE}")); + +ok($objFa->eq("\x{6CC}", "\x{649}")); +ok($objFa->eq("\x{649}", "\x{6D2}")); +ok($objFa->eq("\x{6D2}", "\x{64A}")); +ok($objFa->eq("\x{64A}", "\x{6D0}")); +ok($objFa->eq("\x{6D0}", "\x{6D1}")); +ok($objFa->eq("\x{6D1}", "\x{6CD}")); +ok($objFa->eq("\x{6CD}", "\x{6CE}")); + +# 38 + +$objFa->change(level => 2); + +ok($objFa->lt("\x{64E}", "\x{650}")); +ok($objFa->lt("\x{650}", "\x{64F}")); +ok($objFa->lt("\x{64F}", "\x{64B}")); +ok($objFa->lt("\x{64B}", "\x{64D}")); +ok($objFa->lt("\x{64D}", "\x{64C}")); + +ok($objFa->lt("\x{627}", "\x{671}")); + +ok($objFa->lt("\x{621}", "\x{623}")); +ok($objFa->lt("\x{623}", "\x{672}")); +ok($objFa->lt("\x{672}", "\x{625}")); +ok($objFa->lt("\x{625}", "\x{673}")); +ok($objFa->lt("\x{673}", "\x{624}")); +ok($objFa->lt("\x{624}", "\x{6CC}\x{654}")); + +ok($objFa->lt("\x{6A9}", "\x{6AA}")); +ok($objFa->lt("\x{6AA}", "\x{6AB}")); +ok($objFa->lt("\x{6AB}", "\x{643}")); +ok($objFa->lt("\x{643}", "\x{6AC}")); +ok($objFa->lt("\x{6AC}", "\x{6AD}")); +ok($objFa->lt("\x{6AD}", "\x{6AE}")); + +ok($objFa->lt("\x{647}", "\x{6D5}")); +ok($objFa->lt("\x{6D5}", "\x{6C1}")); +ok($objFa->lt("\x{6C1}", "\x{629}")); +ok($objFa->lt("\x{629}", "\x{6C3}")); +ok($objFa->lt("\x{6C3}", "\x{6C0}")); +ok($objFa->lt("\x{6C0}", "\x{6BE}")); + +ok($objFa->lt("\x{6CC}", "\x{649}")); +ok($objFa->lt("\x{649}", "\x{6D2}")); +ok($objFa->lt("\x{6D2}", "\x{64A}")); +ok($objFa->lt("\x{64A}", "\x{6D0}")); +ok($objFa->lt("\x{6D0}", "\x{6D1}")); +ok($objFa->lt("\x{6D1}", "\x{6CD}")); +ok($objFa->lt("\x{6CD}", "\x{6CE}")); + +# 69 + +ok($objFa->eq("\x{6CC}\x{654}", "\x{649}\x{654}")); +ok($objFa->eq("\x{649}\x{654}", "\x{626}")); + +# 71 + +$objFa->change(level => 3); + +ok($objFa->lt("\x{6CC}\x{654}", "\x{649}\x{654}")); +ok($objFa->lt("\x{649}\x{654}", "\x{626}")); + +# 73 + +ok($objFa->eq("\x{622}", "\x{627}\x{653}")); +ok($objFa->eq("\x{623}", "\x{627}\x{654}")); +ok($objFa->eq("\x{625}", "\x{627}\x{655}")); +ok($objFa->eq("\x{624}", "\x{648}\x{654}")); +ok($objFa->eq("\x{626}", "\x{64A}\x{654}")); +ok($objFa->eq("\x{6C2}", "\x{6C1}\x{654}")); +ok($objFa->eq("\x{6C0}", "\x{6D5}\x{654}")); +ok($objFa->eq("\x{6D3}", "\x{6D2}\x{654}")); + +# 81 diff --git a/cpan/Unicode-Collate/t/loc_fiph.t b/cpan/Unicode-Collate/t/loc_fiph.t index 4c38c1565b..621d5ac320 100644 --- a/cpan/Unicode-Collate/t/loc_fiph.t +++ b/cpan/Unicode-Collate/t/loc_fiph.t @@ -49,17 +49,19 @@ ok($objFiPhone->lt($ouml, "\x{1C0}")); # 6 +ok($objFiPhone->lt('v', 'w')); +ok($objFiPhone->gt('x', 'w')); + ok($objFiPhone->eq("d\x{335}", "\x{111}")); ok($objFiPhone->eq("g\x{335}", "\x{1E5}")); ok($objFiPhone->eq("n\x{335}", "\x{14B}")); ok($objFiPhone->eq("t\x{335}", "\x{167}")); ok($objFiPhone->eq("z\x{335}", "\x{292}")); -ok($objFiPhone->lt('v', 'w')); ok($objFiPhone->eq('y', $uuml)); ok($objFiPhone->eq($auml, $ae)); ok($objFiPhone->eq($ouml, $ostk)); -# 15 +# 16 $objFiPhone->change(level => 2); @@ -68,7 +70,6 @@ ok($objFiPhone->lt("g\x{335}", "\x{1E5}")); ok($objFiPhone->lt("n\x{335}", "\x{14B}")); ok($objFiPhone->lt("t\x{335}", "\x{167}")); ok($objFiPhone->lt("z\x{335}", "\x{292}")); -ok($objFiPhone->lt('v', 'w')); ok($objFiPhone->lt('y', $uuml)); ok($objFiPhone->lt($auml, $ae)); ok($objFiPhone->lt($ouml, $ostk)); diff --git a/cpan/Unicode-Collate/t/loc_kk.t b/cpan/Unicode-Collate/t/loc_kk.t index ee5840939f..33bc6bd069 100644 --- a/cpan/Unicode-Collate/t/loc_kk.t +++ b/cpan/Unicode-Collate/t/loc_kk.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 217 }; +BEGIN { plan tests => 211 }; use strict; use warnings; @@ -30,25 +30,20 @@ ok($objKk->getlocale, 'kk'); $objKk->change(level => 1); ok($objKk->gt("\x{451}", "\x{435}")); -ok($objKk->gt("\x{401}", "\x{415}")); -ok($objKk->gt("\x{4AF}", "\x{4B1}")); -ok($objKk->gt("\x{4AE}", "\x{4B0}")); -ok($objKk->lt("\x{456}", "\x{44C}")); -ok($objKk->lt("\x{406}", "\x{42C}")); - ok($objKk->lt("\x{451}", "\x{454}")); -ok($objKk->lt("\x{401}", "\x{404}")); +ok($objKk->gt("\x{4AF}", "\x{4B1}")); ok($objKk->lt("\x{4AF}", "\x{A64B}")); -ok($objKk->lt("\x{4AE}", "\x{A64A}")); ok($objKk->gt("\x{456}", "\x{4F9}")); -ok($objKk->gt("\x{406}", "\x{4F8}")); +ok($objKk->lt("\x{456}", "\x{44C}")); + +# 8 ok($objKk->gt("\x{4E5}", "\x{438}")); # not suppressed ok($objKk->gt("\x{4E4}", "\x{418}")); # not suppressed ok($objKk->gt("\x{439}", "\x{438}")); # not suppressed ok($objKk->gt("\x{419}", "\x{418}")); # not suppressed -# 18 +# 12 ok($objKk->eq("\x{4D1}", "\x{430}")); ok($objKk->eq("\x{4D0}", "\x{410}")); @@ -97,7 +92,7 @@ ok($objKk->eq("\x{4EC}", "\x{42D}")); ok($objKk->eq("\x{477}", "\x{475}")); ok($objKk->eq("\x{476}", "\x{474}")); -# 64 +# 58 $objKk->change(level => 2); @@ -105,6 +100,8 @@ ok($objKk->eq("\x{451}", "\x{401}")); ok($objKk->eq("\x{4AF}", "\x{4AE}")); ok($objKk->eq("\x{456}", "\x{406}")); +# 61 + ok($objKk->gt("\x{4D1}", "\x{430}")); ok($objKk->gt("\x{4D0}", "\x{410}")); ok($objKk->gt("\x{4D3}", "\x{430}")); @@ -152,7 +149,7 @@ ok($objKk->gt("\x{4EC}", "\x{42D}")); ok($objKk->gt("\x{477}", "\x{475}")); ok($objKk->gt("\x{476}", "\x{474}")); -# 113 +# 107 $objKk->change(level => 3); @@ -160,6 +157,8 @@ ok($objKk->lt("\x{451}", "\x{401}")); ok($objKk->lt("\x{4AF}", "\x{4AE}")); ok($objKk->lt("\x{456}", "\x{406}")); +# 110 + ok($objKk->eq("\x{451}", "\x{435}\x{308}")); ok($objKk->eq("\x{401}", "\x{415}\x{308}")); ok($objKk->eq("\x{4E5}", "\x{438}\x{308}")); # not suppressed @@ -167,7 +166,7 @@ ok($objKk->eq("\x{4E4}", "\x{418}\x{308}")); # not suppressed ok($objKk->eq("\x{439}", "\x{438}\x{306}")); # not suppressed ok($objKk->eq("\x{419}", "\x{418}\x{306}")); # not suppressed -# 122 +# 116 for my $i ("", "\0") { ok($objKk->eq("\x{4D1}", "\x{430}$i\x{306}")); @@ -218,7 +217,7 @@ for my $i ("", "\0") { ok($objKk->eq("\x{476}", "\x{474}$i\x{30F}")); } -# 214 +# 208 $objKk->change(upper_before_lower => 1); @@ -226,4 +225,4 @@ ok($objKk->gt("\x{451}", "\x{401}")); ok($objKk->gt("\x{4AF}", "\x{4AE}")); ok($objKk->gt("\x{456}", "\x{406}")); -# 217 +# 211 diff --git a/cpan/Unicode-Collate/t/loc_sa.t b/cpan/Unicode-Collate/t/loc_sa.t new file mode 100644 index 0000000000..1ee3b2a5cf --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_sa.t @@ -0,0 +1,56 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 14 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objSa = Unicode::Collate::Locale-> + new(locale => 'SA', normalization => undef); + +ok($objSa->getlocale, 'sa'); + +$objSa->change(level => 1); + +ok($objSa->lt("\x{950}", "\x{902}")); +ok($objSa->lt("\x{902}", "\x{903}")); +ok($objSa->lt("\x{903}", "\x{972}")); + +ok($objSa->eq("\x{902}", "\x{901}")); + +ok($objSa->lt("\x{939}", "\x{933}")); +ok($objSa->lt("\x{933}", "\x{915}\x{94D}\x{937}")); +ok($objSa->lt("\x{915}\x{94D}\x{937}", "\x{91C}\x{94D}\x{91E}")); +ok($objSa->lt("\x{91C}\x{94D}\x{91E}", "\x{93D}")); + +ok($objSa->eq("\x{933}", "\x{934}")); + +# 11 + +$objSa->change(level => 2); + +ok($objSa->lt("\x{902}", "\x{901}")); +ok($objSa->lt("\x{933}", "\x{934}")); + +$objSa->change(level => 3); + +ok($objSa->eq("\x{933}\x{93C}", "\x{934}")); + +# 14 diff --git a/cpan/Unicode-Collate/t/loc_si.t b/cpan/Unicode-Collate/t/loc_si.t new file mode 100644 index 0000000000..1c994f5acf --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_si.t @@ -0,0 +1,39 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 8 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objSi = Unicode::Collate::Locale-> + new(locale => 'SI', normalization => undef); + +ok($objSi->getlocale, 'si'); + +$objSi->change(level => 1); + +ok($objSi->lt("\x{D96}", "\x{D82}")); +ok($objSi->lt("\x{D82}", "\x{D83}")); +ok($objSi->lt("\x{D83}", "\x{D9A}")); + +ok($objSi->lt("\x{DA3}", "\x{DA5}")); +ok($objSi->lt("\x{DA5}", "\x{DA4}")); +ok($objSi->lt("\x{DA4}", "\x{DA6}")); + diff --git a/cpan/Unicode-Collate/t/loc_sidt.t b/cpan/Unicode-Collate/t/loc_sidt.t new file mode 100644 index 0000000000..42fb5609ee --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_sidt.t @@ -0,0 +1,43 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 9 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objSiDict = Unicode::Collate::Locale-> + new(locale => 'SI-dict', normalization => undef); + +ok($objSiDict->getlocale, 'si__dictionary'); + +$objSiDict->change(level => 1); + +ok($objSiDict->lt("\x{D96}", "\x{D82}")); +ok($objSiDict->lt("\x{D82}", "\x{D83}")); +ok($objSiDict->lt("\x{D83}", "\x{D9A}")); + +ok($objSiDict->gt("\x{DA5}", "\x{DA2}")); +ok($objSiDict->eq("\x{DA5}", "\x{DA2}\x{DCA}\x{DA4}")); +ok($objSiDict->lt("\x{DA5}", "\x{DA3}")); + +$objSiDict->change(level => 2); + +ok($objSiDict->gt("\x{DA5}", "\x{DA2}\x{DCA}\x{DA4}")); + diff --git a/cpan/Unicode-Collate/t/loc_sk.t b/cpan/Unicode-Collate/t/loc_sk.t index 79ed15fe49..c6ad615b58 100644 --- a/cpan/Unicode-Collate/t/loc_sk.t +++ b/cpan/Unicode-Collate/t/loc_sk.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 52 }; +BEGIN { plan tests => 58 }; use strict; use warnings; @@ -37,38 +37,42 @@ ok($objSk->lt("h", "ch")); ok($objSk->gt("i", "ch")); ok($objSk->lt("o", "o\x{302}")); ok($objSk->gt("p", "o\x{302}")); +ok($objSk->lt("r", "r\x{30C}")); +ok($objSk->gt("s", "r\x{30C}")); ok($objSk->lt("s", "s\x{30C}")); ok($objSk->gt("t", "s\x{30C}")); ok($objSk->lt("z", "z\x{30C}")); ok($objSk->lt("z\x{30C}", "\x{292}")); # U+0292 EZH -# 14 +# 16 $objSk->change(level => 2); ok($objSk->eq("a\x{308}", "A\x{308}")); ok($objSk->eq("c\x{30C}", "C\x{30C}")); ok($objSk->eq("o\x{302}", "O\x{302}")); +ok($objSk->eq("r\x{30C}", "R\x{30C}")); ok($objSk->eq("s\x{30C}", "S\x{30C}")); ok($objSk->eq("z\x{30C}", "Z\x{30C}")); ok($objSk->eq("ch", "cH")); ok($objSk->eq("cH", "Ch")); ok($objSk->eq("Ch", "CH")); -# 22 +# 25 $objSk->change(level => 3); ok($objSk->lt("a\x{308}", "A\x{308}")); ok($objSk->lt("c\x{30C}", "C\x{30C}")); ok($objSk->lt("o\x{302}", "O\x{302}")); +ok($objSk->lt("r\x{30C}", "R\x{30C}")); ok($objSk->lt("s\x{30C}", "S\x{30C}")); ok($objSk->lt("z\x{30C}", "Z\x{30C}")); ok($objSk->lt("ch", "cH")); ok($objSk->lt("cH", "Ch")); ok($objSk->lt("Ch", "CH")); -# 30 +# 34 ok($objSk->eq("a\x{308}", pack('U', 0xE4))); ok($objSk->eq("A\x{308}", pack('U', 0xC4))); @@ -78,12 +82,14 @@ ok($objSk->eq("c\x{30C}", "\x{10D}")); ok($objSk->eq("C\x{30C}", "\x{10C}")); ok($objSk->eq("o\x{302}", pack('U', 0xF4))); ok($objSk->eq("O\x{302}", pack('U', 0xD4))); +ok($objSk->eq("r\x{30C}", "\x{159}")); +ok($objSk->eq("R\x{30C}", "\x{158}")); ok($objSk->eq("s\x{30C}", "\x{161}")); ok($objSk->eq("S\x{30C}", "\x{160}")); ok($objSk->eq("z\x{30C}", "\x{17E}")); ok($objSk->eq("Z\x{30C}", "\x{17D}")); -# 42 +# 48 ok($objSk->eq("o\x{302}\x{300}", "\x{1ED3}")); ok($objSk->eq("O\x{302}\x{300}", "\x{1ED2}")); @@ -96,4 +102,4 @@ ok($objSk->eq("O\x{302}\x{309}", "\x{1ED4}")); ok($objSk->eq("o\x{302}\x{323}", "\x{1ED9}")); ok($objSk->eq("O\x{302}\x{323}", "\x{1ED8}")); -# 52 +# 58 diff --git a/cpan/Unicode-Collate/t/loc_sr.t b/cpan/Unicode-Collate/t/loc_sr.t index 70a0a934f2..ab2547f4df 100644 --- a/cpan/Unicode-Collate/t/loc_sr.t +++ b/cpan/Unicode-Collate/t/loc_sr.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 130 }; +BEGIN { plan tests => 210 }; use strict; use warnings; @@ -29,31 +29,44 @@ ok($objSr->getlocale, 'sr'); $objSr->change(level => 1); -ok($objSr->gt("\x{4E5}", "\x{438}")); -ok($objSr->gt("\x{4E4}", "\x{418}")); -ok($objSr->gt("\x{439}", "\x{438}")); -ok($objSr->gt("\x{419}", "\x{418}")); - -# 6 - ok($objSr->eq("\x{4D1}", "\x{430}")); ok($objSr->eq("\x{4D0}", "\x{410}")); ok($objSr->eq("\x{4D3}", "\x{430}")); ok($objSr->eq("\x{4D2}", "\x{410}")); +ok($objSr->eq("\x{4DB}", "\x{4D9}")); +ok($objSr->eq("\x{4DA}", "\x{4D8}")); ok($objSr->eq("\x{453}", "\x{433}")); ok($objSr->eq("\x{403}", "\x{413}")); +ok($objSr->eq("\x{450}", "\x{435}")); # not contraction +ok($objSr->eq("\x{400}", "\x{415}")); # not contraction +ok($objSr->eq("\x{451}", "\x{435}")); # not contraction +ok($objSr->eq("\x{401}", "\x{415}")); # not contraction ok($objSr->eq("\x{4D7}", "\x{435}")); ok($objSr->eq("\x{4D6}", "\x{415}")); +ok($objSr->eq("\x{4C2}", "\x{436}")); # not contraction +ok($objSr->eq("\x{4C1}", "\x{416}")); # not contraction ok($objSr->eq("\x{4DD}", "\x{436}")); ok($objSr->eq("\x{4DC}", "\x{416}")); ok($objSr->eq("\x{4DF}", "\x{437}")); ok($objSr->eq("\x{4DE}", "\x{417}")); +ok($objSr->eq("\x{45D}", "\x{438}")); # not contraction +ok($objSr->eq("\x{40D}", "\x{418}")); # not contraction +ok($objSr->eq("\x{4E3}", "\x{438}")); # not contraction +ok($objSr->eq("\x{4E2}", "\x{418}")); # not contraction +ok($objSr->eq("\x{4E5}", "\x{438}")); +ok($objSr->eq("\x{4E4}", "\x{418}")); ok($objSr->eq("\x{457}", "\x{456}")); ok($objSr->eq("\x{407}", "\x{406}")); +ok($objSr->eq("\x{439}", "\x{438}")); +ok($objSr->eq("\x{419}", "\x{418}")); ok($objSr->eq("\x{4E7}", "\x{43E}")); ok($objSr->eq("\x{4E6}", "\x{41E}")); +ok($objSr->eq("\x{4EB}", "\x{4E9}")); +ok($objSr->eq("\x{4EA}", "\x{4E8}")); ok($objSr->eq("\x{45C}", "\x{43A}")); ok($objSr->eq("\x{40C}", "\x{41A}")); +ok($objSr->eq("\x{4EF}", "\x{443}")); # not contraction +ok($objSr->eq("\x{4EE}", "\x{423}")); # not contraction ok($objSr->eq("\x{45E}", "\x{443}")); ok($objSr->eq("\x{40E}", "\x{423}")); ok($objSr->eq("\x{4F1}", "\x{443}")); @@ -66,8 +79,10 @@ ok($objSr->eq("\x{4F9}", "\x{44B}")); ok($objSr->eq("\x{4F8}", "\x{42B}")); ok($objSr->eq("\x{4ED}", "\x{44D}")); ok($objSr->eq("\x{4EC}", "\x{42D}")); +ok($objSr->eq("\x{477}", "\x{475}")); +ok($objSr->eq("\x{476}", "\x{474}")); -# 36 +# 54 $objSr->change(level => 2); @@ -75,20 +90,40 @@ ok($objSr->gt("\x{4D1}", "\x{430}")); ok($objSr->gt("\x{4D0}", "\x{410}")); ok($objSr->gt("\x{4D3}", "\x{430}")); ok($objSr->gt("\x{4D2}", "\x{410}")); +ok($objSr->gt("\x{4DB}", "\x{4D9}")); +ok($objSr->gt("\x{4DA}", "\x{4D8}")); ok($objSr->gt("\x{453}", "\x{433}")); ok($objSr->gt("\x{403}", "\x{413}")); +ok($objSr->gt("\x{450}", "\x{435}")); # not contraction +ok($objSr->gt("\x{400}", "\x{415}")); # not contraction +ok($objSr->gt("\x{451}", "\x{435}")); # not contraction +ok($objSr->gt("\x{401}", "\x{415}")); # not contraction ok($objSr->gt("\x{4D7}", "\x{435}")); ok($objSr->gt("\x{4D6}", "\x{415}")); +ok($objSr->gt("\x{4C2}", "\x{436}")); # not contraction +ok($objSr->gt("\x{4C1}", "\x{416}")); # not contraction ok($objSr->gt("\x{4DD}", "\x{436}")); ok($objSr->gt("\x{4DC}", "\x{416}")); ok($objSr->gt("\x{4DF}", "\x{437}")); ok($objSr->gt("\x{4DE}", "\x{417}")); +ok($objSr->gt("\x{45D}", "\x{438}")); # not contraction +ok($objSr->gt("\x{40D}", "\x{418}")); # not contraction +ok($objSr->gt("\x{4E3}", "\x{438}")); # not contraction +ok($objSr->gt("\x{4E2}", "\x{418}")); # not contraction +ok($objSr->gt("\x{4E5}", "\x{438}")); +ok($objSr->gt("\x{4E4}", "\x{418}")); ok($objSr->gt("\x{457}", "\x{456}")); ok($objSr->gt("\x{407}", "\x{406}")); +ok($objSr->gt("\x{439}", "\x{438}")); +ok($objSr->gt("\x{419}", "\x{418}")); ok($objSr->gt("\x{4E7}", "\x{43E}")); ok($objSr->gt("\x{4E6}", "\x{41E}")); +ok($objSr->gt("\x{4EB}", "\x{4E9}")); +ok($objSr->gt("\x{4EA}", "\x{4E8}")); ok($objSr->gt("\x{45C}", "\x{43A}")); ok($objSr->gt("\x{40C}", "\x{41A}")); +ok($objSr->gt("\x{4EF}", "\x{443}")); # not contraction +ok($objSr->gt("\x{4EE}", "\x{423}")); # not contraction ok($objSr->gt("\x{45E}", "\x{443}")); ok($objSr->gt("\x{40E}", "\x{423}")); ok($objSr->gt("\x{4F1}", "\x{443}")); @@ -101,77 +136,66 @@ ok($objSr->gt("\x{4F9}", "\x{44B}")); ok($objSr->gt("\x{4F8}", "\x{42B}")); ok($objSr->gt("\x{4ED}", "\x{44D}")); ok($objSr->gt("\x{4EC}", "\x{42D}")); +ok($objSr->gt("\x{477}", "\x{475}")); +ok($objSr->gt("\x{476}", "\x{474}")); -# 66 +# 106 $objSr->change(level => 3); -ok($objSr->eq("\x{4D1}", "\x{430}\x{306}")); -ok($objSr->eq("\x{4D0}", "\x{410}\x{306}")); -ok($objSr->eq("\x{4D3}", "\x{430}\x{308}")); -ok($objSr->eq("\x{4D2}", "\x{410}\x{308}")); -ok($objSr->eq("\x{453}", "\x{433}\x{301}")); -ok($objSr->eq("\x{403}", "\x{413}\x{301}")); -ok($objSr->eq("\x{4D7}", "\x{435}\x{306}")); -ok($objSr->eq("\x{4D6}", "\x{415}\x{306}")); -ok($objSr->eq("\x{4DD}", "\x{436}\x{308}")); -ok($objSr->eq("\x{4DC}", "\x{416}\x{308}")); -ok($objSr->eq("\x{4DF}", "\x{437}\x{308}")); -ok($objSr->eq("\x{4DE}", "\x{417}\x{308}")); -ok($objSr->eq("\x{4E5}", "\x{438}\x{308}")); -ok($objSr->eq("\x{4E4}", "\x{418}\x{308}")); -ok($objSr->eq("\x{457}", "\x{456}\x{308}")); -ok($objSr->eq("\x{407}", "\x{406}\x{308}")); -ok($objSr->eq("\x{439}", "\x{438}\x{306}")); -ok($objSr->eq("\x{419}", "\x{418}\x{306}")); -ok($objSr->eq("\x{4E7}", "\x{43E}\x{308}")); -ok($objSr->eq("\x{4E6}", "\x{41E}\x{308}")); -ok($objSr->eq("\x{45C}", "\x{43A}\x{301}")); -ok($objSr->eq("\x{40C}", "\x{41A}\x{301}")); -ok($objSr->eq("\x{45E}", "\x{443}\x{306}")); -ok($objSr->eq("\x{40E}", "\x{423}\x{306}")); -ok($objSr->eq("\x{4F1}", "\x{443}\x{308}")); -ok($objSr->eq("\x{4F0}", "\x{423}\x{308}")); -ok($objSr->eq("\x{4F3}", "\x{443}\x{30B}")); -ok($objSr->eq("\x{4F2}", "\x{423}\x{30B}")); -ok($objSr->eq("\x{4F5}", "\x{447}\x{308}")); -ok($objSr->eq("\x{4F4}", "\x{427}\x{308}")); -ok($objSr->eq("\x{4F9}", "\x{44B}\x{308}")); -ok($objSr->eq("\x{4F8}", "\x{42B}\x{308}")); -ok($objSr->eq("\x{4ED}", "\x{44D}\x{308}")); -ok($objSr->eq("\x{4EC}", "\x{42D}\x{308}")); - -# 100 - -ok($objSr->eq("\x{4D1}", "\x{430}\0\x{306}")); -ok($objSr->eq("\x{4D0}", "\x{410}\0\x{306}")); -ok($objSr->eq("\x{4D3}", "\x{430}\0\x{308}")); -ok($objSr->eq("\x{4D2}", "\x{410}\0\x{308}")); -ok($objSr->eq("\x{453}", "\x{433}\0\x{301}")); -ok($objSr->eq("\x{403}", "\x{413}\0\x{301}")); -ok($objSr->eq("\x{4D7}", "\x{435}\0\x{306}")); -ok($objSr->eq("\x{4D6}", "\x{415}\0\x{306}")); -ok($objSr->eq("\x{4DD}", "\x{436}\0\x{308}")); -ok($objSr->eq("\x{4DC}", "\x{416}\0\x{308}")); -ok($objSr->eq("\x{4DF}", "\x{437}\0\x{308}")); -ok($objSr->eq("\x{4DE}", "\x{417}\0\x{308}")); -ok($objSr->eq("\x{457}", "\x{456}\0\x{308}")); -ok($objSr->eq("\x{407}", "\x{406}\0\x{308}")); -ok($objSr->eq("\x{4E7}", "\x{43E}\0\x{308}")); -ok($objSr->eq("\x{4E6}", "\x{41E}\0\x{308}")); -ok($objSr->eq("\x{45C}", "\x{43A}\0\x{301}")); -ok($objSr->eq("\x{40C}", "\x{41A}\0\x{301}")); -ok($objSr->eq("\x{45E}", "\x{443}\0\x{306}")); -ok($objSr->eq("\x{40E}", "\x{423}\0\x{306}")); -ok($objSr->eq("\x{4F1}", "\x{443}\0\x{308}")); -ok($objSr->eq("\x{4F0}", "\x{423}\0\x{308}")); -ok($objSr->eq("\x{4F3}", "\x{443}\0\x{30B}")); -ok($objSr->eq("\x{4F2}", "\x{423}\0\x{30B}")); -ok($objSr->eq("\x{4F5}", "\x{447}\0\x{308}")); -ok($objSr->eq("\x{4F4}", "\x{427}\0\x{308}")); -ok($objSr->eq("\x{4F9}", "\x{44B}\0\x{308}")); -ok($objSr->eq("\x{4F8}", "\x{42B}\0\x{308}")); -ok($objSr->eq("\x{4ED}", "\x{44D}\0\x{308}")); -ok($objSr->eq("\x{4EC}", "\x{42D}\0\x{308}")); - -# 130 +for my $i ("", "\0") { + ok($objSr->eq("\x{4D1}", "\x{430}$i\x{306}")); + ok($objSr->eq("\x{4D0}", "\x{410}$i\x{306}")); + ok($objSr->eq("\x{4D3}", "\x{430}$i\x{308}")); + ok($objSr->eq("\x{4D2}", "\x{410}$i\x{308}")); + ok($objSr->eq("\x{4DB}", "\x{4D9}$i\x{308}")); + ok($objSr->eq("\x{4DA}", "\x{4D8}$i\x{308}")); + ok($objSr->eq("\x{453}", "\x{433}$i\x{301}")); + ok($objSr->eq("\x{403}", "\x{413}$i\x{301}")); + ok($objSr->eq("\x{450}", "\x{435}$i\x{300}")); # not contraction + ok($objSr->eq("\x{400}", "\x{415}$i\x{300}")); # not contraction + ok($objSr->eq("\x{451}", "\x{435}$i\x{308}")); # not contraction + ok($objSr->eq("\x{401}", "\x{415}$i\x{308}")); # not contraction + ok($objSr->eq("\x{4D7}", "\x{435}$i\x{306}")); + ok($objSr->eq("\x{4D6}", "\x{415}$i\x{306}")); + ok($objSr->eq("\x{4C2}", "\x{436}$i\x{306}")); # not contraction + ok($objSr->eq("\x{4C1}", "\x{416}$i\x{306}")); # not contraction + ok($objSr->eq("\x{4DD}", "\x{436}$i\x{308}")); + ok($objSr->eq("\x{4DC}", "\x{416}$i\x{308}")); + ok($objSr->eq("\x{4DF}", "\x{437}$i\x{308}")); + ok($objSr->eq("\x{4DE}", "\x{417}$i\x{308}")); + ok($objSr->eq("\x{45D}", "\x{438}$i\x{300}")); # not contraction + ok($objSr->eq("\x{40D}", "\x{418}$i\x{300}")); # not contraction + ok($objSr->eq("\x{4E3}", "\x{438}$i\x{304}")); # not contraction + ok($objSr->eq("\x{4E2}", "\x{418}$i\x{304}")); # not contraction + ok($objSr->eq("\x{4E5}", "\x{438}$i\x{308}")); + ok($objSr->eq("\x{4E4}", "\x{418}$i\x{308}")); + ok($objSr->eq("\x{457}", "\x{456}$i\x{308}")); + ok($objSr->eq("\x{407}", "\x{406}$i\x{308}")); + ok($objSr->eq("\x{439}", "\x{438}$i\x{306}")); + ok($objSr->eq("\x{419}", "\x{418}$i\x{306}")); + ok($objSr->eq("\x{4E7}", "\x{43E}$i\x{308}")); + ok($objSr->eq("\x{4E6}", "\x{41E}$i\x{308}")); + ok($objSr->eq("\x{4EB}", "\x{4E9}$i\x{308}")); + ok($objSr->eq("\x{4EA}", "\x{4E8}$i\x{308}")); + ok($objSr->eq("\x{45C}", "\x{43A}$i\x{301}")); + ok($objSr->eq("\x{40C}", "\x{41A}$i\x{301}")); + ok($objSr->eq("\x{4EF}", "\x{443}$i\x{304}")); # not contraction + ok($objSr->eq("\x{4EE}", "\x{423}$i\x{304}")); # not contraction + ok($objSr->eq("\x{45E}", "\x{443}$i\x{306}")); + ok($objSr->eq("\x{40E}", "\x{423}$i\x{306}")); + ok($objSr->eq("\x{4F1}", "\x{443}$i\x{308}")); + ok($objSr->eq("\x{4F0}", "\x{423}$i\x{308}")); + ok($objSr->eq("\x{4F3}", "\x{443}$i\x{30B}")); + ok($objSr->eq("\x{4F2}", "\x{423}$i\x{30B}")); + ok($objSr->eq("\x{4F5}", "\x{447}$i\x{308}")); + ok($objSr->eq("\x{4F4}", "\x{427}$i\x{308}")); + ok($objSr->eq("\x{4F9}", "\x{44B}$i\x{308}")); + ok($objSr->eq("\x{4F8}", "\x{42B}$i\x{308}")); + ok($objSr->eq("\x{4ED}", "\x{44D}$i\x{308}")); + ok($objSr->eq("\x{4EC}", "\x{42D}$i\x{308}")); + ok($objSr->eq("\x{477}", "\x{475}$i\x{30F}")); + ok($objSr->eq("\x{476}", "\x{474}$i\x{30F}")); +} + +# 210 diff --git a/cpan/Unicode-Collate/t/loc_srla.t b/cpan/Unicode-Collate/t/loc_srla.t new file mode 100644 index 0000000000..9b6ba0a927 --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_srla.t @@ -0,0 +1,186 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 118 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objSrLatn = Unicode::Collate::Locale-> + new(locale => 'SR-LATN', normalization => undef); + +ok($objSrLatn->getlocale, 'sr_Latn'); + +$objSrLatn->change(level => 1); + +ok($objSrLatn->lt("c", "c\x{30C}")); +ok($objSrLatn->lt("c\x{30C}", "c\x{301}")); +ok($objSrLatn->gt("d", "c\x{301}")); +ok($objSrLatn->lt("d", "dz\x{30C}")); +ok($objSrLatn->lt("dzz", "dz\x{30C}")); +ok($objSrLatn->lt("dz\x{30C}", "d\x{335}")); +ok($objSrLatn->gt("e", "d\x{335}")); +ok($objSrLatn->lt("l", "lj")); +ok($objSrLatn->lt("lz","lj")); +ok($objSrLatn->gt("m", "lj")); +ok($objSrLatn->lt("n", "nj")); +ok($objSrLatn->lt("nz","nj")); +ok($objSrLatn->gt("o", "nj")); +ok($objSrLatn->lt("s", "s\x{30C}")); +ok($objSrLatn->lt("sz","s\x{30C}")); +ok($objSrLatn->gt("t", "s\x{30C}")); +ok($objSrLatn->lt("z", "z\x{30C}")); +ok($objSrLatn->lt("zz","z\x{30C}")); +ok($objSrLatn->lt("z\x{30C}", "\x{292}")); # U+0292 EZH + +# 21 + +# not tailored +ok($objSrLatn->lt("dZ\x{30C}","dz\x{30C}")); +ok($objSrLatn->lt("lJ", "lj")); +ok($objSrLatn->lt("nJ", "nj")); + +# 24 + +$objSrLatn->change(level => 2); + +ok($objSrLatn->eq("c\x{30C}", "C\x{30C}")); +ok($objSrLatn->eq("c\x{301}", "C\x{301}")); +ok($objSrLatn->eq("dz\x{30C}","Dz\x{30C}")); +ok($objSrLatn->eq("Dz\x{30C}","DZ\x{30C}")); +ok($objSrLatn->eq("d\x{335}", "D\x{335}")); +ok($objSrLatn->eq("lj", "Lj")); +ok($objSrLatn->eq("Lj", "LJ")); +ok($objSrLatn->eq("nj", "Nj")); +ok($objSrLatn->eq("Nj", "NJ")); +ok($objSrLatn->eq("s\x{30C}", "S\x{30C}")); +ok($objSrLatn->eq("z\x{30C}", "Z\x{30C}")); + +# 35 + +ok($objSrLatn->eq("dz\x{30C}", "\x{1C6}")); +ok($objSrLatn->eq("Dz\x{30C}", "\x{1C6}")); +ok($objSrLatn->eq("Dz\x{30C}", "\x{1C5}")); +ok($objSrLatn->eq("DZ\x{30C}", "\x{1C5}")); +ok($objSrLatn->eq("DZ\x{30C}", "\x{1C4}")); + +ok($objSrLatn->eq("lj", "\x{1C9}")); +ok($objSrLatn->eq("Lj", "\x{1C9}")); +ok($objSrLatn->eq("Lj", "\x{1C8}")); +ok($objSrLatn->eq("LJ", "\x{1C8}")); +ok($objSrLatn->eq("LJ", "\x{1C7}")); + +ok($objSrLatn->eq("nj", "\x{1CC}")); +ok($objSrLatn->eq("Nj", "\x{1CC}")); +ok($objSrLatn->eq("Nj", "\x{1CB}")); +ok($objSrLatn->eq("NJ", "\x{1CB}")); +ok($objSrLatn->eq("NJ", "\x{1CA}")); + +# 50 + +$objSrLatn->change(level => 3); + +ok($objSrLatn->lt("c\x{30C}", "C\x{30C}")); +ok($objSrLatn->lt("c\x{301}", "C\x{301}")); +ok($objSrLatn->lt("dz\x{30C}","Dz\x{30C}")); +ok($objSrLatn->lt("Dz\x{30C}","DZ\x{30C}")); +ok($objSrLatn->lt("d\x{335}", "D\x{335}")); +ok($objSrLatn->lt("lj", "Lj")); +ok($objSrLatn->lt("Lj", "LJ")); +ok($objSrLatn->lt("nj", "Nj")); +ok($objSrLatn->lt("Nj", "NJ")); +ok($objSrLatn->lt("s\x{30C}", "S\x{30C}")); +ok($objSrLatn->lt("z\x{30C}", "Z\x{30C}")); + +# 61 + +ok($objSrLatn->lt("dz\x{30C}", "\x{1C6}")); +ok($objSrLatn->gt("Dz\x{30C}", "\x{1C6}")); +ok($objSrLatn->lt("Dz\x{30C}", "\x{1C5}")); +ok($objSrLatn->gt("DZ\x{30C}", "\x{1C5}")); +ok($objSrLatn->lt("DZ\x{30C}", "\x{1C4}")); + +ok($objSrLatn->lt("lj", "\x{1C9}")); +ok($objSrLatn->gt("Lj", "\x{1C9}")); +ok($objSrLatn->lt("Lj", "\x{1C8}")); +ok($objSrLatn->gt("LJ", "\x{1C8}")); +ok($objSrLatn->lt("LJ", "\x{1C7}")); + +ok($objSrLatn->lt("nj", "\x{1CC}")); +ok($objSrLatn->gt("Nj", "\x{1CC}")); +ok($objSrLatn->lt("Nj", "\x{1CB}")); +ok($objSrLatn->gt("NJ", "\x{1CB}")); +ok($objSrLatn->lt("NJ", "\x{1CA}")); + +# 76 + +ok($objSrLatn->eq("c\x{30C}", "\x{10D}")); +ok($objSrLatn->eq("C\x{30C}", "\x{10C}")); +ok($objSrLatn->eq("c\x{301}", "\x{107}")); +ok($objSrLatn->eq("c\x{341}", "\x{107}")); +ok($objSrLatn->eq("C\x{301}", "\x{106}")); +ok($objSrLatn->eq("C\x{341}", "\x{106}")); +ok($objSrLatn->eq("dz\x{30C}", "d\x{17E}")); +ok($objSrLatn->eq("dZ\x{30C}", "d\x{17D}")); +ok($objSrLatn->eq("Dz\x{30C}", "D\x{17E}")); +ok($objSrLatn->eq("DZ\x{30C}", "D\x{17D}")); +ok($objSrLatn->eq("d\x{335}", "\x{111}")); +ok($objSrLatn->eq("D\x{335}", "\x{110}")); +ok($objSrLatn->eq("s\x{30C}", "\x{161}")); +ok($objSrLatn->eq("S\x{30C}", "\x{160}")); +ok($objSrLatn->eq("z\x{30C}", "\x{17E}")); +ok($objSrLatn->eq("Z\x{30C}", "\x{17D}")); + +# 92 + +$objSrLatn->change(upper_before_lower => 1); + +ok($objSrLatn->gt("c\x{30C}", "C\x{30C}")); +ok($objSrLatn->gt("c\x{301}", "C\x{301}")); +ok($objSrLatn->gt("dz\x{30C}","Dz\x{30C}")); +ok($objSrLatn->gt("Dz\x{30C}","DZ\x{30C}")); +ok($objSrLatn->gt("d\x{335}", "D\x{335}")); +ok($objSrLatn->gt("lj", "Lj")); +ok($objSrLatn->gt("Lj", "LJ")); +ok($objSrLatn->gt("nj", "Nj")); +ok($objSrLatn->gt("Nj", "NJ")); +ok($objSrLatn->gt("s\x{30C}", "S\x{30C}")); +ok($objSrLatn->gt("z\x{30C}", "Z\x{30C}")); + +# 103 + +ok($objSrLatn->lt("DZ\x{30C}", "\x{1C4}")); +ok($objSrLatn->gt("Dz\x{30C}", "\x{1C4}")); +ok($objSrLatn->lt("Dz\x{30C}", "\x{1C5}")); +ok($objSrLatn->gt("dz\x{30C}", "\x{1C5}")); +ok($objSrLatn->lt("dz\x{30C}", "\x{1C6}")); + +ok($objSrLatn->lt("LJ", "\x{1C7}")); +ok($objSrLatn->gt("Lj", "\x{1C7}")); +ok($objSrLatn->lt("Lj", "\x{1C8}")); +ok($objSrLatn->gt("lj", "\x{1C8}")); +ok($objSrLatn->lt("lj", "\x{1C9}")); + +ok($objSrLatn->lt("NJ", "\x{1CA}")); +ok($objSrLatn->gt("Nj", "\x{1CA}")); +ok($objSrLatn->lt("Nj", "\x{1CB}")); +ok($objSrLatn->gt("nj", "\x{1CB}")); +ok($objSrLatn->lt("nj", "\x{1CC}")); + +# 118 diff --git a/cpan/Unicode-Collate/t/loc_sv.t b/cpan/Unicode-Collate/t/loc_sv.t index c28632ea06..4d7c85034b 100644 --- a/cpan/Unicode-Collate/t/loc_sv.t +++ b/cpan/Unicode-Collate/t/loc_sv.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 117 }; +BEGIN { plan tests => 115 }; use strict; use warnings; @@ -57,6 +57,7 @@ ok($objSv->lt($ouml, "\x{1C0}")); ok($objSv->eq('d', "\x{111}")); ok($objSv->eq("\x{111}", $eth)); +ok($objSv->eq('v', 'w')); ok($objSv->eq('y', $uuml)); ok($objSv->eq($uuml, "\x{171}")); @@ -67,12 +68,13 @@ ok($objSv->eq($ostk, "\x{151}")); ok($objSv->eq("\x{151}", "\x{153}")); ok($objSv->eq("\x{153}", $ocrc)); -# 16 +# 17 $objSv->change(level => 2); ok($objSv->lt('d', "\x{111}")); ok($objSv->lt("\x{111}", $eth)); +ok($objSv->lt('v', 'w')); ok($objSv->lt('y', $uuml)); ok($objSv->lt($uuml, "\x{171}")); @@ -83,16 +85,14 @@ ok($objSv->lt($ostk, "\x{151}")); ok($objSv->lt("\x{151}", "\x{153}")); ok($objSv->lt("\x{153}", $ocrc)); -# 26 +# 28 ok($objSv->eq("\x{111}", "\x{110}")); ok($objSv->eq($eth, $ETH)); ok($objSv->eq('th', $thrn)); ok($objSv->eq($thrn, 'TH')); ok($objSv->eq('TH', $THRN)); -ok($objSv->eq('v', 'w')); -ok($objSv->eq('w', 'V')); -ok($objSv->eq('V', 'W')); +ok($objSv->eq('w', 'W')); ok($objSv->eq($uuml, $Uuml)); ok($objSv->eq("\x{171}", "\x{170}")); ok($objSv->eq($arng, $Arng)); @@ -115,9 +115,7 @@ ok($objSv->lt($eth, $ETH)); ok($objSv->lt('th', $thrn)); ok($objSv->lt($thrn, 'TH')); ok($objSv->lt('TH', $THRN)); -ok($objSv->lt('v', 'w')); -ok($objSv->lt('w', 'V')); -ok($objSv->lt('V', 'W')); +ok($objSv->lt('w', 'W')); ok($objSv->lt($uuml, $Uuml)); ok($objSv->lt("\x{171}", "\x{170}")); ok($objSv->lt($arng, $Arng)); @@ -131,7 +129,7 @@ ok($objSv->lt("\x{151}", "\x{150}")); ok($objSv->lt("\x{153}", "\x{152}")); ok($objSv->lt($ocrc, $Ocrc)); -# 66 +# 64 ok($objSv->eq("d\x{335}", "\x{111}")); ok($objSv->eq("D\x{335}", "\x{110}")); @@ -154,7 +152,7 @@ ok($objSv->eq("O\x{30B}", "\x{150}")); ok($objSv->eq("o\x{302}", $ocrc)); ok($objSv->eq("O\x{302}", $Ocrc)); -# 86 +# 84 ok($objSv->eq("u\x{308}\x{300}", "\x{1DC}")); ok($objSv->eq("U\x{308}\x{300}", "\x{1DB}")); @@ -178,7 +176,7 @@ ok($objSv->eq("O\x{308}\x{304}", "\x{22A}")); ok($objSv->eq("o\x{338}\x{301}", "\x{1FF}")); ok($objSv->eq("O\x{338}\x{301}", "\x{1FE}")); -# 107 +# 105 ok($objSv->eq("o\x{302}\x{300}", "\x{1ED3}")); ok($objSv->eq("O\x{302}\x{300}", "\x{1ED2}")); @@ -191,4 +189,4 @@ ok($objSv->eq("O\x{302}\x{309}", "\x{1ED4}")); ok($objSv->eq("o\x{302}\x{323}", "\x{1ED9}")); ok($objSv->eq("O\x{302}\x{323}", "\x{1ED8}")); -# 117 +# 115 diff --git a/cpan/Unicode-Collate/t/loc_svrf.t b/cpan/Unicode-Collate/t/loc_svrf.t new file mode 100644 index 0000000000..8fc90a8bd3 --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_svrf.t @@ -0,0 +1,193 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 115 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $eth = pack 'U', 0xF0; +my $ETH = pack 'U', 0xD0; +my $thrn = pack 'U', 0xFE; +my $THRN = pack 'U', 0xDE; +my $uuml = pack 'U', 0xFC; +my $Uuml = pack 'U', 0xDC; +my $arng = pack 'U', 0xE5; +my $Arng = pack 'U', 0xC5; +my $auml = pack 'U', 0xE4; +my $Auml = pack 'U', 0xC4; +my $ae = pack 'U', 0xE6; +my $AE = pack 'U', 0xC6; +my $ouml = pack 'U', 0xF6; +my $Ouml = pack 'U', 0xD6; +my $ostk = pack 'U', 0xF8; +my $Ostk = pack 'U', 0xD8; +my $ocrc = pack 'U', 0xF4; +my $Ocrc = pack 'U', 0xD4; + +my $objSvReform = Unicode::Collate::Locale-> + new(locale => 'SV-reform', normalization => undef); + +ok($objSvReform->getlocale, 'sv__reformed'); + +$objSvReform->change(level => 1); + +ok($objSvReform->lt('z', $arng)); +ok($objSvReform->lt($arng, $auml)); +ok($objSvReform->lt($auml, $ouml)); +ok($objSvReform->lt($ouml, "\x{1C0}")); + +# 6 + +ok($objSvReform->lt('v', 'w')); +ok($objSvReform->gt('x', 'w')); + +ok($objSvReform->eq('d', "\x{111}")); +ok($objSvReform->eq("\x{111}", $eth)); +ok($objSvReform->eq('y', $uuml)); +ok($objSvReform->eq($uuml, "\x{171}")); + +ok($objSvReform->eq($auml, $ae)); +ok($objSvReform->eq($ae, "\x{119}")); +ok($objSvReform->eq($ouml, $ostk)); +ok($objSvReform->eq($ostk, "\x{151}")); +ok($objSvReform->eq("\x{151}", "\x{153}")); +ok($objSvReform->eq("\x{153}", $ocrc)); + +# 18 + +$objSvReform->change(level => 2); + +ok($objSvReform->lt('d', "\x{111}")); +ok($objSvReform->lt("\x{111}", $eth)); +ok($objSvReform->lt('y', $uuml)); +ok($objSvReform->lt($uuml, "\x{171}")); + +ok($objSvReform->lt($auml, $ae)); +ok($objSvReform->lt($ae, "\x{119}")); +ok($objSvReform->lt($ouml, $ostk)); +ok($objSvReform->lt($ostk, "\x{151}")); +ok($objSvReform->lt("\x{151}", "\x{153}")); +ok($objSvReform->lt("\x{153}", $ocrc)); + +# 28 + +ok($objSvReform->eq("\x{111}", "\x{110}")); +ok($objSvReform->eq($eth, $ETH)); +ok($objSvReform->eq('th', $thrn)); +ok($objSvReform->eq($thrn, 'TH')); +ok($objSvReform->eq('TH', $THRN)); +ok($objSvReform->eq('w', 'W')); +ok($objSvReform->eq($uuml, $Uuml)); +ok($objSvReform->eq("\x{171}", "\x{170}")); +ok($objSvReform->eq($arng, $Arng)); +ok($objSvReform->eq($auml, $Auml)); +ok($objSvReform->eq($ae, $AE)); +ok($objSvReform->eq($AE, "\x{1D2D}")); +ok($objSvReform->eq("\x{119}", "\x{118}")); +ok($objSvReform->eq($ouml, $Ouml)); +ok($objSvReform->eq($ostk, $Ostk)); +ok($objSvReform->eq("\x{151}", "\x{150}")); +ok($objSvReform->eq("\x{153}", "\x{152}")); +ok($objSvReform->eq($ocrc, $Ocrc)); + +# 46 + +$objSvReform->change(level => 3); + +ok($objSvReform->lt("\x{111}", "\x{110}")); +ok($objSvReform->lt($eth, $ETH)); +ok($objSvReform->lt('th', $thrn)); +ok($objSvReform->lt($thrn, 'TH')); +ok($objSvReform->lt('TH', $THRN)); +ok($objSvReform->lt('w', 'W')); +ok($objSvReform->lt($uuml, $Uuml)); +ok($objSvReform->lt("\x{171}", "\x{170}")); +ok($objSvReform->lt($arng, $Arng)); +ok($objSvReform->lt($auml, $Auml)); +ok($objSvReform->lt($ae, $AE)); +ok($objSvReform->lt($AE, "\x{1D2D}")); +ok($objSvReform->lt("\x{119}", "\x{118}")); +ok($objSvReform->lt($ouml, $Ouml)); +ok($objSvReform->lt($ostk, $Ostk)); +ok($objSvReform->lt("\x{151}", "\x{150}")); +ok($objSvReform->lt("\x{153}", "\x{152}")); +ok($objSvReform->lt($ocrc, $Ocrc)); + +# 64 + +ok($objSvReform->eq("d\x{335}", "\x{111}")); +ok($objSvReform->eq("D\x{335}", "\x{110}")); +ok($objSvReform->eq("u\x{308}", $uuml)); +ok($objSvReform->eq("U\x{308}", $Uuml)); +ok($objSvReform->eq("u\x{30B}", "\x{171}")); +ok($objSvReform->eq("U\x{30B}", "\x{170}")); +ok($objSvReform->eq("a\x{30A}", $arng)); +ok($objSvReform->eq("A\x{30A}", $Arng)); +ok($objSvReform->eq("a\x{308}", $auml)); +ok($objSvReform->eq("A\x{308}", $Auml)); +ok($objSvReform->eq("e\x{328}", "\x{119}")); +ok($objSvReform->eq("E\x{328}", "\x{118}")); +ok($objSvReform->eq("o\x{308}", $ouml)); +ok($objSvReform->eq("O\x{308}", $Ouml)); +ok($objSvReform->eq("o\x{338}", $ostk)); +ok($objSvReform->eq("O\x{338}", $Ostk)); +ok($objSvReform->eq("o\x{30B}", "\x{151}")); +ok($objSvReform->eq("O\x{30B}", "\x{150}")); +ok($objSvReform->eq("o\x{302}", $ocrc)); +ok($objSvReform->eq("O\x{302}", $Ocrc)); + +# 84 + +ok($objSvReform->eq("u\x{308}\x{300}", "\x{1DC}")); +ok($objSvReform->eq("U\x{308}\x{300}", "\x{1DB}")); +ok($objSvReform->eq("u\x{308}\x{301}", "\x{1D8}")); +ok($objSvReform->eq("U\x{308}\x{301}", "\x{1D7}")); +ok($objSvReform->eq("u\x{308}\x{304}", "\x{1D6}")); +ok($objSvReform->eq("U\x{308}\x{304}", "\x{1D5}")); +ok($objSvReform->eq("u\x{308}\x{30C}", "\x{1DA}")); +ok($objSvReform->eq("U\x{308}\x{30C}", "\x{1D9}")); +ok($objSvReform->eq("A\x{30A}", "\x{212B}")); +ok($objSvReform->eq("a\x{30A}\x{301}", "\x{1FB}")); +ok($objSvReform->eq("A\x{30A}\x{301}", "\x{1FA}")); +ok($objSvReform->eq("a\x{308}\x{304}", "\x{1DF}")); +ok($objSvReform->eq("A\x{308}\x{304}", "\x{1DE}")); +ok($objSvReform->eq("\x{1FD}", "$ae\x{301}")); +ok($objSvReform->eq("\x{1FC}", "$AE\x{301}")); +ok($objSvReform->eq("\x{1E3}", "$ae\x{304}")); +ok($objSvReform->eq("\x{1E2}", "$AE\x{304}")); +ok($objSvReform->eq("o\x{308}\x{304}", "\x{22B}")); +ok($objSvReform->eq("O\x{308}\x{304}", "\x{22A}")); +ok($objSvReform->eq("o\x{338}\x{301}", "\x{1FF}")); +ok($objSvReform->eq("O\x{338}\x{301}", "\x{1FE}")); + +# 105 + +ok($objSvReform->eq("o\x{302}\x{300}", "\x{1ED3}")); +ok($objSvReform->eq("O\x{302}\x{300}", "\x{1ED2}")); +ok($objSvReform->eq("o\x{302}\x{301}", "\x{1ED1}")); +ok($objSvReform->eq("O\x{302}\x{301}", "\x{1ED0}")); +ok($objSvReform->eq("o\x{302}\x{303}", "\x{1ED7}")); +ok($objSvReform->eq("O\x{302}\x{303}", "\x{1ED6}")); +ok($objSvReform->eq("o\x{302}\x{309}", "\x{1ED5}")); +ok($objSvReform->eq("O\x{302}\x{309}", "\x{1ED4}")); +ok($objSvReform->eq("o\x{302}\x{323}", "\x{1ED9}")); +ok($objSvReform->eq("O\x{302}\x{323}", "\x{1ED8}")); + +# 115 diff --git a/cpan/Unicode-Collate/t/loc_ta.t b/cpan/Unicode-Collate/t/loc_ta.t new file mode 100644 index 0000000000..710860ec9a --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_ta.t @@ -0,0 +1,85 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 52 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $Kssa = "\x{B95}\x{BCD}\x{BB7}"; + +my $objTa = Unicode::Collate::Locale-> + new(locale => 'TA', normalization => undef); + +ok($objTa->getlocale, 'ta'); + +$objTa->change(level => 1); + +ok($objTa->lt("\x{B94}", "\x{B82}")); +ok($objTa->gt("\x{B83}", "\x{B82}")); +ok($objTa->lt("\x{B83}", "\x{B95}\x{BCD}")); +ok($objTa->gt("\x{B95}", "\x{B95}\x{BCD}")); +ok($objTa->lt("\x{B95}", "\x{B99}\x{BCD}")); +ok($objTa->gt("\x{B99}", "\x{B99}\x{BCD}")); +ok($objTa->lt("\x{B99}", "\x{B9A}\x{BCD}")); +ok($objTa->gt("\x{B9A}", "\x{B9A}\x{BCD}")); +ok($objTa->lt("\x{B9A}", "\x{B9E}\x{BCD}")); +ok($objTa->gt("\x{B9E}", "\x{B9E}\x{BCD}")); +ok($objTa->lt("\x{B9E}", "\x{B9F}\x{BCD}")); +ok($objTa->gt("\x{B9F}", "\x{B9F}\x{BCD}")); +ok($objTa->lt("\x{B9F}", "\x{BA3}\x{BCD}")); +ok($objTa->gt("\x{BA3}", "\x{BA3}\x{BCD}")); +ok($objTa->lt("\x{BA3}", "\x{BA4}\x{BCD}")); +ok($objTa->gt("\x{BA4}", "\x{BA4}\x{BCD}")); +ok($objTa->lt("\x{BA4}", "\x{BA8}\x{BCD}")); +ok($objTa->gt("\x{BA8}", "\x{BA8}\x{BCD}")); +ok($objTa->lt("\x{BA8}", "\x{BAA}\x{BCD}")); +ok($objTa->gt("\x{BAA}", "\x{BAA}\x{BCD}")); +ok($objTa->lt("\x{BAA}", "\x{BAE}\x{BCD}")); +ok($objTa->gt("\x{BAE}", "\x{BAE}\x{BCD}")); +ok($objTa->lt("\x{BAE}", "\x{BAF}\x{BCD}")); +ok($objTa->gt("\x{BAF}", "\x{BAF}\x{BCD}")); +ok($objTa->lt("\x{BAF}", "\x{BB0}\x{BCD}")); +ok($objTa->gt("\x{BB0}", "\x{BB0}\x{BCD}")); +ok($objTa->lt("\x{BB0}", "\x{BB2}\x{BCD}")); +ok($objTa->gt("\x{BB2}", "\x{BB2}\x{BCD}")); +ok($objTa->lt("\x{BB2}", "\x{BB5}\x{BCD}")); +ok($objTa->gt("\x{BB5}", "\x{BB5}\x{BCD}")); +ok($objTa->lt("\x{BB5}", "\x{BB4}\x{BCD}")); +ok($objTa->gt("\x{BB4}", "\x{BB4}\x{BCD}")); +ok($objTa->lt("\x{BB4}", "\x{BB3}\x{BCD}")); +ok($objTa->gt("\x{BB3}", "\x{BB3}\x{BCD}")); +ok($objTa->lt("\x{BB3}", "\x{BB1}\x{BCD}")); +ok($objTa->gt("\x{BB1}", "\x{BB1}\x{BCD}")); +ok($objTa->lt("\x{BB1}", "\x{BA9}\x{BCD}")); +ok($objTa->gt("\x{BA9}", "\x{BA9}\x{BCD}")); +ok($objTa->lt("\x{BA9}", "\x{B9C}\x{BCD}")); +ok($objTa->gt("\x{B9C}", "\x{B9C}\x{BCD}")); +ok($objTa->lt("\x{B9C}", "\x{BB6}\x{BCD}")); +ok($objTa->gt("\x{BB6}", "\x{BB6}\x{BCD}")); +ok($objTa->lt("\x{BB6}", "\x{BB7}\x{BCD}")); +ok($objTa->gt("\x{BB7}", "\x{BB7}\x{BCD}")); +ok($objTa->lt("\x{BB7}", "\x{BB8}\x{BCD}")); +ok($objTa->gt("\x{BB8}", "\x{BB8}\x{BCD}")); +ok($objTa->lt("\x{BB8}", "\x{BB9}\x{BCD}")); +ok($objTa->gt("\x{BB9}", "\x{BB9}\x{BCD}")); +ok($objTa->lt("\x{BB9}", "${Kssa}\x{BCD}")); +ok($objTa->gt("${Kssa}", "${Kssa}\x{BCD}")); + +# 52 diff --git a/cpan/Unicode-Collate/t/loc_te.t b/cpan/Unicode-Collate/t/loc_te.t new file mode 100644 index 0000000000..38f68d062c --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_te.t @@ -0,0 +1,36 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 6 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objTe = Unicode::Collate::Locale-> + new(locale => 'TE', normalization => undef); + +ok($objTe->getlocale, 'te'); + +$objTe->change(level => 1); + +ok($objTe->lt("\x{C14}", "\x{C01}")); +ok($objTe->lt("\x{C01}", "\x{C02}")); +ok($objTe->lt("\x{C02}", "\x{C03}")); +ok($objTe->lt("\x{C03}", "\x{C15}")); + diff --git a/cpan/Unicode-Collate/t/loc_test.t b/cpan/Unicode-Collate/t/loc_test.t index 8d7d74a816..84eb85fd58 100644 --- a/cpan/Unicode-Collate/t/loc_test.t +++ b/cpan/Unicode-Collate/t/loc_test.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 120 }; +BEGIN { plan tests => 130 }; use strict; use warnings; @@ -25,7 +25,7 @@ ok(1); our (@listEs, @listEsT, @listFr); @listEs = qw( - cambio camella camello camelo Camerún + cambio camella camello camelo Camerún chico chile Chile CHILE chocolate cielo curso espacio espanto español esperanza lama líquido llama Llama LLAMA llamar luz nos nueve ñu ojo @@ -138,3 +138,16 @@ ok("@sortFr" eq "@listFr"); ok(ref($objEsT ->{$keyXS}), $UseXS); } # 120 + +ok(Unicode::Collate::Locale::_locale('sr'), 'sr'); +ok(Unicode::Collate::Locale::_locale('sr_Cyrl'), 'sr'); +ok(Unicode::Collate::Locale::_locale('sr_Latn'), 'sr_Latn'); +ok(Unicode::Collate::Locale::_locale('sr_LATN'), 'sr_Latn'); +ok(Unicode::Collate::Locale::_locale('sr_latn'), 'sr_Latn'); +ok(Unicode::Collate::Locale::_locale('de'), 'default'); +ok(Unicode::Collate::Locale::_locale('de_phone'), 'de__phonebook'); +ok(Unicode::Collate::Locale::_locale('de__phonebook'), 'de__phonebook'); +ok(Unicode::Collate::Locale::_locale('de-phonebk'), 'de__phonebook'); +ok(Unicode::Collate::Locale::_locale('de--phonebk'), 'de__phonebook'); + +# 130 diff --git a/cpan/Unicode-Collate/t/loc_th.t b/cpan/Unicode-Collate/t/loc_th.t new file mode 100644 index 0000000000..c7833b70da --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_th.t @@ -0,0 +1,81 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 25 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objTh = Unicode::Collate::Locale-> + new(locale => 'TH', normalization => undef); + + +ok($objTh->getlocale, 'th'); + +$objTh->change(level => 1); + +# shifted + +ok($objTh->eq("\x{E2F}", "")); +ok($objTh->eq("\x{E46}", "")); +ok($objTh->eq("\x{E4F}", "")); + +# 5 + +$objTh->change(variable => "non-ignorable"); + +ok($objTh->lt("\x{E2F}", "\x{E46}")); +ok($objTh->lt("\x{E46}", "\x{E4F}")); + +ok($objTh->lt("\x{E2E}", "\x{E4D}")); +ok($objTh->lt("\x{E4D}", "\x{E30}")); + +ok($objTh->lt("\x{E44}", "\x{E3A}")); + +# 10 + +ok($objTh->eq("\x{E4E}", "")); +ok($objTh->eq("\x{E4C}", "")); +ok($objTh->eq("\x{E47}", "")); +ok($objTh->eq("\x{E48}", "")); +ok($objTh->eq("\x{E49}", "")); +ok($objTh->eq("\x{E4A}", "")); + +# 16 + +$objTh->change(level => 2); + +ok($objTh->lt("\x{E4E}", "\x{E4C}")); +ok($objTh->lt("\x{E4C}", "\x{E47}")); +ok($objTh->lt("\x{E47}", "\x{E48}")); +ok($objTh->lt("\x{E48}", "\x{E49}")); +ok($objTh->lt("\x{E49}", "\x{E4A}")); +ok($objTh->lt("\x{E4A}", "\x{E4B}")); + +ok($objTh->eq("\x{E32}", "\x{E45}")); + +# 23 + +$objTh->change(level => 3); + +ok($objTh->lt("\x{E32}", "\x{E45}")); + +ok($objTh->eq("\x{E33}", "\x{E4D}\x{E32}")); + +# 25 diff --git a/cpan/Unicode-Collate/t/loc_uk.t b/cpan/Unicode-Collate/t/loc_uk.t index 13258bb5a4..cda53dfaad 100644 --- a/cpan/Unicode-Collate/t/loc_uk.t +++ b/cpan/Unicode-Collate/t/loc_uk.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 7 }; +BEGIN { plan tests => 208 }; use strict; use warnings; @@ -29,21 +29,197 @@ ok($objUk->getlocale, 'uk'); $objUk->change(level => 1); -ok($objUk->lt("\x{433}", "\x{491}")); -ok($objUk->gt("\x{434}", "\x{491}")); - -# 4 +ok($objUk->gt("\x{491}", "\x{433}")); +ok($objUk->lt("\x{491}", "\x{434}")); +ok($objUk->gt("\x{44C}", "\x{44F}")); +ok($objUk->lt("\x{44C}", "\x{519}")); + +# 6 + +ok($objUk->gt("\x{4E5}", "\x{438}")); # not suppressed +ok($objUk->gt("\x{4E4}", "\x{418}")); # not suppressed +ok($objUk->gt("\x{457}", "\x{456}")); # not suppressed +ok($objUk->gt("\x{407}", "\x{406}")); # not suppressed +ok($objUk->gt("\x{439}", "\x{438}")); # not suppressed +ok($objUk->gt("\x{419}", "\x{418}")); # not suppressed + +# 12 + +ok($objUk->eq("\x{4D1}", "\x{430}")); +ok($objUk->eq("\x{4D0}", "\x{410}")); +ok($objUk->eq("\x{4D3}", "\x{430}")); +ok($objUk->eq("\x{4D2}", "\x{410}")); +ok($objUk->eq("\x{4DB}", "\x{4D9}")); +ok($objUk->eq("\x{4DA}", "\x{4D8}")); +ok($objUk->eq("\x{453}", "\x{433}")); +ok($objUk->eq("\x{403}", "\x{413}")); +ok($objUk->eq("\x{450}", "\x{435}")); # not contraction +ok($objUk->eq("\x{400}", "\x{415}")); # not contraction +ok($objUk->eq("\x{451}", "\x{435}")); # not contraction +ok($objUk->eq("\x{401}", "\x{415}")); # not contraction +ok($objUk->eq("\x{4D7}", "\x{435}")); +ok($objUk->eq("\x{4D6}", "\x{415}")); +ok($objUk->eq("\x{4C2}", "\x{436}")); # not contraction +ok($objUk->eq("\x{4C1}", "\x{416}")); # not contraction +ok($objUk->eq("\x{4DD}", "\x{436}")); +ok($objUk->eq("\x{4DC}", "\x{416}")); +ok($objUk->eq("\x{4DF}", "\x{437}")); +ok($objUk->eq("\x{4DE}", "\x{417}")); +ok($objUk->eq("\x{45D}", "\x{438}")); # not contraction +ok($objUk->eq("\x{40D}", "\x{418}")); # not contraction +ok($objUk->eq("\x{4E3}", "\x{438}")); # not contraction +ok($objUk->eq("\x{4E2}", "\x{418}")); # not contraction +ok($objUk->eq("\x{4E7}", "\x{43E}")); +ok($objUk->eq("\x{4E6}", "\x{41E}")); +ok($objUk->eq("\x{4EB}", "\x{4E9}")); +ok($objUk->eq("\x{4EA}", "\x{4E8}")); +ok($objUk->eq("\x{45C}", "\x{43A}")); +ok($objUk->eq("\x{40C}", "\x{41A}")); +ok($objUk->eq("\x{4EF}", "\x{443}")); # not contraction +ok($objUk->eq("\x{4EE}", "\x{423}")); # not contraction +ok($objUk->eq("\x{45E}", "\x{443}")); +ok($objUk->eq("\x{40E}", "\x{423}")); +ok($objUk->eq("\x{4F1}", "\x{443}")); +ok($objUk->eq("\x{4F0}", "\x{423}")); +ok($objUk->eq("\x{4F3}", "\x{443}")); +ok($objUk->eq("\x{4F2}", "\x{423}")); +ok($objUk->eq("\x{4F5}", "\x{447}")); +ok($objUk->eq("\x{4F4}", "\x{427}")); +ok($objUk->eq("\x{4F9}", "\x{44B}")); +ok($objUk->eq("\x{4F8}", "\x{42B}")); +ok($objUk->eq("\x{4ED}", "\x{44D}")); +ok($objUk->eq("\x{4EC}", "\x{42D}")); +ok($objUk->eq("\x{477}", "\x{475}")); +ok($objUk->eq("\x{476}", "\x{474}")); + +# 58 $objUk->change(level => 2); ok($objUk->eq("\x{491}", "\x{490}")); +ok($objUk->eq("\x{44C}", "\x{42C}")); + +# 60 + +ok($objUk->gt("\x{4D1}", "\x{430}")); +ok($objUk->gt("\x{4D0}", "\x{410}")); +ok($objUk->gt("\x{4D3}", "\x{430}")); +ok($objUk->gt("\x{4D2}", "\x{410}")); +ok($objUk->gt("\x{4DB}", "\x{4D9}")); +ok($objUk->gt("\x{4DA}", "\x{4D8}")); +ok($objUk->gt("\x{453}", "\x{433}")); +ok($objUk->gt("\x{403}", "\x{413}")); +ok($objUk->gt("\x{450}", "\x{435}")); # not contraction +ok($objUk->gt("\x{400}", "\x{415}")); # not contraction +ok($objUk->gt("\x{451}", "\x{435}")); # not contraction +ok($objUk->gt("\x{401}", "\x{415}")); # not contraction +ok($objUk->gt("\x{4D7}", "\x{435}")); +ok($objUk->gt("\x{4D6}", "\x{415}")); +ok($objUk->gt("\x{4C2}", "\x{436}")); # not contraction +ok($objUk->gt("\x{4C1}", "\x{416}")); # not contraction +ok($objUk->gt("\x{4DD}", "\x{436}")); +ok($objUk->gt("\x{4DC}", "\x{416}")); +ok($objUk->gt("\x{4DF}", "\x{437}")); +ok($objUk->gt("\x{4DE}", "\x{417}")); +ok($objUk->gt("\x{45D}", "\x{438}")); # not contraction +ok($objUk->gt("\x{40D}", "\x{418}")); # not contraction +ok($objUk->gt("\x{4E3}", "\x{438}")); # not contraction +ok($objUk->gt("\x{4E2}", "\x{418}")); # not contraction +ok($objUk->gt("\x{4E7}", "\x{43E}")); +ok($objUk->gt("\x{4E6}", "\x{41E}")); +ok($objUk->gt("\x{4EB}", "\x{4E9}")); +ok($objUk->gt("\x{4EA}", "\x{4E8}")); +ok($objUk->gt("\x{45C}", "\x{43A}")); +ok($objUk->gt("\x{40C}", "\x{41A}")); +ok($objUk->gt("\x{4EF}", "\x{443}")); # not contraction +ok($objUk->gt("\x{4EE}", "\x{423}")); # not contraction +ok($objUk->gt("\x{45E}", "\x{443}")); +ok($objUk->gt("\x{40E}", "\x{423}")); +ok($objUk->gt("\x{4F1}", "\x{443}")); +ok($objUk->gt("\x{4F0}", "\x{423}")); +ok($objUk->gt("\x{4F3}", "\x{443}")); +ok($objUk->gt("\x{4F2}", "\x{423}")); +ok($objUk->gt("\x{4F5}", "\x{447}")); +ok($objUk->gt("\x{4F4}", "\x{427}")); +ok($objUk->gt("\x{4F9}", "\x{44B}")); +ok($objUk->gt("\x{4F8}", "\x{42B}")); +ok($objUk->gt("\x{4ED}", "\x{44D}")); +ok($objUk->gt("\x{4EC}", "\x{42D}")); +ok($objUk->gt("\x{477}", "\x{475}")); +ok($objUk->gt("\x{476}", "\x{474}")); + +# 106 $objUk->change(level => 3); ok($objUk->lt("\x{491}", "\x{490}")); +ok($objUk->lt("\x{44C}", "\x{42C}")); + +# 108 + +ok($objUk->eq("\x{4E5}", "\x{438}\x{308}")); # not suppressed +ok($objUk->eq("\x{4E4}", "\x{418}\x{308}")); # not suppressed +ok($objUk->eq("\x{457}", "\x{456}\x{308}")); # not suppressed +ok($objUk->eq("\x{407}", "\x{406}\x{308}")); # not suppressed +ok($objUk->eq("\x{439}", "\x{438}\x{306}")); # not suppressed +ok($objUk->eq("\x{419}", "\x{418}\x{306}")); # not suppressed + +# 114 + +for my $i ("", "\0") { + ok($objUk->eq("\x{4D1}", "\x{430}$i\x{306}")); + ok($objUk->eq("\x{4D0}", "\x{410}$i\x{306}")); + ok($objUk->eq("\x{4D3}", "\x{430}$i\x{308}")); + ok($objUk->eq("\x{4D2}", "\x{410}$i\x{308}")); + ok($objUk->eq("\x{4DB}", "\x{4D9}$i\x{308}")); + ok($objUk->eq("\x{4DA}", "\x{4D8}$i\x{308}")); + ok($objUk->eq("\x{453}", "\x{433}$i\x{301}")); + ok($objUk->eq("\x{403}", "\x{413}$i\x{301}")); + ok($objUk->eq("\x{450}", "\x{435}$i\x{300}")); # not contraction + ok($objUk->eq("\x{400}", "\x{415}$i\x{300}")); # not contraction + ok($objUk->eq("\x{451}", "\x{435}$i\x{308}")); # not contraction + ok($objUk->eq("\x{401}", "\x{415}$i\x{308}")); # not contraction + ok($objUk->eq("\x{4D7}", "\x{435}$i\x{306}")); + ok($objUk->eq("\x{4D6}", "\x{415}$i\x{306}")); + ok($objUk->eq("\x{4C2}", "\x{436}$i\x{306}")); # not contraction + ok($objUk->eq("\x{4C1}", "\x{416}$i\x{306}")); # not contraction + ok($objUk->eq("\x{4DD}", "\x{436}$i\x{308}")); + ok($objUk->eq("\x{4DC}", "\x{416}$i\x{308}")); + ok($objUk->eq("\x{4DF}", "\x{437}$i\x{308}")); + ok($objUk->eq("\x{4DE}", "\x{417}$i\x{308}")); + ok($objUk->eq("\x{45D}", "\x{438}$i\x{300}")); # not contraction + ok($objUk->eq("\x{40D}", "\x{418}$i\x{300}")); # not contraction + ok($objUk->eq("\x{4E3}", "\x{438}$i\x{304}")); # not contraction + ok($objUk->eq("\x{4E2}", "\x{418}$i\x{304}")); # not contraction + ok($objUk->eq("\x{4E7}", "\x{43E}$i\x{308}")); + ok($objUk->eq("\x{4E6}", "\x{41E}$i\x{308}")); + ok($objUk->eq("\x{4EB}", "\x{4E9}$i\x{308}")); + ok($objUk->eq("\x{4EA}", "\x{4E8}$i\x{308}")); + ok($objUk->eq("\x{45C}", "\x{43A}$i\x{301}")); + ok($objUk->eq("\x{40C}", "\x{41A}$i\x{301}")); + ok($objUk->eq("\x{4EF}", "\x{443}$i\x{304}")); # not contraction + ok($objUk->eq("\x{4EE}", "\x{423}$i\x{304}")); # not contraction + ok($objUk->eq("\x{45E}", "\x{443}$i\x{306}")); + ok($objUk->eq("\x{40E}", "\x{423}$i\x{306}")); + ok($objUk->eq("\x{4F1}", "\x{443}$i\x{308}")); + ok($objUk->eq("\x{4F0}", "\x{423}$i\x{308}")); + ok($objUk->eq("\x{4F3}", "\x{443}$i\x{30B}")); + ok($objUk->eq("\x{4F2}", "\x{423}$i\x{30B}")); + ok($objUk->eq("\x{4F5}", "\x{447}$i\x{308}")); + ok($objUk->eq("\x{4F4}", "\x{427}$i\x{308}")); + ok($objUk->eq("\x{4F9}", "\x{44B}$i\x{308}")); + ok($objUk->eq("\x{4F8}", "\x{42B}$i\x{308}")); + ok($objUk->eq("\x{4ED}", "\x{44D}$i\x{308}")); + ok($objUk->eq("\x{4EC}", "\x{42D}$i\x{308}")); + ok($objUk->eq("\x{477}", "\x{475}$i\x{30F}")); + ok($objUk->eq("\x{476}", "\x{474}$i\x{30F}")); +} + +# 206 $objUk->change(upper_before_lower => 1); ok($objUk->gt("\x{491}", "\x{490}")); +ok($objUk->gt("\x{44C}", "\x{42C}")); -# 7 +# 208 diff --git a/cpan/Unicode-Collate/t/loc_ur.t b/cpan/Unicode-Collate/t/loc_ur.t new file mode 100644 index 0000000000..4e173fec01 --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_ur.t @@ -0,0 +1,136 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 91 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objUr = Unicode::Collate::Locale-> + new(locale => 'UR', normalization => undef); + +ok($objUr->getlocale, 'ur'); + +$objUr->change(level => 1); + +ok($objUr->lt("\x{627}", "\x{622}")); +ok($objUr->lt("\x{622}", "\x{628}")); +ok($objUr->lt("\x{628}", "\x{628}\x{6BE}")); +ok($objUr->lt("\x{628}\x{6BE}", "\x{67E}")); +ok($objUr->lt("\x{67E}", "\x{67E}\x{6BE}")); +ok($objUr->lt("\x{67E}\x{6BE}", "\x{62A}")); +ok($objUr->lt("\x{62A}", "\x{62A}\x{6BE}")); +ok($objUr->lt("\x{62A}\x{6BE}", "\x{679}")); +ok($objUr->lt("\x{679}", "\x{679}\x{6BE}")); +ok($objUr->lt("\x{679}\x{6BE}", "\x{62B}")); +ok($objUr->lt("\x{62B}", "\x{62C}")); +ok($objUr->lt("\x{62C}", "\x{62C}\x{6BE}")); +ok($objUr->lt("\x{62C}\x{6BE}", "\x{686}")); +ok($objUr->lt("\x{686}", "\x{686}\x{6BE}")); +ok($objUr->lt("\x{686}\x{6BE}", "\x{62D}")); +ok($objUr->lt("\x{62D}", "\x{62E}")); +ok($objUr->lt("\x{62E}", "\x{62F}")); +ok($objUr->lt("\x{62F}", "\x{62F}\x{6BE}")); +ok($objUr->lt("\x{62F}\x{6BE}", "\x{688}")); +ok($objUr->lt("\x{688}", "\x{688}\x{6BE}")); +ok($objUr->lt("\x{688}\x{6BE}", "\x{630}")); +ok($objUr->lt("\x{630}", "\x{631}")); +ok($objUr->lt("\x{631}", "\x{631}\x{6BE}")); +ok($objUr->lt("\x{631}\x{6BE}", "\x{691}")); +ok($objUr->lt("\x{691}", "\x{691}\x{6BE}")); +ok($objUr->lt("\x{691}\x{6BE}", "\x{632}")); +ok($objUr->lt("\x{632}", "\x{698}")); +ok($objUr->lt("\x{698}", "\x{633}")); +ok($objUr->lt("\x{633}", "\x{634}")); +ok($objUr->lt("\x{634}", "\x{635}")); +ok($objUr->lt("\x{635}", "\x{636}")); +ok($objUr->lt("\x{636}", "\x{637}")); +ok($objUr->lt("\x{637}", "\x{638}")); +ok($objUr->lt("\x{638}", "\x{639}")); +ok($objUr->lt("\x{639}", "\x{63A}")); +ok($objUr->lt("\x{63A}", "\x{641}")); +ok($objUr->lt("\x{641}", "\x{642}")); +ok($objUr->lt("\x{642}", "\x{6A9}")); +ok($objUr->lt("\x{6A9}", "\x{6A9}\x{6BE}")); +ok($objUr->lt("\x{6A9}\x{6BE}", "\x{6AF}")); +ok($objUr->lt("\x{6AF}", "\x{6AF}\x{6BE}")); +ok($objUr->lt("\x{6AF}\x{6BE}", "\x{644}")); +ok($objUr->lt("\x{644}", "\x{644}\x{6BE}")); +ok($objUr->lt("\x{644}\x{6BE}", "\x{645}")); +ok($objUr->lt("\x{645}", "\x{645}\x{6BE}")); +ok($objUr->lt("\x{645}\x{6BE}", "\x{646}")); +ok($objUr->lt("\x{646}", "\x{646}\x{6BE}")); +ok($objUr->lt("\x{646}\x{6BE}", "\x{6BA}")); +ok($objUr->lt("\x{6BA}", "\x{6BA}\x{6BE}")); +ok($objUr->lt("\x{6BA}\x{6BE}", "\x{648}")); +ok($objUr->lt("\x{648}", "\x{648}\x{6BE}")); +ok($objUr->lt("\x{648}\x{6BE}", "\x{6C1}")); +ok($objUr->lt("\x{6C1}", "\x{6BE}")); +ok($objUr->lt("\x{6BE}", "\x{6C3}")); +ok($objUr->lt("\x{6C3}", "\x{621}")); +ok($objUr->lt("\x{621}", "\x{6CC}")); +ok($objUr->lt("\x{6CC}", "\x{6CC}\x{6BE}")); +ok($objUr->lt("\x{6CC}\x{6BE}", "\x{6D2}")); +ok($objUr->lt("\x{6D2}", "\x{67B}")); + +# 61 + +ok($objUr->eq("\x{627}", "\x{623}")); +ok($objUr->eq("\x{648}", "\x{624}")); +ok($objUr->eq("\x{6C1}", "\x{6C2}")); +ok($objUr->eq("\x{6CC}", "\x{626}")); +ok($objUr->eq("\x{6D2}", "\x{6D3}")); + +# 66 + +$objUr->change(level => 2); + +ok($objUr->lt("\x{627}", "\x{623}")); +ok($objUr->lt("\x{648}", "\x{624}")); +ok($objUr->lt("\x{6C1}", "\x{6C2}")); +ok($objUr->lt("\x{6CC}", "\x{626}")); +ok($objUr->lt("\x{6D2}", "\x{6D3}")); + +# 71 + +ok($objUr->lt("\x{652}", "\x{64E}")); +ok($objUr->lt("\x{64E}", "\x{650}")); +ok($objUr->lt("\x{650}", "\x{64F}")); +ok($objUr->lt("\x{64F}", "\x{670}")); +ok($objUr->lt("\x{670}", "\x{656}")); +ok($objUr->lt("\x{656}", "\x{657}")); +ok($objUr->lt("\x{657}", "\x{64B}")); +ok($objUr->lt("\x{64B}", "\x{64D}")); +ok($objUr->lt("\x{64D}", "\x{64C}")); +ok($objUr->lt("\x{64C}", "\x{654}")); +ok($objUr->lt("\x{654}", "\x{651}")); +ok($objUr->lt("\x{651}", "\x{658}")); +ok($objUr->lt("\x{658}", "\x{653}")); +ok($objUr->lt("\x{653}", "\x{655}")); + +# 85 + +ok($objUr->eq("\x{623}", "\x{627}\x{654}")); +ok($objUr->eq("\x{622}", "\x{627}\x{653}")); +ok($objUr->eq("\x{624}", "\x{648}\x{654}")); +ok($objUr->eq("\x{6C2}", "\x{6C1}\x{654}")); +ok($objUr->eq("\x{626}", "\x{64A}\x{654}")); +ok($objUr->eq("\x{6D3}", "\x{6D2}\x{654}")); + +# 91 diff --git a/cpan/Unicode-Collate/t/loc_wae.t b/cpan/Unicode-Collate/t/loc_wae.t new file mode 100644 index 0000000000..aaaa6eee37 --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_wae.t @@ -0,0 +1,138 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 78 }; + +use strict; +use warnings; +use Unicode::Collate::Locale; + +ok(1); + +######################### + +my $objWae = Unicode::Collate::Locale-> + new(locale => 'WAE', normalization => undef); + +ok($objWae->getlocale, 'wae'); + +$objWae->change(level => 1); + +ok($objWae->lt("aa", "a9")); +ok($objWae->gt("aA", "a9")); +ok($objWae->gt("Aa", "a9")); +ok($objWae->gt("AA", "a9")); +ok($objWae->lt("ee", "e9")); +ok($objWae->gt("eE", "e9")); +ok($objWae->gt("Ee", "e9")); +ok($objWae->gt("EE", "e9")); +ok($objWae->lt("ii", "i9")); +ok($objWae->gt("iI", "i9")); +ok($objWae->gt("Ii", "i9")); +ok($objWae->gt("II", "i9")); +ok($objWae->lt("oo", "o9")); +ok($objWae->gt("oO", "o9")); +ok($objWae->gt("Oo", "o9")); +ok($objWae->gt("OO", "o9")); +ok($objWae->lt("uu", "u9")); +ok($objWae->gt("uU", "u9")); +ok($objWae->gt("Uu", "u9")); +ok($objWae->gt("UU", "u9")); + +# 22 + +ok($objWae->lt("ch", "c9")); +ok($objWae->gt("cH", "c9")); +ok($objWae->gt("Ch", "c9")); +ok($objWae->gt("CH", "c9")); + +ok($objWae->lt("sch", "s9")); +ok($objWae->gt("scH", "s9")); +ok($objWae->gt("sCh", "s9")); +ok($objWae->gt("sCH", "s9")); +ok($objWae->gt("Sch", "s9")); +ok($objWae->gt("ScH", "s9")); +ok($objWae->gt("SCh", "s9")); +ok($objWae->gt("SCH", "s9")); + +# 34 + +ok($objWae->lt("a\x{308}a\x{308}", "a9")); +ok($objWae->gt("a\x{308}A\x{308}", "a9")); +ok($objWae->gt("A\x{308}a\x{308}", "a9")); +ok($objWae->gt("A\x{308}A\x{308}", "a9")); +ok($objWae->lt("o\x{308}o\x{308}", "o9")); +ok($objWae->gt("o\x{308}O\x{308}", "o9")); +ok($objWae->gt("O\x{308}o\x{308}", "o9")); +ok($objWae->gt("O\x{308}O\x{308}", "o9")); +ok($objWae->lt("u\x{308}u\x{308}", "u9")); +ok($objWae->gt("u\x{308}U\x{308}", "u9")); +ok($objWae->gt("U\x{308}u\x{308}", "u9")); +ok($objWae->gt("U\x{308}U\x{308}", "u9")); + +# 46 + +$objWae->change(level => 3); + +ok($objWae->eq("a\x{301}", pack('U', 0xE1))); +ok($objWae->eq("e\x{301}", pack('U', 0xE9))); +ok($objWae->eq("i\x{301}", pack('U', 0xED))); +ok($objWae->eq("o\x{301}", pack('U', 0xF3))); +ok($objWae->eq("u\x{301}", pack('U', 0xFA))); +ok($objWae->eq("a\x{301}", "aa")); +ok($objWae->eq("e\x{301}", "ee")); +ok($objWae->eq("i\x{301}", "ii")); +ok($objWae->eq("o\x{301}", "oo")); +ok($objWae->eq("u\x{301}", "uu")); + +# 56 + +ok($objWae->eq("c\x{30C}", "\x{10D}")); +ok($objWae->eq("s\x{30C}", "\x{161}")); +ok($objWae->eq("c\x{30C}", "ch")); +ok($objWae->eq("s\x{30C}", "sch")); + +# 60 + +my $a1 = pack('U', 0xE4); +my $o1 = pack('U', 0xF6); +my $u1 = pack('U', 0xFC); +my $a2 = "a\x{308}"; +my $o2 = "o\x{308}"; +my $u2 = "u\x{308}"; + +ok($objWae->eq($a1, $a2)); +ok($objWae->eq($o1, $o2)); +ok($objWae->eq($u1, $u2)); + +ok($objWae->eq("a\x{303}", pack('U', 0xE3))); +ok($objWae->eq("o\x{303}", pack('U', 0xF5))); +ok($objWae->eq("u\x{303}", "\x{169}")); + +# 66 + +ok($objWae->eq("a\x{303}", $a1.$a1)); +ok($objWae->eq("a\x{303}", $a1.$a2)); +ok($objWae->eq("a\x{303}", $a2.$a1)); +ok($objWae->eq("a\x{303}", $a2.$a2)); +ok($objWae->eq("o\x{303}", $o1.$o1)); +ok($objWae->eq("o\x{303}", $o1.$o2)); +ok($objWae->eq("o\x{303}", $o2.$o1)); +ok($objWae->eq("o\x{303}", $o2.$o2)); +ok($objWae->eq("u\x{303}", $u1.$u1)); +ok($objWae->eq("u\x{303}", $u1.$u2)); +ok($objWae->eq("u\x{303}", $u2.$u1)); +ok($objWae->eq("u\x{303}", $u2.$u2)); + +# 78 diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm index aa8bcc47f2..cfdfc5384b 100644 --- a/dist/B-Deparse/Deparse.pm +++ b/dist/B-Deparse/Deparse.pm @@ -19,32 +19,21 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG CVf_METHOD CVf_LVALUE PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE - PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED), - ($] < 5.008004 ? () : 'OPpSORT_INPLACE'), - ($] < 5.008006 ? () : qw(OPpSORT_DESCEND OPpITER_REVERSED)), - ($] < 5.008009 ? () : qw(OPpCONST_NOVER OPpPAD_STATE)), - ($] < 5.009 ? 'PMf_SKIPWHITE' : qw(RXf_SKIPWHITE)), - ($] < 5.011 ? 'CVf_LOCKED' : 'OPpREVERSE_INPLACE'), - ($] < 5.013 ? () : 'PMf_NONDESTRUCT'), - ($] < 5.015003 && - # This empirical feature test is required during the - # transitional phase where blead still identifies itself - # as 5.15.2 but has had $[ removed. After blead has its - # version number bumped to 5.15.3, this can be reduced to - # just test $] < 5.015003. - ($] < 5.015002 || do { require B; exists(&B::OPpCONST_ARYBASE) }) - ? qw(OPpCONST_ARYBASE) : ()); -$VERSION = "1.09"; + PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); +$VERSION = "1.10"; use strict; use vars qw/$AUTOLOAD/; use warnings (); BEGIN { + # List version-specific constants here. # Easiest way to keep this code portable between version looks to # be to fake up a dummy constant that will never actually be true. foreach (qw(OPpSORT_INPLACE OPpSORT_DESCEND OPpITER_REVERSED OPpCONST_NOVER - OPpPAD_STATE RXf_SKIPWHITE CVf_LOCKED OPpREVERSE_INPLACE - PMf_NONDESTRUCT OPpCONST_ARYBASE)) { + OPpPAD_STATE PMf_SKIPWHITE RXf_SKIPWHITE + CVf_LOCKED OPpREVERSE_INPLACE + PMf_NONDESTRUCT OPpCONST_ARYBASE OPpEVAL_BYTES)) { + eval { import B $_ }; no strict 'refs'; *{$_} = sub () {0} unless *{$_}{CODE}; } @@ -1557,6 +1546,7 @@ my %feature_keywords = ( when => 'switch', default => 'switch', break => 'switch', + evalbytes=>'evalbytes', ); sub keyword { @@ -1564,11 +1554,9 @@ sub keyword { my $name = shift; return $name if $name =~ /^CORE::/; # just in case if (exists $feature_keywords{$name}) { - return - $self->{'hinthash'} - && $self->{'hinthash'}{"feature_$feature_keywords{$name}"} - ? $name - : "CORE::$name"; + return "CORE::$name" + if !$self->{'hinthash'} + || !$self->{'hinthash'}{"feature_$feature_keywords{$name}"} } if ( $name !~ /^(?:chom?p|do|exec|glob|s(?:elect|ystem))\z/ @@ -1766,7 +1754,12 @@ sub pp_alarm { unop(@_, "alarm") } sub pp_sleep { maybe_targmy(@_, \&unop, "sleep") } sub pp_dofile { unop(@_, "do") } -sub pp_entereval { unop(@_, "eval") } +sub pp_entereval { + unop( + @_, + $_[1]->private & OPpEVAL_BYTES ? $_[0]->keyword('evalbytes') : "eval" + ) +} sub pp_ghbyname { unop(@_, "gethostbyname") } sub pp_gnbyname { unop(@_, "getnetbyname") } @@ -2510,7 +2503,7 @@ sub indirop { my $self = shift; my($op, $cx, $name) = @_; my($expr, @exprs); - my $kid = $op->first->sibling; + my $firstkid = my $kid = $op->first->sibling; my $indir = ""; if ($op->flags & OPf_STACKED) { $indir = $kid; @@ -2534,7 +2527,7 @@ sub indirop { $indir = '{$b cmp $a} '; } for (; !null($kid); $kid = $kid->sibling) { - $expr = $self->deparse($kid, 6); + $expr = $self->deparse($kid, !$indir && $kid == $firstkid && $name eq "sort" && $firstkid->name eq "entersub" ? 16 : 6); push @exprs, $expr; } my $name2; @@ -2547,7 +2540,7 @@ sub indirop { } my $args = $indir . join(", ", @exprs); - if ($indir ne "" and $name eq "sort") { + if ($indir ne "" && $name eq "sort") { # We don't want to say "sort(f 1, 2, 3)", since perl -w will # give bareword warnings in that case. Therefore if context # requires, we'll put parens around the outside "(sort f 1, 2, @@ -2559,6 +2552,13 @@ sub indirop { } else { return "$name2 $args"; } + } elsif ( + !$indir && $name eq "sort" + && $op->first->sibling->name eq 'entersub' + ) { + # We cannot say sort foo(bar), as foo will be interpreted as a + # comparison routine. We have to say sort(...) in that case. + return "$name2($args)"; } else { return $self->maybe_parens_func($name2, $args, $cx, 5); } @@ -2600,6 +2600,7 @@ sub pp_list { my($op, $cx) = @_; my($expr, @exprs); my $kid = $op->first->sibling; # skip pushmark + return '' if class($kid) eq 'NULL'; my $lop; my $local = "either"; # could be local(...), my(...), state(...) or our(...) for ($lop = $kid; !null($lop); $lop = $lop->sibling) { diff --git a/dist/B-Deparse/t/core.t b/dist/B-Deparse/t/core.t index 2d5aa32919..81d9038b8a 100644 --- a/dist/B-Deparse/t/core.t +++ b/dist/B-Deparse/t/core.t @@ -10,6 +10,8 @@ BEGIN { use strict; use Test::More; +use feature (sprintf(":%vd", $^V)); # to avoid relying on the feature + # logic to add CORE:: # Many functions appear in multiple lists, so that shift() and shift(foo) # are both tested. @@ -18,7 +20,8 @@ my @nary = ( # nullary functions [qw( abs alarm break chr cos chop close chdir chomp chmod chown chroot caller continue die dump exp exit exec endgrent - endpwent endnetent endhostent endservent endprotoent fork glob + endpwent endnetent endhostent endservent + endprotoent evalbytes fork glob getppid getpwent getprotoent gethostent getnetent getservent getgrent getlogin getc gmtime hex int lc log lstat length lcfirst localtime mkdir ord oct pop quotemeta ref rand @@ -28,7 +31,7 @@ my @nary = ( # unary [qw( abs alarm bless binmode chr cos chop close chdir chomp chmod chown chroot closedir die do dump exp exit exec - each fileno getpgrp getpwnam getpwuid getpeername + each evalbytes fileno getpgrp getpwnam getpwuid getpeername getprotobyname getprotobynumber gethostbyname getnetbyname getsockname getgrnam getgrgid getc glob gmtime hex int join keys kill lc diff --git a/dist/B-Deparse/t/deparse.t b/dist/B-Deparse/t/deparse.t index e527b99a33..503f46ff91 100644 --- a/dist/B-Deparse/t/deparse.t +++ b/dist/B-Deparse/t/deparse.t @@ -765,6 +765,7 @@ CORE::given ($x) { CORE::break; } } +CORE::evalbytes ''; #### # $#- $#+ $#{%} etc. my @x; @@ -776,3 +777,14 @@ my @x; # ${#} interpolated (the first line magically disables the warning) () = *#; () = "${#}a"; +#### +# ()[...] +my(@a) = ()[()]; +#### +# sort(foo(bar)) +# sort(foo(bar)) is interpreted as sort &foo(bar) +# sort foo(bar) is interpreted as sort foo bar +# parentheses are not optional in this case +print sort(foo('bar')); +>>>> +print sort(foo('bar')); diff --git a/dist/ExtUtils-Manifest/t/Manifest.t b/dist/ExtUtils-Manifest/t/Manifest.t index 8d2ff8b91e..96c5b50412 100644 --- a/dist/ExtUtils-Manifest/t/Manifest.t +++ b/dist/ExtUtils-Manifest/t/Manifest.t @@ -262,6 +262,7 @@ is( $files->{foobar}, '', ' preserved old entries' ); pass "normalization success with i=$i"; } else { require Data::Dumper; + no warnings "once"; local $Data::Dumper::Useqq = 1; local $Data::Dumper::Terse = 1; is Data::Dumper::Dumper($maniaddresult), Data::Dumper::Dumper($prev_maniaddresult), "eol normalization failed with i=$i"; diff --git a/dist/ExtUtils-ParseXS/t/XSMore.xs b/dist/ExtUtils-ParseXS/t/XSMore.xs index 2a0a87a7ec..322fc1de98 100644 --- a/dist/ExtUtils-ParseXS/t/XSMore.xs +++ b/dist/ExtUtils-ParseXS/t/XSMore.xs @@ -120,6 +120,8 @@ typemaptest3(foo, bar, baz) MyType5 bar MyType5 baz CODE: + PERL_UNUSED_VAR(bar); + PERL_UNUSED_VAR(baz); RETVAL = foo; OUTPUT: RETVAL diff --git a/dist/Locale-Maketext/ChangeLog b/dist/Locale-Maketext/ChangeLog index 3a28d0934b..ed16312062 100644 --- a/dist/Locale-Maketext/ChangeLog +++ b/dist/Locale-Maketext/ChangeLog @@ -1,5 +1,15 @@ Revision history for Perl suite Locale::Maketext +2011-05-25 + * Update to 1.19 from upstream blead + + [perl #89896] Locale::Maketext test failure + when environment has variable containing unbalanced brackets + + Suppress "Name used only once" warnings. + + [perl #81888] Fix typos (spelling errors) in dist/* + 2010-10-20 * Release 1.17 diff --git a/dist/Locale-Maketext/lib/Locale/Maketext.pm b/dist/Locale-Maketext/lib/Locale/Maketext.pm index af5d21a78a..9e4c497d2d 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext.pm @@ -27,7 +27,7 @@ BEGIN { } -$VERSION = '1.19'; +$VERSION = '1.20'; @ISA = (); $MATCH_SUPERS = 1; diff --git a/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm b/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm index 75c993caee..9e78c7e0b5 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm @@ -2,7 +2,7 @@ package Locale::Maketext::Guts; use Locale::Maketext; -our $VERSION = '1.17'; +our $VERSION = '1.20'; =head1 NAME diff --git a/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm b/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm index 858fcf7663..35a71ab509 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm @@ -2,7 +2,7 @@ package Locale::Maketext::GutsLoader; use Locale::Maketext; -our $VERSION = '1.17'; +our $VERSION = '1.20'; sub zorp { return scalar @_ } diff --git a/dist/Module-CoreList/Changes b/dist/Module-CoreList/Changes index 9d6e5354fd..060111bd78 100644 --- a/dist/Module-CoreList/Changes +++ b/dist/Module-CoreList/Changes @@ -1,4 +1,7 @@ -2.56 Thu Oct 20 2011 +2.58 Sun Nov 20 2011 + - Updated for v5.15.5 + +2.57 Thu Oct 20 2011 - Updated for v5.15.4 2.56 Tues Sept 20 2011 diff --git a/dist/Module-CoreList/lib/Module/CoreList.pm b/dist/Module-CoreList/lib/Module/CoreList.pm index 95c896dbd2..fef69805f0 100644 --- a/dist/Module-CoreList/lib/Module/CoreList.pm +++ b/dist/Module-CoreList/lib/Module/CoreList.pm @@ -2,7 +2,7 @@ package Module::CoreList; use strict; use vars qw/$VERSION %released %version %families %upstream %bug_tracker %deprecated/; -$VERSION = '2.57'; +$VERSION = '2.58'; =head1 NAME @@ -172,7 +172,7 @@ Module::CoreList currently covers the 5.000, 5.001, 5.002, 5.003_07, 5.11.2, 5.11.3, 5.11.4, 5.11.5, 5.12.0, 5.12.1, 5.12.2, 5.12.3, 5.12.4, 5.13.0, 5.13.1, 5.13.2, 5.13.3, 5.13.4, 5.13.5, 5.13.6, 5.13.7, 5.13.8, 5.13.9, 5.13.10, 5.13.11, 5.14.0, 5.14.1, 5.14.2, 5.15.0, 5.15.1, 5.15.2, -5.15.3 and 5.15.4 releases of perl. +5.15.3, 5.15.4 and 5.15.5 releases of perl. =head1 HISTORY @@ -357,6 +357,7 @@ sub removed_raw { 5.014002 => '2011-09-26', 5.015003 => '2011-09-20', 5.015004 => '2011-10-20', + 5.015005 => '2011-11-20', ); for my $version ( sort { $a <=> $b } keys %released ) { @@ -29528,6 +29529,674 @@ for my $version ( sort { $a <=> $b } keys %released ) { 'warnings' => '1.12', 'warnings::register' => '1.02', }, + 5.015005 => { + 'AnyDBM_File' => '1.01', + 'App::Cpan' => '1.5701', + 'App::Prove' => '3.23', + 'App::Prove::State' => '3.23', + 'App::Prove::State::Result'=> '3.23', + 'App::Prove::State::Result::Test'=> '3.23', + 'Archive::Extract' => '0.58', + 'Archive::Tar' => '1.80', + 'Archive::Tar::Constant'=> '1.80', + 'Archive::Tar::File' => '1.80', + 'Attribute::Handlers' => '0.93', + 'AutoLoader' => '5.71', + 'AutoSplit' => '1.06', + 'B' => '1.32', + 'B::Concise' => '0.87', + 'B::Debug' => '1.16', + 'B::Deparse' => '1.09', + 'B::Lint' => '1.13', + 'B::Lint::Debug' => '1.12', + 'B::Showlex' => '1.03', + 'B::Terse' => '1.06', + 'B::Xref' => '1.03', + 'Benchmark' => '1.13', + 'CGI' => '3.58', + 'CGI::Apache' => '1.01', + 'CGI::Carp' => '3.51', + 'CGI::Cookie' => '1.30', + 'CGI::Fast' => '1.09', + 'CGI::Pretty' => '3.46', + 'CGI::Push' => '1.05', + 'CGI::Switch' => '1.01', + 'CGI::Util' => '3.53', + 'CPAN' => '1.9800', + 'CPAN::Author' => '5.5001', + 'CPAN::Bundle' => '5.5', + 'CPAN::CacheMgr' => '5.5001', + 'CPAN::Complete' => '5.5', + 'CPAN::Debug' => '5.5001', + 'CPAN::DeferredCode' => '5.50', + 'CPAN::Distribution' => '1.9602', + 'CPAN::Distroprefs' => '6', + 'CPAN::Distrostatus' => '5.5', + 'CPAN::Exception::RecursiveDependency'=> '5.5', + 'CPAN::Exception::blocked_urllist'=> '1.001', + 'CPAN::Exception::yaml_not_installed'=> '5.5', + 'CPAN::Exception::yaml_process_error'=> '5.5', + 'CPAN::FTP' => '5.5005', + 'CPAN::FTP::netrc' => '1.01', + 'CPAN::FirstTime' => '5.5303', + 'CPAN::HTTP::Client' => '1.9600', + 'CPAN::HTTP::Credentials'=> '1.9600', + 'CPAN::HandleConfig' => '5.5003', + 'CPAN::Index' => '1.9600', + 'CPAN::InfoObj' => '5.5', + 'CPAN::Kwalify' => '5.50', + 'CPAN::LWP::UserAgent' => '1.9600', + 'CPAN::Meta' => '2.112621', + 'CPAN::Meta::Converter' => '2.112621', + 'CPAN::Meta::Feature' => '2.112621', + 'CPAN::Meta::History' => '2.112621', + 'CPAN::Meta::Prereqs' => '2.112621', + 'CPAN::Meta::Spec' => '2.112621', + 'CPAN::Meta::Validator' => '2.112621', + 'CPAN::Meta::YAML' => '0.004', + 'CPAN::Mirrors' => '1.9600', + 'CPAN::Module' => '5.5001', + 'CPAN::Nox' => '5.50', + 'CPAN::Prompt' => '5.5', + 'CPAN::Queue' => '5.5001', + 'CPAN::Shell' => '5.5002', + 'CPAN::Tarzip' => '5.5011', + 'CPAN::URL' => '5.5', + 'CPAN::Version' => '5.5001', + 'CPANPLUS' => '0.9112', + 'CPANPLUS::Backend' => undef, + 'CPANPLUS::Backend::RV' => undef, + 'CPANPLUS::Config' => undef, + 'CPANPLUS::Configure' => undef, + 'CPANPLUS::Configure::Setup'=> undef, + 'CPANPLUS::Dist' => undef, + 'CPANPLUS::Dist::Autobundle'=> undef, + 'CPANPLUS::Dist::Base' => undef, + 'CPANPLUS::Dist::Build' => '0.60', + 'CPANPLUS::Dist::Build::Constants'=> '0.60', + 'CPANPLUS::Dist::MM' => undef, + 'CPANPLUS::Dist::Sample'=> undef, + 'CPANPLUS::Error' => undef, + 'CPANPLUS::Internals' => '0.9112', + 'CPANPLUS::Internals::Constants'=> undef, + 'CPANPLUS::Internals::Constants::Report'=> undef, + 'CPANPLUS::Internals::Extract'=> undef, + 'CPANPLUS::Internals::Fetch'=> undef, + 'CPANPLUS::Internals::Report'=> undef, + 'CPANPLUS::Internals::Search'=> undef, + 'CPANPLUS::Internals::Source'=> undef, + 'CPANPLUS::Internals::Source::Memory'=> undef, + 'CPANPLUS::Internals::Source::SQLite'=> undef, + 'CPANPLUS::Internals::Source::SQLite::Tie'=> undef, + 'CPANPLUS::Internals::Utils'=> undef, + 'CPANPLUS::Internals::Utils::Autoflush'=> undef, + 'CPANPLUS::Module' => undef, + 'CPANPLUS::Module::Author'=> undef, + 'CPANPLUS::Module::Author::Fake'=> undef, + 'CPANPLUS::Module::Checksums'=> undef, + 'CPANPLUS::Module::Fake'=> undef, + 'CPANPLUS::Module::Signature'=> undef, + 'CPANPLUS::Selfupdate' => undef, + 'CPANPLUS::Shell' => undef, + 'CPANPLUS::Shell::Classic'=> '0.0562', + 'CPANPLUS::Shell::Default'=> '0.9112', + 'CPANPLUS::Shell::Default::Plugins::CustomSource'=> undef, + 'CPANPLUS::Shell::Default::Plugins::Remote'=> undef, + 'CPANPLUS::Shell::Default::Plugins::Source'=> undef, + 'Carp' => '1.23', + 'Carp::Heavy' => '1.23', + 'Class::Struct' => '0.63', + 'Compress::Raw::Bzip2' => '2.042', + 'Compress::Raw::Zlib' => '2.042', + 'Compress::Zlib' => '2.042', + 'Config' => undef, + 'Config::Extensions' => '0.01', + 'Cwd' => '3.37', + 'DB' => '1.03', + 'DBM_Filter' => '0.04', + 'DBM_Filter::compress' => '0.02', + 'DBM_Filter::encode' => '0.02', + 'DBM_Filter::int32' => '0.02', + 'DBM_Filter::null' => '0.02', + 'DBM_Filter::utf8' => '0.02', + 'DB_File' => '1.824', + 'Data::Dumper' => '2.134', + 'Devel::InnerPackage' => '0.3', + 'Devel::PPPort' => '3.20', + 'Devel::Peek' => '1.08', + 'Devel::SelfStubber' => '1.05', + 'Digest' => '1.17', + 'Digest::MD5' => '2.51', + 'Digest::SHA' => '5.63', + 'Digest::base' => '1.16', + 'Digest::file' => '1.16', + 'DirHandle' => '1.04', + 'Dumpvalue' => '1.16', + 'DynaLoader' => '1.14', + 'Encode' => '2.44', + 'Encode::Alias' => '2.15', + 'Encode::Byte' => '2.04', + 'Encode::CJKConstants' => '2.02', + 'Encode::CN' => '2.03', + 'Encode::CN::HZ' => '2.05', + 'Encode::Config' => '2.05', + 'Encode::EBCDIC' => '2.02', + 'Encode::Encoder' => '2.02', + 'Encode::Encoding' => '2.05', + 'Encode::GSM0338' => '2.01', + 'Encode::Guess' => '2.05', + 'Encode::JP' => '2.04', + 'Encode::JP::H2Z' => '2.02', + 'Encode::JP::JIS7' => '2.04', + 'Encode::KR' => '2.03', + 'Encode::KR::2022_KR' => '2.02', + 'Encode::MIME::Header' => '2.13', + 'Encode::MIME::Header::ISO_2022_JP'=> '1.03', + 'Encode::MIME::Name' => '1.01', + 'Encode::Symbol' => '2.02', + 'Encode::TW' => '2.03', + 'Encode::Unicode' => '2.07', + 'Encode::Unicode::UTF7' => '2.05', + 'English' => '1.04', + 'Env' => '1.03', + 'Errno' => '1.15', + 'Exporter' => '5.65', + 'Exporter::Heavy' => '5.65', + 'ExtUtils::CBuilder' => '0.280204', + 'ExtUtils::CBuilder::Base'=> '0.280204', + 'ExtUtils::CBuilder::Platform::Unix'=> '0.280203', + 'ExtUtils::CBuilder::Platform::VMS'=> '0.280203', + 'ExtUtils::CBuilder::Platform::Windows'=> '0.280203', + 'ExtUtils::CBuilder::Platform::Windows::BCC'=> '0.280203', + 'ExtUtils::CBuilder::Platform::Windows::GCC'=> '0.280203', + 'ExtUtils::CBuilder::Platform::Windows::MSVC'=> '0.280203', + 'ExtUtils::CBuilder::Platform::aix'=> '0.280203', + 'ExtUtils::CBuilder::Platform::cygwin'=> '0.280203', + 'ExtUtils::CBuilder::Platform::darwin'=> '0.280203', + 'ExtUtils::CBuilder::Platform::dec_osf'=> '0.280203', + 'ExtUtils::CBuilder::Platform::os2'=> '0.280203', + 'ExtUtils::Command' => '1.17', + 'ExtUtils::Command::MM' => '6.63_02', + 'ExtUtils::Constant' => '0.23', + 'ExtUtils::Constant::Base'=> '0.05', + 'ExtUtils::Constant::ProxySubs'=> '0.08', + 'ExtUtils::Constant::Utils'=> '0.03', + 'ExtUtils::Constant::XS'=> '0.03', + 'ExtUtils::Embed' => '1.30', + 'ExtUtils::Install' => '1.57', + 'ExtUtils::Installed' => '1.999002', + 'ExtUtils::Liblist' => '6.63_02', + 'ExtUtils::Liblist::Kid'=> '6.63_02', + 'ExtUtils::MM' => '6.63_02', + 'ExtUtils::MM_AIX' => '6.63_02', + 'ExtUtils::MM_Any' => '6.63_02', + 'ExtUtils::MM_BeOS' => '6.63_02', + 'ExtUtils::MM_Cygwin' => '6.63_02', + 'ExtUtils::MM_DOS' => '6.63_02', + 'ExtUtils::MM_Darwin' => '6.63_02', + 'ExtUtils::MM_MacOS' => '6.63_02', + 'ExtUtils::MM_NW5' => '6.63_02', + 'ExtUtils::MM_OS2' => '6.63_02', + 'ExtUtils::MM_QNX' => '6.63_02', + 'ExtUtils::MM_UWIN' => '6.63_02', + 'ExtUtils::MM_Unix' => '6.63_02', + 'ExtUtils::MM_VMS' => '6.63_02', + 'ExtUtils::MM_VOS' => '6.63_02', + 'ExtUtils::MM_Win32' => '6.63_02', + 'ExtUtils::MM_Win95' => '6.63_02', + 'ExtUtils::MY' => '6.63_02', + 'ExtUtils::MakeMaker' => '6.63_02', + 'ExtUtils::MakeMaker::Config'=> '6.63_02', + 'ExtUtils::Manifest' => '1.60', + 'ExtUtils::Miniperl' => undef, + 'ExtUtils::Mkbootstrap' => '6.63_02', + 'ExtUtils::Mksymlists' => '6.63_02', + 'ExtUtils::Packlist' => '1.44', + 'ExtUtils::ParseXS' => '3.05', + 'ExtUtils::ParseXS::Constants'=> '3.05', + 'ExtUtils::ParseXS::CountLines'=> '3.05', + 'ExtUtils::ParseXS::Utilities'=> '3.05', + 'ExtUtils::Typemaps' => '1.02', + 'ExtUtils::Typemaps::InputMap'=> undef, + 'ExtUtils::Typemaps::OutputMap'=> undef, + 'ExtUtils::Typemaps::Type'=> '0.05', + 'ExtUtils::XSSymSet' => '1.2', + 'ExtUtils::testlib' => '6.63_02', + 'Fatal' => '2.10', + 'Fcntl' => '1.11', + 'File::Basename' => '2.83', + 'File::CheckTree' => '4.41', + 'File::Compare' => '1.1006', + 'File::Copy' => '2.21', + 'File::DosGlob' => '1.06', + 'File::Fetch' => '0.32', + 'File::Find' => '1.20', + 'File::Glob' => '1.14', + 'File::GlobMapper' => '1.000', + 'File::Path' => '2.08_01', + 'File::Spec' => '3.34', + 'File::Spec::Cygwin' => '3.33', + 'File::Spec::Epoc' => '3.33', + 'File::Spec::Functions' => '3.33', + 'File::Spec::Mac' => '3.35', + 'File::Spec::OS2' => '3.33', + 'File::Spec::Unix' => '3.34', + 'File::Spec::VMS' => '3.35', + 'File::Spec::Win32' => '3.35', + 'File::Temp' => '0.22', + 'File::stat' => '1.05', + 'FileCache' => '1.08', + 'FileHandle' => '2.02', + 'Filter::Simple' => '0.88', + 'Filter::Util::Call' => '1.39', + 'FindBin' => '1.51', + 'GDBM_File' => '1.14', + 'Getopt::Long' => '2.38', + 'Getopt::Std' => '1.06', + 'HTTP::Tiny' => '0.016', + 'Hash::Util' => '0.11', + 'Hash::Util::FieldHash' => '1.10', + 'I18N::Collate' => '1.02', + 'I18N::LangTags' => '0.37', + 'I18N::LangTags::Detect'=> '1.05', + 'I18N::LangTags::List' => '0.35_01', + 'I18N::Langinfo' => '0.08_02', + 'IO' => '1.25_06', + 'IO::Compress::Adapter::Bzip2'=> '2.042', + 'IO::Compress::Adapter::Deflate'=> '2.042', + 'IO::Compress::Adapter::Identity'=> '2.042', + 'IO::Compress::Base' => '2.042', + 'IO::Compress::Base::Common'=> '2.042', + 'IO::Compress::Bzip2' => '2.042', + 'IO::Compress::Deflate' => '2.042', + 'IO::Compress::Gzip' => '2.042', + 'IO::Compress::Gzip::Constants'=> '2.042', + 'IO::Compress::RawDeflate'=> '2.042', + 'IO::Compress::Zip' => '2.042', + 'IO::Compress::Zip::Constants'=> '2.042', + 'IO::Compress::Zlib::Constants'=> '2.042', + 'IO::Compress::Zlib::Extra'=> '2.042', + 'IO::Dir' => '1.08', + 'IO::File' => '1.15', + 'IO::Handle' => '1.33', + 'IO::Pipe' => '1.14', + 'IO::Poll' => '0.08', + 'IO::Seekable' => '1.10', + 'IO::Select' => '1.20', + 'IO::Socket' => '1.33', + 'IO::Socket::INET' => '1.32', + 'IO::Socket::UNIX' => '1.23', + 'IO::Uncompress::Adapter::Bunzip2'=> '2.042', + 'IO::Uncompress::Adapter::Identity'=> '2.042', + 'IO::Uncompress::Adapter::Inflate'=> '2.042', + 'IO::Uncompress::AnyInflate'=> '2.042', + 'IO::Uncompress::AnyUncompress'=> '2.042', + 'IO::Uncompress::Base' => '2.042', + 'IO::Uncompress::Bunzip2'=> '2.042', + 'IO::Uncompress::Gunzip'=> '2.042', + 'IO::Uncompress::Inflate'=> '2.042', + 'IO::Uncompress::RawInflate'=> '2.042', + 'IO::Uncompress::Unzip' => '2.042', + 'IO::Zlib' => '1.10', + 'IPC::Cmd' => '0.72', + 'IPC::Msg' => '2.03', + 'IPC::Open2' => '1.04', + 'IPC::Open3' => '1.12', + 'IPC::Semaphore' => '2.03', + 'IPC::SharedMem' => '2.03', + 'IPC::SysV' => '2.03', + 'JSON::PP' => '2.27200', + 'JSON::PP::Boolean' => undef, + 'List::Util' => '1.23', + 'List::Util::PP' => '1.23', + 'List::Util::XS' => '1.23', + 'Locale::Codes' => '3.18', + 'Locale::Codes::Constants'=> '3.18', + 'Locale::Codes::Country'=> '3.18', + 'Locale::Codes::Country_Codes'=> '3.18', + 'Locale::Codes::Currency'=> '3.18', + 'Locale::Codes::Currency_Codes'=> '3.18', + 'Locale::Codes::LangExt'=> '3.18', + 'Locale::Codes::LangExt_Codes'=> '3.18', + 'Locale::Codes::LangVar'=> '3.18', + 'Locale::Codes::LangVar_Codes'=> '3.18', + 'Locale::Codes::Language'=> '3.18', + 'Locale::Codes::Language_Codes'=> '3.18', + 'Locale::Codes::Script' => '3.18', + 'Locale::Codes::Script_Codes'=> '3.18', + 'Locale::Country' => '3.18', + 'Locale::Currency' => '3.18', + 'Locale::Language' => '3.18', + 'Locale::Maketext' => '1.20', + 'Locale::Maketext::Guts'=> '1.20', + 'Locale::Maketext::GutsLoader'=> '1.20', + 'Locale::Maketext::Simple'=> '0.21', + 'Locale::Script' => '3.18', + 'Log::Message' => '0.04', + 'Log::Message::Config' => '0.04', + 'Log::Message::Handlers'=> '0.04', + 'Log::Message::Item' => '0.04', + 'Log::Message::Simple' => '0.08', + 'MIME::Base64' => '3.13', + 'MIME::QuotedPrint' => '3.13', + 'Math::BigFloat' => '1.997', + 'Math::BigFloat::Trace' => '0.29', + 'Math::BigInt' => '1.997', + 'Math::BigInt::Calc' => '1.997', + 'Math::BigInt::CalcEmu' => '1.997', + 'Math::BigInt::FastCalc'=> '0.30', + 'Math::BigInt::Trace' => '0.29', + 'Math::BigRat' => '0.2603', + 'Math::Complex' => '1.58', + 'Math::Trig' => '1.22', + 'Memoize' => '1.02', + 'Memoize::AnyDBM_File' => '1.02', + 'Memoize::Expire' => '1.02', + 'Memoize::ExpireFile' => '1.02', + 'Memoize::ExpireTest' => '1.02', + 'Memoize::NDBM_File' => '1.02', + 'Memoize::SDBM_File' => '1.02', + 'Memoize::Storable' => '1.02', + 'Module::Build' => '0.39_01', + 'Module::Build::Base' => '0.39_01', + 'Module::Build::Compat' => '0.39_01', + 'Module::Build::Config' => '0.39_01', + 'Module::Build::ConfigData'=> undef, + 'Module::Build::Cookbook'=> '0.39_01', + 'Module::Build::Dumper' => '0.39_01', + 'Module::Build::ModuleInfo'=> '0.39_01', + 'Module::Build::Notes' => '0.39_01', + 'Module::Build::PPMMaker'=> '0.39_01', + 'Module::Build::Platform::Amiga'=> '0.39_01', + 'Module::Build::Platform::Default'=> '0.39_01', + 'Module::Build::Platform::EBCDIC'=> '0.39_01', + 'Module::Build::Platform::MPEiX'=> '0.39_01', + 'Module::Build::Platform::MacOS'=> '0.39_01', + 'Module::Build::Platform::RiscOS'=> '0.39_01', + 'Module::Build::Platform::Unix'=> '0.39_01', + 'Module::Build::Platform::VMS'=> '0.39_01', + 'Module::Build::Platform::VOS'=> '0.39_01', + 'Module::Build::Platform::Windows'=> '0.39_01', + 'Module::Build::Platform::aix'=> '0.39_01', + 'Module::Build::Platform::cygwin'=> '0.39_01', + 'Module::Build::Platform::darwin'=> '0.39_01', + 'Module::Build::Platform::os2'=> '0.39_01', + 'Module::Build::PodParser'=> '0.39_01', + 'Module::Build::Version'=> '0.87', + 'Module::Build::YAML' => '1.41', + 'Module::CoreList' => '2.58', + 'Module::Load' => '0.22', + 'Module::Load::Conditional'=> '0.46', + 'Module::Loaded' => '0.06', + 'Module::Metadata' => '1.000007', + 'Module::Pluggable' => '3.9', + 'Module::Pluggable::Object'=> '3.9', + 'Moped::Msg' => '0.01', + 'NDBM_File' => '1.12', + 'NEXT' => '0.65', + 'Net::Cmd' => '2.29', + 'Net::Config' => '1.11', + 'Net::Domain' => '2.20', + 'Net::FTP' => '2.77', + 'Net::FTP::A' => '1.18', + 'Net::FTP::E' => '0.01', + 'Net::FTP::I' => '1.12', + 'Net::FTP::L' => '0.01', + 'Net::FTP::dataconn' => '0.11', + 'Net::NNTP' => '2.24', + 'Net::Netrc' => '2.12', + 'Net::POP3' => '2.29', + 'Net::Ping' => '2.38', + 'Net::SMTP' => '2.31', + 'Net::Time' => '2.10', + 'Net::hostent' => '1.01', + 'Net::netent' => '1.00', + 'Net::protoent' => '1.00', + 'Net::servent' => '1.01', + 'O' => '1.01', + 'ODBM_File' => '1.12', + 'Object::Accessor' => '0.42', + 'Opcode' => '1.21', + 'POSIX' => '1.26', + 'Package::Constants' => '0.02', + 'Params::Check' => '0.32', + 'Parse::CPAN::Meta' => '1.4401', + 'Perl::OSType' => '1.002', + 'PerlIO' => '1.07', + 'PerlIO::encoding' => '0.15', + 'PerlIO::scalar' => '0.12', + 'PerlIO::via' => '0.12', + 'PerlIO::via::QuotedPrint'=> '0.06', + 'Pod::Checker' => '1.45', + 'Pod::Escapes' => '1.04', + 'Pod::Find' => '1.35', + 'Pod::Functions' => '1.04', + 'Pod::Html' => '1.11', + 'Pod::InputObjects' => '1.31', + 'Pod::LaTeX' => '0.59', + 'Pod::Man' => '2.25', + 'Pod::ParseLink' => '1.10', + 'Pod::ParseUtils' => '1.36', + 'Pod::Parser' => '1.37', + 'Pod::Perldoc' => '3.15_07', + 'Pod::Perldoc::BaseTo' => undef, + 'Pod::Perldoc::GetOptsOO'=> undef, + 'Pod::Perldoc::ToChecker'=> undef, + 'Pod::Perldoc::ToMan' => undef, + 'Pod::Perldoc::ToNroff' => undef, + 'Pod::Perldoc::ToPod' => undef, + 'Pod::Perldoc::ToRtf' => undef, + 'Pod::Perldoc::ToText' => undef, + 'Pod::Perldoc::ToTk' => undef, + 'Pod::Perldoc::ToXml' => undef, + 'Pod::PlainText' => '2.04', + 'Pod::Select' => '1.36', + 'Pod::Simple' => '3.19', + 'Pod::Simple::BlackBox' => '3.19', + 'Pod::Simple::Checker' => '3.19', + 'Pod::Simple::Debug' => '3.19', + 'Pod::Simple::DumpAsText'=> '3.19', + 'Pod::Simple::DumpAsXML'=> '3.19', + 'Pod::Simple::HTML' => '3.19', + 'Pod::Simple::HTMLBatch'=> '3.19', + 'Pod::Simple::HTMLLegacy'=> '5.01', + 'Pod::Simple::LinkSection'=> '3.19', + 'Pod::Simple::Methody' => '3.19', + 'Pod::Simple::Progress' => '3.19', + 'Pod::Simple::PullParser'=> '3.19', + 'Pod::Simple::PullParserEndToken'=> '3.19', + 'Pod::Simple::PullParserStartToken'=> '3.19', + 'Pod::Simple::PullParserTextToken'=> '3.19', + 'Pod::Simple::PullParserToken'=> '3.19', + 'Pod::Simple::RTF' => '3.19', + 'Pod::Simple::Search' => '3.19', + 'Pod::Simple::SimpleTree'=> '3.19', + 'Pod::Simple::Text' => '3.19', + 'Pod::Simple::TextContent'=> '3.19', + 'Pod::Simple::TiedOutFH'=> '3.19', + 'Pod::Simple::Transcode'=> '3.19', + 'Pod::Simple::TranscodeDumb'=> '3.19', + 'Pod::Simple::TranscodeSmart'=> '3.19', + 'Pod::Simple::XHTML' => '3.19', + 'Pod::Simple::XMLOutStream'=> '3.19', + 'Pod::Text' => '3.15', + 'Pod::Text::Color' => '2.06', + 'Pod::Text::Overstrike' => '2.04', + 'Pod::Text::Termcap' => '2.06', + 'Pod::Usage' => '1.36', + 'SDBM_File' => '1.09', + 'Safe' => '2.29', + 'Scalar::Util' => '1.23', + 'Scalar::Util::PP' => '1.23', + 'Search::Dict' => '1.04', + 'SelectSaver' => '1.02', + 'SelfLoader' => '1.18', + 'Socket' => '1.94_02', + 'Storable' => '2.33', + 'Symbol' => '1.07', + 'Sys::Hostname' => '1.16', + 'Sys::Syslog' => '0.29', + 'Sys::Syslog::Win32' => undef, + 'TAP::Base' => '3.23', + 'TAP::Formatter::Base' => '3.23', + 'TAP::Formatter::Color' => '3.23', + 'TAP::Formatter::Console'=> '3.23', + 'TAP::Formatter::Console::ParallelSession'=> '3.23', + 'TAP::Formatter::Console::Session'=> '3.23', + 'TAP::Formatter::File' => '3.23', + 'TAP::Formatter::File::Session'=> '3.23', + 'TAP::Formatter::Session'=> '3.23', + 'TAP::Harness' => '3.23', + 'TAP::Object' => '3.23', + 'TAP::Parser' => '3.23', + 'TAP::Parser::Aggregator'=> '3.23', + 'TAP::Parser::Grammar' => '3.23', + 'TAP::Parser::Iterator' => '3.23', + 'TAP::Parser::Iterator::Array'=> '3.23', + 'TAP::Parser::Iterator::Process'=> '3.23', + 'TAP::Parser::Iterator::Stream'=> '3.23', + 'TAP::Parser::IteratorFactory'=> '3.23', + 'TAP::Parser::Multiplexer'=> '3.23', + 'TAP::Parser::Result' => '3.23', + 'TAP::Parser::Result::Bailout'=> '3.23', + 'TAP::Parser::Result::Comment'=> '3.23', + 'TAP::Parser::Result::Plan'=> '3.23', + 'TAP::Parser::Result::Pragma'=> '3.23', + 'TAP::Parser::Result::Test'=> '3.23', + 'TAP::Parser::Result::Unknown'=> '3.23', + 'TAP::Parser::Result::Version'=> '3.23', + 'TAP::Parser::Result::YAML'=> '3.23', + 'TAP::Parser::ResultFactory'=> '3.23', + 'TAP::Parser::Scheduler'=> '3.23', + 'TAP::Parser::Scheduler::Job'=> '3.23', + 'TAP::Parser::Scheduler::Spinner'=> '3.23', + 'TAP::Parser::Source' => '3.23', + 'TAP::Parser::SourceHandler'=> '3.23', + 'TAP::Parser::SourceHandler::Executable'=> '3.23', + 'TAP::Parser::SourceHandler::File'=> '3.23', + 'TAP::Parser::SourceHandler::Handle'=> '3.23', + 'TAP::Parser::SourceHandler::Perl'=> '3.23', + 'TAP::Parser::SourceHandler::RawTAP'=> '3.23', + 'TAP::Parser::Utils' => '3.23', + 'TAP::Parser::YAMLish::Reader'=> '3.23', + 'TAP::Parser::YAMLish::Writer'=> '3.23', + 'Term::ANSIColor' => '3.01', + 'Term::Cap' => '1.12', + 'Term::Complete' => '1.402', + 'Term::ReadLine' => '1.07', + 'Term::UI' => '0.26', + 'Term::UI::History' => undef, + 'Test' => '1.25_02', + 'Test::Builder' => '0.98', + 'Test::Builder::Module' => '0.98', + 'Test::Builder::Tester' => '1.22', + 'Test::Builder::Tester::Color'=> '1.22', + 'Test::Harness' => '3.23', + 'Test::More' => '0.98', + 'Test::Simple' => '0.98', + 'Text::Abbrev' => '1.02', + 'Text::Balanced' => '2.02', + 'Text::ParseWords' => '3.27', + 'Text::Soundex' => '3.03_01', + 'Text::Tabs' => '2009.0305', + 'Text::Wrap' => '2009.0305', + 'Thread' => '3.02', + 'Thread::Queue' => '2.12', + 'Thread::Semaphore' => '2.12', + 'Tie::Array' => '1.05', + 'Tie::File' => '0.97_02', + 'Tie::Handle' => '4.2', + 'Tie::Hash' => '1.04', + 'Tie::Hash::NamedCapture'=> '0.08', + 'Tie::Memoize' => '1.1', + 'Tie::RefHash' => '1.39', + 'Tie::Scalar' => '1.02', + 'Tie::StdHandle' => '4.2', + 'Tie::SubstrHash' => '1.00', + 'Time::HiRes' => '1.9724', + 'Time::Local' => '1.2000', + 'Time::Piece' => '1.20_01', + 'Time::Seconds' => undef, + 'Time::gmtime' => '1.03', + 'Time::localtime' => '1.02', + 'Time::tm' => '1.00', + 'UNIVERSAL' => '1.10', + 'Unicode' => '6.0.0', + 'Unicode::Collate' => '0.85', + 'Unicode::Collate::CJK::Big5'=> '0.65', + 'Unicode::Collate::CJK::GB2312'=> '0.65', + 'Unicode::Collate::CJK::JISX0208'=> '0.64', + 'Unicode::Collate::CJK::Korean'=> '0.66', + 'Unicode::Collate::CJK::Pinyin'=> '0.85', + 'Unicode::Collate::CJK::Stroke'=> '0.85', + 'Unicode::Collate::Locale'=> '0.85', + 'Unicode::Normalize' => '1.13', + 'Unicode::UCD' => '0.37', + 'User::grent' => '1.01', + 'User::pwent' => '1.00', + 'VMS::DCLsym' => '1.05', + 'VMS::Filespec' => '1.12', + 'VMS::Stdio' => '2.4', + 'Version::Requirements' => '0.101020', + 'Win32' => '0.44', + 'Win32API::File' => '0.1200', + 'Win32API::File::ExtUtils::Myconst2perl'=> '1', + 'Win32CORE' => '0.02', + 'XS::APItest' => '0.33', + 'XS::Typemap' => '0.07', + 'XSLoader' => '0.16', + 'arybase' => '0.01', + 'attributes' => '0.17', + 'autodie' => '2.10', + 'autodie::exception' => '2.10', + 'autodie::exception::system'=> '2.10', + 'autodie::hints' => '2.10', + 'autouse' => '1.06', + 'base' => '2.18', + 'bigint' => '0.29', + 'bignum' => '0.29', + 'bigrat' => '0.29', + 'blib' => '1.06', + 'bytes' => '1.04', + 'charnames' => '1.24', + 'constant' => '1.23', + 'deprecate' => '0.02', + 'diagnostics' => '1.25', + 'encoding' => '2.6_01', + 'encoding::warnings' => '0.11', + 'feature' => '1.23', + 'fields' => '2.16', + 'filetest' => '1.02', + 'if' => '0.0601', + 'inc::latest' => '0.39_01', + 'integer' => '1.00', + 'less' => '0.03', + 'lib' => '0.63', + 'locale' => '1.00', + 'mro' => '1.09', + 'open' => '1.10', + 'ops' => '1.02', + 'overload' => '1.15', + 'overload::numbers' => undef, + 'overloading' => '0.01', + 'parent' => '0.225', + 'perlfaq' => '5.0150036', + 're' => '0.18', + 'sigtrap' => '1.05', + 'sort' => '2.01', + 'strict' => '1.05', + 'subs' => '1.00', + 'threads' => '1.85', + 'threads::shared' => '1.40', + 'unicore::Name' => undef, + 'utf8' => '1.09', + 'vars' => '1.02', + 'version' => '0.93', + 'version::Requirements' => '0.101020', + 'vmsish' => '1.03', + 'warnings' => '1.12', + 'warnings::register' => '1.02', + }, ); %deprecated = ( @@ -29658,6 +30327,8 @@ for my $version ( sort { $a <=> $b } keys %released ) { }, 5.015004 => { }, + 5.015005 => { + }, ); %upstream = ( @@ -30190,6 +30861,7 @@ for my $version ( sort { $a <=> $b } keys %released ) { 'threads' => 'blead', 'threads::shared' => 'blead', 'version' => undef, + 'version::Requirements' => undef, 'warnings' => undef, 'warnings::register' => undef, ); @@ -30716,6 +31388,7 @@ for my $version ( sort { $a <=> $b } keys %released ) { 'threads' => undef, 'threads::shared' => undef, 'version' => undef, + 'version::Requirements' => undef, ); # Create aliases with trailing zeros for $] use diff --git a/dist/Storable/Storable.pm b/dist/Storable/Storable.pm index 3e995c396d..561f00eaac 100644 --- a/dist/Storable/Storable.pm +++ b/dist/Storable/Storable.pm @@ -21,7 +21,7 @@ package Storable; @ISA = qw(Exporter); use vars qw($canonical $forgive_me $VERSION); -$VERSION = '2.32'; +$VERSION = '2.33'; BEGIN { if (eval { local $SIG{__DIE__}; require Log::Agent; 1 }) { @@ -31,13 +31,14 @@ BEGIN { # Use of Log::Agent is optional. If it hasn't imported these subs then # provide a fallback implementation. # - else { + if (!exists &logcroak) { require Carp; - *logcroak = sub { Carp::croak(@_); }; - + } + if (!exists &logcarp) { + require Carp; *logcarp = sub { Carp::carp(@_); }; diff --git a/dist/Storable/t/robust.t b/dist/Storable/t/robust.t new file mode 100644 index 0000000000..27f5fc0056 --- /dev/null +++ b/dist/Storable/t/robust.t @@ -0,0 +1,12 @@ +#!/usr/bin/perl + +# This test script checks that Storable will load properly if someone +# is incorrectly messing with %INC to hide Log::Agent. No, no-one should +# really be doing this, but, then, it *used* to work! + +use Test::More; +plan tests => 1; + +$INC{'Log/Agent.pm'} = '#ignore#'; +require Storable; +pass; @@ -2195,7 +2195,7 @@ S_deb_curcv(pTHX_ const I32 ix) if (CxTYPE(cx) == CXt_SUB || CxTYPE(cx) == CXt_FORMAT) return cx->blk_sub.cv; else if (CxTYPE(cx) == CXt_EVAL && !CxTRYBLOCK(cx)) - return PL_compcv; + return cx->blk_eval.cv; else if (ix == 0 && PL_curstackinfo->si_type == PERLSI_MAIN) return PL_main_cv; else if (ix <= 0) @@ -600,6 +600,13 @@ ApPR |bool |is_uni_punct |UV c ApPR |bool |is_uni_xdigit |UV c Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp +#ifdef PERL_IN_UTF8_C +sR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp +p |UV |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const U8 flags +#endif +#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) +p |UV |_to_upper_title_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const char S_or_s +#endif Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags @@ -846,7 +853,9 @@ i |bool |aassign_common_vars |NULLOK OP* o Apda |OP* |newASSIGNOP |I32 flags|NULLOK OP* left|I32 optype|NULLOK OP* right Apda |OP* |newCONDOP |I32 flags|NN OP* first|NULLOK OP* trueop|NULLOK OP* falseop Apd |CV* |newCONSTSUB |NULLOK HV* stash|NULLOK const char* name|NULLOK SV* sv -Apd |CV* |newCONSTSUB_flags |NULLOK HV* stash|NULLOK const char* name|U32 flags|NULLOK SV* sv +Apd |CV* |newCONSTSUB_flags|NULLOK HV* stash \ + |NULLOK const char* name|STRLEN len \ + |U32 flags|NULLOK SV* sv #ifdef PERL_MAD Ap |OP* |newFORM |I32 floor|NULLOK OP* o|NULLOK OP* block #else @@ -865,6 +874,10 @@ Apda |OP* |newSLICEOP |I32 flags|NULLOK OP* subscript|NULLOK OP* listop Apda |OP* |newSTATEOP |I32 flags|NULLOK char* label|NULLOK OP* o Abm |CV* |newSUB |I32 floor|NULLOK OP* o|NULLOK OP* proto \ |NULLOK OP* block +p |CV * |newXS_len_flags|NULLOK const char *name|STRLEN len \ + |NN XSUBADDR_t subaddr\ + |NN const char *const filename \ + |NULLOK const char *const proto|U32 flags ApM |CV * |newXS_flags |NULLOK const char *name|NN XSUBADDR_t subaddr\ |NN const char *const filename \ |NULLOK const char *const proto|U32 flags @@ -1576,8 +1589,6 @@ ApR |MAGIC* |mg_dup |NULLOK MAGIC *mg|NN CLONE_PARAMS *const param #if defined(PERL_IN_SV_C) s |SV ** |sv_dup_inc_multiple|NN SV *const *source|NN SV **dest \ |SSize_t items|NN CLONE_PARAMS *const param -#endif -#if defined(PERL_IN_SV_C) sR |SV* |sv_dup_common |NN const SV *const sstr \ |NN CLONE_PARAMS *const param #endif @@ -1790,7 +1801,8 @@ sR |I32 |dopoptoloop |I32 startingblock sR |I32 |dopoptosub_at |NN const PERL_CONTEXT* cxstk|I32 startingblock sR |I32 |dopoptowhen |I32 startingblock s |void |save_lines |NULLOK AV *array|NN SV *sv -s |bool |doeval |int gimme|NULLOK OP** startop|NULLOK CV* outside|U32 seq +s |bool |doeval |int gimme|NULLOK OP** startop \ + |NULLOK CV* outside|U32 seq|NULLOK HV* hh sR |PerlIO *|check_type_and_open|NN SV *name #ifndef PERL_DISABLE_PMC sR |PerlIO *|doopen_pm |NN SV *name @@ -1967,15 +1979,18 @@ po |void |sv_add_backref |NN SV *const tsv|NN SV *const sv poM |void |sv_kill_backrefs |NN SV *const sv|NULLOK AV *const av #endif +#if defined(PERL_IN_SV_C) || defined (PERL_IN_OP_C) +pR |SV * |varname |NULLOK const GV *const gv|const char gvtype \ + |PADOFFSET targ|NULLOK const SV *const keyname \ + |I32 aindex|int subscript_type +#endif + pX |void |sv_del_backref |NN SV *const tsv|NN SV *const sv #if defined(PERL_IN_SV_C) nsR |char * |uiv_2buf |NN char *const buf|const IV iv|UV uv|const int is_uv|NN char **const peob s |void |sv_unglob |NN SV *const sv s |void |not_a_number |NN SV *const sv s |I32 |visit |NN SVFUNC_t f|const U32 flags|const U32 mask -sR |SV * |varname |NULLOK const GV *const gv|const char gvtype \ - |PADOFFSET targ|NULLOK const SV *const keyname \ - |I32 aindex|int subscript_type # ifdef DEBUGGING s |void |del_sv |NN SV *p # endif @@ -2261,8 +2276,7 @@ p |void |hv_ename_delete|NN HV *hv|NN const char *name|U32 len \ |U32 flags : Used in dump.c and hv.c poM |AV** |hv_backreferences_p |NN HV *hv -#if defined(PERL_IN_DUMP_C) || defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) -: Only used in sv.c +#if defined(PERL_IN_DUMP_C) || defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_SCOPE_C) poM |void |hv_kill_backrefs |NN HV *hv #endif Apd |void |hv_clear_placeholders |NN HV *hv @@ -326,7 +326,7 @@ #define newBINOP(a,b,c,d) Perl_newBINOP(aTHX_ a,b,c,d) #define newCONDOP(a,b,c,d) Perl_newCONDOP(aTHX_ a,b,c,d) #define newCONSTSUB(a,b,c) Perl_newCONSTSUB(aTHX_ a,b,c) -#define newCONSTSUB_flags(a,b,c,d) Perl_newCONSTSUB_flags(aTHX_ a,b,c,d) +#define newCONSTSUB_flags(a,b,c,d,e) Perl_newCONSTSUB_flags(aTHX_ a,b,c,d,e) #define newCVREF(a,b) Perl_newCVREF(aTHX_ a,b) #define newFOROP(a,b,c,d,e) Perl_newFOROP(aTHX_ a,b,c,d,e) #define newGIVENOP(a,b,c) Perl_newGIVENOP(aTHX_ a,b,c) @@ -988,6 +988,7 @@ #define ck_anoncode(a) Perl_ck_anoncode(aTHX_ a) #define ck_bitop(a) Perl_ck_bitop(aTHX_ a) #define ck_chdir(a) Perl_ck_chdir(aTHX_ a) +#define ck_cmp(a) Perl_ck_cmp(aTHX_ a) #define ck_concat(a) Perl_ck_concat(aTHX_ a) #define ck_defined(a) Perl_ck_defined(aTHX_ a) #define ck_delete(a) Perl_ck_delete(aTHX_ a) @@ -1004,6 +1005,7 @@ #define ck_grep(a) Perl_ck_grep(aTHX_ a) #define ck_index(a) Perl_ck_index(aTHX_ a) #define ck_join(a) Perl_ck_join(aTHX_ a) +#define ck_length(a) Perl_ck_length(aTHX_ a) #define ck_lfun(a) Perl_ck_lfun(aTHX_ a) #define ck_listiob(a) Perl_ck_listiob(aTHX_ a) #define ck_match(a) Perl_ck_match(aTHX_ a) @@ -1135,6 +1137,7 @@ #define my_stat_flags(a) Perl_my_stat_flags(aTHX_ a) #define my_swabn Perl_my_swabn #define my_unexec() Perl_my_unexec(aTHX) +#define newXS_len_flags(a,b,c,d,e,f) Perl_newXS_len_flags(aTHX_ a,b,c,d,e,f) #define nextargv(a) Perl_nextargv(aTHX_ a) #define oopsAV(a) Perl_oopsAV(aTHX_ a) #define oopsHV(a) Perl_oopsHV(aTHX_ a) @@ -1427,7 +1430,7 @@ #define destroy_matcher(a) S_destroy_matcher(aTHX_ a) #define do_smartmatch(a,b,c) S_do_smartmatch(aTHX_ a,b,c) #define docatch(a) S_docatch(aTHX_ a) -#define doeval(a,b,c,d) S_doeval(aTHX_ a,b,c,d) +#define doeval(a,b,c,d,e) S_doeval(aTHX_ a,b,c,d,e) #define dofindlabel(a,b,c,d) S_dofindlabel(aTHX_ a,b,c,d) #define doparseform(a) S_doparseform(aTHX_ a) #define dopoptoeval(a) S_dopoptoeval(aTHX_ a) @@ -1509,7 +1512,6 @@ #define uiv_2buf S_uiv_2buf #define utf8_mg_len_cache_update(a,b,c) S_utf8_mg_len_cache_update(aTHX_ a,b,c) #define utf8_mg_pos_cache_update(a,b,c,d,e) S_utf8_mg_pos_cache_update(aTHX_ a,b,c,d,e) -#define varname(a,b,c,d,e,f) S_varname(aTHX_ a,b,c,d,e,f) #define visit(a,b,c) S_visit(aTHX_ a,b,c) # if defined(PERL_OLD_COPY_ON_WRITE) #define sv_release_COW(a,b,c) S_sv_release_COW(aTHX_ a,b,c) @@ -1520,6 +1522,9 @@ #define unreferenced_to_tmp_stack(a) S_unreferenced_to_tmp_stack(aTHX_ a) # endif # endif +# if defined(PERL_IN_SV_C) || defined (PERL_IN_OP_C) +#define varname(a,b,c,d,e,f) Perl_varname(aTHX_ a,b,c,d,e,f) +# endif # if defined(PERL_IN_TOKE_C) #define ao(a) S_ao(aTHX_ a) #define check_uni() S_check_uni(aTHX) @@ -1570,9 +1575,14 @@ #define isa_lookup(a,b,c,d) S_isa_lookup(aTHX_ a,b,c,d) # endif # if defined(PERL_IN_UTF8_C) +#define _to_fold_latin1(a,b,c,d) Perl__to_fold_latin1(aTHX_ a,b,c,d) #define is_utf8_char_slow S_is_utf8_char_slow #define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c) #define swash_get(a,b,c) S_swash_get(aTHX_ a,b,c) +#define to_lower_latin1(a,b,c) S_to_lower_latin1(aTHX_ a,b,c) +# endif +# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) +#define _to_upper_title_latin1(a,b,c,d) Perl__to_upper_title_latin1(aTHX_ a,b,c,d) # endif # if defined(PERL_IN_UTIL_C) #define ckwarn_common(a) S_ckwarn_common(aTHX_ a) diff --git a/epoc/config.sh b/epoc/config.sh index 2215274c37..470b42bc3a 100644 --- a/epoc/config.sh +++ b/epoc/config.sh @@ -33,8 +33,8 @@ apirevision='' apisubversion='' apiversion='' ar='arm-epoc-pe-ar' -archlib='/usr/lib/perl/5.15.4/epoc' -archlibexp='/usr/lib/perl/5.15.4/epoc' +archlib='/usr/lib/perl/5.15.5/epoc' +archlibexp='/usr/lib/perl/5.15.5/epoc' archname64='' archname='epoc' archobjs='epoc.o epocish.o epoc_stubs.o' @@ -711,16 +711,16 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='/home/of/PERL/perl/lib/5.15.4/epoc' +installarchlib='/home/of/PERL/perl/lib/5.15.5/epoc' installbin='/home/of/PERL/System/Programs/' installman1dir='/home/of/PERL/man1' installman3dir='/home/of/PERL/man3' installprefix='' installprefixexp='' -installprivlib='/home/of/PERL/perl/lib/5.15.4/' +installprivlib='/home/of/PERL/perl/lib/5.15.5/' installscript='/home/of/PERL/bin/' -installsitearch='/home/of/PERL/site/lib/site_perl/5.15.4/epoc' -installsitelib='/home/of/PERL/perl/lib/site_perl/5.15.4' +installsitearch='/home/of/PERL/site/lib/site_perl/5.15.5/epoc' +installsitelib='/home/of/PERL/perl/lib/site_perl/5.15.5' installstyle='' installusrbinperl='undef' installvendorarch='' @@ -842,8 +842,8 @@ pmake='' pr='' prefix='' prefixexp='' -privlib='/usr/lib/perl/5.15.4' -privlibexp='/usr/lib/perl/5.15.4' +privlib='/usr/lib/perl/5.15.5' +privlibexp='/usr/lib/perl/5.15.5' procselfexe='' prototype='define' ptrsize='4' @@ -906,11 +906,11 @@ sig_num='0' sig_num_init='0, 0' sig_size='1' signal_t='void' -sitearch='/usr/lib/perl/site_perl/5.15.4/epoc' -sitearchexp='/usr/lib/perl/site_perl/5.15.4/epoc' -sitelib='/usr/lib/perl/site_perl/5.15.4/' +sitearch='/usr/lib/perl/site_perl/5.15.5/epoc' +sitearchexp='/usr/lib/perl/site_perl/5.15.5/epoc' +sitelib='/usr/lib/perl/site_perl/5.15.5/' sitelib_stem='/usr/lib/perl/site_perl' -sitelibexp='/usr/lib/perl/site_perl/5.15.4/' +sitelibexp='/usr/lib/perl/site_perl/5.15.5/' siteprefix='' siteprefixexp='' sizesize='4' @@ -1020,7 +1020,7 @@ vendorlib_stem='' vendorlibexp='' vendorprefix='' vendorprefixexp='' -version='5.15.4' +version='5.15.5' versiononly='undef' vi='' voidflags='15' @@ -1044,10 +1044,10 @@ config_arg10='' config_arg11='' PERL_REVISION=5 PERL_VERSION=15 -PERL_SUBVERSION=4 +PERL_SUBVERSION=5 PERL_API_REVISION=5 PERL_API_VERSION=15 -PERL_API_SUBVERSION=4 +PERL_API_SUBVERSION=5 PERL_CONFIG_SH=true CONFIGDOTSH=true # Variables propagated from previous config.sh file. diff --git a/epoc/createpkg.pl b/epoc/createpkg.pl index 3f46fe507e..f328ae5e62 100644 --- a/epoc/createpkg.pl +++ b/epoc/createpkg.pl @@ -3,7 +3,7 @@ use File::Find; use Cwd; -$VERSION="5.15.4"; +$VERSION="5.15.5"; $EPOC_VERSION=1; diff --git a/ext/B/B/Concise.pm b/ext/B/B/Concise.pm index 8ab7e001d0..1ef9c95244 100644 --- a/ext/B/B/Concise.pm +++ b/ext/B/B/Concise.pm @@ -14,7 +14,7 @@ use warnings; # uses #3 and #4, since warnings uses Carp use Exporter (); # use #5 -our $VERSION = "0.86"; +our $VERSION = "0.87"; our @ISA = qw(Exporter); our @EXPORT_OK = qw( set_style set_style_standard add_callback concise_subref concise_cv concise_main @@ -647,7 +647,7 @@ $priv{"threadsv"}{64} = "SVREFd"; $priv{"exit"}{128} = "VMS"; $priv{$_}{2} = "FTACCESS" for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec"); -$priv{"entereval"}{2} = "HAS_HH"; +@{$priv{"entereval"}}{2,4,8,16} = qw "HAS_HH UNI BYTES COPHH"; if ($] >= 5.009) { # Stacked filetests are post 5.8.x @{$priv{$_}}{4,8} = ("FTSTACKED","FTSTACKING") diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t index 67b8591701..41a2ad815a 100644 --- a/ext/B/t/concise-xs.t +++ b/ext/B/t/concise-xs.t @@ -165,11 +165,11 @@ my $testpkgs = { OPpSORT_REVERSE OPpREVERSE_INPLACE OPpTARGET_MY OPpTRANS_COMPLEMENT OPpTRANS_DELETE OPpTRANS_SQUASH PMf_CONTINUE PMf_EVAL PMf_EXTENDED PMf_FOLD PMf_GLOBAL - PMf_KEEP PMf_NONDESTRUCT + PMf_KEEP PMf_NONDESTRUCT PMf_SKIPWHITE PMf_MULTILINE PMf_ONCE PMf_SINGLELINE POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE - OPpCONST_ARYBASE + OPpCONST_ARYBASE OPpEVAL_BYTES /, $] > 5.009 ? ('RXf_SKIPWHITE') : ('PMf_SKIPWHITE'), 'CVf_LOCKED', # This ends up as a constant, pre or post 5.10 ], diff --git a/ext/Errno/Errno_pm.PL b/ext/Errno/Errno_pm.PL index 7cd7a249d2..439f2544ca 100644 --- a/ext/Errno/Errno_pm.PL +++ b/ext/Errno/Errno_pm.PL @@ -2,7 +2,7 @@ use ExtUtils::MakeMaker; use Config; use strict; -our $VERSION = "1.14"; +our $VERSION = "1.15"; my %err = (); my %wsa = (); @@ -316,6 +316,10 @@ EOF } } + # escape $Config{'archname'} + my $archname = $Config{'archname'}; + $archname =~ s/([@%\$])/\\\1/g; + # Write Errno.pm print <<"EDQ"; @@ -330,8 +334,8 @@ use Config; use strict; "\$Config{'archname'}-\$Config{'osvers'}" eq -"$Config{'archname'}-$Config{'osvers'}" or - die "Errno architecture ($Config{'archname'}-$Config{'osvers'}) does not match executable architecture (\$Config{'archname'}-\$Config{'osvers'})"; +"$archname-$Config{'osvers'}" or + die "Errno architecture ($archname-$Config{'osvers'}) does not match executable architecture (\$Config{'archname'}-\$Config{'osvers'})"; our \$VERSION = "$VERSION"; \$VERSION = eval \$VERSION; diff --git a/ext/File-Glob/Glob.xs b/ext/File-Glob/Glob.xs index 2ec500d4f9..3ea05909c6 100644 --- a/ext/File-Glob/Glob.xs +++ b/ext/File-Glob/Glob.xs @@ -332,11 +332,7 @@ void bsd_glob(pattern,...) char *pattern PREINIT: - glob_t pglob; - int i; - int retval; int flags = 0; - SV *tmp; PPCODE: { /* allow for optional flags argument */ @@ -386,7 +382,7 @@ PPCODE: BOOT: { #ifndef PERL_EXTERNAL_GLOB - /* Don’t do this at home! The globhook interface is highly volatile. */ + /* Don't do this at home! The globhook interface is highly volatile. */ PL_globhook = csh_glob_iter; #endif } diff --git a/ext/NDBM_File/hints/gnu.pl b/ext/NDBM_File/hints/gnu.pl new file mode 100644 index 0000000000..db63567966 --- /dev/null +++ b/ext/NDBM_File/hints/gnu.pl @@ -0,0 +1 @@ +do './hints/linux.pl' or die $@; diff --git a/ext/POSIX/t/sysconf.t b/ext/POSIX/t/sysconf.t index 8008996b63..65625a8542 100644 --- a/ext/POSIX/t/sysconf.t +++ b/ext/POSIX/t/sysconf.t @@ -125,7 +125,7 @@ SKIP: { or skip("could not create fifo $fifo ($!)", 2 * 3 * @path_consts_fifo); SKIP: { - my $fd = POSIX::open($fifo, O_RDWR) + my $fd = POSIX::open($fifo, O_RDONLY | O_NONBLOCK) or skip("could not open $fifo ($!)", 3 * @path_consts_fifo); for my $constant (@path_consts_fifo) { diff --git a/ext/Socket/t/Socket.t b/ext/Socket/t/Socket.t index d289f103a4..326fa7a9ef 100644 --- a/ext/Socket/t/Socket.t +++ b/ext/Socket/t/Socket.t @@ -179,7 +179,12 @@ if ($^O eq 'linux') { if($Config{d_inetntop} && $Config{d_inetaton}){ print ((inet_ntop(AF_INET, inet_pton(AF_INET, "10.20.30.40")) eq "10.20.30.40") ? "ok 19\n" : "not ok 19\n"); print ((inet_ntop(AF_INET, inet_aton("10.20.30.40")) eq "10.20.30.40") ? "ok 20\n" : "not ok 20\n"); - print (lc(inet_ntop(AF_INET6, inet_pton(AF_INET6, "2001:503:BA3E::2:30")) eq "2001:503:ba3e::2:30") ? "ok 21\n" : "not ok 21\n"); + if(defined eval { AF_INET6() } ) { + print (lc(inet_ntop(AF_INET6, inet_pton(AF_INET6, "2001:503:BA3E::2:30")) eq "2001:503:ba3e::2:30") ? "ok 21\n" : "not ok 21\n"); + } + else { + print "ok 21 - skipped - no AF_INET6\n"; + } } else { # no IPv6 print "ok $_ - skipped on this platform\n" for 19 .. 21; diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 00a30ded24..ca5c45bf3e 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -52,7 +52,7 @@ sub import { } } -our $VERSION = '0.32'; +our $VERSION = '0.34'; use vars '$WARNINGS_ON_BOOTSTRAP'; use vars map "\$${_}_called_PP", qw(BEGIN UNITCHECK CHECK INIT END); diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 646d82162d..46cc458d52 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -1885,13 +1885,15 @@ newCONSTSUB_type(stash, name, flags, type) int type PREINIT: CV* cv; + STRLEN len; + const char *pv = SvPV(name, len); PPCODE: switch (type) { case 0: - cv = newCONSTSUB(stash, SvPV_nolen(name), NULL); + cv = newCONSTSUB(stash, pv, NULL); break; case 1: - cv = newCONSTSUB_flags(stash, SvPV_nolen(name), flags | SvUTF8(name), NULL); + cv = newCONSTSUB_flags(stash, pv, len, flags | SvUTF8(name), NULL); break; } EXTEND(SP, 2); @@ -2652,9 +2654,9 @@ test_coplabel() if (len != 3) croak("fail # cop_fetch_label len"); if (utf8) croak("fail # cop_fetch_label utf8"); /* SMALL GERMAN UMLAUT A */ - Perl_cop_store_label(aTHX_ cop, "foä", 4, SVf_UTF8); + Perl_cop_store_label(aTHX_ cop, "fo\xc3\xa4", 4, SVf_UTF8); label = Perl_cop_fetch_label(aTHX_ cop, &len, &utf8); - if (strcmp(label,"foä")) croak("fail # cop_fetch_label label"); + if (strcmp(label,"fo\xc3\xa4")) croak("fail # cop_fetch_label label"); if (len != 4) croak("fail # cop_fetch_label len"); if (!utf8) croak("fail # cop_fetch_label utf8"); diff --git a/ext/XS-APItest/t/check_warnings.t b/ext/XS-APItest/t/check_warnings.t new file mode 100644 index 0000000000..2de083d8ee --- /dev/null +++ b/ext/XS-APItest/t/check_warnings.t @@ -0,0 +1,20 @@ +#!perl + +# This test checks to make sure that a BEGIN block created from an XS call +# does not implicitly change the current warning scope, causing a CHECK +# or INIT block created after the corresponding phase to warn when it +# shouldn’t. + +use Test::More tests => 1; + +$SIG{__WARN__} = sub { $w .= shift }; + +use warnings; +eval q| + BEGIN{ + no warnings; + package XS::APItest; require XSLoader; XSLoader::load() + } +|; + +is $w, undef, 'No warnings about CHECK and INIT in warningless scope'; diff --git a/ext/XS-APItest/t/eval-filter.t b/ext/XS-APItest/t/eval-filter.t index 8d370e59a7..f54bd83707 100644 --- a/ext/XS-APItest/t/eval-filter.t +++ b/ext/XS-APItest/t/eval-filter.t @@ -1,9 +1,27 @@ #!perl -w use strict; -use Test::More tests => 1; +use Test::More tests => 5; use XS::APItest; +{ + use feature "unicode_eval"; + my $unfiltered_foo = "foo"; + BEGIN { + eval "BEGIN { filter() }"; + like $@, qr/^Source filters apply only to byte streams at /, + 'filters die under unicode_eval'; + } + is "foo", $unfiltered_foo, 'filters leak not out of unicode evals'; + + use feature "evalbytes"; + our $thingy; + BEGIN { evalbytes "BEGIN { filter() }\n\$thingy = 'foo'" } + is $thingy, "fee", + "source filters apply to evalbytten strings"; + is "foo", $unfiltered_foo, 'filters leak not out of byte evals'; +} + BEGIN { eval "BEGIN{ filter() }" } is "foo", "fee", "evals share filters with the currently compiling scope"; diff --git a/ext/XS-APItest/t/gv_fetchmeth_autoload.t b/ext/XS-APItest/t/gv_fetchmeth_autoload.t index a24c00083c..b24bfb1e15 100644 --- a/ext/XS-APItest/t/gv_fetchmeth_autoload.t +++ b/ext/XS-APItest/t/gv_fetchmeth_autoload.t @@ -65,6 +65,7 @@ ok !XS::APItest::gv_fetchmeth_autoload_type(\%::, "method\0not quite!", 3, $leve ::ok !XS::APItest::gv_fetchmeth_autoload_type(\%ï½ï½ï½‰ï½Ž::, "method", $type, $level, 0); { + no warnings 'once'; local *AUTOLOAD = sub { 1 }; ::is XS::APItest::gv_fetchmeth_autoload_type(\%ï½ï½ï½‰ï½Ž::, "ï½ï½…thï½ï½„$type", $type, $level, 0), "*ï½ï½ï½‰ï½Ž::ï½ï½…thï½ï½„$type", "Autoloading UTF-8 subs works"; } diff --git a/ext/XS-APItest/t/xs_special_subs_require.t b/ext/XS-APItest/t/xs_special_subs_require.t index 49604a5fe5..3131aada9d 100644 --- a/ext/XS-APItest/t/xs_special_subs_require.t +++ b/ext/XS-APItest/t/xs_special_subs_require.t @@ -86,8 +86,8 @@ is($XS::APItest::END_called_PP, undef, "END not yet called"); @trap = sort @trap; is(scalar @trap, 2, "There were 2 warnings"); - is($trap[0], "Too late to run CHECK block.\n"); - is($trap[1], "Too late to run INIT block.\n"); + like($trap[0], qr "^Too late to run CHECK block"); + like($trap[1], qr "^Too late to run INIT block"); } print "# Second body\n"; diff --git a/ext/arybase/arybase.xs b/ext/arybase/arybase.xs index 5c653e3f2d..d654b545e5 100644 --- a/ext/arybase/arybase.xs +++ b/ext/arybase/arybase.xs @@ -137,6 +137,7 @@ STATIC bool ab_op_is_dollar_bracket(pTHX_ OP *o) { return o->op_type == OP_RV2SV && (o->op_flags & OPf_KIDS) && (c = cUNOPx(o)->op_first) && c->op_type == OP_GV + && GvSTASH(cGVOPx_gv(c)) == PL_defstash && strEQ(GvNAME(cGVOPx_gv(c)), "["); } @@ -150,7 +151,7 @@ STATIC void ab_neuter_dollar_bracket(pTHX_ OP *o) { */ oldc = cUNOPx(o)->op_first; newc = newGVOP(OP_GV, 0, - gv_fetchpvs("arybase::[", GV_ADDMULTI, SVt_PVGV)); + gv_fetchpvs("arybase::leftbrack", GV_ADDMULTI, SVt_PVGV)); cUNOPx(o)->op_first = newc; op_free(oldc); } @@ -161,6 +162,9 @@ STATIC void ab_process_assignment(pTHX_ OP *left, OP *right) { if (ab_op_is_dollar_bracket(left) && right->op_type == OP_CONST) { set_arybase_to(SvIV(cSVOPx_sv(right))); ab_neuter_dollar_bracket(left); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_DEPRECATED), "Use of assignment to $[ is deprecated" + ); } } diff --git a/ext/arybase/t/aeach.t b/ext/arybase/t/aeach.t index f56d39e246..241677acb0 100644 --- a/ext/arybase/t/aeach.t +++ b/ext/arybase/t/aeach.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; BEGIN { diff --git a/ext/arybase/t/aelem.t b/ext/arybase/t/aelem.t index d6b8c38149..c26a2a80c3 100644 --- a/ext/arybase/t/aelem.t +++ b/ext/arybase/t/aelem.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 33; diff --git a/ext/arybase/t/akeys.t b/ext/arybase/t/akeys.t index 45af13bf47..53e9db15e7 100644 --- a/ext/arybase/t/akeys.t +++ b/ext/arybase/t/akeys.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; BEGIN { diff --git a/ext/arybase/t/arybase.t b/ext/arybase/t/arybase.t index a7edaf41bc..f3d32874e2 100644 --- a/ext/arybase/t/arybase.t +++ b/ext/arybase/t/arybase.t @@ -3,6 +3,7 @@ # Basic tests for $[ as a variable # plus miscellaneous bug fix tests +no warnings 'deprecated'; use Test::More tests => 7; sub outside_base_scope { return "${'['}" } diff --git a/ext/arybase/t/aslice.t b/ext/arybase/t/aslice.t index 38aa87b7a9..f4a507da6b 100644 --- a/ext/arybase/t/aslice.t +++ b/ext/arybase/t/aslice.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 18; diff --git a/ext/arybase/t/av2arylen.t b/ext/arybase/t/av2arylen.t index 988cca92f7..6c1deb2de4 100644 --- a/ext/arybase/t/av2arylen.t +++ b/ext/arybase/t/av2arylen.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 8; diff --git a/ext/arybase/t/index.t b/ext/arybase/t/index.t index 58efe74d5a..86dde88865 100644 --- a/ext/arybase/t/index.t +++ b/ext/arybase/t/index.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 12; diff --git a/ext/arybase/t/lslice.t b/ext/arybase/t/lslice.t index 6247a5e810..c012b84f5e 100644 --- a/ext/arybase/t/lslice.t +++ b/ext/arybase/t/lslice.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 11; diff --git a/ext/arybase/t/pos.t b/ext/arybase/t/pos.t index f2f6504a5b..970e17eaa0 100644 --- a/ext/arybase/t/pos.t +++ b/ext/arybase/t/pos.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 12; diff --git a/ext/arybase/t/scope.t b/ext/arybase/t/scope.t index 5fb09930e2..07b4176977 100644 --- a/ext/arybase/t/scope.t +++ b/ext/arybase/t/scope.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 14; diff --git a/ext/arybase/t/splice.t b/ext/arybase/t/splice.t index e2db280a93..9fd618a635 100644 --- a/ext/arybase/t/splice.t +++ b/ext/arybase/t/splice.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 23; diff --git a/ext/arybase/t/substr.t b/ext/arybase/t/substr.t index 793293be8b..ecfba48bae 100644 --- a/ext/arybase/t/substr.t +++ b/ext/arybase/t/substr.t @@ -1,4 +1,4 @@ -use warnings; +use warnings; no warnings 'deprecated'; use strict; use Test::More tests => 6; @@ -369,20 +369,12 @@ Perl_gv_init_pvn(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, U32 flag CV *cv; ENTER; if (has_constant) { - char *name0 = NULL; - if (name[len]) - /* newCONSTSUB doesn't take a len arg, so make sure we - * give it a \0-terminated string */ - name0 = savepvn(name,len); - /* newCONSTSUB takes ownership of the reference from us. */ - cv = newCONSTSUB_flags(stash, (name0 ? name0 : name), flags, has_constant); + cv = newCONSTSUB_flags(stash, name, len, flags, has_constant); /* In case op.c:S_process_special_blocks stole it: */ if (!GvCV(gv)) GvCV_set(gv, (CV *)SvREFCNT_inc_simple_NN(cv)); assert(GvCV(gv) == cv); /* newCONSTSUB should have set this */ - if (name0) - Safefree(name0); /* If this reference was a copy of another, then the subroutine must have been "imported", by a Perl space assignment to a GV from a reference to CV. */ @@ -1174,7 +1166,7 @@ Perl_gv_autoload_pvn(pTHX_ HV *stash, const char *name, STRLEN len, U32 flags) * via the SvPVX field in the CV, and the stash in CvSTASH. * * Due to an unfortunate accident of history, the SvPVX field - * serves two purposes. It is also used for the subroutine’s pro- + * serves two purposes. It is also used for the subroutine's pro- * type. Since SvPVX has been documented as returning the sub name * for a long time, but not as returning the prototype, we have * to preserve the SvPVX AUTOLOAD behaviour and put the prototype @@ -2039,13 +2031,9 @@ Perl_gv_fullname4(pTHX_ SV *sv, const GV *gv, const char *prefix, bool keepmain) PERL_ARGS_ASSERT_GV_FULLNAME4; - if (!hv) { - SvOK_off(sv); - return; - } sv_setpv(sv, prefix ? prefix : ""); - if ((name = HvNAME(hv))) { + if (hv && (name = HvNAME(hv))) { const STRLEN len = HvNAMELEN(hv); if (keepmain || strnNE(name, "main", len)) { sv_catpvn_flags(sv,name,len,HvNAMEUTF8(hv)?SV_CATUTF8:SV_CATBYTES); @@ -115,9 +115,10 @@ Null SV pointer. (No longer available when C<PERL_CORE> is defined.) /* a simple (bool) cast may not do the right thing: if bool is defined * as char for example, then the cast from int is implementation-defined + * (bool)!!(cbool) in a ternary triggers a bug in xlc on AIX */ -#define cBOOL(cbool) ((bool)!!(cbool)) +#define cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0) /* Try to figure out __func__ or __FUNCTION__ equivalent, if any. * XXX Should really be a Configure probe, with HAS__FUNCTION__ diff --git a/hints/catamount.sh b/hints/catamount.sh index bf189558b7..f6e76192e8 100644 --- a/hints/catamount.sh +++ b/hints/catamount.sh @@ -31,11 +31,11 @@ # mkdir -p /opt/perl-catamount # mkdir -p /opt/perl-catamount/include # mkdir -p /opt/perl-catamount/lib -# mkdir -p /opt/perl-catamount/lib/perl5/5.15.4 +# mkdir -p /opt/perl-catamount/lib/perl5/5.15.5 # mkdir -p /opt/perl-catamount/bin # cp *.h /opt/perl-catamount/include # cp libperl.a /opt/perl-catamount/lib -# cp -pr lib/* /opt/perl-catamount/lib/perl5/5.15.4 +# cp -pr lib/* /opt/perl-catamount/lib/perl5/5.15.5 # cp miniperl perl run.sh cc.sh /opt/perl-catamount/lib # # With the headers and the libperl.a you can embed Perl to your Catamount diff --git a/hints/gnu.sh b/hints/gnu.sh index f07564269d..42333710f2 100644 --- a/hints/gnu.sh +++ b/hints/gnu.sh @@ -33,6 +33,19 @@ ccdlflags='-Wl,-E' # Debian bug #258618 ccflags="-D_GNU_SOURCE $ccflags" +cat > UU/uselargefiles.cbu <<'EOCBU' +# This script UU/uselargefiles.cbu will get 'called-back' by Configure +# after it has prompted the user for whether to use large files. +case "$uselargefiles" in +''|$define|true|[yY]*) +# Keep this in the left margin. +ccflags_uselargefiles="-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" + + ccflags="$ccflags $ccflags_uselargefiles" + ;; +esac +EOCBU + # The following routines are only available as stubs in GNU libc. # XXX remove this once metaconf detects the GNU libc stubs. d_msgctl='undef' diff --git a/hints/hpux.sh b/hints/hpux.sh index be6c1fddb8..653b5a1865 100644 --- a/hints/hpux.sh +++ b/hints/hpux.sh @@ -157,7 +157,7 @@ case `$cc -v 2>&1`"" in done [ -z "$cc_found" ] && cc_found=`which cc` what $cc_found >&4 - ccversion=`what $cc_found | awk '/Compiler/{print $2}/Itanium/{print $6,$7}/for Integrity/{print $6}'` + ccversion=`what $cc_found | awk '/Compiler/{print $2}/Itanium/{print $6,$7}/for Integrity/{print $6,$7}'` case "$ccflags" in "-Ae "*) ;; *) ccflags="-Ae $cc_cppflags" @@ -414,7 +414,7 @@ case "$ccisgcc" in fi ;; - *) # HP's compiler cannot combine -g and -O + *) case "$optimize" in "") optimize="+O2 +Onolimit" ;; *O[3456789]*) optimize=`echo "$optimize" | sed -e 's/O[3-9]/O2/'` ;; @@ -436,6 +436,19 @@ case "$ccisgcc" in # maint (5.8.8+) and blead (5.9.3+) # -O1/+O1 passed all tests (m)'05 [ 10 Jan 2005 ] optimize="$opt" ;; + B3910B*A.06.15) + # > cc --version + # cc: HP C/aC++ B3910B A.06.15 [May 16 2007] + # Has optimizing problems with +O2 for blead (5.15.5), + # see https://rt.perl.org:443/rt3/Ticket/Display.html?id=103668. + # + # +O2 +Onolimit +Onoprocelim +Ostore_ordering \ + # +Onolibcalls=strcmp + # passes all tests (with/without -DDEBUGGING) [Nov 17 2011] + case "$optimize" in + *O2*) optimize="$optimize +Onoprocelim +Ostore_ordering +Onolibcalls=strcmp" ;; + esac + ;; *) doop_cflags="optimize=\"$opt\"" op_cflags="optimize=\"$opt\"" ;; esac diff --git a/hints/solaris_2.sh b/hints/solaris_2.sh index 508cd3ebc2..0574ce75c1 100644 --- a/hints/solaris_2.sh +++ b/hints/solaris_2.sh @@ -311,11 +311,15 @@ else cat > try.c << 'EOM' #include <stdio.h> int main() { -#ifdef __SUNPRO_C +#if defined(__SUNPRO_C) printf("workshop\n"); #else +#if defined(__SUNPRO_CC) + printf("workshop CC\n"); +#else printf("\n"); #endif +#endif return(0); } EOM @@ -323,10 +327,32 @@ EOM if $tryworkshopcc >/dev/null 2>&1; then cc_name=`./try` if test "$cc_name" = "workshop"; then - ccversion="`${cc:-cc} -V 2>&1|sed -n -e '1s/^cc: //p'`" + ccversion="`${cc:-cc} -V 2>&1|sed -n -e '1s/^cc: //ip'`" + if test ! "$use64bitall_done"; then + loclibpth="/usr/lib /usr/ccs/lib `$getworkshoplibs` $loclibpth" + fi + # Sun cc doesn't support gcc attributes + d_attribute_format='undef' + d_attribute_malloc='undef' + d_attribute_nonnull='undef' + d_attribute_noreturn='undef' + d_attribute_pure='undef' + d_attribute_unused='undef' + d_attribute_warn_unused_result='undef' + fi + if test "$cc_name" = "workshop CC"; then + ccversion="`${cc:-CC} -V 2>&1|sed -n -e '1s/^CC: //ip'`" if test ! "$use64bitall_done"; then loclibpth="/usr/lib /usr/ccs/lib `$getworkshoplibs` $loclibpth" fi + # Sun CC doesn't support gcc attributes + d_attribute_format='undef' + d_attribute_malloc='undef' + d_attribute_nonnull='undef' + d_attribute_noreturn='undef' + d_attribute_pure='undef' + d_attribute_unused='undef' + d_attribute_warn_unused_result='undef' fi fi diff --git a/hints/vos.sh b/hints/vos.sh index f08ba0b2c5..0a3a677edd 100644 --- a/hints/vos.sh +++ b/hints/vos.sh @@ -1,54 +1,48 @@ -# $Id: vos.sh,v 1.0 2001-12-11 09:30:00-05 Green Exp $ - -# This is a hints file for Stratus VOS, using the POSIX environment -# in VOS 14.4.0 and higher. +# This is a hints file for Stratus OpenVOS, using the POSIX environment +# found in VOS 17.1.0 and higher. # -# VOS POSIX is based on POSIX.1-1996 and contains elements of +# OpenVOS POSIX is based on POSIX.1-1996 and contains elements of # POSIX.1-2001. It ships with gcc as the standard compiler. # # Paul Green (Paul.Green@stratus.com) # C compiler and default options. cc=gcc -ccflags="-D_SVID_SOURCE -D_POSIX_C_SOURCE=200112L -D_VOS_EXTENDED_NAMES" +ccflags="-D_XOPEN_SOURCE=700 -D_VOS_EXTENDED_NAMES" +ccdlflags="-Wl,-rpath,$shrpdir" +cccdlflags="-fPIC" # Make command. make="/system/gnu_library/bin/gmake" # indented to not put it into config.sh _make="/system/gnu_library/bin/gmake" -# Architecture name -if test `uname -m` = i786; then - archname="i786" -else - archname="hppa1.1" +# Check for the minimum acceptable release of OpenVOS (17.1.0). +if test `uname -r | sed -e 's/OpenVOS Release //' -e 's/VOS Release //'` \< "17.1.0"; then +cat >&4 <<EOF +*** +*** This version of Perl 5 must be built on OpenVOS Release 17.1.0 or later. +*** +EOF +exit 1 fi +# Always X86 +archname=`uname -m` + # Executable suffix. # No, this is not a typo. The ".pm" really is the native # executable suffix in VOS. Talk about cosmic resonance. _exe=".pm" # Object library paths. -loclibpth="/system/stcp/object_library" -loclibpth="$loclibpth /system/stcp/object_library/common" -loclibpth="$loclibpth /system/stcp/object_library/net" -loclibpth="$loclibpth /system/stcp/object_library/socket" -loclibpth="$loclibpth /system/posix_object_library/sysv" loclibpth="$loclibpth /system/posix_object_library" loclibpth="$loclibpth /system/c_object_library" loclibpth="$loclibpth /system/object_library" glibpth="$loclibpth" # Include library paths -# Pick up vos/syslog.h on Continuum Platform. -if test "$archname" = "i786"; then - locincpth="" -else - locincpth=`pwd`/vos -fi -locincpth="$locincpth /system/stcp/include_library" -locincpth="$locincpth /system/include_library/sysv" +locincpth="" usrinc="/system/include_library" # Where to install perl5. @@ -56,11 +50,15 @@ prefix=/system/ported/perl5 # Linker is gcc. ld="gcc" +lddlflags="-shared" + +# Shared libraries! +so="so" -# No shared libraries. -so="none" +# Build libperl.so +useshrplib="true" -# Don't use nm. +# Don't use nm. The VOS copy of libc.a is empty. usenm="n" # Make the default be no large file support. @@ -89,71 +87,5 @@ archobjs="vos.o" # Help gmake find vos.c test -h vos.c || ln -s vos/vos.c vos.c -# VOS returns a constant 1 for st_nlink when stat'ing a -# directory. Therefore, we must set this variable to stop -# File::Find using the link count to determine whether there are -# subdirectories to be searched. -dont_use_nlink=define - # Tell Configure where to find the hosts file. hostcat="cat /system/stcp/hosts" - -# VOS does not have socketpair() but we supply one in vos.c -d_sockpair="define" - -# Once we have the compiler flags defined, Configure will -# execute the following call-back script. See hints/README.hints -# for details. -cat > UU/cc.cbu <<'EOCBU' -# This script UU/cc.cbu will get 'called-back' by Configure after it -# has prompted the user for the C compiler to use. - -# Compile and run the a test case to see if bug gnu_g++-220 is -# present. If so, lower the optimization level when compiling -# pp_pack.c. This works around a bug in unpack. - -echo " " -echo "Testing whether bug gnu_g++-220 is fixed in your compiler..." - -# Try compiling the test case. -if $cc -o t001 -O $ccflags $ldflags ../hints/t001.c; then - gccbug=`$run ./t001` - if [ "X$gccversion" = "X" ]; then - # Done too late in Configure if hinted - gccversion=`$cc -dumpversion` - fi - case "$gccbug" in - *fails*) cat >&4 <<EOF -This C compiler ($gccversion) is known to have optimizer -problems when compiling pp_pack.c. The Stratus bug number -for this problem is gnu_g++-220. - -Disabling optimization for pp_pack.c. -EOF - case "$pp_pack_cflags" in - '') pp_pack_cflags='optimize=' - echo "pp_pack_cflags='optimize=\"\"'" >> config.sh ;; - *) echo "You specified pp_pack_cflags yourself, so we'll go with your value." >&4 ;; - esac - ;; - *) echo "Your compiler is ok." >&4 - ;; - esac -else - echo " " - echo "*** WHOA THERE!!! ***" >&4 - echo " Your C compiler \"$cc\" doesn't seem to be working!" >&4 - case "$knowitall" in - '') - echo " You'd better start hunting for one and let me know about it." >&4 - exit 1 - ;; - esac -fi - -$rm -f t001$_o t001$_exe t001.kp -EOCBU - - -# VOS 14.7 has minimal support for dynamic linking. Too minimal for perl. -usedl="undef" @@ -2383,7 +2383,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) } #endif - /* hv_iterint now ensures this. */ + /* hv_iterinit now ensures this. */ assert (HvARRAY(hv)); /* At start of hash, entry is NULL. */ @@ -2426,6 +2426,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) or if we run through it and find only placeholders. */ } } + else iter->xhv_riter = -1; if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ HvLAZYDEL_off(hv); diff --git a/intrpvar.h b/intrpvar.h index 66daab2916..0b32657960 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -227,8 +227,6 @@ When you replace this variable, it is considered a good practice to store the po PERLVARI(I, opfreehook, Perl_ophook_t, 0) /* op_free() hook */ -/* Space for U32 */ -PERLVARI(I, reginterp_cnt,I32, 0) /* Whether "Regexp" was interpolated. */ PERLVARI(I, watchaddr, char **, 0) PERLVAR(I, watchok, char *) @@ -297,9 +295,9 @@ PERLVAR(I, dowarn, U8) PERLVAR(I, sawampersand, bool) /* must save all match strings */ PERLVAR(I, unsafe, bool) PERLVAR(I, exit_flags, U8) /* was exit() unexpected, etc. */ -PERLVAR(I, srand_called, bool) -PERLVARI(I, in_load_module, bool, FALSE) /* to prevent recursions in PerlIO_find_layer */ - /* Space for a U16 */ + +PERLVARI(I, reginterp_cnt, I32, 0) /* Whether "Regexp" was interpolated. */ + PERLVAR(I, inplace, char *) PERLVAR(I, e_script, SV *) @@ -596,17 +594,16 @@ PERLVAR(I, utf8_tofold, SV *) PERLVAR(I, last_swash_hv, HV *) PERLVAR(I, last_swash_tmps, U8 *) PERLVAR(I, last_swash_slen, STRLEN) -PERLVARA(I, last_swash_key,10, U8) -PERLVAR(I, last_swash_klen, U8) /* Only needs to store 0-10 */ +PERLVARA(I, last_swash_key,12, U8) +PERLVAR(I, last_swash_klen, U8) /* Only needs to store 0-12 */ #ifdef FCRYPT PERLVARI(I, cryptseen, bool, FALSE) /* has fast crypt() been initialized? */ #endif PERLVAR(I, pad_reset_pending, bool) /* reset pad on next attempted alloc */ - -PERLVARI(I, glob_index, int, 0) - +PERLVAR(I, srand_called, bool) +PERLVARI(I, in_load_module, bool, FALSE) /* to prevent recursions in PerlIO_find_layer */ PERLVAR(I, parser, yy_parser *) /* current parser state */ @@ -727,6 +724,7 @@ PERLVAR(I, custom_ops, HV *) /* custom op registrations */ /* Hook for File::Glob */ PERLVARI(I, globhook, globhook_t, NULL) +PERLVARI(I, glob_index, int, 0) PERLVAR(I, reentrant_retint, int) /* Integer return value from reentrant functions */ /* The last unconditional member of the interpreter structure when 5.10.0 was @@ -736,8 +734,8 @@ PERLVAR(I, reentrant_retint, int) /* Integer return value from reentrant functio #define PERL_LAST_5_16_0_INTERP_MEMBER Ireentrant_retint #ifdef PERL_IMPLICIT_CONTEXT -PERLVARI(I, my_cxt_size, int, 0) /* size of PL_my_cxt_list */ PERLVARI(I, my_cxt_list, void **, NULL) /* per-module array of MY_CXT pointers */ +PERLVARI(I, my_cxt_size, int, 0) /* size of PL_my_cxt_list */ # ifdef PERL_GLOBAL_STRUCT_PRIVATE PERLVARI(I, my_cxt_keys, const char **, NULL) /* per-module array of pointers to MY_CXT_KEY constants */ # endif diff --git a/keywords.c b/keywords.c index b9ef465967..921d55077e 100644 --- a/keywords.c +++ b/keywords.c @@ -2740,7 +2740,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) goto unknown; } - case 9: /* 9 tokens of length 9 */ + case 9: /* 10 tokens of length 9 */ switch (name[0]) { case 'U': @@ -2759,19 +2759,39 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) goto unknown; case 'e': - if (name[1] == 'n' && - name[2] == 'd' && - name[3] == 'n' && - name[4] == 'e' && - name[5] == 't' && - name[6] == 'e' && - name[7] == 'n' && - name[8] == 't') - { /* endnetent */ - return -KEY_endnetent; - } + switch (name[1]) + { + case 'n': + if (name[2] == 'd' && + name[3] == 'n' && + name[4] == 'e' && + name[5] == 't' && + name[6] == 'e' && + name[7] == 'n' && + name[8] == 't') + { /* endnetent */ + return -KEY_endnetent; + } - goto unknown; + goto unknown; + + case 'v': + if (name[2] == 'a' && + name[3] == 'l' && + name[4] == 'b' && + name[5] == 'y' && + name[6] == 't' && + name[7] == 'e' && + name[8] == 's') + { /* evalbytes */ + return (all_keywords || FEATURE_IS_ENABLED("evalbytes") ? -KEY_evalbytes : 0); + } + + goto unknown; + + default: + goto unknown; + } case 'g': if (name[1] == 'e' && @@ -3399,5 +3419,5 @@ unknown: } /* Generated from: - * 6563b55da87af894b79ef9d777217633eee6c7b5f352ff4c17317f562247f5fc regen/keywords.pl + * 370a83cf6eed30dfe61a9dc611013e1014e0c5f537dc4f3872576ba8b9ef7572 regen/keywords.pl * ex: set ro: */ diff --git a/keywords.h b/keywords.h index 83ad0efb56..c33f6687c1 100644 --- a/keywords.h +++ b/keywords.h @@ -71,202 +71,203 @@ #define KEY_eof 55 #define KEY_eq 56 #define KEY_eval 57 -#define KEY_exec 58 -#define KEY_exists 59 -#define KEY_exit 60 -#define KEY_exp 61 -#define KEY_fcntl 62 -#define KEY_fileno 63 -#define KEY_flock 64 -#define KEY_for 65 -#define KEY_foreach 66 -#define KEY_fork 67 -#define KEY_format 68 -#define KEY_formline 69 -#define KEY_ge 70 -#define KEY_getc 71 -#define KEY_getgrent 72 -#define KEY_getgrgid 73 -#define KEY_getgrnam 74 -#define KEY_gethostbyaddr 75 -#define KEY_gethostbyname 76 -#define KEY_gethostent 77 -#define KEY_getlogin 78 -#define KEY_getnetbyaddr 79 -#define KEY_getnetbyname 80 -#define KEY_getnetent 81 -#define KEY_getpeername 82 -#define KEY_getpgrp 83 -#define KEY_getppid 84 -#define KEY_getpriority 85 -#define KEY_getprotobyname 86 -#define KEY_getprotobynumber 87 -#define KEY_getprotoent 88 -#define KEY_getpwent 89 -#define KEY_getpwnam 90 -#define KEY_getpwuid 91 -#define KEY_getservbyname 92 -#define KEY_getservbyport 93 -#define KEY_getservent 94 -#define KEY_getsockname 95 -#define KEY_getsockopt 96 -#define KEY_given 97 -#define KEY_glob 98 -#define KEY_gmtime 99 -#define KEY_goto 100 -#define KEY_grep 101 -#define KEY_gt 102 -#define KEY_hex 103 -#define KEY_if 104 -#define KEY_index 105 -#define KEY_int 106 -#define KEY_ioctl 107 -#define KEY_join 108 -#define KEY_keys 109 -#define KEY_kill 110 -#define KEY_last 111 -#define KEY_lc 112 -#define KEY_lcfirst 113 -#define KEY_le 114 -#define KEY_length 115 -#define KEY_link 116 -#define KEY_listen 117 -#define KEY_local 118 -#define KEY_localtime 119 -#define KEY_lock 120 -#define KEY_log 121 -#define KEY_lstat 122 -#define KEY_lt 123 -#define KEY_m 124 -#define KEY_map 125 -#define KEY_mkdir 126 -#define KEY_msgctl 127 -#define KEY_msgget 128 -#define KEY_msgrcv 129 -#define KEY_msgsnd 130 -#define KEY_my 131 -#define KEY_ne 132 -#define KEY_next 133 -#define KEY_no 134 -#define KEY_not 135 -#define KEY_oct 136 -#define KEY_open 137 -#define KEY_opendir 138 -#define KEY_or 139 -#define KEY_ord 140 -#define KEY_our 141 -#define KEY_pack 142 -#define KEY_package 143 -#define KEY_pipe 144 -#define KEY_pop 145 -#define KEY_pos 146 -#define KEY_print 147 -#define KEY_printf 148 -#define KEY_prototype 149 -#define KEY_push 150 -#define KEY_q 151 -#define KEY_qq 152 -#define KEY_qr 153 -#define KEY_quotemeta 154 -#define KEY_qw 155 -#define KEY_qx 156 -#define KEY_rand 157 -#define KEY_read 158 -#define KEY_readdir 159 -#define KEY_readline 160 -#define KEY_readlink 161 -#define KEY_readpipe 162 -#define KEY_recv 163 -#define KEY_redo 164 -#define KEY_ref 165 -#define KEY_rename 166 -#define KEY_require 167 -#define KEY_reset 168 -#define KEY_return 169 -#define KEY_reverse 170 -#define KEY_rewinddir 171 -#define KEY_rindex 172 -#define KEY_rmdir 173 -#define KEY_s 174 -#define KEY_say 175 -#define KEY_scalar 176 -#define KEY_seek 177 -#define KEY_seekdir 178 -#define KEY_select 179 -#define KEY_semctl 180 -#define KEY_semget 181 -#define KEY_semop 182 -#define KEY_send 183 -#define KEY_setgrent 184 -#define KEY_sethostent 185 -#define KEY_setnetent 186 -#define KEY_setpgrp 187 -#define KEY_setpriority 188 -#define KEY_setprotoent 189 -#define KEY_setpwent 190 -#define KEY_setservent 191 -#define KEY_setsockopt 192 -#define KEY_shift 193 -#define KEY_shmctl 194 -#define KEY_shmget 195 -#define KEY_shmread 196 -#define KEY_shmwrite 197 -#define KEY_shutdown 198 -#define KEY_sin 199 -#define KEY_sleep 200 -#define KEY_socket 201 -#define KEY_socketpair 202 -#define KEY_sort 203 -#define KEY_splice 204 -#define KEY_split 205 -#define KEY_sprintf 206 -#define KEY_sqrt 207 -#define KEY_srand 208 -#define KEY_stat 209 -#define KEY_state 210 -#define KEY_study 211 -#define KEY_sub 212 -#define KEY_substr 213 -#define KEY_symlink 214 -#define KEY_syscall 215 -#define KEY_sysopen 216 -#define KEY_sysread 217 -#define KEY_sysseek 218 -#define KEY_system 219 -#define KEY_syswrite 220 -#define KEY_tell 221 -#define KEY_telldir 222 -#define KEY_tie 223 -#define KEY_tied 224 -#define KEY_time 225 -#define KEY_times 226 -#define KEY_tr 227 -#define KEY_truncate 228 -#define KEY_uc 229 -#define KEY_ucfirst 230 -#define KEY_umask 231 -#define KEY_undef 232 -#define KEY_unless 233 -#define KEY_unlink 234 -#define KEY_unpack 235 -#define KEY_unshift 236 -#define KEY_untie 237 -#define KEY_until 238 -#define KEY_use 239 -#define KEY_utime 240 -#define KEY_values 241 -#define KEY_vec 242 -#define KEY_wait 243 -#define KEY_waitpid 244 -#define KEY_wantarray 245 -#define KEY_warn 246 -#define KEY_when 247 -#define KEY_while 248 -#define KEY_write 249 -#define KEY_x 250 -#define KEY_xor 251 -#define KEY_y 252 +#define KEY_evalbytes 58 +#define KEY_exec 59 +#define KEY_exists 60 +#define KEY_exit 61 +#define KEY_exp 62 +#define KEY_fcntl 63 +#define KEY_fileno 64 +#define KEY_flock 65 +#define KEY_for 66 +#define KEY_foreach 67 +#define KEY_fork 68 +#define KEY_format 69 +#define KEY_formline 70 +#define KEY_ge 71 +#define KEY_getc 72 +#define KEY_getgrent 73 +#define KEY_getgrgid 74 +#define KEY_getgrnam 75 +#define KEY_gethostbyaddr 76 +#define KEY_gethostbyname 77 +#define KEY_gethostent 78 +#define KEY_getlogin 79 +#define KEY_getnetbyaddr 80 +#define KEY_getnetbyname 81 +#define KEY_getnetent 82 +#define KEY_getpeername 83 +#define KEY_getpgrp 84 +#define KEY_getppid 85 +#define KEY_getpriority 86 +#define KEY_getprotobyname 87 +#define KEY_getprotobynumber 88 +#define KEY_getprotoent 89 +#define KEY_getpwent 90 +#define KEY_getpwnam 91 +#define KEY_getpwuid 92 +#define KEY_getservbyname 93 +#define KEY_getservbyport 94 +#define KEY_getservent 95 +#define KEY_getsockname 96 +#define KEY_getsockopt 97 +#define KEY_given 98 +#define KEY_glob 99 +#define KEY_gmtime 100 +#define KEY_goto 101 +#define KEY_grep 102 +#define KEY_gt 103 +#define KEY_hex 104 +#define KEY_if 105 +#define KEY_index 106 +#define KEY_int 107 +#define KEY_ioctl 108 +#define KEY_join 109 +#define KEY_keys 110 +#define KEY_kill 111 +#define KEY_last 112 +#define KEY_lc 113 +#define KEY_lcfirst 114 +#define KEY_le 115 +#define KEY_length 116 +#define KEY_link 117 +#define KEY_listen 118 +#define KEY_local 119 +#define KEY_localtime 120 +#define KEY_lock 121 +#define KEY_log 122 +#define KEY_lstat 123 +#define KEY_lt 124 +#define KEY_m 125 +#define KEY_map 126 +#define KEY_mkdir 127 +#define KEY_msgctl 128 +#define KEY_msgget 129 +#define KEY_msgrcv 130 +#define KEY_msgsnd 131 +#define KEY_my 132 +#define KEY_ne 133 +#define KEY_next 134 +#define KEY_no 135 +#define KEY_not 136 +#define KEY_oct 137 +#define KEY_open 138 +#define KEY_opendir 139 +#define KEY_or 140 +#define KEY_ord 141 +#define KEY_our 142 +#define KEY_pack 143 +#define KEY_package 144 +#define KEY_pipe 145 +#define KEY_pop 146 +#define KEY_pos 147 +#define KEY_print 148 +#define KEY_printf 149 +#define KEY_prototype 150 +#define KEY_push 151 +#define KEY_q 152 +#define KEY_qq 153 +#define KEY_qr 154 +#define KEY_quotemeta 155 +#define KEY_qw 156 +#define KEY_qx 157 +#define KEY_rand 158 +#define KEY_read 159 +#define KEY_readdir 160 +#define KEY_readline 161 +#define KEY_readlink 162 +#define KEY_readpipe 163 +#define KEY_recv 164 +#define KEY_redo 165 +#define KEY_ref 166 +#define KEY_rename 167 +#define KEY_require 168 +#define KEY_reset 169 +#define KEY_return 170 +#define KEY_reverse 171 +#define KEY_rewinddir 172 +#define KEY_rindex 173 +#define KEY_rmdir 174 +#define KEY_s 175 +#define KEY_say 176 +#define KEY_scalar 177 +#define KEY_seek 178 +#define KEY_seekdir 179 +#define KEY_select 180 +#define KEY_semctl 181 +#define KEY_semget 182 +#define KEY_semop 183 +#define KEY_send 184 +#define KEY_setgrent 185 +#define KEY_sethostent 186 +#define KEY_setnetent 187 +#define KEY_setpgrp 188 +#define KEY_setpriority 189 +#define KEY_setprotoent 190 +#define KEY_setpwent 191 +#define KEY_setservent 192 +#define KEY_setsockopt 193 +#define KEY_shift 194 +#define KEY_shmctl 195 +#define KEY_shmget 196 +#define KEY_shmread 197 +#define KEY_shmwrite 198 +#define KEY_shutdown 199 +#define KEY_sin 200 +#define KEY_sleep 201 +#define KEY_socket 202 +#define KEY_socketpair 203 +#define KEY_sort 204 +#define KEY_splice 205 +#define KEY_split 206 +#define KEY_sprintf 207 +#define KEY_sqrt 208 +#define KEY_srand 209 +#define KEY_stat 210 +#define KEY_state 211 +#define KEY_study 212 +#define KEY_sub 213 +#define KEY_substr 214 +#define KEY_symlink 215 +#define KEY_syscall 216 +#define KEY_sysopen 217 +#define KEY_sysread 218 +#define KEY_sysseek 219 +#define KEY_system 220 +#define KEY_syswrite 221 +#define KEY_tell 222 +#define KEY_telldir 223 +#define KEY_tie 224 +#define KEY_tied 225 +#define KEY_time 226 +#define KEY_times 227 +#define KEY_tr 228 +#define KEY_truncate 229 +#define KEY_uc 230 +#define KEY_ucfirst 231 +#define KEY_umask 232 +#define KEY_undef 233 +#define KEY_unless 234 +#define KEY_unlink 235 +#define KEY_unpack 236 +#define KEY_unshift 237 +#define KEY_untie 238 +#define KEY_until 239 +#define KEY_use 240 +#define KEY_utime 241 +#define KEY_values 242 +#define KEY_vec 243 +#define KEY_wait 244 +#define KEY_waitpid 245 +#define KEY_wantarray 246 +#define KEY_warn 247 +#define KEY_when 248 +#define KEY_while 249 +#define KEY_write 250 +#define KEY_x 251 +#define KEY_xor 252 +#define KEY_y 253 /* Generated from: - * 6563b55da87af894b79ef9d777217633eee6c7b5f352ff4c17317f562247f5fc regen/keywords.pl + * 370a83cf6eed30dfe61a9dc611013e1014e0c5f537dc4f3872576ba8b9ef7572 regen/keywords.pl * ex: set ro: */ diff --git a/lib/.gitignore b/lib/.gitignore index 2d0791c744..f74af56874 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -409,6 +409,8 @@ /unicore/Heavy.pl /unicore/mktables.lst /unicore/Name.pl +/unicore/Name.pm unicore/TestProp.pl /unicore/To +/unicore/UCD.pl /unicore/lib diff --git a/lib/File/Compare.t b/lib/File/Compare.t index 7a31af68f1..1b7d038301 100644 --- a/lib/File/Compare.t +++ b/lib/File/Compare.t @@ -112,6 +112,7 @@ if (@donetests == 3) { print "not " unless $donetests[2] == 0; print "ok 13 # "; print "TODO" if $^O eq "cygwin"; # spaces after filename silently trunc'd + print "TODO" if $^O eq "vos"; # spaces after filename silently trunc'd print " file/fileCR [$donetests[2]]\n"; } else { diff --git a/lib/File/Copy.t b/lib/File/Copy.t index 63c99d1261..ffd3d59db7 100644 --- a/lib/File/Copy.t +++ b/lib/File/Copy.t @@ -265,6 +265,8 @@ SKIP: { if $^O eq 'VMS'; skip "Copy doesn't set file permissions correctly on Win32.", $skips if $^O eq "MSWin32"; + skip "Copy maps POSIX permissions to VOS permissions.", $skips + if $^O eq "vos"; # Just a sub to get better failure messages. sub __ ($) { diff --git a/lib/Internals.t b/lib/Internals.t index b0d5bda352..d3fce9c1c8 100644 --- a/lib/Internals.t +++ b/lib/Internals.t @@ -7,7 +7,7 @@ BEGIN { } } -use Test::More tests => 74; +use Test::More tests => 78; my $ro_err = qr/^Modification of a read-only value attempted/; @@ -163,3 +163,14 @@ is( Internals::SvREFCNT($foo[2]), 1 ); is( Internals::SvREFCNT(%foo), 1 ); is( Internals::SvREFCNT($foo{foo}), 1 ); +is( Internals::SvREFCNT($foo, 2), 2, "update ref count"); +is( Internals::SvREFCNT($foo), 2, "check we got the stored value"); + +# the reference count is a U16, but was returned as an IV resulting in +# different values between 32 and 64-bit builds +my $big_count = 0xFFFFFFF0; # -16 32-bit signed +is( Internals::SvREFCNT($foo, $big_count), $big_count, + "set reference count unsigned"); +is( Internals::SvREFCNT($foo), $big_count, "reference count unsigned"); + +Internals::SvREFCNT($foo, 1 ); diff --git a/lib/UNIVERSAL.pm b/lib/UNIVERSAL.pm index e649ef2c2f..4e4122362a 100644 --- a/lib/UNIVERSAL.pm +++ b/lib/UNIVERSAL.pm @@ -1,6 +1,6 @@ package UNIVERSAL; -our $VERSION = '1.09'; +our $VERSION = '1.10'; # UNIVERSAL should not contain any extra subs/methods beyond those # that it exists to define. The use of Exporter below is a historical @@ -103,7 +103,7 @@ check the invocand with C<blessed> from L<Scalar::Util> first: use Scalar::Util 'blessed'; - if ( blessed( $obj ) && $obj->isa("Some::Class") { + if ( blessed( $obj ) && $obj->isa("Some::Class") ) { ... } diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 31cd4fcc42..e1a6cdb1ba 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -6,7 +6,7 @@ no warnings 'surrogate'; # surrogates can be inputs to this use charnames (); use Unicode::Normalize qw(getCombinClass NFD); -our $VERSION = '0.36'; +our $VERSION = '0.37'; use Storable qw(dclone); @@ -23,6 +23,11 @@ our @EXPORT_OK = qw(charinfo casefold casespec namedseq num + prop_aliases + prop_value_aliases + prop_invlist + prop_invmap + MAX_CP ); use Carp; @@ -62,6 +67,19 @@ Unicode::UCD - Unicode character database my $categories = general_categories(); my $types = bidi_types(); + use Unicode::UCD 'prop_aliases'; + my @space_names = prop_aliases("space"); + + use Unicode::UCD 'prop_value_aliases'; + my @gc_punct_names = prop_value_aliases("Gc", "Punct"); + + use Unicode::UCD 'prop_invlist'; + my @puncts = prop_invlist("gc=punctuation"); + + use Unicode::UCD 'prop_invmap'; + my ($list_ref, $map_ref, $format, $missing) + = prop_invmap("General Category"); + use Unicode::UCD 'compexcl'; my $compexcl = compexcl($codepoint); @@ -125,7 +143,7 @@ standard. If the L</code point argument> is not assigned in the standard (i.e., has the general category C<Cn> meaning C<Unassigned>) or is a non-character (meaning it is guaranteed to never be assigned in the standard), -B<undef> is returned. +C<undef> is returned. Fields that aren't applicable to the particular code point argument exist in the returned hash, and are empty. @@ -154,6 +172,9 @@ C<E<lt>controlE<gt>>. The short name of the general category of I<code>. This will match one of the keys in the hash returned by L</general_categories()>. +The L</prop_value_aliases()> function can be used to get all the synonyms +of the category name. + =item B<combining> the combining class number for I<code> used in the Canonical Ordering Algorithm. @@ -161,15 +182,21 @@ For Unicode 5.1, this is described in Section 3.11 C<Canonical Ordering Behavior available at L<http://www.unicode.org/versions/Unicode5.1.0/> +The L</prop_value_aliases()> function can be used to get all the synonyms +of the combining class number. + =item B<bidi> bidirectional type of I<code>. This will match one of the keys in the hash returned by L</bidi_types()>. +The L</prop_value_aliases()> function can be used to get all the synonyms +of the bidi type name. + =item B<decomposition> is empty if I<code> has no decomposition; or is one or more codes -(separated by spaces) that taken in order represent a decomposition for +(separated by spaces) that, taken in order, represent a decomposition for I<code>. Each has at least four hexdigits. The codes may be preceded by a word enclosed in angle brackets then a space, like C<E<lt>compatE<gt> >, giving the type of decomposition @@ -233,13 +260,13 @@ mappings.) =item B<block> -block I<code> belongs to (used in C<\p{Blk=...}>). +the block I<code> belongs to (used in C<\p{Blk=...}>). See L</Blocks versus Scripts>. =item B<script> -script I<code> belongs to. +the script I<code> belongs to. See L</Blocks versus Scripts>. =back @@ -508,22 +535,24 @@ sub charinrange { my $range = charblock('Armenian'); With a L</code point argument> charblock() returns the I<block> the code point -belongs to, e.g. C<Basic Latin>. +belongs to, e.g. C<Basic Latin>. The old-style block name is returned (see +L</Old-style versus new-style block names>). If the code point is unassigned, this returns the block it would belong to if -it were assigned (which it may in future versions of the Unicode Standard). +it were assigned. See also L</Blocks versus Scripts>. If supplied with an argument that can't be a code point, charblock() tries to -do the opposite and interpret the argument as a code point block. The return -value is a I<range>: an anonymous list that consists of another anonymous list -whose first element is the first code point in the block, and whose second -(and final) element is the final code point in the block. (The extra layer of -indirection is so that the same program logic can be used to handle both this -return, and the return from L</charscript()> which can have multiple ranges.) -You can test whether a code point is in a range using the L</charinrange()> -function. -If the argument is not a known code point block, B<undef> is returned. +do the opposite and interpret the argument as an old-style block name. The +return value +is a I<range set> with one range: an anonymous list with a single element that +consists of another anonymous list whose first element is the first code point +in the block, and whose second (and final) element is the final code point in +the block. (The extra list consisting of just one element is so that the same +program logic can be used to handle both this return, and the return from +L</charscript()> which can have multiple ranges.) You can test whether a code +point is in a range using the L</charinrange()> function. If the argument is +not a known block, C<undef> is returned. =cut @@ -537,6 +566,7 @@ sub _charblocks { unless (@BLOCKS) { if (openunicode(\$BLOCKSFH, "Blocks.txt")) { local $_; + local $/ = "\n"; while (<$BLOCKSFH>) { if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) { my ($lo, $hi) = (hex($1), hex($2)); @@ -579,14 +609,14 @@ sub charblock { With a L</code point argument> charscript() returns the I<script> the code point belongs to, e.g. C<Latin>, C<Greek>, C<Han>. -If the code point is unassigned, it returns B<undef> +If the code point is unassigned, it returns C<"Unknown">. If supplied with an argument that can't be a code point, charscript() tries -to do the opposite and interpret the argument as a code point script. The -return value is a I<range>: an anonymous list of lists that contain +to do the opposite and interpret the argument as a script name. The +return value is a I<range set>: an anonymous list of lists that contain I<start-of-range>, I<end-of-range> code point pairs. You can test whether a -code point is in a range using the L</charinrange()> function. If the -argument is not a known code point script, B<undef> is returned. +code point is in a range set using the L</charinrange()> function. If the +argument is not a known script, C<undef> is returned. See also L</Blocks versus Scripts>. @@ -630,6 +660,12 @@ sub charscript { charblocks() returns a reference to a hash with the known block names as the keys, and the code point ranges (see L</charblock()>) as the values. +The names are in the old-style (see L</Old-style versus new-style block +names>). + +L<prop_invmap("block")|/prop_invmap()> can be used to get this same data in a +different type of data structure. + See also L</Blocks versus Scripts>. =cut @@ -649,6 +685,9 @@ charscripts() returns a reference to a hash with the known script names as the keys, and the code point ranges (see L</charscript()>) as the values. +L<prop_invmap("script")|/prop_invmap()> can be used to get this same data in a +different type of data structure. + See also L</Blocks versus Scripts>. =cut @@ -732,6 +771,9 @@ from the long names to the short names. The general category is the one returned from L</charinfo()> under the C<category> key. +The L</prop_value_aliases()> function can be used to get all the synonyms of +the category name. + =cut my %BIDI_TYPES = @@ -774,6 +816,9 @@ the Unicode TR9 is recommended reading: L<http://www.unicode.org/reports/tr9/> (as of Unicode 5.0.0) +The L</prop_value_aliases()> function can be used to get all the synonyms of +the bidi type name. + =cut sub bidi_types { @@ -849,7 +894,7 @@ sub compexcl { This returns the (almost) locale-independent case folding of the character specified by the L</code point argument>. -If there is no case folding for that code point, B<undef> is returned. +If there is no case folding for that code point, C<undef> is returned. If there is a case folding for that code point, a reference to a hash with the following fields is returned: @@ -863,7 +908,7 @@ added if necessary to make it contain at least four hexdigits =item B<full> -one or more codes (separated by spaces) that taken in order give the +one or more codes (separated by spaces) that, taken in order, give the code points for the case folding for I<code>. Each has at least four hexdigits. @@ -887,8 +932,8 @@ I<code>. It is defined primarily for backwards compatibility. is C<C> (for C<common>) if the best possible fold is a single code point (I<simple> equals I<full> equals I<mapping>). It is C<S> if there are distinct folds, I<simple> and I<full> (I<mapping> equals I<simple>). And it is C<F> if -there only a I<full> fold (I<mapping> equals I<full>; I<simple> is empty). Note -that this +there is only a I<full> fold (I<mapping> equals I<full>; I<simple> is empty). +Note that this describes the contents of I<mapping>. It is defined primarily for backwards compatibility. @@ -898,14 +943,14 @@ dotless lowercase i: =over -=item B<*> +=item B<*> If you use this C<I> mapping -If you use this C<I> mapping, the result is case-insensitive, +the result is case-insensitive, but dotless and dotted I's are not distinguished -=item B<*> +=item B<*> If you exclude this C<I> mapping -If you exclude this C<I> mapping, the result is not fully case-insensitive, but +the result is not fully case-insensitive, but dotless and dotted I's are distinguished =back @@ -915,7 +960,7 @@ dotless and dotted I's are distinguished contains any special folding for Turkic languages. For versions of Unicode starting with 3.2, this field is empty unless I<code> has a different folding in Turkic languages, in which case it is one or more codes (separated by -spaces) that taken in order give the code points for the case folding for +spaces) that, taken in order, give the code points for the case folding for I<code> in those languages. Each code has at least four hexdigits. Note that this folding does not maintain canonical equivalence without @@ -954,6 +999,7 @@ sub _casefold { unless (%CASEFOLD) { if (openunicode(\$CASEFOLDFH, "CaseFolding.txt")) { local $_; + local $/ = "\n"; while (<$CASEFOLDFH>) { if (/^([0-9A-F]+); ([CFIST]); ([0-9A-F]+(?: [0-9A-F]+)*);/) { my $code = hex($1); @@ -1024,7 +1070,7 @@ Unicode case mappings as returned by L</charinfo()> never are). If there are no case mappings for the L</code point argument>, or if all three possible mappings (I<lower>, I<title> and I<upper>) result in single code -points and are locale independent and unconditional, B<undef> is returned +points and are locale independent and unconditional, C<undef> is returned (which means that the case mappings, if any, for the code point are those returned by L</charinfo()>). @@ -1043,26 +1089,26 @@ added if necessary to make it contain at least four hexdigits =item B<lower> -one or more codes (separated by spaces) that taken in order give the +one or more codes (separated by spaces) that, taken in order, give the code points for the lower case of I<code>. Each has at least four hexdigits. =item B<title> -one or more codes (separated by spaces) that taken in order give the +one or more codes (separated by spaces) that, taken in order, give the code points for the title case of I<code>. Each has at least four hexdigits. =item B<upper> -one or more codes (separated by spaces) that taken in order give the +one or more codes (separated by spaces) that, taken in order, give the code points for the upper case of I<code>. Each has at least four hexdigits. =item B<condition> the conditions for the mappings to be valid. -If B<undef>, the mappings are always valid. +If C<undef>, the mappings are always valid. When defined, this field is a list of conditions, all of which must be true for the mappings to be valid. The list consists of one or more @@ -1082,7 +1128,7 @@ These are for context-sensitive casing. =back The hash described above is returned for locale-independent casing, where -at least one of the mappings has length longer than one. If B<undef> is +at least one of the mappings has length longer than one. If C<undef> is returned, the code point may have mappings, but if so, all are length one, and are returned by L</charinfo()>. Note that when this function does return a value, it will be for the complete @@ -1116,6 +1162,7 @@ sub _casespec { unless (%CASESPEC) { if (openunicode(\$CASESPECFH, "SpecialCasing.txt")) { local $_; + local $/ = "\n"; while (<$CASESPECFH>) { if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) { my ($hexcode, $lower, $title, $upper, $condition) = @@ -1186,13 +1233,13 @@ sub casespec { my %namedseq = namedseq(); If used with a single argument in a scalar context, returns the string -consisting of the code points of the named sequence, or B<undef> if no +consisting of the code points of the named sequence, or C<undef> if no named sequence by that name exists. If used with a single argument in a list context, it returns the list of the ordinals of the code points. If used with no arguments in a list context, returns a hash with the names of the named sequences as the keys and the named sequences as strings as -the values. Otherwise, it returns B<undef> or an empty list depending +the values. Otherwise, it returns C<undef> or an empty list depending on the context. This function only operates on officially approved (not provisional) named @@ -1212,6 +1259,7 @@ sub _namedseq { unless (%NAMEDSEQ) { if (openunicode(\$NAMEDSEQFH, "Name.pl")) { local $_; + local $/ = "\n"; while (<$NAMEDSEQFH>) { if (/^ [0-9A-F]+ \ /x) { chomp; @@ -1382,7 +1430,1591 @@ sub num { return $value; } +=pod + +=head2 B<prop_aliases()> + + use Unicode::UCD 'prop_aliases'; + + my ($short_name, $full_name, @other_names) = prop_aliases("space"); + my $same_full_name = prop_aliases("Space"); # Scalar context + my ($same_short_name) = prop_aliases("Space"); # gets 0th element + print "The full name is $full_name\n"; + print "The short name is $short_name\n"; + print "The other aliases are: ", join(", ", @other_names), "\n"; + + prints: + The full name is White_Space + The short name is WSpace + The other aliases are: Space + +Most Unicode properties have several synonymous names. Typically, there is at +least a short name, convenient to type, and a long name that more fully +describes the property, and hence is more easily understood. + +If you know one name for a Unicode property, you can use C<prop_aliases> to find +either the long name (when called in scalar context), or a list of all of the +names, somewhat ordered so that the short name is in the 0th element, the long +name in the next element, and any other synonyms are in the remaining +elements, in no particular order. + +The long name is returned in a form nicely capitalized, suitable for printing. + +The input parameter name is loosely matched, which means that white space, +hyphens, and underscores are ignored (except for the trailing underscore in +the old_form grandfathered-in C<"L_">, which is better written as C<"LC">, and +both of which mean C<General_Category=Cased Letter>). + +If the name is unknown, C<undef> is returned (or an empty list in list +context). Note that Perl typically recognizes property names in regular +expressions with an optional C<"Is_>" (with or without the underscore) +prefixed to them, such as C<\p{isgc=punct}>. This function does not recognize +those in the input, returning C<undef>. Nor are they included in the output +as possible synonyms. + +C<prop_aliases> does know about the Perl extensions to Unicode properties, +such as C<Any> and C<XPosixAlpha>, and the single form equivalents to Unicode +properties such as C<XDigit>, C<Greek>, C<In_Greek>, and C<Is_Greek>. The +final example demonstrates that the C<"Is_"> prefix is recognized for these +extensions; it is needed to resolve ambiguities. For example, +C<prop_aliases('lc')> returns the list C<(lc, Lowercase_Mapping)>, but +C<prop_aliases('islc')> returns C<(Is_LC, Cased_Letter)>. This is +because C<islc> is a Perl extension which is short for +C<General_Category=Cased Letter>. The lists returned for the Perl extensions +will not include the C<"Is_"> prefix (whether or not the input had it) unless +needed to resolve ambiguities, as shown in the C<"islc"> example, where the +returned list had one element containing C<"Is_">, and the other without. + +It is also possible for the reverse to happen: C<prop_aliases('isc')> returns +the list C<(isc, ISO_Comment)>; whereas C<prop_aliases('c')> returns +C<(C, Other)> (the latter being a Perl extension meaning +C<General_Category=Other>. +L<perluniprops/Properties accessible through Unicode::UCD> lists the available +forms, including which ones are discouraged from use. + +Those discouraged forms are accepted as input to C<prop_aliases>, but are not +returned in the lists. C<prop_aliases('isL&')> and C<prop_aliases('isL_')>, +which are old synonyms for C<"Is_LC"> and should not be used in new code, are +examples of this. These both return C<(Is_LC, Cased_Letter)>. Thus this +function allows you to take a discourarged form, and find its acceptable +alternatives. The same goes with single-form Block property equivalences. +Only the forms that begin with C<"In_"> are not discouraged; if you pass +C<prop_aliases> a discouraged form, you will get back the equivalent ones that +begin with C<"In_">. It will otherwise look like a new-style block name (see. +L</Old-style versus new-style block names>). + +C<prop_aliases> does not know about any user-defined properties, and will +return C<undef> if called with one of those. Likewise for Perl internal +properties, with the exception of "Perl_Decimal_Digit" which it does know +about (and which is documented below in L</prop_invmap()>). + +=cut + +# It may be that there are use cases where the discouraged forms should be +# returned. If that comes up, an optional boolean second parameter to the +# function could be created, for example. + +# These are created by mktables for this routine and stored in unicore/UCD.pl +# where their structures are described. +our %string_property_loose_to_name; +our %ambiguous_names; +our %loose_perlprop_to_name; +our %prop_aliases; + +sub prop_aliases ($) { + my $prop = $_[0]; + return unless defined $prop; + + require "unicore/UCD.pl"; + require "unicore/Heavy.pl"; + require "utf8_heavy.pl"; + + # The property name may be loosely or strictly matched; we don't know yet. + # But both types use lower-case. + $prop = lc $prop; + + # It is loosely matched if its lower case isn't known to be strict. + my $list_ref; + if (! exists $utf8::stricter_to_file_of{$prop}) { + my $loose = utf8::_loose_name($prop); + + # There is a hash that converts from any loose name to its standard + # form, mapping all synonyms for a name to one name that can be used + # as a key into another hash. The whole concept is for memory + # savings, as the second hash doesn't have to have all the + # combinations. Actually, there are two hashes that do the + # converstion. One is used in utf8_heavy.pl (stored in Heavy.pl) for + # looking up properties matchable in regexes. This function needs to + # access string properties, which aren't available in regexes, so a + # second conversion hash is made for them (stored in UCD.pl). Look in + # the string one now, as the rest can have an optional 'is' prefix, + # which these don't. + if (exists $string_property_loose_to_name{$loose}) { + + # Convert to its standard loose name. + $prop = $string_property_loose_to_name{$loose}; + } + else { + my $retrying = 0; # bool. ? Has an initial 'is' been stripped + RETRY: + if (exists $utf8::loose_property_name_of{$loose} + && (! $retrying + || ! exists $ambiguous_names{$loose})) + { + # Found an entry giving the standard form. We don't get here + # (in the test above) when we've stripped off an + # 'is' and the result is an ambiguous name. That is because + # these are official Unicode properties (though Perl can have + # an optional 'is' prefix meaning the official property), and + # all ambiguous cases involve a Perl single-form extension + # for the gc, script, or block properties, and the stripped + # 'is' means that they mean one of those, and not one of + # these + $prop = $utf8::loose_property_name_of{$loose}; + } + elsif (exists $loose_perlprop_to_name{$loose}) { + + # This hash is specifically for this function to list Perl + # extensions that aren't in the earlier hashes. If there is + # only one element, the short and long names are identical. + # Otherwise the form is already in the same form as + # %prop_aliases, which is handled at the end of the function. + $list_ref = $loose_perlprop_to_name{$loose}; + if (@$list_ref == 1) { + my @list = ($list_ref->[0], $list_ref->[0]); + $list_ref = \@list; + } + } + elsif (! exists $utf8::loose_to_file_of{$loose}) { + + # loose_to_file_of is a complete list of loose names. If not + # there, the input is unknown. + return; + } + else { + + # Here we found the name but not its aliases, so it has to + # exist. This means it must be one of the Perl single-form + # extensions. First see if it is for a property-value + # combination in one of the following properties. + my @list; + foreach my $property ("gc", "script") { + @list = prop_value_aliases($property, $loose); + last if @list; + } + if (@list) { + + # Here, it is one of those property-value combination + # single-form synonyms. There are ambiguities with some + # of these. Check against the list for these, and adjust + # if necessary. + for my $i (0 .. @list -1) { + if (exists $ambiguous_names + {utf8::_loose_name(lc $list[$i])}) + { + # The ambiguity is resolved by toggling whether or + # not it has an 'is' prefix + $list[$i] =~ s/^Is_// or $list[$i] =~ s/^/Is_/; + } + } + return @list; + } + + # Here, it wasn't one of the gc or script single-form + # extensions. It could be a block property single-form + # extension. An 'in' prefix definitely means that, and should + # be looked up without the prefix. + my $began_with_in = $loose =~ s/^in//; + @list = prop_value_aliases("block", $loose); + if (@list) { + map { $_ =~ s/^/In_/ } @list; + return @list; + } + + # Here still haven't found it. The last opportunity for it + # being valid is only if it began with 'is'. We retry without + # the 'is', setting a flag to that effect so that we don't + # accept things that begin with 'isis...' + if (! $retrying && ! $began_with_in && $loose =~ s/^is//) { + $retrying = 1; + goto RETRY; + } + + # Here, didn't find it. Since it was in %loose_to_file_of, we + # should have been able to find it. + carp __PACKAGE__, "::prop_aliases: Unexpectedly could not find '$prop'. Send bug report to perlbug\@perl.org"; + return; + } + } + } + + if (! $list_ref) { + # Here, we have set $prop to a standard form name of the input. Look + # it up in the structure created by mktables for this purpose, which + # contains both strict and loosely matched properties. Avoid + # autovivifying. + $list_ref = $prop_aliases{$prop} if exists $prop_aliases{$prop}; + return unless $list_ref; + } + + # The full name is in element 1. + return $list_ref->[1] unless wantarray; + + return @{dclone $list_ref}; +} + +=pod + +=head2 B<prop_value_aliases()> + + use Unicode::UCD 'prop_value_aliases'; + + my ($short_name, $full_name, @other_names) + = prop_value_aliases("Gc", "Punct"); + my $same_full_name = prop_value_aliases("Gc", "P"); # Scalar cntxt + my ($same_short_name) = prop_value_aliases("Gc", "P"); # gets 0th + # element + print "The full name is $full_name\n"; + print "The short name is $short_name\n"; + print "The other aliases are: ", join(", ", @other_names), "\n"; + + prints: + The full name is Punctuation + The short name is P + The other aliases are: Punct + +Some Unicode properties have a restricted set of legal values. For example, +all binary properties are restricted to just C<true> or C<false>; and there +are only a few dozen possible General Categories. + +For such properties, there are usually several synonyms for each possible +value. For example, in binary properties, I<truth> can be represented by any of +the strings "Y", "Yes", "T", or "True"; and the General Category +"Punctuation" by that string, or "Punct", or simply "P". + +Like property names, there is typically at least a short name for each such +property-value, and a long name. If you know any name of the property-value, +you can use C<prop_value_aliases>() to get the long name (when called in +scalar context), or a list of all the names, with the short name in the 0th +element, the long name in the next element, and any other synonyms in the +remaining elements, in no particular order, except that any all-numeric +synonyms will be last. + +The long name is returned in a form nicely capitalized, suitable for printing. + +Case, white space, hyphens, and underscores are ignored in the input parameters +(except for the trailing underscore in the old-form grandfathered-in general +category property value C<"L_">, which is better written as C<"LC">). + +If either name is unknown, C<undef> is returned. Note that Perl typically +recognizes property names in regular expressions with an optional C<"Is_>" +(with or without the underscore) prefixed to them, such as C<\p{isgc=punct}>. +This function does not recognize those in the property parameter, returning +C<undef>. + +If called with a property that doesn't have synonyms for its values, it +returns the input value, possibly normalized with capitalization and +underscores. + +For the block property, new-style block names are returned (see +L</Old-style versus new-style block names>). + +To find the synonyms for single-forms, such as C<\p{Any}>, use +L</prop_aliases()> instead. + +C<prop_value_aliases> does not know about any user-defined properties, and +will return C<undef> if called with one of those. + +=cut + +# These are created by mktables for this routine and stored in unicore/UCD.pl +# where their structures are described. +our %loose_to_standard_value; +our %prop_value_aliases; + +sub prop_value_aliases ($$) { + my ($prop, $value) = @_; + return unless defined $prop && defined $value; + + require "unicore/UCD.pl"; + require "utf8_heavy.pl"; + + # Find the property name synonym that's used as the key in other hashes, + # which is element 0 in the returned list. + ($prop) = prop_aliases($prop); + return if ! $prop; + $prop = utf8::_loose_name(lc $prop); + + # Here is a legal property, but the hash below (created by mktables for + # this purpose) only knows about the properties that have a very finite + # number of potential values, that is not ones whose value could be + # anything, like most (if not all) string properties. These don't have + # synonyms anyway. Simply return the input. For example, there is no + # synonym for ('Uppercase_Mapping', A'). + return $value if ! exists $prop_value_aliases{$prop}; + + # The value name may be loosely or strictly matched; we don't know yet. + # But both types use lower-case. + $value = lc $value; + + # If the name isn't found under loose matching, it certainly won't be + # found under strict + my $loose_value = utf8::_loose_name($value); + return unless exists $loose_to_standard_value{"$prop=$loose_value"}; + + # Similarly if the combination under loose matching doesn't exist, it + # won't exist under strict. + my $standard_value = $loose_to_standard_value{"$prop=$loose_value"}; + return unless exists $prop_value_aliases{$prop}{$standard_value}; + + # Here we did find a combination under loose matching rules. But it could + # be that is a strict property match that shouldn't have matched. + # %prop_value_aliases is set up so that the strict matches will appear as + # if they were in loose form. Thus, if the non-loose version is legal, + # we're ok, can skip the further check. + if (! exists $utf8::stricter_to_file_of{"$prop=$value"} + + # We're also ok and skip the further check if value loosely matches. + # mktables has verified that no strict name under loose rules maps to + # an existing loose name. This code relies on the very limited + # circumstances that strict names can be here. Strict name matching + # happens under two conditions: + # 1) when the name begins with an underscore. But this function + # doesn't accept those, and %prop_value_aliases doesn't have + # them. + # 2) When the values are numeric, in which case we need to look + # further, but their squeezed-out loose values will be in + # %stricter_to_file_of + && exists $utf8::stricter_to_file_of{"$prop=$loose_value"}) + { + # The only thing that's legal loosely under strict is that can have an + # underscore between digit pairs XXX + while ($value =~ s/(\d)_(\d)/$1$2/g) {} + return unless exists $utf8::stricter_to_file_of{"$prop=$value"}; + } + + # Here, we know that the combination exists. Return it. + my $list_ref = $prop_value_aliases{$prop}{$standard_value}; + if (@$list_ref > 1) { + # The full name is in element 1. + return $list_ref->[1] unless wantarray; + + return @{dclone $list_ref}; + } + + return $list_ref->[0] unless wantarray; + + # Only 1 element means that it repeats + return ( $list_ref->[0], $list_ref->[0] ); +} + +# All 1 bits is the largest possible UV. +$Unicode::UCD::MAX_CP = ~0; + +=pod + +=head2 B<prop_invlist()> + +C<prop_invlist> returns an inversion list (described below) that defines all the +code points for the binary Unicode property (or "property=value" pair) given +by the input parameter string: + + use feature 'say'; + use Unicode::UCD 'prop_invlist'; + say join ", ", prop_invlist("Any"); + + prints: + 0, 1114112 + +An empty list is returned if the input is unknown; the number of elements in +the list is returned if called in scalar context. + +L<perluniprops|perluniprops/Properties accessible through \p{} and \P{}> gives +the list of properties that this function accepts, as well as all the possible +forms for them (including with the optional "Is_" prefixes). (Except this +function doesn't accept any Perl-internal properties, some of which are listed +there.) This function uses the same loose or tighter matching rules for +resolving the input property's name as is done for regular expressions. These +are also specified in L<perluniprops|perluniprops/Properties accessible +through \p{} and \P{}>. Examples of using the "property=value" form are: + + say join ", ", prop_invlist("Script=Shavian"); + + prints: + 66640, 66688 + + say join ", ", prop_invlist("ASCII_Hex_Digit=No"); + + prints: + 0, 48, 58, 65, 71, 97, 103 + + say join ", ", prop_invlist("ASCII_Hex_Digit=Yes"); + + prints: + 48, 58, 65, 71, 97, 103 + +Inversion lists are a compact way of specifying Unicode property-value +definitions. The 0th item in the list is the lowest code point that has the +property-value. The next item (item [1]) is the lowest code point beyond that +one that does NOT have the property-value. And the next item beyond that +([2]) is the lowest code point beyond that one that does have the +property-value, and so on. Put another way, each element in the list gives +the beginning of a range that has the property-value (for even numbered +elements), or doesn't have the property-value (for odd numbered elements). +The name for this data structure stems from the fact that each element in the +list toggles (or inverts) whether the corresponding range is or isn't on the +list. + +In the final example above, the first ASCII Hex digit is code point 48, the +character "0", and all code points from it through 57 (a "9") are ASCII hex +digits. Code points 58 through 64 aren't, but 65 (an "A") through 70 (an "F") +are, as are 97 ("a") through 102 ("f"). 103 starts a range of code points +that aren't ASCII hex digits. That range extends to infinity, which on your +computer can be found in the variable C<$Unicode::UCD::MAX_CP>. (This +variable is as close to infinity as Perl can get on your platform, and may be +too high for some operations to work; you may wish to use a smaller number for +your purposes.) + +Note that the inversion lists returned by this function can possibly include +non-Unicode code points, that is anything above 0x10FFFF. This is in +contrast to Perl regular expression matches on those code points, in which a +non-Unicode code point always fails to match. For example, both of these have +the same result: + + chr(0x110000) =~ \p{ASCII_Hex_Digit=True} # Fails. + chr(0x110000) =~ \p{ASCII_Hex_Digit=False} # Fails! + +And both raise a warning that a Unicode property is being used on a +non-Unicode code point. It is arguable as to which is the correct thing to do +here. This function has chosen the way opposite to the Perl regular +expression behavior. This allows you to easily flip to to the Perl regular +expression way (for you to go in the other direction would be far harder). +Simply add 0x110000 at the end of the non-empty returned list if it isn't +already that value; and pop that value if it is; like: + + my @list = prop_invlist("foo"); + if (@list) { + if ($list[-1] == 0x110000) { + pop @list; # Defeat the turning on for above Unicode + } + else { + push @list, 0x110000; # Turn off for above Unicode + } + } + +It is a simple matter to expand out an inversion list to a full list of all +code points that have the property-value: + + my @invlist = prop_invlist($property_name); + die "empty" unless @invlist; + my @full_list; + for (my $i = 0; $i < @invlist; $i += 2) { + my $upper = ($i + 1) < @invlist + ? $invlist[$i+1] - 1 # In range + : $Unicode::UCD::MAX_CP; # To infinity. You may want + # to stop much much earlier; + # going this high may expose + # perl deficiencies with very + # large numbers. + for my $j ($invlist[$i] .. $upper) { + push @full_list, $j; + } + } + +C<prop_invlist> does not know about any user-defined nor Perl internal-only +properties, and will return C<undef> if called with one of those. + +=cut + +# User-defined properties could be handled with some changes to utf8_heavy.pl; +# and implementing here of dealing with EXTRAS. If done, consideration should +# be given to the fact that the user subroutine could return different results +# with each call; security issues need to be thought about. + +# These are created by mktables for this routine and stored in unicore/UCD.pl +# where their structures are described. +our %loose_defaults; +our $MAX_UNICODE_CODEPOINT; + +sub prop_invlist ($) { + my $prop = $_[0]; + return if ! defined $prop; + + require "utf8_heavy.pl"; + + # Warnings for these are only for regexes, so not applicable to us + no warnings 'deprecated'; + + # Get the swash definition of the property-value. + my $swash = utf8::SWASHNEW(__PACKAGE__, $prop, undef, 1, 0); + + # Fail if not found, or isn't a boolean property-value, or is a + # user-defined property, or is internal-only. + return if ! $swash + || ref $swash eq "" + || $swash->{'BITS'} != 1 + || $swash->{'USER_DEFINED'} + || $prop =~ /^\s*_/; + + if ($swash->{'EXTRAS'}) { + carp __PACKAGE__, "::prop_invlist: swash returned for $prop unexpectedly has EXTRAS magic"; + return; + } + if ($swash->{'SPECIALS'}) { + carp __PACKAGE__, "::prop_invlist: swash returned for $prop unexpectedly has SPECIALS magic"; + return; + } + + my @invlist; + + # The input lines look like: + # 0041\t005A # [26] + # 005F + + # Split into lines, stripped of trailing comments + foreach my $range (split "\n", + $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr) + { + # And find the beginning and end of the range on the line + my ($hex_begin, $hex_end) = split "\t", $range; + my $begin = hex $hex_begin; + + # Add the beginning of the range + push @invlist, $begin; + + if (defined $hex_end) { # The next item starts with the code point 1 + # beyond the end of the range. + push @invlist, hex($hex_end) + 1; + } + else { # No end of range, is a single code point. + push @invlist, $begin + 1; + } + } + + require "unicore/UCD.pl"; + my $FIRST_NON_UNICODE = $MAX_UNICODE_CODEPOINT + 1; + + # Could need to be inverted: add or subtract a 0 at the beginning of the + # list. And to keep it from matching non-Unicode, add or subtract the + # first non-unicode code point. + if ($swash->{'INVERT_IT'}) { + if (@invlist && $invlist[0] == 0) { + shift @invlist; + } + else { + unshift @invlist, 0; + } + if (@invlist && $invlist[-1] == $FIRST_NON_UNICODE) { + pop @invlist; + } + else { + push @invlist, $FIRST_NON_UNICODE; + } + } + + # Here, the list is set up to include only Unicode code points. But, if + # the table is the default one for the property, it should contain all + # non-Unicode code points. First calculate the loose name for the + # property. This is done even for strict-name properties, as the data + # structure that mktables generates for us is set up so that we don't have + # to worry about that. The property-value needs to be split if compound, + # as the loose rules need to be independently calculated on each part. We + # know that it is syntactically valid, or SWASHNEW would have failed. + + $prop = lc $prop; + my ($prop_only, $table) = split /\s*[:=]\s*/, $prop; + if ($table) { + + # May have optional prefixed 'is' + $prop = utf8::_loose_name($prop_only) =~ s/^is//r; + $prop = $utf8::loose_property_name_of{$prop}; + $prop .= "=" . utf8::_loose_name($table); + } + else { + $prop = utf8::_loose_name($prop); + } + if (exists $loose_defaults{$prop}) { + + # Here, is the default table. If a range ended with 10ffff, instead + # continue that range to infinity, by popping the 110000; otherwise, + # add the range from 11000 to infinity + if (! @invlist || $invlist[-1] != $FIRST_NON_UNICODE) { + push @invlist, $FIRST_NON_UNICODE; + } + else { + pop @invlist; + } + } + + return @invlist; +} + +sub _search_invlist { + # Find the range in the inversion list which contains a code point; that + # is, find i such that l[i] <= code_point < l[i+1] + + # If this is ever made public, could use to speed up .t specials. Would + # need to use code point argument, as in other functions in this pm + + my $list_ref = shift; + my $code_point = shift; + # Verify non-neg numeric XXX + + my $max_element = @$list_ref - 1; + return if ! $max_element < 0; # Undef if list is empty. + # Short cut something at the far-end of the table. This also allows us to + # refer to element [$i+1] without fear of being out-of-bounds in the loop + # below. + return $max_element if $code_point >= $list_ref->[$max_element]; + + use integer; # want integer division + + my $i = $max_element / 2; + + my $lower = 0; + my $upper = $max_element; + while (1) { + + if ($code_point >= $list_ref->[$i]) { + + # Here we have met the lower constraint. We can quit if we + # also meet the upper one. + last if $code_point < $list_ref->[$i+1]; + + $lower = $i; # Still too low. + + } + else { + + # Here, $code_point < $list_ref[$i], so look lower down. + $upper = $i; + } + + # Split search domain in half to try again. + my $temp = ($upper + $lower) / 2; + + # No point in continuing unless $i changes for next time + # in the loop. + return $i if $temp == $i; + $i = $temp; + } # End of while loop + + # Here we have found the offset + return $i; +} + +=pod + +=head2 B<prop_invmap()> + + use Unicode::UCD 'prop_invmap'; + my ($list_ref, $map_ref, $format, $missing) + = prop_invmap("General Category"); + +C<prop_invmap> is used to get the complete mapping definition for a property, +in the form of an inversion map. An inversion map consists of two parallel +arrays. One is an ordered list of code points that mark range beginnings, and +the other gives the value (or mapping) that all code points in the +corresponding range have. + +C<prop_invmap> is called with the name of the desired property. The name is +loosely matched, meaning that differences in case, white-space, hyphens, and +underscores are not meaningful (except for the trailing underscore in the +old-form grandfathered-in property C<"L_">, which is better written as C<"LC">, +or even better, C<"Gc=LC">). + +Many Unicode properties have more than one name (or alias). C<prop_invmap> +understands all of these, including Perl extensions to them. Ambiguities are +resolved as described above for L</prop_aliases()>. The Perl internal +property "Perl_Decimal_Digit, described below, is also accepted. C<undef> is +returned if the property name is unknown. +See L<perluniprops/Properties accessible through Unicode::UCD> for the +properties acceptable as inputs to this function. + +It is a fatal error to call this function except in list context. + +In addition to the the two arrays that form the inversion map, C<prop_invmap> +returns two other values; one is a scalar that gives some details as to the +format of the entries of the map array; the other is used for specialized +purposes, described at the end of this section. + +This means that C<prop_invmap> returns a 4 element list. For example, + + my ($blocks_ranges_ref, $blocks_maps_ref, $format, $default) + = prop_invmap("Block"); + +In this call, the two arrays will be populated as shown below (for Unicode +6.0): + + Index @blocks_ranges @blocks_maps + 0 0x0000 Basic Latin + 1 0x0080 Latin-1 Supplement + 2 0x0100 Latin Extended-A + 3 0x0180 Latin Extended-B + 4 0x0250 IPA Extensions + 5 0x02B0 Spacing Modifier Letters + 6 0x0300 Combining Diacritical Marks + 7 0x0370 Greek and Coptic + 8 0x0400 Cyrillic + ... + 233 0x2B820 No_Block + 234 0x2F800 CJK Compatibility Ideographs Supplement + 235 0x2FA20 No_Block + 236 0xE0000 Tags + 237 0xE0080 No_Block + 238 0xE0100 Variation Selectors Supplement + 239 0xE01F0 No_Block + 240 0xF0000 Supplementary Private Use Area-A + 241 0x100000 Supplementary Private Use Area-B + 242 0x110000 No_Block + +The first line (with Index [0]) means that the value for code point 0 is "Basic +Latin". The entry "0x0080" in the @blocks_ranges column in the second line +means that the value from the first line, "Basic Latin", extends to all code +points in the range from 0 up to but not including 0x0080, that is, through +255. In other words, the code points from 0 to 255 are all in the "Basic +Latin" block. Similarly, all code points in the range from 0x0080 up to (but +not including) 0x0100 are in the block named "Latin-1 Supplement", etc. +(Notice that the return is the old-style block names; see L</Old-style versus +new-style block names>). + +The final line (with Index [242]) means that the value for all code points above +the legal Unicode maximum code point have the value "No_Block", which is the +term Unicode uses for a non-existing block. + +The arrays completely specify the mappings for all possible code points. +The final element in an inversion map returned by this function will always be +for the range that consists of all the code points that aren't legal Unicode, +but that are expressible on the platform. (That is, it starts with code point +0x110000, the first code point above the legal Unicode maximum, and extends to +infinity.) The value for that range will be the same that any typical +unassigned code point has for the specified property. (Certain unassigned +code points are not "typical"; for example the non-character code points, or +those in blocks that are to be written right-to-left. The above-Unicode +range's value is not based on these atypical code points.) It could be argued +that, instead of treating these as unassigned Unicode code points, the value +for this range should be C<undef>. If you wish, you can change the returned +arrays accordingly. + +The maps are almost always simple scalars that should be interpreted as-is. +These values are those given in the Unicode-supplied data files, which may be +inconsistent as to capitalization and as to which synonym for a property-value +is given. The results may be normalized by using the L</prop_value_aliases()> +function. + +There are exceptions to the simple scalar maps. Some properties have some +elements in their map list that are themselves lists of scalars; and some +special strings are returned that are not to be interpreted as-is. Element +[2] (placed into C<$format> in the example above) of the returned four element +list tells you if the map has any of these special elements, as follows: + +=over + +=item C<s> + +means all the elements of the map array are simple scalars, with no special +elements. Almost all properties are like this, like the C<block> example +above. + +=item C<sl> + +means that some of the map array elements have the form given by C<s>, and +the rest are lists of scalars. For example, here is a portion of the output +of calling C<prop_invmap>() with the "Script Extensions" property: + + @scripts_ranges @scripts_maps + ... + 0x0953 Deva + 0x0964 [ Beng Deva Guru Orya ] + 0x0966 Deva + 0x0970 Common + +Here, the code points 0x964 and 0x965 are used in the Bengali, +Devanagari, Gurmukhi, and Oriya scripts. + +=item C<r> + +means that all the elements of the map array are either rational numbers or +the string C<"NaN">, meaning "Not a Number". A rational number is either an +integer, or two integers separated by a solidus (C<"/">). The second integer +represents the denominator of the division implied by the solidus, and is +guaranteed not to be 0. If you want to convert them to scalar numbers, you +can use something like this: + + my ($invlist_ref, $invmap_ref, $format) = prop_invmap($property); + if ($format && $format eq "r") { + map { $_ = eval $_ } @$invmap_ref; + } + +Here's some entries from the output of the property "Nv", which has format +C<"r">. + + @numerics_ranges @numerics_maps Note + 0x00 "NaN" + 0x30 0 DIGIT 0 + 0x31 1 + 0x32 2 + ... + 0x37 7 + 0x38 8 + 0x39 9 DIGIT 9 + 0x3A "NaN" + 0xB2 2 SUPERSCRIPT 2 + 0xB3 3 SUPERSCRIPT 2 + 0xB4 "NaN" + 0xB9 1 SUPERSCRIPT 1 + 0xBA "NaN" + 0xBC 1/4 VULGAR FRACTION 1/4 + 0xBD 1/2 VULGAR FRACTION 1/2 + 0xBE 3/4 VULGAR FRACTION 3/4 + 0xBF "NaN" + 0x660 0 ARABIC-INDIC DIGIT ZERO + +=item C<c> + +is like C<s> in that all the map array elements are scalars, but some of them +are the special string S<C<"E<lt>code pointE<gt>">>, meaning that the map of +each code point in the corresponding range in the inversion list is the code +point itself. For example, in: + + my ($uppers_ranges_ref, $uppers_maps_ref, $format) + = prop_invmap("Simple_Uppercase_Mapping"); + +the returned arrays look like this: + + @$uppers_ranges_ref @$uppers_maps_ref Note + 0 "<code point>" + 97 65 'a' maps to 'A' + 98 66 'b' => 'B' + 99 67 'c' => 'C' + ... + 120 88 'x' => 'X' + 121 89 'y' => 'Y' + 122 90 'z' => 'Z' + 123 "<code point>" + 181 924 MICRO SIGN => Greek Cap MU + 182 "<code point>" + ... + +The first line means that the uppercase of code point 0 is 0; +the uppercase of code point 1 is 1; ... of code point 96 is 96. Without the +C<"E<lt>code_pointE<gt>"> notation, every code point would have to have an +entry. This would mean that the arrays would each have more than a million +entries to list just the legal Unicode code points! + +=item C<cl> + +means that some of the map array elements have the form given by C<c>, and +the rest are ordered lists of code points. +For example, in: + + my ($uppers_ranges_ref, $uppers_maps_ref, $format) + = prop_invmap("Uppercase_Mapping"); + +the returned arrays look like this: + + @$uppers_ranges_ref @$uppers_maps_ref + 0 "<code point>" + 97 65 + ... + 122 90 + 123 "<code point>" + 181 924 + 182 "<code point>" + ... + 0x0149 [ 0x02BC 0x004E ] + 0x014A "<code point>" + 0x014B 0x014A + ... + +This is the full Uppercase_Mapping property (as opposed to the +Simple_Uppercase_Mapping given in the example for format C<"c">). The only +difference between the two in the ranges shown is that the code point at +0x0149 (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE) maps to a string of two +characters, 0x02BC (MODIFIER LETTER APOSTROPHE) followed by 0x004E (LATIN +CAPITAL LETTER N). + +=item C<cle> + +means that some of the map array elements have the forms given by C<cl>, and +the rest are the empty string. The property C<NFKC_Casefold> has this form. +An example slice is: + + @$ranges_ref @$maps_ref Note + ... + 0x00AA 0x0061 FEMININE ORDINAL INDICATOR => 'a' + 0x00AB <code point> + 0x00AD SOFT HYPHEN => "" + 0x00AE <code point> + 0x00AF [ 0x0020, 0x0304 ] MACRON => SPACE . COMBINING MACRON + 0x00B0 <code point> + ... + +=item C<n> + +means the Name property. All the elements of the map array are simple +scalars, but some of them contain special strings that require more work to +get the actual name. + +Entries such as: + + CJK UNIFIED IDEOGRAPH-<code point> + +mean that the name for the code point is "CJK UNIFIED IDEOGRAPH-" +with the code point (expressed in hexadecimal) appended to it, like "CJK +UNIFIED IDEOGRAPH-3403" (similarly for C<CJK COMPATIBILITY IDEOGRAPH-E<lt>code +pointE<gt>>). + +Also, entries like + + <hangul syllable> + +means that the name is algorithmically calculated. This is easily done by +the function L<charnames/charnames::viacode(code)>. + +Note that for control characters (C<Gc=cc>), Unicode's data files have the +string "C<E<lt>controlE<gt>>", but the real name of each of these characters is the empty +string. This function returns that real name, the empty string. + +=item C<d> + +means the Decomposition_Mapping property. This property is like C<cl> +properties, except it has an additional entry type: + + <hangul syllable> + +for those code points whose decomposition is algorithmically calculated. (The +C<n> format has this same entry.) These can be generated via the function +L<Unicode::Normalize::NFD()|Unicode::Normalize>. + + +Note that the mapping is the one that is specified in the Unicode data files, +and to get the final decomposition, it may need to be applied recursively. + +=back + +A binary search can be used to quickly find a code point in the inversion +list, and hence its corresponding mapping. + +The final element (index [3], assigned to C<$default> in the "block" example) in +the four element list returned by this function may be useful for applications +that wish to convert the returned inversion map data structure into some +other, such as a hash. It gives the mapping that most code points map to +under the property. If you establish the convention that any code point not +explicitly listed in your data structure maps to this value, you can +potentially make your data structure much smaller. As you construct your data +structure from the one returned by this function, simply ignore those ranges +that map to this value, generally called the "default" value. For example, to +convert to the data structure searchable by L</charinrange()>, you can follow +this recipe: + + my ($list_ref, $map_ref, $format, $missing) = prop_invmap($property); + my @range_list; + for my $i (0 .. @$list_ref - 2) { + next if $map_ref->[$i] eq $missing; + push @range_list, [ $list_ref->[$i], + $list_ref->[$i+1], + $map_ref->[$i] + ]; + } + + print charinrange(\@range_list, $code_point), "\n"; + + +With this, C<charinrange()> will return C<undef> if its input code point maps +to C<$missing>. You can avoid this by omitting the C<next> statement, and adding +a line after the loop to handle the final element of the inversion map. + +One internal Perl property is accessible by this function. +"Perl_Decimal_Digit" returns an inversion map in which all the Unicode decimal +digits map to their numeric values, and everything else to the empty string, +like so: + + @digits @values + 0x0000 "" + 0x0030 0 + 0x0031 1 + 0x0032 2 + 0x0033 3 + 0x0034 4 + 0x0035 5 + 0x0036 6 + 0x0037 7 + 0x0038 8 + 0x0039 9 + 0x003A "" + 0x0660 0 + 0x0661 1 + ... + +Note that the inversion maps returned for the C<Case_Folding> and +C<Simple_Case_Folding> properties do not include the Turkic-locale mappings. +Use L</casefold()> for these. + +The C<Name_Alias> property is potentially undergoing signficant revision by +Unicode at the time of this writing. The format of the values returned for it +may change substantially in future Unicode versions. + +C<prop_invmap> does not know about any user-defined properties, and will +return C<undef> if called with one of those. + +=cut + +# User-defined properties could be handled with some changes to utf8_heavy.pl; +# if done, consideration should be given to the fact that the user subroutine +# could return different results with each call, which could lead to some +# security issues. + +# One could store things in memory so they don't have to be recalculated, but +# it is unlikely this will be called often, and some properties would take up +# significant memory. + +# These are created by mktables for this routine and stored in unicore/UCD.pl +# where their structures are described. +our @algorithmic_named_code_points; +our $HANGUL_BEGIN; +our $HANGUL_COUNT; + +sub prop_invmap ($) { + + croak __PACKAGE__, "::prop_invmap: must be called in list context" unless wantarray; + + my $prop = $_[0]; + return unless defined $prop; + + # Fail internal properties + return if $prop =~ /^_/; + + # The values returned by this function. + my (@invlist, @invmap, $format, $missing); + + # The swash has two components we look at, the base list, and a hash, + # named 'SPECIALS', containing any additional members whose mappings don't + # fit into the the base list scheme of things. These generally 'override' + # any value in the base list for the same code point. + my $overrides; + + require "utf8_heavy.pl"; + require "unicore/UCD.pl"; + +RETRY: + + # Try to get the map swash for the property. They have 'To' prepended to + # the property name, and 32 means we will accept 32 bit return values. + my $swash = utf8::SWASHNEW(__PACKAGE__, "To$prop", undef, 32, 0); + + # If didn't find it, could be because needs a proxy. And if was the + # 'Block' or 'Name' property, use a proxy even if did find it. Finding it + # would be the result of the installation changing mktables to output the + # Block or Name tables. The Block table gives block names in the + # new-style, and this routine is supposed to return old-style block names. + # The Name table is valid, but we need to execute the special code below + # to add in the algorithmic-defined name entries. + if (ref $swash eq "" + || $swash->{'TYPE'} eq 'ToBlk' + || $swash->{'TYPE'} eq 'ToNa') + { + + # Get the short name of the input property, in standard form + my ($second_try) = prop_aliases($prop); + return unless $second_try; + $second_try = utf8::_loose_name(lc $second_try); + + if ($second_try eq "in") { + + # This property is identical to age for inversion map purposes + $prop = "age"; + goto RETRY; + } + elsif ($second_try eq 'scf') { + + # This property uses just the LIST part of cf which includes the + # simple folds that are otherwise overridden by the SPECIALS. So + # all we need do is to not look at the SPECIALS; set $overrides to + # indicate that + $overrides = -1; + $prop = "cf"; + goto RETRY; + } + elsif ($second_try =~ / ^ s[ltu]c $ /x) { + + # Because some applications may be reading the full mapping + # equivalent files directly, they haven't been changed to include + # the simple mappings as well, as was done with the cf file (which + # doesn't have those backward compatibility issues) in 5.14. + # Instead, separate internal-only files were created that + # contain just the simple mappings that get overridden by the + # SPECIALS. Thus, these simple case mappings use the LIST part of + # their full mapping equivalents; plus the ones that are in those + # additional files. These special files are used by other + # functions in this module, so use the same hashes that those + # functions use. + my $file; + if ($second_try eq "suc") { + $file = '_suc.pl'; + $overrides = \%SIMPLE_UPPER; + } + elsif ($second_try eq "slc") { + $file = '_slc.pl'; + $overrides = \%SIMPLE_LOWER; + } + else { + $file = '_stc.pl'; + $overrides = \%SIMPLE_TITLE; + } + + # The files are already handled by the _read_table() function. + # Don't read them in if already done. + %$overrides =_read_table("unicore/To/$file", 'use_hash') + unless %$overrides; + + # Convert to the full mapping name, and go handle that; e.g., + # suc => uc. + $prop = $second_try =~ s/^s//r; + goto RETRY; + } + elsif ($second_try eq "blk") { + + # We use the old block names. Just create a fake swash from its + # data. + _charblocks(); + my %blocks; + $blocks{'LIST'} = ""; + $blocks{'TYPE'} = "ToBlk"; + $utf8::SwashInfo{ToBlk}{'missing'} = "No_Block"; + $utf8::SwashInfo{ToBlk}{'format'} = "s"; + + foreach my $block (@BLOCKS) { + $blocks{'LIST'} .= sprintf "%x\t%x\t%s\n", + $block->[0], + $block->[1], + $block->[2]; + } + $swash = \%blocks; + } + elsif ($second_try eq "na") { + + # Use the combo file that has all the Name-type properties in it, + # extracting just the ones that are for the actual 'Name' + # property. And create a fake swash from it. + my %names; + $names{'LIST'} = ""; + my $original = do "unicore/Name.pl"; + my $previous_hex_code_point = ""; + my $algorithm_names = \@algorithmic_named_code_points; + + # We hold off on adding the next entry to the list until we know, + # that the next line isn't for the same code point. We only + # output the final line. That one is the original Name property + # value. The others are the Name_Alias corrections, which are + # listed first in the file. + my $staging = ""; + + my $i = 0; + foreach my $line (split "\n", $original) { + my ($hex_code_point, $name) = split "\t", $line; + + # Weeds out all comments, blank lines, and named sequences + next if $hex_code_point =~ /\P{ASCII_HEX_DIGIT}/; + + my $code_point = hex $hex_code_point; + + # The name of all controls is the default: the empty string. + # The set of controls is immutable, so these hard-coded + # constants work. + next if $code_point <= 0x9F + && ($code_point <= 0x1F || $code_point >= 0x7F); + + # Output the last iteration's result, but only output the + # final name if a code point has more than one. + $names{'LIST'} .= $staging + if $hex_code_point ne $previous_hex_code_point; + + # If we are beyond where one of the special lines needs to + # be inserted ... + if ($i < @$algorithm_names + && $code_point > $algorithm_names->[$i]->{'low'}) + { + + # ... then insert it, ahead of what we were about to + # output + $staging = sprintf "%x\t%x\t%s\n", + $algorithm_names->[$i]->{'low'}, + $algorithm_names->[$i]->{'high'}, + $algorithm_names->[$i]->{'name'}; + + # And pretend that what we last saw was the final code + # point of the inserted range. + $previous_hex_code_point = sprintf "%04X", + $algorithm_names->[$i]->{'high'}; + + # Done with this range. + $i++; + + # Except we actually need to output the inserted line. + redo; + } + + # Normal name. + $staging = sprintf "%x\t\t%s\n", $code_point, $name; + $previous_hex_code_point = $hex_code_point; + } + + # Add the name from the final iteration + $names{'LIST'} .= $staging; + + $names{'TYPE'} = "ToNa"; + $utf8::SwashInfo{ToNa}{'missing'} = ""; + $utf8::SwashInfo{ToNa}{'format'} = "n"; + $swash = \%names; + } + elsif ($second_try =~ / ^ ( d [mt] ) $ /x) { + + # The file is a combination of dt and dm properties. Create a + # fake swash from the portion that we want. + my $original = do "unicore/Decomposition.pl"; + my %decomps; + + if ($second_try eq 'dt') { + $decomps{'TYPE'} = "ToDt"; + $utf8::SwashInfo{'ToDt'}{'missing'} = "None"; + $utf8::SwashInfo{'ToDt'}{'format'} = "s"; + } + else { + $decomps{'TYPE'} = "ToDm"; + $utf8::SwashInfo{'ToDm'}{'missing'} = "<code point>"; + + # Use a special internal-to-this_routine format, 'dm', to + # distinguish from 'd', meaning decimal. + $utf8::SwashInfo{'ToDm'}{'format'} = "dm"; + } + + $decomps{'LIST'} = ""; + + # This property has one special range not in the file: for the + # hangul syllables + my $done_hangul = 0; # Have we done the hangul range. + foreach my $line (split "\n", $original) { + my ($hex_lower, $hex_upper, $type_and_map) = split "\t", $line; + my $code_point = hex $hex_lower; + my $value; + + # The type, enclosed in <...>, precedes the mapping separated + # by blanks + if ($type_and_map =~ / ^ < ( .* ) > \s+ (.*) $ /x) { + $value = ($second_try eq 'dt') ? $1 : $2 + } + else { # If there is no type specified, it's canonical + $value = ($second_try eq 'dt') + ? "Canonical" : + $type_and_map; + } + + # Insert the hangul range at the appropriate spot. + if (! $done_hangul && $code_point > $HANGUL_BEGIN) { + $done_hangul = 1; + $decomps{'LIST'} .= + sprintf "%x\t%x\t%s\n", + $HANGUL_BEGIN, + $HANGUL_BEGIN + $HANGUL_COUNT - 1, + ($second_try eq 'dt') + ? "Canonical" + : "<hangul syllable>"; + } + + # And append this to our constructed LIST. + $decomps{'LIST'} .= "$hex_lower\t$hex_upper\t$value\n"; + } + $swash = \%decomps; + } + else { # Don't know this property. Fail. + return; + } + } + + if ($swash->{'EXTRAS'}) { + carp __PACKAGE__, "::prop_invmap: swash returned for $prop unexpectedly has EXTRAS magic"; + return; + } + + # Here, have a valid swash return. Examine it. + my $returned_prop = $swash->{TYPE}; + + # All properties but binary ones should have 'missing' and 'format' + # entries + $missing = $utf8::SwashInfo{$returned_prop}{'missing'}; + $missing = 'N' unless defined $missing; + + $format = $utf8::SwashInfo{$returned_prop}{'format'}; + $format = 'b' unless defined $format; + + # The LIST input lines look like: + # ... + # 0374\t\tCommon + # 0375\t0377\tGreek # [3] + # 037A\t037D\tGreek # [4] + # 037E\t\tCommon + # 0384\t\tGreek + # ... + # + # Convert them to like + # 0374 => Common + # 0375 => Greek + # 0378 => $missing + # 037A => Greek + # 037E => Common + # 037F => $missing + # 0384 => Greek + # + # For binary properties, the final non-comment column is absent, and + # assumed to be 'Y'. + + foreach my $range (split "\n", $swash->{'LIST'}) { + $range =~ s/ \s* (?: \# .* )? $ //xg; # rmv trailing space, comments + + # Find the beginning and end of the range on the line + my ($hex_begin, $hex_end, $map) = split "\t", $range; + my $begin = hex $hex_begin; + my $end = (defined $hex_end && $hex_end ne "") + ? hex $hex_end + : $begin; + + # If the property doesn't have a range that begins at 0, add one that + # maps to the default value (for missing ranges). + if (! @invlist) { + if ($begin != 0) { + push @invlist, 0; + push @invmap, $missing; + } + } + elsif ($invlist[-1] == $begin) { + + # If the input isn't in the most compact form, so that there are + # two adjacent ranges that map to the same thing, they should be + # combined. This happens in our constructed dt mapping, as + # Element [-2] is the map for the latest range so far processed. + # Just set the beginning point of the map to $missing (in + # invlist[-1]) to 1 beyond where this range ends. For example, in + # 12\t13\tXYZ + # 14\t17\tXYZ + # we have set it up so that it looks like + # 12 => XYZ + # 14 => $missing + # + # We now see that it should be + # 12 => XYZ + # 18 => $missing + if (@invlist > 1 && $invmap[-2] eq $map) { + $invlist[-1] = $end + 1; + next; + } + + # Here, the range started in the previous iteration that maps to + # $missing starts at the same code point as this range. That + # means there is no gap to fill that that range was intended for, + # so we just pop it off the parallel arrays. + pop @invlist; + pop @invmap; + } + + # Add the range beginning, and the range's map. + push @invlist, $begin; + if ($format eq 'dm') { + + # The decomposition maps are either a line like <hangul syllable> + # which are to be taken as is; or a sequence of code points in hex + # and separated by blanks. Convert them to decimal, and if there + # is more than one, use an anonymous array as the map. + if ($map =~ /^ < /x) { + push @invmap, $map; + } + else { + my @map = map { hex } split " ", $map; + if (@map == 1) { + push @invmap, $map[0]; + } + else { + push @invmap, \@map; + } + } + } + else { + + # Otherwise, convert hex formatted list entries to decimal; add a + # 'Y' map for the missing value in binary properties, or + # otherwise, use the input map unchanged. + $map = ($format eq 'x') + ? hex $map + : $format eq 'b' + ? 'Y' + : $map; + push @invmap, $map; + } + + # We just started a range. It ends with $end. The gap between it and + # the next element in the list must be filled with a range that maps + # to the default value. If there is no gap, the next iteration will + # pop this, unless there is no next iteration, and we have filled all + # of the Unicode code space, so check for that and skip. + if ($end < $MAX_UNICODE_CODEPOINT) { + push @invlist, $end + 1; + push @invmap, $missing; + } + } + + # If the property is empty, make all code points use the value for missing + # ones. + if (! @invlist) { + push @invlist, 0; + push @invmap, $missing; + } + + # And add in standard element that all non-Unicode code points map to + # $missing + push @invlist, $MAX_UNICODE_CODEPOINT + 1; + push @invmap, $missing; + + # The second component of the map are those values that require + # non-standard specification, stored in SPECIALS. These override any + # duplicate code points in LIST. If we are using a proxy, we may have + # already set $overrides based on the proxy. + $overrides = $swash->{'SPECIALS'} unless defined $overrides; + if ($overrides) { + + # A negative $overrides implies that the SPECIALS should be ignored, + # and a simple 'c' list is the value. + if ($overrides < 0) { + $format = 'c'; + } + else { + + # Currently, all overrides are for properties that normally map to + # single code points, but now some will map to lists of code + # points (but there is an exception case handled below). + $format = 'cl'; + + # Look through the overrides. + foreach my $cp_maybe_utf8 (keys %$overrides) { + my $cp; + my @map; + + # If the overrides came from SPECIALS, the code point keys are + # packed UTF-8. + if ($overrides == $swash->{'SPECIALS'}) { + $cp = unpack("C0U", $cp_maybe_utf8); + @map = unpack "U0U*", $swash->{'SPECIALS'}{$cp_maybe_utf8}; + + # The empty string will show up unpacked as an empty + # array. + $format = 'cle' if @map == 0; + } + else { + + # But if we generated the overrides, we didn't bother to + # pack them, and we, so far, do this only for properties + # that are 'c' ones. + $cp = $cp_maybe_utf8; + @map = hex $overrides->{$cp}; + $format = 'c'; + } + + # Find the range that the override applies to. + my $i = _search_invlist(\@invlist, $cp); + if ($cp < $invlist[$i] || $cp >= $invlist[$i + 1]) { + croak __PACKAGE__, "wrong_range, cp=$cp; i=$i, current=$invlist[$i]; next=$invlist[$i + 1]" + } + + # And what that range currently maps to + my $cur_map = $invmap[$i]; + + # If there is a gap between the next range and the code point + # we are overriding, we have to add elements to both arrays to + # fill that gap, using the map that applies to it, which is + # $cur_map, since it is part of the current range. + if ($invlist[$i + 1] > $cp + 1) { + #use feature 'say'; + #say "Before splice:"; + #say 'i-2=[', $i-2, ']', sprintf("%04X maps to %s", $invlist[$i-2], $invmap[$i-2]) if $i >= 2; + #say 'i-1=[', $i-1, ']', sprintf("%04X maps to %s", $invlist[$i-1], $invmap[$i-1]) if $i >= 1; + #say 'i =[', $i, ']', sprintf("%04X maps to %s", $invlist[$i], $invmap[$i]); + #say 'i+1=[', $i+1, ']', sprintf("%04X maps to %s", $invlist[$i+1], $invmap[$i+1]) if $i < @invlist + 1; + #say 'i+2=[', $i+2, ']', sprintf("%04X maps to %s", $invlist[$i+2], $invmap[$i+2]) if $i < @invlist + 2; + + splice @invlist, $i + 1, 0, $cp + 1; + splice @invmap, $i + 1, 0, $cur_map; + + #say "After splice:"; + #say 'i-2=[', $i-2, ']', sprintf("%04X maps to %s", $invlist[$i-2], $invmap[$i-2]) if $i >= 2; + #say 'i-1=[', $i-1, ']', sprintf("%04X maps to %s", $invlist[$i-1], $invmap[$i-1]) if $i >= 1; + #say 'i =[', $i, ']', sprintf("%04X maps to %s", $invlist[$i], $invmap[$i]); + #say 'i+1=[', $i+1, ']', sprintf("%04X maps to %s", $invlist[$i+1], $invmap[$i+1]) if $i < @invlist + 1; + #say 'i+2=[', $i+2, ']', sprintf("%04X maps to %s", $invlist[$i+2], $invmap[$i+2]) if $i < @invlist + 2; + } + + # If the remaining portion of the range is multiple code + # points (ending with the one we are replacing, guaranteed by + # the earlier splice). We must split it into two + if ($invlist[$i] < $cp) { + $i++; # Compensate for the new element + + #use feature 'say'; + #say "Before splice:"; + #say 'i-2=[', $i-2, ']', sprintf("%04X maps to %s", $invlist[$i-2], $invmap[$i-2]) if $i >= 2; + #say 'i-1=[', $i-1, ']', sprintf("%04X maps to %s", $invlist[$i-1], $invmap[$i-1]) if $i >= 1; + #say 'i =[', $i, ']', sprintf("%04X maps to %s", $invlist[$i], $invmap[$i]); + #say 'i+1=[', $i+1, ']', sprintf("%04X maps to %s", $invlist[$i+1], $invmap[$i+1]) if $i < @invlist + 1; + #say 'i+2=[', $i+2, ']', sprintf("%04X maps to %s", $invlist[$i+2], $invmap[$i+2]) if $i < @invlist + 2; + + splice @invlist, $i, 0, $cp; + splice @invmap, $i, 0, 'dummy'; + + #say "After splice:"; + #say 'i-2=[', $i-2, ']', sprintf("%04X maps to %s", $invlist[$i-2], $invmap[$i-2]) if $i >= 2; + #say 'i-1=[', $i-1, ']', sprintf("%04X maps to %s", $invlist[$i-1], $invmap[$i-1]) if $i >= 1; + #say 'i =[', $i, ']', sprintf("%04X maps to %s", $invlist[$i], $invmap[$i]); + #say 'i+1=[', $i+1, ']', sprintf("%04X maps to %s", $invlist[$i+1], $invmap[$i+1]) if $i < @invlist + 1; + #say 'i+2=[', $i+2, ']', sprintf("%04X maps to %s", $invlist[$i+2], $invmap[$i+2]) if $i < @invlist + 2; + } + + # Here, the range we are overriding contains a single code + # point. The result could be the empty string, a single + # value, or a list. If the last case, we use an anonymous + # array. + $invmap[$i] = (scalar @map == 0) + ? "" + : (scalar @map > 1) + ? \@map + : $map[0]; + } + } + } + elsif ($format eq 'x') { + + # All hex-valued properties are really to code points + $format = 'c'; + } + elsif ($format eq 'dm') { + $format = 'd'; + } + elsif ($format eq 'sw') { # blank-separated elements to form a list. + map { $_ = [ split " ", $_ ] if $_ =~ / / } @invmap; + $format = 'sl'; + } + elsif ($returned_prop eq 'ToNameAlias') { + + # This property currently doesn't have any lists, but theoretically + # could + $format = 'sl'; + } + elsif ($format ne 'n' && $format ne 'r') { + + # All others are simple scalars + $format = 's'; + } + + return (\@invlist, \@invmap, $format, $missing); +} =head2 Unicode::UCD::UnicodeVersion @@ -1397,6 +3029,7 @@ my $UNICODEVERSION; sub UnicodeVersion { unless (defined $UNICODEVERSION) { openunicode(\$VERSIONFH, "version"); + local $/ = "\n"; chomp($UNICODEVERSION = <$VERSIONFH>); close($VERSIONFH); croak __PACKAGE__, "::VERSION: strange version '$UNICODEVERSION'" @@ -1430,13 +3063,37 @@ C<\p{...}> (e.g. C<\p{Tibetan}> matches characters of the Tibetan script), while C<\p{Blk=...}> is used for blocks (e.g. C<\p{Blk=Tibetan}> matches any of the 256 code points in the Tibetan block). +=head2 Old-style versus new-style block names + +Unicode publishes the names of blocks in two different styles, though the two +are equivalent under Unicode's loose matching rules. + +The original style uses blanks and hyphens in the block names (except for +C<No_Block>), like so: + + Miscellaneous Mathematical Symbols-B + +The newer style replaces these with underscores, like this: + + Miscellaneous_Mathematical_Symbols_B + +This newer style is consistent with the values of other Unicode properties. +To preserve backward compatibility, all the functions in Unicode::UCD that +return block names (except one) return the old-style ones. That one function, +L</prop_value_aliases()> can be used to convert from old-style to new-style: + + my $new_style = prop_values_aliases("block", $old_style); + +Perl also has single-form extensions that refer to blocks, C<In_Cyrillic>, +meaning C<Block=Cyrillic>. These have always been written in the new style. + +To convert from new-style to old-style, follow this recipe: -=head2 Implementation Note + $old_style = charblock((prop_invlist("block=$new_style"))[0]); -The first use of charinfo() opens a read-only filehandle to the Unicode -Character Database (the database is included in the Perl distribution). -The filehandle is then kept open for further queries. In other words, -if you are wondering where one of your filehandles went, that's where. +(which finds the range of code points in the block using C<prop_invlist>, +gets the lower end of the range (0th element) and then looks up the old name +for its block using C<charblock>). =head1 BUGS @@ -1444,7 +3101,7 @@ Does not yet support EBCDIC platforms. =head1 AUTHOR -Jarkko Hietaniemi +Jarkko Hietaniemi. Now maintained by perl5 porters. =cut diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 1dc7efe38a..6d8c628a87 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -17,10 +17,10 @@ use strict; use Unicode::UCD; use Test::More; -BEGIN { plan tests => 305 }; - use Unicode::UCD 'charinfo'; +$/ = 7; + my $charinfo; is(charinfo(0x110000), undef, "Verify charinfo() of non-unicode is undef"); @@ -501,11 +501,1412 @@ use charnames ":full"; is(num("0"), 0, 'Verify num("0") == 0'); is(num("98765"), 98765, 'Verify num("98765") == 98765'); ok(! defined num("98765\N{FULLWIDTH DIGIT FOUR}"), 'Verify num("98765\N{FULLWIDTH DIGIT FOUR}") isnt defined'); -is(num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}"), 21, 'Verify \N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}" == 21'); -ok(! defined num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}"), 'Verify \N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}" isnt defined'); +is(num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}"), 21, 'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}") == 21'); +ok(! defined num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}"), 'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}") isnt defined'); is(num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}"), 3, 'Verify num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}") == 3'); ok(! defined num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}"), 'Verify num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}") isnt defined'); is(num("\N{SUPERSCRIPT TWO}"), 2, 'Verify num("\N{SUPERSCRIPT TWO} == 2'); is(num("\N{ETHIOPIC NUMBER TEN THOUSAND}"), 10000, 'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000'); is(num("\N{NORTH INDIC FRACTION ONE HALF}"), .5, 'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5'); is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9'); + +# Create a user-defined property +sub InKana {<<'END'} +3040 309F +30A0 30FF +END + +use Unicode::UCD qw(prop_aliases); + +is(prop_aliases(undef), undef, "prop_aliases(undef) returns <undef>"); +is(prop_aliases("unknown property"), undef, + "prop_aliases(<unknown property>) returns <undef>"); +is(prop_aliases("InKana"), undef, + "prop_aliases(<user-defined property>) returns <undef>"); +is(prop_aliases("Perl_Decomposition_Mapping"), undef, "prop_aliases('Perl_Decomposition_Mapping') returns <undef> since internal-Perl-only"); +is(prop_aliases("Perl_Charnames"), undef, + "prop_aliases('Perl_Charnames') returns <undef> since internal-Perl-only"); +is(prop_aliases("isgc"), undef, + "prop_aliases('isgc') returns <undef> since is not covered Perl extension"); +is(prop_aliases("Is_Is_Any"), undef, + "prop_aliases('Is_Is_Any') returns <undef> since two is's"); + +require 'utf8_heavy.pl'; +require "unicore/Heavy.pl"; + +# Keys are lists of properties. Values are defined if have been tested. +my %props; + +# To test for loose matching, add in the characters that are ignored there. +my $extra_chars = "-_ "; + +# The one internal property we accept +$props{'Perl_Decimal_Digit'} = 1; +my @list = prop_aliases("perldecimaldigit"); +is_deeply(\@list, + [ "Perl_Decimal_Digit", + "Perl_Decimal_Digit" + ], "prop_aliases('perldecimaldigit') returns Perl_Decimal_Digit as both short and full names"); + +# Get the official Unicode property name synonyms and test them. +open my $props, "<", "../lib/unicore/PropertyAliases.txt" + or die "Can't open Unicode PropertyAliases.txt"; +$/ = "\n"; +while (<$props>) { + s/\s*#.*//; # Remove comments + next if /^\s* $/x; # Ignore empty and comment lines + + chomp; + my $count = 0; # 0th field in line is short name; 1th is long name + my $short_name; + my $full_name; + my @names_via_short; + foreach my $alias (split /\s*;\s*/) { # Fields are separated by + # semi-colons + # Add in the characters that are supposed to be ignored, to test loose + # matching, which the tested function does on all inputs. + my $mod_name = "$extra_chars$alias"; + + my $loose = utf8::_loose_name(lc $alias); + + # Indicate we have tested this. + $props{$loose} = 1; + + my @all_names = prop_aliases($mod_name); + if (grep { $_ eq $loose } @Unicode::UCD::suppressed_properties) { + is(@all_names, 0, "prop_aliases('$mod_name') returns undef since $alias is not installed"); + next; + } + elsif (! @all_names) { + fail("prop_aliases('$mod_name')"); + diag("'$alias' is unknown to prop_aliases()"); + next; + } + + if ($count == 0) { # Is short name + + @names_via_short = prop_aliases($mod_name); + + # If the 0th test fails, no sense in continuing with the others + last unless is($names_via_short[0], $alias, + "prop_aliases: '$alias' is the short name for '$mod_name'"); + $short_name = $alias; + } + elsif ($count == 1) { # Is full name + + # Some properties have the same short and full name; no sense + # repeating the test if the same. + if ($alias ne $short_name) { + my @names_via_full = prop_aliases($mod_name); + is_deeply(\@names_via_full, \@names_via_short, "prop_aliases() returns the same list for both '$short_name' and '$mod_name'"); + } + + # Tests scalar context + is(prop_aliases($short_name), $alias, + "prop_aliases: '$alias' is the long name for '$short_name'"); + } + else { # Is another alias + is_deeply(\@all_names, \@names_via_short, "prop_aliases() returns the same list for both '$short_name' and '$mod_name'"); + ok((grep { $_ =~ /^$alias$/i } @all_names), + "prop_aliases: '$alias' is listed as an alias for '$mod_name'"); + } + + $count++; + } +} + +# Now test anything we can find that wasn't covered by the tests of the +# official properties. We have no way of knowing if mktables omitted a Perl +# extension or not, but we do the best we can from its generated lists + +foreach my $alias (keys %utf8::loose_to_file_of) { + next if $alias =~ /=/; + my $lc_name = lc $alias; + my $loose = utf8::_loose_name($lc_name); + next if exists $props{$loose}; # Skip if already tested + $props{$loose} = 1; + my $mod_name = "$extra_chars$alias"; # Tests loose matching + my @aliases = prop_aliases($mod_name); + my $found_it = grep { utf8::_loose_name(lc $_) eq $lc_name } @aliases; + if ($found_it) { + pass("prop_aliases: '$lc_name' is listed as an alias for '$mod_name'"); + } + elsif ($lc_name =~ /l[_&]$/) { + + # These two names are special in that they don't appear in the + # returned list because they are discouraged from use. Verify + # that they return the same list as a non-discouraged version. + my @LC = prop_aliases('Is_LC'); + is_deeply(\@aliases, \@LC, "prop_aliases: '$lc_name' returns the same list as 'Is_LC'"); + } + else { + my $stripped = $lc_name =~ s/^is//; + + # Could be that the input includes a prefix 'is', which is rarely + # returned as an alias, so having successfully stripped it off above, + # try again. + if ($stripped) { + $found_it = grep { utf8::_loose_name(lc $_) eq $lc_name } @aliases; + } + + # If that didn't work, it could be that it's a block, which is always + # returned with a leading 'In_' to avoid ambiguity. Try comparing + # with that stripped off. + if (! $found_it) { + $found_it = grep { utf8::_loose_name(s/^In_(.*)/\L$1/r) eq $lc_name } + @aliases; + # Could check that is a real block, but tests for invmap will + # likely pickup any errors, since this will be tested there. + $lc_name = "in$lc_name" if $found_it; # Change for message below + } + my $message = "prop_aliases: '$lc_name' is listed as an alias for '$mod_name'"; + ($found_it) ? pass($message) : fail($message); + } +} + +my $done_equals = 0; +foreach my $alias (keys %utf8::stricter_to_file_of) { + if ($alias =~ /=/) { # Only test one case where there is an equals + next if $done_equals; + $done_equals = 1; + } + my $lc_name = lc $alias; + my @list = prop_aliases($alias); + if ($alias =~ /^_/) { + is(@list, 0, "prop_aliases: '$lc_name' returns an empty list since it is internal_only"); + } + elsif ($alias =~ /=/) { + is(@list, 0, "prop_aliases: '$lc_name' returns an empty list since is illegal property name"); + } + else { + ok((grep { lc $_ eq $lc_name } @list), + "prop_aliases: '$lc_name' is listed as an alias for '$alias'"); + } +} + +use Unicode::UCD qw(prop_value_aliases); + +is(prop_value_aliases("unknown property", "unknown value"), undef, + "prop_value_aliases(<unknown property>, <unknown value>) returns <undef>"); +is(prop_value_aliases(undef, undef), undef, + "prop_value_aliases(undef, undef) returns <undef>"); +is((prop_value_aliases("na", "A")), "A", "test that prop_value_aliases returns its input for properties that don't have synonyms"); +is(prop_value_aliases("isgc", "C"), undef, "prop_value_aliases('isgc', 'C') returns <undef> since is not covered Perl extension"); +is(prop_value_aliases("gc", "isC"), undef, "prop_value_aliases('gc', 'isC') returns <undef> since is not covered Perl extension"); + +# We have no way of knowing if mktables omitted a Perl extension that it +# shouldn't have, but we can check if it omitted an official Unicode property +# name synonym. And for those, we can check if the short and full names are +# correct. + +my %pva_tested; # List of things already tested. +open my $propvalues, "<", "../lib/unicore/PropValueAliases.txt" + or die "Can't open Unicode PropValueAliases.txt"; +while (<$propvalues>) { + s/\s*#.*//; # Remove comments + next if /^\s* $/x; # Ignore empty and comment lines + chomp; + + my @fields = split /\s*;\s*/; # Fields are separated by semi-colons + my $prop = shift @fields; # 0th field is the property, + my $count = 0; # 0th field in line (after shifting off the property) is + # short name; 1th is long name + my $short_name; + my @names_via_short; # Saves the values between iterations + + # The property on the lhs of the = is always loosely matched. Add in + # characters that are ignored under loose matching to test that + my $mod_prop = "$extra_chars$prop"; + + if ($fields[0] eq 'n/a') { # See comments in input file, essentially + # means full name and short name are identical + $fields[0] = $fields[1]; + } + elsif ($fields[0] ne $fields[1] + && utf8::_loose_name(lc $fields[0]) + eq utf8::_loose_name(lc $fields[1]) + && $fields[1] !~ /[[:upper:]]/) + { + # Also, there is a bug in the file in which "n/a" is omitted, and + # the two fields are identical except for case, and the full name + # is all lower case. Copy the "short" name unto the full one to + # give it some upper case. + + $fields[1] = $fields[0]; + } + + # The ccc property in the file is special; has an extra numeric field + # (0th), which should go at the end, since we use the next two fields as + # the short and full names, respectively. See comments in input file. + splice (@fields, 0, 0, splice(@fields, 1, 2)) if $prop eq 'ccc'; + + my $loose_prop = utf8::_loose_name(lc $prop); + my $suppressed = grep { $_ eq $loose_prop } + @Unicode::UCD::suppressed_properties; + foreach my $value (@fields) { + if ($suppressed) { + is(prop_value_aliases($prop, $value), undef, "prop_value_aliases('$prop', '$value') returns undef for suppressed property $prop"); + next; + } + elsif (grep { $_ eq ("$loose_prop=" . utf8::_loose_name(lc $value)) } @Unicode::UCD::suppressed_properties) { + is(prop_value_aliases($prop, $value), undef, "prop_value_aliases('$prop', '$value') returns undef for suppressed property $prop=$value"); + next; + } + + # Add in test for loose matching. + my $mod_value = "$extra_chars$value"; + + # If the value is a number, optionally negative, including a floating + # point or rational numer, it should be only strictly matched, so the + # loose matching should fail. + if ($value =~ / ^ -? \d+ (?: [\/.] \d+ )? $ /x) { + is(prop_value_aliases($mod_prop, $mod_value), undef, "prop_value_aliases('$mod_prop', '$mod_value') returns undef because '$mod_value' should be strictly matched"); + + # And reset so below tests just the strict matching. + $mod_value = $value; + } + + if ($count == 0) { + + @names_via_short = prop_value_aliases($mod_prop, $mod_value); + + # If the 0th test fails, no sense in continuing with the others + last unless is($names_via_short[0], $value, "prop_value_aliases: In '$prop', '$value' is the short name for '$mod_value'"); + $short_name = $value; + } + elsif ($count == 1) { + + # Some properties have the same short and full name; no sense + # repeating the test if the same. + if ($value ne $short_name) { + my @names_via_full = + prop_value_aliases($mod_prop, $mod_value); + is_deeply(\@names_via_full, \@names_via_short, "In '$prop', prop_value_aliases() returns the same list for both '$short_name' and '$mod_value'"); + } + + # Tests scalar context + is(prop_value_aliases($prop, $short_name), $value, "'$value' is the long name for prop_value_aliases('$prop', '$short_name')"); + } + else { + my @all_names = prop_value_aliases($mod_prop, $mod_value); + is_deeply(\@all_names, \@names_via_short, "In '$prop', prop_value_aliases() returns the same list for both '$short_name' and '$mod_value'"); + ok((grep { utf8::_loose_name(lc $_) eq utf8::_loose_name(lc $value) } prop_value_aliases($prop, $short_name)), "'$value' is listed as an alias for prop_value_aliases('$prop', '$short_name')"); + } + + $pva_tested{utf8::_loose_name(lc $prop) . "=" . utf8::_loose_name(lc $value)} = 1; + $count++; + } +} + +# And test as best we can, the non-official pva's that mktables generates. +foreach my $hash (\%utf8::loose_to_file_of, \%utf8::stricter_to_file_of) { + foreach my $test (keys %$hash) { + next if exists $pva_tested{$test}; # Skip if already tested + + my ($prop, $value) = split "=", $test; + next unless defined $value; # prop_value_aliases() requires an input + # 'value' + my $mod_value; + if ($hash == \%utf8::loose_to_file_of) { + + # Add extra characters to test loose-match rhs value + $mod_value = "$extra_chars$value"; + } + else { # Here value is strictly matched. + + # Extra elements are added by mktables to this hash so that + # something like "age=6.0" has a synonym of "age=6". It's not + # clear to me (khw) if we should be encouraging those synonyms, so + # don't test for them. + next if $value !~ /\D/ && exists $hash->{"$prop=$value.0"}; + + # Verify that loose matching fails when only strict is called for. + next unless is(prop_value_aliases($prop, "$extra_chars$value"), undef, + "prop_value_aliases('$prop', '$extra_chars$value') returns undef since '$value' should be strictly matched"), + + # Strict matching does allow for underscores between digits. Test + # for that. + $mod_value = $value; + while ($mod_value =~ s/(\d)(\d)/$1_$2/g) {} + } + + # The lhs property is always loosely matched, so add in extra + # characters to test that. + my $mod_prop = "$extra_chars$prop"; + + if ($prop eq 'gc' && $value =~ /l[_&]$/) { + # These two names are special in that they don't appear in the + # returned list because they are discouraged from use. Verify + # that they return the same list as a non-discouraged version. + my @LC = prop_value_aliases('gc', 'lc'); + my @l_ = prop_value_aliases($mod_prop, $mod_value); + is_deeply(\@l_, \@LC, "prop_value_aliases('$mod_prop', '$mod_value) returns the same list as prop_value_aliases('gc', 'lc')"); + } + else { + ok((grep { utf8::_loose_name(lc $_) eq utf8::_loose_name(lc $value) } + prop_value_aliases($mod_prop, $mod_value)), + "'$value' is listed as an alias for prop_value_aliases('$mod_prop', '$mod_value')"); + } + } +} + +undef %pva_tested; + +no warnings 'once'; # We use some values once from 'required' modules. + +use Unicode::UCD qw(prop_invlist prop_invmap MAX_CP); + +# There were some problems with caching interfering with prop_invlist() vs +# prop_invmap() on binary properties, and also between the 3 properties where +# Perl used the same 'To' name as another property (see utf8_heavy.pl). +# So, before testing all of prop_invlist(), +# 1) call prop_invmap() to try both orders of these name issues. This uses +# up two of the 3 properties; the third will be left so that invlist() +# on it gets called before invmap() +# 2) call prop_invmap() on a generic binary property, ahead of invlist(). +# This should test that the caching works in both directions. + +# These properties are not stable between Unicode versions, but the first few +# elements are; just look at the first element to see if are getting the +# distinction right. The general inversion map testing below will test the +# whole thing. +my $prop = "uc"; +my ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop); +is($format, 'cl', "prop_invmap() format of '$prop' is 'cl'"); +is($missing, '<code point>', "prop_invmap() missing of '$prop' is '<code point>'"); +is($invlist_ref->[1], 0x61, "prop_invmap('$prop') list[1] is 0x61"); +is($invmap_ref->[1], 0x41, "prop_invmap('$prop') map[1] is 0x41"); + +$prop = "upper"; +($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop); +is($format, 's', "prop_invmap() format of '$prop' is 'cl'"); +is($missing, 'N', "prop_invmap() missing of '$prop' is '<code point>'"); +is($invlist_ref->[1], 0x41, "prop_invmap('$prop') list[1] is 0x41"); +is($invmap_ref->[1], 'Y', "prop_invmap('$prop') map[1] is 'Y'"); + +$prop = "lower"; +($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop); +is($format, 's', "prop_invmap() format of '$prop' is 'cl'"); +is($missing, 'N', "prop_invmap() missing of '$prop' is '<code point>'"); +is($invlist_ref->[1], 0x61, "prop_invmap('$prop') list[1] is 0x61"); +is($invmap_ref->[1], 'Y', "prop_invmap('$prop') map[1] is 'Y'"); + +$prop = "lc"; +($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop); +is($format, 'cl', "prop_invmap() format of '$prop' is 'cl'"); +is($missing, '<code point>', "prop_invmap() missing of '$prop' is '<code point>'"); +is($invlist_ref->[1], 0x41, "prop_invmap('$prop') list[1] is 0x41"); +is($invmap_ref->[1], 0x61, "prop_invmap('$prop') map[1] is 0x61"); + +# This property is stable and small, so can test all of it +$prop = "ASCII_Hex_Digit"; +($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop); +is($format, 's', "prop_invmap() format of '$prop' is 's'"); +is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'"); +is_deeply($invlist_ref, [ 0x0000, 0x0030, 0x003A, 0x0041, + 0x0047, 0x0061, 0x0067, 0x110000 ], + "prop_invmap('$prop') code point list is correct"); +is_deeply($invmap_ref, [ 'N', 'Y', 'N', 'Y', 'N', 'Y', 'N', 'N' ] , + "prop_invmap('$prop') map list is correct"); + +is(prop_invlist("Unknown property"), undef, "prop_invlist(<Unknown property>) returns undef"); +is(prop_invlist(undef), undef, "prop_invlist(undef) returns undef"); +is(prop_invlist("Any"), 2, "prop_invlist('Any') returns the number of elements in scalar context"); +my @invlist = prop_invlist("Is_Any"); +is_deeply(\@invlist, [ 0, 0x110000 ], "prop_invlist works on 'Is_' prefixes"); +is(prop_invlist("Is_Is_Any"), undef, "prop_invlist('Is_Is_Any') returns <undef> since two is's"); + +use Storable qw(dclone); + +is(prop_invlist("InKana"), undef, "prop_invlist(<user-defined property returns undef>)"); + +# The way both the tests for invlist and invmap work is that they take the +# lists returned by the functions and construct from them what the original +# file should look like, which are then compared with the file. If they are +# identical, the test passes. What this tests isn't that the results are +# correct, but that invlist and invmap haven't introduced errors beyond what +# are there in the files. As a small hedge against that, test some +# prop_invlist() tables fully with the known correct result. We choose +# ASCII_Hex_Digit again, as it is stable. +@invlist = prop_invlist("AHex"); +is_deeply(\@invlist, [ 0x0030, 0x003A, 0x0041, + 0x0047, 0x0061, 0x0067 ], + "prop_invlist('AHex') is exactly the expected set of points"); +@invlist = prop_invlist("AHex=f"); +is_deeply(\@invlist, [ 0x0000, 0x0030, 0x003A, 0x0041, + 0x0047, 0x0061, 0x0067 ], + "prop_invlist('AHex=f') is exactly the expected set of points"); + +sub fail_with_diff ($$$$) { + # For use below to output better messages + my ($prop, $official, $constructed, $tested_function_name) = @_; + + is($constructed, $official, "$tested_function_name('$prop')"); + diag("Comment out lines " . (__LINE__ - 1) . " through " . (__LINE__ + 1) . " in '$0' on Un*x-like systems to see just the differences. Uses the 'diff' first in your \$PATH"); + return; + + fail("$tested_function_name('$prop')"); + + require File::Temp; + my $off = File::Temp->new(); + chomp $official; + print $off $official, "\n"; + close $off || die "Can't close official"; + + chomp $constructed; + my $gend = File::Temp->new(); + print $gend $constructed, "\n"; + close $gend || die "Can't close gend"; + + my $diff = File::Temp->new(); + system("diff $off $gend > $diff"); + + open my $fh, "<", $diff || die "Can't open $diff"; + my @diffs = <$fh>; + diag("In the diff output below '<' marks lines from the filesystem tables;\n'>' are from $tested_function_name()"); + diag(@diffs); +} + +my %tested_invlist; + +# Look at everything we think that mktables tells us exists, both loose and +# strict +foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of) +{ + foreach my $table (keys %$set_of_tables) { + + my $mod_table; + my ($prop_only, $value) = split "=", $table; + if (defined $value) { + + # If this is to be loose matched, add in characters to test that. + if ($set_of_tables == \%utf8::loose_to_file_of) { + $value = "$extra_chars$value"; + } + else { # Strict match + + # Verify that loose matching fails when only strict is called + # for. + next unless is(prop_invlist("$prop_only=$extra_chars$value"), undef, "prop_invlist('$prop_only=$extra_chars$value') returns undef since should be strictly matched"); + + # Strict matching does allow for underscores between digits. + # Test for that. + while ($value =~ s/(\d)(\d)/$1_$2/g) {} + } + + # The property portion in compound form specifications always + # matches loosely + $mod_table = "$extra_chars$prop_only = $value"; + } + else { # Single-form. + + # Like above, use looose if required, and insert underscores + # between digits if strict. + if ($set_of_tables == \%utf8::loose_to_file_of) { + $mod_table = "$extra_chars$table"; + } + else { + $mod_table = $table; + while ($mod_table =~ s/(\d)(\d)/$1_$2/g) {} + } + } + + my @tested = prop_invlist($mod_table); + if ($table =~ /^_/) { + is(@tested, 0, "prop_invlist('$mod_table') returns an empty list since is internal-only"); + next; + } + + # If we have already tested a property that uses the same file, this + # list should be identical to the one that was tested, and can bypass + # everything else. + my $file = $set_of_tables->{$table}; + if (exists $tested_invlist{$file}) { + is_deeply(\@tested, $tested_invlist{$file}, "prop_invlist('$mod_table') gave same results as its name synonym"); + next; + } + $tested_invlist{$file} = dclone \@tested; + + # A leading '!' in the file name means that it is to be inverted. + my $invert = $file =~ s/^!//; + my $official = do "unicore/lib/$file.pl"; + + # Get rid of any trailing space and comments in the file. + $official =~ s/\s*(#.*)?$//mg; + chomp $official; + + # If we are to test against an inverted file, it is easier to invert + # our array than the file. + # The file only is valid for Unicode code points, while the inversion + # list is valid for all possible code points. Therefore, we must test + # just the Unicode part against the file. Later we will test for + # the non-Unicode part. + + my $before_invert; # Saves the pre-inverted table. + if ($invert) { + $before_invert = dclone \@tested; + if (@tested && $tested[0] == 0) { + shift @tested; + } else { + unshift @tested, 0; + } + if (@tested && $tested[-1] == 0x110000) { + pop @tested; + } + else { + push @tested, 0x110000; + } + } + + # Now construct a string from the list that should match the file. + # The file gives ranges of code points with starting and ending values + # in hex, like this: + # 0041\t005A + # 0061\t007A + # 00AA + # Our list has even numbered elements start ranges that are in the + # list, and odd ones that aren't in the list. Therefore the odd + # numbered ones are one beyond the end of the previous range, but + # otherwise don't get reflected in the file. + my $tested = ""; + my $i = 0; + for (; $i < @tested - 1; $i += 2) { + my $start = $tested[$i]; + my $end = $tested[$i+1] - 1; + if ($start == $end) { + $tested .= sprintf("%04X\n", $start); + } + else { + $tested .= sprintf "%04X\t%04X\n", $start, $end; + } + } + + # As mentioned earlier, the disk files only go up through Unicode, + # whereas the prop_invlist() ones go as high as necessary. The + # comparison is only valid through max Unicode. + if ($i == @tested - 1 && $tested[$i] <= 0x10FFFF) { + $tested .= sprintf("%04X\t10FFFF\n", $tested[$i]); + } + chomp $tested; + if ($tested ne $official) { + fail_with_diff($mod_table, $official, $tested, "prop_invlist"); + next; + } + + # Here, it matched the table. Now need to check for if it is correct + # for beyond Unicode. First, calculate if is the default table or + # not. This is the same algorithm as used internally in + # prop_invlist(), so if it is wrong there, this test won't catch it. + my $prop = lc $table; + ($prop_only, $table) = split /\s*[:=]\s*/, $prop; + if (defined $table) { + + # May have optional prefixed 'is' + $prop = utf8::_loose_name($prop_only) =~ s/^is//r; + $prop = $utf8::loose_property_name_of{$prop}; + $prop .= "=" . utf8::_loose_name($table); + } + else { + $prop = utf8::_loose_name($prop); + } + my $is_default = exists $Unicode::UCD::loose_defaults{$prop}; + + @tested = @$before_invert if $invert; # Use the original + if (@tested % 2 == 0) { + + # If there are an even number of elements, the final one starts a + # range (going to infinity) of code points that are not in the + # list. + if ($is_default) { + fail("prop_invlist('$mod_table')"); + diag("default table doesn't goto infinity"); + use Data::Dumper; + diag Dumper \@tested; + next; + } + } + else { + # An odd number of elements means the final one starts a range + # (going to infinity of code points that are in the list. + if (! $is_default) { + fail("prop_invlist('$mod_table')"); + diag("non-default table needs to stop in the Unicode range"); + use Data::Dumper; + diag Dumper \@tested; + next; + } + } + + pass("prop_invlist('$mod_table')"); + } +} + +# Now test prop_invmap(). + +@list = prop_invmap("Unknown property"); +is (@list, 0, "prop_invmap(<Unknown property>) returns an empty list"); +@list = prop_invmap(undef); +is (@list, 0, "prop_invmap(undef) returns an empty list"); +ok (! eval "prop_invmap('gc')" && $@ ne "", + "prop_invmap('gc') dies in scalar context"); +@list = prop_invmap("_X_Begin"); +is (@list, 0, "prop_invmap(<internal property>) returns an empty list"); +@list = prop_invmap("InKana"); +is(@list, 0, "prop_invmap(<user-defined property returns undef>)"); +@list = prop_invmap("Perl_Decomposition_Mapping"), undef, +is(@list, 0, "prop_invmap('Perl_Decomposition_Mapping') returns <undef> since internal-Perl-only"); +@list = prop_invmap("Perl_Charnames"), undef, +is(@list, 0, "prop_invmap('Perl_Charnames') returns <undef> since internal-Perl-only"); +@list = prop_invmap("Is_Is_Any"); +is(@list, 0, "prop_invmap('Is_Is_Any') returns <undef> since two is's"); + +# The set of properties to test on has already been compiled into %props by +# the prop_aliases() tests. + +my %tested_invmaps; + +# Like prop_invlist(), prop_invmap() is tested by comparing the results +# returned by the function with the tables that mktables generates. Some of +# these tables are directly stored as files on disk, in either the unicore or +# unicore/To directories, and most should be listed in the mktables generated +# hash %utf8::loose_property_to_file_of, with a few additional ones that this +# handles specially. For these, the files are read in directly, massaged, and +# compared with what invmap() returns. The SPECIALS hash in some of these +# files overrides values in the main part of the file. +# +# The other properties are tested indirectly by generating all the possible +# inversion lists for the property, and seeing if those match the inversion +# lists returned by prop_invlist(), which has already been tested. + +PROPERTY: +foreach my $prop (keys %props) { + my $loose_prop = utf8::_loose_name(lc $prop); + my $suppressed = grep { $_ eq $loose_prop } + @Unicode::UCD::suppressed_properties; + + # Find the short and full names that this property goes by + my ($name, $full_name) = prop_aliases($prop); + if (! $name) { + if (! $suppressed) { + fail("prop_invmap('$prop')"); + diag("is unknown to prop_aliases(), and we need it in order to test prop_invmap"); + } + next PROPERTY; + } + + # Normalize the short name, as it is stored in the hashes under the + # normalized version. + $name = utf8::_loose_name(lc $name); + + # Add in the characters that are supposed to be ignored to test loose + # matching, which the tested function applies to all properties + my $mod_prop = "$extra_chars$prop"; + + my ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($mod_prop); + my $return_ref = [ $invlist_ref, $invmap_ref, $format, $missing ]; + + # If have already tested this property under a different name, merely + # compare the return from now with the saved one from before. + if (exists $tested_invmaps{$name}) { + is_deeply($return_ref, $tested_invmaps{$name}, "prop_invmap('$mod_prop') gave same results as its synonym, '$name'"); + next PROPERTY; + } + $tested_invmaps{$name} = dclone $return_ref; + + # If prop_invmap() returned nothing, is ok iff is a property whose file is + # not generated. + if ($suppressed) { + if (defined $format) { + fail("prop_invmap('$mod_prop')"); + diag("did not return undef for suppressed property $prop"); + } + next PROPERTY; + } + elsif (!defined $format) { + fail("prop_invmap('$mod_prop')"); + diag("'$prop' is unknown to prop_invmap()"); + next PROPERTY; + } + + # The two parallel arrays must have the same number of elements. + if (@$invlist_ref != @$invmap_ref) { + fail("prop_invmap('$mod_prop')"); + diag("invlist has " + . scalar @$invlist_ref + . " while invmap has " + . scalar @$invmap_ref + . " elements"); + next PROPERTY; + } + + # The last element must be for the above-Unicode code points, and must be + # for the default value. + if ($invlist_ref->[-1] != 0x110000) { + fail("prop_invmap('$mod_prop')"); + diag("The last inversion list element is not 0x110000"); + next PROPERTY; + } + if ($invmap_ref->[-1] ne $missing) { + fail("prop_invmap('$mod_prop')"); + diag("The last inversion list element is '$invmap_ref->[-1]', and should be '$missing'"); + next PROPERTY; + } + + if ($name eq 'bmg') { # This one has an atypical $missing + if ($missing ne "") { + fail("prop_invmap('$mod_prop')"); + diag("The missings should be \"\"; got '$missing'"); + next PROPERTY; + } + } + elsif ($format =~ /^ [cd] /x) { + if ($missing ne "<code point>") { + fail("prop_invmap('$mod_prop')"); + diag("The missings should be '<code point>'; got '$missing'"); + next PROPERTY; + } + } + elsif ($missing =~ /[<>]/) { + fail("prop_invmap('$mod_prop')"); + diag("The missings should NOT be something with <...>'"); + next PROPERTY; + + # I don't want to hard code in what all the missings should be, so + # those don't get fully tested. + } + + # Certain properties don't have their own files, but must be constructed + # using proxies. + my $proxy_prop = $name; + if ($full_name eq 'Present_In') { + $proxy_prop = "age"; # The maps for these two props are identical + } + elsif ($full_name eq 'Simple_Case_Folding' + || $full_name =~ /Simple_ (.) .*? case_Mapping /x) + { + if ($full_name eq 'Simple_Case_Folding') { + $proxy_prop = 'cf'; + } + else { + # We captured the U, L, or T, leading to uc, lc, or tc. + $proxy_prop = lc $1 . "c"; + } + if ($format ne "c") { + fail("prop_invmap('$mod_prop')"); + diag("The format should be 'c'; got '$format'"); + next PROPERTY; + } + } + + my $base_file; + my $official; + + # Handle the properties that have full disk files for them (except the + # Name property which is structurally enough different that it is handled + # separately below.) + if ($name ne 'na' + && ($name eq 'blk' + || defined + ($base_file = $utf8::loose_property_to_file_of{$proxy_prop}) + || exists $utf8::loose_to_file_of{$proxy_prop} + || $name eq "dm")) + { + # In the above, blk is done unconditionally, as we need to test that + # the old-style block names are returned, even if mktables has + # generated a file for the new-style; the test for dm comes afterward, + # so that if a file has been generated for it explicitly, we use that + # file (which is valid, unlike blk) instead of the combo + # Decomposition.pl files. + my $file; + my $is_binary = 0; + if ($name eq 'blk') { + + # The blk property is special. The original file with old block + # names is retained, and the default is to not write out a + # new-name file. What we do is get the old names into a data + # structure, and from that create what the new file would look + # like. $base_file is needed to be defined, just to avoid a + # message below. + $base_file = "This is a dummy name"; + my $blocks_ref = charblocks(); + $official = ""; + for my $range (sort { $a->[0][0] <=> $b->[0][0] } + values %$blocks_ref) + { + # Translate the charblocks() data structure to what the file + # would like. + $official .= sprintf"%04X\t%04X\t%s\n", + $range->[0][0], + $range->[0][1], + $range->[0][2]; + } + } + else { + $base_file = "Decomposition" if $format eq 'd'; + + # Above leaves $base_file undefined only if it came from the hash + # below. This should happen only when it is a binary property + # (and are accessing via a single-form name, like 'In_Latin1'), + # and so it is stored in a different directory than the To ones. + # XXX Currently, the only cases where it is complemented are the + # ones that have no code points. And it works out for these that + # 1) complementing them, and then 2) adding or subtracting the + # initial 0 and final 110000 cancel each other out. But further + # work would be needed in the unlikely event that an inverted + # property comes along without these characteristics + if (!defined $base_file) { + $base_file = $utf8::loose_to_file_of{$proxy_prop}; + $is_binary = ($base_file =~ s/^!//) ? -1 : 1; + $base_file = "lib/$base_file"; + } + + # Read in the file + $file = "unicore/$base_file.pl"; + $official = do $file; + + # Get rid of any trailing space and comments in the file. + $official =~ s/\s*(#.*)?$//mg; + + # Decomposition.pl also has the <compatible> types in it, which + # should be removed. + $official =~ s/<.*?> //mg if $format eq 'd'; + } + chomp $official; + + # If there are any special elements, get a reference to them. + my $specials_ref = $utf8::file_to_swash_name{$base_file}; + if ($specials_ref) { + $specials_ref = $utf8::SwashInfo{$specials_ref}{'specials_name'}; + if ($specials_ref) { + + # Convert from the name to the actual reference. + no strict 'refs'; + $specials_ref = \%{$specials_ref}; + } + } + + # Certain of the proxy properties have to be adjusted to match the + # real ones. + if (($proxy_prop ne $name && $full_name =~ 'Mapping') + || $full_name eq 'Case_Folding') + { + + # Here we have either + # 1) Case_Folding; or + # 2) a proxy that is a full mapping, which means that what the + # real property is is the equivalent simple mapping. + # In both cases, the file will have a standard list containing + # simple mappings (to a single code point), and a specials hash + # which contains all the mappings that are to multiple code + # points. First, extract a list containing all the file's simple + # mappings. + my @list; + for (split "\n", $official) { + my ($start, $end, $value) = / ^ (.+?) \t (.*?) \t (.+?) + \s* ( \# .* )? + $ /x; + $end = $start if $end eq ""; + if ($end ne $start) { + fail("prop_invmap('$mod_prop')"); + diag("This test is expecting only single code point ranges in $file.pl"); + next PROPERTY; + } + push @list, [ hex $start, $value ]; + } + + # For Case_Folding, the file contains all the simple mappings, + # including the ones that are overridden by the specials. These + # need to be removed as the list is for just the full ones. For + # the other files, the proxy is missing the simple mappings that + # are overridden by the specials, so we need to add them. + + # For the missing simples, we get the correct values by calling + # charinfo(). Set up which element of the hash returned by + # charinfo to look at + my $charinfo_element; + if ($full_name =~ / ^ Simple_ (Lower | Upper | Title) case_Mapping/x) + { + $charinfo_element = lc $1; # e.g. Upper is referred to by the + # key 'upper' in the charinfo() + # returned hash + } + + # Go through any special mappings one by one. They are packed. + my $i = 0; + foreach my $utf8_cp (sort keys %$specials_ref) { + my $cp = unpack("C0U", $utf8_cp); + + # Get what the simple value for this should be; either nothing + # for Case_Folding, or what charinfo returns for the others. + my $simple = ($full_name eq "Case_Folding") + ? "" + : charinfo($cp)->{$charinfo_element}; + + # And create an entry to add to the list, if appropriate + my $replacement; + $replacement = [ $cp, $simple ] if $simple ne ""; + + # Find the spot in the @list of simple mappings that this + # special applies to; uses a linear search. + while ($i < @list -1 ) { + last if $cp <= $list[$i][0]; + $i++; + } + + #note $i-1 . ": " . join " => ", @{$list[$i-1]}; + #note $i-0 . ": " . join " => ", @{$list[$i-0]}; + #note $i+1 . ": " . join " => ", @{$list[$i+1]}; + + if (! defined $replacement) { + + # Here, are to remove any existing entry for this code + # point. + next if $cp != $list[$i][0]; + splice @list, $i, 1; + } + elsif ($cp == $list[$i][0]) { + + # Here, are to add something, but there is an existing + # entry, so this just replaces it. + $list[$i] = $replacement; + } + else { + + # Here, are to add something, and there isn't an existing + # entry. + splice @list, $i, 0, $replacement; + } + + #note __LINE__ . ": $cp"; + #note $i-1 . ": " . join " => ", @{$list[$i-1]}; + #note $i-0 . ": " . join " => ", @{$list[$i-0]}; + #note $i+1 . ": " . join " => ", @{$list[$i+1]}; + } + + # Here, have gone through all the specials, modifying @list as + # needed. Turn it back into what the file should look like. + $official = join "\n", map { sprintf "%04X\t\t%s", @$_ } @list; + + # And, no longer need the specials for the simple mappings, as are + # all incorporated into $official + undef $specials_ref if $full_name ne 'Case_Folding'; + } + elsif ($full_name eq 'Simple_Case_Folding') { + + # This property has everything in the regular array, and the + # specials are superfluous. + undef $specials_ref if $full_name ne 'Case_Folding'; + } + + # Here, in $official, we have what the file looks like, or should like + # if we've had to fix it up. Now take the invmap() output and reverse + # engineer from that what the file should look like. Each iteration + # appends the next line to the running string. + my $tested_map = ""; + + # Create a copy of the file's specials hash. (It has been undef'd if + # we know it isn't relevant to this property, so if it exists, it's an + # error or is relevant). As we go along, we delete from that copy. + # If a delete fails, or something is left over after we are done, + # it's an error + my %specials = %$specials_ref if $specials_ref; + + # The extra -1 is because the final element has been tested above to + # be for anything above Unicode. The file doesn't go that high. + for my $i (0 .. @$invlist_ref - 1 - 1) { + + # If the map element is a reference, have to stringify it (but + # don't do so if the format doesn't allow references, so that an + # improper format will generate an error. + if (ref $invmap_ref->[$i] + && ($format eq 'd' || $format =~ /^ . l /x)) + { + # The stringification depends on the format. At the time of + # this writing, all 'sl' formats are space separated. + if ($format eq 'sl') { + $invmap_ref->[$i] = join " ", @{$invmap_ref->[$i]}; + } + elsif ($format =~ / ^ cl e? $/x) { + + # For a cl property, the stringified result should be in + # the specials hash. The key is the packed code point, + # and the value is the packed map. + my $value; + if (! defined ($value = delete $specials{pack("C0U", $invlist_ref->[$i]) })) { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "There was no specials element for %04X", $invlist_ref->[$i]); + next PROPERTY; + } + my $packed = pack "U*", @{$invmap_ref->[$i]}; + if ($value ne $packed) { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "For %04X, expected the mapping to be '$packed', but got '$value'"); + next PROPERTY; + } + + # As this doesn't get tested when we later compare with + # the actual file, it could be out of order and we + # wouldn't know it. + if (($i > 0 && $invlist_ref->[$i] <= $invlist_ref->[$i-1]) + || $invlist_ref->[$i] >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + next; + } + elsif ($format eq 'd') { + + # The decomposition mapping file has the code points as + # a string of space-separated hex constants. + $invmap_ref->[$i] = join " ", map { sprintf "%04X", $_ } @{$invmap_ref->[$i]}; + } + else { + fail("prop_invmap('$mod_prop')"); + diag("Can't handle format '$format'"); + next PROPERTY; + } + } + elsif ($format eq 'cle' && $invmap_ref->[$i] eq "") { + + # cle properties have maps to the empty string that also + # should be in the specials hash, with the key the packed code + # point, and the map just empty. + my $value; + if (! defined ($value = delete $specials{pack("C0U", $invlist_ref->[$i]) })) { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "There was no specials element for %04X", $invlist_ref->[$i]); + next PROPERTY; + } + if ($value ne "") { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "For %04X, expected the mapping to be \"\", but got '$value'", $invlist_ref->[$i]); + next PROPERTY; + } + + # As this doesn't get tested when we later compare with + # the actual file, it could be out of order and we + # wouldn't know it. + if (($i > 0 && $invlist_ref->[$i] <= $invlist_ref->[$i-1]) + || $invlist_ref->[$i] >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + next; + } + elsif ($is_binary) { # These binary files don't have an explicit Y + $invmap_ref->[$i] =~ s/Y//; + } + + # The file doesn't include entries that map to $missing, so don't + # include it in the built-up string. But make sure that it is in + # the correct order in the input. + if ($invmap_ref->[$i] eq $missing) { + if (($i > 0 && $invlist_ref->[$i] <= $invlist_ref->[$i-1]) + || $invlist_ref->[$i] >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + next; + } + + # 'c'-type and 'd' properties have the mapping expressed in hex in + # the file + if ($format =~ /^ [cd] /x) { + + # The d property has one entry which isn't in the file. + # Ignore it, but make sure it is in order. + if ($format eq 'd' + && $invmap_ref->[$i] eq '<hangul syllable>' + && $invlist_ref->[$i] == 0xAC00) + { + if (($i > 0 && $invlist_ref->[$i] <= $invlist_ref->[$i-1]) + || $invlist_ref->[$i] >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + next; + } + $invmap_ref->[$i] = sprintf("%04X", $invmap_ref->[$i]) + if $invmap_ref->[$i] =~ / ^ [A-Fa-f0-9]+ $/x; + } + + # Finally have figured out what the map column in the file should + # be. Append the line to the running string. + my $start = $invlist_ref->[$i]; + my $end = $invlist_ref->[$i+1] - 1; + $end = ($start == $end) ? "" : sprintf("%04X", $end); + if ($invmap_ref->[$i] ne "") { + $tested_map .= sprintf "%04X\t%s\t%s\n", $start, $end, $invmap_ref->[$i]; + } + elsif ($end ne "") { + $tested_map .= sprintf "%04X\t%s\n", $start, $end; + } + else { + $tested_map .= sprintf "%04X\n", $start; + } + } # End of looping over all elements. + + # Here are done with generating what the file should look like + + chomp $tested_map; + + # And compare. + if ($tested_map ne $official) { + fail_with_diff($mod_prop, $official, $tested_map, "prop_invmap"); + next PROPERTY; + } + + # There shouldn't be any specials unaccounted for. + if (keys %specials) { + fail("prop_invmap('$mod_prop')"); + diag("Unexpected specials: " . join ", ", keys %specials); + next PROPERTY; + } + } + elsif ($format eq 'n') { + + # Handle the Name property similar to the above. But the file is + # sufficiently different that it is more convenient to make a special + # case for it. + + if ($missing ne "") { + fail("prop_invmap('$mod_prop')"); + diag("The missings should be \"\"; got \"missing\""); + next PROPERTY; + } + + $official = do "unicore/Name.pl"; + + # Get rid of the named sequences portion of the file. These don't + # have a tab before the first blank on a line. + $official =~ s/ ^ [^\t]+ \ .*? \n //xmg; + + # And get rid of the controls. These are named in the file, but + # shouldn't be in the property. + $official =~ s/ 00000 \t .* 0001F .*? \n//xs; + $official =~ s/ 0007F \t .* 0009F .*? \n//xs; + + # This is slow; it gets rid of the aliases. We look for lines that + # are for the same code point as the previous line. The previous line + # will be a name_alias; and the current line will be the name. Get + # rid of the name_alias line. This won't work if there are multiple + # aliases for a given name. + my @temp_names = split "\n", $official; + my $previous_cp = ""; + for (my $i = 0; $i < @temp_names - 1; $i++) { + $temp_names[$i] =~ /^ (.*)? \t /x; + my $current_cp = $1; + if ($current_cp eq $previous_cp) { + splice @temp_names, $i - 1, 1; + redo; + } + else { + $previous_cp = $current_cp; + } + } + $official = join "\n", @temp_names; + undef @temp_names; + chomp $official; + + # Here have adjusted the file. We also have to adjust the returned + # inversion map by checking and deleting all the lines in it that + # won't be in the file. These are the lines that have generated + # things, like <hangul syllable>. + my $tested_map = ""; # Current running string + my @code_point_in_names = + @Unicode::UCD::code_points_ending_in_code_point; + + for my $i (0 .. @$invlist_ref - 1 - 1) { + my $start = $invlist_ref->[$i]; + my $end = $invlist_ref->[$i+1] - 1; + if ($invmap_ref->[$i] eq $missing) { + if (($i > 0 && $invlist_ref->[$i] <= $invlist_ref->[$i-1]) + || $invlist_ref->[$i] >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + next; + } + if ($invmap_ref->[$i] =~ / (.*) ( < .*? > )/x) { + my $name = $1; + my $type = $2; + if (($i > 0 && $invlist_ref->[$i] <= $invlist_ref->[$i-1]) + || $invlist_ref->[$i] >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + if ($type eq "<hangul syllable>") { + if ($name ne "") { + fail("prop_invmap('$mod_prop')"); + diag("Unexpected text in $invmap_ref->[$i]"); + next PROPERTY; + } + if ($start != 0xAC00) { + fail("prop_invmap('$mod_prop')"); + diag(sprintf("<hangul syllables> should begin at 0xAC00, got %04X", $start)); + next PROPERTY; + } + if ($end != $start + 11172 - 1) { + fail("prop_invmap('$mod_prop')"); + diag(sprintf("<hangul syllables> should end at %04X, got %04X", $start + 11172 -1, $end)); + next PROPERTY; + } + } + elsif ($type ne "<code point>") { + fail("prop_invmap('$mod_prop')"); + diag("Unexpected text '$type' in $invmap_ref->[$i]"); + next PROPERTY; + } + else { + + # Look through the array of names that end in code points, + # and look for this start and end. If not found is an + # error. If found, delete it, and at the end, make sure + # have deleted everything. + for my $i (0 .. @code_point_in_names - 1) { + my $hash = $code_point_in_names[$i]; + if ($hash->{'low'} == $start + && $hash->{'high'} == $end + && "$hash->{'name'}-" eq $name) + { + splice @code_point_in_names, $i, 1; + last; + } + else { + fail("prop_invmap('$mod_prop')"); + diag("Unexpected code-point-in-name line '$invmap_ref->[$i]'"); + next PROPERTY; + } + } + } + + next; + } + + # Have adjusted the map, as needed. Append to running string. + $end = ($start == $end) ? "" : sprintf("%05X", $end); + $tested_map .= sprintf "%05X\t%s\n", $start, $invmap_ref->[$i]; + } + + # Finished creating the string from the inversion map. Can compare + # with what the file is. + chomp $tested_map; + if ($tested_map ne $official) { + fail_with_diff($mod_prop, $official, $tested_map, "prop_invmap"); + next PROPERTY; + } + if (@code_point_in_names) { + fail("prop_invmap('$mod_prop')"); + use Data::Dumper; + diag("Missing code-point-in-name line(s)" . Dumper \@code_point_in_names); + next PROPERTY; + } + } + elsif ($format eq 's' || $format eq 'r') { + + # Here the map is not more or less directly from a file stored on + # disk. We try a different tack. These should all be properties that + # have just a few possible values (most of them are binary). We go + # through the map list, sorting each range into buckets, one for each + # map value. Thus for binary properties there will be a bucket for Y + # and one for N. The buckets are inversion lists. We compare each + # constructed inversion list with what we would get for it using + # prop_invlist(), which has already been tested. If they all match, + # the whole map must have matched. + my %maps; + my $previous_map; + + # (The extra -1 is to not look at the final element in the loop, which + # we know is the one that starts just beyond Unicode and goes to + # infinity.) + for my $i (0 .. @$invlist_ref - 1 - 1) { + my $range_start = $invlist_ref->[$i]; + + # Because we are sorting into buckets, things could be + # out-of-order here, and still be in the correct order in the + # bucket, and hence wouldn't show up as an error; so have to + # check. + if (($i > 0 && $range_start <= $invlist_ref->[$i-1]) + || $range_start >= $invlist_ref->[$i+1]) + { + fail("prop_invmap('$mod_prop')"); + diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]); + next PROPERTY; + } + + # This new range closes out the range started in the previous + # iteration. + push @{$maps{$previous_map}}, $range_start if defined $previous_map; + + # And starts a range which will be closed in the next iteration. + $previous_map = $invmap_ref->[$i]; + push @{$maps{$previous_map}}, $range_start; + } + + # The range we just started hasn't been closed, and we didn't look at + # the final element of the loop. If that range is for the default + # value, it shouldn't be closed, as it is to extend to infinity. But + # otherwise, it should end at the final Unicode code point, and the + # list that maps to the default value should have another element that + # does go to infinity for every above Unicode code point. + + if (@$invlist_ref > 1) { + my $penultimate_map = $invmap_ref->[-2]; + if ($penultimate_map ne $missing) { + + # The -1th element contains the first non-Unicode code point. + push @{$maps{$penultimate_map}}, $invlist_ref->[-1]; + push @{$maps{$missing}}, $invlist_ref->[-1]; + } + } + + # Here, we have the buckets (inversion lists) all constructed. Go + # through each and verify that matches what prop_invlist() returns. + # We could use is_deeply() for the comparison, but would get multiple + # messages for each $prop. + foreach my $map (keys %maps) { + my @off_invlist = prop_invlist("$prop = $map"); + my $min = (@off_invlist >= @{$maps{$map}}) + ? @off_invlist + : @{$maps{$map}}; + for my $i (0 .. $min- 1) { + if ($i > @off_invlist - 1) { + fail("prop_invmap('$mod_prop')"); + diag("There is no element [$i] for $prop=$map from prop_invlist(), while [$i] in the implicit one constructed from prop_invmap() is '$maps{$map}[$i]'"); + next PROPERTY; + } + elsif ($i > @{$maps{$map}} - 1) { + fail("prop_invmap('$mod_prop')"); + diag("There is no element [$i] from the implicit $prop=$map constructed from prop_invmap(), while [$i] in the one from prop_invlist() is '$off_invlist[$i]'"); + next PROPERTY; + } + elsif ($maps{$map}[$i] ne $off_invlist[$i]) { + fail("prop_invmap('$mod_prop')"); + diag("Element [$i] of the implicit $prop=$map constructed from prop_invmap() is '$maps{$map}[$i]', and the one from prop_invlist() is '$off_invlist[$i]'"); + next PROPERTY; + } + } + } + } + else { # Don't know this property nor format. + + fail("prop_invmap('$mod_prop')"); + diag("Unknown format '$format'"); + } + + pass("prop_invmap('$mod_prop')"); +} + +done_testing(); diff --git a/lib/charnames.pm b/lib/charnames.pm index f3894f1870..d88ede0e59 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -2,7 +2,8 @@ package charnames; use strict; use warnings; use File::Spec; -our $VERSION = '1.23'; +our $VERSION = '1.24'; +use unicore::Name; # mktables-generated algorithmically-defined names use bytes (); # for $bytes::hint_bits @@ -12,9 +13,9 @@ use bytes (); # for $bytes::hint_bits # lib/unicore/Name.pl which is read in as a large string (almost 3/4 Mb in # Unicode 6.0). Each code point/name combination is separated by a \n in the # string. (Some of the CJK and the Hangul syllable names are determined -# instead algorithmically via subroutines also stored in Name.pl). Because of -# the large size of this table, it isn't converted into hashes for faster -# lookup. +# instead algorithmically via subroutines stored instead in +# lib/unicore/Name.pm). Because of the large size of this table, it isn't +# converted into hashes for faster lookup. # # But, user defined aliases are stored in their own hashes, as are Perl # extensions to the official names. These are checked first before looking at diff --git a/lib/feature.pm b/lib/feature.pm index 78cb8fcd28..ce73e2df96 100644 --- a/lib/feature.pm +++ b/lib/feature.pm @@ -1,12 +1,14 @@ package feature; -our $VERSION = '1.22'; +our $VERSION = '1.23'; # (feature name) => (internal name, used in %^H) my %feature = ( say => 'feature_say', state => 'feature_state', switch => 'feature_switch', + evalbytes => 'feature_evalbytes', + unicode_eval => 'feature_unieval', unicode_strings => 'feature_unicode', ); @@ -17,15 +19,18 @@ our $hint_uni8bit = 0x00000800; # NB. the latest bundle must be loaded by the -E switch (see toke.c) -my %feature_bundle = ( +our %feature_bundle = ( "5.10" => [qw(say state switch)], "5.11" => [qw(say state switch unicode_strings)], - "5.12" => [qw(say state switch unicode_strings)], - "5.13" => [qw(say state switch unicode_strings)], - "5.14" => [qw(say state switch unicode_strings)], - "5.15" => [qw(say state switch unicode_strings)], + "5.15" => [qw(say state switch unicode_strings unicode_eval + evalbytes)], ); +# Each of these is the same as the previous bundle +for(12...14, 16) { + $feature_bundle{"5.$_"} = $feature_bundle{"5.".($_-1)} +} + # special case $feature_bundle{"5.9.5"} = $feature_bundle{"5.10"}; @@ -126,6 +131,53 @@ C<use feature 'unicode_strings'> subpragma is B<strongly> recommended. This subpragma is available starting with Perl 5.11.3, but was not fully implemented until 5.13.8. +=head2 the 'unicode_eval' and 'evalbytes' features + +Under the C<unicode_eval> feature, Perl's C<eval> function, when passed a +string, will evaluate it as a string of characters, ignoring any +C<use utf8> declarations. C<use utf8> exists to declare the encoding of +the script, which only makes sense for a stream of bytes, not a string of +characters. Source filters are forbidden, as they also really only make +sense on strings of bytes. Any attempt to activate a source filter will +result in an error. + +The C<evalbytes> feature enables the C<evalbytes> keyword, which evaluates +the argument passed to it as a string of bytes. It dies if the string +contains any characters outside the 8-bit range. Source filters work +within C<evalbytes>: they apply to the contents of the string being +evaluated. + +Together, these two features are intended to replace the historical C<eval> +function, which has (at least) two bugs in it, that cannot easily be fixed +without breaking existing programs: + +=over + +=item * + +C<eval> behaves differently depending on the internal encoding of the +string, sometimes treating its argument as a string of bytes, and sometimes +as a string of characters. + +=item * + +Source filters activated within C<eval> leak out into whichever I<file> +scope is currently being compiled. To give an example with the CPAN module +L<Semi::Semicolons>: + + BEGIN { eval "use Semi::Semicolons; # not filtered here " } + # filtered here! + +C<evalbytes> fixes that to work the way one would expect: + + use feature "evalbytes"; + BEGIN { evalbytes "use Semi::Semicolons; # filtered " } + # not filtered + +=back + +These two features are available starting with Perl 5.16. + =head1 FEATURE BUNDLES It's possible to load a whole slew of features in one go, using diff --git a/lib/strict.pm b/lib/strict.pm index 6e193ac623..c1544f5c4b 100644 --- a/lib/strict.pm +++ b/lib/strict.pm @@ -1,6 +1,6 @@ package strict; -$strict::VERSION = "1.05"; +$strict::VERSION = "1.06"; # Verify that we're called correctly so that strictures will work. unless ( __FILE__ =~ /(^|[\/\\])\Q${\__PACKAGE__}\E\.pmc?$/ ) { @@ -91,12 +91,12 @@ is allowed so that C<goto &$AUTOLOAD> would not break under stricture. =item C<strict vars> -This generates a compile-time error if you access a variable that wasn't -declared via C<our>, C<use vars>, or C<my()>, and wasn't fully qualified. -Because this is to avoid -variable suicide problems and subtle dynamic scoping issues, a merely -local() variable isn't good enough. See L<perlfunc/my> and -L<perlfunc/local>. +This generates a compile-time error if you access a variable that was +neither explicitly declared (using any of C<my>, C<our>, C<state>, or C<use +vars>) nor fully qualified. (Because this is to avoid variable suicide +problems and subtle dynamic scoping issues, a merely C<local> variable isn't +good enough.) See L<perlfunc/my>, L<perlfunc/our>, L<perlfunc/state>, +L<perlfunc/local>, and L<vars>. use strict 'vars'; $X::foo = 1; # ok, fully qualified diff --git a/lib/unicore/Makefile b/lib/unicore/Makefile index 17072ed299..aea8f7846f 100644 --- a/lib/unicore/Makefile +++ b/lib/unicore/Makefile @@ -2,5 +2,5 @@ all: ../../miniperl -I../../lib ./mktables -P ../../pod -maketest -makelist -p clean: - rm -fr *.pl To lib + rm -fr *.pl *.pm To lib rm -f ../../pod/perluniprops.pod mktables.lst diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl index 6656daf6eb..1a94d031bb 100644 --- a/lib/unicore/README.perl +++ b/lib/unicore/README.perl @@ -110,6 +110,10 @@ Unicode release number) for perluniprops.pod Module::CoreList should be changed to include the new release +Also, you should regen l1_char_class_tab.h, by + +perl regen/mk_L_charclass.pl + Finally: p4 submit diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 645b76a628..e6e2f81430 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -22,7 +22,6 @@ BEGIN { # Get the time the script started running; do it at compilation to $start_time= time; } - require 5.010_001; use strict; use warnings; @@ -32,6 +31,7 @@ use File::Find; use File::Path; use File::Spec; use Text::Tabs; +use re "/aa"; sub DEBUG () { 0 } # Set to 0 for production; 1 for development my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/; @@ -162,12 +162,18 @@ my $map_directory = 'To'; # Where map files go. # Name have a different value for every named code point. Those will not, # unless the controlling lists are changed, have their match tables written # out. But all the ones which can be used in regular expression \p{} and \P{} -# constructs will. Generally a property will have either its map table or its -# match tables written but not both. Again, what gets written is controlled -# by lists which can easily be changed. Properties have a 'Type', like -# binary, or string, or enum depending on how many match tables there are and -# the content of the maps. This 'Type' is different than a range 'Type', so -# don't get confused by the two concepts having the same name. +# constructs will. Prior to 5.14, generally a property would have either its +# map table or its match tables written but not both. Again, what gets +# written is controlled by lists which can easily be changed. Starting in +# 5.14, advantage was taken of this, and all the map tables needed to +# reconstruct the Unicode db are now written out, while suppressing the +# Unicode .txt files that contain the data. Our tables are much more compact +# than the .txt files, so a significant space savings was achieved. + +# Properties have a 'Type', like binary, or string, or enum depending on how +# many match tables there are and the content of the maps. This 'Type' is +# different than a range 'Type', so don't get confused by the two concepts +# having the same name. # # For information about the Unicode properties, see Unicode's UAX44 document: @@ -176,17 +182,16 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # As stated earlier, this program will work on any release of Unicode so far. # Most obvious problems in earlier data have NOT been corrected except when # necessary to make Perl or this program work reasonably. For example, no -# folding information was given in early releases, so this program uses the -# substitute of lower case, just so that a regular expression with the /i -# option will do something that actually gives the right results in many -# cases. There are also a couple other corrections for version 1.1.5, -# commented at the point they are made. As an example of corrections that -# weren't made (but could be) is this statement from DerivedAge.txt: "The -# supplementary private use code points and the non-character code points were -# assigned in version 2.0, but not specifically listed in the UCD until -# versions 3.0 and 3.1 respectively." (To be precise it was 3.0.1 not 3.0.0) -# More information on Unicode version glitches is further down in these -# introductory comments. +# folding information was given in early releases, so this program substitutes +# lower case instead, just so that a regular expression with the /i option +# will do something that actually gives the right results in many cases. +# There are also a couple other corrections for version 1.1.5, commented at +# the point they are made. As an example of corrections that weren't made +# (but could be) is this statement from DerivedAge.txt: "The supplementary +# private use code points and the non-character code points were assigned in +# version 2.0, but not specifically listed in the UCD until versions 3.0 and +# 3.1 respectively." (To be precise it was 3.0.1 not 3.0.0) More information +# on Unicode version glitches is further down in these introductory comments. # # This program works on all non-provisional properties as of 6.0, though the # files for some are suppressed from apparent lack of demand for them. You @@ -290,18 +295,6 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # warn about any that it doesn't know how to handle (the -q option suppresses # the warning). # -# Why have files written out for binary 'N' matches? -# For binary properties, if you know the mapping for either Y or N; the -# other is trivial to construct, so could be done at Perl run-time by just -# complementing the result, instead of having a file for it. That is, if -# someone types in \p{foo: N}, Perl could translate that to \P{foo: Y} and -# not need a file. The problem is communicating to Perl that a given -# property is binary. Perl can't figure it out from looking at the N (or -# No), as some non-binary properties have these as property values. So -# rather than inventing a way to communicate this info back to the core, -# which would have required changes there as well, it was simpler just to -# add the extra tables. -# # Why is there more than one type of range? # This simplified things. There are some very specialized code points that # have to be handled specially for output, such as Hangul syllable names. @@ -322,14 +315,6 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/'; # can't just take the intersection of two map tables, for example, as that # is nonsensical. # -# There are no match tables generated for matches of the null string. These -# would look like qr/\p{JSN=}/ currently without modifying the regex code. -# Perhaps something like them could be added if necessary. The JSN does have -# a real code point U+110B that maps to the null string, but it is a -# contributory property, and therefore not output by default. And it's easily -# handled so far by making the null string the default where it is a -# possibility. -# # DEBUGGING # # This program is written so it will run under miniperl. Occasionally changes @@ -516,7 +501,9 @@ my $MAX_LINE_WIDTH = 78; # non_skip => 1, # to the constructor for those files you want processed when you set this. # Files with a first version number of 0 are special: they are always -# processed regardless of the state of this flag. +# processed regardless of the state of this flag. Generally, Jamo.txt and +# UnicodeData.txt must not be skipped if you want this program to not die +# before normal completion. my $debug_skip = 0; # Set to 1 to enable tracing. @@ -590,16 +577,16 @@ our $to_trace = 0; # This is for a rarely used development feature that allows you to compare two # versions of the Unicode standard without having to deal with changes caused -# by the code points introduced in the later version. Change the 0 to a SINGLE -# dotted Unicode release number (e.g. 2.1). Only code points introduced in -# that release and earlier will be used; later ones are thrown away. You use -# the version number of the earliest one you want to compare; then run this -# program on directory structures containing each release, and compare the -# outputs. These outputs will therefore include only the code points common -# to both releases, and you can see the changes caused just by the underlying -# release semantic changes. For versions earlier than 3.2, you must copy a -# version of DAge.txt into the directory. -my $string_compare_versions = DEBUG && 0; # e.g., v2.1; +# by the code points introduced in the later version. Change the 0 to a +# string containing a SINGLE dotted Unicode release number (e.g. "2.1"). Only +# code points introduced in that release and earlier will be used; later ones +# are thrown away. You use the version number of the earliest one you want to +# compare; then run this program on directory structures containing each +# release, and compare the outputs. These outputs will therefore include only +# the code points common to both releases, and you can see the changes caused +# just by the underlying release semantic changes. For versions earlier than +# 3.2, you must copy a version of DAge.txt into the directory. +my $string_compare_versions = DEBUG && 0; # e.g., "2.1"; my $compare_versions = DEBUG && $string_compare_versions && pack "C*", split /\./, $string_compare_versions; @@ -851,20 +838,25 @@ my $INTERNAL_MAP = 2; # for any code point is available in a more compact form. my %global_to_output_map = ( # Needed by UCD.pm, but don't want to publicize that it exists, so won't - # get stuck supporting it if things change. Sinc it is a STRING property, - # it normally would be listed in the pod, but INTERNAL_MAP suppresses - # that. + # get stuck supporting it if things change. Since it is a STRING + # property, it normally would be listed in the pod, but INTERNAL_MAP + # suppresses that. Unicode_1_Name => $INTERNAL_MAP, Present_In => 0, # Suppress, as easily computed from Age - Canonical_Combining_Class => 0, # Duplicate of CombiningClass.pl Block => 0, # Suppress, as Blocks.txt is retained. + + # Suppress, as mapping can be found instead from the + # Perl_Decomposition_Mapping file + Decomposition_Type => 0, ); # Properties that this program ignores. -my @unimplemented_properties = ( -'Unicode_Radical_Stroke' # Remove if changing to handle this one. -); +my @unimplemented_properties; + +# With this release, it is automatically handled if the Unihan db is +# downloaded +push @unimplemented_properties, 'Unicode_Radical_Stroke' if $v_version le v5.2.0; # There are several types of obsolete properties defined by Unicode. These # must be hand-edited for every new Unicode release. @@ -892,6 +884,10 @@ my %why_obsolete; # Documentation only 'Other_Lowercase' => $contributory, 'Other_Math' => $contributory, 'Other_Uppercase' => $contributory, + 'Expands_On_NFC' => $why_no_expand, + 'Expands_On_NFD' => $why_no_expand, + 'Expands_On_NFKC' => $why_no_expand, + 'Expands_On_NFKD' => $why_no_expand, ); %why_suppressed = ( @@ -899,28 +895,45 @@ my %why_obsolete; # Documentation only # contains the same information, but without the algorithmically # determinable Hangul syllables'. This file is not published, so it's # existence is not noted in the comment. - 'Decomposition_Mapping' => 'Accessible via Unicode::Normalize', + 'Decomposition_Mapping' => 'Accessible via Unicode::Normalize or Unicode::UCD::prop_invmap()', - 'ISO_Comment' => 'Apparently no demand for it, but can access it through Unicode::UCD::charinfo. Obsoleted, and code points for it removed in Unicode 5.2', + # Don't suppress ISO_Comment, as otherwise special handling is needed + # to differentiate between it and gc=c, which can be written as 'isc', + # which is the same characters as ISO_Comment's short name. - 'Simple_Case_Folding' => "$simple. Can access this through Unicode::UCD::casefold", - 'Simple_Lowercase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo", - 'Simple_Titlecase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo", - 'Simple_Uppercase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo", + 'Name' => "Accessible via 'use charnames;' or Unicode::UCD::prop_invmap()", - 'Name' => "Accessible via 'use charnames;'", - 'Name_Alias' => "Accessible via 'use charnames;'", + 'Simple_Case_Folding' => "$simple. Can access this through Unicode::UCD::casefold or Unicode::UCD::prop_invmap()", + 'Simple_Lowercase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo or Unicode::UCD::prop_invmap()", + 'Simple_Titlecase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo or Unicode::UCD::prop_invmap()", + 'Simple_Uppercase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo or Unicode::UCD::prop_invmap()", FC_NFKC_Closure => 'Supplanted in usage by NFKC_Casefold; otherwise not useful', - Expands_On_NFC => $why_no_expand, - Expands_On_NFD => $why_no_expand, - Expands_On_NFKC => $why_no_expand, - Expands_On_NFKD => $why_no_expand, ); - # The following are suppressed because they were made contributory or - # deprecated by Unicode before Perl ever thought about supporting them. - foreach my $property ('Jamo_Short_Name', 'Grapheme_Link') { + foreach my $property ( + + # The following are suppressed because they were made contributory + # or deprecated by Unicode before Perl ever thought about + # supporting them. + 'Jamo_Short_Name', + 'Grapheme_Link', + 'Expands_On_NFC', + 'Expands_On_NFD', + 'Expands_On_NFKC', + 'Expands_On_NFKD', + + # The following are suppressed because they have been marked + # as deprecated for a sufficient amount of time + 'Other_Alphabetic', + 'Other_Default_Ignorable_Code_Point', + 'Other_Grapheme_Extend', + 'Other_ID_Continue', + 'Other_ID_Start', + 'Other_Lowercase', + 'Other_Math', + 'Other_Uppercase', + ) { $why_suppressed{$property} = $why_deprecated{$property}; } @@ -940,7 +953,7 @@ if ($v_version ge 4.0.0) { if ($v_version ge 5.2.0 && $v_version lt 6.0.0) { $why_obsolete{'ISO_Comment'} = 'Code points for it have been removed'; if ($v_version ge 6.0.0) { - $why_deprecated{'ISO_Comment'} = 'No longer needed for chart generation; otherwise not useful, and code points for it have been removed'; + $why_deprecated{'ISO_Comment'} = 'No longer needed for Unicode\'s internal chart generation; otherwise not useful, and code points for it have been removed'; } } @@ -949,8 +962,8 @@ if ($v_version ge v4.1.0) { $why_suppressed{'Script=Katakana_Or_Hiragana'} = 'Obsolete. All code points previously matched by this have been moved to "Script=Common".'; } if ($v_version ge v6.0.0) { - $why_suppressed{'Script=Katakana_Or_Hiragana'} .= ' Consider instead using Script_Extensions=Katakana or Script_Extensions=Hiragana (or both)"'; - $why_suppressed{'Script_Extensions=Katakana_Or_Hiragana'} = 'All code points that would be matched by this are matched by either Script_Extensions=Katakana or Script_Extensions=Hiragana"'; + $why_suppressed{'Script=Katakana_Or_Hiragana'} .= ' Consider instead using "Script_Extensions=Katakana" or "Script_Extensions=Hiragana (or both)"'; + $why_suppressed{'Script_Extensions=Katakana_Or_Hiragana'} = 'All code points that would be matched by this are matched by either "Script_Extensions=Katakana" or "Script_Extensions=Hiragana"'; } # This program can create files for enumerated-like properties, such as @@ -962,9 +975,10 @@ if ($v_version ge v6.0.0) { my @output_mapped_properties = split "\n", <<END; END -# If you are using the Unihan database, you need to add the properties that -# you want to extract from it to this table. For your convenience, the -# properties in the 6.0 PropertyAliases.txt file are listed, commented out +# If you are using the Unihan database in a Unicode version before 5.2, you +# need to add the properties that you want to extract from it to this table. +# For your convenience, the properties in the 6.0 PropertyAliases.txt file are +# listed, commented out my @cjk_properties = split "\n", <<'END'; #cjkAccountingNumeric; kAccountingNumeric #cjkOtherNumeric; kOtherNumeric @@ -984,7 +998,7 @@ END # Similarly for the property values. For your convenience, the lines in the # 6.0 PropertyAliases.txt file are listed. Just remove the first BUT NOT both -# '#' marks +# '#' marks (for Unicode versions before 5.2) my @cjk_property_values = split "\n", <<'END'; ## @missing: 0000..10FFFF; cjkAccountingNumeric; NaN ## @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point> @@ -1057,18 +1071,21 @@ my %default_mapping = ( # Below are files that Unicode furnishes, but this program ignores, and why my %ignored_files = ( - 'CJKRadicals.txt' => 'Unihan data', - 'Index.txt' => 'An index, not actual data', - 'NamedSqProv.txt' => 'Not officially part of the Unicode standard; Append it to NamedSequences.txt if you want to process the contents.', - 'NamesList.txt' => 'Just adds commentary', - 'NormalizationCorrections.txt' => 'Data is already in other files.', - 'Props.txt' => 'Adds nothing to PropList.txt; only in very early releases', - 'ReadMe.txt' => 'Just comments', - 'README.TXT' => 'Just comments', - 'StandardizedVariants.txt' => 'Only for glyph changes, not a Unicode character property. Does not fit into current scheme where one code point is mapped', - 'EmojiSources.txt' => 'Not of general utility: for Japanese legacy cell-phone applications', - 'IndicMatraCategory.txt' => 'Provisional', - 'IndicSyllabicCategory.txt' => 'Provisional', + 'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points', + 'Index.txt' => 'Alphabetical index of Unicode characters', + 'NamedSqProv.txt' => 'Named sequences proposed for inclusion in a later version of the Unicode Standard; if you need them now, you can append this file to F<NamedSequences.txt> and recompile perl', + 'NamesList.txt' => 'Annotated list of characters', + 'NormalizationCorrections.txt' => 'Documentation of corrections already incorporated into the Unicode data base', + 'Props.txt' => 'Only in very early releases; is a subset of F<PropList.txt> (which is used instead)', + 'ReadMe.txt' => 'Documentation', + 'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized. This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>', + 'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values', + 'IndicMatraCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts', + 'IndicSyllabicCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts', + 'auxiliary/WordBreakTest.html' => 'Documentation of validation tests', + 'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests', + 'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests', + 'auxiliary/LineBreakTest.html' => 'Documentation of validation tests', ); ### End of externally interesting definitions, except for @input_file_objects @@ -1079,12 +1096,12 @@ my $HEADER=<<"EOF"; # database, Version $string_version. Any changes made here will be lost! EOF -my $INTERNAL_ONLY=<<"EOF"; +my $INTERNAL_ONLY_HEADER = <<"EOF"; # !!!!!!! INTERNAL PERL USE ONLY !!!!!!! -# This file is for internal use by the Perl program only. The format and even -# the name or existence of this file are subject to change without notice. -# Don't use it directly. +# This file is for internal use by core Perl only. The format and even the +# name or existence of this file are subject to change without notice. Don't +# use it directly. EOF my $DEVELOPMENT_ONLY=<<"EOF"; @@ -1096,16 +1113,16 @@ my $DEVELOPMENT_ONLY=<<"EOF"; EOF -my $LAST_UNICODE_CODEPOINT_STRING = "10FFFF"; -my $LAST_UNICODE_CODEPOINT = hex $LAST_UNICODE_CODEPOINT_STRING; -my $MAX_UNICODE_CODEPOINTS = $LAST_UNICODE_CODEPOINT + 1; +my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF"; +my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING; +my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1; # Matches legal code point. 4-6 hex numbers, If there are 6, the first # two must be 10; if there are 5, the first must not be a 0. Written this way -# to decrease backtracking. The first one allows the code point to be at the -# end of a word, but to work properly, the word shouldn't end with a valid hex -# character. The second one won't match a code point at the end of a word, -# and doesn't have the run-on issue +# to decrease backtracking. The first regex allows the code point to be at +# the end of a word, but to work properly, the word shouldn't end with a valid +# hex character. The second one won't match a code point at the end of a +# word, and doesn't have the run-on issue my $run_on_code_point_re = qr/ (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b/x; my $code_point_re = qr/\b$run_on_code_point_re/; @@ -1115,15 +1132,19 @@ my $code_point_re = qr/\b$run_on_code_point_re/; # depends on this ending with a semi-colon, so it can assume it is a valid # field when the line is split() by semi-colons my $missing_defaults_prefix = - qr/^#\s+\@missing:\s+0000\.\.$LAST_UNICODE_CODEPOINT_STRING\s*;/; + qr/^#\s+\@missing:\s+0000\.\.$MAX_UNICODE_CODEPOINT_STRING\s*;/; # Property types. Unicode has more types, but these are sufficient for our # purposes. my $UNKNOWN = -1; # initialized to illegal value my $NON_STRING = 1; # Either binary or enum my $BINARY = 2; -my $ENUM = 3; # Include catalog -my $STRING = 4; # Anything else: string or misc +my $FORCED_BINARY = 3; # Not a binary property, but, besides its normal + # tables, additional true and false tables are + # generated so that false is anything matching the + # default value, and true is everything else. +my $ENUM = 4; # Include catalog +my $STRING = 5; # Anything else: string or misc # Some input files have lines that give default values for code points not # contained in the file. Sometimes these should be ignored. @@ -1170,9 +1191,6 @@ my $CROAK = 5; # Die with an error if is already there # if the flag is changed, the indefinite article referring to it in the # documentation may need to be as well. my $NORMAL = ""; -my $SUPPRESSED = 'z'; # The character should never actually be seen, since - # it is suppressed -my $PLACEHOLDER = 'P'; # Implies no pod entry generated my $DEPRECATED = 'D'; my $a_bold_deprecated = "a 'B<$DEPRECATED>'"; my $A_bold_deprecated = "A 'B<$DEPRECATED>'"; @@ -1191,12 +1209,25 @@ my $A_bold_obsolete = "An 'B<$OBSOLETE>'"; my %status_past_participles = ( $DISCOURAGED => 'discouraged', - $SUPPRESSED => 'should never be generated', $STABILIZED => 'stabilized', $OBSOLETE => 'obsolete', $DEPRECATED => 'deprecated', ); +# Table fates. These are somewhat ordered, so that fates < $MAP_PROXIED should be +# externally documented. +my $ORDINARY = 0; # The normal fate. +my $MAP_PROXIED = 1; # The map table for the property isn't written out, + # but there is a file written that can be used to + # reconstruct this table +my $SUPPRESSED = 3; # The file for this table is not written out. +my $INTERNAL_ONLY = 4; # The file for this table is written out, but it is + # for Perl's internal use only +my $PLACEHOLDER = 5; # A property that is defined as a placeholder in a + # Unicode version that doesn't have it, but we need it + # to be defined, if empty, to have things work. + # Implies no pod entry generated + # The format of the values of the tables: my $EMPTY_FORMAT = ""; my $BINARY_FORMAT = 'b'; @@ -1207,16 +1238,18 @@ my $HEX_FORMAT = 'x'; my $RATIONAL_FORMAT = 'r'; my $STRING_FORMAT = 's'; my $DECOMP_STRING_FORMAT = 'c'; +my $STRING_WHITE_SPACE_LIST = 'sw'; my %map_table_formats = ( $BINARY_FORMAT => 'binary', $DECIMAL_FORMAT => 'single decimal digit', $FLOAT_FORMAT => 'floating point number', $INTEGER_FORMAT => 'integer', - $HEX_FORMAT => 'positive hex whole number; a code point', + $HEX_FORMAT => 'non-negative hex whole number; a code point', $RATIONAL_FORMAT => 'rational: an integer or a fraction', $STRING_FORMAT => 'string', $DECOMP_STRING_FORMAT => 'Perl\'s internal (Normalize.pm) decomposition mapping', + $STRING_WHITE_SPACE_LIST => 'string, but some elements are interpreted as a list; white space occurs only as list item separators' ); # Unicode didn't put such derived files in a separate directory at first. @@ -1225,12 +1258,33 @@ my $EXTRACTED = ($EXTRACTED_DIR) ? "$EXTRACTED_DIR/" : ""; my $AUXILIARY = 'auxiliary'; # Hashes that will eventually go into Heavy.pl for the use of utf8_heavy.pl +# and into UCD.pl for the use of UCD.pm my %loose_to_file_of; # loosely maps table names to their respective # files my %stricter_to_file_of; # same; but for stricter mapping. +my %loose_property_to_file_of; # Maps a loose property name to its map file +my %file_to_swash_name; # Maps the file name to its corresponding key name + # in the hash %utf8::SwashInfo my %nv_floating_to_rational; # maps numeric values floating point numbers to # their rational equivalent -my %loose_property_name_of; # Loosely maps property names to standard form +my %loose_property_name_of; # Loosely maps (non_string) property names to + # standard form +my %string_property_loose_to_name; # Same, for string properties. +my %loose_defaults; # keys are of form "prop=value", where 'prop' is + # the property name in standard loose form, and + # 'value' is the default value for that property, + # also in standard loose form. +my %loose_to_standard_value; # loosely maps table names to the canonical + # alias for them +my %ambiguous_names; # keys are alias names (in standard form) that + # have more than one possible meaning. +my %prop_aliases; # Keys are standard property name; values are each + # one's aliases +my %prop_value_aliases; # Keys of top level are standard property name; + # values are keys to another hash, Each one is + # one of the property's values, in standard form. + # The values are that prop-val's aliases. +my %ucd_pod; # Holds entries that will go into the UCD section of the pod # Most properties are immune to caseless matching, otherwise you would get # nonsensical results, as properties are a function of a code point, not @@ -1269,6 +1323,28 @@ my %Jamo_L; # Leading consonants my %Jamo_V; # Vowels my %Jamo_T; # Trailing consonants +# For code points whose name contains its ordinal as a '-ABCD' suffix. +# The key is the base name of the code point, and the value is an +# array giving all the ranges that use this base name. Each range +# is actually a hash giving the 'low' and 'high' values of it. +my %names_ending_in_code_point; +my %loose_names_ending_in_code_point; # Same as above, but has blanks, dashes + # removed from the names +# Inverse mapping. The list of ranges that have these kinds of +# names. Each element contains the low, high, and base names in an +# anonymous hash. +my @code_points_ending_in_code_point; + +# Boolean: does this Unicode version have the hangul syllables, and are we +# writing out a table for them? +my $has_hangul_syllables = 0; + +# Does this Unicode version have code points whose names end in their +# respective code points, and are we writing out a table for them? 0 for no; +# otherwise points to first property that a table is needed for them, so that +# if multiple tables are needed, we don't create duplicates +my $needing_code_points_ending_in_code_point = 0; + my @backslash_X_tests; # List of tests read in for testing \X my @unhandled_properties; # Will contain a list of properties found in # the input that we didn't process. @@ -1294,6 +1370,7 @@ my $block; my $perl_charname; my $print; my $Any; +my $script; # Are there conflicting names because of beginning with 'In_', or 'Is_' my $has_In_conflicts = 0; @@ -1941,12 +2018,15 @@ sub trace { return main::trace(@_); } main::set_access('non_skip', \%non_skip, 'c'); my %skip; - # This is used to skip processing of this input file semi-permanently. - # It is used for files that we aren't planning to process anytime soon, - # but want to allow to be in the directory and not raise a message that we - # are not handling. Mostly for test files. This is in contrast to the - # non_skip element, which is supposed to be used very temporarily for - # debugging. Sets 'optional' to 1 + # This is used to skip processing of this input file semi-permanently, + # when it evaluates to true. The value should be the reason the file is + # being skipped. It is used for files that we aren't planning to process + # anytime soon, but want to allow to be in the directory and not raise a + # message that we are not handling. Mostly for test files. This is in + # contrast to the non_skip element, which is supposed to be used very + # temporarily for debugging. Sets 'optional' to 1. Also, files that we + # pretty much will never look at can be placed in the global + # %ignored_files instead. Ones used here will be added to that list. main::set_access('skip', \%skip, 'c'); my %each_line_handler; @@ -2069,7 +2149,12 @@ sub trace { return main::trace(@_); } print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n"; } - $optional{$addr} = 1 if $skip{$addr}; + # If skipping, set to optional, and add to list of ignored files, + # including its reason + if ($skip{$addr}) { + $optional{$addr} = 1; + $ignored_files{$file{$addr}} = $skip{$addr} + } return $self; } @@ -2629,27 +2714,29 @@ package Alias; main::set_access('name', \%name, 'r'); my %loose_match; - # Determined by the constructor code if this name should match loosely or - # not. The constructor parameters can override this, but it isn't fully - # implemented, as should have ability to override Unicode one's via - # something like a set_loose_match() + # Should this name match loosely or not. main::set_access('loose_match', \%loose_match, 'r'); - my %make_pod_entry; - # Some aliases should not get their own entries because they are covered - # by a wild-card, and some we want to discourage use of. Binary - main::set_access('make_pod_entry', \%make_pod_entry, 'r'); + my %make_re_pod_entry; + # Some aliases should not get their own entries in the re section of the + # pod, because they are covered by a wild-card, and some we want to + # discourage use of. Binary + main::set_access('make_re_pod_entry', \%make_re_pod_entry, 'r', 's'); + + my %ucd; + # Is this documented to be accessible via Unicode::UCD + main::set_access('ucd', \%ucd, 'r', 's'); my %status; # Aliases have a status, like deprecated, or even suppressed (which means # they don't appear in documentation). Enum main::set_access('status', \%status, 'r'); - my %externally_ok; + my %ok_as_filename; # Similarly, some aliases should not be considered as usable ones for # external use, such as file names, or we don't want documentation to # recommend them. Boolean - main::set_access('externally_ok', \%externally_ok, 'r'); + main::set_access('ok_as_filename', \%ok_as_filename, 'r'); sub new { my $class = shift; @@ -2659,14 +2746,15 @@ package Alias; $name{$addr} = shift; $loose_match{$addr} = shift; - $make_pod_entry{$addr} = shift; - $externally_ok{$addr} = shift; + $make_re_pod_entry{$addr} = shift; + $ok_as_filename{$addr} = shift; $status{$addr} = shift; + $ucd{$addr} = shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; # Null names are never ok externally - $externally_ok{$addr} = 0 if $name{$addr} eq ""; + $ok_as_filename{$addr} = 0 if $name{$addr} eq ""; return $self; } @@ -3036,7 +3124,7 @@ sub trace { return main::trace(@_); } # If the range list is empty, return a large value that isn't adjacent # to any that could be in the range list, for simpler tests - return $LAST_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}}; + return $MAX_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}}; return $ranges{$addr}->[0]->start; } @@ -3509,11 +3597,28 @@ sub trace { return main::trace(@_); } # Don't add an exact duplicate, as it isn't really a multiple if ($end >= $r->[$i]->start) { + my $existing_value = $r->[$i]->value; + my $existing_type = $r->[$i]->type; + return if $value eq $existing_value && $type eq $existing_type; + + # If the multiple value is part of an existing range, we want + # to split up that range, so that only the single code point + # is affected. To do this, we first call ourselves + # recursively to delete that code point from the table, having + # preserved its current data above. Then we call ourselves + # recursively again to add the new multiple, which we know by + # the test just above is different than the current code + # point's value, so it will become a range containing a single + # code point: just itself. Finally, we add back in the + # pre-existing code point, which will again be a single code + # point range. Because 'i' likely will have changed as a + # result of these operations, we can't just continue on, but + # do this operation recursively as well. if ($r->[$i]->start != $r->[$i]->end) { - Carp::my_carp_bug("$owner_name_of{$addr}Can't cope with adding a multiple record when the other range ($r->[$i]) contains more than one code point. No action taken."); - return; + $self->_add_delete('-', $start, $end, ""); + $self->_add_delete('+', $start, $end, $value, Type => $type); + return $self->_add_delete('+', $start, $end, $existing_value, Type => $existing_type, Replace => $MULTIPLE); } - return if $value eq $r->[$i]->value && $type eq $r->[$i]->type; } trace "Adding multiple record at $i with $start..$end, $value" if main::DEBUG && $to_trace; @@ -4008,8 +4113,8 @@ sub trace { return main::trace(@_); } # And finally, add the gap from the end of the table to the max # possible code point - if ($max < $LAST_UNICODE_CODEPOINT) { - $new->add_range($max + 1, $LAST_UNICODE_CODEPOINT); + if ($max < $MAX_UNICODE_CODEPOINT) { + $new->add_range($max + 1, $MAX_UNICODE_CODEPOINT); } return $new; } @@ -4242,7 +4347,9 @@ sub trace { return main::trace(@_); } my $a = $a_ranges[$i]; my $b = $b_ranges[$i]; trace "self $a; other $b" if main::DEBUG && $to_trace; - return 0 if $a->start != $b->start || $a->end != $b->end; + return 0 if ! defined $b + || $a->start != $b->start + || $a->end != $b->end; } return 1; } @@ -4267,7 +4374,7 @@ sub trace { return main::trace(@_); } return $try_hard if $code >= 0xFDD0 && $code <= 0xFDEF; return $try_hard if ($code & 0xFFFE) == 0xFFFE; # includes FFFF - return $try_hard if $code > $LAST_UNICODE_CODEPOINT; # keep in range + return $try_hard if $code > $MAX_UNICODE_CODEPOINT; # keep in range return $try_hard if $code >= 0xD800 && $code <= 0xDFFF; # no surrogate return 1; @@ -4398,8 +4505,8 @@ sub trace { return main::trace(@_); } main::set_access('property', \%property, 'r'); my %aliases; - # Ordered list of aliases of the table's name. The first ones in the list - # are output first in comments + # Ordered list of alias objects of the table's name. The first ones in + # the list are output first in comments main::set_access('aliases', \%aliases, 'readable_array'); my %comment; @@ -4416,10 +4523,11 @@ sub trace { return main::trace(@_); } # files. main::set_access('note', \%note, 'readable_array'); - my %internal_only; - # Boolean; if set means any file that contains this table is marked as for - # internal-only use. - main::set_access('internal_only', \%internal_only); + my %fate; + # Enum; there are a number of possibilities for what happens to this + # table: it could be normal, or suppressed, or not for external use. See + # values at definition for $SUPPRESSED. + main::set_access('fate', \%fate, 'r'); my %find_table_from_alias; # The parent property passes this pointer to a hash which this class adds @@ -4474,8 +4582,8 @@ sub trace { return main::trace(@_); } sub new { # All arguments are key => value pairs, which you can see below, most - # of which match fields documented above. Otherwise: Pod_Entry, - # Externally_Ok, and Fuzzy apply to the names of the table, and are + # of which match fields documented above. Otherwise: Re_Pod_Entry, + # OK_as_Filename, and Fuzzy apply to the names of the table, and are # documented in the Alias package return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2; @@ -4493,7 +4601,6 @@ sub trace { return main::trace(@_); } my $complete_name = $complete_name{$addr} = delete $args{'Complete_Name'}; $format{$addr} = delete $args{'Format'}; - $internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0; $output_range_counts{$addr} = delete $args{'Output_Range_Counts'}; $property{$addr} = delete $args{'_Property'}; $range_list{$addr} = delete $args{'_Range_List'}; @@ -4501,12 +4608,14 @@ sub trace { return main::trace(@_); } $status_info{$addr} = delete $args{'_Status_Info'} || ""; $range_size_1{$addr} = delete $args{'Range_Size_1'} || 0; $caseless_equivalent{$addr} = delete $args{'Caseless_Equivalent'} || 0; + $fate{$addr} = delete $args{'Fate'} || $ORDINARY; + my $ucd = delete $args{'UCD'}; my $description = delete $args{'Description'}; - my $externally_ok = delete $args{'Externally_Ok'}; + my $ok_as_filename = delete $args{'OK_as_Filename'}; my $loose_match = delete $args{'Fuzzy'}; my $note = delete $args{'Note'}; - my $make_pod_entry = delete $args{'Pod_Entry'}; + my $make_re_pod_entry = delete $args{'Re_Pod_Entry'}; my $perl_extension = delete $args{'Perl_Extension'}; # Shouldn't have any left over @@ -4528,28 +4637,40 @@ sub trace { return main::trace(@_); } push @{$description{$addr}}, $description if $description; push @{$note{$addr}}, $note if $note; - if ($status{$addr} eq $PLACEHOLDER) { + if ($fate{$addr} == $PLACEHOLDER) { # A placeholder table doesn't get documented, is a perl extension, # and quite likely will be empty - $make_pod_entry = 0 if ! defined $make_pod_entry; + $make_re_pod_entry = 0 if ! defined $make_re_pod_entry; $perl_extension = 1 if ! defined $perl_extension; + $ucd = 0 if ! defined $ucd; push @tables_that_may_be_empty, $complete_name{$addr}; + $self->add_comment(<<END); +This is a placeholder because it is not in Version $string_version of Unicode, +but is needed by the Perl core to work gracefully. Because it is not in this +version of Unicode, it will not be listed in $pod_file.pod +END } - elsif (! $status{$addr}) { - - # If hasn't set its status already, see if it is on one of the - # lists of properties or tables that have particular statuses; if - # not, is normal. The lists are prioritized so the most serious - # ones are checked first - if (exists $why_suppressed{$complete_name} + elsif (exists $why_suppressed{$complete_name} # Don't suppress if overridden && ! grep { $_ eq $complete_name{$addr} } @output_mapped_properties) - { - $status{$addr} = $SUPPRESSED; - } - elsif (exists $why_deprecated{$complete_name}) { + { + $fate{$addr} = $SUPPRESSED; + } + elsif ($fate{$addr} == $SUPPRESSED + && ! exists $why_suppressed{$property{$addr}->complete_name}) + { + Carp::my_carp_bug("There is no current capability to set the reason for suppressing."); + # perhaps Fate => [ $SUPPRESSED, "reason" ] + } + + # If hasn't set its status already, see if it is on one of the + # lists of properties or tables that have particular statuses; if + # not, is normal. The lists are prioritized so the most serious + # ones are checked first + if (! $status{$addr}) { + if (exists $why_deprecated{$complete_name}) { $status{$addr} = $DEPRECATED; } elsif (exists $why_stabilized{$complete_name}) { @@ -4562,11 +4683,7 @@ sub trace { return main::trace(@_); } # Existence above doesn't necessarily mean there is a message # associated with it. Use the most serious message. if ($status{$addr}) { - if ($why_suppressed{$complete_name}) { - $status_info{$addr} - = $why_suppressed{$complete_name}; - } - elsif ($why_deprecated{$complete_name}) { + if ($why_deprecated{$complete_name}) { $status_info{$addr} = $why_deprecated{$complete_name}; } @@ -4583,24 +4700,35 @@ sub trace { return main::trace(@_); } $perl_extension{$addr} = $perl_extension || 0; + # Don't list a property by default that is internal only + if ($fate{$addr} > $MAP_PROXIED) { + $make_re_pod_entry = 0 if ! defined $make_re_pod_entry; + $ucd = 0 if ! defined $ucd; + } + else { + $ucd = 1 if ! defined $ucd; + } + # By convention what typically gets printed only or first is what's # first in the list, so put the full name there for good output # clarity. Other routines rely on the full name being first on the # list $self->add_alias($full_name{$addr}, - Externally_Ok => $externally_ok, + OK_as_Filename => $ok_as_filename, Fuzzy => $loose_match, - Pod_Entry => $make_pod_entry, + Re_Pod_Entry => $make_re_pod_entry, Status => $status{$addr}, + UCD => $ucd, ); # Then comes the other name, if meaningfully different. if (standardize($full_name{$addr}) ne standardize($name{$addr})) { $self->add_alias($name{$addr}, - Externally_Ok => $externally_ok, + OK_as_Filename => $ok_as_filename, Fuzzy => $loose_match, - Pod_Entry => $make_pod_entry, + Re_Pod_Entry => $make_re_pod_entry, Status => $status{$addr}, + UCD => $ucd, ); } @@ -4661,15 +4789,17 @@ sub trace { return main::trace(@_); } my %args = @_; my $loose_match = delete $args{'Fuzzy'}; - my $make_pod_entry = delete $args{'Pod_Entry'}; - $make_pod_entry = $YES unless defined $make_pod_entry; + my $make_re_pod_entry = delete $args{'Re_Pod_Entry'}; + $make_re_pod_entry = $YES unless defined $make_re_pod_entry; - my $externally_ok = delete $args{'Externally_Ok'}; - $externally_ok = 1 unless defined $externally_ok; + my $ok_as_filename = delete $args{'OK_as_Filename'}; + $ok_as_filename = 1 unless defined $ok_as_filename; my $status = delete $args{'Status'}; $status = $NORMAL unless defined $status; + my $ucd = delete $args{'UCD'} // 1; + Carp::carp_extra_args(\%args) if main::DEBUG && %args; # Capitalize the first letter of the alias unless it is one of the CJK @@ -4736,8 +4866,8 @@ sub trace { return main::trace(@_); } splice @$list, $insert_position, 0, - Alias->new($name, $loose_match, $make_pod_entry, - $externally_ok, $status); + Alias->new($name, $loose_match, $make_re_pod_entry, + $ok_as_filename, $status, $ucd); # This name may be shorter than any existing ones, so clear the cache # of the shortest, so will have to be recalculated. @@ -4781,7 +4911,7 @@ sub trace { return main::trace(@_); } foreach my $alias ($self->aliases()) { # Don't use an alias that isn't ok to use for an external name. - next if ! $alias->externally_ok; + next if ! $alias->ok_as_filename; my $name = main::Standardize($alias->name); trace $self, $name if main::DEBUG && $to_trace; @@ -4800,13 +4930,40 @@ sub trace { return main::trace(@_); } } } + # If the short name isn't a nice one, perhaps an equivalent table has + # a better one. + if (! defined $short_name{$addr} + || $short_name{$addr} eq "" + || $short_name{$addr} eq "_") + { + my $return; + foreach my $follower ($self->children) { # All equivalents + my $follower_name = $follower->short_name; + next unless defined $follower_name; + + # Anything (except undefined) is better than underscore or + # empty + if (! defined $return || $return eq "_") { + $return = $follower_name; + next; + } + + # If the new follower name isn't "_" and is shorter than the + # current best one, prefer the new one. + next if $follower_name eq "_"; + next if length $follower_name > length $return; + $return = $follower_name; + } + $short_name{$addr} = $return if defined $return; + } + # If no suitable external name return undef if (! defined $short_name{$addr}) { $$nominal_length_ptr = undef if $nominal_length_ptr; return; } - # Don't allow a null external name. + # Don't allow a null short name. if ($short_name{$addr} eq "") { $short_name{$addr} = '_'; $nominal_short_name_length{$addr} = 1; @@ -4822,7 +4979,9 @@ sub trace { return main::trace(@_); } sub external_name { # Returns the external name that this table should be known by. This - # is usually the short_name, but not if the short_name is undefined. + # is usually the short_name, but not if the short_name is undefined, + # in which case the external_name is arbitrarily set to the + # underscore. my $self = shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; @@ -4924,8 +5083,6 @@ sub trace { return main::trace(@_); } my $return = ""; $return .= $DEVELOPMENT_ONLY if $compare_versions; $return .= $HEADER; - no overloading; - $return .= $INTERNAL_ONLY if $internal_only{pack 'J', $self}; return $return; } @@ -4987,7 +5144,8 @@ sub trace { return main::trace(@_); } # certain number of blocks, might as well output the whole # thing if it all will fit in one block. The number of # ranges below is an approximate number for that. - && $self->property->type == $BINARY + && ($self->property->type == $BINARY + || $self->property->type == $FORCED_BINARY) # && $self->property->tables == 2 Can't do this because the # non-binary properties, like NFDQC aren't specifiable # by the notation @@ -5261,6 +5419,41 @@ sub trace { return main::trace(@_); } return; } + sub set_fate { # Set the fate of a table + my $self = shift; + my $fate = shift; + my $reason = shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + my $addr = do { no overloading; pack 'J', $self; }; + + return if $fate{$addr} == $fate; # If no-op + + # Can only change the ordinary fate, except if going to $MAP_PROXIED + return if $fate{$addr} != $ORDINARY && $fate != $MAP_PROXIED; + + $fate{$addr} = $fate; + + # Don't document anything to do with a non-normal fated table + if ($fate != $ORDINARY) { + my $put_in_pod = ($fate == $MAP_PROXIED) ? 1 : 0; + foreach my $alias ($self->aliases) { + $alias->set_ucd($put_in_pod); + + # MAP_PROXIED doesn't affect the match tables + next if $fate == $MAP_PROXIED; + $alias->set_make_re_pod_entry($put_in_pod); + } + } + + # Save the reason for suppression for output + if ($fate == $SUPPRESSED && defined $reason) { + $why_suppressed{$complete_name{$addr}} = $reason; + } + + return; + } + sub lock { # Don't allow changes to the table from now on. This stores a stack # trace of where it was called, so that later attempts to modify it @@ -5335,8 +5528,7 @@ sub trace { return main::trace(@_); } *$sub = sub { use strict "refs"; my $self = shift; - no overloading; - return $range_list{pack 'J', $self}->$sub(@_); + return $self->_range_list->$sub(@_); } } @@ -5352,7 +5544,7 @@ sub trace { return main::trace(@_); } return if $self->carp_if_locked; no overloading; - return $range_list{pack 'J', $self}->$sub(@_); + return $self->_range_list->$sub(@_); } } @@ -5407,13 +5599,9 @@ sub trace { return main::trace(@_); } \%anomalous_entries, 'readable_array'); - my %core_access; - # This is a string, solely for documentation, indicating how one can get - # access to this property via the Perl core. - main::set_access('core_access', \%core_access, 'r', 's'); - my %to_output_map; # Enum as to whether or not to write out this map table: + # 0 don't output # $EXTERNAL_MAP means its existence is noted in the documentation, and # it should not be removed nor its format changed. This # is done for those files that have traditionally been @@ -5432,7 +5620,6 @@ sub trace { return main::trace(@_); } # Optional initialization data for the table. my $initialize = delete $args{'Initialize'}; - my $core_access = delete $args{'Core_Access'}; my $default_map = delete $args{'Default_Map'}; my $property = delete $args{'_Property'}; my $full_name = delete $args{'Full_Name'}; @@ -5452,7 +5639,6 @@ sub trace { return main::trace(@_); } my $addr = do { no overloading; pack 'J', $self; }; $anomalous_entries{$addr} = []; - $core_access{$addr} = $core_access; $default_map{$addr} = $default_map; $self->initialize($initialize) if defined $initialize; @@ -5627,8 +5813,10 @@ sub trace { return main::trace(@_); } if defined $global_to_output_map{$full_name}; # If table says to output, do so; if says to suppress it, do so. + my $fate = $self->fate; + return $INTERNAL_MAP if $fate == $INTERNAL_ONLY; return $EXTERNAL_MAP if grep { $_ eq $full_name } @output_mapped_properties; - return 0 if $self->status eq $SUPPRESSED; + return 0 if $fate == $SUPPRESSED || $fate == $MAP_PROXIED; my $type = $self->property->type; @@ -5660,7 +5848,23 @@ sub trace { return main::trace(@_); } my $return = $self->SUPER::header(); - $return .= $INTERNAL_ONLY if $self->to_output_map == $INTERNAL_MAP; + if ($self->to_output_map == $INTERNAL_MAP) { + $return .= $INTERNAL_ONLY_HEADER; + } + else { + my $property_name = $self->property->full_name; + $return .= <<END; + +# !!!!!!! IT IS DEPRECATED TO USE THIS FILE !!!!!!! + +# This file is for internal use by core Perl only. It is retained for +# backwards compatibility with applications that may have come to rely on it, +# but its format and even its name or existence are subject to change without +# notice in a future Perl version. Don't use it directly. Instead, its +# contents are now retrievable through a stable API in the Unicode::UCD +# module: Unicode::UCD::prop_invmap('$property_name'). +END + } return $return; } @@ -5686,7 +5890,7 @@ sub trace { return main::trace(@_); } # have our own flag for just this purpose; but it works now to exclude # Perl generated synonyms from the lists for properties, where the # name is always the proper Unicode one. - my @property_aliases = grep { $_->externally_ok } $self->aliases; + my @property_aliases = grep { $_->ok_as_filename } $self->aliases; my $count = $self->count; my $default_map = $default_map{$addr}; @@ -5775,16 +5979,8 @@ END $property_aliases[$i]->name . '(cp)' ); } - $comment .= - "\nwhere 'cp' is $cp. Note that $these_mappings $are "; - - my $access = $core_access{$addr}; - if ($access) { - $comment .= "accessible through the Perl core via $access."; - } - else { - $comment .= "not accessible through the Perl core directly."; - } + my $full_name = $self->property->full_name; + $comment .= "\nwhere 'cp' is $cp. Note that $these_mappings $are accessible via the function prop_invmap('$full_name') in Unicode::UCD"; # And append any commentary already set from the actual property. $comment .= "\n\n" . $self->comment if $self->comment; @@ -5840,20 +6036,8 @@ END # The remaining variables are temporaries used while writing each table, # to output special ranges. - my $has_hangul_syllables; my @multi_code_point_maps; # Map is to more than one code point. - # The key is the base name of the code point, and the value is an - # array giving all the ranges that use this base name. Each range - # is actually a hash giving the 'low' and 'high' values of it. - my %names_ending_in_code_point; - my %loose_names_ending_in_code_point; - - # Inverse mapping. The list of ranges that have these kinds of - # names. Each element contains the low, high, and base names in a - # hash. - my @code_points_ending_in_code_point; - sub handle_special_range { # Called in the middle of write when it finds a range it doesn't know # how to handle. @@ -5873,32 +6057,47 @@ END # No need to output the range if it maps to the default. return if $map eq $default_map{$addr}; + my $property = $self->property; + # Switch based on the map type... if ($type == $HANGUL_SYLLABLE) { # These are entirely algorithmically determinable based on # some constants furnished by Unicode; for now, just set a # flag to indicate that have them. After everything is figured - # out, we will output the code that does the algorithm. - $has_hangul_syllables = 1; + # out, we will output the code that does the algorithm. (Don't + # output them if not needed because we are suppressing this + # property.) + $has_hangul_syllables = 1 if $property->to_output_map; } elsif ($type == $CP_IN_NAME) { - # Code points whose the name ends in their code point are also + # Code points whose name ends in their code point are also # algorithmically determinable, but need information about the map # to do so. Both the map and its inverse are stored in data - # structures output in the file. - push @{$names_ending_in_code_point{$map}->{'low'}}, $low; - push @{$names_ending_in_code_point{$map}->{'high'}}, $high; - - my $squeezed = $map =~ s/[-\s]+//gr; - push @{$loose_names_ending_in_code_point{$squeezed}->{'low'}}, $low; - push @{$loose_names_ending_in_code_point{$squeezed}->{'high'}}, $high; - - push @code_points_ending_in_code_point, { low => $low, + # structures output in the file. They are stored in the mean time + # in global lists The lists will be written out later into Name.pm, + # which is created only if needed. In order to prevent duplicates + # in the list, only add to them for one property, should multiple + # ones need them. + if ($needing_code_points_ending_in_code_point == 0) { + $needing_code_points_ending_in_code_point = $property; + } + if ($property == $needing_code_points_ending_in_code_point) { + push @{$names_ending_in_code_point{$map}->{'low'}}, $low; + push @{$names_ending_in_code_point{$map}->{'high'}}, $high; + + my $squeezed = $map =~ s/[-\s]+//gr; + push @{$loose_names_ending_in_code_point{$squeezed}->{'low'}}, + $low; + push @{$loose_names_ending_in_code_point{$squeezed}->{'high'}}, + $high; + + push @code_points_ending_in_code_point, { low => $low, high => $high, name => $map - }; + }; + } } elsif ($range->type == $MULTI_CP || $range->type == $NULL) { @@ -5907,7 +6106,8 @@ END # output format. for my $code_point ($low .. $high) { - # The pack() below can't cope with surrogates. + # The pack() below can't cope with surrogates. XXX This may + # no longer be true if ($code_point >= 0xD800 && $code_point <= 0xDFFF) { Carp::my_carp("Surrogate code point '$code_point' in mapping to '$map' in $self. No map created"); next; @@ -5978,6 +6178,10 @@ END my $name = $self->property->swash_name; + # Currently there is nothing in the pre_body unless a swash is being + # generated. + return unless defined $name; + if (defined $swash_keys{$name}) { Carp::my_carp(join_lines(<<END Already created a swash name '$name' for $swash_keys{$name}. This means that @@ -6010,239 +6214,6 @@ END $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n"; } - if ($has_hangul_syllables || @code_points_ending_in_code_point) { - - # Convert these structures to output format. - my $code_points_ending_in_code_point = - main::simple_dumper(\@code_points_ending_in_code_point, - ' ' x 8); - my $names = main::simple_dumper(\%names_ending_in_code_point, - ' ' x 8); - my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point, - ' ' x 8); - - # Do the same with the Hangul names, - my $jamo; - my $jamo_l; - my $jamo_v; - my $jamo_t; - my $jamo_re; - if ($has_hangul_syllables) { - - # Construct a regular expression of all the possible - # combinations of the Hangul syllables. - my @L_re; # Leading consonants - for my $i ($LBase .. $LBase + $LCount - 1) { - push @L_re, $Jamo{$i} - } - my @V_re; # Middle vowels - for my $i ($VBase .. $VBase + $VCount - 1) { - push @V_re, $Jamo{$i} - } - my @T_re; # Trailing consonants - for my $i ($TBase + 1 .. $TBase + $TCount - 1) { - push @T_re, $Jamo{$i} - } - - # The whole re is made up of the L V T combination. - $jamo_re = '(' - . join ('|', sort @L_re) - . ')(' - . join ('|', sort @V_re) - . ')(' - . join ('|', sort @T_re) - . ')?'; - - # These hashes needed by the algorithm were generated - # during reading of the Jamo.txt file - $jamo = main::simple_dumper(\%Jamo, ' ' x 8); - $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8); - $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8); - $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8); - } - - $pre_body .= <<END; - -# To achieve significant memory savings when this file is read in, -# algorithmically derivable code points are omitted from the main body below. -# Instead, the following routines can be used to translate between name and -# code point and vice versa - -{ # Closure - - # Matches legal code point. 4-6 hex numbers, If there are 6, the - # first two must be '10'; if there are 5, the first must not be a '0'. - # First can match at the end of a word provided that the end of the - # word doesn't look like a hex number. - my \$run_on_code_point_re = qr/$run_on_code_point_re/; - my \$code_point_re = qr/$code_point_re/; - - # In the following hash, the keys are the bases of names which includes - # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The values - # of each key is another hash which is used to get the low and high ends - # for each range of code points that apply to the name. - my %names_ending_in_code_point = ( -$names - ); - - # The following hash is a copy of the previous one, except is for loose - # matching, so each name has blanks and dashes squeezed out - my %loose_names_ending_in_code_point = ( -$loose_names - ); - - # And the following array gives the inverse mapping from code points to - # names. Lowest code points are first - my \@code_points_ending_in_code_point = ( -$code_points_ending_in_code_point - ); -END - # Earlier releases didn't have Jamos. No sense outputting - # them unless will be used. - if ($has_hangul_syllables) { - $pre_body .= <<END; - - # Convert from code point to Jamo short name for use in composing Hangul - # syllable names - my %Jamo = ( -$jamo - ); - - # Leading consonant (can be null) - my %Jamo_L = ( -$jamo_l - ); - - # Vowel - my %Jamo_V = ( -$jamo_v - ); - - # Optional trailing consonant - my %Jamo_T = ( -$jamo_t - ); - - # Computed re that splits up a Hangul name into LVT or LV syllables - my \$syllable_re = qr/$jamo_re/; - - my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE "; - my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE"; - - # These constants names and values were taken from the Unicode standard, - # version 5.1, section 3.12. They are used in conjunction with Hangul - # syllables - my \$SBase = $SBase_string; - my \$LBase = $LBase_string; - my \$VBase = $VBase_string; - my \$TBase = $TBase_string; - my \$SCount = $SCount; - my \$LCount = $LCount; - my \$VCount = $VCount; - my \$TCount = $TCount; - my \$NCount = \$VCount * \$TCount; -END - } # End of has Jamos - - $pre_body .= << 'END'; - - sub name_to_code_point_special { - my ($name, $loose) = @_; - - # Returns undef if not one of the specially handled names; otherwise - # returns the code point equivalent to the input name - # $loose is non-zero if to use loose matching, 'name' in that case - # must be input as upper case with all blanks and dashes squeezed out. -END - if ($has_hangul_syllables) { - $pre_body .= << 'END'; - - if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//) - || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//)) - { - return if $name !~ qr/^$syllable_re$/; - my $L = $Jamo_L{$1}; - my $V = $Jamo_V{$2}; - my $T = (defined $3) ? $Jamo_T{$3} : 0; - return ($L * $VCount + $V) * $TCount + $T + $SBase; - } -END - } - $pre_body .= << 'END'; - - # Name must end in 'code_point' for this to handle. - return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x) - || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x)); - - my $base = $1; - my $code_point = CORE::hex $2; - my $names_ref; - - if ($loose) { - $names_ref = \%loose_names_ending_in_code_point; - } - else { - return if $base !~ s/-$//; - $names_ref = \%names_ending_in_code_point; - } - - # Name must be one of the ones which has the code point in it. - return if ! $names_ref->{$base}; - - # Look through the list of ranges that apply to this name to see if - # the code point is in one of them. - for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) { - return if $names_ref->{$base}{'low'}->[$i] > $code_point; - next if $names_ref->{$base}{'high'}->[$i] < $code_point; - - # Here, the code point is in the range. - return $code_point; - } - - # Here, looked like the name had a code point number in it, but - # did not match one of the valid ones. - return; - } - - sub code_point_to_name_special { - my $code_point = shift; - - # Returns the name of a code point if algorithmically determinable; - # undef if not -END - if ($has_hangul_syllables) { - $pre_body .= << 'END'; - - # If in the Hangul range, calculate the name based on Unicode's - # algorithm - if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) { - use integer; - my $SIndex = $code_point - $SBase; - my $L = $LBase + $SIndex / $NCount; - my $V = $VBase + ($SIndex % $NCount) / $TCount; - my $T = $TBase + $SIndex % $TCount; - $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}"; - $name .= $Jamo{$T} if $T != $TBase; - return $name; - } -END - } - $pre_body .= << 'END'; - - # Look through list of these code points for one in range. - foreach my $hash (@code_points_ending_in_code_point) { - return if $code_point < $hash->{'low'}; - if ($code_point <= $hash->{'high'}) { - return sprintf("%s-%04X", $hash->{'name'}, $code_point); - } - } - return; # None found - } -} # End closure - -END - } # End of has hangul or code point in name maps. - my $format = $self->format; my $return = <<END; @@ -6281,11 +6252,7 @@ END my $addr = do { no overloading; pack 'J', $self; }; # Clear the temporaries - $has_hangul_syllables = 0; undef @multi_code_point_maps; - undef %names_ending_in_code_point; - undef %loose_names_ending_in_code_point; - undef @code_points_ending_in_code_point; # Calculate the format of the table if not already done. my $format = $self->format; @@ -6422,7 +6389,15 @@ use base '_Base_Table'; # if the Unicode one is deprecated, the Perl one will be too. Not so for # unrelated tables. Relatedness makes generating the documentation easier. # -# 2) Conflicting. It may be that there will eventually be name clashes, with +# 2) Complement. +# Like equivalents, two tables may be the inverses of each other, the +# intersection between them is null, and the union is every Unicode code +# point. The two tables that occupy a binary property are necessarily like +# this. By specifying one table as the complement of another, we can avoid +# storing it on disk (using the other table and performing a fast +# transform), and some memory and calculations. +# +# 3) Conflicting. It may be that there will eventually be name clashes, with # the same name meaning different things. For a while, there actually were # conflicts, but they have so far been resolved by changing Perl's or # Unicode's definitions to match the other, but when this code was written, @@ -6452,9 +6427,10 @@ sub trace { return main::trace(@_); } my %parent; # The parent table to this one, initially $self. This allows us to - # distinguish between equivalent tables that are related, and those which - # may not be, but share the same output file because they match the exact - # same set of code points in the current Unicode release. + # distinguish between equivalent tables that are related (for which this + # is set to), and those which may not be, but share the same output file + # because they match the exact same set of code points in the current + # Unicode release. main::set_access('parent', \%parent, 'r'); my %children; @@ -6474,7 +6450,7 @@ sub trace { return main::trace(@_); } my %complement; # Points to the complement that this table is expressed in terms of; 0 if # none. - main::set_access('complement', \%complement, 'r', 's' ); + main::set_access('complement', \%complement, 'r'); sub new { my $class = shift; @@ -6601,6 +6577,20 @@ sub trace { return main::trace(@_); } return "Table '$name'"; } + sub _range_list { + # Returns the range list associated with this table, which will be the + # complement's if it has one. + + my $self = shift; + my $complement; + if (($complement = $self->complement) != 0) { + return ~ $complement->_range_list; + } + else { + return $self->SUPER::_range_list; + } + } + sub add_alias { # Add a synonym for this table. See the comments in the base class @@ -6723,7 +6713,14 @@ sub trace { return main::trace(@_); } Carp::my_carp_bug("Use add_alias() to set two Perl tables '$self' and '$other', equivalent."); return; } - } elsif (! $other->perl_extension) { + } elsif ($self->property != $other->property # Depending on + # situation, might + # be better to use + # add_alias() + # instead for same + # property + && ! $other->perl_extension) + { Carp::my_carp_bug("set_equivalent_to should have 'Related => 0 for equivalencing two Unicode properties. Assuming $self is not related to $other"); $related = 0; } @@ -6740,11 +6737,12 @@ sub trace { return main::trace(@_); } # Any tables that are equivalent to or children of this table must now # instead be equivalent to or (children) to the new leader (parent), # still equivalent. The equivalency includes their matches_all info, - # and for related tables, their status + # and for related tables, their fate and status. # All related tables are of necessity equivalent, but the converse # isn't necessarily true my $status = $other->status; my $status_info = $other->status_info; + my $fate = $other->fate; my $matches_all = $matches_all{other_addr}; my $caseless_equivalent = $other->caseless_equivalent; foreach my $table ($current_leader, @{$equivalents{$leader}}) { @@ -6760,6 +6758,11 @@ sub trace { return main::trace(@_); } $parent{$table_addr} = $other; push @{$children{$other_addr}}, $table; $table->set_status($status, $status_info); + + # This reason currently doesn't get exposed outside; otherwise + # would have to look up the parent's reason and use it instead. + $table->set_fate($fate, "Parent's fate"); + $self->set_caseless_equivalent($caseless_equivalent); } } @@ -6771,6 +6774,26 @@ sub trace { return main::trace(@_); } return; } + sub set_complement { + # Set $self to be the complement of the parameter table. $self is + # locked, as what it contains should all come from the other table. + + my $self = shift; + my $other = shift; + + my %args = @_; + Carp::carp_extra_args(\%args) if main::DEBUG && %args; + + if ($other->complement != 0) { + Carp::my_carp_bug("Can't set $self to be the complement of $other, which itself is the complement of " . $other->complement); + return; + } + my $addr = do { no overloading; pack 'J', $self; }; + $complement{$addr} = $other; + $self->lock; + return; + } + sub add_range { # Add a range to the list for this table. my $self = shift; # Rest of parameters passed on @@ -6779,6 +6802,14 @@ sub trace { return main::trace(@_); } return $self->_range_list->add_range(@_); } + sub header { + my $self = shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + # All match tables are to be used only by the Perl core. + return $self->SUPER::header() . $INTERNAL_ONLY_HEADER; + } + sub pre_body { # Does nothing for match tables. return } @@ -6787,6 +6818,21 @@ sub trace { return main::trace(@_); } return } + sub set_fate { + my $self = shift; + my $fate = shift; + my $reason = shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + $self->SUPER::set_fate($fate, $reason); + + # All children share this fate + foreach my $child ($self->children) { + $child->set_fate($fate, $reason); + } + return; + } + sub write { my $self = shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; @@ -6899,18 +6945,23 @@ END # listing all possible combinations in the comment, we make # sure that each synonym occurs at least once, and add # commentary that the other combinations are possible. + # Because regular expressions don't recognize things like + # \p{jsn=}, only look at non-null right-hand-sides my @property_aliases = $table_property->aliases; - my @table_aliases = $table->aliases; - - Carp::my_carp_bug("$table doesn't have any names. Proceeding anyway.") unless @table_aliases; + my @table_aliases = grep { $_->name ne "" } $table->aliases; # The alias lists above are already ordered in the order we # want to output them. To ensure that each synonym is listed, - # we must use the max of the two numbers. - my $listed_combos = main::max(scalar @table_aliases, - scalar @property_aliases); + # we must use the max of the two numbers. But if there are no + # legal synonyms (nothing in @table_aliases), then we don't + # list anything. + my $listed_combos = (@table_aliases) + ? main::max(scalar @table_aliases, + scalar @property_aliases) + : 0; trace "$listed_combos, tables=", scalar @table_aliases, "; names=", scalar @property_aliases if main::DEBUG; + my $property_had_compound_name = 0; for my $i (0 .. $listed_combos - 1) { @@ -6946,17 +6997,7 @@ END my $flag = $property->status || $table->status || $table_alias_object->status; - if ($flag) { - if ($flag ne $PLACEHOLDER) { - $flags{$flag} = $status_past_participles{$flag}; - } else { - $flags{$flag} = <<END; -a placeholder because it is not in Version $string_version of Unicode, but is -needed by the Perl core to work gracefully. Because it is not in this version -of Unicode, it will not be listed in $pod_file.pod -END - } - } + $flags{$flag} = $status_past_participles{$flag} if $flag; $loose_count++; @@ -7034,7 +7075,7 @@ END my $synonyms; my $entries; - if ($total_entries <= 1) { + if ($total_entries == 1) { $synonyms = ""; $entries = 'entry'; $any_of_these = 'this' @@ -7045,7 +7086,7 @@ END $any_of_these = 'any of these' } - my $comment = ""; + my $comment = "Use Unicode::UCD::prop_invlist() to access the contents of this file.\n\n"; if ($has_unrelated) { $comment .= <<END; This file is for tables that are not necessarily related: To conserve @@ -7062,14 +7103,22 @@ END foreach my $flag (sort keys %flags) { $comment .= <<END; '$flag' below means that this form is $flags{$flag}. +Consult $pod_file.pod END - next if $flag eq $PLACEHOLDER; - $comment .= "Consult $pod_file.pod\n"; } $comment .= "\n"; } - $comment .= <<END; + if ($total_entries == 0) { + Carp::my_carp("No regular expression construct can match $leader, as all names for it are the null string. Creating file anyway."); + $comment .= <<END; +This file returns the $code_points in Unicode Version $string_version for +$leader, but it is inaccessible through Perl regular expressions, as +"\\p{prop=}" is not recognized. +END + + } else { + $comment .= <<END; This file returns the $code_points in Unicode Version $string_version that $match$synonyms: @@ -7080,6 +7129,7 @@ characters matters or doesn't matter, and other permissible syntactic variants. Upper/lower case distinctions never matter. END + } if ($compound_name) { $comment .= <<END; @@ -7285,7 +7335,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } # A boolean that gives whether the input data should declare all the # tables used, or not. If the former, unknown ones raise a warning. main::set_access('pre_declared_maps', - \%pre_declared_maps, 'r'); + \%pre_declared_maps, 'r', 's'); sub new { # The only required parameter is the positionally first, name. All @@ -7319,6 +7369,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } # values should be defined for all # properties, except those overriding this // $v_version ge v5.1.0; + # Rest of parameters passed on. $has_only_code_point_maps{$addr} = 1; @@ -7410,12 +7461,11 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } _Alias_Hash => $table_ref{$addr}, _Property => $self, - # gets property's status by default + # gets property's fate and status by default + Fate => $self->fate, Status => $self->status, _Status_Info => $self->status_info, - %args, - Internal_Only_Warning => 1); # Override any - # input param + %args); return unless defined $table; } @@ -7434,7 +7484,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } Carp::my_carp("$self Added a match table '$name' to a string property '$self'. Changed it to a non-string property. Bad News."); $type{$addr} = $NON_STRING; } - elsif ($type{$addr} != $ENUM) { + elsif ($type{$addr} != $ENUM && $type{$addr} != $FORCED_BINARY) { if (scalar main::uniques(values %{$table_ref{$addr}}) > 2 && $type{$addr} == $BINARY) { @@ -7524,6 +7574,11 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } my $addr = do { no overloading; pack 'J', $self; }; + # Swash names are used only on regular map tables; otherwise there + # should be no access to the property map table from other parts of + # Perl. + return if $map{$addr}->fate != $ORDINARY; + return $file{$addr} if defined $file{$addr}; return $map{$addr}->external_name; } @@ -7576,6 +7631,23 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } return $map{pack 'J', $self}->map_add_or_replace_non_nulls($map{pack 'J', $other}); } + sub set_proxy_for { + # Certain tables are not generally written out to files, but + # Unicode::UCD has the intelligence to know that the file for $self + # can be used to reconstruct those tables. This routine just changes + # things so that UCD pod entries for those suppressed tables are + # generated, so the fact that a proxy is used is invisible to the + # user. + + my $self = shift; + + foreach my $property_name (@_) { + my $ref = property_ref($property_name); + next if $ref->to_output_map; + $ref->set_fate($MAP_PROXIED); + } + } + sub set_type { # Set the type of the property. Mostly this is figured out by the # data in the table. But this is used to set it explicitly. The @@ -7587,27 +7659,40 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } my $type = shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; - if ($type != $ENUM && $type != $BINARY && $type != $STRING) { + if ($type != $ENUM + && $type != $BINARY + && $type != $FORCED_BINARY + && $type != $STRING) + { Carp::my_carp("Unrecognized type '$type'. Type not set"); return; } { no overloading; $type{pack 'J', $self} = $type; } - return if $type != $BINARY; + return if $type != $BINARY && $type != $FORCED_BINARY; my $yes = $self->table('Y'); $yes = $self->table('Yes') if ! defined $yes; - $yes = $self->add_match_table('Y') if ! defined $yes; - $yes->add_alias('Yes'); - $yes->add_alias('T'); - $yes->add_alias('True'); - + $yes = $self->add_match_table('Y', Full_Name => 'Yes') + if ! defined $yes; + + # Add aliases in order wanted, duplicates will be ignored. We use a + # binary property present in all releases for its ordered lists of + # true/false aliases. Note, that could run into problems in + # outputting things in that we don't distinguish between the name and + # full name of these. Hopefully, if the table was already created + # before this code is executed, it was done with these set properly. + my $bm = property_ref("Bidi_Mirrored"); + foreach my $alias ($bm->table("Y")->aliases) { + $yes->add_alias($alias->name); + } my $no = $self->table('N'); $no = $self->table('No') if ! defined $no; - $no = $self->add_match_table('N') if ! defined $no; - $no->add_alias('No'); - $no->add_alias('F'); - $no->add_alias('False'); + $no = $self->add_match_table('N', Full_Name => 'No') if ! defined $no; + foreach my $alias ($bm->table("N")->aliases) { + $no->add_alias($alias->name); + } + return; } @@ -7681,7 +7766,9 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } # If already have figured these out, no need to do so again, but we do # a double check on ENUMS to make sure that a string property hasn't # improperly been classified as an ENUM, so continue on with those. - return if $type == $STRING || $type == $BINARY; + return if $type == $STRING + || $type == $BINARY + || $type == $FORCED_BINARY; # If every map is to a code point, is a string property. if ($type == $UNKNOWN @@ -7726,6 +7813,29 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } return; } + sub set_fate { + my $self = shift; + my $fate = shift; + my $reason = shift; # Ignored unless suppressing + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + + my $addr = do { no overloading; pack 'J', $self; }; + if ($fate == $SUPPRESSED) { + $why_suppressed{$self->complete_name} = $reason; + } + + # Each table shares the property's fate, except that MAP_PROXIED + # doesn't affect match tables + $map{$addr}->set_fate($fate, $reason); + if ($fate != $MAP_PROXIED) { + foreach my $table ($map{$addr}, $self->tables) { + $table->set_fate($fate, $reason); + } + } + return; + } + + # Most of the accessors for a property actually apply to its map table. # Setup up accessor functions for those, referring to %map for my $sub (qw( @@ -7740,13 +7850,13 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } comment complete_name containing_range - core_access count default_map delete_range description each_range external_name + fate file_path format initialize @@ -7761,7 +7871,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } range_size_1 reset_each_range set_comment - set_core_access set_default_map set_file_path set_final_comment @@ -8163,7 +8272,7 @@ sub utf8_heavy_name ($$) { { # Closure - my $indent_increment = " " x 2; + my $indent_increment = " " x (($debugging_build) ? 2 : 0); my %already_output; $main::simple_dumper_nesting = 0; @@ -8177,7 +8286,7 @@ sub utf8_heavy_name ($$) { my $item = shift; my $indent = shift; - $indent = "" if ! defined $indent; + $indent = "" if ! $debugging_build || ! defined $indent; Carp::carp_extra_args(\@_) if main::DEBUG && @_; @@ -8202,9 +8311,8 @@ sub utf8_heavy_name ($$) { my $copy = $item; $copy = $UNDEF unless defined $copy; - # Quote non-numbers (numbers also have optional leading '-' and - # fractions) - if ($copy eq "" || $copy !~ /^ -? \d+ ( \. \d+ )? $/x) { + # Quote non-integers (integers also have optional leading '-') + if ($copy eq "" || $copy !~ /^ -? \d+ $/x) { # Escape apostrophe and backslash $copy =~ s/ ( ['\\] ) /\\$1/xg; @@ -8243,9 +8351,10 @@ sub utf8_heavy_name ($$) { # Indent array elements one level $output .= &simple_dumper($item->[$i], $next_indent); - $output =~ s/\n$//; # Remove trailing nl so as to - $output .= " # [$i]\n"; # add a comment giving the - # array index + next if ! $debugging_build; + $output =~ s/\n$//; # Remove any trailing nl so + $output .= " # [$i]\n"; # as to add a comment giving + # the array index } $output .= $indent; # Indent closing ']' to orig level } @@ -8456,26 +8565,27 @@ sub finish_property_setup { # These are used so much, that we set globals for them. $gc = property_ref('General_Category'); $block = property_ref('Block'); + $script = property_ref('Script'); # Perl adds this alias. $gc->add_alias('Category'); # For backwards compatibility, these property files have particular names. - my $upper = property_ref('Uppercase_Mapping'); - $upper->set_core_access('uc()'); - $upper->set_file('Upper'); # This is what utf8.c calls it - - my $lower = property_ref('Lowercase_Mapping'); - $lower->set_core_access('lc()'); - $lower->set_file('Lower'); - - my $title = property_ref('Titlecase_Mapping'); - $title->set_core_access('ucfirst()'); - $title->set_file('Title'); + property_ref('Uppercase_Mapping')->set_file('Upper'); # This is what + # utf8.c calls it + property_ref('Lowercase_Mapping')->set_file('Lower'); + property_ref('Titlecase_Mapping')->set_file('Title'); my $fold = property_ref('Case_Folding'); $fold->set_file('Fold') if defined $fold; + # Unicode::Normalize expects this file with this name and directory. + my $ccc = property_ref('Canonical_Combining_Class'); + if (defined $ccc) { + $ccc->set_file('CombiningClass'); + $ccc->set_directory(File::Spec->curdir()); + } + # utf8.c has a different meaning for non range-size-1 for map properties # that this program doesn't currently handle; and even if it were changed # to do so, some other code may be using them expecting range size 1. @@ -8793,14 +8903,30 @@ sub process_PropValueAliases { my ($property, @data) = split /\s*;\s*/; - # The full name for the ccc property value is in field 2 of the - # remaining ones; field 1 for all other properties. Swap ccc fields 1 - # and 2. (Rightmost splice removes field 2, returning it; left splice - # inserts that into field 1, thus shifting former field 1 to field 2.) - splice (@data, 1, 0, splice(@data, 2, 1)) if $property eq 'ccc'; + # The ccc property has an extra field at the beginning, which is the + # numeric value. Move it to be after the other two, mnemonic, fields, + # so that those will be used as the property value's names, and the + # number will be an extra alias. (Rightmost splice removes field 1-2, + # returning them in a slice; left splice inserts that before anything, + # thus shifting the former field 0 to after them.) + splice (@data, 0, 0, splice(@data, 1, 2)) if $property eq 'ccc'; + + # Field 0 is a short name unless "n/a"; field 1 is the full name. If + # there is no short name, use the full one in element 1 + if ($data[0] eq "n/a") { + $data[0] = $data[1]; + } + elsif ($data[0] ne $data[1] + && standardize($data[0]) eq standardize($data[1]) + && $data[1] !~ /[[:upper:]]/) + { + # Also, there is a bug in the file in which "n/a" is omitted, and + # the two fields are identical except for case, and the full name + # is all lower case. Copy the "short" name unto the full one to + # give it some upper case. - # If there is no short name, use the full one in element 1 - $data[0] = $data[1] if $data[0] eq "n/a"; + $data[1] = $data[0]; + } # Earlier releases had the pseudo property 'qc' that should expand to # the ones that replace it below. @@ -9185,6 +9311,14 @@ sub output_perl_charnames_line ($$) { # the little used $compare_versions feature is enabled. my $compare_versions_range_list; + # These are constants to the $property_info hash in this subroutine, to + # avoid using a quoted-string which might have a typo. + my $TYPE = 'type'; + my $DEFAULT_MAP = 'default_map'; + my $DEFAULT_TABLE = 'default_table'; + my $PSEUDO_MAP_TYPE = 'pseudo_map_type'; + my $MISSINGS = 'missings'; + sub process_generic_property_file { # This processes a file containing property mappings and puts them # into internal map tables. It should be used to handle any property @@ -9363,22 +9497,22 @@ sub output_perl_charnames_line ($$) { # If not the first time for this property, retrieve info about # it from the cache - if (defined ($property_info{$property_addr}{'type'})) { - $property_type = $property_info{$property_addr}{'type'}; - $default_map = $property_info{$property_addr}{'default'}; + if (defined ($property_info{$property_addr}{$TYPE})) { + $property_type = $property_info{$property_addr}{$TYPE}; + $default_map = $property_info{$property_addr}{$DEFAULT_MAP}; $map_type - = $property_info{$property_addr}{'pseudo_map_type'}; + = $property_info{$property_addr}{$PSEUDO_MAP_TYPE}; $default_table - = $property_info{$property_addr}{'default_table'}; + = $property_info{$property_addr}{$DEFAULT_TABLE}; } else { # Here, is the first time for this property. Set up the # cache. - $property_type = $property_info{$property_addr}{'type'} + $property_type = $property_info{$property_addr}{$TYPE} = $property_object->type; $map_type - = $property_info{$property_addr}{'pseudo_map_type'} + = $property_info{$property_addr}{$PSEUDO_MAP_TYPE} = $property_object->pseudo_map_type; # The Unicode files are set up so that if the map is not @@ -9392,7 +9526,7 @@ sub output_perl_charnames_line ($$) { else { $property_object->set_type($BINARY); $property_type - = $property_info{$property_addr}{'type'} + = $property_info{$property_addr}{$TYPE} = $BINARY; } } @@ -9417,17 +9551,17 @@ sub output_perl_charnames_line ($$) { if ($property_type == $STRING || $property_type == $UNKNOWN) { - $property_info{$addr}{'missings'} = $default; + $property_info{$addr}{$MISSINGS} = $default; } else { - $property_info{$addr}{'missings'} + $property_info{$addr}{$MISSINGS} = $property_object->table($default); } } # Finished storing all the @missings defaults in the input # file so far. Get the one for the current property. - my $missings = $property_info{$property_addr}{'missings'}; + my $missings = $property_info{$property_addr}{$MISSINGS}; # But we likely have separately stored what the default # should be. (This is to accommodate versions of the @@ -9491,7 +9625,7 @@ END $default_table = $missings; $default_map = $missings->full_name; } - $property_info{$property_addr}{'default_table'} + $property_info{$property_addr}{$DEFAULT_TABLE} = $default_table; } elsif ($default_map ne $missings) { @@ -9504,7 +9638,7 @@ END } } - $property_info{$property_addr}{'default'} + $property_info{$property_addr}{$DEFAULT_MAP} = $default_map; # If haven't done so already, find the table corresponding @@ -9514,7 +9648,7 @@ END && $property_type != $UNKNOWN) { $default_table = $property_info{$property_addr} - {'default_table'} + {$DEFAULT_TABLE} = $property_object->table($default_map); } } # End of is first time for this property @@ -9732,7 +9866,7 @@ END # the code point and name on each line. This was actually the hardest # thing to design around. The code points in those ranges may actually # have real maps not given by these two lines. These maps will either - # be algorithmically determinable, or in the extracted files furnished + # be algorithmically determinable, or be in the extracted files furnished # with the UCD. In the event of conflicts between these extracted files, # and this one, Unicode says that this one prevails. But it shouldn't # prevail for conflicts that occur in these ranges. The data from the @@ -9756,21 +9890,21 @@ END # first.) A comment for it will later be constructed based on the # actual properties present and used $perl_charname = Property->new('Perl_Charnames', - Core_Access => '\N{...} and "use charnames"', Default_Map => "", Directory => File::Spec->curdir(), File => 'Name', - Internal_Only_Warning => 1, + Fate => $INTERNAL_ONLY, Perl_Extension => 1, Range_Size_1 => \&output_perl_charnames_line, Type => $STRING, ); + $perl_charname->set_proxy_for('Name', 'Name_Alias'); my $Perl_decomp = Property->new('Perl_Decomposition_Mapping', Directory => File::Spec->curdir(), File => 'Decomposition', Format => $DECOMP_STRING_FORMAT, - Internal_Only_Warning => 1, + Fate => $INTERNAL_ONLY, Perl_Extension => 1, Default_Map => $CODE_POINT, @@ -9786,10 +9920,11 @@ END Map_Type => $COMPUTE_NO_MULTI_CP, Type => $STRING, ); + $Perl_decomp->set_proxy_for('Decomposition_Mapping', 'Decomposition_Type'); $Perl_decomp->add_comment(join_lines(<<END This mapping is a combination of the Unicode 'Decomposition_Type' and 'Decomposition_Mapping' properties, formatted for use by normalize.pm. It is -identical to the official Unicode 'Decomposition_Mapping' property except for +identical to the official Unicode 'Decomposition_Mapping' property except for two things: 1) It omits the algorithmically determinable Hangul syllable decompositions, which normalize.pm handles algorithmically. @@ -10354,16 +10489,14 @@ END sub filter_v6_ucd { - # Unicode 6.0 co-opted the name BELL for U+1F514, so change the input - # to pretend that U+0007 is ALERT instead, and for Perl 5.14, don't - # allow the BELL name for U+1F514, so that the old usage can be - # deprecated for one cycle. + # Unicode 6.0 co-opted the name BELL for U+1F514, but we haven't + # accepted that yet to allow for some deprecation cycles. return if $_ !~ /^(?:0007|1F514|070F);/; my ($code_point, @fields) = split /\s*;\s*/, $_, -1; if ($code_point eq '0007') { - $fields[$CHARNAME] = "ALERT"; + $fields[$CHARNAME] = ""; } elsif ($code_point eq '070F') { # Unicode Corrigendum #8; see # http://www.unicode.org/versions/corrigendum8.html @@ -10509,6 +10642,16 @@ sub filter_arabic_shaping_line { # relatively few entries in them that have different full mappings, # and thus skip the simple mapping tables altogether. + # New tables with just the simple mappings that are overridden by the + # full ones are constructed. These are for Unicode::UCD, which + # requires the simple mappings. The Case_Folding table is a combined + # table of both the simple and full mappings, with the full ones being + # in the hash, and the simple ones, even those overridden by the hash, + # being in the base table. That same mechanism could have been + # employed here, except that the docs have said that the generated + # files are usuable directly by programs, so we dare not change the + # format in any way. + my $file= shift; Carp::carp_extra_args(\@_) if main::DEBUG && @_; @@ -10526,20 +10669,26 @@ sub filter_arabic_shaping_line { # The simple version's name in each mapping merely has an 's' in # front of the full one's - my $simple = property_ref('s' . $case); + my $simple_name = 's' . $case; + my $simple = property_ref($simple_name); $simple->initialize($full) if $simple->to_output_map(); my $simple_only = Property->new("_s$case", Type => $STRING, Default_Map => $CODE_POINT, Perl_Extension => 1, - Description => "The simple mappings for $case for code points that have full mappings as well"); + Fate => $INTERNAL_ONLY, + Description => "This contains the simple mappings for $case for just the code points that have different full mappings"); $simple_only->set_to_output_map($INTERNAL_MAP); $simple_only->add_comment(join_lines( <<END This file is for UCD.pm so that it can construct simple mappings that would otherwise be lost because they are overridden by full mappings. END )); + + unless ($simple->to_output_map()) { + $simple_only->set_proxy_for($simple_name); + } } return; @@ -10674,6 +10823,18 @@ sub filter_old_style_case_folding { $to_output_simple = property_ref('Simple_Case_Folding')->to_output_map; + if (! $to_output_simple) { + property_ref('Case_Folding')->set_proxy_for('Simple_Case_Folding'); + } + + # If we ever wanted to show that these tables were combined, a new + # property method could be created, like set_combined_props() + property_ref('Case_Folding')->add_comment(join_lines( <<END +This file includes both the simple and full case folding maps. The simple +ones are in the main body of the table below, and the full ones adding to or +overriding them are in the hash. +END + )); return; } @@ -10889,8 +11050,6 @@ sub filter_numeric_value_line { { # Closure my %unihan_properties; - my $iicore; - sub setup_unihan { # Do any special setup for Unihan properties. @@ -10899,16 +11058,23 @@ sub filter_numeric_value_line { my $usource = property_ref('kIRG_USource'); $usource->set_type($STRING) if defined $usource; - # This property is to be considered binary, so change all the values - # to Y. - $iicore = property_ref('kIICore'); + # This property is to be considered binary (it says so in + # http://www.unicode.org/reports/tr38/) + my $iicore = property_ref('kIICore'); if (defined $iicore) { - $iicore->add_match_table('Y') if ! defined $iicore->table('Y'); - - # We have to change the default map, because the @missing line is - # misleading, given that we are treating it as binary. - $iicore->set_default_map('N'); - $iicore->set_type($BINARY); + $iicore->set_type($FORCED_BINARY); + $iicore->table("Y")->add_note("Forced to a binary property as per unicode.org UAX #38."); + + # Unicode doesn't include the maps for this property, so don't + # warn that they are missing. + $iicore->set_pre_declared_maps(0); + $iicore->add_comment(join_lines( <<END +This property contains enum values, but Unicode UAX #38 says it should be +interpreted as binary, so Perl creates tables for both 1) its enum values, +plus 2) true/false tables in which it is considered true for all code points +that have a non-null value +END + )); } return; @@ -10943,12 +11109,6 @@ sub filter_numeric_value_line { return; } - # The iicore property is supposed to be a boolean, so convert to our - # standard boolean form. - if (defined $iicore && $unihan_properties{$property} == $iicore) { - $_ =~ s/$property.*/$property\tY/ - } - # Convert the tab separators to our standard semi-colons, and convert # the U+HHHH notation to the rest of the standard's HHHH s/\t/;/g; @@ -11149,6 +11309,7 @@ sub setup_script_extensions { Initialize => $sc, Default_Map => $sc->default_map, Pre_Declared_Maps => 0, + Format => $STRING_WHITE_SPACE_LIST, ); $scx->add_comment(join_lines( <<END The values for code points that appear in one script are just the same as for @@ -11169,6 +11330,29 @@ END } } +sub filter_script_extensions_line { + # The Scripts file comes with the full name for the scripts; the + # ScriptExtensions, with the short name. The final mapping file is a + # combination of these, and without adjustment, would have inconsistent + # entries. This filters the latter file to convert to full names. + # Entries look like this: + # 064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW + + my @fields = split /\s*;\s*/; + my @full_names; + foreach my $short_name (split " ", $fields[1]) { + push @full_names, $script->table($short_name)->full_name; + } + $fields[1] = join " ", @full_names; + $_ = join "; ", @fields; + + return; +} + +sub setup_v6_name_alias { + property_ref('Name_Alias')->add_map(7, 7, "ALERT"); +} + sub finish_Unicode() { # This routine should be called after all the Unicode files have been read # in. It: @@ -11256,10 +11440,10 @@ sub finish_Unicode() { # Add any remaining code points to the mapping, using the default for # missing code points. + my $default_table; if (defined (my $default_map = $property->default_map)) { # Make sure there is a match table for the default - my $default_table; if (! defined ($default_table = $property->table($default_map))) { $default_table = $property->add_match_table($default_map); } @@ -11276,15 +11460,14 @@ sub finish_Unicode() { } $default_table->set_complement($non_default_table); } + else { - # This fills in any missing values with the default. It's - # tempting to save some time and memory in running this program - # by skipping this step for binary tables where the default - # is easily calculated. But it is needed for generating - # the test file, and other changes would also be required to do - # so. - $property->add_map(0, $LAST_UNICODE_CODEPOINT, - $default_map, Replace => $NO); + # This fills in any missing values with the default. It's not + # necessary to do this with binary properties, as the default + # is defined completely in terms of the Y table. + $property->add_map(0, $MAX_UNICODE_CODEPOINT, + $default_map, Replace => $NO); + } } # Have all we need to populate the match tables. @@ -11292,7 +11475,7 @@ sub finish_Unicode() { my $maps_should_be_defined = $property->pre_declared_maps; foreach my $range ($property->ranges) { my $map = $range->value; - my $table = property_ref($property_name)->table($map); + my $table = $property->table($map); if (! defined $table) { # Integral and rational property values are not necessarily @@ -11303,12 +11486,30 @@ sub finish_Unicode() { { Carp::my_carp("Table '$property_name=$map' should have been defined. Defining it now.") } - $table = property_ref($property_name)->add_match_table($map); + $table = $property->add_match_table($map); } + next if $table->complement != 0; # Don't need to populate these $table->add_range($range->start, $range->end); } + # A forced binary property has additional true/false tables which + # should have been set up when it was forced into binary. The false + # table matches exactly the same set as the property's default table. + # The true table matches the complement of that. The false table is + # not the same as an additional set of aliases on top of the default + # table, so use 'set_equivalent_to'. If it were implemented as + # additional aliases, various things would have to be adjusted, but + # especially, if the user wants to get a list of names for the table + # using Unicode::UCD::prop_value_aliases(), s/he should get a + # different set depending on whether they want the default table or + # the false table. + if ($property_type == $FORCED_BINARY) { + $property->table('N')->set_equivalent_to($default_table, + Related => 1); + $property->table('Y')->set_complement($default_table); + } + # For Perl 5.6 compatibility, all properties matchable in regexes can # have an optional 'Is_' prefix. This is now done in utf8_heavy.pl. # But warn if this creates a conflict with a (new) Unicode property @@ -11426,15 +11627,18 @@ END # tables are deleted. my $scx = property_ref("Script_Extensions"); - foreach my $table ($scx->tables) { - next unless $table->name =~ /\s/; # Only the new tables have a space - # in their names, and all do - my @scripts = split /\s+/, $table->name; - foreach my $script (@scripts) { - my $script_table = $scx->table($script); - $script_table += $table; + if (defined $scx) { + foreach my $table ($scx->tables) { + next unless $table->name =~ /\s/; # All the new and only the new + # tables have a space in their + # names + my @scripts = split /\s+/, $table->name; + foreach my $script (@scripts) { + my $script_table = $scx->table($script); + $script_table += $table; + } + $scx->delete_match_table($table); } - $scx->delete_match_table($table); } return; @@ -11456,7 +11660,7 @@ sub compile_perl() { # 'Any' is all code points. As an error check, instead of just setting it # to be that, construct it to be the union of all the major categories $Any = $perl->add_match_table('Any', - Description => "[\\x{0000}-\\x{$LAST_UNICODE_CODEPOINT_STRING}]", + Description => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]", Matches_All => 1); foreach my $major_table ($gc->tables) { @@ -11467,10 +11671,10 @@ sub compile_perl() { $Any += $major_table; } - if ($Any->max != $LAST_UNICODE_CODEPOINT) { + if ($Any->max != $MAX_UNICODE_CODEPOINT) { Carp::my_carp_bug("Generated highest code point (" . sprintf("%X", $Any->max) - . ") doesn't match expected value $LAST_UNICODE_CODEPOINT_STRING.") + . ") doesn't match expected value $MAX_UNICODE_CODEPOINT_STRING.") } if ($Any->range_count != 1 || $Any->min != 0) { Carp::my_carp_bug("Generated table 'Any' doesn't match all code points.") @@ -11485,8 +11689,9 @@ sub compile_perl() { ); # Our internal-only property should be treated as more than just a - # synonym. - $perl->add_match_table('_CombAbove') + # synonym; grandfather it in to the pod. + $perl->add_match_table('_CombAbove', Re_Pod_Entry => 1, + Fate => $INTERNAL_ONLY, Status => $DISCOURAGED) ->set_equivalent_to(property_ref('ccc')->table('Above'), Related => 1); @@ -11551,8 +11756,9 @@ sub compile_perl() { my $lt = $gc->table('Lt'); # Earlier versions of mktables had this related to $lt since they have - # identical code points, but their casefolds are not equivalent, and so - # now must be kept as separate entities. + # identical code points, but their caseless equivalents are not the same, + # one being 'Cased' and the other being 'LC', and so now must be kept as + # separate entities. $Title += $lt if defined $lt; # If this Unicode version doesn't have Cased, set up our own. From @@ -11576,8 +11782,12 @@ sub compile_perl() { # one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), Format (Cf), # Modifier_Letter (Lm), or Modifier_Symbol (Sk). - # Perl has long had an internal-only alias for this property. - my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable'); + # Perl has long had an internal-only alias for this property; grandfather + # it in to the pod, but discourage its use. + my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable', + Re_Pod_Entry => 1, + Fate => $INTERNAL_ONLY, + Status => $DISCOURAGED); my $case_ignorable = property_ref('Case_Ignorable'); if (defined $case_ignorable && ! $case_ignorable->is_empty) { $perl_case_ignorable->set_equivalent_to($case_ignorable->table('Y'), @@ -11648,7 +11858,7 @@ sub compile_perl() { $Posix_Lower->set_caseless_equivalent($Posix_Alpha); my $Alnum = $perl->add_match_table('Alnum', - Description => 'Alphabetic and (Decimal) Numeric', + Description => 'Alphabetic and (decimal) Numeric', Initialize => $Alpha + $gc->table('Decimal_Number'), ); $Alnum->add_alias('XPosixAlnum'); @@ -11770,8 +11980,9 @@ sub compile_perl() { Description => '\p{Punct} + ASCII-range \p{Symbol}', Initialize => $gc->table('Punctuation') + ($ASCII & $gc->table('Symbol')), + Perl_Extension => 1 ); - $perl->add_match_table('PosixPunct', + $perl->add_match_table('PosixPunct', Perl_Extension => 1, Description => '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]', Initialize => $ASCII & $XPosixPunct, ); @@ -11820,8 +12031,11 @@ sub compile_perl() { # _CanonDCIJ is equivalent to Soft_Dotted, but if on a release earlier # than SD appeared, construct it ourselves, based on the first release SD - # was in. - my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ'); + # was in. A pod entry is grandfathered in for it + my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ', Re_Pod_Entry => 1, + Perl_Extension => 1, + Fate => $INTERNAL_ONLY, + Status => $DISCOURAGED); my $soft_dotted = property_ref('Soft_Dotted'); if (defined $soft_dotted && ! $soft_dotted->is_empty) { $CanonDCIJ->set_equivalent_to($soft_dotted->table('Y'), Related => 1); @@ -11842,15 +12056,17 @@ sub compile_perl() { } # These are used in Unicode's definition of \X - my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1); - my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1); + my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1, + Fate => $INTERNAL_ONLY); + my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1, + Fate => $INTERNAL_ONLY); # For backward compatibility, Perl has its own definition for IDStart # First, we include the underscore, and then the regular XID_Start also # have to be Words $perl->add_match_table('_Perl_IDStart', Perl_Extension => 1, - Internal_Only => 1, + Fate => $INTERNAL_ONLY, Initialize => ord('_') + (property_ref('XID_Start')->table('Y') & $Word) @@ -11918,7 +12134,9 @@ sub compile_perl() { # More GCB. If we found some hangul syllables, populate a combined # table. - my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V'); + my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V', + Perl_Extension => 1, + Fate => $INTERNAL_ONLY); my $LV = $gcb->table('LV'); if ($LV->is_empty) { push @tables_that_may_be_empty, $lv_lvt_v->complete_name; @@ -11969,48 +12187,28 @@ This file is for charnames.pm. It is the union of the $comment properties. Unicode_1_Name entries are used only for otherwise nameless code points. $alias_sentence +This file doesn't include the algorithmically determinable names. For those, +use 'unicore/Name.pm' END )); - - # The combining class property used by Perl's normalize.pm is not located - # in the normal mapping directory; create a copy for it. - my $ccc = property_ref('Canonical_Combining_Class'); - my $perl_ccc = Property->new('Perl_ccc', - Default_Map => $ccc->default_map, - Full_Name => 'Perl_Canonical_Combining_Class', - Internal_Only_Warning => 1, - Perl_Extension => 1, - Pod_Entry =>0, - Type => $ENUM, - Initialize => $ccc, - File => 'CombiningClass', - Directory => File::Spec->curdir(), - ); - $perl_ccc->set_to_output_map($EXTERNAL_MAP); - $perl_ccc->add_comment(join_lines(<<END -This mapping is for normalize.pm. It is currently identical to the Unicode -Canonical_Combining_Class property. + property_ref('Name')->add_comment(join_lines( <<END +This file doesn't include the algorithmically determinable names. For those, +use 'unicore/Name.pm' END )); - # This one match table for it is needed for calculations on output - my $default = $perl_ccc->add_match_table($ccc->default_map, - Initialize => $ccc->table($ccc->default_map), - Status => $SUPPRESSED); - # Construct the Present_In property from the Age property. if (-e 'DAge.txt' && defined (my $age = property_ref('Age'))) { my $default_map = $age->default_map; my $in = Property->new('In', Default_Map => $default_map, Full_Name => "Present_In", - Internal_Only_Warning => 1, Perl_Extension => 1, Type => $ENUM, Initialize => $age, ); $in->add_comment(join_lines(<<END -This file should not be used for any purpose. The values in this file are the +THIS FILE SHOULD NOT BE USED FOR ANY PURPOSE. The values in this file are the same as for $age, and not for what $in really means. This is because anything defined in a given release should have multiple values: that release and all higher ones. But only one value per code point can be represented in a table @@ -12084,9 +12282,10 @@ END foreach my $alias ($table->aliases) { next if $alias->name =~ /^_/; $table->add_alias('Is_' . $alias->name, - Pod_Entry => 0, + Re_Pod_Entry => 0, + UCD => 0, Status => $alias->status, - Externally_Ok => 0); + OK_as_Filename => 0); } } @@ -12103,7 +12302,7 @@ END Initialize => $gc->table('Unassigned') & property_ref('Noncharacter_Code_Point')->table('N')); - for (my $i = 0; $i <= $LAST_UNICODE_CODEPOINT; $i++ ) { + for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT; $i++ ) { $i = populate_char_info($i); # Note sets $i so may cause skips } } @@ -12126,13 +12325,12 @@ sub add_perl_synonyms() { # Construct the list of tables to get synonyms for. Start with all the # binary and the General_Category ones. - my @tables = grep { $_->type == $BINARY } property_ref('*'); + my @tables = grep { $_->type == $BINARY || $_->type == $FORCED_BINARY } + property_ref('*'); push @tables, $gc->tables; # If the version of Unicode includes the Script property, add its tables - if (defined property_ref('Script')) { - push @tables, property_ref('Script')->tables; - } + push @tables, $script->tables if defined $script; # The Block tables are kept separate because they are treated differently. # And the earliest versions of Unicode didn't include them, so add only if @@ -12197,8 +12395,8 @@ sub add_perl_synonyms() { # No name collision, so ok to add the perl synonym. - my $make_pod_entry; - my $externally_ok; + my $make_re_pod_entry; + my $ok_as_filename; my $status = $alias->status; if ($nominal_property == $block) { @@ -12208,36 +12406,36 @@ sub add_perl_synonyms() { # we don't want people using the name without the # 'In', so discourage that. if ($prefix eq "") { - $make_pod_entry = 1; + $make_re_pod_entry = 1; $status = $status || $DISCOURAGED; - $externally_ok = 0; + $ok_as_filename = 0; } elsif ($prefix eq 'In_') { - $make_pod_entry = 0; + $make_re_pod_entry = 0; $status = $status || $NORMAL; - $externally_ok = 1; + $ok_as_filename = 1; } else { - $make_pod_entry = 0; + $make_re_pod_entry = 0; $status = $status || $DISCOURAGED; - $externally_ok = 0; + $ok_as_filename = 0; } } elsif ($prefix ne "") { # The 'Is' prefix is handled in the pod by a wild # card, and we won't use it for an external name - $make_pod_entry = 0; + $make_re_pod_entry = 0; $status = $status || $NORMAL; - $externally_ok = 0; + $ok_as_filename = 0; } else { # Here, is an empty prefix, non block. This gets its # own pod entry and can be used for an external name. - $make_pod_entry = 1; + $make_re_pod_entry = 1; $status = $status || $NORMAL; - $externally_ok = 1; + $ok_as_filename = 1; } # Here, there isn't a perl pre-existing table with the @@ -12249,9 +12447,15 @@ sub add_perl_synonyms() { # Here, have found a table for $perl. Add this alias # to it, and are done with this prefix. $equivalent->add_alias($proposed_name, - Pod_Entry => $make_pod_entry, + Re_Pod_Entry => $make_re_pod_entry, + + # Currently don't output these in the + # ucd pod, as are strongly discouraged + # from being used + UCD => 0, + Status => $status, - Externally_Ok => $externally_ok); + OK_as_Filename => $ok_as_filename); trace "adding alias perl=$proposed_name to $equivalent" if main::DEBUG && $to_trace; next PREFIX; } @@ -12259,9 +12463,13 @@ sub add_perl_synonyms() { # Here, $perl doesn't already have a table that is a # synonym for this property, add one. my $added_table = $perl->add_match_table($proposed_name, - Pod_Entry => $make_pod_entry, + Re_Pod_Entry => $make_re_pod_entry, + + # See UCD comment just above + UCD => 0, + Status => $status, - Externally_Ok => $externally_ok); + OK_as_Filename => $ok_as_filename); # And it will be related to the actual table, since it is # based on it. $added_table->set_equivalent_to($actual, Related => 1); @@ -12347,7 +12555,10 @@ END # unless they are the same table. For example, N meaning Number or # Neutral is not likely to cause confusion, so don't add caveats to things # like them. - foreach my $property (grep { $_->type != $BINARY } property_ref('*')) { + foreach my $property (grep { $_->type != $BINARY + && $_->type != $FORCED_BINARY } + property_ref('*')) + { my $yes = $property->table('Yes'); if (defined $yes) { my $y = $property->table('Y'); @@ -12384,8 +12595,29 @@ sub register_file_for_name($$$) { if ($table->isa('Property')) { $table->set_file_path(@$directory_ref, $file); - push @map_properties, $table - if $directory_ref->[0] eq $map_directory; + push @map_properties, $table; + + # No swash means don't do the rest of this. + return if $table->fate != $ORDINARY; + + # Get the path to the file + my @path = $table->file_path; + + # Use just the file name if no subdirectory. + shift @path if $path[0] eq File::Spec->curdir(); + + my $file = join '/', @path; + + # Create a hash entry for utf8_heavy to get the file that stores this + # property's map table + foreach my $alias ($table->aliases) { + my $name = $alias->name; + $loose_property_to_file_of{standardize($name)} = $file; + } + + # And a way for utf8_heavy to find the proper key in the SwashInfo + # hash for this property. + $file_to_swash_name{$file} = "To" . $table->swash_name; return; } @@ -12423,12 +12655,38 @@ sub register_file_for_name($$$) { # Associate it with its file internally. Don't include the # $matches_directory first component $table->set_file_path(@$directory_ref, $file); + + # No swash means don't do the rest of this. + next if $table->isa('Map_Table') && $table->fate != $ORDINARY; + my $sub_filename = join('/', $directory_ref->[1, -1], $file); my $property = $table->property; - $property = ($property == $perl) - ? "" # 'perl' is never explicitly stated - : standardize($property->name) . '='; + my $property_name = ($property == $perl) + ? "" # 'perl' is never explicitly stated + : standardize($property->name) . '='; + + my $is_default = 0; # Is this table the default one for the property? + + # To calculate $is_default, we find if this table is the same as the + # default one for the property. But this is complicated by the + # possibility that there is a master table for this one, and the + # information is stored there instead of here. + my $parent = $table->parent; + my $leader_prop = $parent->property; + my $default_map = $leader_prop->default_map; + if (defined $default_map) { + my $default_table = $leader_prop->table($default_map); + $is_default = 1 if defined $default_table && $parent == $default_table; + } + + # Calculate the loose name for this table. Mostly it's just its name, + # standardized. But in the case of Perl tables that are single-form + # equivalents to Unicode properties, it is the latter's name. + my $loose_table_name = + ($property != $perl || $leader_prop == $perl) + ? standardize($table->name) + : standardize($parent->name); my $deprecated = ($table->status eq $DEPRECATED) ? $table->status_info @@ -12468,12 +12726,25 @@ sub register_file_for_name($$$) { if ((my $integer_name = $alias->name) =~ s/^ ( -? \d+ ) \.0+ $ /$1/x) { - $stricter_to_file_of{$property . $integer_name} - = $sub_filename; + $stricter_to_file_of{$property_name . $integer_name} + = $sub_filename; } } } + # For Unicode::UCD, create a mapping of the prop=value to the + # canonical =value for that property. + if ($standard =~ /=/) { + + # This could happen if a strict name mapped into an existing + # loose name. In that event, the strict names would have to + # be moved to a new hash. + if (exists($loose_to_standard_value{$standard})) { + Carp::my_carp_bug("'$standard' conflicts with a pre-existing use. Bad News. Continuing anyway"); + } + $loose_to_standard_value{$standard} = $loose_table_name; + } + # Keep a list of the deprecated properties and their filenames if ($deprecated && $complement == 0) { $utf8::why_deprecated{$sub_filename} = $deprecated; @@ -12483,6 +12754,10 @@ sub register_file_for_name($$$) { if ($caseless_equivalent != 0) { $caseless_equivalent_to{$standard} = $caseless_equivalent; } + + # Add to defaults list if the table this alias belongs to is the + # default one + $loose_defaults{$standard} = 1 if $is_default; } } @@ -12664,7 +12939,7 @@ sub format_pod_line ($$$;$$) { my @zero_match_tables; # List of tables that have no matches in this release -sub make_table_pod_entries($) { +sub make_re_pod_entries($) { # This generates the entries for the pod file for a given table. # Also done at this time are any children tables. The output looks like: # \p{Common} \p{Script=Common} (Short: \p{Zyyy}) (5178) @@ -12692,9 +12967,9 @@ sub make_table_pod_entries($) { # for each name each table goes by foreach my $table ($input_table, $input_table->children) { - # utf8_heavy.pl cannot deal with null string property values, so don't - # output any. - next if $table->name eq ""; + # utf8_heavy.pl cannot deal with null string property values, so skip + # any tables that have no non-null names. + next if ! grep { $_->name ne "" } $table->aliases; # First, gather all the info that applies to this table as a whole. @@ -12723,11 +12998,14 @@ sub make_table_pod_entries($) { foreach my $alias ($table->aliases) { # Skip if not to go in pod. - next unless $alias->make_pod_entry; + next unless $alias->make_re_pod_entry; # Start gathering all the components for the entry my $name = $alias->name; + # Skip if name is empty, as can't be accessed by regexes. + next if $name eq ""; + my $entry; # Holds the left column, may include extras my $entry_ref; # To refer to the left column's contents from # another entry; has no extras @@ -12743,20 +13021,42 @@ sub make_table_pod_entries($) { # Only generate one entry for all the aliases that mean true # or false in binary properties. Append a '*' to indicate # some are missing. (The heading comment notes this.) - my $wild_card_mark; + my $rhs; if ($type == $BINARY) { next if $name ne 'N' && $name ne 'Y'; - $wild_card_mark = '*'; + $rhs = "$name*"; + } + elsif ($type != $FORCED_BINARY) { + $rhs = $name; } else { - $wild_card_mark = ""; + + # Forced binary properties require special handling. It + # has two sets of tables, one set is true/false; and the + # other set is everything else. Entries are generated for + # each set. Use the Bidi_Mirrored property (which appears + # in all Unicode versions) to get a list of the aliases + # for the true/false tables. Of these, only output the N + # and Y ones, the same as, a regular binary property. And + # output all the rest, same as a non-binary property. + my $bm = property_ref("Bidi_Mirrored"); + if ($name eq 'N' || $name eq 'Y') { + $rhs = "$name*"; + } elsif (grep { $name eq $_->name } $bm->table("Y")->aliases, + $bm->table("N")->aliases) + { + next; + } + else { + $rhs = $name; + } } # Colon-space is used to give a little more space to be easier # to read; $entry = "\\p{" . $table_property_full_name - . ": $name$wild_card_mark}"; + . ": $rhs}"; # But for the reference to this entry, which will go in the # right column, where space is at a premium, use equals @@ -12852,11 +13152,18 @@ sub make_table_pod_entries($) { # Special case the binary N tables, so that will print # \P{single}, but use the Y table values to populate - # 'single', as we haven't populated the N table. + # 'single', as we haven't likewise populated the N table. + # For forced binary tables, we can't just look at the N + # table, but must see if this table is equivalent to the N + # one, as there are two equivalent beasts in these + # properties. my $test_table; my $p; - if ($type == $BINARY - && $input_table == $property->table('No')) + if ( ($type == $BINARY + && $input_table == $property->table('No')) + || ($type == $FORCED_BINARY + && $property->table('No')-> + is_set_equivalent_to($input_table))) { $test_table = $property->table('Yes'); $p = 'P'; @@ -12954,6 +13261,179 @@ sub make_table_pod_entries($) { return; } +sub make_ucd_table_pod_entries { + my $table = shift; + + # Generate the entries for the UCD section of the pod for $table. This + # also calculates if names are ambiguous, so has to be called even if the + # pod is not being output + + my $short_name = $table->name; + my $standard_short_name = standardize($short_name); + my $full_name = $table->full_name; + my $standard_full_name = standardize($full_name); + + my $full_info = ""; # Text of info column for full-name entries + my $other_info = ""; # Text of info column for short-name entries + my $short_info = ""; # Text of info column for other entries + my $meaning = ""; # Synonym of this table + + my $property = ($table->isa('Property')) + ? $table + : $table->parent->property; + + my $perl_extension = $table->perl_extension; + + # Get the more official name for for perl extensions that aren't + # stand-alone properties + if ($perl_extension && $property != $table) { + if ($property == $perl ||$property->type == $BINARY) { + $meaning = $table->complete_name; + } + else { + $meaning = $property->full_name . "=$full_name"; + } + } + + # There are three types of info column. One for the short name, one for + # the full name, and one for everything else. They mostly are the same, + # so initialize in the same loop. + foreach my $info_ref (\$full_info, \$short_info, \$other_info) { + if ($perl_extension && $property != $table) { + + # Add the synonymous name for the non-full name entries; and to + # the full-name entry if it adds extra information + if ($info_ref == \$other_info + || ($info_ref == \$short_info + && $standard_short_name ne $standard_full_name) + || standardize($meaning) ne $standard_full_name + ) { + $$info_ref .= "$meaning."; + } + } + elsif ($info_ref != \$full_info) { + + # Otherwise, the non-full name columns include the full name + $$info_ref .= $full_name; + } + + # And the full-name entry includes the short name, if different + if ($info_ref == \$full_info + && $standard_short_name ne $standard_full_name) + { + $full_info =~ s/\.\Z//; + $full_info .= " " if $full_info; + $full_info .= "(Short: $short_name)"; + } + + if ($table->perl_extension) { + $$info_ref =~ s/\.\Z//; + $$info_ref .= ". " if $$info_ref; + $$info_ref .= "(Perl extension)"; + } + } + + # Add any extra annotations to the full name entry + foreach my $more_info ($table->description, + $table->note, + $table->status_info) + { + next unless $more_info; + $full_info =~ s/\.\Z//; + $full_info .= ". " if $full_info; + $full_info .= $more_info; + } + + # These keep track if have created full and short name pod entries for the + # property + my $done_full = 0; + my $done_short = 0; + + # Every possible name is kept track of, even those that aren't going to be + # output. This way we can be sure to find the ambiguities. + foreach my $alias ($table->aliases) { + my $name = $alias->name; + my $standard = standardize($name); + my $info; + my $output_this = $alias->ucd; + + # If the full and short names are the same, we want to output the full + # one's entry, so it has priority. + if ($standard eq $standard_full_name) { + next if $done_full; + $done_full = 1; + $info = $full_info; + } + elsif ($standard eq $standard_short_name) { + next if $done_short; + $done_short = 1; + next if $standard_short_name eq $standard_full_name; + $info = $short_info; + } + else { + $info = $other_info; + } + + # Here, we have set up the two columns for this entry. But if an + # entry already exists for this name, we have to decide which one + # we're going to later output. + if (exists $ucd_pod{$standard}) { + + # If the two entries refer to the same property, it's not going to + # be ambiguous. (Likely it's because the names when standardized + # are the same.) But that means if they are different properties, + # there is ambiguity. + if ($ucd_pod{$standard}->{'property'} != $property) { + + # Here, we have an ambiguity. This code assumes that one is + # scheduled to be output and one not and that one is a perl + # extension (which is not to be output) and the other isn't. + # If those assumptions are wrong, things have to be rethought. + if ($ucd_pod{$standard}{'output_this'} == $output_this + || $ucd_pod{$standard}{'perl_extension'} == $perl_extension + || $output_this == $perl_extension) + { + Carp::my_carp("Bad news. $property and $ucd_pod{$standard}->{'property'} have unexpected output statuss and perl-extension combinations. Proceeding anyway."); + } + + # We modifiy the info column of the one being output to + # indicate the ambiguity. Set $which to point to that one's + # info. + my $which; + if ($ucd_pod{$standard}{'output_this'}) { + $which = \$ucd_pod{$standard}->{'info'}; + } + else { + $which = \$info; + $meaning = $ucd_pod{$standard}{'meaning'}; + } + + chomp $$which; + $$which =~ s/\.\Z//; + $$which .= "; NOT '$standard' meaning '$meaning'"; + + $ambiguous_names{$standard} = 1; + } + + # Use the non-perl-extension variant + next unless $ucd_pod{$standard}{'perl_extension'}; + } + + # Store enough information about this entry that we can later look for + # ambiguities, and output it properly. + $ucd_pod{$standard} = { 'name' => $name, + 'info' => $info, + 'meaning' => $meaning, + 'output_this' => $output_this, + 'perl_extension' => $perl_extension, + 'property' => $property, + 'status' => $alias->status, + }; + } # End of looping through all this table's aliases + + return; +} + sub pod_alphanumeric_sort { # Sort pod entries alphanumerically. @@ -13005,6 +13485,8 @@ sub make_pod () { # Create the .pod file. This generates the various subsections and then # combines them in one big HERE document. + my $Is_flags_text = "If an entry has flag(s) at its beginning, like \"$DEPRECATED\", the \"Is_\" form has the same flag(s)"; + return unless defined $pod_directory; print "Making pod file\n" if $verbosity >= $PROGRESS; @@ -13036,7 +13518,7 @@ e.g., C<\\p{blk:latin1}>. See L<perlunicode/"Blocks"> for more information about this. END } - my $text = "If an entry has flag(s) at its beginning, like \"$DEPRECATED\", the \"Is_\" form has the same flag(s)"; + my $text = $Is_flags_text; $text = "$exception_message $text" if $has_Is_conflicts; # And the 'Is_ line'; @@ -13068,22 +13550,22 @@ END . $formatted_properties; # Generate pod documentation lines for the tables that match nothing - my $zero_matches; + my $zero_matches = ""; if (@zero_match_tables) { @zero_match_tables = uniques(@zero_match_tables); $zero_matches = join "\n\n", map { $_ = '=item \p{' . $_->complete_name . "}" } sort { $a->complete_name cmp $b->complete_name } - uniques(@zero_match_tables); + @zero_match_tables; $zero_matches = <<END; =head2 Legal C<\\p{}> and C<\\P{}> constructs that match no characters Unicode has some property-value pairs that currently don't match anything. -This happens generally either because they are obsolete, or for symmetry with -other forms, but no language has yet been encoded that uses them. In this -version of Unicode, the following match zero code points: +This happens generally either because they are obsolete, or they exist for +symmetry with other forms, but no language has yet been encoded that uses +them. In this version of Unicode, the following match zero code points: =over 4 @@ -13113,10 +13595,7 @@ END foreach my $why (sort { $why_list{$a}->[0] cmp $why_list{$b}->[0] } keys %why_list) { - # Add to the output, all the properties that have that reason. Start - # with an empty line. - push @bad_re_properties, "\n\n"; - + # Add to the output, all the properties that have that reason. my $has_item = 0; # Flag if actually output anything. foreach my $name (@{$why_list{$why}}) { @@ -13142,6 +13621,9 @@ END my $short_name = $property->name; $short_name .= '=' . $property->table($table)->name if $table; + # Start with an empty line. + push @bad_re_properties, "\n\n" unless $has_item; + # And add the property as an item for the reason. push @bad_re_properties, "\n=item I<$name> ($short_name)\n"; $has_item = 1; @@ -13155,57 +13637,68 @@ END } # End of looping through each reason. - # Generate a list of the properties whose map table we output, from the - # global @map_properties. - my @map_tables_actually_output; - my $info_indent = 20; # Left column is narrower than \p{} table. - foreach my $property (@map_properties) { - - # Get the path to the file; don't output any not in the standard - # directory. - my @path = $property->file_path; - next if $path[0] ne $map_directory; - - # Don't mention map tables that are for internal-use only - next if $property->to_output_map == $INTERNAL_MAP; - - shift @path; # Remove the standard name + if (! @bad_re_properties) { + push @bad_re_properties, + "*** This installation accepts ALL non-Unihan properties ***"; + } + else { + # Add =over only if non-empty to avoid an empty =over/=back section, + # which is considered bad form. + unshift @bad_re_properties, "\n=over 4\n"; + push @bad_re_properties, "\n=back\n"; + } - my $file = join '/', @path; # In case is in sub directory - my $info = $property->full_name; - my $short_name = $property->name; - if ($info ne $short_name) { - $info .= " ($short_name)"; - } - foreach my $more_info ($property->description, - $property->note, - $property->status_info) - { - next unless $more_info; - $info =~ s/\.\Z//; - $info .= ". $more_info"; - } - push @map_tables_actually_output, format_pod_line($info_indent, - $file, - $info, - $property->status); + # Similiarly, generate a list of files that we don't use, grouped by the + # reasons why. First, create a hash whose keys are the reasons, and whose + # values are anonymous arrays of all the files that share that reason. + my %grouped_by_reason; + foreach my $file (keys %ignored_files) { + push @{$grouped_by_reason{$ignored_files{$file}}}, $file; } - # Sort alphabetically, and fold for output - @map_tables_actually_output = sort - pod_alphanumeric_sort @map_tables_actually_output; - @map_tables_actually_output - = simple_fold(\@map_tables_actually_output, - ' ', - $info_indent, - $automatic_pod_indent); + # Then, sort each group. + foreach my $group (keys %grouped_by_reason) { + @{$grouped_by_reason{$group}} = sort { lc $a cmp lc $b } + @{$grouped_by_reason{$group}} ; + } - # Generate a list of the formats that can appear in the map tables. - my @map_table_formats; - foreach my $format (sort keys %map_table_formats) { - push @map_table_formats, " $format $map_table_formats{$format}\n"; + # Finally, create the output text. For each reason (sorted by the + # alphabetically first file that has that reason)... + my @unused_files; + foreach my $reason (sort { lc $grouped_by_reason{$a}->[0] + cmp lc $grouped_by_reason{$b}->[0] + } + keys %grouped_by_reason) + { + # Add all the files that have that reason to the output. Start + # with an empty line. + push @unused_files, "\n\n"; + push @unused_files, map { "\n=item F<$_> \n" } + @{$grouped_by_reason{$reason}}; + # And add the reason under the list of files + push @unused_files, "\n$reason\n"; + } + + # Similarly, create the output text for the UCD section of the pod + my @ucd_pod; + foreach my $key (keys %ucd_pod) { + next unless $ucd_pod{$key}->{'output_this'}; + push @ucd_pod, format_pod_line($indent_info_column, + $ucd_pod{$key}->{'name'}, + $ucd_pod{$key}->{'info'}, + $ucd_pod{$key}->{'status'}, + ); } + # Sort alphabetically, and fold for output + @ucd_pod = sort { lc substr($a, 2) cmp lc substr($b, 2) } @ucd_pod; + my $ucd_pod = simple_fold(\@ucd_pod, + ' ', + $indent_info_column, + $automatic_pod_indent); + $ucd_pod = format_pod_line($indent_info_column, 'NAME', ' INFO') + . "\n" + . $ucd_pod; local $" = ""; # Everything is ready to assemble. @@ -13220,25 +13713,39 @@ To change this file, edit $0 instead. =head1 NAME -$pod_file - Index of Unicode Version $string_version properties in Perl +$pod_file - Index of Unicode Version $string_version character properties in Perl =head1 DESCRIPTION -There are many properties in Unicode, and Perl provides access to almost all of -them, as well as some additional extensions and short-cut synonyms. +This document provides information about the portion of the Unicode database +that deals with character properties, that is the portion that is defined on +single code points. (L</Other information in the Unicode data base> +below briefly mentions other data that Unicode provides.) + +Perl can provide access to all non-provisional Unicode character properties, +though not all are enabled by default. The omitted ones are the Unihan +properties (accessible via the CPAN module L<Unicode::Unihan>) and certain +deprecated or Unicode-internal properties. (An installation may choose to +recompile Perl's tables to change this. See L<Unicode character +properties that are NOT accepted by Perl>.) -And just about all of the few that aren't accessible through the Perl -core are accessible through the modules: L<Unicode::Normalize> and -L<Unicode::UCD>, and for Unihan properties, via the CPAN module -L<Unicode::Unihan>. +For most purposes, access to Unicode properties from the Perl core is through +regular expression matches, as described in the next section. +For some special purposes, and to access the properties that are not suitable +for regular expression matching, all the Unicode character properties that +Perl handles are accessible via the standard L<Unicode::UCD> module, as +described in the section L</Properties accessible through Unicode::UCD>. + +Perl also provides some additional extensions and short-cut synonyms +for Unicode properties. This document merely lists all available properties and does not attempt to explain what each property really means. There is a brief description of each -Perl extension. There is some detail about Blocks, Scripts, General_Category, +Perl extension; see L<perlunicode/Other Properties> for more information on +these. There is some detail about Blocks, Scripts, General_Category, and Bidi_Class in L<perlunicode>, but to find out about the intricacies of the -Unicode properties, refer to the Unicode standard. A good starting place is -L<$unicode_reference_url>. More information on the Perl extensions is in -L<perlunicode/Other Properties>. +official Unicode properties, refer to the Unicode standard. A good starting +place is L<$unicode_reference_url>. Note that you can define your own properties; see L<perlunicode/"User-Defined Character Properties">. @@ -13308,14 +13815,9 @@ There are several varieties of obsolescence: =over 4 -=item Obsolete - -Properties marked with $a_bold_obsolete in the table are considered -obsolete. - =item Stabilized -Obsolete properties may be stabilized. Such a determination does not indicate +A property may be stabilized. Such a determination does not indicate that the property should or should not be used; instead it is a declaration that the property will not be maintained nor extended for newly encoded characters. Such properties are marked with $a_bold_stabilized in the @@ -13323,7 +13825,7 @@ table. =item Deprecated -An obsolete property may be deprecated, perhaps because its original intent +A property may be deprecated, perhaps because its original intent has been replaced by another property, or because its specification was somehow defective. This means that its use is strongly discouraged, so much so that a warning will be issued if used, unless the @@ -13339,11 +13841,22 @@ earlier Unicode releases. A deprecated property may be made unavailable in a future Perl version, so it is best to move away from them. +A deprecated property may also be stabilized, but this fact is not shown. + +=item Obsolete + +Properties marked with $a_bold_obsolete in the table are considered (plain) +obsolete. Generally this designation is given to properties that Unicode once +used for internal purposes (but not any longer). + =back Some Perl extensions are present for backwards compatibility and are -discouraged from being used, but not obsolete. $A_bold_discouraged -flags each such entry in the table. +discouraged from being used, but are not obsolete. $A_bold_discouraged +flags each such entry in the table. Future Unicode versions may force +some of these extensions to be removed without warning, replaced by another +property with the same name that means something different. Use the +equivalent shown instead. @block_warning @@ -13406,7 +13919,7 @@ binary properties have both single and compound forms available. Note that all non-essential underscores are removed in the display of the short names below. -B<Summary legend:> +B<Legend summary:> =over 4 @@ -13423,7 +13936,8 @@ this property. =item B<$STRICTER> means tighter (stricter) name matching applies. -=item B<$DISCOURAGED> means use of this form is discouraged. +=item B<$DISCOURAGED> means use of this form is discouraged, and may not be +stable. =back @@ -13431,35 +13945,78 @@ $formatted_properties $zero_matches -=head1 Properties not accessible through \\p{} and \\P{} - -A few properties are accessible in Perl via various function calls only. -These are: +=head1 Properties accessible through Unicode::UCD + +All the Unicode character properties mentioned above (except for those marked +as for internal use by Perl) are also accessible by +L<Unicode::UCD/prop_invlist()>. + +Due to their nature, not all Unicode character properties are suitable for +regular expression matches, nor C<prop_invlist()>. The remaining +non-provisional, non-internal ones are accessible via +L<Unicode::UCD/prop_invmap()> (except for those that this Perl installation +hasn't included; see L<below for which those are|/Unicode character properties +that are NOT accepted by Perl>). + +For compatibility with other parts of Perl, all the single forms given in the +table in the L<section above|/Properties accessible through \\p{} and \\P{}> +are recognized. BUT, there are some ambiguities between some Perl extensions +and the Unicode properties, all of which are silently resolved in favor of the +official Unicode property. To avoid surprises, you should only use +C<prop_invmap()> for forms listed in the table below, which omits the +non-recommended ones. The affected forms are the Perl single form equivalents +of Unicode properties, such as C<\\p{sc}> being a single-form equivalent of +C<\\p{gc=sc}>, which is treated by C<prop_invmap()> as the C<Script> property, +whose short name is C<sc>. The table indicates the current ambiguities in the +INFO column, beginning with the word C<"NOT">. + +The standard Unicode properties listed below are documented in +L<$unicode_reference_url>; Perl_Decimal_Digit is documented in +L<Unicode::UCD/prop_invmap()>. The other Perl extensions are in +L<perlunicode/Other Properties>; + +The first column in the table is a name for the property; the second column is +an alternative name, if any, plus possibly some annotations. The alternative +name is the property's full name, unless that would simply repeat the first +column, in which case the second column indicates the property's short name +(if different). The annotations are given only in the entry for the full +name. If a property is obsolete, etc, the entry will be flagged with the same +characters used in the table in the L<section above|/Properties accessible +through \\p{} and \\P{}>, like B<$DEPRECATED> or B<$STABILIZED>. + +$ucd_pod + +=head1 Properties accessible through other means + +Certain properties are accessible also via core function calls. These are: Lowercase_Mapping lc() and lcfirst() Titlecase_Mapping ucfirst() Uppercase_Mapping uc() -Case_Folding is accessible through the C</i> modifier in regular expressions. +Also, Case_Folding is accessible through the C</i> modifier in regular +expressions. -The Name property is accessible through the C<\\N{}> interpolation in -double-quoted strings and regular expressions, but both usages require a C<use -charnames;> to be specified, which also contains related functions viacode(), -vianame(), and string_vianame(). +And, the Name and Name_Aliases properties are accessible through the C<\\N{}> +interpolation in double-quoted strings and regular expressions, but both +usages require a L<use charnames;|charnames> to be specified, which also +contains related functions viacode(), vianame(), and string_vianame(). -=head1 Unicode regular expression properties that are NOT accepted by Perl +Finally, most properties related to decomposition are accessible via +L<Unicode::Normalize>. + +=head1 Unicode character properties that are NOT accepted by Perl Perl will generate an error for a few character properties in Unicode when used in a regular expression. The non-Unihan ones are listed below, with the reasons they are not accepted, perhaps with work-arounds. The short names for the properties are listed enclosed in (parentheses). - -=over 4 +As described after the list, an installation can change the defaults and choose +to accept any of these. The list is machine generated based on the +choices made for the installation that generated this document. @bad_re_properties -=back - An installation can choose to allow any of these to be matched by downloading the Unicode database from L<http://www.unicode.org/Public/> to C<\$Config{privlib}>/F<unicore/> in the Perl source tree, changing the @@ -13467,54 +14024,31 @@ controlling lists contained in the program C<\$Config{privlib}>/F<unicore/mktables> and then re-compiling and installing. (C<\%Config> is available from the Config module). -=head1 Files in the I<To> directory (for serious hackers only) - -All Unicode properties are really mappings (in the mathematical sense) from -code points to their respective values. As part of its build process, -Perl constructs tables containing these mappings for all properties that it -deals with. Some, but not all, of these are written out into files. -Those written out are in the directory C<\$Config{privlib}>/F<unicore/To/> -(C<%Config> is available from the C<Config> module). - -Perl reserves the right to change the format and even the existence of any of -those files without notice, except the ones that were in existence prior to -release 5.13. If those change, a deprecation cycle will be done first. These -are: - -@map_tables_actually_output - -Each of the files in this directory defines several hash entries to help -reading programs decipher it. One of them looks like this: +=head1 Other information in the Unicode data base - \$utf8::SwashInfo{'ToNAME'}{'format'} = 's'; +The Unicode data base is delivered in two different formats. The XML version +is valid for more modern Unicode releases. The other version is a collection +of files. The two are intended to give equivalent information. Perl uses the +older form; this allows you to recompile Perl to use early Unicode releases. -where "NAME" is a name to indicate the property. For backwards compatibility, -this is not necessarily the property's official Unicode name. (The "To" is -also for backwards compatibility.) The hash entry gives the format of the -mapping fields of the table, currently one of the following: +The only non-character property that Perl currently supports is Named +Sequences, in which a sequence of code points +is given a name and generally treated as a single entity. (Perl supports +these via the C<\\N{...}> double-quotish construct, +L<charnames/charnames::string_vianame(name)>, and L<Unicode::UCD/namedseq()>. -@map_table_formats +Below is a list of the files in the Unicode data base that Perl doesn't +currently use, along with very brief descriptions of their purposes. +Some of the names of the files have been shortened from those that Unicode +uses, in order to allow them to be distinguishable from similarly named files +on file systems for which only the first 8 characters of a name are +significant. -This format applies only to the entries in the main body of the table. -Entries defined in hashes or ones that are missing from the list can have a -different format. - -The value that the missing entries have is given by another SwashInfo hash -entry line; it looks like this: - - \$utf8::SwashInfo{'ToNAME'}{'missing'} = 'NaN'; - -This example line says that any Unicode code points not explicitly listed in -the file have the value "NaN" under the property indicated by NAME. If the -value is the special string C<< <code point> >>, it means that the value for -any missing code point is the code point itself. This happens, for example, -in the file for Uppercase_Mapping (To/Upper.pl), in which code points like the -character "A", are missing because the uppercase of "A" is itself. +=over 4 -Finally, if the file contains a hash for special case entries, its name is -specified by an entry that looks like this: +@unused_files - \$utf8::SwashInfo{'ToNAME'}{'specials_name'} = 'utf8::ToSpecNAME'; +=back =head1 SEE ALSO @@ -13535,39 +14069,68 @@ sub make_Heavy () { # Create and write Heavy.pl, which passes info about the tables to # utf8_heavy.pl + # Stringify structures for output + my $loose_property_name_of + = simple_dumper(\%loose_property_name_of, ' ' x 4); + chomp $loose_property_name_of; + + my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4); + chomp $stricter_to_file_of; + + my $loose_to_file_of = simple_dumper(\%loose_to_file_of, ' ' x 4); + chomp $loose_to_file_of; + + my $nv_floating_to_rational + = simple_dumper(\%nv_floating_to_rational, ' ' x 4); + chomp $nv_floating_to_rational; + + my $why_deprecated = simple_dumper(\%utf8::why_deprecated, ' ' x 4); + chomp $why_deprecated; + + # We set the key to the file when we associated files with tables, but we + # couldn't do the same for the value then, as we might not have the file + # for the alternate table figured out at that time. + foreach my $cased (keys %caseless_equivalent_to) { + my @path = $caseless_equivalent_to{$cased}->file_path; + my $path = join '/', @path[1, -1]; + $caseless_equivalent_to{$cased} = $path; + } + my $caseless_equivalent_to + = simple_dumper(\%caseless_equivalent_to, ' ' x 4); + chomp $caseless_equivalent_to; + + my $loose_property_to_file_of + = simple_dumper(\%loose_property_to_file_of, ' ' x 4); + chomp $loose_property_to_file_of; + + my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4); + chomp $file_to_swash_name; + my @heavy = <<END; $HEADER -$INTERNAL_ONLY +$INTERNAL_ONLY_HEADER -# This file is for the use of utf8_heavy.pl +# This file is for the use of utf8_heavy.pl and Unicode::UCD -# Maps property names in loose standard form to its standard name +# Maps Unicode (not Perl single-form extensions) property names in loose +# standard form to their corresponding standard names \%utf8::loose_property_name_of = ( -END - - push @heavy, simple_dumper (\%loose_property_name_of, ' ' x 4); - push @heavy, <<END; +$loose_property_name_of ); # Maps property, table to file for those using stricter matching \%utf8::stricter_to_file_of = ( -END - push @heavy, simple_dumper (\%stricter_to_file_of, ' ' x 4); - push @heavy, <<END; +$stricter_to_file_of ); # Maps property, table to file for those using loose matching \%utf8::loose_to_file_of = ( -END - push @heavy, simple_dumper (\%loose_to_file_of, ' ' x 4); - push @heavy, <<END; +$loose_to_file_of ); # Maps floating point to fractional form \%utf8::nv_floating_to_rational = ( -END - push @heavy, simple_dumper (\%nv_floating_to_rational, ' ' x 4); - push @heavy, <<END; +$nv_floating_to_rational ); # If a floating point number doesn't have enough digits in it to get this @@ -13579,34 +14142,512 @@ END # the table, so as to avoid duplication, as many property names can map to the # file, but we only need one entry for all of them. \%utf8::why_deprecated = ( -END - - push @heavy, simple_dumper (\%utf8::why_deprecated, ' ' x 4); - push @heavy, <<END; +$why_deprecated ); -# A few properties have different behavior under /i matching. This maps the +# A few properties have different behavior under /i matching. This maps # those to substitute files to use under /i. \%utf8::caseless_equivalent = ( +$caseless_equivalent_to +); + +# Property names to mapping files +\%utf8::loose_property_to_file_of = ( +$loose_property_to_file_of +); + +# Files to the swash names within them. +\%utf8::file_to_swash_name = ( +$file_to_swash_name +); + +1; END + main::write("Heavy.pl", 0, \@heavy); # The 0 means no utf8. + return; +} - # We set the key to the file when we associated files with tables, but we - # couldn't do the same for the value then, as we might not have the file - # for the alternate table figured out at that time. - foreach my $cased (keys %caseless_equivalent_to) { - my @path = $caseless_equivalent_to{$cased}->file_path; - my $path = join '/', @path[1, -1]; - $utf8::caseless_equivalent_to{$cased} = $path; +sub make_Name_pm () { + # Create and write Name.pm, which contains subroutines and data to use in + # conjunction with Name.pl + + # Maybe there's nothing to do. + return unless $has_hangul_syllables || @code_points_ending_in_code_point; + + my @name = <<END; +$HEADER +$INTERNAL_ONLY_HEADER +END + + # Convert these structures to output format. + my $code_points_ending_in_code_point = + main::simple_dumper(\@code_points_ending_in_code_point, + ' ' x 8); + my $names = main::simple_dumper(\%names_ending_in_code_point, + ' ' x 8); + my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point, + ' ' x 8); + + # Do the same with the Hangul names, + my $jamo; + my $jamo_l; + my $jamo_v; + my $jamo_t; + my $jamo_re; + if ($has_hangul_syllables) { + + # Construct a regular expression of all the possible + # combinations of the Hangul syllables. + my @L_re; # Leading consonants + for my $i ($LBase .. $LBase + $LCount - 1) { + push @L_re, $Jamo{$i} + } + my @V_re; # Middle vowels + for my $i ($VBase .. $VBase + $VCount - 1) { + push @V_re, $Jamo{$i} + } + my @T_re; # Trailing consonants + for my $i ($TBase + 1 .. $TBase + $TCount - 1) { + push @T_re, $Jamo{$i} + } + + # The whole re is made up of the L V T combination. + $jamo_re = '(' + . join ('|', sort @L_re) + . ')(' + . join ('|', sort @V_re) + . ')(' + . join ('|', sort @T_re) + . ')?'; + + # These hashes needed by the algorithm were generated + # during reading of the Jamo.txt file + $jamo = main::simple_dumper(\%Jamo, ' ' x 8); + $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8); + $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8); + $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8); } - push @heavy, simple_dumper (\%utf8::caseless_equivalent_to, ' ' x 4); - push @heavy, <<END; + + push @name, <<END; + +# This module contains machine-generated tables and code for the +# algorithmically-determinable Unicode character names. The following +# routines can be used to translate between name and code point and vice versa + +{ # Closure + + # Matches legal code point. 4-6 hex numbers, If there are 6, the first + # two must be 10; if there are 5, the first must not be a 0. Written this + # way to decrease backtracking. The first regex allows the code point to + # be at the end of a word, but to work properly, the word shouldn't end + # with a valid hex character. The second one won't match a code point at + # the end of a word, and doesn't have the run-on issue + my \$run_on_code_point_re = qr/$run_on_code_point_re/; + my \$code_point_re = qr/$code_point_re/; + + # In the following hash, the keys are the bases of names which includes + # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The values + # of each key is another hash which is used to get the low and high ends + # for each range of code points that apply to the name. + my %names_ending_in_code_point = ( +$names + ); + + # The following hash is a copy of the previous one, except is for loose + # matching, so each name has blanks and dashes squeezed out + my %loose_names_ending_in_code_point = ( +$loose_names + ); + + # And the following array gives the inverse mapping from code points to + # names. Lowest code points are first + my \@code_points_ending_in_code_point = ( +$code_points_ending_in_code_point + ); +END + # Earlier releases didn't have Jamos. No sense outputting + # them unless will be used. + if ($has_hangul_syllables) { + push @name, <<END; + + # Convert from code point to Jamo short name for use in composing Hangul + # syllable names + my %Jamo = ( +$jamo + ); + + # Leading consonant (can be null) + my %Jamo_L = ( +$jamo_l + ); + + # Vowel + my %Jamo_V = ( +$jamo_v + ); + + # Optional trailing consonant + my %Jamo_T = ( +$jamo_t + ); + + # Computed re that splits up a Hangul name into LVT or LV syllables + my \$syllable_re = qr/$jamo_re/; + + my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE "; + my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE"; + + # These constants names and values were taken from the Unicode standard, + # version 5.1, section 3.12. They are used in conjunction with Hangul + # syllables + my \$SBase = $SBase_string; + my \$LBase = $LBase_string; + my \$VBase = $VBase_string; + my \$TBase = $TBase_string; + my \$SCount = $SCount; + my \$LCount = $LCount; + my \$VCount = $VCount; + my \$TCount = $TCount; + my \$NCount = \$VCount * \$TCount; +END + } # End of has Jamos + + push @name, << 'END'; + + sub name_to_code_point_special { + my ($name, $loose) = @_; + + # Returns undef if not one of the specially handled names; otherwise + # returns the code point equivalent to the input name + # $loose is non-zero if to use loose matching, 'name' in that case + # must be input as upper case with all blanks and dashes squeezed out. +END + if ($has_hangul_syllables) { + push @name, << 'END'; + + if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//) + || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//)) + { + return if $name !~ qr/^$syllable_re$/; + my $L = $Jamo_L{$1}; + my $V = $Jamo_V{$2}; + my $T = (defined $3) ? $Jamo_T{$3} : 0; + return ($L * $VCount + $V) * $TCount + $T + $SBase; + } +END + } + push @name, << 'END'; + + # Name must end in 'code_point' for this to handle. + return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x) + || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x)); + + my $base = $1; + my $code_point = CORE::hex $2; + my $names_ref; + + if ($loose) { + $names_ref = \%loose_names_ending_in_code_point; + } + else { + return if $base !~ s/-$//; + $names_ref = \%names_ending_in_code_point; + } + + # Name must be one of the ones which has the code point in it. + return if ! $names_ref->{$base}; + + # Look through the list of ranges that apply to this name to see if + # the code point is in one of them. + for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) { + return if $names_ref->{$base}{'low'}->[$i] > $code_point; + next if $names_ref->{$base}{'high'}->[$i] < $code_point; + + # Here, the code point is in the range. + return $code_point; + } + + # Here, looked like the name had a code point number in it, but + # did not match one of the valid ones. + return; + } + + sub code_point_to_name_special { + my $code_point = shift; + + # Returns the name of a code point if algorithmically determinable; + # undef if not +END + if ($has_hangul_syllables) { + push @name, << 'END'; + + # If in the Hangul range, calculate the name based on Unicode's + # algorithm + if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) { + use integer; + my $SIndex = $code_point - $SBase; + my $L = $LBase + $SIndex / $NCount; + my $V = $VBase + ($SIndex % $NCount) / $TCount; + my $T = $TBase + $SIndex % $TCount; + $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}"; + $name .= $Jamo{$T} if $T != $TBase; + return $name; + } +END + } + push @name, << 'END'; + + # Look through list of these code points for one in range. + foreach my $hash (@code_points_ending_in_code_point) { + return if $code_point < $hash->{'low'}; + if ($code_point <= $hash->{'high'}) { + return sprintf("%s-%04X", $hash->{'name'}, $code_point); + } + } + return; # None found + } +} # End closure + +1; +END + + main::write("Name.pm", 0, \@name); # The 0 means no utf8. + return; +} + +sub make_UCD () { + # Create and write UCD.pl, which passes info about the tables to + # Unicode::UCD + + # Create a mapping from each alias of Perl single-form extensions to all + # its equivalent aliases, for quick look-up. + my %perlprop_to_aliases; + foreach my $table ($perl->tables) { + + # First create the list of the aliases of each extension + my @aliases_list; # List of legal aliases for this extension + + my $table_name = $table->name; + my $standard_table_name = standardize($table_name); + my $table_full_name = $table->full_name; + my $standard_table_full_name = standardize($table_full_name); + + # Make sure that the list has both the short and full names + push @aliases_list, $table_name, $table_full_name; + + my $found_ucd = 0; # ? Did we actually get an alias that should be + # output for this table + + # Go through all the aliases (including the two just added), and add + # any new unique ones to the list + foreach my $alias ($table->aliases) { + + # Skip non-legal names + next unless $alias->ok_as_filename; + next unless $alias->ucd; + + $found_ucd = 1; # have at least one legal name + + my $name = $alias->name; + my $standard = standardize($name); + + # Don't repeat a name that is equivalent to one already on the + # list + next if $standard eq $standard_table_name; + next if $standard eq $standard_table_full_name; + + push @aliases_list, $name; + } + + # If there were no legal names, don't output anything. + next unless $found_ucd; + + # To conserve memory in the program reading these in, omit full names + # that are identical to the short name, when those are the only two + # aliases for the property. + if (@aliases_list == 2 && $aliases_list[0] eq $aliases_list[1]) { + pop @aliases_list; + } + + # Here, @aliases_list is the list of all the aliases that this + # extension legally has. Now can create a map to it from each legal + # standardized alias + foreach my $alias ($table->aliases) { + next unless $alias->ucd; + next unless $alias->ok_as_filename; + push @{$perlprop_to_aliases{standardize($alias->name)}}, + @aliases_list; + } + } + + # Make a list of all combinations of properties/values that are suppressed. + my @suppressed; + foreach my $property_name (keys %why_suppressed) { + + # Just the value + my $value_name = $1 if $property_name =~ s/ = ( .* ) //x; + + # The hash may contain properties not in this release of Unicode + next unless defined (my $property = property_ref($property_name)); + + # Find all combinations + foreach my $prop_alias ($property->aliases) { + my $prop_alias_name = standardize($prop_alias->name); + + # If no =value, there's just one combination possibe for this + if (! $value_name) { + + # The property may be suppressed, but there may be a proxy for + # it, so it shouldn't be listed as suppressed + next if $prop_alias->ucd; + push @suppressed, $prop_alias_name; + } + else { # Otherwise + foreach my $value_alias ($property->table($value_name)->aliases) + { + next if $value_alias->ucd; + + push @suppressed, "$prop_alias_name=" + . standardize($value_alias->name); + } + } + } + } + + # Convert the structure below (designed for Name.pm) to a form that UCD + # wants, so it doesn't have to modify it at all; i.e. so that it includes + # an element for the Hangul syllables in the appropriate place, and + # otherwise changes the name to include the "-<code point>" suffix. + my @algorithm_names; + my $done_hangul = 0; + + # Copy it linearly. + for my $i (0 .. @code_points_ending_in_code_point - 1) { + + # Insert the hanguls in the correct place. + if (! $done_hangul + && $code_points_ending_in_code_point[$i]->{'low'} > $SBase) + { + $done_hangul = 1; + push @algorithm_names, { low => $SBase, + high => $SBase + $SCount - 1, + name => '<hangul syllable>', + }; + } + + # Copy the current entry, modified. + push @algorithm_names, { + low => $code_points_ending_in_code_point[$i]->{'low'}, + high => $code_points_ending_in_code_point[$i]->{'high'}, + name => + "$code_points_ending_in_code_point[$i]->{'name'}-<code point>", + }; + } + + # Serialize these structures for output. + my $loose_to_standard_value + = simple_dumper(\%loose_to_standard_value, ' ' x 4); + chomp $loose_to_standard_value; + + my $string_property_loose_to_name + = simple_dumper(\%string_property_loose_to_name, ' ' x 4); + chomp $string_property_loose_to_name; + + my $perlprop_to_aliases = simple_dumper(\%perlprop_to_aliases, ' ' x 4); + chomp $perlprop_to_aliases; + + my $prop_aliases = simple_dumper(\%prop_aliases, ' ' x 4); + chomp $prop_aliases; + + my $prop_value_aliases = simple_dumper(\%prop_value_aliases, ' ' x 4); + chomp $prop_value_aliases; + + my $suppressed = (@suppressed) ? simple_dumper(\@suppressed, ' ' x 4) : ""; + chomp $suppressed; + + my $algorithm_names = simple_dumper(\@algorithm_names, ' ' x 4); + chomp $algorithm_names; + + my $ambiguous_names = simple_dumper(\%ambiguous_names, ' ' x 4); + chomp $ambiguous_names; + + my $loose_defaults = simple_dumper(\%loose_defaults, ' ' x 4); + chomp $loose_defaults; + + my @ucd = <<END; +$HEADER +$INTERNAL_ONLY_HEADER + +# This file is for the use of Unicode::UCD + +# Highest legal Unicode code point +\$Unicode::UCD::MAX_UNICODE_CODEPOINT = 0x$MAX_UNICODE_CODEPOINT_STRING; + +# Hangul syllables +\$Unicode::UCD::HANGUL_BEGIN = $SBase_string; +\$Unicode::UCD::HANGUL_COUNT = $SCount; + +# Keys are all the possible "prop=value" combinations, in loose form; values +# are the standard loose name for the 'value' part of the key +\%Unicode::UCD::loose_to_standard_value = ( +$loose_to_standard_value +); + +# String property loose names to standard loose name +\%Unicode::UCD::string_property_loose_to_name = ( +$string_property_loose_to_name +); + +# Keys are Perl extensions in loose form; values are each one's list of +# aliases +\%Unicode::UCD::loose_perlprop_to_name = ( +$perlprop_to_aliases +); + +# Keys are standard property name; values are each one's aliases +\%Unicode::UCD::prop_aliases = ( +$prop_aliases +); + +# Keys of top level are standard property name; values are keys to another +# hash, Each one is one of the property's values, in standard form. The +# values are that prop-val's aliases. If only one specified, the short and +# long alias are identical. +\%Unicode::UCD::prop_value_aliases = ( +$prop_value_aliases +); + +# Ordered (by code point ordinal) list of the ranges of code points whose +# names are algorithmically determined. Each range entry is an anonymous hash +# of the start and end points and a template for the names within it. +\@Unicode::UCD::algorithmic_named_code_points = ( +$algorithm_names +); + +# The properties that as-is have two meanings, and which must be disambiguated +\%Unicode::UCD::ambiguous_names = ( +$ambiguous_names +); + +# Keys are the prop-val combinations which are the default values for the +# given property, expressed in standard loose form +\%Unicode::UCD::loose_defaults = ( +$loose_defaults +); + +# All combinations of names that are suppressed. +# This is actually for UCD.t, so it knows which properties shouldn't have +# entries. If it got any bigger, would probably want to put it in its own +# file to use memory only when it was needed, in testing. +\@Unicode::UCD::suppressed_properties = ( +$suppressed ); 1; END - main::write("Heavy.pl", 0, \@heavy); # The 0 means no utf8. + main::write("UCD.pl", 0, \@ucd); # The 0 means no utf8. return; } @@ -13660,6 +14701,11 @@ sub write_all_tables() { return 1 if $a->complement != 0; return -1 if $b->complement != 0; + # Similarly, return a subservient table after + # a leader + return 1 if $a->leader != $a; + return -1 if $b->leader != $b; + my $cmp = length $ext_a <=> length $ext_b; # Return result if lengths not equal @@ -13681,8 +14727,8 @@ sub write_all_tables() { # See if should suppress the table if is empty, but warn if it # contains something. - my $suppress_if_empty_warn_if_not = grep { $complete_name eq $_ } - keys %why_suppress_if_empty_warn_if_not; + my $suppress_if_empty_warn_if_not + = $why_suppress_if_empty_warn_if_not{$complete_name} || 0; # Calculate if this table should have any code points associated # with it or not. @@ -13721,7 +14767,7 @@ sub write_all_tables() { ($is_property) ? # All these types of map tables will be full because # they will have been populated with defaults - ($type == $ENUM || $type == $BINARY) + ($type == $ENUM || $type == $FORCED_BINARY) : # A match table should match everything if its method # shows it should @@ -13735,13 +14781,12 @@ sub write_all_tables() { if ($table->is_empty) { - if ($suppress_if_empty_warn_if_not) { - $table->set_status($SUPPRESSED, - $why_suppress_if_empty_warn_if_not{$complete_name}); + $table->set_fate($SUPPRESSED, + $suppress_if_empty_warn_if_not); } - # Suppress expected empty tables. + # Suppress (by skipping them) expected empty tables. next TABLE if $expected_empty; # And setup to later output a warning for those that aren't @@ -13749,7 +14794,8 @@ sub write_all_tables() { # this table is a child of another one to avoid duplicating # the warning that should come from the parent one. if (($table == $property || $table->parent == $table) - && $table->status ne $SUPPRESSED + && $table->fate != $SUPPRESSED + && $table->fate != $MAP_PROXIED && ! grep { $complete_name =~ /^$_$/ } @tables_that_may_be_empty) { @@ -13762,7 +14808,7 @@ sub write_all_tables() { elsif ($expected_empty) { my $because = ""; if ($suppress_if_empty_warn_if_not) { - $because = " because $why_suppress_if_empty_warn_if_not{$complete_name}"; + $because = " because $suppress_if_empty_warn_if_not"; } Carp::my_carp("Not expecting property $table$because. Generating file for it anyway."); @@ -13799,11 +14845,11 @@ sub write_all_tables() { } } - if ($table->status eq $SUPPRESSED) { + if ($table->fate == $SUPPRESSED) { if (! $is_property) { my @children = $table->children; foreach my $child (@children) { - if ($child->status ne $SUPPRESSED) { + if ($child->fate != $SUPPRESSED) { Carp::my_carp_bug("'$table' is suppressed and has a child '$child' which isn't"); } } @@ -13811,15 +14857,18 @@ sub write_all_tables() { next TABLE; } + if (! $is_property) { + make_ucd_table_pod_entries($table) if $table->property == $perl; + # Several things need to be done just once for each related # group of match tables. Do them on the parent. if ($table->parent == $table) { # Add an entry in the pod file for the table; it also does # the children. - make_table_pod_entries($table) if defined $pod_directory; + make_re_pod_entries($table) if defined $pod_directory; # See if the the table matches identical code points with # something that has already been output. In that case, @@ -13831,21 +14880,27 @@ sub write_all_tables() { # have to have the same status to share a file, so add # this to the bucket hash. (The reason for this latter is # that Heavy.pl associates a status with a file.) - my $hash = $table->hash . ';' . $table->status; - - # Look at each table that is in the same bucket as this - # one would be. - foreach my $comparison (@{$match_tables_to_write{$hash}}) - { - if ($table->matches_identically_to($comparison)) { - $table->set_equivalent_to($comparison, + # We don't check tables that are inverses of others, as it + # would lead to some coding complications, and checking + # all the regular ones should find everything. + if ($table->complement == 0) { + my $hash = $table->hash . ';' . $table->status; + + # Look at each table that is in the same bucket as + # this one would be. + foreach my $comparison + (@{$match_tables_to_write{$hash}}) + { + if ($table->matches_identically_to($comparison)) { + $table->set_equivalent_to($comparison, Related => 0); - next TABLE; + next TABLE; + } } - } - # Here, not equivalent, add this table to the bucket. - push @{$match_tables_to_write{$hash}}, $table; + # Here, not equivalent, add this table to the bucket. + push @{$match_tables_to_write{$hash}}, $table; + } } } else { @@ -13854,30 +14909,57 @@ sub write_all_tables() { # Don't write out or make references to the $perl property next if $table == $perl; - if ($type != $STRING) { - - # There is a mapping stored of the various synonyms to the - # standardized name of the property for utf8_heavy.pl. - # Also, the pod file contains entries of the form: - # \p{alias: *} \p{full: *} - # rather than show every possible combination of things. + make_ucd_table_pod_entries($table); + + # There is a mapping stored of the various synonyms to the + # standardized name of the property for utf8_heavy.pl. + # Also, the pod file contains entries of the form: + # \p{alias: *} \p{full: *} + # rather than show every possible combination of things. + + my @property_aliases = $property->aliases; + + my $full_property_name = $property->full_name; + my $property_name = $property->name; + my $standard_property_name = standardize($property_name); + my $standard_property_full_name + = standardize($full_property_name); + + # We also create for Unicode::UCD a list of aliases for + # the property. The list starts with the property name; + # then its full name. + my @property_list; + my @standard_list; + if ( $property->fate <= $MAP_PROXIED) { + @property_list = ($property_name, $full_property_name); + @standard_list = ($standard_property_name, + $standard_property_full_name); + } - my @property_aliases = $property->aliases; + # For each synonym ... + for my $i (0 .. @property_aliases - 1) { + my $alias = $property_aliases[$i]; + my $alias_name = $alias->name; + my $alias_standard = standardize($alias_name); - # The full name of this property is stored by convention - # first in the alias array - my $full_property_name = - '\p{' . $property_aliases[0]->name . ': *}'; - my $standard_property_name = standardize($table->name); - # For each synonym ... - for my $i (0 .. @property_aliases - 1) { - my $alias = $property_aliases[$i]; - my $alias_name = $alias->name; - my $alias_standard = standardize($alias_name); + # Add other aliases to the list of property aliases + if ($property->fate <= $MAP_PROXIED + && ! grep { $alias_standard eq $_ } @standard_list) + { + push @property_list, $alias_name; + push @standard_list, $alias_standard; + } - # Set the mapping for utf8_heavy of the alias to the - # property + # For utf8_heavy, set the mapping of the alias to the + # property + if ($type == $STRING) { + if ($property->fate <= $MAP_PROXIED) { + $string_property_loose_to_name{$alias_standard} + = $standard_property_name; + } + } + else { if (exists ($loose_property_name_of{$alias_standard})) { Carp::my_carp("There already is a property with the same standard name as $alias_name: $loose_property_name_of{$alias_standard}. Old name is retained"); @@ -13887,16 +14969,16 @@ sub write_all_tables() { = $standard_property_name; } - # Now for the pod entry for this alias. Skip if not + # Now for the re pod entry for this alias. Skip if not # outputting a pod; skip the first one, which is the # full name so won't have an entry like: '\p{full: *} # \p{full: *}', and skip if don't want an entry for # this one. next if $i == 0 || ! defined $pod_directory - || ! $alias->make_pod_entry; + || ! $alias->make_re_pod_entry; - my $rhs = $full_property_name; + my $rhs = "\\p{$full_property_name: *}"; if ($property != $perl && $table->perl_extension) { $rhs .= ' (Perl extension)'; } @@ -13906,10 +14988,69 @@ sub write_all_tables() { $rhs, $alias->status); } - } # End of non-string-like property code + } + + # The list of all possible names is attached to each alias, so + # lookup is easy + if (@property_list) { + push @{$prop_aliases{$standard_list[0]}}, @property_list; + } + + if ($property->fate <= $MAP_PROXIED) { + + # Similarly, we create for Unicode::UCD a list of + # property-value aliases. + + my $property_full_name = $property->full_name; + + # Look at each table in the property... + foreach my $table ($property->tables) { + my @values_list; + my $table_full_name = $table->full_name; + my $standard_table_full_name + = standardize($table_full_name); + my $table_name = $table->name; + my $standard_table_name = standardize($table_name); + + # The list starts with the table name and its full + # name. + push @values_list, $table_name, $table_full_name; + + # We add to the table each unique alias that isn't + # discouraged from use. + foreach my $alias ($table->aliases) { + next if $alias->status + && $alias->status eq $DISCOURAGED; + my $name = $alias->name; + my $standard = standardize($name); + next if $standard eq $standard_table_name; + next if $standard eq $standard_table_full_name; + push @values_list, $name; + } + + # Here @values_list is a list of all the aliases for + # the table. That is, all the property-values given + # by this table. By agreement with Unicode::UCD, + # if the name and full name are identical, and there + # are no other names, drop the duplcate entry to save + # memory. + if (@values_list == 2 + && $values_list[0] eq $values_list[1]) + { + pop @values_list + } + # To save memory, unlike the similar list for property + # aliases above, only the standard forms hve the list. + # This forces an extra step of converting from input + # name to standard name, but the savings are + # considerable. (There is only marginal savings if we + # did this with the property aliases.) + push @{$prop_value_aliases{$standard_property_name}{$standard_table_name}}, @values_list; + } + } - # Don't output a mapping file if not desired. + # Don't write out a mapping file if not desired. next if ! $property->to_output_map; } @@ -13968,8 +15109,10 @@ sub write_all_tables() { # Write out the pod file make_pod; - # And Heavy.pl + # And Heavy.pl, Name.pm, UCD.pl make_Heavy; + make_Name_pm; + make_UCD; make_property_test_script() if $make_test_script; return; @@ -14278,9 +15421,11 @@ sub make_property_test_script() { # pre-existing one. push @property_aliases, map { Alias->new("Is_" . $_->name, $_->loose_match, - $_->make_pod_entry, - $_->externally_ok, - $_->status) + $_->make_re_pod_entry, + $_->ok_as_filename, + $_->status, + $_->ucd, + ) } @property_aliases; my $max = max(scalar @table_aliases, scalar @property_aliases); for my $j (0 .. $max - 1) { @@ -14570,7 +15715,7 @@ my @input_file_objects = ( Property => 'Bidi_Mirroring_Glyph', ), Input_file->new("NormalizationTest.txt", v3.0.1, - Skip => 1, + Skip => 'Validation Tests', ), Input_file->new('CaseFolding.txt', v3.0.1, Pre_Handler => \&setup_case_folding, @@ -14612,13 +15757,13 @@ my @input_file_objects = ( Handler => \&process_GCB_test, ), Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0, - Skip => 1, + Skip => 'Validation Tests', ), Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0, - Skip => 1, + Skip => 'Validation Tests', ), Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0, - Skip => 1, + Skip => 'Validation Tests', ), Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0, Property => 'Sentence_Break', @@ -14629,9 +15774,12 @@ my @input_file_objects = ( ), Input_file->new('NameAliases.txt', v5.0.0, Property => 'Name_Alias', + Pre_Handler => ($v_version ge v6.0.0) + ? \&setup_v6_name_alias + : undef, ), Input_file->new("BidiTest.txt", v5.2.0, - Skip => 1, + Skip => 'Validation Tests', ), Input_file->new('UnihanIndicesDictionary.txt', v5.2.0, Optional => 1, @@ -14669,6 +15817,7 @@ my @input_file_objects = ( Input_file->new('ScriptExtensions.txt', v6.0.0, Property => 'Script_Extensions', Pre_Handler => \&setup_script_extensions, + Each_Line_Handler => \&filter_script_extensions_line, ), ); diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index dc8211cb16..08d901ff69 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -3,6 +3,7 @@ use strict; use warnings; sub DEBUG () { 0 } +$|=1 if DEBUG; sub DESTROY {} @@ -10,7 +11,7 @@ my %Cache; sub croak { require Carp; Carp::croak(@_) } -sub loose_name ($) { +sub _loose_name ($) { # Given a lowercase property or property-value name, return its # standardized version that is expected for look-up in the 'loose' hashes # in Heavy.pl (hence, this depends on what mktables does). This squeezes @@ -19,7 +20,7 @@ sub loose_name ($) { my $loose = $_[0] =~ s/[-\s_]//rg; - return $loose if $loose !~ / ^ (?: is )? l $/x; + return $loose if $loose !~ / ^ (?: is | to )? l $/x; return 'l_' if $_[0] =~ / l .* _ /x; # If original had a trailing '_' return $loose; } @@ -43,6 +44,7 @@ sub loose_name ($) { sub SWASHNEW { my ($class, $type, $list, $minbits, $none) = @_; + my $user_defined = 0; local $^D = 0 if $^D; $class = "" unless defined $class; @@ -59,6 +61,8 @@ sub loose_name ($) { ## regexec.c:regclass_swash -- for /[]/, \p, and \P ## utf8.c:is_utf8_common -- for common Unicode properties ## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i + ## Unicode::UCD::prop_invlist + ## Unicode::UCD::prop_invmap ## ## Given a $type, our goal is to fill $list with the set of codepoint ## ranges. If $type is false, $list passed is used. @@ -90,6 +94,8 @@ sub loose_name ($) { # same meanings as the input parameters. # SPECIALS contains a reference to any special-treatment hash in the # INVERT_IT is non-zero if the result should be inverted before use + # USER_DEFINED is non-zero if the result came from a user-defined + # property. my $file; ## file to load data from, and also part of the %Cache key. my $ListSorted = 0; @@ -142,6 +148,7 @@ sub loose_name ($) { if $tainted; no strict 'refs'; $list = &{$prop}($caseless); + $user_defined = 1; last GETFILE; } } @@ -193,7 +200,7 @@ sub loose_name ($) { # name is always loosely matched, and always can have an # optional 'is' prefix (which isn't true in the single # form). - $property = loose_name($property) =~ s/^is//r; + $property = _loose_name($property) =~ s/^is//r; # And convert to canonical form. Quit if not valid. $property = $utf8::loose_property_name_of{$property}; @@ -385,7 +392,7 @@ sub loose_name ($) { # out the applicable characters on the rhs and looking up # again. if (! defined $file) { - $table = loose_name($table); + $table = _loose_name($table); $property_and_table = "$prefix$table"; print STDERR __LINE__, ": $property_and_table\n" if DEBUG; $file = $utf8::loose_to_file_of{$property_and_table}; @@ -417,17 +424,61 @@ sub loose_name ($) { ## The user-level way to access ToDigit() and ToFold() ## is to use Unicode::UCD. ## - if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) { + # Only check if caller wants non-binary + my $retried = 0; + if ($minbits != 1 && $property_and_table =~ s/^to//) {{ + # Look input up in list of properties for which we have + # mapping files. + if (defined ($file = + $utf8::loose_property_to_file_of{$property_and_table})) + { + $type = $utf8::file_to_swash_name{$file}; + print STDERR __LINE__, ": type set to $type\n" if DEBUG; + $file = "$unicore_dir/$file.pl"; + last GETFILE; + } # If that fails see if there is a corresponding binary + # property file + elsif (defined ($file = + $utf8::loose_to_file_of{$property_and_table})) + { - # Fail if wanting a binary property, as these aren't. - if ($minbits == 1) { - pop @recursed if @recursed; - return $type; + # Here, there is no map file for the property we are + # trying to get the map of, but this is a binary + # property, and there is a file for it that can easily + # be translated to a mapping. + + # In the case of properties that are forced to binary, + # they are a combination. We return the actual + # mapping instead of the binary. If the input is + # something like 'Tocjkkiicore', it will be found in + # %loose_property_to_file_of above as => 'To/kIICore'. + # But the form like ToIskiicore won't be. To fix + # this, it was easiest to do it here. These + # properties are the complements of the default + # property, so there is an entry in %loose_to_file_of + # that is 'iskiicore' => '!kIICore/N', If we find such + # an entry, strip off things and try again, which + # should find the entry in %loose_property_to_file_of. + # Actual binary properties that are of this form, such + # as this entry: 'ishrkt' => '!Perl/Any' will also be + # retried, but won't be in %loose_property_to_file_of, + # and instead the next time through, it will find + # 'hrkt' => '!Perl/Any' and proceed. + redo if ! $retried + && $file =~ /^!/ + && $property_and_table =~ s/^is//; + + # This is a binary property. Setting this here causes + # it to be stored as such in the cache, so if someone + # comes along later looking for just a binary, they + # get it. + $minbits = 1; + + $invert_it = $file =~ s/^!//; + $file = "$unicore_dir/lib/$file.pl"; + last GETFILE; } - $file = "$unicore_dir/To/$1.pl"; - ## would like to test to see if $file actually exists.... - last GETFILE; - } + } } ## ## If we reach this line, it's because we couldn't figure @@ -436,7 +487,7 @@ sub loose_name ($) { pop @recursed if @recursed; return $type; - } + } # end of GETFILE block if (defined $file) { print STDERR __LINE__, ": found it (file='$file')\n" if DEBUG; @@ -462,9 +513,13 @@ sub loose_name ($) { } $ListSorted = 1; ## we know that these lists are sorted - } + } # End of $type is non-null + + # Here, either $type was null, or we found the requested property and + # read it into $list my $extras; + my $bits = $minbits; if ($list) { @@ -528,13 +583,15 @@ sub loose_name ($) { } push @extras, $name => $subobj; $bits = $subobj->{BITS} if $bits < $subobj->{BITS}; + $user_defined = $subobj->{USER_DEFINED} + if $subobj->{USER_DEFINED}; } } } } if (DEBUG) { - print STDERR __LINE__, ": CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none, INVERT_IT => $invert_it"; + print STDERR __LINE__, ": CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none, INVERT_IT => $invert_it, USER_DEFINED => $user_defined"; print STDERR "\nLIST =>\n$list" if defined $list; print STDERR "\nEXTRAS =>\n$extras" if defined $extras; print STDERR "\n"; @@ -546,6 +603,7 @@ sub loose_name ($) { EXTRAS => $extras, LIST => $list, NONE => $none, + USER_DEFINED => $user_defined, @extras, } => $class; @@ -557,7 +615,7 @@ sub loose_name ($) { { my $specials_name = $utf8::SwashInfo{$type}{'specials_name'}; no strict "refs"; - print STDERR "\nspecials_name => $SWASH->{'SPECIALS'}\n" if DEBUG; + print STDERR "\nspecials_name => $specials_name\n" if DEBUG; $SWASH->{'SPECIALS'} = \%$specials_name; } $SWASH->{'INVERT_IT'} = $invert_it; @@ -2820,7 +2820,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) (void)setregid((Gid_t)PL_gid, (Gid_t)-1); #else #ifdef HAS_SETRESGID - (void)setresgid((Gid_t)PL_gid, (Gid_t)-1, (Gid_t) 1); + (void)setresgid((Gid_t)PL_gid, (Gid_t)-1, (Gid_t) -1); #else if (PL_gid == PL_egid) /* special case $( = $) */ (void)PerlProc_setgid(PL_gid); @@ -550,7 +550,7 @@ Perl_mro_isa_changed_in(pTHX_ HV* stash) /* We have to iterate through isarev twice to avoid a chicken and * egg problem: if A inherits from B and both are in isarev, A might - * be processed before B and use B’s previous linearisation. + * be processed before B and use B's previous linearisation. */ /* First iteration: Wipe everything, but stash away the isa hashes @@ -673,7 +673,7 @@ Perl_mro_isa_changed_in(pTHX_ HV* stash) stashname_utf8 ? -(I32)stashname_len : (I32)stashname_len, &PL_sv_yes, 0); } - /* Delete our name from our former parents’ isarevs. */ + /* Delete our name from our former parents' isarevs. */ if(isa && HvARRAY(isa)) mro_clean_isarev(isa, stashname, stashname_len, meta->isa, (stashname_utf8 ? SVf_UTF8 : 0) ); @@ -688,7 +688,7 @@ S_mro_clean_isarev(pTHX_ HV * const isa, const char * const name, PERL_ARGS_ASSERT_MRO_CLEAN_ISAREV; - /* Delete our name from our former parents’ isarevs. */ + /* Delete our name from our former parents' isarevs. */ if(isa && HvARRAY(isa) && hv_iterinit(isa)) { SV **svp; while((iter = hv_iternext(isa))) { @@ -876,7 +876,7 @@ Perl_mro_package_moved(pTHX_ HV * const stash, HV * const oldstash, } } -void +STATIC void S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, HV *stash, HV *oldstash, SV *namesv) { @@ -959,7 +959,7 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, * are not going to call mro_isa_changed_in with this * name (and not at all if it has become anonymous) so * we need to delete old isarev entries here, both - * those in the superclasses and this class’s own list + * those in the superclasses and this class's own list * of subclasses. We simply delete the latter from * PL_isarev, since we still need it. hv_delete morti- * fies it for us, so sv_2mortal is not necessary. */ @@ -994,7 +994,7 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, /* Add it to the big list if it needs * mro_isa_changed_in called on it. That happens if it was * detached from the symbol table (so it had no HvENAME) before - * being assigned to the spot named by the ‘name’ variable, because + * being assigned to the spot named by the `name' variable, because * its cached isa linearisation is now stale (the effective name * having changed), and subclasses will then use that cache when * mro_package_moved calls mro_isa_changed_in. (See @@ -2558,11 +2558,26 @@ Perl_bind_match(pTHX_ I32 type, OP *left, OP *right) || rtype == OP_TRANSR ) ? (int)rtype : OP_MATCH]; - const char * const sample = ((ltype == OP_RV2AV || ltype == OP_PADAV) + const bool isary = ltype == OP_RV2AV || ltype == OP_PADAV; + GV *gv; + SV * const name = + (ltype == OP_RV2AV || ltype == OP_RV2HV) + ? cUNOPx(left)->op_first->op_type == OP_GV + && (gv = cGVOPx_gv(cUNOPx(left)->op_first)) + ? varname(gv, isary ? '@' : '%', 0, NULL, 0, 1) + : NULL + : varname(NULL, isary ? '@' : '%', left->op_targ, NULL, 0, 1); + if (name) + Perl_warner(aTHX_ packWARN(WARN_MISC), + "Applying %s to %"SVf" will act on scalar(%"SVf")", + desc, name, name); + else { + const char * const sample = (isary ? "@array" : "%hash"); - Perl_warner(aTHX_ packWARN(WARN_MISC), + Perl_warner(aTHX_ packWARN(WARN_MISC), "Applying %s to %s will act on scalar(%s)", desc, sample, sample); + } } if (rtype == OP_CONST && @@ -3598,6 +3613,11 @@ Perl_newOP(pTHX_ I32 type, I32 flags) dVAR; OP *o; + if (type == -OP_ENTEREVAL) { + type = OP_ENTEREVAL; + flags |= OPpEVAL_BYTES<<8; + } + assert((PL_opargs[type] & OA_CLASS_MASK) == OA_BASEOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_BASEOP_OR_UNOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_FILESTATOP @@ -3640,6 +3660,11 @@ Perl_newUNOP(pTHX_ I32 type, I32 flags, OP *first) dVAR; UNOP *unop; + if (type == -OP_ENTEREVAL) { + type = OP_ENTEREVAL; + flags |= OPpEVAL_BYTES<<8; + } + assert((PL_opargs[type] & OA_CLASS_MASK) == OA_UNOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_BASEOP_OR_UNOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_FILESTATOP @@ -6448,7 +6473,8 @@ Perl_newATTRSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) = (block || attrs || (CvFLAGS(PL_compcv) & CVf_BUILTIN_ATTRS) || PL_madskills) ? GV_ADDMULTI : GV_ADDMULTI | GV_NOINIT; - const char * const name = o ? SvPV_nolen_const(cSVOPo->op_sv) : NULL; + STRLEN namlen = 0; + const char * const name = o ? SvPV_const(cSVOPo->op_sv, namlen) : NULL; bool has_name; bool name_is_utf8 = o ? (SvUTF8(cSVOPo->op_sv) ? 1 : 0) : 0; @@ -6594,7 +6620,10 @@ Perl_newATTRSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) } else { GvCV_set(gv, NULL); - cv = newCONSTSUB_flags(NULL, name, name_is_utf8 ? SVf_UTF8 : 0, const_sv); + cv = newCONSTSUB_flags( + NULL, name, namlen, name_is_utf8 ? SVf_UTF8 : 0, + const_sv + ); } stash = (CvGV(cv) && GvSTASH(CvGV(cv))) @@ -6801,13 +6830,13 @@ S_process_special_blocks(pTHX_ const char *const fullname, GV *const gv, ENTER; SAVECOPFILE(&PL_compiling); SAVECOPLINE(&PL_compiling); + SAVEVPTR(PL_curcop); DEBUG_x( dump_sub(gv) ); Perl_av_create_and_push(aTHX_ &PL_beginav, MUTABLE_SV(cv)); GvCV_set(gv,0); /* cv has been hijacked */ call_list(oldscope, PL_beginav); - PL_curcop = &PL_compiling; CopHINTS_set(&PL_compiling, PL_hints); LEAVE; } @@ -6863,7 +6892,7 @@ See L</newCONSTSUB_flags>. CV * Perl_newCONSTSUB(pTHX_ HV *stash, const char *name, SV *sv) { - return newCONSTSUB_flags(stash, name, 0, sv); + return newCONSTSUB_flags(stash, name, name ? strlen(name) : 0, 0, sv); } /* @@ -6883,7 +6912,8 @@ compile time.) */ CV * -Perl_newCONSTSUB_flags(pTHX_ HV *stash, const char *name, U32 flags, SV *sv) +Perl_newCONSTSUB_flags(pTHX_ HV *stash, const char *name, STRLEN len, + U32 flags, SV *sv) { dVAR; CV* cv; @@ -6920,7 +6950,7 @@ Perl_newCONSTSUB_flags(pTHX_ HV *stash, const char *name, U32 flags, SV *sv) and so doesn't get free()d. (It's expected to be from the C pre- processor __FILE__ directive). But we need a dynamically allocated one, and we need it to get freed. */ - cv = newXS_flags(name, const_sv_xsub, file ? file : "", "", + cv = newXS_len_flags(name, len, const_sv_xsub, file ? file : "", "", XS_DYNAMIC_FILENAME | flags); CvXSUBANY(cv).any_ptr = sv; CvCONST_on(cv); @@ -6939,12 +6969,27 @@ Perl_newXS_flags(pTHX_ const char *name, XSUBADDR_t subaddr, const char *const filename, const char *const proto, U32 flags) { + PERL_ARGS_ASSERT_NEWXS_FLAGS; + return newXS_len_flags( + name, name ? strlen(name) : 0, subaddr, filename, proto, flags + ); +} + +CV * +Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, + XSUBADDR_t subaddr, const char *const filename, + const char *const proto, U32 flags) +{ CV *cv; - PERL_ARGS_ASSERT_NEWXS_FLAGS; + PERL_ARGS_ASSERT_NEWXS_LEN_FLAGS; { - GV * const gv = gv_fetchpv(name ? name : + GV * const gv = name + ? gv_fetchpvn( + name,len,GV_ADDMULTI|flags,SVt_PVCV + ) + : gv_fetchpv( (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"), GV_ADDMULTI | flags, SVt_PVCV); @@ -7284,6 +7329,32 @@ Perl_ck_bitop(pTHX_ OP *o) return o; } +PERL_STATIC_INLINE bool +is_dollar_bracket(pTHX_ const OP * const o) +{ + const OP *kid; + return o->op_type == OP_RV2SV && o->op_flags & OPf_KIDS + && (kid = cUNOPx(o)->op_first) + && kid->op_type == OP_GV + && strEQ(GvNAME(cGVOPx_gv(kid)), "["); +} + +OP * +Perl_ck_cmp(pTHX_ OP *o) +{ + PERL_ARGS_ASSERT_CK_CMP; + if (ckWARN(WARN_SYNTAX)) { + const OP *kid = cUNOPo->op_first; + if (kid && ( + is_dollar_bracket(aTHX_ kid) + || ((kid = kid->op_sibling) && is_dollar_bracket(aTHX_ kid)) + )) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "$[ used in %s (did you mean $] ?)", OP_DESC(o)); + } + return o; +} + OP * Perl_ck_concat(pTHX_ OP *o) { @@ -7443,21 +7514,27 @@ Perl_ck_eval(pTHX_ OP *o) } } else { + const U8 priv = o->op_private; #ifdef PERL_MAD OP* const oldo = o; #else op_free(o); #endif - o = newUNOP(OP_ENTEREVAL, 0, newDEFSVOP()); + o = newUNOP(OP_ENTEREVAL, priv <<8, newDEFSVOP()); op_getmad(oldo,o,'O'); } o->op_targ = (PADOFFSET)PL_hints; - if ((PL_hints & HINT_LOCALIZE_HH) != 0 && GvHV(PL_hintgv)) { + if ((PL_hints & HINT_LOCALIZE_HH) != 0 + && !(o->op_private & OPpEVAL_COPHH) && GvHV(PL_hintgv)) { /* Store a copy of %^H that pp_entereval can pick up. */ OP *hhop = newSVOP(OP_HINTSEVAL, 0, MUTABLE_SV(hv_copy_hints_hv(GvHV(PL_hintgv)))); cUNOPo->op_first->op_sibling = hhop; o->op_private |= OPpEVAL_HAS_HH; + + if (!(o->op_private & OPpEVAL_BYTES) + && FEATURE_IS_ENABLED("unieval")) + o->op_private |= OPpEVAL_UNICODE; } return o; } @@ -8531,10 +8608,14 @@ Perl_ck_require(pTHX_ OP *o) } if (gv && GvCVu(gv) && GvIMPORTED_CV(gv)) { - OP * const kid = cUNOPo->op_first; - OP * newop; - - cUNOPo->op_first = 0; + OP *kid, *newop; + if (o->op_flags & OPf_KIDS) { + kid = cUNOPo->op_first; + cUNOPo->op_first = NULL; + } + else { + kid = newDEFSVOP(); + } #ifndef PERL_MAD op_free(o); #endif @@ -9328,7 +9409,7 @@ Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) } else { OP *prev, *cvop; - U32 paren; + U32 flags; #ifdef PERL_MAD bool seenarg = FALSE; #endif @@ -9347,16 +9428,20 @@ Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) #endif ; prev->op_sibling = NULL; - paren = OPf_SPECIAL * !(cvop->op_private & OPpENTERSUB_NOPAREN); + flags = OPf_SPECIAL * !(cvop->op_private & OPpENTERSUB_NOPAREN); op_free(cvop); if (aop == cvop) aop = NULL; op_free(entersubop); + if (opnum == OP_ENTEREVAL + && GvNAMELEN(namegv)==9 && strnEQ(GvNAME(namegv), "evalbytes", 9)) + flags |= OPpEVAL_BYTES <<8; + switch (PL_opargs[opnum] & OA_CLASS_MASK) { case OA_UNOP: case OA_BASEOP_OR_UNOP: case OA_FILESTATOP: - return aop ? newUNOP(opnum,paren,aop) : newOP(opnum,paren); + return aop ? newUNOP(opnum,flags,aop) : newOP(opnum,flags); case OA_BASEOP: if (aop) { #ifdef PERL_MAD @@ -9618,6 +9703,57 @@ Perl_ck_each(pTHX_ OP *o) return o->op_type == ref_type ? o : ck_fun(o); } +OP * +Perl_ck_length(pTHX_ OP *o) +{ + PERL_ARGS_ASSERT_CK_LENGTH; + + o = ck_fun(o); + + if (ckWARN(WARN_SYNTAX)) { + const OP *kid = o->op_flags & OPf_KIDS ? cLISTOPo->op_first : NULL; + + if (kid) { + SV *name = NULL; + const bool hash = kid->op_type == OP_PADHV + || kid->op_type == OP_RV2HV; + switch (kid->op_type) { + case OP_PADHV: + case OP_PADAV: + name = varname( + NULL, hash ? '%' : '@', kid->op_targ, NULL, 0, 1 + ); + break; + case OP_RV2HV: + case OP_RV2AV: + if (cUNOPx(kid)->op_first->op_type != OP_GV) break; + { + GV *gv = cGVOPx_gv(cUNOPx(kid)->op_first); + if (!gv) break; + name = varname(gv, hash?'%':'@', 0, NULL, 0, 1); + } + break; + default: + return o; + } + if (name) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "length() used on %"SVf" (did you mean \"scalar(%s%"SVf + ")\"?)", + name, hash ? "keys " : "", name + ); + else if (hash) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "length() used on %%hash (did you mean \"scalar(keys %%hash)\"?)"); + else + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "length() used on @array (did you mean \"scalar(@array)\"?)"); + } + } + + return o; +} + /* caller is supposed to assign the return to the container of the rep_op var */ STATIC OP * @@ -9703,6 +9839,7 @@ S_inplace_aassign(pTHX_ OP *o) { if (oright->op_type != OP_RV2AV || !cUNOPx(oright)->op_first || cUNOPx(oright)->op_first->op_type != OP_GV + || cUNOPx(oleft )->op_first->op_type != OP_GV || cGVOPx_gv(cUNOPx(oleft)->op_first) != cGVOPx_gv(cUNOPx(oright)->op_first) ) @@ -10309,6 +10446,8 @@ Perl_core_prototype(pTHX_ SV *sv, const char *name, const int code, retsetpvs("+;$$@", OP_SPLICE); case KEY___FILE__: case KEY___LINE__: case KEY___PACKAGE__: retsetpvs("", 0); + case KEY_evalbytes: + name = "entereval"; break; case KEY_readpipe: name = "backtick"; } @@ -10406,7 +10545,11 @@ Perl_coresub_op(pTHX_ SV * const coreargssv, const int code, opnum == OP_WANTARRAY ? OPpOFFBYONE << 8 : 0) ); case OA_BASEOP_OR_UNOP: - o = newUNOP(opnum,0,argop); + if (opnum == OP_ENTEREVAL) { + o = newUNOP(OP_ENTEREVAL,OPpEVAL_COPHH<<8,argop); + if (code == -KEY_evalbytes) o->op_private |= OPpEVAL_BYTES; + } + else o = newUNOP(opnum,0,argop); if (opnum == OP_CALLER) o->op_private |= OPpOFFBYONE; else { onearg: @@ -295,6 +295,9 @@ Deprecated. Use C<GIMME_V> instead. /* Private for OP_ENTEREVAL */ #define OPpEVAL_HAS_HH 2 /* Does it have a copy of %^H */ +#define OPpEVAL_UNICODE 4 +#define OPpEVAL_BYTES 8 +#define OPpEVAL_COPHH 16 /* Construct %^H from cop hints */ /* Private for OP_CALLER and OP_WANTARRAY */ #define OPpOFFBYONE 128 /* Treat caller(1) as caller(2) */ @@ -1381,14 +1381,14 @@ EXT Perl_check_t PL_check[] /* or perlvars.h */ Perl_ck_fun, /* stringify */ Perl_ck_bitop, /* left_shift */ Perl_ck_bitop, /* right_shift */ - Perl_ck_null, /* lt */ - Perl_ck_null, /* i_lt */ - Perl_ck_null, /* gt */ - Perl_ck_null, /* i_gt */ - Perl_ck_null, /* le */ - Perl_ck_null, /* i_le */ - Perl_ck_null, /* ge */ - Perl_ck_null, /* i_ge */ + Perl_ck_cmp, /* lt */ + Perl_ck_cmp, /* i_lt */ + Perl_ck_cmp, /* gt */ + Perl_ck_cmp, /* i_gt */ + Perl_ck_cmp, /* le */ + Perl_ck_cmp, /* i_le */ + Perl_ck_cmp, /* ge */ + Perl_ck_cmp, /* i_ge */ Perl_ck_null, /* eq */ Perl_ck_null, /* i_eq */ Perl_ck_null, /* ne */ @@ -1422,7 +1422,7 @@ EXT Perl_check_t PL_check[] /* or perlvars.h */ Perl_ck_fun, /* hex */ Perl_ck_fun, /* oct */ Perl_ck_fun, /* abs */ - Perl_ck_fun, /* length */ + Perl_ck_length, /* length */ Perl_ck_substr, /* substr */ Perl_ck_fun, /* vec */ Perl_ck_index, /* index */ @@ -2023,7 +2023,7 @@ EXTCONST U32 PL_opargs[] = { 0x00009bc0, /* require */ 0x00001140, /* dofile */ 0x00000604, /* hintseval */ - 0x00001b40, /* entereval */ + 0x00009bc0, /* entereval */ 0x00001100, /* leaveeval */ 0x00000340, /* entertry */ 0x00000400, /* leavetry */ @@ -6,7 +6,7 @@ * License or the Artistic License, as specified in the README file. * * This file defines the layout of the parser object used by the parser - * and lexer (perly.c, toke,c). + * and lexer (perly.c, toke.c). */ #define YYEMPTY (-2) @@ -105,15 +105,24 @@ typedef struct yy_parser { COP *saved_curcop; /* the previous PL_curcop */ char tokenbuf[256]; - bool in_pod; /* lexer is within a =pod section */ U8 lex_fakeeof; /* precedence at which to fake EOF */ + PERL_BITFIELD16 lex_flags:14; + PERL_BITFIELD16 in_pod:1; /* lexer is within a =pod section */ + PERL_BITFIELD16 filtered:1; /* source filters in evalbytes */ } yy_parser; /* flags for lexer API */ #define LEX_STUFF_UTF8 0x00000001 #define LEX_KEEP_PREVIOUS 0x00000002 + #ifdef PERL_CORE # define LEX_START_SAME_FILTER 0x00000001 +# define LEX_IGNORE_UTF8_HINTS 0x00000002 +# define LEX_EVALBYTES 0x00000004 +# define LEX_START_COPIED 0x00000008 +# define LEX_START_FLAGS \ + (LEX_START_SAME_FILTER|LEX_START_COPIED \ + |LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES) #endif /* flags for parser API */ diff --git a/patchlevel.h b/patchlevel.h index 4a356b33a5..15041883e3 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -15,7 +15,7 @@ #define PERL_REVISION 5 /* age */ #define PERL_VERSION 15 /* epoch */ -#define PERL_SUBVERSION 4 /* generation */ +#define PERL_SUBVERSION 5 /* generation */ /* The following numbers describe the earliest compatible version of Perl ("compatibility" here being defined as sufficient binary/API @@ -36,7 +36,7 @@ */ #define PERL_API_REVISION 5 #define PERL_API_VERSION 15 -#define PERL_API_SUBVERSION 4 +#define PERL_API_SUBVERSION 5 /* XXX Note: The selection of non-default Configure options, such as -Duselonglong may invalidate these settings. Currently, Configure diff --git a/plan9/config.plan9 b/plan9/config.plan9 index 2b0b50d863..56473b4fb3 100644 --- a/plan9/config.plan9 +++ b/plan9/config.plan9 @@ -3385,8 +3385,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/sys/lib/perl/5.15.4" /**/ -#define PRIVLIB_EXP "/sys/lib/perl/5.15.4" /**/ +#define PRIVLIB "/sys/lib/perl/5.15.5" /**/ +#define PRIVLIB_EXP "/sys/lib/perl/5.15.5" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -3513,9 +3513,9 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "/sys/lib/perl/5.15.4/site_perl" /**/ -#define SITELIB_EXP "/sys/lib/perl/5.15.4/site_perl" /**/ -#define SITELIB_STEM "/sys/lib/perl/5.15.4/site_perl" /**/ +#define SITELIB "/sys/lib/perl/5.15.5/site_perl" /**/ +#define SITELIB_EXP "/sys/lib/perl/5.15.5/site_perl" /**/ +#define SITELIB_STEM "/sys/lib/perl/5.15.5/site_perl" /**/ /* Size_t_size: * This symbol holds the size of a Size_t in bytes. diff --git a/plan9/config_sh.sample b/plan9/config_sh.sample index 7c0bbcc4c4..7292f26959 100644 --- a/plan9/config_sh.sample +++ b/plan9/config_sh.sample @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/uname -n' api_revision='5' -api_subversion='4' +api_subversion='5' api_version='15' -api_versionstring='5.15.4' +api_versionstring='5.15.5' ar='ar' -archlib='/sys/lib/perl5/5.15.4/386' -archlibexp='/sys/lib/perl5/5.15.4/386' +archlib='/sys/lib/perl5/5.15.5/386' +archlibexp='/sys/lib/perl5/5.15.5/386' archname64='' archname='386' archobjs='' @@ -719,17 +719,17 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='/sys/lib/perl/5.15.4/386' +installarchlib='/sys/lib/perl/5.15.5/386' installbin='/usr/bin' installman1dir='/sys/man/1pub' installman3dir='/sys/man/2pub' installprefix='/usr' installprefixexp='/usr' -installprivlib='/sys/lib/perl/5.15.4' +installprivlib='/sys/lib/perl/5.15.5' installscript='/usr/bin' -installsitearch='/sys/lib/perl/5.15.4/site_perl/386' +installsitearch='/sys/lib/perl/5.15.5/site_perl/386' installsitebin='/usr/bin' -installsitelib='/sys/lib/perl/5.15.4/site_perl' +installsitelib='/sys/lib/perl/5.15.5/site_perl' installstyle='lib/perl5' installusrbinperl='undef' installvendorarch='' @@ -849,8 +849,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/sys/lib/perl/5.15.4' -privlibexp='/sys/lib/perl/5.15.4' +privlib='/sys/lib/perl/5.15.5' +privlibexp='/sys/lib/perl/5.15.5' procselfexe='' prototype='define' ptrsize='4' @@ -915,13 +915,13 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0' sig_size='50' signal_t='void' -sitearch='/sys/lib/perl/5.15.4/site_perl/386' +sitearch='/sys/lib/perl/5.15.5/site_perl/386' sitearchexp='/sys/lib/perl/site_perl/386' sitebin='/usr/bin' sitebinexp='/usr/bin' -sitelib='/sys/lib/perl/5.15.4/site_perl' -sitelib_stem='/sys/lib/perl/5.15.4/site_perl' -sitelibexp='/sys/lib/perl/5.15.4/site_perl' +sitelib='/sys/lib/perl/5.15.5/site_perl' +sitelib_stem='/sys/lib/perl/5.15.5/site_perl' +sitelibexp='/sys/lib/perl/5.15.5/site_perl' siteprefix='/usr' siteprefixexp='/usr' sizesize='4' @@ -954,7 +954,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/sys/include/ape/string.h' submit='' -subversion='4' +subversion='5' sysman='/sys/man/1pub' tail='' tar='' @@ -1034,8 +1034,8 @@ vendorlib_stem='' vendorlibexp='' vendorprefix='' vendorprefixexp='' -version='5.15.4' -version_patchlevel_string='version 15 subversion 4' +version='5.15.5' +version_patchlevel_string='version 15 subversion 5' versiononly='undef' vi='' voidflags='15' @@ -1050,9 +1050,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=15 -PERL_SUBVERSION=4 +PERL_SUBVERSION=5 PERL_API_REVISION=5 PERL_API_VERSION=15 -PERL_API_SUBVERSION=4 +PERL_API_SUBVERSION=5 PERL_PATCHLEVEL= PERL_CONFIG_SH=true @@ -136,6 +136,7 @@ h Miscellaneous perlhist Perl history records perldelta Perl changes since previous version + perl5156delta Perl changes in version 5.15.6 perl5155delta Perl changes in version 5.15.5 perl5154delta Perl changes in version 5.15.4 perl5153delta Perl changes in version 5.15.3 diff --git a/pod/.gitignore b/pod/.gitignore index 394aa805ea..83ea9a09fe 100644 --- a/pod/.gitignore +++ b/pod/.gitignore @@ -58,7 +58,7 @@ /podselect.bat # generated -/perl5155delta.pod +/perl5156delta.pod /perlapi.pod /perlintern.pod *.html diff --git a/pod/Makefile.SH b/pod/Makefile.SH index d970c8682f..5cc9b83902 100644 --- a/pod/Makefile.SH +++ b/pod/Makefile.SH @@ -76,7 +76,7 @@ html: $(POD2HTML) $(HTML) tex: $(POD2LATEX) $(TEX) toc perltoc.pod: buildtoc - $(PERLILIB) buildtoc --build-toc + $(PERLILIB) buildtoc .SUFFIXES: .pm .pod diff --git a/pod/buildtoc b/pod/buildtoc index 96163c4f90..930cf70d85 100644 --- a/pod/buildtoc +++ b/pod/buildtoc @@ -1,310 +1,33 @@ #!/usr/bin/perl -w use strict; -use vars qw($masterpodfile %Build %Targets $Verbose $Quiet %Ignore - @Master %Readmes %Pods %Aux %Readmepods %Pragmata %Modules - %Copies %Generated $Test); +use vars qw(%Found $Quiet); use File::Spec; use File::Find; use FindBin; -use Text::Tabs; use Text::Wrap; use Getopt::Long; -use Carp; no locale; -require 5.010; -{ +# Assumption is that we're either already being run from the top level (*nix, +# VMS), or have absolute paths in @INC (Win32, pod/Makefile) +BEGIN { my $Top = File::Spec->catdir($FindBin::Bin, File::Spec->updir); - - sub abs_from_top { - my $path = shift; - return File::Spec->catdir($Top, split /\//, $path) if $path =~ s!/\z!!; - return File::Spec->catfile($Top, split /\//, $path); - } -} - -# make it clearer when we haven't run to completion, as we can be quite -# noisy when things are working ok - -sub my_die { - print STDERR "$0: ", @_; - print STDERR "\n" unless $_[-1] =~ /\n\z/; - print STDERR "ABORTED\n"; - exit 255; -} - - -$masterpodfile = abs_from_top('pod.lst'); - -# Generate any/all of these files -# --verbose gives slightly more output -# --quiet suppresses routine warnings -# --build-all tries to build everything -# --build-foo updates foo as follows -# --showfiles shows the files to be changed -# --test exit if perl.pod, pod.lst, MANIFEST are consistent, and regenerated -# files are up to date, die otherwise. - -%Targets - = ( - toc => 'pod/perltoc.pod', - manifest => 'MANIFEST', - perlpod => 'pod/perl.pod', - vms => 'vms/descrip_mms.template', - nmake => 'win32/Makefile', - dmake => 'win32/makefile.mk', - podmak => 'win32/pod.mak', - # plan9 => 'plan9/mkfile'), - unix => 'Makefile.SH', - # TODO: add roffitall - ); - -foreach (values %Targets) { - $_ = abs_from_top($_); -} - -# process command-line switches - -{ - my @files = keys %Targets; - my $filesopts = join(" | ", map { "--build-$_" } "all", sort @files); - my $showfiles; - my %build_these; - die <<__USAGE__ -$0: Usage: $0 [--verbose] [--showfiles] $filesopts -__USAGE__ - unless @ARGV - && GetOptions (verbose => \$Verbose, - quiet => \$Quiet, - showfiles => \$showfiles, - test => \$Test, - map {+"build-$_", \$build_these{$_}} @files, 'all'); - if ($build_these{all}) { - %Build = %Targets; - } else { - while (my ($file, $want) = each %build_these) { - $Build{$file} = $Targets{$file} if $want; - } - } - if ($showfiles) { - print - join(" ", - sort { lc $a cmp lc $b } - map { - my ($v, $d, $f) = File::Spec->splitpath($_); - my @d; - @d = defined $d ? File::Spec->splitdir($d) : (); - shift @d if @d; - File::Spec->catfile(@d ? - (@d == 1 && $d[0] eq '' ? () : @d) - : "pod", $f); - } @Targets{@files}), - "\n"; - exit(0); - } + chdir $Top or die "Can't chdir to $Top: $!"; + require 'Porting/pod_lib.pl'; } -# Don't copy these top level READMEs -%Ignore - = ( - micro => 1, -# vms => 1, - ); +die "$0: Usage: $0 [--quiet]\n" + unless GetOptions (quiet => \$Quiet) && !@ARGV; -if ($Verbose) { - print "I will be building $_\n" foreach keys %Build; -} - -my $delta_target; - -{ - my $source = 'perldelta.pod'; - my $filename = abs_from_top("pod/$source"); - open my $fh, '<', $filename or my_die "Can't open $filename: $!"; - local $/; - my $contents = <$fh>; - my @want = - $contents =~ /perldelta - what is new for perl v5\.(\d+)\.(\d+)\n/; - die "Can't extract version from $filename" unless @want; - $delta_target = "perl5$want[0]$want[1]delta.pod"; - - # This way round so that keys can act as a MANIFEST skip list - # Targets will always be in the pod directory. Currently we can only cope - # with sources being in the same directory. - $Copies{$delta_target} = $source; -} - -# process pod.lst - -open my $master, '<', $masterpodfile or my_die "Can't open $masterpodfile: $!"; - -foreach (<$master>) { - next if /^\#/; - - # At least one upper case letter somewhere in the first group - if (/^(\S+)\s(.*)/ && $1 =~ tr/h//) { - # it's a heading - my $flags = $1; - $flags =~ tr/h//d; - my %flags = (header => 1); - $flags{toc_omit} = 1 if $flags =~ tr/o//d; - $flags{aux} = 1 if $flags =~ tr/a//d; - my_die "Unknown flag found in heading line: $_" if length $flags; - push @Master, [\%flags, $2]; - - } elsif (/^(\S*)\s+(\S+)\s+(.*)/) { - # it's a section - my ($flags, $podname, $desc) = ($1, $2, $3); - my $filename = "${podname}.pod"; - $filename = "pod/${filename}" if $filename !~ m{/}; - - my %flags = (indent => 0); - $flags{indent} = $1 if $flags =~ s/(\d+)//; - $flags{toc_omit} = 1 if $flags =~ tr/o//d; - $flags{aux} = 1 if $flags =~ tr/a//d; - $flags{perlpod_omit} = "$podname.pod" eq $delta_target; - - $Generated{"$podname.pod"}++ if $flags =~ tr/g//d; - - if ($flags =~ tr/r//d) { - my $readme = $podname; - $readme =~ s/^perl//; - $Readmepods{$podname} = $Readmes{$readme} = $desc; - $flags{readme} = 1; - } elsif ($flags{aux}) { - $Aux{$podname} = $desc; - } else { - $Pods{$podname} = $desc; - } - my_die "Unknown flag found in section line: $_" if length $flags; - my $shortname = $podname =~ s{.*/}{}r; - push @Master, [\%flags, $podname, $filename, $desc, $shortname]; - } elsif (/^$/) { - push @Master, undef; - } else { - my_die "Malformed line: $_" if $1 =~ tr/A-Z//; - } -} +my $state = get_pod_metadata(0, 'pod/perltoc.pod'); -close $master; - -# Sanity cross check -{ - my (%disk_pods, @disk_pods); - my (@manipods, %manipods); - my (@manireadmes, %manireadmes); - my (@perlpods, %perlpods); - my (@cpanpods, %cpanpods, %cpanpods_short); - my (%our_pods); - - # These are stub files for deleted documents. We don't want them to show up - # in perl.pod, they just exist so that if someone types "perldoc perltoot" - # they get some sort of pointer to the new docs. - my %ignoredpods - = map { ( "$_.pod" => 1 ) } qw( perlboot perlbot perltooc perltoot ); - - # Convert these to a list of filenames. - foreach (keys %Pods, keys %Readmepods) { - $our_pods{"$_.pod"}++; - } - - opendir my $dh, abs_from_top('pod/'); - while (defined ($_ = readdir $dh)) { - next unless /\.pod\z/; - push @disk_pods, $_; - ++$disk_pods{$_}; - } - - # Things we copy from won't be in perl.pod - # Things we copy to won't be in MANIFEST - - my $filename = abs_from_top('MANIFEST'); - open my $mani, '<', $filename or my_die "opening $filename failed: $!"; - while (<$mani>) { - chomp; - s/\s+.*$//; - if (m!^pod/([^.]+\.pod)!i) { - push @manipods, $1; - } elsif (m!^README\.(\S+)!i) { - next if $Ignore{$1}; - push @manireadmes, "perl$1.pod"; - } elsif (exists $our_pods{$_}) { - push @cpanpods, $_; - $disk_pods{$_}++ - if -e $_; - } - } - close $mani or my_die "close MANIFEST: $!\n"; - @manipods{@manipods} = @manipods; - @manireadmes{@manireadmes} = @manireadmes; - @cpanpods{@cpanpods} = map { s/.*\///r } @cpanpods; - %cpanpods_short = reverse %cpanpods; - - $filename = abs_from_top('pod/perl.pod'); - open my $perlpod, '<', $filename or my_die "opening $filename failed: $!\n"; - while (<$perlpod>) { - if (/^For ease of access, /../^\(If you're intending /) { - if (/^\s+(perl\S*)\s+\w/) { - push @perlpods, "$1.pod"; - } - } - } - close $perlpod or my_die "close perlpod: $!\n"; - my_die "could not find the pod listing of perl.pod\n" - unless @perlpods; - @perlpods{@perlpods} = @perlpods; - - my @inconsistent; - foreach my $i (sort keys %disk_pods) { - push @inconsistent, "$0: $i exists but is unknown by buildtoc\n" - unless $our_pods{$i}; - push @inconsistent, "$0: $i exists but is unknown by ../MANIFEST\n" - if !$manipods{$i} && !$manireadmes{$i} && !$Copies{$i} && !$Generated{$i} && !$cpanpods{$i}; - push @inconsistent, "$0: $i exists but is unknown by perl.pod\n" - if !$perlpods{$i} && !exists $Copies{$i} && !$cpanpods{$i} && !$ignoredpods{$i}; - } - my %BuildFiles; - foreach my $path (values %Build) { - (undef, undef, my $file) = File::Spec->splitpath($path); - ++$BuildFiles{$file} - } - - foreach my $i (sort keys %our_pods) { - push @inconsistent, "$0: $i is known by buildtoc but does not exist\n" - unless $disk_pods{$i} or $BuildFiles{$i}; - } - foreach my $i (sort keys %manipods) { - push @inconsistent, "$0: $i is known by ../MANIFEST but does not exist\n" - unless $disk_pods{$i}; - push @inconsistent, "$0: $i is known by ../MANIFEST but is marked as generated\n" - if $Generated{$i}; - } - foreach my $i (sort keys %perlpods) { - push @inconsistent, "$0: $i is known by perl.pod but does not exist\n" - unless $disk_pods{$i} or $BuildFiles{$i} or $cpanpods_short{$i}; - } - if ($Test) { - delete $Build{toc}; - printf "1..%d\n", 1 + scalar keys %Build; - if (@inconsistent) { - print "not ok 1\n"; - die @inconsistent - } - print "ok 1\n"; - } - else { - warn @inconsistent if @inconsistent; - } -} +warn @{$state->{inconsistent}} if @{$state->{inconsistent}}; # Find all the modules -if ($Build{toc}) { - my @modpods; - find \&getpods => abs_from_top('lib/'); - - sub getpods { +my @modpods; +find(sub { if (/\.p(od|m)$/) { my $file = $File::Find::name; return if $file =~ qr!/lib/Pod/Functions.pm\z!; # Used only by pod itself @@ -331,39 +54,34 @@ if ($Build{toc}) { warn "$0: NOTE: cannot find '=head1 NAME' in:\n $file\n" unless $Quiet; } } - } + }, 'lib'); - my_die "Can't find any pods!\n" unless @modpods; +my_die "Can't find any pods!\n" unless @modpods; - my %done; - for (@modpods) { +my %done; +for (@modpods) { my $name = $_; $name =~ s/\.p(m|od)$//; $name =~ s-.*?/lib/--; $name =~ s-/-::-g; next if $done{$name}++; - if ($name =~ /^[a-z]/) { - $Pragmata{$name} = $_; - } else { - $Modules{$name} = $_; - } - } + $Found{$name =~ /^[a-z]/ ? 'PRAGMA' : 'MODULE'}{$name} = $_; } -# OK. Now a lot of ancillary function definitions follow -# Main program returns at "Do stuff" +# Accumulating everything into a lexical before writing to disk dates from the +# time when this script also provided the functionality of regen/pod_rules.pl +# and this code was in a subroutine do_toc(). In turn, the use of a file scoped +# lexical instead of a parameter or return value is because the code dates back +# further still, and used *only* to create pod/perltoc.pod by printing direct my $OUT; -sub do_toc { - my $filename = shift; - - ($_= <<"EOPOD2B") =~ s/^\t//gm and $OUT .= $_; +($_= <<"EOPOD2B") =~ s/^\t//gm and $OUT .= $_; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is autogenerated by buildtoc from all the other pods. - # Edit those files and run buildtoc --build-toc to effect changes. + # Edit those files and run $0 to effect changes. =head1 NAME @@ -378,39 +96,27 @@ sub do_toc { =head1 BASIC DOCUMENTATION EOPOD2B -#' make emacs happy - - # All the things in the master list that happen to be pod filenames - foreach (grep {defined $_ && @$_ == 3 && !$_->[0]{toc_omit}} @Master) { - podset($_->[1], abs_from_top($_->[2])); - } - - - ($_= <<"EOPOD2B") =~ s/^\t//gm and $OUT .= $_; - - - - =head1 PRAGMA DOCUMENTATION - -EOPOD2B - foreach (sort keys %Pragmata) { - podset($_, $Pragmata{$_}); - } +# All the things in the master list that happen to be pod filenames +foreach (grep {defined $_ && @$_ == 3 && !$_->[0]{toc_omit}} @{$state->{master}}) { + podset(@$_); +} - ($_= <<"EOPOD2B") =~ s/^\t//gm and $OUT .= $_; +foreach my $type (qw(PRAGMA MODULE)) { + ($_= <<"EOPOD2B") =~ s/^\t//gm and $OUT .= $_; - =head1 MODULE DOCUMENTATION + =head1 $type DOCUMENTATION EOPOD2B - foreach (sort keys %Modules) { - podset($_, $Modules{$_}); - } + foreach my $name (sort keys %{$Found{$type}}) { + podset($name, $Found{$type}{$name}); + } +} - $_= <<"EOPOD2B"; +$_= <<"EOPOD2B"; =head1 AUXILIARY DOCUMENTATION @@ -422,8 +128,8 @@ EOPOD2B EOPOD2B - $_ .= join "\n", map {"\t=item $_\n"} sort keys %Aux; - $_ .= <<"EOPOD2B" ; +$_ .= join "\n", map {"\t=item $_\n"} sort keys %{$state->{aux}}; +$_ .= <<"EOPOD2B" ; =back @@ -435,16 +141,17 @@ EOPOD2B EOPOD2B - s/^\t//gm; - $OUT .= "$_\n"; +s/^\t//gm; +$OUT .= "$_\n"; - $OUT =~ s/\n\s+\n/\n\n/gs; - $OUT =~ s/\n{3,}/\n\n/g; +$OUT =~ s/\n\s+\n/\n\n/gs; +$OUT =~ s/\n{3,}/\n\n/g; - $OUT =~ s/([^\n]+)/wrap('', '', $1)/ge; +$OUT =~ s/([^\n]+)/wrap('', '', $1)/ge; - return $OUT; -} +write_or_die('pod/perltoc.pod', $OUT); + +exit(0); # Below are all the auxiliary routines for generating perltoc.pod @@ -527,250 +234,17 @@ sub unitem { $initem = 0; } -# End of original buildtoc. From here on are routines to generate new sections -# for and inplace edit other files - -sub generate_perlpod { - my @output; - my $maxlength = 0; - foreach (@Master) { - my $flags = $_->[0]; - next if $flags->{aux}; - next if $flags->{perlpod_omit}; - - if (@$_ == 2) { - # Heading - push @output, "=head2 $_->[1]\n"; - } elsif (@$_ == 5) { - # Section - my $start = " " x (4 + $flags->{indent}) . $_->[4]; - $maxlength = length $start if length ($start) > $maxlength; - push @output, [$start, $_->[3]]; - } elsif (@$_ == 0) { - # blank line - push @output, "\n"; - } else { - my_die "Illegal length " . scalar @$_; - } - } - # want at least 2 spaces padding - $maxlength += 2; - $maxlength = ($maxlength + 3) & ~3; - # sprintf gives $1.....$2 where ... are spaces: - return unexpand (map {ref $_ ? sprintf "%-${maxlength}s%s\n", @$_ : $_} - @output); -} - - -sub generate_manifest { - # Annoyingly, unexpand doesn't consider it good form to replace a single - # space before a tab with a tab - # Annoyingly (2) it returns read only values. - my @temp = unexpand (map {sprintf "%-32s%s", @$_} @_); - map {s/ \t/\t\t/g; $_} @temp; -} -sub generate_manifest_pod { - generate_manifest map {["pod/$_.pod", $Pods{$_}]} - sort grep { - !$Copies{"$_.pod"} && !$Generated{"$_.pod"} && !-e "$_.pod" - } keys %Pods; -} -sub generate_manifest_readme { - generate_manifest sort {$a->[0] cmp $b->[0]} - ["README.vms", "Notes about installing the VMS port"], - map {["README.$_", $Readmes{$_}]} keys %Readmes; -} +# Code added in commit 416302502f485afa, but never used. +# Probably roffitall should become something that buildtoc generates, instead +# of something that we ship in the distribution. sub generate_roffitall { - (map ({"\t\$maindir/$_.1\t\\"}sort keys %Pods), + (map ({"\t\$maindir/$_.1\t\\"}sort keys %{$state->{pods}}), "\t\t\\", - map ({"\t\$maindir/$_.1\t\\"}sort keys %Aux), + map ({"\t\$maindir/$_.1\t\\"}sort keys %{$state->{aux}}), "\t\t\\", - map ({"\t\$libdir/$_.3\t\\"}sort keys %Pragmata), + map ({"\t\$libdir/$_.3\t\\"}sort keys %{$Found{PRAGMA}}), "\t\t\\", - map ({"\t\$libdir/$_.3\t\\"}sort keys %Modules), + map ({"\t\$libdir/$_.3\t\\"}sort keys %{$Found{MODULE}}), ) } - -sub generate_nmake_1 { - # XXX Fix this with File::Spec - (map {sprintf "\tcopy ..\\README.%-8s ..\\pod\\perl$_.pod\n", $_} - sort keys %Readmes), - (map {"\tcopy ..\\pod\\$Copies{$_} ..\\pod\\$_\n"} sort keys %Copies); -} - -# This doesn't have a trailing newline -sub generate_nmake_2 { - # Spot the special case - local $Text::Wrap::columns = 76; - my $line = wrap ("\t ", "\t ", - join " ", sort keys %Copies, keys %Generated, - map {"perl$_.pod"} keys %Readmes); - $line =~ s/$/ \\/mg; - $line =~ s/ \\$//; - $line; -} - -sub generate_pod_mak { - my $variable = shift; - my @lines; - my $line = "\U$variable = " . join "\t\\\n\t", - map {"$_.$variable"} sort grep { $_ !~ m{/} } keys %Pods; - # Special case - $line =~ s/.*perltoc.html.*\n//m; - $line; -} - -sub verify_contiguous { - my ($name, $content, $what) = @_; - my $sections = () = $content =~ m/\0+/g; - croak("$0: $name contains no $what") if $sections < 1; - croak("$0: $name contains discontiguous $what") if $sections > 1; -} - -sub do_manifest { - my ($name, $prev) = @_; - my @manifest = - grep {! m!^pod/[^.]+\.pod.*!} - grep {! m!^README\.(\S+)! || $Ignore{$1}} split "\n", $prev; - join "\n", ( - # Dictionary order - fold and handle non-word chars as nothing - map { $_->[0] } - sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] } - map { my $f = lc $_; $f =~ s/[^a-z0-9\s]//g; [ $_, $f ] } - @manifest, - &generate_manifest_pod(), - &generate_manifest_readme()), ''; -} - -sub do_nmake { - my ($name, $makefile) = @_; - $makefile =~ s/^\tcopy \.\.\\README.*\n/\0/gm; - verify_contiguous($name, $makefile, 'README copies'); - # Now remove the other copies that follow - 1 while $makefile =~ s/\0\tcopy .*\n/\0/gm; - $makefile =~ s/\0+/join ("", &generate_nmake_1)/se; - - $makefile =~ s{(-cd \$\(PODDIR\) && del /f[^\n]+).*?(-cd \.\.\\utils && del /f)} - {"$1\n" . &generate_nmake_2."\n\t$2"}se; - $makefile; -} - -# shut up used only once warning -*do_dmake = *do_dmake = \&do_nmake; - -sub do_perlpod { - my ($name, $pod) = @_; - - unless ($pod =~ s{(For\ ease\ of\ access,\ .*\n) - (?:\s+[a-z]{4,}.*\n # fooo - |=head.*\n # =head foo - |\s*\n # blank line - )+ - } - {$1 . join "", &generate_perlpod}mxe) { - my_die "Failed to insert amendments in do_perlpod"; - } - $pod; -} - -sub do_podmak { - my ($name, $body) = @_; - foreach my $variable (qw(pod man html tex)) { - my_die "could not find $variable in $name" - unless $body =~ s{\n\U$variable\E = (?:[^\n]*\\\n)*[^\n]*} - {"\n" . generate_pod_mak ($variable)}se; - } - $body; -} - -sub do_vms { - my ($name, $makefile) = @_; - -# Looking for the macro defining the current perldelta: -#PERLDELTA_CURRENT = [.pod]perl5139delta.pod - - $makefile =~ s/\nPERLDELTA_CURRENT\s+=\s+\Q[.pod]perl\E\d+delta\.pod\n - /\0/sx; - verify_contiguous($name, $makefile, 'current perldelta macro'); - $makefile =~ s/\0+/join "\n", '', "PERLDELTA_CURRENT = [.pod]$delta_target", ''/se; - - $makefile; -} - -sub do_unix { - my ($name, $makefile_SH) = @_; - - $makefile_SH =~ s{^(perltoc_pod_prereqs = extra.pods).*} - {join ' ', $1, map "pod/$_", - sort keys %Copies, grep {!/perltoc/} keys %Generated - }mge; - -# pod/perl511delta.pod: pod/perldelta.pod -# cd pod && $(LNS) perldelta.pod perl511delta.pod - - $makefile_SH =~ s!( -pod/perl[a-z0-9_]+\.pod: pod/perl[a-z0-9_]+\.pod - \$\(LNS\) perl[a-z0-9_]+\.pod pod/perl[a-z0-9_]+\.pod -)+!\0!gm; - - verify_contiguous($name, $makefile_SH, 'copy rules'); - - my @copy_rules = map " -pod/$_: pod/$Copies{$_} - \$(LNS) $Copies{$_} pod/$_ -", keys %Copies; - - $makefile_SH =~ s/\0+/join '', @copy_rules/se; - $makefile_SH; - -} - -# Do stuff - -my $built; -while (my ($target, $name) = each %Targets) { - print "Working on target $target\n" if $Verbose; - next unless $Build{$target}; - $built++; - my ($orig, $mode); - print "Now processing $name\n" if $Verbose; - if ($target ne "toc") { - local $/; - open my $thing, '<', $name or my_die "Can't open $name: $!"; - binmode $thing; - $orig = <$thing>; - my_die "$name contains NUL bytes" if $orig =~ /\0/; - } - - my $new = do { - no strict 'refs'; - &{"do_$target"}($target, $orig); - }; - - if (defined $orig) { - if ($new eq $orig) { - if ($Test) { - printf "ok %d # $name is up to date\n", $built + 1; - } elsif ($Verbose) { - print "Was not modified\n"; - } - next; - } elsif ($Test) { - printf "not ok %d # $name is up to date\n", $built + 1; - next; - } - $mode = (stat $name)[2] // my_die "Can't stat $name: $!"; - rename $name, "$name.old" or my_die "Can't rename $name to $name.old: $!"; - } - - open my $thing, '>', $name or my_die "Can't open $name for writing: $!"; - binmode $thing; - print $thing $new or my_die "print to $name failed: $!"; - close $thing or my_die "close $name failed: $!"; - if (defined $mode) { - chmod $mode & 0777, $name or my_die "can't chmod $mode $name: $!"; - } -} - -warn "$0: was not instructed to build anything\n" unless $built || $Test; diff --git a/pod/perl.pod b/pod/perl.pod index d41240411c..6321dee05a 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -159,6 +159,7 @@ For ease of access, the Perl manual has been split up into several sections. perlhist Perl history records perldelta Perl changes since previous version + perl5155delta Perl changes in version 5.15.5 perl5154delta Perl changes in version 5.15.4 perl5153delta Perl changes in version 5.15.3 perl5152delta Perl changes in version 5.15.2 diff --git a/pod/perl5155delta.pod b/pod/perl5155delta.pod new file mode 100644 index 0000000000..0269f69e96 --- /dev/null +++ b/pod/perl5155delta.pod @@ -0,0 +1,659 @@ +=encoding utf8 + +=head1 NAME + +perl5155delta - what is new for perl v5.15.5 + +=head1 DESCRIPTION + +This document describes differences between the 5.15.4 release and +the 5.15.5 release. + +If you are upgrading from an earlier release such as 5.15.3, first read +L<perl5154delta>, which describes differences between 5.15.3 and +5.15.4. + +=head1 Core Enhancements + +=head2 More consistent C<eval> + +The C<eval> operator sometimes treats a string argument as a sequence of +characters and sometimes as a sequence of bytes, depending on the internal +encoding. The internal encoding is not supposed to make any difference, +but there is code that relies on this inconsistency. + +Under C<use v5.15> and higher, the C<unicode_eval> and C<evalbytes> +features resolve this. The C<unicode_eval> feature causes C<eval $string> +to treat the string always as Unicode. The C<evalbytes> features provides +a function, itself called C<evalbytes>, which evaluates its argument always +as a string of bytes. + +These features also fix oddities with source filters leaking to outer +dynamic scopes. + +See L<feature> for more detail. + +=head2 C<$[> is back + +The C<$[> variable is back again, but is now implemented as a module, so +programs that do not mention it (i.e., most of them), will not incur any +run-time penalty. In a later release in the 5.15 branch it might be +disabled in the scope of C<use v5.16>. + +The new implementation has some bug fixes. See L<arybase>. + +=head1 Security + +=head2 Privileges are now set correctly when assigning to C<$(> + +A hypothetical bug (probably non-exploitable in practice) due to the +incorrect setting of the effective group ID while setting C<$(> has been +fixed. The bug would only have affected systems that have C<setresgid()> +but not C<setregid()>, but no such systems are known of. + +=head1 Incompatible Changes + +=head2 Certain deprecated Unicode properties are no longer supported by default + +Perl should never have exposed certain Unicode properties that are used +by Unicode internally and not meant to be publicly available. Use of +these has generated deprecated warning messages since Perl 5.12. The +removed properties are Other_Alphabetic, +Other_Default_Ignorable_Code_Point, Other_Grapheme_Extend, +Other_ID_Continue, Other_ID_Start, Other_Lowercase, Other_Math, and +Other_Uppercase. + +Perl may be recompiled to include any or all of them; instructions are +given in +L<perluniprops/Unicode character properties that are NOT accepted by Perl>. + +=head2 Dereferencing IO thingies as typeglobs + +The C<*{...}> operator, when passed a reference to an IO thingy (as in +C<*{*STDIN{IO}}>), creates a new typeglob containing just that IO object. + +Previously, it would stringify as an empty string, but some operators would +treat it as undefined, producing an "uninitialized" warning. + +Having a typeglob appear as an empty string is a side effect of the +implementation that has caused various bugs over the years. + +The solution was to make it stringify like a normal anonymous typeglob, +like those produced by C<< open($foo->{bar}, ...) >> [perl #96326]. + +=head1 Deprecations + +=head2 Don't read the Unicode data base files in F<lib/unicore> + +It is now deprecated to directly read the Unicode data base files. +These are stored in the F<lib/unicore> directory. Instead, you should +use the new functions in L<Unicode::UCD>. These provide a stable API, +and give complete information. (This API is, however, subject to change +somewhat during the 5.15 development cycle, as we gain experience and +get feedback from using it.) + +Perl may at some point in the future change or remove the files. The +file most likely for applications to have used is F<lib/unicore/ToDigit.pl>. +L<Unicode::UCD/prop_invmap()> can be used to get at its data instead. + +=head1 Performance Enhancements + +=over 4 + +=item * + +Due to changes in L<File::Glob>, Perl's C<glob> function and its +C<< <...> >> equivalent are now much faster. The splitting of the pattern +into words has been rewritten in C, resulting in speed-ups of 20% in some +cases. + +This does not affect VMS, as it does not use File::Glob. + +=back + +=head1 Modules and Pragmata + +=head2 New Modules and Pragmata + +=over 4 + +=item * + +L<arybase> -- this new module implements the C<$[> variable. + +=back + +=head2 Updated Modules and Pragmata + +=over 4 + +=item * + +L<Archive::Extract> has been upgraded from version 0.56 to version 0.58. + +=item * + +L<B::Concise> has been upgraded from version 0.86 to version 0.87. + +=item * + +L<B::Deparse> has been upgraded from version 1.08 to version 1.09. + +It now correctly deparses C<CORE::do>, C<CORE::glob> and slices of empty +lists. + +=item * + +L<CGI> has been upgraded from version 3.55 to version 3.58. + +Use public and documented FCGI.pm API in CGI::Fast +CGI::Fast was using an FCGI API that was deprecated and removed from +documentation more than ten years ago. Usage of this deprecated API with +FCGI E<gt>= 0.70 or FCGI E<lt>= 0.73 introduces a security issue. +L<https://rt.cpan.org/Public/Bug/Display.html?id=68380> +L<http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2011-2766> + +=item * + +L<charnames> has been upgraded from version 1.23 to version 1.24. + +=item * + +L<Compress::Raw::Bzip2> has been upgraded from version 2.037 to version 2.042. + +=item * + +L<Compress::Raw::Zlib> has been upgraded from version 2.037 to version 2.042. + +=item * + +L<Compress::Zlib> has been upgraded from version 2.037 to version 2.042. + +=item * + +L<CPANPLUS> has been upgraded from version 0.9111 to version 0.9112. + +=item * + +L<CPANPLUS::Dist::Build> has been upgraded from version 0.58 to version 0.60. + +=item * + +L<Digest::SHA> has been upgraded from version 5.62 to version 5.63. + +Added code to allow very large data inputs all at once, which had previously been +limited to several hundred megabytes at a time + +=item * + +L<Errno> has been upgraded from version 1.14 to version 1.15. + +Choosing an archname containing a @, $ or % character no longer results in +unintended interpolation in Errno's architecture check. + +=item * + +L<ExtUtils::MakeMaker> has been upgraded from version 6.61_01 to version 6.63_02. + +=item * + +L<feature> has been upgraded from version 1.22 to version 1.23. + +=item * + +L<File::DosGlob> has been upgraded from version 1.05 to version 1.06. + +=item * + +L<File::Glob> has been upgraded from version 1.13 to version 1.14. + +It has a new C<:bsd_glob> export tag, intended to replace C<:glob>. Like +C<:glob> it overrides C<glob> with a function that does not split the glob +pattern into words, but, unlike C<:glob>, it iterates properly in scalar +context, instead of returning the last file. + +There are other changes affecting Perl's own C<glob> operator (which uses +File::Glob internally, except on VMS). See L</Performance Enhancements> +and L</Selected Bug Fixes>. + +=item * + +L<HTTP::Tiny> has been upgraded from version 0.013 to version 0.016. + +Adds additional shorthand methods for all common HTTP verbs, +a C<post_form()> method for POST-ing x-www-form-urlencoded data and +a C<www_form_urlencode()> utility method. + +=item * + +L<Module::CoreList> has been upgraded from version 2.57 to version 2.58. + +=item * + +L<Opcode> has been upgraded from version 1.20 to version 1.21. + +=item * + +L<perlfaq> has been upgraded from version 5.0150035 to version 5.0150036. + +=item * + +L<Socket> as been upgraded from version 1.94_01 to 1.94_02. + +It has new functions and constants for handling IPv6 sockets: + + pack_ipv6_mreq + unpack_ipv6_mreq + IPV6_ADD_MEMBERSHIP + IPV6_DROP_MEMBERSHIP + IPV6_MTU + IPV6_MTU_DISCOVER + IPV6_MULTICAST_HOPS + IPV6_MULTICAST_IF + IPV6_MULTICAST_LOOP + IPV6_UNICAST_HOPS + IPV6_V6ONLY + +=item * + +L<Storable> has been upgraded from version 2.32 to 2.33. + +The ability to add a fake entry to %INC to prevent Log::Agent from loading +has been restored. In version 2.27 (included with perl 5.14.0), Storable +starting producing an error instead. + +=item * + +L<strict> has been upgraded from version 1.04 to version 1.05. + +=item * + +L<Unicode::Collate> has been upgraded from version 0.80 to version 0.85. + +Locales updated to CLDR 2.0: mk, mt, nb, nn, ro, ru, sk, sr, sv, uk, +zh__pinyin, zh__stroke +Newly supported locales: bn, fa, ml, mr, or, pa, sa, si, si__dictionary, +sr_Latn, sv__reformed, ta, te, th, ur, wae. + +=item * + +L<Unicode::UCD> has been upgraded from version 0.36 to version 0.37. + +This adds four new functions: C<prop_aliases()>, and +C<prop_value_aliases()> which are used to find all the Unicode-approved +synonyms for property names, or to convert from one name to another; +C<prop_invlist> which returns all the code points matching a given +Unicode binary property; and C<prop_invmap> which returns the complete +specification of a given Unicode property. + +=item * + +L<UNIVERSAL> has been upgraded from version 1.09 to version 1.10. + +=back + +=head1 Diagnostics + +The following additions or changes have been made to diagnostic output, +including warnings and fatal error messages. For the complete list of +diagnostic messages, see L<perldiag>. + +=head2 New Diagnostics + +=head3 New Errors + +=over 4 + +=item * + +L<Source filters apply only to byte streams|perldiag/"Source filters apply only to byte streams"> + +This new error occurs when you try to activate a source filter (usually by +loading a source filter module) within a string passed to C<eval> under the +C<unicode_eval> feature. + +=item * + +L<That use of $[ is unsupported|perldiag/"That use of $[ is unsupported"> + +This previously removed error has been restored with the re-implementation +of C<$[> as a module. + +=back + +=head3 New Warnings + +=over 4 + +=item * + +L<length() used on %s|perldiag/length() used on %s> + +This new warning occurs when C<length> is used on an array or hash, instead +of C<scalar(@array)> or C<scalar(keys %hash)>. + +=item * + +L<$[ used in %s (did you mean $] ?)|perldiag/"$[ used in %s (did you mean $] ?)"> + +This new warning exists to catch the mistaken use of C<$[> in version +checks. C<$]>, not C<$[>, contains the version number. C<$[> in a numeric +comparison is almost always wrong. + +=item * + +L<Use of assignment to $[ is deprecated|perldiag/"Use of assignment to $[ is deprecated"> + +This previously removed warning has been restored with the re-implementation +of C<$[> as a module. + +=back + +=head2 Changes to Existing Diagnostics + +=over 4 + +=item * + +The uninitialized warning for C<y///r> when C<$_> is implicit and undefined +now mentions the variable name, just like the non-/r variation of the +operator. + +=item * + +The "Applying pattern match..." or similar warning produced when an array +or hash is on the left-hand side of the C<=~> operator now mentions the +name of the variable. + +=back + +=head1 Configuration and Compilation + +=over 4 + +=item * + +F<pod/buildtoc>, used by the build process to build L<perltoc>, has been +refactored and simplified. It now only contains code to build L<perltoc>; +the code to regenerate Makefiles has been moved to F<Porting/pod_rules.pl>. +It's a bug if this change has any material effect on the build process. + +=back + +=head1 Platform Support + +=head2 Platform-Specific Notes + +=over 4 + +=item GNU/Hurd + +Numerous build and test failures on GNU/Hurd have been resolved with hints +for building DBM modules, detection of the library search path, and enabling +of large file support. + +=item OpenVOS + +Perl is now built with dynamic linking on OpenVOS, the minimum supported +version of which is now Release 17.1.0. + +=item SunOS + +The CC workshop C++ compiler is now detected and used on systems that ship +without cc. + +=back + +=head1 Internal Changes + +=over 4 + +=item * + +C<PL_curstash> is now reference-counted. + +=back + +=head1 Selected Bug Fixes + +=over 4 + +=item * + +Perl now holds an extra reference count on the package that code is +currently compiling in. This means that the following code no longer crashes [perl #101486]: + + package Foo; + BEGIN {*Foo:: = *Bar::} + sub foo; + +=item * + +F<dumpvar.pl>, and consequently the C<x> command in the debugger, have been +fixed to handle objects blessed into classes whose names contain "=". The +contents of such objects used not to be dumped [perl #101814]. + +=item * + +The C<x> repetition operator no longer crashes on 64-bit builds with large +repeat counts [perl #94560]. + +=item * + +A fix to C<glob> under miniperl (used to configure modules when perl itself +is built) in Perl 5.15.3 stopped C<< <~> >> from returning the home +directory, because it cleared %ENV before calling csh. Now C<$ENV{HOME}> +is preserved. This fix probably does not affect anything. If +L<File::Glob> fails to load for some reason, Perl reverts to using csh. +So it would apply in that case. + +=item * + +On OSes other than VMS, Perl's C<glob> operator (and the C<< <...> >> form) +use L<File::Glob> underneath. L<File::Glob> splits the pattern into words, +before feeding each word to its C<bsd_glob> function. + +There were several inconsistencies in the way the split was done. Now +quotation marks (' and ") are always treated as shell-style word delimiters +(that allow whitespace as part of a word) and backslashes are always +preserved, unless they exist to escape quotation marks. Before, those +would only sometimes be the case, depending on whether the pattern +contained whitespace. Also, escaped whitespace at the end of the pattern +is no longer stripped [perl #40470]. + +=item * + +C<CORE::glob> now works as a way to call the default globbing function. It +used to respect overrides, despite the C<CORE::> prefix. + +=item * + +In 5.14, C</[[:lower:]]/i> and C</[[:upper:]]/i> no longer matched the +opposite case. This has been fixed [perl #101970]. + +=item * + +A regular expression match with an overloaded object on the right-hand side +would in some cases stringify the object too many times. + +=item * + +The C-level C<pregcomp> function could become confused as to whether the +pattern was in UTF8 if the pattern was an overloaded, tied, or otherwise +magical scalar [perl #101940]. + +=item * + +A regression has been fixed that was introduced in 5.14, in C</i> +regular expression matching, in which a match improperly fails if the +pattern is in UTF-8, the target string is not, and a Latin-1 character +precedes a character in the string that should match the pattern. [perl +#101710] + +=item * + +C<@{"..."} = reverse ...> started crashing in 5.15.3. This has been fixed. + +=item * + +C<ref> in a tainted expression started producing an "sv_upgrade" error in +5.15.4. This has been fixed. + +=item * + +Weak references to lexical hashes going out of scope were not going stale +(becoming undefined), but continued to point to the hash. + +=item * + +Weak references to lexical variables going out of scope are now broken +before any magical methods (e.g., DESTROY on a tie object) are called. +This prevents such methods from modifying the variable that will be seen +the next time the scope is entered. + +=item * + +A C<keys> optimisation in Perl 5.12.0 to make it faster on empty hashes +caused C<each> not to reset the iterator if called after the last element +was deleted. This has been fixed. + +=item * + +The C<#line 42 foo> directive used not to update the arrays of lines used +by the debugger if it occurred in a string eval. This was partially fixed +in 5.14, but it only worked for a single C<#line 42 foo> in each eval. Now +it works for multiple. + +=item * + +String eval used not to localise C<%^H> when compiling its argument if it +was empty at the time the C<eval> call itself was compiled. This could +lead to scary side effects, like C<use re "/m"> enabling other flags that +the surrounding code was trying to enable for its caller [perl #68750]. + +=item * + +Creating a BEGIN block from XS code (via C<newXS> or C<newATTRSUB>) would, +on completion, make the hints of the current compiling code the current +hints. This could cause warnings to occur in a non-warning scope. + +=item * + +C<eval $string> and C<require> no longer localise hints (C<$^H> and C<%^H>) +at run time, but only during compilation of the $string or required file. +This makes C<BEGIN { $^H{foo}=7 }> equivalent to +C<BEGIN { eval '$^H{foo}=7' }> [perl #70151]. + +=item * + +When subroutine calls are intercepted by the debugger, the name of the +subroutine or a reference to it is stored in C<$DB::sub>, for the debugger +to access. In some cases (such as C<$foo = *bar; undef *bar; &$foo>) +C<$DB::sub> would be set to a name that could not be used to find the +subroutine, and so the debugger's attempt to call it would fail. Now the +check to see whether a reference is needed is more robust, so those +problems should not happen anymore [rt.cpan.org #69862]. + +=item * + +Localising a tied scalar that returns a typeglob no longer stops it from +being tied till the end of the scope. + +=item * + +When C<open> is called with three arguments, the third being a file handle +(as in C<< open $fh, ">&", $fh2 >>), if the third argument is tied or a +reference to a tied variable, FETCH is now called exactly once, instead of +0, 2, or 3 times (all of which could occur in various circumstances). + +=item * + +C<sort> no longer ignores FETCH when passed a reference to a tied glob for +the comparison routine. + +=item * + +Warnings emitted by C<sort> when a custom comparison routine returns a +non-numeric value now show the line number of the C<sort> operator, rather +than the last line of the comparison routine. The warnings also occur now +only if warnings are enabled in the scope where C<sort> occurs. Previously +the warnings would occur if enabled in the comparison routine's scope. + +=item * + +C<Internals::SvREFCNT> now behaves consistently in 'get' and 'set' scenarios +[perl #103222] and also treats the reference count as unsigned. + +=item * + +Calling C<require> on an implicit C<$_> when C<*CORE::GLOBAL::require> has +been overridden does not segfault anymore, and C<$_> is now passed to the +overriding subroutine [perl #78260]. + +=back + +=head1 Acknowledgements + +Perl 5.15.5 represents approximately 1 month of development since Perl 5.15.4 +and contains approximately 28,000 lines of changes across 440 files from 29 +authors. + +Perl continues to flourish into its third decade thanks to a vibrant community +of users and developers. The following people are known to have contributed the +improvements that became Perl 5.15.5: + +Brian Fraser, Chris 'BinGOs' Williams, chromatic, Craig A. Berry, David Golden, +Father Chrysostomos, Florian Ragwitz, H.Merijn Brand, Jilles Tjoelker, Jim +Meyering, Karl Williamson, Laurent Dami, Leon Timmermans, Mark A. Stratman, +Matthew Horsfall, Michael G Schwern, Moritz Lenz, Nicholas Clark, Paul Evans, +Paul Green, Paul Johnson, Perlover, Pino Toscano, Reini Urban, Steve Hay, Tom +Christiansen, Tony Cook, Vincent Pit, Zefram. + +The list above is almost certainly incomplete as it is automatically generated +from version control history. In particular, it does not include the names of +the (very much appreciated) contributors who reported issues to the Perl bug +tracker. + +Many of the changes included in this version originated in the CPAN modules +included in Perl's core. We're grateful to the entire CPAN community for +helping Perl to flourish. + +For a more complete list of all of Perl's historical contributors, please see +the F<AUTHORS> file in the Perl source distribution. + +=head1 Reporting Bugs + +If you find what you think is a bug, you might check the articles +recently posted to the comp.lang.perl.misc newsgroup and the perl +bug database at http://rt.perl.org/perlbug/ . There may also be +information at http://www.perl.org/ , the Perl Home Page. + +If you believe you have an unreported bug, please run the L<perlbug> +program included with your release. Be sure to trim your bug down +to a tiny but sufficient test case. Your bug report, along with the +output of C<perl -V>, will be sent off to perlbug@perl.org to be +analysed by the Perl porting team. + +If the bug you are reporting has security implications, which make it +inappropriate to send to a publicly archived mailing list, then please send +it to perl5-security-report@perl.org. This points to a closed subscription +unarchived mailing list, which includes +all the core committers, who will be able +to help assess the impact of issues, figure out a resolution, and help +co-ordinate the release of patches to mitigate or fix the problem across all +platforms on which Perl is supported. Please only use this address for +security issues in the Perl core, not for modules independently +distributed on CPAN. + +=head1 SEE ALSO + +The F<Changes> file for an explanation of how to view exhaustive details +on what changed. + +The F<INSTALL> file for how to build Perl. + +The F<README> file for general stuff. + +The F<Artistic> and F<Copying> files for copyright information. + +=cut diff --git a/pod/perldelta.pod b/pod/perldelta.pod index a20d73fd71..5d8418bcb5 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -5,16 +5,16 @@ [ this is a template for a new perldelta file. Any text flagged as XXX needs to be processed before release. ] -perldelta - what is new for perl v5.15.5 +perldelta - what is new for perl v5.15.6 =head1 DESCRIPTION -This document describes differences between the 5.15.4 release and -the 5.15.5 release. +This document describes differences between the 5.15.5 release and +the 5.15.6 release. -If you are upgrading from an earlier release such as 5.15.3, first read -L<perl5154delta>, which describes differences between 5.15.3 and -5.15.4. +If you are upgrading from an earlier release such as 5.15.4, first read +L<perl5155delta>, which describes differences between 5.15.4 and +5.15.5. =head1 Notice @@ -96,34 +96,7 @@ XXX =item * -L<Archive::Extract> has been upgraded from version 0.56 to version 0.58. - -=item * - -L<CPANPLUS::Dist::Build> has been upgraded from version 0.58 to version 0.60. - -=item * - -L<ExtUtils::MakeMaker> has been upgraded from version 6.61_01 to version 6.63_01. - -=item * - -L<HTTP::Tiny> has been upgraded from version 0.013 to version 0.016. - -Adds additional shorthand methods for all common HTTP verbs, -a C<post_form()> method for POST-ing x-www-form-urlencoded data and -a C<www_form_urlencode()> utility method. - -=item * - -L<perlfaq> has been upgraded from version 5.0150035 to version 5.0150036. - -=item * - -L<Unicode::Collate> has been upgraded from version 0.80 to version 0.81. - -Locales updated to CLDR 2.0: mk, mt, nb, nn, ro, ru. -Newly supported locales: ml, mr, or, pa. +L<XXX> has been upgraded from version 0.69 to version 0.70. =back @@ -382,7 +355,7 @@ here. XXX Generate this with: - perl Porting/acknowledgements.pl v5.15.4..HEAD + perl Porting/acknowledgements.pl v5.15.5..HEAD =head1 Reporting Bugs diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 6f2416a0a8..624835e463 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -232,11 +232,11 @@ spots. This is now heavily deprecated. =item assertion botched: %s -(P) The malloc package that comes with Perl had an internal failure. +(X) The malloc package that comes with Perl had an internal failure. =item Assertion failed: file "%s" -(P) A general assertion failed. The file in question must be examined. +(X) A general assertion failed. The file in question must be examined. =item Assignment to both a list and a scalar @@ -2463,6 +2463,19 @@ effective uids or gids failed. length/code combination tried to obtain more data. This results in an undefined value for the length. See L<perlfunc/pack>. +=item length() used on %s + +(W syntax) You used length() on either an array or a hash when you +probably wanted a count of the items. + +Array size can be obtained by doing: + + scalar(@array); + +The number of items in a hash can be obtained by doing: + + scalar(keys %hash); + =item Lexing code attempted to stuff non-Latin-1 character into Latin-1 input (F) An extension is attempting to insert text into the current parse @@ -4334,6 +4347,13 @@ But before sort was a keyword, people sometimes used it as a filehandle. (F) A sort comparison subroutine may not return a list value with more or less than one element. See L<perlfunc/sort>. +=item Source filters apply only to byte streams + +(F) You tried to activate a source filter (usually by loading a +source filter module) within a string passed to C<eval>. This is +not permitted under the C<unicode_eval> feature. Consider using +C<evalbytes> instead. See L<feature>. + =item splice() offset past end of array (W misc) You attempted to specify an offset that was past the end of @@ -5017,6 +5037,17 @@ See L<POSIX/FUNCTIONS> for more information. (F) You called a Win32 function with incorrect arguments. See L<Win32> for more information. +=item $[ used in %s (did you mean $] ?) + +(W syntax) You used C<$[> in a comparison, such as: + + if ($[ > 5.006) { + ... + } + +You probably meant to use C<$]> instead. C<$[> is the base for indexing +arrays. C<$]> is the Perl version number in decimal. + =item Useless assignment to a temporary (W misc) You assigned to an lvalue subroutine, but what @@ -5129,6 +5160,11 @@ you can write it as C<push(@tied_array,())> to avoid this warning. (F) The "use" keyword is recognized and executed at compile time, and returns no useful value. See L<perlmod>. +=item Use of assignment to $[ is deprecated + +(D deprecated) The C<$[> variable (index of the first element in an array) +is deprecated. See L<perlvar/"$[">. + =item Use of bare << to mean <<"" is deprecated (D deprecated) You are now encouraged to use the explicitly quoted diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 6a09a536d5..86770fd84a 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -161,7 +161,8 @@ C<umask>, C<unlink>, C<utime> =item Keywords related to the control flow of your Perl program X<control flow> -C<caller>, C<continue>, C<die>, C<do>, C<dump>, C<eval>, C<exit>, +C<caller>, C<continue>, C<die>, C<do>, +C<dump>, C<eval>, C<evalbytes> C<exit>, C<__FILE__>, C<goto>, C<last>, C<__LINE__>, C<next>, C<__PACKAGE__>, C<redo>, C<return>, C<sub>, C<wantarray>, @@ -186,7 +187,8 @@ L<feature>. Alternately, include a C<use v5.10> or later to the current scope. =item Miscellaneous functions -C<defined>, C<dump>, C<eval>, C<formline>, C<local>, C<my>, C<our>, +C<defined>, C<dump>, C<eval>, C<evalbytes>, +C<formline>, C<local>, C<my>, C<our>, C<reset>, C<scalar>, C<state>, C<undef>, C<wantarray> =item Functions for processes and process groups @@ -1634,6 +1636,17 @@ Note that the value is parsed every time the C<eval> executes. If EXPR is omitted, evaluates C<$_>. This form is typically used to delay parsing and subsequent execution of the text of EXPR until run time. +If the C<unicode_eval> feature is enabled (which is the default under a +C<use 5.16> or higher declaration), EXPR or C<$_> is treated as a string of +characters, so C<use utf8> declarations have no effect, and source filters +are forbidden. In the absence of the C<unicode_eval> feature, the string +will sometimes be treated as characters and sometimes as bytes, depending +on the internal encoding, and source filters activated within the C<eval> +exhibit the erratic, but historical, behaviour of affecting some outer file +scope that is still compiling. See also the L</evalbytes> keyword, which +always treats its input as a byte stream and works properly with source +filters, and the L<feature> pragma. + In the second form, the code within the BLOCK is parsed only once--at the same time the code surrounding the C<eval> itself was parsed--and executed within the context of the current Perl program. This form is typically @@ -1763,6 +1776,21 @@ surrounding lexical scope, but rather the scope of the first non-DB piece of code that called it. You don't normally need to worry about this unless you are writing a Perl debugger. +=item evalbytes EXPR +X<evalbytes> + +=item evalbytes + +This function is like L</eval> with a string argument, except it always +parses its argument, or C<$_> if EXPR is omitted, as a string of bytes. A +string containing characters whose ordinal value exceeds 255 results in an +error. Source filters activated within the evaluated code apply to the +code itself. + +This function is only available under the C<evalbytes> feature, a +C<use v5.16> (or higher) declaration, or with a C<CORE::> prefix. See +L<feature> for more information. + =item exec LIST X<exec> X<execute> @@ -2536,6 +2564,19 @@ Note that C<glob> splits its arguments on whitespace and treats each segment as separate pattern. As such, C<glob("*.c *.h")> matches all files with a F<.c> or F<.h> extension. The expression C<glob(".* *")> matches all files in the current working directory. +If you want to glob filenames that might contain whitespace, you'll +have to use extra quotes around the spacey filename to protect it. +For example, to glob filenames that have an C<e> followed by a space +followed by an C<f>, use either of: + + @spacies = <"*e f*">; + @spacies = glob '"*e f*"'; + @spacies = glob q("*e f*"); + +If you had to get a variable through, you could do this: + + @spacies = glob "'*${var}e f*'"; + @spacies = glob qq("*${var}e f*"); If non-empty braces are the only wildcard characters used in the C<glob>, no filenames are matched, but potentially many strings diff --git a/pod/perlhist.pod b/pod/perlhist.pod index 81f85526b5..97bf051cfc 100644 --- a/pod/perlhist.pod +++ b/pod/perlhist.pod @@ -468,6 +468,7 @@ the strings?). Ricardo 5.15.2 2011-Aug-20 Stevan 5.15.3 2011-Sep-20 Florian 5.15.4 2011-Oct-20 + Steve 5.15.5 2011-Nov-20 =head2 SELECTED RELEASE SIZES diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index a4d6516c55..0d8bd1ab9f 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -916,18 +916,29 @@ The negation is useful for defining (surprise!) negated classes. END } -Intersection is useful for getting the common characters matched by -two (or more) classes. +This will match all non-Unicode code points, since every one of them is +not in Kana. You can use intersection to exclude these, if desired, as +this modified example shows: - sub InFooAndBar { + sub InNotKana { return <<'END'; - +main::InFoo - &main::InBar + !utf8::InHiragana + -utf8::InKatakana + +utf8::IsCn + &utf8::Any END } -It's important to remember not to use "&" for the first set; that -would be intersecting with nothing, resulting in an empty set. +C<&utf8::Any> must be the last line in the definition. + +Intersection is used generally for getting the common characters matched +by two (or more) classes. It's important to remember not to use "&" for +the first set; that would be intersecting with nothing, resulting in an +empty set. + +(Note that official Unicode properties differ from these in that they +automatically exclude non-Unicode code points and a warning is raised if +a match is attempted on one of those.) =head2 User-Defined Case Mappings (for serious hackers only) @@ -1030,7 +1041,7 @@ Level 2 - Extended Unicode Support [11] have Unicode::Normalize but not integrated to regexes [12] have \X but we don't have a "Grapheme Cluster Mode" [14] see UAX#29, Word Boundaries - [15] see UAX#21 "Case Mappings" + [15] This is covered in Chapter 3.13 (in Unicode 6.0) =item * @@ -165,7 +165,7 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, sv = SvRV(sv); if (SvTYPE(sv) == SVt_PVIO) { GV * const gv = MUTABLE_GV(sv_newmortal()); - gv_init(gv, 0, "", 0, 0); + gv_init(gv, 0, "$__ANONIO__", 11, 0); GvIOp(gv) = MUTABLE_IO(sv); SvREFCNT_inc_void_NN(sv); sv = MUTABLE_SV(gv); @@ -753,9 +753,11 @@ PP(pp_trans) } TARG = sv_newmortal(); if(PL_op->op_type == OP_TRANSR) { - SV * const newsv = newSVsv(sv); + STRLEN len; + const char * const pv = SvPV(sv,len); + SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv)); do_trans(newsv); - mPUSHs(newsv); + PUSHs(newsv); } else PUSHi(do_trans(sv)); RETURN; @@ -3430,63 +3432,15 @@ PP(pp_crypt) /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Below are several macros that generate code */ /* Generates code to store a unicode codepoint c that is known to occupy - * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */ -#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \ - STMT_START { \ - *(p) = UTF8_TWO_BYTE_HI(c); \ - *((p)+1) = UTF8_TWO_BYTE_LO(c); \ - } STMT_END - -/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next - * available byte after the two bytes */ + * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1, + * and p is advanced to point to the next available byte after the two bytes */ #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \ STMT_START { \ *(p)++ = UTF8_TWO_BYTE_HI(c); \ *((p)++) = UTF8_TWO_BYTE_LO(c); \ } STMT_END -/* Generates code to store the upper case of latin1 character l which is known - * to have its upper case be non-latin1 into the two bytes p and p+1. There - * are only two characters that fit this description, and this macro knows - * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC - * bytes */ -#define STORE_NON_LATIN1_UC(p, l) \ -STMT_START { \ - if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \ - STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \ - } else { /* Must be the following letter */ \ - STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \ - } \ -} STMT_END - -/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte - * after the character stored */ -#define CAT_NON_LATIN1_UC(p, l) \ -STMT_START { \ - if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \ - CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \ - } else { \ - CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \ - } \ -} STMT_END - -/* Generates code to add the two UTF-8 bytes (probably u) that are the upper - * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l), - * and must require two bytes to store it. Advances p to point to the next - * available position */ -#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \ -STMT_START { \ - if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \ - CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \ - } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \ - *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \ - } else {/* else is one of the other two special cases */ \ - CAT_NON_LATIN1_UC((p), (l)); \ - } \ -} STMT_END - PP(pp_ucfirst) { /* Actually is both lcfirst() and ucfirst(). Only the first character @@ -3534,96 +3488,22 @@ PP(pp_ucfirst) if (! slen) { /* If empty */ need = 1; /* still need a trailing NUL */ + ulen = 0; } else if (DO_UTF8(source)) { /* Is the source utf8? */ doing_utf8 = TRUE; + ulen = UTF8SKIP(s); + if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen); + else toLOWER_utf8(s, tmpbuf, &tculen); - if (UTF8_IS_INVARIANT(*s)) { - - /* An invariant source character is either ASCII or, in EBCDIC, an - * ASCII equivalent or a caseless C1 control. In both these cases, - * the lower and upper cases of any character are also invariants - * (and title case is the same as upper case). So it is safe to - * use the simple case change macros which avoid the overhead of - * the general functions. Note that if perl were to be extended to - * do locale handling in UTF-8 strings, this wouldn't be true in, - * for example, Lithuanian or Turkic. */ - *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s); - tculen = ulen = 1; - need = slen + 1; - } - else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - U8 chr; - - /* Similarly, if the source character isn't invariant but is in the - * latin1 range (or EBCDIC equivalent thereof), we have the case - * changes compiled into perl, and can avoid the overhead of the - * general functions. In this range, the characters are stored as - * two UTF-8 bytes, and it so happens that any changed-case version - * is also two bytes (in both ASCIIish and EBCDIC machines). */ - tculen = ulen = 2; - need = slen + 1; - - /* Convert the two source bytes to a single Unicode code point - * value, change case and save for below */ - chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)); - if (op_type == OP_LCFIRST) { /* lower casing is easy */ - U8 lower = toLOWER_LATIN1(chr); - STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower); - } - else { /* ucfirst */ - U8 upper = toUPPER_LATIN1_MOD(chr); - - /* Most of the latin1 range characters are well-behaved. Their - * title and upper cases are the same, and are also in the - * latin1 range. The macro above returns their upper (hence - * title) case, and all that need be done is to save the result - * for below. However, several characters are problematic, and - * have to be handled specially. The MOD in the macro name - * above means that these tricky characters all get mapped to - * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS. - * This mapping saves some tests for the majority of the - * characters */ - - if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { - - /* Not tricky. Just save it. */ - STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper); - } - else if (chr == LATIN_SMALL_LETTER_SHARP_S) { - - /* This one is tricky because it is two characters long, - * though the UTF-8 is still two bytes, so the stored - * length doesn't change */ - *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */ - *(tmpbuf + 1) = 's'; - } - else { - - /* The other two have their title and upper cases the same, - * but are tricky because the changed-case characters - * aren't in the latin1 range. They, however, do fit into - * two UTF-8 bytes */ - STORE_NON_LATIN1_UC(tmpbuf, chr); - } - } - } - else { - - /* Here, can't short-cut the general case */ - - utf8_to_uvchr(s, &ulen); - if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen); - else toLOWER_utf8(s, tmpbuf, &tculen); - - /* we can't do in-place if the length changes. */ - if (ulen != tculen) inplace = FALSE; - need = slen + 1 - ulen + tculen; - } + /* we can't do in-place if the length changes. */ + if (ulen != tculen) inplace = FALSE; + need = slen + 1 - ulen + tculen; } else { /* Non-zero length, non-UTF-8, Need to consider locale and if * latin1 is treated as caseless. Note that a locale takes * precedence */ + ulen = 1; /* Original character is 1 byte */ tculen = 1; /* Most characters will require one byte, but this will * need to be overridden for the tricky ones */ need = slen + 1; @@ -3646,44 +3526,42 @@ PP(pp_ucfirst) * native function does */ } else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */ - *tmpbuf = toUPPER_LATIN1_MOD(*s); - - /* tmpbuf now has the correct title case for all latin1 characters - * except for the several ones that have tricky handling. All - * of these are mapped by the MOD to the letter below. */ - if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { - - /* The length is going to change, with all three of these, so - * can't replace just the first character */ - inplace = FALSE; - - /* We use the original to distinguish between these tricky - * cases */ - if (*s == LATIN_SMALL_LETTER_SHARP_S) { - /* Two character title case 'Ss', but can remain non-UTF-8 */ - need = slen + 2; - *tmpbuf = 'S'; - *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */ - tculen = 2; + UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's'); + if (tculen > 1) { + assert(tculen == 2); + + /* If the result is an upper Latin1-range character, it can + * still be represented in one byte, which is its ordinal */ + if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) { + *tmpbuf = (U8) title_ord; + tculen = 1; } else { - - /* The other two tricky ones have their title case outside - * latin1. It is the same as their upper case. */ - doing_utf8 = TRUE; - STORE_NON_LATIN1_UC(tmpbuf, *s); - - /* The UTF-8 and UTF-EBCDIC lengths of both these characters - * and their upper cases is 2. */ - tculen = ulen = 2; - - /* The entire result will have to be in UTF-8. Assume worst - * case sizing in conversion. (all latin1 characters occupy - * at most two bytes in utf8) */ - convert_source_to_utf8 = TRUE; - need = slen * 2 + 1; + /* Otherwise it became more than one ASCII character (in + * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to + * beyond Latin1, so the number of bytes changed, so can't + * replace just the first character in place. */ + inplace = FALSE; + + /* If the result won't fit in a byte, the entire result will + * have to be in UTF-8. Assume worst case sizing in + * conversion. (all latin1 characters occupy at most two bytes + * in utf8) */ + if (title_ord > 255) { + doing_utf8 = TRUE; + convert_source_to_utf8 = TRUE; + need = slen * 2 + 1; + + /* The (converted) UTF-8 and UTF-EBCDIC lengths of all + * (both) characters whose title case is above 255 is + * 2. */ + ulen = 2; + } + else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */ + need = slen + 1 + 1; + } } - } /* End of is one of the three special chars */ + } } /* End of use Unicode (Latin1) semantics */ } /* End of changing the case of the first character */ @@ -3867,63 +3745,47 @@ PP(pp_uc) bool in_iota_subscript = FALSE; while (s < send) { + STRLEN u; + STRLEN ulen; + UV uv; if (in_iota_subscript && ! is_utf8_mark(s)) { + /* A non-mark. Time to output the iota subscript */ #define GREEK_CAPITAL_LETTER_IOTA 0x0399 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); in_iota_subscript = FALSE; - } - - /* If the UTF-8 character is invariant, then it is in the range - * known by the standard macro; result is only one byte long */ - if (UTF8_IS_INVARIANT(*s)) { - *d++ = toUPPER(*s); - s++; - } - else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - - /* Likewise, if it fits in a byte, its case change is in our - * table */ - U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)); - U8 upper = toUPPER_LATIN1_MOD(orig); - CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper); - s += 2; - } - else { - - /* Otherwise, need the general UTF-8 case. Get the changed - * case value and copy it to the output buffer */ + } - const STRLEN u = UTF8SKIP(s); - STRLEN ulen; + /* Then handle the current character. Get the changed case value + * and copy it to the output buffer */ - const UV uv = toUPPER_utf8(s, tmpbuf, &ulen); - if (uv == GREEK_CAPITAL_LETTER_IOTA - && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI) - { - in_iota_subscript = TRUE; - } - else { - if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { - /* If the eventually required minimum size outgrows - * the available space, we need to grow. */ - const UV o = d - (U8*)SvPVX_const(dest); - - /* If someone uppercases one million U+03B0s we - * SvGROW() one million times. Or we could try - * guessing how much to allocate without allocating too - * much. Such is life. See corresponding comment in - * lc code for another option */ - SvGROW(dest, min); - d = (U8*)SvPVX(dest) + o; - } - Copy(tmpbuf, d, ulen, U8); - d += ulen; - } - s += u; - } + u = UTF8SKIP(s); + uv = toUPPER_utf8(s, tmpbuf, &ulen); + if (uv == GREEK_CAPITAL_LETTER_IOTA + && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI) + { + in_iota_subscript = TRUE; + } + else { + if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + /* If the eventually required minimum size outgrows the + * available space, we need to grow. */ + const UV o = d - (U8*)SvPVX_const(dest); + + /* If someone uppercases one million U+03B0s we SvGROW() + * one million times. Or we could try guessing how much to + * allocate without allocating too much. Such is life. + * See corresponding comment in lc code for another option + * */ + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; + } + Copy(tmpbuf, d, ulen, U8); + d += ulen; + } + s += u; } if (in_iota_subscript) { CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); @@ -3953,7 +3815,7 @@ PP(pp_uc) else { for (; s < send; d++, s++) { *d = toUPPER_LATIN1_MOD(*s); - if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue; + if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) continue; /* The mainstream case is the tight loop above. To avoid * extra tests in that, all three characters that require @@ -4014,23 +3876,13 @@ PP(pp_uc) (send -s) * 2 + 1); d = (U8*)SvPVX(dest) + len; - /* And append the current character's upper case in UTF-8 */ - CAT_NON_LATIN1_UC(d, *s); - /* Now process the remainder of the source, converting to * upper and UTF-8. If a resulting byte is invariant in * UTF-8, output it as-is, otherwise convert to UTF-8 and * append it to the output. */ - - s++; for (; s < send; s++) { - U8 upper = toUPPER_LATIN1_MOD(*s); - if UTF8_IS_INVARIANT(upper) { - *d++ = upper; - } - else { - CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper); - } + (void) _to_upper_title_latin1(*s, d, &len, 'S'); + d += len; } /* Here have processed the whole source; no need to continue @@ -4111,55 +3963,35 @@ PP(pp_lc) U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; while (s < send) { - if (UTF8_IS_INVARIANT(*s)) { - - /* Invariant characters use the standard mappings compiled in. - */ - *d++ = toLOWER(*s); - s++; - } - else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - - /* As do the ones in the Latin1 range */ - U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))); - CAT_UNI_TO_UTF8_TWO_BYTE(d, lower); - s += 2; - } - else { - /* Here, is utf8 not in Latin-1 range, have to go out and get - * the mappings from the tables. */ + const STRLEN u = UTF8SKIP(s); + STRLEN ulen; - const STRLEN u = UTF8SKIP(s); - STRLEN ulen; + toLOWER_utf8(s, tmpbuf, &ulen); - toLOWER_utf8(s, tmpbuf, &ulen); - - /* Here is where we would do context-sensitive actions. See - * the commit message for this comment for why there isn't any - */ + /* Here is where we would do context-sensitive actions. See the + * commit message for this comment for why there isn't any */ - if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { - /* If the eventually required minimum size outgrows the - * available space, we need to grow. */ - const UV o = d - (U8*)SvPVX_const(dest); + /* If the eventually required minimum size outgrows the + * available space, we need to grow. */ + const UV o = d - (U8*)SvPVX_const(dest); - /* If someone lowercases one million U+0130s we SvGROW() - * one million times. Or we could try guessing how much to - * allocate without allocating too much. Such is life. - * Another option would be to grow an extra byte or two - * more each time we need to grow, which would cut down the - * million to 500K, with little waste */ - SvGROW(dest, min); - d = (U8*)SvPVX(dest) + o; - } - - /* Copy the newly lowercased letter to the output buffer we're - * building */ - Copy(tmpbuf, d, ulen, U8); - d += ulen; - s += u; + /* If someone lowercases one million U+0130s we SvGROW() one + * million times. Or we could try guessing how much to + * allocate without allocating too much. Such is life. + * Another option would be to grow an extra byte or two more + * each time we need to grow, which would cut down the million + * to 500K, with little waste */ + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; } + + /* Copy the newly lowercased letter to the output buffer we're + * building */ + Copy(tmpbuf, d, ulen, U8); + d += ulen; + s += u; } /* End of looping through the source string */ SvUTF8_on(dest); *d = '\0'; @@ -5858,7 +5690,7 @@ PP(pp_coreargs) /* Reset the stack pointer. Without this, we end up returning our own arguments in list context, in addition to the values we are supposed to return. nextstate usually does this on sub entry, but we need - to run the next op with the caller’s hints, so we cannot have a + to run the next op with the caller's hints, so we cannot have a nextstate. */ SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp; @@ -2884,8 +2884,8 @@ PP(pp_goto) SAVEFREESV(cv); /* later, undo the 'avoid premature free' hack */ if (CvISXSUB(cv)) { OP* const retop = cx->blk_sub.retop; - SV **newsp __attribute__unused__; - I32 gimme __attribute__unused__; + SV **newsp PERL_UNUSED_DECL; + I32 gimme PERL_UNUSED_DECL; if (reified) { I32 index; for (index=0; index<items; index++) @@ -3352,9 +3352,9 @@ Perl_sv_compile_2op_is_broken(pTHX_ SV *sv, OP **startop, const char *code, CATCH_SET(TRUE); if (runtime) - (void) doeval(G_SCALAR, startop, runcv, PL_curcop->cop_seq); + (void) doeval(G_SCALAR, startop, runcv, PL_curcop->cop_seq, NULL); else - (void) doeval(G_SCALAR, startop, PL_compcv, PL_cop_seqmax); + (void) doeval(G_SCALAR, startop, PL_compcv, PL_cop_seqmax, NULL); CATCH_SET(need_catch); POPBLOCK(cx,PL_curpm); POPEVAL(cx); @@ -3410,7 +3410,7 @@ Perl_find_runcv(pTHX_ U32 *db_seqp) return cv; } else if (CxTYPE(cx) == CXt_EVAL && !CxTRYBLOCK(cx)) - return PL_compcv; + return cx->blk_eval.cv; } } return PL_main_cv; @@ -3455,13 +3455,22 @@ S_try_yyparse(pTHX_ int gramtype) * pushes undef (also croaks if startop != NULL). */ +/* This function is called from three places, sv_compile_2op, pp_return + * and pp_entereval. These can be distinguished as follows: + * sv_compile_2op - startop is non-null + * pp_require - startop is null; in_require is true + * pp_entereval - stortop is null; in_require is false + */ + STATIC bool -S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) +S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq, HV *hh) { dVAR; dSP; OP * const saveop = PL_op; + COP * const oldcurcop = PL_curcop; bool in_require = (saveop && saveop->op_type == OP_REQUIRE); int yystatus; + CV *evalcv; PL_in_eval = (in_require ? (EVAL_INREQUIRE | (PL_in_eval & EVAL_INEVAL)) @@ -3469,24 +3478,23 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) PUSHMARK(SP); - SAVESPTR(PL_compcv); - PL_compcv = MUTABLE_CV(newSV_type(SVt_PVCV)); - CvEVAL_on(PL_compcv); + evalcv = MUTABLE_CV(newSV_type(SVt_PVCV)); + CvEVAL_on(evalcv); assert(CxTYPE(&cxstack[cxstack_ix]) == CXt_EVAL); - cxstack[cxstack_ix].blk_eval.cv = PL_compcv; + cxstack[cxstack_ix].blk_eval.cv = evalcv; cxstack[cxstack_ix].blk_gimme = gimme; - CvOUTSIDE_SEQ(PL_compcv) = seq; - CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outside)); + CvOUTSIDE_SEQ(evalcv) = seq; + CvOUTSIDE(evalcv) = MUTABLE_CV(SvREFCNT_inc_simple(outside)); /* set up a scratch pad */ - CvPADLIST(PL_compcv) = pad_new(padnew_SAVE); + CvPADLIST(evalcv) = pad_new(padnew_SAVE); PL_op = NULL; /* avoid PL_op and PL_curpad referring to different CVs */ if (!PL_madskills) - SAVEMORTALIZESV(PL_compcv); /* must remain until end of current statement */ + SAVEMORTALIZESV(evalcv); /* must remain until end of current statement */ /* make sure we compile in the right package */ @@ -3507,6 +3515,10 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) PL_madskills = 0; #endif + if (!startop) ENTER_with_name("evalcomp"); + SAVESPTR(PL_compcv); + PL_compcv = evalcv; + /* try to compile it */ PL_eval_root = NULL; @@ -3516,6 +3528,48 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) else CLEAR_ERRSV(); + if (!startop) { + SAVEHINTS(); + if (in_require) { + PL_hints = 0; + hv_clear(GvHV(PL_hintgv)); + } + else { + PL_hints = saveop->op_private & OPpEVAL_COPHH + ? oldcurcop->cop_hints : saveop->op_targ; + if (hh) { + /* SAVEHINTS created a new HV in PL_hintgv, which we need to GC */ + SvREFCNT_dec(GvHV(PL_hintgv)); + GvHV(PL_hintgv) = hh; + } + } + SAVECOMPILEWARNINGS(); + if (in_require) { + if (PL_dowarn & G_WARN_ALL_ON) + PL_compiling.cop_warnings = pWARN_ALL ; + else if (PL_dowarn & G_WARN_ALL_OFF) + PL_compiling.cop_warnings = pWARN_NONE ; + else + PL_compiling.cop_warnings = pWARN_STD ; + } + else { + PL_compiling.cop_warnings = + DUP_WARNINGS(oldcurcop->cop_warnings); + cophh_free(CopHINTHASH_get(&PL_compiling)); + if (Perl_cop_fetch_label(aTHX_ oldcurcop, NULL, NULL)) { + /* The label, if present, is the first entry on the chain. So rather + than writing a blank label in front of it (which involves an + allocation), just use the next entry in the chain. */ + PL_compiling.cop_hints_hash + = cophh_copy(oldcurcop->cop_hints_hash->refcounted_he_next); + /* Check the assumption that this removed the label. */ + assert(Perl_cop_fetch_label(aTHX_ &PL_compiling, NULL, NULL) == NULL); + } + else + PL_compiling.cop_hints_hash = cophh_copy(oldcurcop->cop_hints_hash); + } + } + CALL_BLOCK_HOOKS(bhk_eval, saveop); /* note that yyparse() may raise an exception, e.g. C<BEGIN{die}>, @@ -3523,6 +3577,8 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) yystatus = (!in_require && CATCH_GET) ? S_try_yyparse(aTHX_ GRAMPROG) : yyparse(GRAMPROG); + if (!startop && yystatus != 3) LEAVE_with_name("evalcomp"); + if (yystatus || PL_parser->error_count || !PL_eval_root) { SV **newsp; /* Used by POPBLOCK. */ PERL_CONTEXT *cx; @@ -3548,9 +3604,8 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) POPEVAL(cx); namesv = cx->blk_eval.old_namesv; } - } - if (yystatus != 3) LEAVE_with_name("eval"); /* pp_entereval knows about this LEAVE. */ + } if (in_require) { if (!cx) { @@ -3616,7 +3671,7 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) /* compiled okay, so do it */ - CvDEPTH(PL_compcv) = 1; + CvDEPTH(evalcv) = 1; SP = PL_stack_base + POPMARK; /* pop original mark */ PL_op = saveop; /* The caller may need it. */ PL_parser->lex_state = LEX_NOTPARSING; /* $^S needs this. */ @@ -4052,18 +4107,6 @@ PP(pp_require) CopFILE_set(&PL_compiling, tryname); lex_start(NULL, tryrsfp, 0); - SAVEHINTS(); - PL_hints = 0; - hv_clear(GvHV(PL_hintgv)); - - SAVECOMPILEWARNINGS(); - if (PL_dowarn & G_WARN_ALL_ON) - PL_compiling.cop_warnings = pWARN_ALL ; - else if (PL_dowarn & G_WARN_ALL_OFF) - PL_compiling.cop_warnings = pWARN_NONE ; - else - PL_compiling.cop_warnings = pWARN_STD ; - if (filter_sub || filter_cache) { /* We can use the SvPV of the filter PVIO itself as our cache, rather than hanging another SV from it. In turn, filter_add() optionally @@ -4089,7 +4132,7 @@ PP(pp_require) encoding = PL_encoding; PL_encoding = NULL; - if (doeval(gimme, NULL, NULL, PL_curcop->cop_seq)) + if (doeval(gimme, NULL, NULL, PL_curcop->cop_seq, NULL)) op = DOCATCH(PL_eval_start); else op = PL_op->op_next; @@ -4125,12 +4168,20 @@ PP(pp_entereval) char *tmpbuf = tbuf; STRLEN len; CV* runcv; - U32 seq; + U32 seq, lex_flags = 0; HV *saved_hh = NULL; + const bool bytes = PL_op->op_private & OPpEVAL_BYTES; if (PL_op->op_private & OPpEVAL_HAS_HH) { saved_hh = MUTABLE_HV(SvREFCNT_inc(POPs)); } + else if (PL_hints & HINT_LOCALIZE_HH || ( + PL_op->op_private & OPpEVAL_COPHH + && PL_curcop->cop_hints & HINT_LOCALIZE_HH + )) { + saved_hh = cop_hints_2hv(PL_curcop, 0); + hv_magic(saved_hh, NULL, PERL_MAGIC_hints); + } sv = POPs; if (!SvPOK(sv)) { /* make sure we've got a plain PV (no overload etc) before testing @@ -4140,13 +4191,29 @@ PP(pp_entereval) const char * const p = SvPV_const(sv, len); sv = newSVpvn_flags(p, len, SVs_TEMP | SvUTF8(sv)); + lex_flags |= LEX_START_COPIED; + + if (bytes && SvUTF8(sv)) + SvPVbyte_force(sv, len); + } + else if (bytes && SvUTF8(sv)) { + /* Don't modify someone else's scalar */ + STRLEN len; + sv = newSVsv(sv); + (void)sv_2mortal(sv); + SvPVbyte_force(sv,len); + lex_flags |= LEX_START_COPIED; } TAINT_IF(SvTAINTED(sv)); TAINT_PROPER("eval"); ENTER_with_name("eval"); - lex_start(sv, NULL, LEX_START_SAME_FILTER); + lex_start(sv, NULL, lex_flags | (PL_op->op_private & OPpEVAL_UNICODE + ? LEX_IGNORE_UTF8_HINTS + : bytes ? LEX_EVALBYTES : LEX_START_SAME_FILTER + ) + ); SAVETMPS; /* switch to eval mode */ @@ -4165,32 +4232,6 @@ PP(pp_entereval) CopFILE_set(&PL_compiling, tmpbuf+2); SAVECOPLINE(&PL_compiling); CopLINE_set(&PL_compiling, 1); - /* XXX For C<eval "...">s within BEGIN {} blocks, this ends up - deleting the eval's FILEGV from the stash before gv_check() runs - (i.e. before run-time proper). To work around the coredump that - ensues, we always turn GvMULTI_on for any globals that were - introduced within evals. See force_ident(). GSAR 96-10-12 */ - SAVEHINTS(); - PL_hints = PL_op->op_targ; - if (saved_hh) { - /* SAVEHINTS created a new HV in PL_hintgv, which we need to GC */ - SvREFCNT_dec(GvHV(PL_hintgv)); - GvHV(PL_hintgv) = saved_hh; - } - SAVECOMPILEWARNINGS(); - PL_compiling.cop_warnings = DUP_WARNINGS(PL_curcop->cop_warnings); - cophh_free(CopHINTHASH_get(&PL_compiling)); - if (Perl_cop_fetch_label(aTHX_ PL_curcop, NULL, NULL)) { - /* The label, if present, is the first entry on the chain. So rather - than writing a blank label in front of it (which involves an - allocation), just use the next entry in the chain. */ - PL_compiling.cop_hints_hash - = cophh_copy(PL_curcop->cop_hints_hash->refcounted_he_next); - /* Check the assumption that this removed the label. */ - assert(Perl_cop_fetch_label(aTHX_ &PL_compiling, NULL, NULL) == NULL); - } - else - PL_compiling.cop_hints_hash = cophh_copy(PL_curcop->cop_hints_hash); /* special case: an eval '' executed within the DB package gets lexically * placed in the first non-DB CV rather than the current CV - this * allows the debugger to execute code, find lexicals etc, in the @@ -4207,6 +4248,11 @@ PP(pp_entereval) if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) save_lines(CopFILEAV(&PL_compiling), PL_parser->linestr); else { + /* XXX For C<eval "...">s within BEGIN {} blocks, this ends up + deleting the eval's FILEGV from the stash before gv_check() runs + (i.e. before run-time proper). To work around the coredump that + ensues, we always turn GvMULTI_on for any globals that were + introduced within evals. See force_ident(). GSAR 96-10-12 */ char *const safestr = savepvn(tmpbuf, len); SAVEDELETE(PL_defstash, safestr, len); saved_delete = TRUE; @@ -4214,7 +4260,7 @@ PP(pp_entereval) PUTBACK; - if (doeval(gimme, NULL, runcv, seq)) { + if (doeval(gimme, NULL, runcv, seq, saved_hh)) { if (was != PL_breakable_sub_gen /* Some subs defined here. */ ? (PERLDB_LINE || PERLDB_SAVESRC) : PERLDB_SAVESRC_NOSUBS) { @@ -4249,12 +4295,14 @@ PP(pp_leaveeval) const U8 save_flags = PL_op -> op_flags; I32 optype; SV *namesv; + CV *evalcv; PERL_ASYNC_CHECK(); POPBLOCK(cx,newpm); POPEVAL(cx); namesv = cx->blk_eval.old_namesv; retop = cx->blk_eval.retop; + evalcv = cx->blk_eval.cv; TAINT_NOT; SP = adjust_stack_on_leave((gimme == G_VOID) ? SP : newsp, SP, newsp, @@ -4262,9 +4310,9 @@ PP(pp_leaveeval) PL_curpm = newpm; /* Don't pop $1 et al till now */ #ifdef DEBUGGING - assert(CvDEPTH(PL_compcv) == 1); + assert(CvDEPTH(evalcv) == 1); #endif - CvDEPTH(PL_compcv) = 0; + CvDEPTH(evalcv) = 0; if (optype == OP_REQUIRE && !(gimme == G_SCALAR ? SvTRUE(*SP) : SP > newsp)) @@ -2549,8 +2549,6 @@ PP(pp_entersub) switch (SvTYPE(sv)) { /* This is overwhelming the most common case: */ case SVt_PVGV: - if (!isGV_with_GP(sv)) - DIE(aTHX_ "Not a CODE reference"); we_have_a_glob: if (!(cv = GvCVu((const GV *)sv))) { HV *stash; @@ -1765,6 +1765,7 @@ S_sortcv(pTHX_ SV *const a, SV *const b) I32 result; PMOP * const pm = PL_curpm; OP * const sortop = PL_op; + COP * const cop = PL_curcop; SV **pad; PERL_ARGS_ASSERT_SORTCV; @@ -1777,6 +1778,7 @@ S_sortcv(pTHX_ SV *const a, SV *const b) if (PL_stack_sp != PL_stack_base + 1) Perl_croak(aTHX_ "Sort subroutine didn't return single value"); PL_op = sortop; + PL_curcop = cop; pad = PL_curpad; PL_curpad = 0; result = SvIV(*PL_stack_sp); PL_curpad = pad; @@ -1798,6 +1800,7 @@ S_sortcv_stacked(pTHX_ SV *const a, SV *const b) AV * const av = GvAV(PL_defgv); PMOP * const pm = PL_curpm; OP * const sortop = PL_op; + COP * const cop = PL_curcop; SV **pad; PERL_ARGS_ASSERT_SORTCV_STACKED; @@ -1830,6 +1833,7 @@ S_sortcv_stacked(pTHX_ SV *const a, SV *const b) if (PL_stack_sp != PL_stack_base + 1) Perl_croak(aTHX_ "Sort subroutine didn't return single value"); PL_op = sortop; + PL_curcop = cop; pad = PL_curpad; PL_curpad = 0; result = SvIV(*PL_stack_sp); PL_curpad = pad; @@ -1881,7 +1885,7 @@ S_sv_ncmp(pTHX_ SV *const a, SV *const b) PERL_ARGS_ASSERT_SV_NCMP; #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - if (Perl_isnan(right) || Perl_isnan(left)) { + if (Perl_isnan(nv1) || Perl_isnan(nv2)) { #else if (nv1 != nv1 || nv2 != nv2) { #endif @@ -355,9 +355,9 @@ PP(pp_glob) dVAR; OP *result; dSP; - /* make a copy of the pattern, to ensure that magic is called once - * and only once */ - TOPm1s = sv_2mortal(newSVsv(TOPm1s)); + /* make a copy of the pattern if it is gmagical, to ensure that magic + * is called once and only once */ + if (SvGMAGICAL(TOPm1s)) TOPm1s = sv_2mortal(newSVsv(TOPm1s)); tryAMAGICunTARGET(iter_amg, -1, (PL_op->op_flags & OPf_SPECIAL)); @@ -290,6 +290,12 @@ PERL_CALLCONV OP * Perl_ck_chdir(pTHX_ OP *o) #define PERL_ARGS_ASSERT_CK_CHDIR \ assert(o) +PERL_CALLCONV OP * Perl_ck_cmp(pTHX_ OP *o) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_CK_CMP \ + assert(o) + PERL_CALLCONV OP * Perl_ck_concat(pTHX_ OP *o) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1); @@ -412,6 +418,12 @@ PERL_CALLCONV OP * Perl_ck_join(pTHX_ OP *o) #define PERL_ARGS_ASSERT_CK_JOIN \ assert(o) +PERL_CALLCONV OP * Perl_ck_length(pTHX_ OP *o) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_CK_LENGTH \ + assert(o) + PERL_CALLCONV OP * Perl_ck_lfun(pTHX_ OP *o) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1); @@ -2528,7 +2540,7 @@ PERL_CALLCONV OP* Perl_newCONDOP(pTHX_ I32 flags, OP* first, OP* trueop, OP* fal assert(first) PERL_CALLCONV CV* Perl_newCONSTSUB(pTHX_ HV* stash, const char* name, SV* sv); -PERL_CALLCONV CV* Perl_newCONSTSUB_flags(pTHX_ HV* stash, const char* name, U32 flags, SV* sv); +PERL_CALLCONV CV* Perl_newCONSTSUB_flags(pTHX_ HV* stash, const char* name, STRLEN len, U32 flags, SV* sv); PERL_CALLCONV OP* Perl_newCVREF(pTHX_ I32 flags, OP* o) __attribute__malloc__ __attribute__warn_unused_result__; @@ -2769,6 +2781,12 @@ PERL_CALLCONV CV * Perl_newXS_flags(pTHX_ const char *name, XSUBADDR_t subaddr, #define PERL_ARGS_ASSERT_NEWXS_FLAGS \ assert(subaddr); assert(filename) +PERL_CALLCONV CV * Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, XSUBADDR_t subaddr, const char *const filename, const char *const proto, U32 flags) + __attribute__nonnull__(pTHX_3) + __attribute__nonnull__(pTHX_4); +#define PERL_ARGS_ASSERT_NEWXS_LEN_FLAGS \ + assert(subaddr); assert(filename) + PERL_CALLCONV void Perl_new_collate(pTHX_ const char* newcoll); PERL_CALLCONV void Perl_new_ctype(pTHX_ const char* newctype) __attribute__nonnull__(pTHX_1); @@ -5426,7 +5444,7 @@ STATIC void S_xmldump_attr(pTHX_ I32 level, PerlIO *file, const char* pat, ...) # endif #endif -#if defined(PERL_IN_DUMP_C) || defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) +#if defined(PERL_IN_DUMP_C) || defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_SCOPE_C) PERL_CALLCONV void Perl_hv_kill_backrefs(pTHX_ HV *hv) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_HV_KILL_BACKREFS \ @@ -5903,7 +5921,7 @@ STATIC OP* S_do_smartmatch(pTHX_ HV* seen_this, HV* seen_other, const bool copie STATIC OP* S_docatch(pTHX_ OP *o) __attribute__warn_unused_result__; -STATIC bool S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq); +STATIC bool S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq, HV* hh); STATIC OP* S_dofindlabel(pTHX_ OP *o, const char *label, OP **opstack, OP **oplimit) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) @@ -6730,9 +6748,6 @@ STATIC void S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, co #define PERL_ARGS_ASSERT_UTF8_MG_POS_CACHE_UPDATE \ assert(sv); assert(mgp) -STATIC SV * S_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, const SV *const keyname, I32 aindex, int subscript_type) - __attribute__warn_unused_result__; - STATIC I32 S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_VISIT \ @@ -6769,6 +6784,11 @@ STATIC void S_unreferenced_to_tmp_stack(pTHX_ AV *const unreferenced) # endif #endif +#if defined(PERL_IN_SV_C) || defined (PERL_IN_OP_C) +PERL_CALLCONV SV * Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, const SV *const keyname, I32 aindex, int subscript_type) + __attribute__warn_unused_result__; + +#endif #if defined(PERL_IN_TOKE_C) STATIC int S_ao(pTHX_ int toketype); STATIC void S_check_uni(pTHX); @@ -6977,6 +6997,12 @@ STATIC bool S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U #endif #if defined(PERL_IN_UTF8_C) +PERL_CALLCONV UV Perl__to_fold_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const U8 flags) + __attribute__nonnull__(pTHX_2) + __attribute__nonnull__(pTHX_3); +#define PERL_ARGS_ASSERT__TO_FOLD_LATIN1 \ + assert(p); assert(lenp) + STATIC STRLEN S_is_utf8_char_slow(const U8 *s, const STRLEN len) __attribute__warn_unused_result__ __attribute__nonnull__(1); @@ -6997,6 +7023,17 @@ STATIC SV* S_swash_get(pTHX_ SV* swash, UV start, UV span) #define PERL_ARGS_ASSERT_SWASH_GET \ assert(swash) +STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp) + __attribute__warn_unused_result__; + +#endif +#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) +PERL_CALLCONV UV Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const char S_or_s) + __attribute__nonnull__(pTHX_2) + __attribute__nonnull__(pTHX_3); +#define PERL_ARGS_ASSERT__TO_UPPER_TITLE_LATIN1 \ + assert(p); assert(lenp) + #endif #if defined(PERL_IN_UTIL_C) STATIC bool S_ckwarn_common(pTHX_ U32 w); @@ -1387,8 +1387,8 @@ is the recommended Unicode-aware way of saying scan += len; \ len = 0; \ } else { \ - uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\ - uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \ + len = UTF8SKIP(uc);\ + uvc = to_utf8_fold( uc, foldbuf, &foldlen); \ foldlen -= UNISKIP( uvc ); \ scan = foldbuf + UNISKIP( uvc ); \ } \ @@ -4523,7 +4523,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags) struct regexp *r; register regexp_internal *ri; STRLEN plen; - char *exp; + char* VOL exp; char* xend; regnode *scan; I32 flags; @@ -10454,7 +10454,11 @@ parseit: if (! PL_utf8_tofold) { U8 dummy[UTF8_MAXBYTES+1]; STRLEN dummy_len; - to_utf8_fold((U8*) "A", dummy, &dummy_len); + + /* This particular string is above \xff in both UTF-8 and + * UTFEBCDIC */ + to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len); + assert(PL_utf8_tofold); /* Verify that worked */ } PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold); } @@ -837,7 +837,7 @@ re.pm, especially to the documentation. #ifdef DEBUGGING #define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags \ - __attribute__unused__ = 0; GET_RE_DEBUG_FLAGS; + PERL_UNUSED_DECL = 0; GET_RE_DEBUG_FLAGS; #define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \ const char * const rpv = \ diff --git a/regen/keywords.pl b/regen/keywords.pl index 5f7f1ef851..c4cd187273 100755 --- a/regen/keywords.pl +++ b/regen/keywords.pl @@ -45,6 +45,8 @@ my %feature_kw = ( say => 'say', state => 'state', + + evalbytes=>'evalbytes', ); my %pos = map { ($_ => 1) } @{$by_strength{'+'}}; @@ -165,6 +167,7 @@ __END__ -eof -eq +eval +-evalbytes -exec +exists -exit diff --git a/regen/opcodes b/regen/opcodes index 688f1661cd..f75411e398 100644 --- a/regen/opcodes +++ b/regen/opcodes @@ -138,14 +138,14 @@ stringify string ck_fun fsT@ S left_shift left bitshift (<<) ck_bitop fsT2 S S right_shift right bitshift (>>) ck_bitop fsT2 S S -lt numeric lt (<) ck_null Iifs2 S S< -i_lt integer lt (<) ck_null ifs2 S S< -gt numeric gt (>) ck_null Iifs2 S S< -i_gt integer gt (>) ck_null ifs2 S S< -le numeric le (<=) ck_null Iifs2 S S< -i_le integer le (<=) ck_null ifs2 S S< -ge numeric ge (>=) ck_null Iifs2 S S< -i_ge integer ge (>=) ck_null ifs2 S S< +lt numeric lt (<) ck_cmp Iifs2 S S< +i_lt integer lt (<) ck_cmp ifs2 S S< +gt numeric gt (>) ck_cmp Iifs2 S S< +i_gt integer gt (>) ck_cmp ifs2 S S< +le numeric le (<=) ck_cmp Iifs2 S S< +i_le integer le (<=) ck_cmp ifs2 S S< +ge numeric ge (>=) ck_cmp Iifs2 S S< +i_ge integer ge (>=) ck_cmp ifs2 S S< eq numeric eq (==) ck_null Iifs2 S S< i_eq integer eq (==) ck_null ifs2 S S< ne numeric ne (!=) ck_null Iifs2 S S< @@ -192,7 +192,7 @@ abs abs ck_fun fsTu% S? # String stuff. -length length ck_fun ifsTu% S? +length length ck_length ifsTu% S? substr substr ck_substr st@ S S S? S? vec vec ck_fun ist@ S S S @@ -483,7 +483,7 @@ semctl semctl ck_fun imst@ S S S S require require ck_require du% S? dofile do "file" ck_fun d1 S hintseval eval hints ck_svconst s$ -entereval eval "string" ck_eval d% S +entereval eval "string" ck_eval du% S? leaveeval eval "string" exit ck_null 1 S #evalonce eval constant string ck_null d1 S entertry eval {block} ck_eval d| @@ -138,7 +138,7 @@ /* Doesn't do an assert to verify that is correct */ #define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \ if (!CAT2(PL_utf8_,class)) { \ - bool throw_away __attribute__unused__; \ + bool throw_away PERL_UNUSED_DECL; \ ENTER; save_re_context(); \ throw_away = CAT2(is_utf8_,class)((const U8*)" "); \ LEAVE; } } STMT_END @@ -1200,8 +1200,8 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \ uscan += len; \ len=0; \ } else { \ - uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \ - uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \ + uvc = to_utf8_fold( (U8 *) uc, foldbuf, &foldlen ); \ + len = UTF8SKIP(uc); \ foldlen -= UNISKIP( uvc ); \ uscan = foldbuf + UNISKIP( uvc ); \ } \ @@ -1552,10 +1552,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, * fact that the Latin 1 folds are already determined, and the * only multi-char fold in that range is the sharp-s folding to * 'ss'. Thus, a pattern character can match as little as 1/3 of a - * string character. Adjust lnc accordingly, always matching at - * least 1 */ + * string character. Adjust lnc accordingly, rounding up, so that + * if we need to match at least 4+1/3 chars, that really is 5. */ expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2; - lnc = (lnc < expansion) ? 1 : lnc / expansion; + lnc = (lnc + expansion - 1) / expansion; /* As in the non-UTF8 case, if we have to match 3 characters, and * only 2 are left, it's guaranteed to fail, so don't start a @@ -1567,10 +1567,12 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, e = s; /* Due to minlen logic of intuit() */ } - /* XXX Note that we could recalculate e every so-often through the - * loop to stop earlier, as the worst case expansion above will - * rarely be met, and as we go along we would usually find that e - * moves further to the left. Unclear if worth the expense */ + /* XXX Note that we could recalculate e to stop the loop earlier, + * as the worst case expansion above will rarely be met, and as we + * go along we would usually find that e moves further to the left. + * This would happen only after we reached the point in the loop + * where if there were no expansion we should fail. Unclear if + * worth the expense */ while (s <= e) { char *my_strend= (char *)strend; @@ -1580,7 +1582,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, { goto got_it; } - s += UTF8SKIP(s); + s += (utf8_target) ? UTF8SKIP(s) : 1; } break; } @@ -6006,7 +6008,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) /* Here, the string is utf8, and the pattern char is different * in utf8 than not, so can't compare them directly. Outside the - * loop, find find the two utf8 bytes that represent c, and then + * loop, find the two utf8 bytes that represent c, and then * look for those in sequence in the utf8 string */ U8 high = UTF8_TWO_BYTE_HI(c); U8 low = UTF8_TWO_BYTE_LO(c); @@ -181,7 +181,7 @@ S_save_scalar_at(pTHX_ SV **sptr, const U32 flags) osv = *sptr; sv = (flags & SAVEf_KEEPOLDELEM) ? osv : (*sptr = newSV(0)); - if (SvTYPE(osv) >= SVt_PVMG && SvMAGIC(osv) && SvTYPE(osv) != SVt_PVGV) { + if (SvTYPE(osv) >= SVt_PVMG && SvMAGIC(osv)) { if (SvGMAGICAL(osv)) { SvFLAGS(osv) |= (SvFLAGS(osv) & (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT; @@ -901,7 +901,10 @@ Perl_leave_scope(pTHX_ I32 base) if (SvTHINKFIRST(sv)) sv_force_normal_flags(sv, SV_IMMEDIATE_UNREF); + if (SvTYPE(sv) == SVt_PVHV) + Perl_hv_kill_backrefs(aTHX_ MUTABLE_HV(sv)); if (SvMAGICAL(sv)) + sv_unmagic(sv, PERL_MAGIC_backref), mg_free(sv); switch (SvTYPE(sv)) { @@ -442,7 +442,7 @@ do_report_used(pTHX_ SV *const sv) /* =for apidoc sv_report_used -Dump the contents of all SVs not yet freed. (Debugging aid). +Dump the contents of all SVs not yet freed (debugging aid). =cut */ @@ -563,7 +563,7 @@ do_curse(pTHX_ SV * const sv) { /* =for apidoc sv_clean_objs -Attempt to destroy all objects not yet freed +Attempt to destroy all objects not yet freed. =cut */ @@ -614,7 +614,7 @@ do_clean_all(pTHX_ SV *const sv) =for apidoc sv_clean_all Decrement the refcnt of each remaining SV, possibly triggering a -cleanup. This function may have to be called multiple times to free +cleanup. This function may have to be called multiple times to free SVs which are in complex self-referential hierarchies. =cut @@ -670,7 +670,7 @@ struct arena_set { /* =for apidoc sv_free_arenas -Deallocate the memory used by all arenas. Note that all the individual SV +Deallocate the memory used by all arenas. Note that all the individual SV heads and bodies within the arenas must already have been freed. =cut @@ -1127,7 +1127,10 @@ static const struct body_details fake_rv = Upgrade an SV to a more complex form. Generally adds a new body type to the SV, then copies across as much information as possible from the old body. -You generally want to use the C<SvUPGRADE> macro wrapper. See also C<svtype>. +It croaks if the SV is already in a more complex form than requested. You +generally want to use the C<SvUPGRADE> macro wrapper, which checks the type +before calling C<sv_upgrade>, and hence does not croak. See also +C<svtype>. =cut */ @@ -1427,7 +1430,7 @@ Perl_sv_upgrade(pTHX_ register SV *const sv, svtype new_type) /* =for apidoc sv_backoff -Remove any string offset. You should normally use the C<SvOOK_off> macro +Remove any string offset. You should normally use the C<SvOOK_off> macro wrapper instead. =cut @@ -1810,16 +1813,11 @@ Perl_looks_like_number(pTHX_ SV *const sv) STATIC bool S_glob_2number(pTHX_ GV * const gv) { - const U32 wasfake = SvFLAGS(gv) & SVf_FAKE; SV *const buffer = sv_newmortal(); PERL_ARGS_ASSERT_GLOB_2NUMBER; - /* FAKE globs can get coerced, so need to turn this off temporarily if it - is on. */ - SvFAKE_off(gv); gv_efullname3(buffer, gv, "*"); - SvFLAGS(gv) |= wasfake; /* We know that all GVs stringify to something that is not-a-number, so no need to test that. */ @@ -2412,7 +2410,7 @@ Perl_sv_2uv_flags(pTHX_ register SV *const sv, const I32 flags) =for apidoc sv_2nv_flags Return the num value of an SV, doing any necessary string or integer -conversion. If flags includes SV_GMAGIC, does an mg_get() first. +conversion. If flags includes SV_GMAGIC, does an mg_get() first. Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros. =cut @@ -2701,10 +2699,9 @@ S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const pe =for apidoc sv_2pv_flags Returns a pointer to the string value of an SV, and sets *lp to its length. -If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a string -if necessary. -Normally invoked via the C<SvPV_flags> macro. C<sv_2pv()> and C<sv_2pv_nomg> -usually end up here too. +If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a +string if necessary. Normally invoked via the C<SvPV_flags> macro. +C<sv_2pv()> and C<sv_2pv_nomg> usually end up here too. =cut */ @@ -2946,27 +2943,16 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags else { if (isGV_with_GP(sv)) { GV *const gv = MUTABLE_GV(sv); - const U32 wasfake = SvFLAGS(gv) & SVf_FAKE; SV *const buffer = sv_newmortal(); - /* FAKE globs can get coerced, so need to turn this off temporarily - if it is on. */ - SvFAKE_off(gv); gv_efullname3(buffer, gv, "*"); - SvFLAGS(gv) |= wasfake; - if (SvPOK(buffer)) { - if (lp) { + assert(SvPOK(buffer)); + if (lp) { *lp = SvCUR(buffer); - } - if ( SvUTF8(buffer) ) SvUTF8_on(sv); - return SvPVX(buffer); - } - else { - if (lp) - *lp = 0; - return (char *)""; } + if ( SvUTF8(buffer) ) SvUTF8_on(sv); + return SvPVX(buffer); } if (lp) @@ -3078,7 +3064,7 @@ It calls sv_2bool_flags with the SV_GMAGIC flag. =for apidoc sv_2bool_flags This function is only used by sv_true() and friends, and only if -the latter's argument is neither SvPOK, SvIOK nor SvNOK. If the flags +the latter's argument is neither SvPOK, SvIOK nor SvNOK. If the flags contain SV_GMAGIC, then it does an mg_get() first. @@ -3145,7 +3131,7 @@ use the Encode extension for that. =for apidoc sv_utf8_upgrade_nomg -Like sv_utf8_upgrade, but doesn't do magic on C<sv> +Like sv_utf8_upgrade, but doesn't do magic on C<sv>. =for apidoc sv_utf8_upgrade_flags @@ -3547,7 +3533,7 @@ Perl_sv_utf8_encode(pTHX_ register SV *const sv) If the PV of the SV is an octet sequence in UTF-8 and contains a multiple-byte character, the C<SvUTF8> flag is turned on -so that it looks like a character. If the PV contains only single-byte +so that it looks like a character. If the PV contains only single-byte characters, the C<SvUTF8> flag stays off. Scans PV for validity and returns false if the PV is invalid UTF-8. @@ -3608,7 +3594,7 @@ Perl_sv_utf8_decode(pTHX_ register SV *const sv) Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. The source SV may be destroyed if it is mortal, so don't use this -function if the source SV needs to be reused. Does not handle 'set' magic. +function if the source SV needs to be reused. Does not handle 'set' magic. Loosely speaking, it performs a copy-by-value, obliterating any previous content of the destination. @@ -3620,12 +3606,13 @@ C<SvSetMagicSV_nosteal>. Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. The source SV may be destroyed if it is mortal, so don't use this -function if the source SV needs to be reused. Does not handle 'set' magic. +function if the source SV needs to be reused. Does not handle 'set' magic. Loosely speaking, it performs a copy-by-value, obliterating any previous content of the destination. If the C<flags> parameter has the C<SV_GMAGIC> bit set, will C<mg_get> on -C<ssv> if appropriate, else not. If the C<flags> parameter has the -C<NOSTEAL> bit set then the buffers of temps will not be stolen. <sv_setsv> +C<ssv> if appropriate, else not. If the C<flags> +parameter has the C<NOSTEAL> bit set then the +buffers of temps will not be stolen. <sv_setsv> and C<sv_setsv_nomg> are implemented in terms of this function. You probably want to use one of the assortment of wrappers, such as @@ -3694,7 +3681,7 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) mro_changes = 1; } - /* We don’t need to check the name of the destination if it was not a + /* We don't need to check the name of the destination if it was not a glob to begin with. */ if(dtype == SVt_PVGV) { const char * const name = GvNAME((const GV *)dstr); @@ -3930,7 +3917,7 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr) mg = mg_find(sref, PERL_MAGIC_isa); } /* Since the *ISA assignment could have affected more than - one stash, don’t call mro_isa_changed_in directly, but let + one stash, don't call mro_isa_changed_in directly, but let magic_clearisa do it for us, as it already has the logic for dealing with globs vs arrays of globs. */ assert(mg); @@ -4375,15 +4362,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) } else { if (isGV_with_GP(sstr)) { - /* This stringification rule for globs is spread in 3 places. - This feels bad. FIXME. */ - const U32 wasfake = sflags & SVf_FAKE; - - /* FAKE globs can get coerced, so need to turn this off - temporarily if it is on. */ - SvFAKE_off(sstr); gv_efullname3(dstr, MUTABLE_GV(sstr), "*"); - SvFLAGS(sstr) |= wasfake; } else (void)SvOK_off(dstr); @@ -4617,7 +4596,7 @@ Perl_sv_sethek(pTHX_ register SV *const sv, const HEK *const hek) return; } { - sv_upgrade(sv, SVt_PV); + SvUPGRADE(sv, SVt_PV); sv_usepvn_flags(sv, (char *)HEK_KEY(share_hek_hek(hek)), HEK_LEN(hek), SV_HAS_TRAILING_NUL); SvLEN_set(sv, 0); SvREADONLY_on(sv); @@ -4644,10 +4623,10 @@ so that pointer should not be freed or used by the programmer after giving it to sv_usepvn, and neither should any pointers from "behind" that pointer (e.g. ptr + 1) be used. -If C<flags> & SV_SMAGIC is true, will call SvSETMAGIC. If C<flags> & +If C<flags> & SV_SMAGIC is true, will call SvSETMAGIC. If C<flags> & SV_HAS_TRAILING_NUL is true, then C<ptr[len]> must be NUL, and the realloc -will be skipped. (i.e. the buffer is actually at least 1 byte longer than -C<len>, and already meets the requirements for storing in C<SvPVX>) +will be skipped (i.e. the buffer is actually at least 1 byte longer than +C<len>, and already meets the requirements for storing in C<SvPVX>). =cut */ @@ -4758,11 +4737,12 @@ S_sv_release_COW(pTHX_ register SV *sv, const char *pvx, SV *after) Undo various types of fakery on an SV: if the PV is a shared string, make a private copy; if we're a ref, stop refing; if we're a glob, downgrade to an xpvmg; if we're a copy-on-write scalar, this is the on-write time when -we do the copy, and is also used locally. If C<SV_COW_DROP_PV> is set +we do the copy, and is also used locally. If C<SV_COW_DROP_PV> is set then a copy-on-write scalar drops its PV buffer (if any) and becomes -SvPOK_off rather than making a copy. (Used where this scalar is about to be -set to some other value.) In addition, the C<flags> parameter gets passed to -C<sv_unref_flags()> when unreffing. C<sv_force_normal> calls this function +SvPOK_off rather than making a copy. (Used where this +scalar is about to be set to some other value.) In addition, +the C<flags> parameter gets passed to C<sv_unref_flags()> +when unreffing. C<sv_force_normal> calls this function with flags set to 0. =cut @@ -4887,7 +4867,7 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags) Efficient removal of characters from the beginning of the string buffer. SvPOK(sv) must be true and the C<ptr> must be a pointer to somewhere inside the string buffer. The C<ptr> becomes the first character of the adjusted -string. Uses the "OOK hack". +string. Uses the "OOK hack". Beware: after this function returns, C<ptr> and SvPVX_const(sv) may no longer refer to the same chunk of data. @@ -4990,8 +4970,9 @@ Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>. Concatenates the string onto the end of the string which is in the SV. The C<len> indicates number of bytes to copy. If the SV has the UTF-8 status set, then the bytes appended should be valid UTF-8. -If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<dsv> if -appropriate, else not. C<sv_catpvn> and C<sv_catpvn_nomg> are implemented +If C<flags> has the C<SV_SMAGIC> bit set, will +C<mg_set> on C<dsv> afterwards if appropriate. +C<sv_catpvn> and C<sv_catpvn_nomg> are implemented in terms of this function. =cut @@ -5060,7 +5041,9 @@ not 'set' magic. See C<sv_catsv_mg>. Concatenates the string from SV C<ssv> onto the end of the string in SV C<dsv>. Modifies C<dsv> but not C<ssv>. If C<flags> has C<SV_GMAGIC> -bit set, will C<mg_get> on the SVs if appropriate, else not. C<sv_catsv> +bit set, will C<mg_get> on the C<ssv>, if appropriate, before +reading it. If the C<flags> contain C<SV_SMAGIC>, C<mg_set> will be +called on the modified SV afterward, if appropriate. C<sv_catsv> and C<sv_catsv_nomg> are implemented in terms of this function. =cut */ @@ -5123,8 +5106,8 @@ Perl_sv_catpv(pTHX_ register SV *const sv, register const char *ptr) Concatenates the string onto the end of the string which is in the SV. If the SV has the UTF-8 status set, then the bytes appended should -be valid UTF-8. If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> -on the SVs if appropriate, else not. +be valid UTF-8. If C<flags> has the C<SV_SMAGIC> bit set, will C<mg_set> +on the modified SV if appropriate. =cut */ @@ -5186,7 +5169,7 @@ Perl_newSV(pTHX_ const STRLEN len) /* =for apidoc sv_magicext -Adds magic to an SV, upgrading it if necessary. Applies the +Adds magic to an SV, upgrading it if necessary. Applies the supplied vtable and returns a pointer to the magic added. Note that C<sv_magicext> will allow things that C<sv_magic> will not. @@ -5279,8 +5262,9 @@ Perl_sv_magicext(pTHX_ SV *const sv, SV *const obj, const int how, /* =for apidoc sv_magic -Adds magic to an SV. First upgrades C<sv> to type C<SVt_PVMG> if necessary, -then adds a new magic item of type C<how> to the head of the magic list. +Adds magic to an SV. First upgrades C<sv> to type C<SVt_PVMG> if +necessary, then adds a new magic item of type C<how> to the head of the +magic list. See C<sv_magicext> (which C<sv_magic> now calls) for a description of the handling of the C<name> and C<namlen> arguments. @@ -5443,7 +5427,7 @@ Perl_sv_unmagicext(pTHX_ SV *const sv, const int type, MGVTBL *vtbl) Weaken a reference: set the C<SvWEAKREF> flag on this RV; give the referred-to SV C<PERL_MAGIC_backref> magic if it hasn't already; and push a back-reference to this RV onto the array of backreferences -associated with that magic. If the RV is magical, set magic will be +associated with that magic. If the RV is magical, set magic will be called after the RV is cleared. =cut @@ -5735,8 +5719,8 @@ Perl_sv_kill_backrefs(pTHX_ SV *const sv, AV *const av) /* =for apidoc sv_insert -Inserts a string at the specified offset/length within the SV. Similar to -the Perl substr() function. Handles get magic. +Inserts a string at the specified offset/length within the SV. Similar to +the Perl substr() function. Handles get magic. =for apidoc sv_insert_flags @@ -5953,10 +5937,10 @@ S_anonymise_cv_maybe(pTHX_ GV *gv, CV* cv) =for apidoc sv_clear Clear an SV: call any destructors, free up any memory used by the body, -and free the body itself. The SV's head is I<not> freed, although +and free the body itself. The SV's head is I<not> freed, although its type is set to all 1's so that it won't inadvertently be assumed to be live during global destruction etc. -This function should only be called when REFCNT is zero. Most of the time +This function should only be called when REFCNT is zero. Most of the time you'll want to call C<sv_free()> (or its macro wrapper C<SvREFCNT_dec>) instead. @@ -6363,7 +6347,7 @@ S_curse(pTHX_ SV * const sv, const bool check_refcnt) { /* =for apidoc sv_newref -Increment an SV's reference count. Use the C<SvREFCNT_inc()> wrapper +Increment an SV's reference count. Use the C<SvREFCNT_inc()> wrapper instead. =cut @@ -6490,7 +6474,7 @@ Perl_sv_len(pTHX_ register SV *const sv) =for apidoc sv_len_utf8 Returns the number of characters in the string in an SV, counting wide -UTF-8 bytes as a single character. Handles magic and type coercion. +UTF-8 bytes as a single character. Handles magic and type coercion. =cut */ @@ -7159,14 +7143,14 @@ S_assert_uft8_cache_coherent(pTHX_ const char *const func, STRLEN from_cache, =for apidoc sv_eq Returns a boolean indicating whether the strings in the two SVs are -identical. Is UTF-8 and 'use bytes' aware, handles get magic, and will +identical. Is UTF-8 and 'use bytes' aware, handles get magic, and will coerce its args to strings if necessary. =for apidoc sv_eq_flags Returns a boolean indicating whether the strings in the two SVs are -identical. Is UTF-8 and 'use bytes' aware and coerces its args to strings -if necessary. If the flags include SV_GMAGIC, it handles get-magic, too. +identical. Is UTF-8 and 'use bytes' aware and coerces its args to strings +if necessary. If the flags include SV_GMAGIC, it handles get-magic, too. =cut */ @@ -7180,7 +7164,6 @@ Perl_sv_eq_flags(pTHX_ register SV *sv1, register SV *sv2, const U32 flags) const char *pv2; STRLEN cur2; I32 eq = 0; - char *tpv = NULL; SV* svrecode = NULL; if (!sv1) { @@ -7244,8 +7227,6 @@ Perl_sv_eq_flags(pTHX_ register SV *sv1, register SV *sv2, const U32 flags) eq = (pv1 == pv2) || memEQ(pv1, pv2, cur1); SvREFCNT_dec(svrecode); - if (tpv) - Safefree(tpv); return eq; } @@ -7255,15 +7236,15 @@ Perl_sv_eq_flags(pTHX_ register SV *sv1, register SV *sv2, const U32 flags) Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the string in C<sv1> is less than, equal to, or greater than the string in -C<sv2>. Is UTF-8 and 'use bytes' aware, handles get magic, and will +C<sv2>. Is UTF-8 and 'use bytes' aware, handles get magic, and will coerce its args to strings if necessary. See also C<sv_cmp_locale>. =for apidoc sv_cmp_flags Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the string in C<sv1> is less than, equal to, or greater than the string in -C<sv2>. Is UTF-8 and 'use bytes' aware and will coerce its args to strings -if necessary. If the flags include SV_GMAGIC, it handles get magic. See +C<sv2>. Is UTF-8 and 'use bytes' aware and will coerce its args to strings +if necessary. If the flags include SV_GMAGIC, it handles get magic. See also C<sv_cmp_locale_flags>. =cut @@ -7355,15 +7336,15 @@ Perl_sv_cmp_flags(pTHX_ register SV *const sv1, register SV *const sv2, /* =for apidoc sv_cmp_locale -Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and +Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and 'use bytes' aware, handles get magic, and will coerce its args to strings if necessary. See also C<sv_cmp>. =for apidoc sv_cmp_locale_flags -Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and -'use bytes' aware and will coerce its args to strings if necessary. If the -flags contain SV_GMAGIC, it handles get magic. See also C<sv_cmp_flags>. +Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and +'use bytes' aware and will coerce its args to strings if necessary. If the +flags contain SV_GMAGIC, it handles get magic. See also C<sv_cmp_flags>. =cut */ @@ -7430,12 +7411,12 @@ Perl_sv_cmp_locale_flags(pTHX_ register SV *const sv1, register SV *const sv2, /* =for apidoc sv_collxfrm -This calls C<sv_collxfrm_flags> with the SV_GMAGIC flag. See +This calls C<sv_collxfrm_flags> with the SV_GMAGIC flag. See C<sv_collxfrm_flags>. =for apidoc sv_collxfrm_flags -Add Collate Transform magic to an SV if it doesn't already have it. If the +Add Collate Transform magic to an SV if it doesn't already have it. If the flags contain SV_GMAGIC, it handles get-magic. Any scalar variable may carry PERL_MAGIC_collxfrm magic that contains the @@ -7883,7 +7864,7 @@ screamer2: =for apidoc sv_inc Auto-increment of the value in the SV, doing string to numeric conversion -if necessary. Handles 'get' magic and operator overloading. +if necessary. Handles 'get' magic and operator overloading. =cut */ @@ -7901,7 +7882,7 @@ Perl_sv_inc(pTHX_ register SV *const sv) =for apidoc sv_inc_nomg Auto-increment of the value in the SV, doing string to numeric conversion -if necessary. Handles operator overloading. Skips handling 'get' magic. +if necessary. Handles operator overloading. Skips handling 'get' magic. =cut */ @@ -8064,7 +8045,7 @@ Perl_sv_inc_nomg(pTHX_ register SV *const sv) =for apidoc sv_dec Auto-decrement of the value in the SV, doing string to numeric conversion -if necessary. Handles 'get' magic and operator overloading. +if necessary. Handles 'get' magic and operator overloading. =cut */ @@ -8083,7 +8064,7 @@ Perl_sv_dec(pTHX_ register SV *const sv) =for apidoc sv_dec_nomg Auto-decrement of the value in the SV, doing string to numeric conversion -if necessary. Handles operator overloading. Skips handling 'get' magic. +if necessary. Handles operator overloading. Skips handling 'get' magic. =cut */ @@ -8214,7 +8195,7 @@ Perl_sv_dec_nomg(pTHX_ register SV *const sv) =for apidoc sv_mortalcopy Creates a new SV which is a copy of the original SV (using C<sv_setsv>). -The new SV is marked as mortal. It will be destroyed "soon", either by an +The new SV is marked as mortal. It will be destroyed "soon", either by an explicit call to FREETMPS, or by an implicit call at places such as statement boundaries. See also C<sv_newmortal> and C<sv_2mortal>. @@ -8243,7 +8224,7 @@ Perl_sv_mortalcopy(pTHX_ SV *const oldstr) =for apidoc sv_newmortal Creates a new null SV which is mortal. The reference count of the SV is -set to 1. It will be destroyed "soon", either by an explicit call to +set to 1. It will be destroyed "soon", either by an explicit call to FREETMPS, or by an implicit call at places such as statement boundaries. See also C<sv_mortalcopy> and C<sv_2mortal>. @@ -8272,7 +8253,8 @@ string. You are responsible for ensuring that the source string is at least C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined. Currently the only flag bits accepted are C<SVf_UTF8> and C<SVs_TEMP>. If C<SVs_TEMP> is set, then C<sv_2mortal()> is called on the result before -returning. If C<SVf_UTF8> is set, C<s> is considered to be in UTF-8 and the +returning. If C<SVf_UTF8> is set, C<s> +is considered to be in UTF-8 and the C<SVf_UTF8> flag will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for this function, defined as @@ -8383,7 +8365,7 @@ Perl_newSVpvn(pTHX_ const char *const s, const STRLEN len) =for apidoc newSVhek Creates a new SV from the hash key structure. It will generate scalars that -point to the shared string table where possible. Returns a new (undefined) +point to the shared string table where possible. Returns a new (undefined) SV if the hek is NULL. =cut @@ -8455,10 +8437,11 @@ Perl_newSVhek(pTHX_ const HEK *const hek) =for apidoc newSVpvn_share Creates a new SV with its SvPVX_const pointing to a shared string in the string -table. If the string does not already exist in the table, it is created -first. Turns on READONLY and FAKE. If the C<hash> parameter is non-zero, that -value is used; otherwise the hash is computed. The string's hash can be later -be retrieved from the SV with the C<SvSHARED_HASH()> macro. The idea here is +table. If the string does not already exist in the table, it is +created first. Turns on READONLY and FAKE. If the C<hash> parameter +is non-zero, that value is used; otherwise the hash is computed. +The string's hash can be later be retrieved from the SV +with the C<SvSHARED_HASH()> macro. The idea here is that as the string table is used for shared hash keys these strings will have SvPVX_const == HeKEY and hash lookup will avoid string compare. @@ -8695,7 +8678,7 @@ Perl_newRV(pTHX_ SV *const sv) =for apidoc newSVsv Creates a new SV which is an exact duplicate of the original SV. -(Uses C<sv_setsv>). +(Uses C<sv_setsv>.) =cut */ @@ -8830,6 +8813,9 @@ Using various gambits, try to get an IO from an SV: the IO slot if its a GV; or the recursive result if we're an RV; or the IO slot of the symbol named after the PV if we're a string. +'Get' magic is ignored on the sv passed in, but will be called on +C<SvRV(sv)> if sv is an RV. + =cut */ @@ -8859,15 +8845,23 @@ Perl_sv_2io(pTHX_ SV *const sv) default: if (!SvOK(sv)) Perl_croak(aTHX_ PL_no_usym, "filehandle"); - if (SvROK(sv)) + if (SvROK(sv)) { + SvGETMAGIC(SvRV(sv)); return sv_2io(SvRV(sv)); - gv = gv_fetchsv(sv, 0, SVt_PVIO); + } + gv = gv_fetchsv_nomg(sv, 0, SVt_PVIO); if (gv) io = GvIO(gv); else io = 0; - if (!io) - Perl_croak(aTHX_ "Bad filehandle: %"SVf, SVfARG(sv)); + if (!io) { + SV *newsv = sv; + if (SvGMAGICAL(sv)) { + newsv = sv_newmortal(); + sv_setsv_nomg(newsv, sv); + } + Perl_croak(aTHX_ "Bad filehandle: %"SVf, SVfARG(newsv)); + } break; } return io; @@ -8922,7 +8916,7 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **const st, GV **const gvp, const I32 lref) *st = CvSTASH(cv); return cv; } - else if(isGV_with_GP(sv)) + else if(SvGETMAGIC(sv), isGV_with_GP(sv)) gv = MUTABLE_GV(sv); else Perl_croak(aTHX_ "Not a subroutine reference"); @@ -9011,7 +9005,7 @@ can't cope with complex macro expressions. Always use the macro instead. Get a sensible string out of the SV somehow. If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<sv> if -appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are +appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are implemented in terms of this function. You normally want to use the various wrapper macros instead: see C<SvPV_force> and C<SvPV_force_nomg> @@ -9076,7 +9070,8 @@ Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) /* =for apidoc sv_pvbyten_force -The backend for the C<SvPVbytex_force> macro. Always use the macro instead. +The backend for the C<SvPVbytex_force> macro. Always use the macro +instead. =cut */ @@ -9095,7 +9090,8 @@ Perl_sv_pvbyten_force(pTHX_ SV *const sv, STRLEN *const lp) /* =for apidoc sv_pvutf8n_force -The backend for the C<SvPVutf8x_force> macro. Always use the macro instead. +The backend for the C<SvPVutf8x_force> macro. Always use the macro +instead. =cut */ @@ -9468,7 +9464,7 @@ Perl_sv_bless(pTHX_ SV *const sv, HV *const stash) return sv; } -/* Downgrades a PVGV to a PVMG. If it’s actually a PVLV, we leave the type +/* Downgrades a PVGV to a PVMG. If it's actually a PVLV, we leave the type * as it is after unglobbing it. */ @@ -9558,7 +9554,7 @@ Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags) /* =for apidoc sv_untaint -Untaint an SV. Use C<SvTAINTED_off> instead. +Untaint an SV. Use C<SvTAINTED_off> instead. =cut */ @@ -9578,7 +9574,7 @@ Perl_sv_untaint(pTHX_ SV *const sv) /* =for apidoc sv_tainted -Test an SV for taintedness. Use C<SvTAINTED> instead. +Test an SV for taintedness. Use C<SvTAINTED> instead. =cut */ @@ -9799,7 +9795,7 @@ output to an SV. If the appended data contains "wide" characters (including, but not limited to, SVs with a UTF-8 PV formatted with %s, and characters >255 formatted with %c), the original SV might get upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. See -C<sv_catpvf_mg>. If the original SV was UTF-8, the pattern should be +C<sv_catpvf_mg>. If the original SV was UTF-8, the pattern should be valid UTF-8; if the original SV was bytes, the pattern should be too. =cut */ @@ -12710,19 +12706,19 @@ ready to run at the exact same point as the previous one. The pseudo-fork code uses COPY_STACKS while the threads->create doesn't. -CLONEf_KEEP_PTR_TABLE +CLONEf_KEEP_PTR_TABLE - perl_clone keeps a ptr_table with the pointer of the old variable as a key and the new variable as a value, this allows it to check if something has been cloned and not clone it again but rather just use the value and increase the -refcount. If KEEP_PTR_TABLE is not set then perl_clone will kill +refcount. If KEEP_PTR_TABLE is not set then perl_clone will kill the ptr_table using the function C<ptr_table_free(PL_ptr_table); PL_ptr_table = NULL;>, reason to keep it around is if you want to dup some of your own variable who are outside the graph perl scans, example of this -code is in threads.xs create +code is in threads.xs create. -CLONEf_CLONE_HOST +CLONEf_CLONE_HOST - This is a win32 thing, it is ignored on unix, it tells perls win32host code (which is c++) to clone itself, this is needed on win32 if you want to run two threads at the same time, @@ -13601,7 +13597,7 @@ will be converted into Unicode (and UTF-8). If the sv already is UTF-8 (or if it is not POK), or if the encoding is not a reference, nothing is done to the sv. If the encoding is not an C<Encode::XS> Encoding object, bad things will happen. -(See F<lib/encoding.pm> and L<Encode>). +(See F<lib/encoding.pm> and L<Encode>.) The PV of the sv is returned. @@ -13798,8 +13794,8 @@ S_find_array_subscript(pTHX_ const AV *const av, const SV *const val) #define FUV_SUBSCRIPT_HASH 3 /* "$foo{keyname}" */ #define FUV_SUBSCRIPT_WITHIN 4 /* "within @foo" */ -STATIC SV* -S_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, +SV* +Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, const SV *const keyname, I32 aindex, int subscript_type) { @@ -13857,12 +13853,12 @@ S_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, /* =for apidoc find_uninit_var -Find the name of the undefined variable (if any) that caused the operator o +Find the name of the undefined variable (if any) that caused the operator to issue a "Use of uninitialized value" warning. -If match is true, only return a name if it's value matches uninit_sv. +If match is true, only return a name if its value matches uninit_sv. So roughly speaking, if a unary operator (such as OP_COS) generates a warning, then following the direct child of the op may yield an -OP_PADSV or OP_GV that gives the name of the undefined variable. On the +OP_PADSV or OP_GV that gives the name of the undefined variable. On the other hand, with OP_ADD there are two branches to follow, so we only print the variable name if we get an exact match. @@ -14122,6 +14118,7 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, /* ops where $_ may be an implicit arg */ case OP_TRANS: + case OP_TRANSR: case OP_SUBST: case OP_MATCH: if ( !(obase->op_flags & OPf_STACKED)) { @@ -14307,7 +14304,7 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, /* =for apidoc report_uninit -Print appropriate "Use of uninitialized variable" warning +Print appropriate "Use of uninitialized variable" warning. =cut */ diff --git a/t/comp/hints.t b/t/comp/hints.t index b81028a83d..7796727aee 100644 --- a/t/comp/hints.t +++ b/t/comp/hints.t @@ -62,10 +62,12 @@ BEGIN { } # op_entereval should keep the pragmas it was compiled with eval q* + BEGIN { print "not " if $^H{foo} ne "a"; print "ok 13 - \$^H{foo} is 'a' at eval-\"\" time\n"; print "not " unless $^H & 0x00020000; print "ok 14 - \$^H contains HINT_LOCALIZE_HH at eval\"\"-time\n"; + } *; } BEGIN { @@ -84,7 +86,9 @@ BEGIN { BEGIN{$^H{x}=1}; for my $tno (15..16) { eval q( - print $^H{x}==1 && !$^H{y} ? "ok $tno\n" : "not ok $tno\n"; + BEGIN { + print $^H{x}==1 && !$^H{y} ? "ok $tno\n" : "not ok $tno\n"; + } $^H{y} = 1; ); if ($@) { diff --git a/t/comp/require.t b/t/comp/require.t index 07ac51bfe1..d704762bae 100644 --- a/t/comp/require.t +++ b/t/comp/require.t @@ -22,7 +22,7 @@ krunch.pm krunch.pmc whap.pm whap.pmc); my $Is_EBCDIC = (ord('A') == 193) ? 1 : 0; my $Is_UTF8 = (${^OPEN} || "") =~ /:utf8/; -my $total_tests = 52; +my $total_tests = 53; if ($Is_EBCDIC || $Is_UTF8) { $total_tests -= 3; } print "1..$total_tests\n"; @@ -286,6 +286,14 @@ if (defined &DynaLoader::boot_DynaLoader) { print "${not}ok $i - require ignores I/O layers\n"; } +{ + BEGIN { ${^OPEN} = ":utf8\0"; } + %INC = (); + write_file('bleah.pm',"require re; re->import('/x'); 1;\n"); + my $not = eval 'use bleah; "ab" =~ /a b/' ? "" : "not "; + $i++; + print "${not}ok $i - require does not localise %^H at run time\n"; +} ########################################## # What follows are UTF-8 specific tests. # diff --git a/t/comp/retainedlines.t b/t/comp/retainedlines.t index 240c5b177e..25687bed9e 100644 --- a/t/comp/retainedlines.t +++ b/t/comp/retainedlines.t @@ -6,7 +6,7 @@ # we've not yet verified that use works. # use strict; -print "1..73\n"; +print "1..74\n"; my $test = 0; sub failed { @@ -157,4 +157,7 @@ for (0xA, 0) { eval qq{#line 42 "hash-line-eval"\n labadalabada()\n}; is $::{"_<hash-line-eval"}[42], " labadalabada()\n", '#line 42 "foo" in a string eval updates @{"_<foo"}'; + eval qq{#line 42 "figgle"\n#line 85 "doggo"\n labadalabada()\n}; + is $::{"_<doggo"}[85], " labadalabada()\n", + 'subsequent #line 42 "foo" in a string eval updates @{"_<foo"}'; } diff --git a/t/lib/dbmt_common.pl b/t/lib/dbmt_common.pl index b7d8c2ea4c..5d4098c735 100644 --- a/t/lib/dbmt_common.pl +++ b/t/lib/dbmt_common.pl @@ -43,7 +43,7 @@ if (! -e $Dfile) { } SKIP: { skip "different file permission semantics on $^O", 1 - if $^O eq 'amigaos' || $^O eq 'os2' || $^O eq 'MSWin32' || $^O eq 'NetWare' || $^O eq 'dos' || $^O eq 'cygwin'; + if $^O eq 'amigaos' || $^O eq 'os2' || $^O eq 'MSWin32' || $^O eq 'NetWare' || $^O eq 'dos' || $^O eq 'cygwin' || $^O eq 'vos'; my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime, $blksize,$blocks) = stat($Dfile); is($mode & 0777, 0640); diff --git a/t/lib/proxy_constant_subs.t b/t/lib/proxy_constant_subs.t index e3cb41dbc3..9e73006fce 100644 --- a/t/lib/proxy_constant_subs.t +++ b/t/lib/proxy_constant_subs.t @@ -23,10 +23,10 @@ foreach my $symbol (@symbols) { $ps = svref_2object(\*{"Fcntl::$symbol"}); $ms = svref_2object(\*{"::$symbol"}); } - isa_ok($ps, 'B::GV'); + object_ok($ps, 'B::GV'); is($ps->GvFLAGS() & GVf_IMPORTED_CV, 0, "GVf_IMPORTED_CV not set on original"); - isa_ok($ms, 'B::GV'); + object_ok($ms, 'B::GV'); is($ms->GvFLAGS() & GVf_IMPORTED_CV, GVf_IMPORTED_CV, "GVf_IMPORTED_CV set on imported GV"); } diff --git a/t/lib/warnings/9uninit b/t/lib/warnings/9uninit index 0f0f465ec1..e0c7320fdf 100644 --- a/t/lib/warnings/9uninit +++ b/t/lib/warnings/9uninit @@ -635,6 +635,11 @@ sub sortf {$a-1 <=> $b-1}; @sort = sort { undef } 1, 2; sub frobnicate($$) { undef } @sort = sort frobnicate 1, 2; +@sort = sort pyfg 1, 2; +@sort = sort pyfgc 1, 2; +no warnings; +sub pyfg { undef } +sub pyfgc($$) { undef } EXPECT Use of uninitialized value $m1 in sort at - line 6. Use of uninitialized value $g1 in sort at - line 6. @@ -653,7 +658,9 @@ Use of uninitialized value $m1 in sort at - line 9. Use of uninitialized value $g1 in sort at - line 9. Use of uninitialized value $g1 in sort at - line 9. Use of uninitialized value in sort at - line 10. -Use of uninitialized value in sort at - line 11. +Use of uninitialized value in sort at - line 12. +Use of uninitialized value in sort at - line 13. +Use of uninitialized value in sort at - line 14. ######## my $nan = sin 9**9**9; if ($nan == $nan) { @@ -771,6 +778,7 @@ s/$m1/z/; undef $_; s//$g1/; undef $_; s/$m1/$g1/; undef $_; tr/x/y/; undef $_; +tr/x/y/r; undef $_; my $_; /y/; @@ -781,6 +789,7 @@ s/$m1/z/; undef $_; s//$g1/; undef $_; s/$m1/$g1/; undef $_; tr/x/y/; undef $_; +tr/x/y/r; undef $_; $g2 =~ /y/; $g2 =~ /$m1/; @@ -790,6 +799,7 @@ $g2 =~ s/$m1/z/; undef $g2; $g2 =~ s//$g1/; undef $g2; $g2 =~ s/$m1/$g1/; undef $g2; $g2 =~ tr/x/y/; undef $g2; # XXX can't extract var name yet +$g2 =~ tr/x/y/r; undef $g2; # XXX can't extract var name yet my $foo = "abc"; $foo =~ /$m1/; @@ -821,50 +831,53 @@ Use of uninitialized value $_ in substitution (s///) at - line 12. Use of uninitialized value $_ in substitution (s///) at - line 12. Use of uninitialized value $g1 in substitution iterator at - line 12. Use of uninitialized value $_ in transliteration (tr///) at - line 13. -Use of uninitialized value $_ in pattern match (m//) at - line 16. -Use of uninitialized value $m1 in regexp compilation at - line 17. +Use of uninitialized value $_ in transliteration (tr///) at - line 14. Use of uninitialized value $_ in pattern match (m//) at - line 17. -Use of uninitialized value $g1 in regexp compilation at - line 18. +Use of uninitialized value $m1 in regexp compilation at - line 18. Use of uninitialized value $_ in pattern match (m//) at - line 18. -Use of uninitialized value $_ in substitution (s///) at - line 19. -Use of uninitialized value $m1 in regexp compilation at - line 20. -Use of uninitialized value $_ in substitution (s///) at - line 20. +Use of uninitialized value $g1 in regexp compilation at - line 19. +Use of uninitialized value $_ in pattern match (m//) at - line 19. Use of uninitialized value $_ in substitution (s///) at - line 20. +Use of uninitialized value $m1 in regexp compilation at - line 21. Use of uninitialized value $_ in substitution (s///) at - line 21. -Use of uninitialized value $g1 in substitution (s///) at - line 21. Use of uninitialized value $_ in substitution (s///) at - line 21. -Use of uninitialized value $g1 in substitution (s///) at - line 21. -Use of uninitialized value $m1 in regexp compilation at - line 22. Use of uninitialized value $_ in substitution (s///) at - line 22. +Use of uninitialized value $g1 in substitution (s///) at - line 22. Use of uninitialized value $_ in substitution (s///) at - line 22. -Use of uninitialized value $g1 in substitution iterator at - line 22. -Use of uninitialized value $_ in transliteration (tr///) at - line 23. -Use of uninitialized value $g2 in pattern match (m//) at - line 25. -Use of uninitialized value $m1 in regexp compilation at - line 26. -Use of uninitialized value $g2 in pattern match (m//) at - line 26. -Use of uninitialized value $g1 in regexp compilation at - line 27. +Use of uninitialized value $g1 in substitution (s///) at - line 22. +Use of uninitialized value $m1 in regexp compilation at - line 23. +Use of uninitialized value $_ in substitution (s///) at - line 23. +Use of uninitialized value $_ in substitution (s///) at - line 23. +Use of uninitialized value $g1 in substitution iterator at - line 23. +Use of uninitialized value $_ in transliteration (tr///) at - line 24. +Use of uninitialized value $_ in transliteration (tr///) at - line 25. Use of uninitialized value $g2 in pattern match (m//) at - line 27. -Use of uninitialized value $g2 in substitution (s///) at - line 28. -Use of uninitialized value $m1 in regexp compilation at - line 29. -Use of uninitialized value $g2 in substitution (s///) at - line 29. -Use of uninitialized value $g2 in substitution (s///) at - line 29. -Use of uninitialized value $g2 in substitution (s///) at - line 30. -Use of uninitialized value $g1 in substitution (s///) at - line 30. +Use of uninitialized value $m1 in regexp compilation at - line 28. +Use of uninitialized value $g2 in pattern match (m//) at - line 28. +Use of uninitialized value $g1 in regexp compilation at - line 29. +Use of uninitialized value $g2 in pattern match (m//) at - line 29. Use of uninitialized value $g2 in substitution (s///) at - line 30. -Use of uninitialized value $g1 in substitution (s///) at - line 30. Use of uninitialized value $m1 in regexp compilation at - line 31. Use of uninitialized value $g2 in substitution (s///) at - line 31. Use of uninitialized value $g2 in substitution (s///) at - line 31. -Use of uninitialized value $g1 in substitution iterator at - line 31. -Use of uninitialized value in transliteration (tr///) at - line 32. -Use of uninitialized value $m1 in regexp compilation at - line 35. -Use of uninitialized value $g1 in regexp compilation at - line 36. +Use of uninitialized value $g2 in substitution (s///) at - line 32. +Use of uninitialized value $g1 in substitution (s///) at - line 32. +Use of uninitialized value $g2 in substitution (s///) at - line 32. +Use of uninitialized value $g1 in substitution (s///) at - line 32. +Use of uninitialized value $m1 in regexp compilation at - line 33. +Use of uninitialized value $g2 in substitution (s///) at - line 33. +Use of uninitialized value $g2 in substitution (s///) at - line 33. +Use of uninitialized value $g1 in substitution iterator at - line 33. +Use of uninitialized value in transliteration (tr///) at - line 34. +Use of uninitialized value in transliteration (tr///) at - line 35. Use of uninitialized value $m1 in regexp compilation at - line 38. -Use of uninitialized value $g1 in substitution (s///) at - line 39. -Use of uninitialized value $m1 in regexp compilation at - line 40. -Use of uninitialized value $g1 in substitution iterator at - line 40. -Use of uninitialized value $m1 in substitution iterator at - line 41. -Use of uninitialized value in substitution iterator at - line 44. +Use of uninitialized value $g1 in regexp compilation at - line 39. +Use of uninitialized value $m1 in regexp compilation at - line 41. +Use of uninitialized value $g1 in substitution (s///) at - line 42. +Use of uninitialized value $m1 in regexp compilation at - line 43. +Use of uninitialized value $g1 in substitution iterator at - line 43. +Use of uninitialized value $m1 in substitution iterator at - line 44. +Use of uninitialized value in substitution iterator at - line 47. ######## use warnings 'uninitialized'; my ($m1); diff --git a/t/lib/warnings/op b/t/lib/warnings/op index f6f105d222..1a1bb26b7f 100644 --- a/t/lib/warnings/op +++ b/t/lib/warnings/op @@ -72,7 +72,12 @@ defined(%hash) is deprecated (Maybe you should just omit the defined()?) my %h ; defined %h ; - + + $[ used in comparison (did you mean $] ?) + + length() used on @array (did you mean "scalar(@array)"?) + length() used on %hash (did you mean "scalar(keys %hash)"?) + /---/ should probably be written as "---" join(/---/, @foo); @@ -557,19 +562,19 @@ Useless use of a constant (undef) in void context at - line 9. use warnings 'misc' ; my $a ; my @a = () ; my %a = () ; my $b = \@a ; my $c = \%a ;my $d = 'test'; @a =~ /abc/ ; -@a =~ s/a/b/ ; -@a =~ tr/a/b/ ; +@a2 =~ s/a/b/ ; +@a3 =~ tr/a/b/ ; @$b =~ /abc/ ; @$b =~ s/a/b/ ; @$b =~ tr/a/b/ ; %a =~ /abc/ ; -%a =~ s/a/b/ ; -%a =~ tr/a/b/ ; +%a2 =~ s/a/b/ ; +%a3 =~ tr/a/b/ ; %$c =~ /abc/ ; %$c =~ s/a/b/ ; %$c =~ tr/a/b/ ; $d =~ tr/a/b/d ; -$d =~ tr/a/bc/; +$d2 =~ tr/a/bc/; { no warnings 'misc' ; my $a ; my @a = () ; my %a = () ; my $b = \@a ; my $c = \%a ; my $d = 'test'; @@ -589,21 +594,21 @@ $d =~ tr/a/b/d ; $d =~ tr/a/bc/ ; } EXPECT -Applying pattern match (m//) to @array will act on scalar(@array) at - line 5. -Applying substitution (s///) to @array will act on scalar(@array) at - line 6. -Applying transliteration (tr///) to @array will act on scalar(@array) at - line 7. +Applying pattern match (m//) to @a will act on scalar(@a) at - line 5. +Applying substitution (s///) to @a2 will act on scalar(@a2) at - line 6. +Applying transliteration (tr///) to @a3 will act on scalar(@a3) at - line 7. Applying pattern match (m//) to @array will act on scalar(@array) at - line 8. Applying substitution (s///) to @array will act on scalar(@array) at - line 9. Applying transliteration (tr///) to @array will act on scalar(@array) at - line 10. -Applying pattern match (m//) to %hash will act on scalar(%hash) at - line 11. -Applying substitution (s///) to %hash will act on scalar(%hash) at - line 12. -Applying transliteration (tr///) to %hash will act on scalar(%hash) at - line 13. +Applying pattern match (m//) to %a will act on scalar(%a) at - line 11. +Applying substitution (s///) to %a2 will act on scalar(%a2) at - line 12. +Applying transliteration (tr///) to %a3 will act on scalar(%a3) at - line 13. Applying pattern match (m//) to %hash will act on scalar(%hash) at - line 14. Applying substitution (s///) to %hash will act on scalar(%hash) at - line 15. Applying transliteration (tr///) to %hash will act on scalar(%hash) at - line 16. Useless use of /d modifier in transliteration operator at - line 17. Replacement list is longer than search list at - line 18. -Can't modify private array in substitution (s///) at - line 6, near "s/a/b/ ;" +Can't modify array dereference in substitution (s///) at - line 6, near "s/a/b/ ;" BEGIN not safe after errors--compilation aborted at - line 20. ######## # op.c @@ -880,6 +885,61 @@ Prototype mismatch: sub main::fred () vs ($) at - line 4. Prototype mismatch: sub main::freD () vs ($) at - line 11. Prototype mismatch: sub main::FRED () vs ($) at - line 14. ######## +# op.c [Perl_ck_cmp] +use warnings 'syntax' ; +no warnings 'deprecated'; +@a = $[ < 5; +@a = $[ > 5; +@a = $[ <= 5; +@a = $[ >= 5; +@a = 42 < $[; +@a = 42 > $[; +@a = 42 <= $[; +@a = 42 >= $[; +use integer; +@a = $[ < 5; +@a = $[ > 5; +@a = $[ <= 5; +@a = $[ >= 5; +@a = 42 < $[; +@a = 42 > $[; +@a = 42 <= $[; +@a = 42 >= $[; +EXPECT +$[ used in numeric lt (<) (did you mean $] ?) at - line 4. +$[ used in numeric gt (>) (did you mean $] ?) at - line 5. +$[ used in numeric le (<=) (did you mean $] ?) at - line 6. +$[ used in numeric ge (>=) (did you mean $] ?) at - line 7. +$[ used in numeric lt (<) (did you mean $] ?) at - line 8. +$[ used in numeric gt (>) (did you mean $] ?) at - line 9. +$[ used in numeric le (<=) (did you mean $] ?) at - line 10. +$[ used in numeric ge (>=) (did you mean $] ?) at - line 11. +$[ used in numeric lt (<) (did you mean $] ?) at - line 13. +$[ used in numeric gt (>) (did you mean $] ?) at - line 14. +$[ used in numeric le (<=) (did you mean $] ?) at - line 15. +$[ used in numeric ge (>=) (did you mean $] ?) at - line 16. +$[ used in numeric lt (<) (did you mean $] ?) at - line 17. +$[ used in numeric gt (>) (did you mean $] ?) at - line 18. +$[ used in numeric le (<=) (did you mean $] ?) at - line 19. +$[ used in numeric ge (>=) (did you mean $] ?) at - line 20. +######## +# op.c [Perl_ck_length] +use warnings 'syntax' ; +length(@a); +length(%b); +length(@$c); +length(%$d); +length($a); +length(my %h); +length(my @g); +EXPECT +length() used on @a (did you mean "scalar(@a)"?) at - line 3. +length() used on %b (did you mean "scalar(keys %b)"?) at - line 4. +length() used on @array (did you mean "scalar(@array)"?) at - line 5. +length() used on %hash (did you mean "scalar(keys %hash)"?) at - line 6. +length() used on %h (did you mean "scalar(keys %h)"?) at - line 8. +length() used on @g (did you mean "scalar(@g)"?) at - line 9. +######## # op.c use warnings 'syntax' ; join /---/, 'x', 'y', 'z'; @@ -1131,6 +1191,21 @@ Deprecated use of my() in false conditional at - line 7. Deprecated use of my() in false conditional at - line 8. ######## # op.c +$[ = 1; +($[) = 1; +use warnings 'deprecated'; +$[ = 2; +($[) = 2; +no warnings 'deprecated'; +$[ = 3; +($[) = 3; +EXPECT +Use of assignment to $[ is deprecated at - line 2. +Use of assignment to $[ is deprecated at - line 3. +Use of assignment to $[ is deprecated at - line 5. +Use of assignment to $[ is deprecated at - line 6. +######## +# op.c use warnings 'void'; @x = split /y/, "z"; $x = split /y/, "z"; diff --git a/t/lib/warnings/pp_hot b/t/lib/warnings/pp_hot index a0b9b10139..e53832a371 100644 --- a/t/lib/warnings/pp_hot +++ b/t/lib/warnings/pp_hot @@ -116,7 +116,7 @@ my $fh = *STDOUT{IO}; close STDOUT or die "Can't close STDOUT"; print $fh "Shouldn't print anything, but shouldn't SEGV either\n"; EXPECT -print() on closed filehandle at - line 7. +print() on closed filehandle $__ANONIO__ at - line 7. ######## # pp_hot.c [pp_print] package foo; diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 index 0514ee7425..735d86ff25 100644 --- a/t/lib/warnings/utf8 +++ b/t/lib/warnings/utf8 @@ -197,6 +197,13 @@ Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succ ######## require "../test.pl"; use warnings 'utf8'; +sub Is_Super { return '!utf8::Any' } +print "\x{1100000}" =~ /\p{Is_Super}/, "\n"; +EXPECT +1 +######## +require "../test.pl"; +use warnings 'utf8'; my $file = tempfile(); open(my $fh, "+>:utf8", $file); print $fh "\x{D7FF}", "\n"; diff --git a/t/mro/c3_with_overload.t b/t/mro/c3_with_overload.t index 498ce2f613..a75c31a8b3 100644 --- a/t/mro/c3_with_overload.t +++ b/t/mro/c3_with_overload.t @@ -29,10 +29,10 @@ require q(./test.pl); plan(tests => 7); } my $x = InheritingFromOverloadedTest->new(); -isa_ok($x, 'InheritingFromOverloadedTest'); +object_ok($x, 'InheritingFromOverloadedTest'); my $y = OverloadingTest->new(); -isa_ok($y, 'OverloadingTest'); +object_ok($y, 'OverloadingTest'); is("$x", 'InheritingFromOverloadedTest stringified', '... got the right value when stringifing'); is("$y", 'OverloadingTest stringified', '... got the right value when stringifing'); diff --git a/t/mro/c3_with_overload_utf8.t b/t/mro/c3_with_overload_utf8.t index 498ce2f613..a75c31a8b3 100644 --- a/t/mro/c3_with_overload_utf8.t +++ b/t/mro/c3_with_overload_utf8.t @@ -29,10 +29,10 @@ require q(./test.pl); plan(tests => 7); } my $x = InheritingFromOverloadedTest->new(); -isa_ok($x, 'InheritingFromOverloadedTest'); +object_ok($x, 'InheritingFromOverloadedTest'); my $y = OverloadingTest->new(); -isa_ok($y, 'OverloadingTest'); +object_ok($y, 'OverloadingTest'); is("$x", 'InheritingFromOverloadedTest stringified', '... got the right value when stringifing'); is("$y", 'OverloadingTest stringified', '... got the right value when stringifing'); diff --git a/t/mro/isa_c3.t b/t/mro/isa_c3.t index 713d10ef3a..dd129cf442 100644 --- a/t/mro/isa_c3.t +++ b/t/mro/isa_c3.t @@ -64,6 +64,6 @@ foreach my $package (qw(klonk urkkk kapow kayo thwacke zzzzzwap whamm)) { is("@{mro::get_linear_isa($package)}", "@$isa", "\@ISA for $package"); foreach my $class ($package, @$isa, 'UNIVERSAL') { - isa_ok($ref, $class, $package); + object_ok($ref, $class, $package); } } diff --git a/t/mro/isa_c3_utf8.t b/t/mro/isa_c3_utf8.t index 0e69e04eba..3e2e7a999c 100644 --- a/t/mro/isa_c3_utf8.t +++ b/t/mro/isa_c3_utf8.t @@ -66,6 +66,6 @@ foreach my $package (qw(kà´ŒoんḰ urḲḵk 캎oẃ к á¹í™”ckÑ Å¹zzzá‹Ÿá‘ is("@{mro::get_linear_isa($package)}", "@$isa", "\@ISA for $package"); foreach my $class ($package, @$isa, 'UNIVERSAL') { - isa_ok($ref, $class, $package); + object_ok($ref, $class, $package); } } diff --git a/t/mro/isa_dfs.t b/t/mro/isa_dfs.t index 889ee6e531..77c122ea74 100644 --- a/t/mro/isa_dfs.t +++ b/t/mro/isa_dfs.t @@ -60,6 +60,6 @@ foreach my $package (qw(klonk urkkk kapow kayo thwacke zzzzzwap whamm)) { is("@{mro::get_linear_isa($package)}", "@$isa", "\@ISA for $package"); foreach my $class ($package, @$isa, 'UNIVERSAL') { - isa_ok($ref, $class, $package); + object_ok($ref, $class, $package); } } diff --git a/t/mro/isa_dfs_utf8.t b/t/mro/isa_dfs_utf8.t index b6608be4c4..1c95eaa58e 100644 --- a/t/mro/isa_dfs_utf8.t +++ b/t/mro/isa_dfs_utf8.t @@ -62,6 +62,6 @@ foreach my $package (qw(kà´ŒoんḰ urḲḵk 캎oẃ к á¹í™”ckÑ Å¹zzzá‹Ÿá‘ is("@{mro::get_linear_isa($package)}", "@$isa", "\@ISA for $package"); foreach my $class ($package, @$isa, 'UNIVERSAL') { - isa_ok($ref, $class, $package); + object_ok($ref, $class, $package); } } diff --git a/t/mro/next_edgecases.t b/t/mro/next_edgecases.t index 7402ec90ad..e177d7098f 100644 --- a/t/mro/next_edgecases.t +++ b/t/mro/next_edgecases.t @@ -21,7 +21,7 @@ plan(tests => 12); # call the submethod in the direct instance my $foo = Foo->new(); - isa_ok($foo, 'Foo'); + object_ok($foo, 'Foo'); can_ok($foo, 'bar'); is($foo->bar(), 'Foo::bar', '... got the right return value'); @@ -37,8 +37,8 @@ plan(tests => 12); } my $bar = Bar->new(); - isa_ok($bar, 'Bar'); - isa_ok($bar, 'Foo'); + object_ok($bar, 'Bar'); + object_ok($bar, 'Foo'); # test it working with with Sub::Name SKIP: { @@ -68,8 +68,8 @@ plan(tests => 12); } my $baz = Baz->new(); - isa_ok($baz, 'Baz'); - isa_ok($baz, 'Foo'); + object_ok($baz, 'Baz'); + object_ok($baz, 'Foo'); { my $m = sub { (shift)->next::method() }; diff --git a/t/mro/next_edgecases_utf8.t b/t/mro/next_edgecases_utf8.t index bd461c777f..ba6ff8b06b 100644 --- a/t/mro/next_edgecases_utf8.t +++ b/t/mro/next_edgecases_utf8.t @@ -24,7 +24,7 @@ plan(tests => 12); # call the submethod in the direct instance my $foo = ᕘ->new(); - isa_ok($foo, 'ᕘ'); + object_ok($foo, 'ᕘ'); can_ok($foo, 'Æš'); is($foo->Æš(), 'ᕘ::Æš', '... got the right return value'); @@ -40,8 +40,8 @@ plan(tests => 12); } my $bar = Baɾ->new(); - isa_ok($bar, 'Baɾ'); - isa_ok($bar, 'ᕘ'); + object_ok($bar, 'Baɾ'); + object_ok($bar, 'ᕘ'); # test it working with with Sub::Name SKIP: { @@ -71,8 +71,8 @@ plan(tests => 12); } my $baz = બʑ->new(); - isa_ok($baz, 'બʑ'); - isa_ok($baz, 'ᕘ'); + object_ok($baz, 'બʑ'); + object_ok($baz, 'ᕘ'); { my $m = sub { (shift)->next::method() }; diff --git a/t/mro/overload_c3.t b/t/mro/overload_c3.t index a62e631af0..db2b1ec660 100644 --- a/t/mro/overload_c3.t +++ b/t/mro/overload_c3.t @@ -35,10 +35,10 @@ require q(./test.pl); plan(tests => 7); } my $x = InheritingFromOverloadedTest->new(); -isa_ok($x, 'InheritingFromOverloadedTest'); +object_ok($x, 'InheritingFromOverloadedTest'); my $y = OverloadingTest->new(); -isa_ok($y, 'OverloadingTest'); +object_ok($y, 'OverloadingTest'); is("$x", 'InheritingFromOverloadedTest stringified', '... got the right value when stringifing'); is("$y", 'OverloadingTest stringified', '... got the right value when stringifing'); diff --git a/t/mro/overload_c3_utf8.t b/t/mro/overload_c3_utf8.t index 5a483ef5c0..bcb9f70c77 100644 --- a/t/mro/overload_c3_utf8.t +++ b/t/mro/overload_c3_utf8.t @@ -38,10 +38,10 @@ require q(./test.pl); plan(tests => 7); } my $x = ìºÒŽê€€á¸®á¹†áµ·êœ°ë¡¬áµ•veÅ”Åoadì——í…Ÿáµµ->ãƒáš¹(); -isa_ok($x, 'ìºÒŽê€€á¸®á¹†áµ·êœ°ë¡¬áµ•veÅ”Åoadì——í…Ÿáµµ'); +object_ok($x, 'ìºÒŽê€€á¸®á¹†áµ·êœ°ë¡¬áµ•veÅ”Åoadì——í…Ÿáµµ'); my $y = Ovì—rꪩࡃá›ï¼´eÅá¹±->ãƒáš¹(); -isa_ok($y, 'Ovì—rꪩࡃá›ï¼´eÅá¹±'); +object_ok($y, 'Ovì—rꪩࡃá›ï¼´eÅá¹±'); is("$x", 'ìºÒŽê€€á¸®á¹†áµ·êœ°ë¡¬áµ•veÅ”Åoadì——í…Ÿáµµ stringified', '... got the right value when stringifing'); is("$y", 'Ovì—rꪩࡃá›ï¼´eÅá¹± stringified', '... got the right value when stringifing'); diff --git a/t/mro/overload_dfs.t b/t/mro/overload_dfs.t index 89f11d0260..5943c855db 100644 --- a/t/mro/overload_dfs.t +++ b/t/mro/overload_dfs.t @@ -35,10 +35,10 @@ require q(./test.pl); plan(tests => 7); } my $x = InheritingFromOverloadedTest->new(); -isa_ok($x, 'InheritingFromOverloadedTest'); +object_ok($x, 'InheritingFromOverloadedTest'); my $y = OverloadingTest->new(); -isa_ok($y, 'OverloadingTest'); +object_ok($y, 'OverloadingTest'); is("$x", 'InheritingFromOverloadedTest stringified', '... got the right value when stringifing'); is("$y", 'OverloadingTest stringified', '... got the right value when stringifing'); diff --git a/t/mro/package_aliases.t b/t/mro/package_aliases.t index b08e8edd2f..3bc3c8fa74 100644 --- a/t/mro/package_aliases.t +++ b/t/mro/package_aliases.t @@ -30,8 +30,8 @@ plan(tests => 52); ok (Old->isa (New::), 'Old inherits from New'); ok (New->isa (Old::), 'New inherits from Old'); -isa_ok (bless ({}, Old::), New::, 'Old object'); -isa_ok (bless ({}, New::), Old::, 'New object'); +object_ok (bless ({}, Old::), New::, 'Old object'); +object_ok (bless ({}, New::), Old::, 'New object'); # Test that replacing a package by assigning to an existing glob diff --git a/t/mro/package_aliases_utf8.t b/t/mro/package_aliases_utf8.t index ae214e5ce5..0106154c43 100644 --- a/t/mro/package_aliases_utf8.t +++ b/t/mro/package_aliases_utf8.t @@ -33,8 +33,8 @@ plan(tests => 52); ok (ऑlㄉ->isa(ï¼®eáº::), 'ऑlㄉ inherits from ï¼®eáº'); ok (ï¼®eáº->isa(ऑlㄉ::), 'ï¼®eẠinherits from ऑlㄉ'); -isa_ok (bless ({}, ऑlㄉ::), ï¼®eáº::, 'ऑlㄉ object'); -isa_ok (bless ({}, ï¼®eáº::), ऑlㄉ::, 'ï¼®eẠobject'); +object_ok (bless ({}, ऑlㄉ::), ï¼®eáº::, 'ऑlㄉ object'); +object_ok (bless ({}, ï¼®eáº::), ऑlㄉ::, 'ï¼®eẠobject'); # Test that replacing a package by assigning to an existing glob diff --git a/t/op/array_base.t b/t/op/array_base.t index fe5045afea..34404d491f 100644 --- a/t/op/array_base.t +++ b/t/op/array_base.t @@ -1,5 +1,6 @@ #!perl -w use strict; +no warnings 'deprecated'; BEGIN { require './test.pl'; diff --git a/t/op/coreamp.t b/t/op/coreamp.t index 2027f41ac5..d3f03eb58a 100644 --- a/t/op/coreamp.t +++ b/t/op/coreamp.t @@ -30,6 +30,7 @@ package sov { } my %op_desc = ( + evalbytes=> 'eval "string"', join => 'join or string', readline => '<HANDLE>', readpipe => 'quoted execution (``, qx)', @@ -118,10 +119,11 @@ sub test_proto { elsif ($p =~ '^([$*]+);?\z') { # Fixed-length $$$ or *** my $args = length $1; $tests += 2; + my $desc = quotemeta op_desc($o); eval " &CORE::$o((1)x($args-1)) "; - like $@, qr/^Not enough arguments for $o at /, "&$o with too few args"; + like $@, qr/^Not enough arguments for $desc at /, "&$o w/too few args"; eval " &CORE::$o((1)x($args+1)) "; - like $@, qr/^Too many arguments for $o at /, "&$o with too many args"; + like $@, qr/^Too many arguments for $desc at /, "&$o w/too many args"; } elsif ($p =~ '^([$*]+);([$*]+)\z') { # Variable-length $$$ or *** my $minargs = length $1; @@ -396,6 +398,29 @@ test_proto $_ for qw( endgrent endhostent endnetent endprotoent endpwent endservent ); +test_proto 'evalbytes'; +$tests += 4; +{ + chop(my $upgraded = "use utf8; '\xc4\x80'" . chr 256); + is &myevalbytes($upgraded), chr 256, '&evalbytes'; + # Test hints + require strict; + strict->import; + &myevalbytes(' + is someone, "someone", "run-time hint bits do not leak into &evalbytes" + '); + use strict; + BEGIN { $^H{coreamp} = 42 } + $^H{coreamp} = 75; + &myevalbytes(' + BEGIN { + is $^H{coreamp}, 42, "compile-time hh propagates into &evalbytes"; + } + ${"frobnicate"} + '); + like $@, qr/strict/, 'compile-time hint bits propagate into &evalbytes'; +} + test_proto 'exit'; $tests ++; is runperl(prog => '&CORE::exit; END { print qq-ok\n- }'), "ok\n", diff --git a/t/op/coresubs.t b/t/op/coresubs.t index 60aa1b7814..1665cf6ab6 100644 --- a/t/op/coresubs.t +++ b/t/op/coresubs.t @@ -84,7 +84,7 @@ while(<$kh>) { # These ops currently accept any number of args, despite their # prototypes, if they have any: next if $word =~ /^(?:chom?p|exec|keys|each|not|read(?:lin|pip)e - |reset|system|values|l?stat)/x; + |reset|system|values|l?stat)|evalbytes/x; $tests ++; $code = diff --git a/t/op/cproto.t b/t/op/cproto.t index c9cfe466ca..ad2249df2c 100644 --- a/t/op/cproto.t +++ b/t/op/cproto.t @@ -7,7 +7,7 @@ BEGIN { } BEGIN { require './test.pl'; } -plan tests => 245; +plan tests => 246; while (<DATA>) { chomp; @@ -77,6 +77,7 @@ endservent () eof (;*) eq undef eval undef +evalbytes (_) exec undef exists undef exit (;$) diff --git a/t/op/each.t b/t/op/each.t index d9e15422b2..d12d678ea5 100644 --- a/t/op/each.t +++ b/t/op/each.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 56; +plan tests => 57; $h{'abc'} = 'ABC'; $h{'def'} = 'DEF'; @@ -255,3 +255,17 @@ for my $k (qw(each keys values)) { } ::is($c, 1, "single key now freed"); } + +{ + # Make sure each() does not leave the iterator in an inconsistent state + # (RITER set to >= 0, with EITER null) if the active iterator is + # deleted, leaving the hash apparently empty. + my %h; + $h{1} = 2; + each %h; + delete $h{1}; + each %h; + $h{1}=2; + is join ("-", each %h), '1-2', + 'each on apparently empty hash does not leave RITER set'; +} diff --git a/t/op/eval.t b/t/op/eval.t index 49a1ccab41..f8e23e3295 100644 --- a/t/op/eval.t +++ b/t/op/eval.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan(tests => 118); +plan(tests => 120); eval 'pass();'; @@ -436,9 +436,9 @@ is($got, "ok\n", 'eval and last'); like($@, qr/^syntax error/, 'eval syntax error, no warnings'); } -# a syntax error in an eval called magically 9eg vie tie or overload) +# a syntax error in an eval called magically (eg via tie or overload) # resulted in an assertion failure in S_docatch, since doeval had already -# poppedthe EVAL context due to the failure, but S_docatch expected the +# popped the EVAL context due to the failure, but S_docatch expected the # context to still be there. { @@ -567,3 +567,22 @@ for my $k (!0) { is "a" =~ /a/, "1", "string eval leaves readonly lexicals readonly [perl #19135]"; } + +# [perl #68750] +fresh_perl_is(<<'EOP', "ok\nok\nok\n", undef, 'eval clears %^H'); + BEGIN { + require re; re->import('/x'); # should only affect surrounding scope + eval ' + print "a b" =~ /a b/ ? "ok\n" : "nokay\n"; + use re "/m"; + print "a b" =~ /a b/ ? "ok\n" : "nokay\n"; + '; + } + print "ab" =~ /a b/ ? "ok\n" : "nokay\n"; +EOP + +# [perl #70151] +{ + BEGIN { eval 'require re; import re "/x"' } + ok "ab" =~ /a b/, 'eval does not localise %^H at run time'; +} diff --git a/t/op/evalbytes.t b/t/op/evalbytes.t new file mode 100644 index 0000000000..4a60614814 --- /dev/null +++ b/t/op/evalbytes.t @@ -0,0 +1,34 @@ +#!./perl + +BEGIN { + chdir 't'; + @INC = '../lib'; + require './test.pl'; +} + +plan(tests => 8); + +{ + local $SIG{__WARN__} = sub {}; + eval "evalbytes 'foo'"; + like $@, qr/syntax error/, 'evalbytes outside feature scope'; +} + +# We enable unicode_eval just to test that it does not interfere. +use feature 'evalbytes', 'unicode_eval'; + +is evalbytes("1+7"), 8, 'evalbytes basic sanity check'; + +my $code = 'qq(\xff\xfe)'; +is evalbytes($code), "\xff\xfe", 'evalbytes on extra-ASCII bytes'; +chop((my $upcode = $code) .= chr 256); +is evalbytes($upcode), "\xff\xfe", 'evalbytes on upgraded extra-ASCII'; +{ + use utf8; + is evalbytes($code), "\xff\xfe", 'evalbytes ignores outer utf8 pragma'; +} +is evalbytes "use utf8; '\xc4\x80'", chr 256, 'use utf8 within evalbytes'; +chop($upcode = "use utf8; '\xc4\x80'" . chr 256); +is evalbytes $upcode, chr 256, 'use utf8 within evalbytes on utf8 string'; +eval { evalbytes chr 256 }; +like $@, qr/Wide character/, 'evalbytes croaks on non-bytes'; @@ -12,7 +12,7 @@ BEGIN { use warnings; -plan( tests => 238 ); +plan( tests => 239 ); # type coercion on assignment $foo = 'foo'; @@ -595,25 +595,27 @@ foreach my $type (qw(integer number string)) { "with the correct error message"); } -# RT #60954 anonymous glob should be defined, and not coredump when +# RT #65582 anonymous glob should be defined, and not coredump when # stringified. The behaviours are: # -# defined($glob) "$glob" -# 5.8.8 false "" with uninit warning -# 5.10.0 true (coredump) -# 5.12.0 true "" +# defined($glob) "$glob" $glob .= ... +# 5.8.8 false "" with uninit warning "" with uninit warning +# 5.10.0 true (coredump) (coredump) +# 5.1[24] true "" "" with uninit warning +# 5.16 true "*__ANON__::..." "*__ANON__::..." { my $io_ref = *STDOUT{IO}; my $glob = *$io_ref; - ok(defined $glob, "RT #60954 anon glob should be defined"); + ok(defined $glob, "RT #65582 anon glob should be defined"); my $warn = ''; local $SIG{__WARN__} = sub { $warn = $_[0] }; use warnings; my $str = "$glob"; - is($warn, '', "RT #60954 anon glob stringification shouldn't warn"); - is($str, '', "RT #60954 anon glob stringification should be empty"); + is($warn, '', "RT #65582 anon glob stringification shouldn't warn"); + is($str, '*__ANON__::$__ANONIO__', + "RT #65582/#96326 anon glob stringification"); } # [perl #71254] - Assigning a glob to a variable that has a current @@ -854,13 +856,13 @@ ok eval { my $glob = do { no warnings "once"; \*phing::foo}; delete $::{"phing::"}; *$glob = *greck; -}, "Assigning a glob-with-sub to a glob that has lost its stash warks"; +}, "Assigning a glob-with-sub to a glob that has lost its stash works"; ok eval { sub pon::foo; my $glob = \*pon::foo; delete $::{"pon::"}; *$glob = *foo; -}, "Assigning a glob to a glob-with-sub that has lost its stash warks"; +}, "Assigning a glob to a glob-with-sub that has lost its stash works"; { package Tie::Alias; @@ -916,6 +918,18 @@ package HTTP::MobileAttribute::Plugin::Locator { "stash elem for slot is not freed prematurely"; } +# Check that constants promoted to CVs point to the right GVs when the name +# contains a null. +package lrcg { + use constant x => 3; + # These two lines abuse the optimisation that copies the scalar ref from + # one stash element to another, to get a constant with a null in its name + *{"yz\0a"} = \&{"x"}; + my $ref = \&{"yz\0a"}; + ::ok !exists $lrcg::{yz}, + 'constants w/nulls in their names point 2 the right GVs when promoted'; +} + __END__ Perl Rules diff --git a/t/op/hash.t b/t/op/hash.t index b8aeaa7d2e..29cca3921e 100644 --- a/t/op/hash.t +++ b/t/op/hash.t @@ -8,7 +8,7 @@ BEGIN { use strict; -plan tests => 10; +plan tests => 11; my %h; @@ -168,3 +168,16 @@ is($destroyed, 1, 'Timely hash destruction with lvalue keys'); delete $h{k}; # must be in void context to trigger the bug ok $normal_exit, 'freed hash elems are not visible to DESTROY'; } + +# Weak references to pad hashes +SKIP: { + skip_if_miniperl("No Scalar::Util::weaken under miniperl", 1); + my $ref; + require Scalar::Util; + { + my %hash; + Scalar::Util::weaken($ref = \%hash); + 1; # the previous statement must not be the last + } + is $ref, undef, 'weak refs to pad hashes go stale on scope exit'; +} diff --git a/t/op/override.t b/t/op/override.t index 413ba77e84..ab2cbf1515 100644 --- a/t/op/override.t +++ b/t/op/override.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 26; +plan tests => 28; # # This file tries to test builtin override using CORE::GLOBAL @@ -52,6 +52,18 @@ is( $r, join($dirsep, "Foo", "Bar.pm") ); eval "use 5.006"; is( $r, "5.006" ); +{ + local $_ = 'foo.pm'; + require; + is( $r, 'foo.pm' ); +} + +{ + my $_ = 'bar.pm'; + require; + is( $r, 'bar.pm' ); +} + # localizing *CORE::GLOBAL::foo should revert to finding CORE::foo { local(*CORE::GLOBAL::require); @@ -11,9 +11,9 @@ sub r { } my $a = r(); -isa_ok($a, 'Regexp'); +object_ok($a, 'Regexp'); my $b = r(); -isa_ok($b, 'Regexp'); +object_ok($b, 'Regexp'); my $b1 = $b; @@ -21,9 +21,9 @@ isnt($a + 0, $b + 0, 'Not the same object'); bless $b, 'Pie'; -isa_ok($b, 'Pie'); -isa_ok($a, 'Regexp'); -isa_ok($b1, 'Pie'); +object_ok($b, 'Pie'); +object_ok($a, 'Regexp'); +object_ok($b1, 'Pie'); my $c = r(); like("$c", qr/Good/); @@ -43,16 +43,16 @@ is($$d1, 'Bad'); # Assignment to an implicitly blessed Regexp object retains the class # (No different from direct value assignment to any other blessed SV -isa_ok($d, 'Regexp'); +object_ok($d, 'Regexp'); like("$d", qr/\ARegexp=SCALAR\(0x[0-9a-f]+\)\z/); # As does an explicitly blessed Regexp object. my $e = bless qr/Faux Pie/, 'Stew'; -isa_ok($e, 'Stew'); +object_ok($e, 'Stew'); $$e = 'Fake!'; is($$e, 'Fake!'); -isa_ok($e, 'Stew'); +object_ok($e, 'Stew'); like("$e", qr/\Stew=SCALAR\(0x[0-9a-f]+\)\z/); diff --git a/t/op/stash.t b/t/op/stash.t index e7d66098ba..9e223eb9ac 100644 --- a/t/op/stash.t +++ b/t/op/stash.t @@ -92,7 +92,7 @@ SKIP: { delete $one::{one}; my $gv = b($sub)->GV; - isa_ok( $gv, "B::GV", "deleted stash entry leaves CV with valid GV"); + object_ok( $gv, "B::GV", "deleted stash entry leaves CV with valid GV"); is( b($sub)->CvFLAGS & $CVf_ANON, $CVf_ANON, "...and CVf_ANON set"); is( eval { $gv->NAME }, "__ANON__", "...and an __ANON__ name"); is( eval { $gv->STASH->NAME }, "one", "...but leaves stash intact"); @@ -104,7 +104,7 @@ SKIP: { %two:: = (); $gv = b($sub)->GV; - isa_ok( $gv, "B::GV", "cleared stash leaves CV with valid GV"); + object_ok( $gv, "B::GV", "cleared stash leaves CV with valid GV"); is( b($sub)->CvFLAGS & $CVf_ANON, $CVf_ANON, "...and CVf_ANON set"); is( eval { $gv->NAME }, "__ANON__", "...and an __ANON__ name"); is( eval { $gv->STASH->NAME }, "two", "...but leaves stash intact"); @@ -116,7 +116,7 @@ SKIP: { undef %three::; $gv = b($sub)->GV; - isa_ok( $gv, "B::GV", "undefed stash leaves CV with valid GV"); + object_ok( $gv, "B::GV", "undefed stash leaves CV with valid GV"); is( b($sub)->CvFLAGS & $CVf_ANON, $CVf_ANON, "...and CVf_ANON set"); is( eval { $gv->NAME }, "__ANON__", "...and an __ANON__ name"); is( eval { $gv->STASH->NAME }, "__ANON__", "...and an __ANON__ stash"); diff --git a/t/op/taint.t b/t/op/taint.t index ba32722250..39a2925b5a 100644 --- a/t/op/taint.t +++ b/t/op/taint.t @@ -17,7 +17,7 @@ BEGIN { use strict; use Config; -plan tests => 786; +plan tests => 787; $| = 1; @@ -2169,6 +2169,12 @@ end ok "M$TAINT" ~~ ['m', 'M'], '$tainted ~~ ["whatever", "match"]'; ok !("M$TAINT" ~~ ['m', undef]), '$tainted ~~ ["whatever", undef]'; +# Tainted values and ref() +for(1,2) { + my $x = bless \"M$TAINT", ref(bless[], "main"); +} +pass("no death when TARG of ref is tainted"); + # This may bomb out with the alarm signal so keep it last SKIP: { diff --git a/t/op/tie.t b/t/op/tie.t index 3d4eb20394..b6567fc282 100644 --- a/t/op/tie.t +++ b/t/op/tie.t @@ -1110,3 +1110,73 @@ $x = *bar; print &{\&$x}, "\n"; EXPECT 73 +######## + +# Lexicals should not be visible to magic methods on scope exit +BEGIN { unless (defined &DynaLoader::boot_DynaLoader) { + print "HASH\nHASH\nARRAY\nARRAY\n"; exit; +}} +use Scalar::Util 'weaken'; +{ package xoufghd; + sub TIEHASH { Scalar::Util::weaken($_[1]); bless \$_[1], xoufghd:: } + *TIEARRAY = *TIEHASH; + DESTROY { + bless ${$_[0]} || return, 0; +} } +for my $sub ( + # hashes: ties before backrefs + sub { + my %hash; + $ref = ref \%hash; + tie %hash, xoufghd::, \%hash; + 1; + }, + # hashes: backrefs before ties + sub { + my %hash; + $ref = ref \%hash; + weaken(my $x = \%hash); + tie %hash, xoufghd::, \%hash; + 1; + }, + # arrays: ties before backrefs + sub { + my @array; + $ref = ref \@array; + tie @array, xoufghd::, \@array; + 1; + }, + # arrays: backrefs before ties + sub { + my @array; + $ref = ref \@array; + weaken(my $x = \@array); + tie @array, xoufghd::, \@array; + 1; + }, +) { + &$sub; + &$sub; + print $ref, "\n"; +} +EXPECT +HASH +HASH +ARRAY +ARRAY +######## + +# Localising a tied variable with a typeglob in it should copy magic +sub TIESCALAR{bless[]} +sub FETCH{warn "fetching\n"; *foo} +sub STORE{} +tie $x, ""; +local $x; +warn "before"; +"$x"; +warn "after"; +EXPECT +fetching +before at - line 8. +fetching +after at - line 10. diff --git a/t/op/tie_fetch_count.t b/t/op/tie_fetch_count.t index 85d88d63b9..e4fe5f4f7f 100644 --- a/t/op/tie_fetch_count.t +++ b/t/op/tie_fetch_count.t @@ -7,7 +7,7 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; require './test.pl'; - plan (tests => 287); + plan (tests => 291); } use strict; @@ -210,14 +210,16 @@ $var8->bolgy ; check_count '->method'; # Functions that operate on filenames or filehandles for ([chdir=>''],[chmod=>'0,'],[chown=>'0,0,'],[utime=>'0,0,'], - [truncate=>'',',0'],[stat=>''],[lstat=>'']) { + [truncate=>'',',0'],[stat=>''],[lstat=>''],[open=>'my $fh,"<&",'], + ['()=sort'=>'',' 1,2,3']) { my($op,$args,$postargs) = @$_; $postargs //= ''; # This line makes $var8 hold a glob: $var8 = *dummy; $dummy = $var8; $count = 0; eval "$op $args \$var8 $postargs"; check_count "$op $args\$tied_glob$postargs"; $var8 = *dummy; $dummy = $var8; $count = 0; - eval "$op $args \\\$var8 $postargs"; + my $ref = \$var8; + eval "$op $args \$ref $postargs"; check_count "$op $args\\\$tied_glob$postargs"; } diff --git a/t/porting/diag.t b/t/porting/diag.t index 331b356828..50aa996f3d 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -2,6 +2,7 @@ use warnings; use strict; +chdir 't'; require './test.pl'; plan('no_plan'); diff --git a/t/porting/dual-life.t b/t/porting/dual-life.t index b6f66942db..45747212ea 100644 --- a/t/porting/dual-life.t +++ b/t/porting/dual-life.t @@ -6,6 +6,7 @@ use strict; # # * Are all dual-life programs being generated in utils/? +chdir 't'; require './test.pl'; plan('no_plan'); @@ -19,7 +20,7 @@ use File::Spec::Functions; my $not_installed = qr{^(?: \.\./cpan/Encode/bin/u(?:cm(?:2table|lint|sort)|nidump) | - \.\./cpan/Module-Build/MB-[\w\d]+/Simple/bin/foo(?:\.PL)? + \.\./cpan/Module-Build/MB-[\w\d]+/Simple/(?:test_install/)?bin/.* )\z}ix; my %dist_dir_exe; diff --git a/t/porting/exec-bit.t b/t/porting/exec-bit.t index 02506c7c36..718c81dabc 100644 --- a/t/porting/exec-bit.t +++ b/t/porting/exec-bit.t @@ -15,6 +15,10 @@ if ( $^O eq "VMS" ) { skip_all( "Filename case may not be preserved and other porting issues." ); } +if ( $^O eq "vos" ) { + skip_all( "VOS combines the read and execute permission bits." ); +} + plan('no_plan'); use ExtUtils::Manifest qw(maniread); diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index 2ba19a1c49..8d4bc5920c 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -1,4 +1,4 @@ -# This file is the data file for porting/podcheck.t. +# This file is the data file for t/porting/podcheck.t. # There are three types of lines. # Comment lines are white-space only or begin with a '#', like this one. Any # changes you make to the comment lines will be lost when the file is @@ -100,6 +100,7 @@ pwd_mkdb(8) recvmsg(3) s2p Scalar::Readonly +Semi::Semicolons sendmail(1) sendmsg(3) sha1sum(1) diff --git a/t/porting/buildtoc.t b/t/porting/pod_rules.t index 4fbcac92cb..ee53667b3e 100644 --- a/t/porting/buildtoc.t +++ b/t/porting/pod_rules.t @@ -9,7 +9,7 @@ use strict; require 't/test.pl'; my $result = runperl(switches => ['-f', '-Ilib'], - progfile => 'pod/buildtoc', - args => ['--build-toc', '-q', '--test', '--build-all']); + progfile => 'Porting/pod_rules.pl', + args => ['--tap']); print $result; diff --git a/t/porting/podcheck.t b/t/porting/podcheck.t index dbf6fb9d31..46b6eac97a 100644 --- a/t/porting/podcheck.t +++ b/t/porting/podcheck.t @@ -1680,7 +1680,9 @@ if (! $has_input_files) { } # If regenerating the data file, start with the modules for which we don't -# check targets +# check targets. If you change the sort order, you need to run --regen before +# committing so that future commits that do run regen don't show irrelevant +# changes. if ($regen) { foreach (sort { lc $a cmp lc $b } keys %valid_modules) { my_safer_print($copy_fh, $_, "\n"); diff --git a/t/re/pat.t b/t/re/pat.t index ed87f07701..54d44ac046 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -21,7 +21,7 @@ BEGIN { require './test.pl'; } -plan tests => 463; # Update this when adding/deleting tests. +plan tests => 464; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1208,6 +1208,13 @@ EOP sprintf("'U+%04X not legal IDFirst'", ord($char))); } } + + { # [perl #101710] + my $pat = "b"; + utf8::upgrade($pat); + like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); + } + } # End of sub run_tests 1; diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 2a510d13aa..4d88190a5e 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1521,11 +1521,14 @@ sub run_tests { my $ary = shift @$t; foreach my $pat (@$t) { foreach my $str (@$ary) { - ok $str =~ /($pat)/, $pat; - is($1, $str, $pat); + my $temp_str = $str; + $temp_str = display($temp_str); + ok $str =~ /($pat)/, $temp_str . " =~ /($pat)"; + my $temp_1 = $1; + is($1, $str, "\$1='" . display($temp_1) . "' eq '" . $temp_str . "' after ($pat)"); utf8::upgrade ($str); - ok $str =~ /($pat)/, "Upgraded string - $pat"; - is($1, $str, "Upgraded string - $pat"); + ok $str =~ /($pat)/, "Upgraded " . $temp_str . " =~ /($pat)/"; + is($1, $str, "\$1='" . display($temp_1) . "' eq '" . $temp_str . "'(upgraded) after ($pat)"); } } } @@ -2054,7 +2057,7 @@ EOP (?<=[=&]) (?=.) )}iox'; is($@, '', $message); - isa_ok($r, 'Regexp', $message); + object_ok($r, 'Regexp', $message); } # RT #82610 @@ -2112,6 +2115,37 @@ EOP unlike("s\N{U+DF}", qr/^\x{00DF}/i, "\"s\\N{U+DF}\", qr/^\\x{00DF}/i"); } + # User-defined Unicode properties to match above-Unicode code points + sub Is_32_Bit_Super { return "110000\tFFFFFFFF\n" } + sub Is_Portable_Super { return '!utf8::Any' } # Matches beyond 32 bits + + { # Assertion was failing on on 64-bit platforms; just didn't work on 32. + no warnings qw(non_unicode portable); + use Config; + + # We use 'ok' instead of 'like' because the warnings are lexically + # scoped, and want to turn them off, so have to do the match in this + # scope + if ($Config{uvsize} < 8) { + ok(chr(0xFFFF_FFFE) =~ /\p{Is_32_Bit_Super}/, + "chr(0xFFFF_FFFE) can match a Unicode property"); + ok(chr(0xFFFF_FFFF) =~ /\p{Is_32_Bit_Super}/, + "chr(0xFFFF_FFFF) can match a Unicode property"); + } + else { + no warnings 'overflow'; + ok(chr(0xFFFF_FFFF_FFFF_FFFE) =~ qr/\p{Is_Portable_Super}/, + "chr(0xFFFF_FFFF_FFFF_FFFE) can match a Unicode property"); + ok(chr(0xFFFF_FFFF_FFFF_FFFF) =~ qr/^\p{Is_Portable_Super}$/, + "chr(0xFFFF_FFFF_FFFF_FFFF) can match a Unicode property"); + + # This test is because something was declared as 32 bits, but + # should have been cast to 64; only a problem where + # sizeof(STRLEN) != sizeof(UV) + ok(chr(0xFFFF_FFFF_FFFF_FFFE) !~ qr/\p{Is_32_Bit_Super}/, "chr(0xFFFF_FFFF_FFFF_FFFE) shouldn't match a range ending in 0xFFFF_FFFF"); + } + } + # !!! NOTE that tests that aren't at all likely to crash perl should go # a ways above, above these last ones. diff --git a/t/run/fresh_perl.t b/t/run/fresh_perl.t index 0aaf28a800..9c76a64f46 100644 --- a/t/run/fresh_perl.t +++ b/t/run/fresh_perl.t @@ -763,18 +763,25 @@ foo at - line 1. -lw # Make sure the presence of the CORE::GLOBAL::glob typeglob does not affect # whether File::Glob::csh_glob is called. -++$INC{"File/Glob.pm"}; # prevent it from loading -my $called1 = -my $called2 = 0; -*File::Glob::csh_glob = sub { ++$called1 }; -my $output1 = eval q{ glob(q(./"TEST")) }; -undef *CORE::GLOBAL::glob; # but leave the typeglob itself there -++$CORE::GLOBAL::glob if 0; # "used only once" -undef *File::Glob::csh_glob; # avoid redefinition warnings -*File::Glob::csh_glob = sub { ++$called2 }; -my $output2 = eval q{ glob(q(./"TEST")) }; -print "ok1" if $called1 eq $called2; -print "ok2" if $output1 eq $output2; +if ($^O eq 'VMS') { + # A pattern with a double quote in it is a syntax error to LIB$FIND_FILE + # Should we strip quotes in Perl_vms_start_glob the way csh_glob() does? + print "ok1\nok2\n"; +} +else { + ++$INC{"File/Glob.pm"}; # prevent it from loading + my $called1 = + my $called2 = 0; + *File::Glob::csh_glob = sub { ++$called1 }; + my $output1 = eval q{ glob(q(./"TEST")) }; + undef *CORE::GLOBAL::glob; # but leave the typeglob itself there + ++$CORE::GLOBAL::glob if 0; # "used only once" + undef *File::Glob::csh_glob; # avoid redefinition warnings + *File::Glob::csh_glob = sub { ++$called2 }; + my $output2 = eval q{ glob(q(./"TEST")) }; + print "ok1" if $called1 eq $called2; + print "ok2" if $output1 eq $output2; +} EXPECT ok1 ok2 diff --git a/t/run/switchd.t b/t/run/switchd.t index 3ea468156d..eadcd94053 100644 --- a/t/run/switchd.t +++ b/t/run/switchd.t @@ -9,7 +9,7 @@ BEGIN { require "./test.pl"; } # This test depends on t/lib/Devel/switchd*.pm. -plan(tests => 5); +plan(tests => 7); my $r; @@ -78,3 +78,35 @@ like( qr "1\r?\n2\r?\n", 'Subroutine redefinition works in the debugger [perl #48332]', ); + +# [rt.cpan.org #69862] +like( + runperl( + switches => [ '-Ilib', '-d:switchd_empty' ], + progs => [ + 'sub DB::sub { goto &$DB::sub }', + 'sub foo { print qq _1\n_ }', + 'sub bar { print qq _2\n_ }', + 'delete $::{foo}; eval { foo() };', + 'my $bar = *bar; undef *bar; eval { &$bar };', + ], + ), + qr "1\r?\n2\r?\n", + 'Subroutines no longer found under their names can be called', +); + +# [rt.cpan.org #69862] +like( + runperl( + switches => [ '-Ilib', '-d:switchd_empty' ], + progs => [ + 'sub DB::sub { goto &$DB::sub }', + 'sub foo { goto &bar::baz; }', + 'sub bar::baz { print qq _ok\n_ }', + 'delete $::{bar::::};', + 'foo();', + ], + ), + qr "ok\r?\n", + 'No crash when calling orphaned subroutine via goto &', +); @@ -1064,7 +1064,7 @@ sub can_ok ($@) { } -# Call $class->new( @$args ); and run the result through isa_ok. +# Call $class->new( @$args ); and run the result through object_ok. # See Test::More::new_ok sub new_ok { my($class, $args, $obj_name) = @_; @@ -1078,7 +1078,7 @@ sub new_ok { my $error = $@; if($ok) { - isa_ok($obj, $class, $object_name); + object_ok($obj, $class, $object_name); } else { ok( 0, "new() died" ); @@ -1099,20 +1099,29 @@ sub isa_ok ($$;$) { if( !defined $object ) { $diag = "$obj_name isn't defined"; } - elsif( !ref $object ) { - $diag = "$obj_name isn't a reference"; - } else { + my $whatami = ref $object ? 'object' : 'class'; + # We can't use UNIVERSAL::isa because we want to honor isa() overrides local($@, $!); # eval sometimes resets $! my $rslt = eval { $object->isa($class) }; - if( $@ ) { - if( $@ =~ /^Can't call method "isa" on unblessed reference/ ) { + my $error = $@; # in case something else blows away $@ + + if( $error ) { + if( $error =~ /^Can't call method "isa" on unblessed reference/ ) { + # It's an unblessed reference + $obj_name = 'The reference' unless defined $obj_name; if( !UNIVERSAL::isa($object, $class) ) { my $ref = ref $object; $diag = "$obj_name isn't a '$class' it's a '$ref'"; } - } else { + } + elsif( $error =~ /Can't call method "isa" without a package/ ) { + # It's something that can't even be a class + $obj_name = 'The thing' unless defined $obj_name; + $diag = "$obj_name isn't a class or reference"; + } + else { die <<WHOA; WHOA! I tried to call ->isa on your object and got some weird error. This should never happen. Please contact the author immediately. @@ -1122,6 +1131,7 @@ WHOA } } elsif( !$rslt ) { + $obj_name = "The $whatami" unless defined $obj_name; my $ref = ref $object; $diag = "$obj_name isn't a '$class' it's a '$ref'"; } @@ -1130,6 +1140,34 @@ WHOA _ok( !$diag, _where(), $name ); } + +sub class_ok { + my($class, $isa, $class_name) = @_; + + # Written so as to count as one test + local $Level = $Level + 1; + if( ref $class ) { + ok( 0, "$class is a refrence, not a class name" ); + } + else { + isa_ok($class, $isa, $class_name); + } +} + + +sub object_ok { + my($obj, $isa, $obj_name) = @_; + + local $Level = $Level + 1; + if( !ref $obj ) { + ok( 0, "$obj is not a reference" ); + } + else { + isa_ok($obj, $isa, $obj_name); + } +} + + # Purposefully avoiding a closure. sub __capture { push @::__capture, join "", @_; diff --git a/t/test_pl/can_isa_ok.t b/t/test_pl/can_isa_ok.t new file mode 100644 index 0000000000..081d3e563b --- /dev/null +++ b/t/test_pl/can_isa_ok.t @@ -0,0 +1,63 @@ +#!/usr/bin/env perl -w + +# Test isa_ok() and can_ok() in test.pl + +use strict; +use warnings; + +BEGIN { require "test.pl"; } + +require Test::More; + +can_ok('Test::More', qw(require_ok use_ok ok is isnt like skip can_ok + pass fail eq_array eq_hash eq_set)); +can_ok(bless({}, "Test::More"), qw(require_ok use_ok ok is isnt like skip + can_ok pass fail eq_array eq_hash eq_set)); + + +isa_ok(bless([], "Foo"), "Foo"); +isa_ok([], 'ARRAY'); +isa_ok(\42, 'SCALAR'); +{ + local %Bar::; + local @Foo::ISA = 'Bar'; + isa_ok( "Foo", "Bar" ); +} + + +# can_ok() & isa_ok should call can() & isa() on the given object, not +# just class, in case of custom can() +{ + local *Foo::can; + local *Foo::isa; + *Foo::can = sub { $_[0]->[0] }; + *Foo::isa = sub { $_[0]->[0] }; + my $foo = bless([0], 'Foo'); + ok( ! $foo->can('bar') ); + ok( ! $foo->isa('bar') ); + $foo->[0] = 1; + can_ok( $foo, 'blah'); + isa_ok( $foo, 'blah'); +} + + +note "object/class_ok"; { + { + package Child; + our @ISA = qw(Parent); + } + + { + package Parent; + sub new { bless {}, shift } + } + + # Unfortunately we can't usefully test the failure case without + # significantly modifying test.pl + class_ok "Child", "Parent"; + class_ok "Parent", "Parent"; + object_ok( Parent->new, "Parent" ); + object_ok( Child->new, "Parent" ); +} + +done_testing; diff --git a/t/uni/case.pl b/t/uni/case.pl index 8a2f752459..8bd115b479 100644 --- a/t/uni/case.pl +++ b/t/uni/case.pl @@ -25,7 +25,12 @@ sub casetest { my %simple; for my $i (split(/\n/, $simple)) { my ($k, $v) = split(' ', $i); - $simple{$k} = $v; + + # Add the simple mapping to the simples test list, except the input + # may include code points that the specials override, so don't add + # those to the test list. The specials keys are the code points, + # encoded in utf8,, but without the utf8 flag on, so pack with C0. + $simple{$k} = $v unless exists $spec->{pack("C0U", hex $k)}; } my %seen; diff --git a/t/uni/eval.t b/t/uni/eval.t new file mode 100644 index 0000000000..f08c706cc7 --- /dev/null +++ b/t/uni/eval.t @@ -0,0 +1,42 @@ +#!./perl + +# Check if eval correctly ignores the UTF-8 hint. + +BEGIN { + require './test.pl'; +} + +plan (tests => 5); + +use open qw( :utf8 :std ); +use feature 'unicode_eval'; + +{ + my $w; + $SIG{__WARN__} = sub { $w = shift }; + use utf8; + my $prog = "qq!\x{f9}!"; + + eval $prog; + ok !$w; + + $w = ""; + utf8::upgrade($prog); + eval $prog; + is $w, ''; +} + +{ + use utf8; + isnt eval "q!\360\237\220\252!", eval "q!\x{1f42a}!"; +} + +{ + no utf8; #Let's make real sure. + my $not_utf8 = "q!\343\203\213!"; + isnt eval $not_utf8, eval "q!\x{30cb}!"; + { + use utf8; + isnt eval $not_utf8, eval "q!\x{30cb}!"; + } +} diff --git a/t/uni/stash.t b/t/uni/stash.t index f6e8c4244c..168b93c874 100644 --- a/t/uni/stash.t +++ b/t/uni/stash.t @@ -84,7 +84,7 @@ plan( tests => 58 ); delete $온ꪵ::{온ꪵ}; my $gv = b($sub)->GV; - isa_ok( $gv, "B::GV", "deleted stash entry leaves CV with valid GV"); + object_ok( $gv, "B::GV", "deleted stash entry leaves CV with valid GV"); is( b($sub)->CvFLAGS & $CVf_ANON, $CVf_ANON, "...and CVf_ANON set"); is( eval { $gv->NAME }, "__ANON__", "...and an __ANON__ name"); is( eval { $gv->STASH->NAME }, "온ꪵ", "...but leaves stash intact"); @@ -96,7 +96,7 @@ plan( tests => 58 ); %tê–¿:: = (); $gv = b($sub)->GV; - isa_ok( $gv, "B::GV", "cleared stash leaves CV with valid GV"); + object_ok( $gv, "B::GV", "cleared stash leaves CV with valid GV"); is( b($sub)->CvFLAGS & $CVf_ANON, $CVf_ANON, "...and CVf_ANON set"); is( eval { $gv->NAME }, "__ANON__", "...and an __ANON__ name"); is( eval { $gv->STASH->NAME }, "tê–¿", "...but leaves stash intact"); @@ -108,7 +108,7 @@ plan( tests => 58 ); undef %á–Ÿë ˆï¿‡::; $gv = b($sub)->GV; - isa_ok( $gv, "B::GV", "undefed stash leaves CV with valid GV"); + object_ok( $gv, "B::GV", "undefed stash leaves CV with valid GV"); is( b($sub)->CvFLAGS & $CVf_ANON, $CVf_ANON, "...and CVf_ANON set"); is( eval { $gv->NAME }, "__ANON__", "...and an __ANON__ name"); is( eval { $gv->STASH->NAME }, "__ANON__", "...and an __ANON__ stash"); @@ -133,7 +133,7 @@ static const char ident_too_long[] = "Identifier too long"; #ifdef USE_UTF8_SCRIPTS # define UTF (!IN_BYTES) #else -# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) +# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) #endif /* The maximum number of characters preceding the unrecognized one to display */ @@ -669,9 +669,8 @@ from which code will be read to be parsed. If both are non-null, the code in I<line> comes first and must consist of complete lines of input, and I<rsfp> supplies the remainder of the source. -The I<flags> parameter is reserved for future use, and must always -be zero, except for one flag that is currently reserved for perl's internal -use. +The I<flags> parameter is reserved for future use. Currently it is only +used by perl internally, so extensions should always pass zero. =cut */ @@ -684,9 +683,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) { dVAR; const char *s = NULL; - STRLEN len; yy_parser *parser, *oparser; - if (flags && flags != LEX_START_SAME_FILTER) + if (flags && flags & ~LEX_START_FLAGS) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); /* create and initialise a parser */ @@ -717,25 +715,27 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->rsfp = rsfp; parser->rsfp_filters = !(flags & LEX_START_SAME_FILTER) || !oparser - ? newAV() - : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters)); + ? NULL + : MUTABLE_AV(SvREFCNT_inc( + oparser->rsfp_filters + ? oparser->rsfp_filters + : (oparser->rsfp_filters = newAV()) + )); Newx(parser->lex_brackstack, 120, char); Newx(parser->lex_casestack, 12, char); *parser->lex_casestack = '\0'; if (line) { + STRLEN len; s = SvPV_const(line, len); + parser->linestr = flags & LEX_START_COPIED + ? SvREFCNT_inc_simple_NN(line) + : newSVpvn_flags(s, len, SvUTF8(line)); + if (!len || s[len-1] != ';') + sv_catpvs(parser->linestr, "\n;"); } else { - len = 0; - } - - if (!len) { parser->linestr = newSVpvs("\n;"); - } else { - parser->linestr = newSVpvn_flags(s, len, SvUTF8(line)); - if (s[len-1] != ';') - sv_catpvs(parser->linestr, "\n;"); } parser->oldoldbufptr = parser->oldbufptr = @@ -743,8 +743,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->linestart = SvPVX(parser->linestr); parser->bufend = parser->bufptr + SvCUR(parser->linestr); parser->last_lop = parser->last_uni = NULL; + parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES); - parser->in_pod = 0; + parser->in_pod = parser->filtered = 0; } @@ -1262,7 +1263,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags) } if (flags & LEX_FAKE_EOF) { goto eof; - } else if (!PL_parser->rsfp) { + } else if (!PL_parser->rsfp && !PL_parser->filtered) { got_some = 0; } else if (filter_gets(linestr, old_bufend_pos)) { got_some = 1; @@ -1279,7 +1280,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags) else if (PL_parser->rsfp) (void)PerlIO_close(PL_parser->rsfp); PL_parser->rsfp = NULL; - PL_parser->in_pod = 0; + PL_parser->in_pod = PL_parser->filtered = 0; #ifdef PERL_MAD if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n)) PL_faketokens = 1; @@ -1584,7 +1585,7 @@ S_incline(pTHX_ const char *s) tmplen = 0; } - if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) { + if (!PL_rsfp && !PL_parser->filtered) { /* must copy *{"::_<(eval N)[oldfilename:L]"} * to *{"::_<newfilename"} */ /* However, the long form of evals is only turned on by the @@ -3838,6 +3839,9 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) if (!PL_parser) return NULL; + if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) + Perl_croak(aTHX_ "Source filters apply only to byte streams"); + if (!PL_rsfp_filters) PL_rsfp_filters = newAV(); if (!datasv) @@ -3850,6 +3854,45 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) SvPV_nolen(datasv))); av_unshift(PL_rsfp_filters, 1); av_store(PL_rsfp_filters, 0, datasv) ; + if ( + !PL_parser->filtered + && PL_parser->lex_flags & LEX_EVALBYTES + && PL_bufptr < PL_bufend + ) { + const char *s = PL_bufptr; + while (s < PL_bufend) { + if (*s == '\n') { + SV *linestr = PL_parser->linestr; + char *buf = SvPVX(linestr); + STRLEN const bufptr_pos = PL_parser->bufptr - buf; + STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; + STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; + STRLEN const linestart_pos = PL_parser->linestart - buf; + STRLEN const last_uni_pos = + PL_parser->last_uni ? PL_parser->last_uni - buf : 0; + STRLEN const last_lop_pos = + PL_parser->last_lop ? PL_parser->last_lop - buf : 0; + av_push(PL_rsfp_filters, linestr); + PL_parser->linestr = + newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); + buf = SvPVX(PL_parser->linestr); + PL_parser->bufend = buf + SvCUR(PL_parser->linestr); + PL_parser->bufptr = buf + bufptr_pos; + PL_parser->oldbufptr = buf + oldbufptr_pos; + PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; + PL_parser->linestart = buf + linestart_pos; + if (PL_parser->last_uni) + PL_parser->last_uni = buf + last_uni_pos; + if (PL_parser->last_lop) + PL_parser->last_lop = buf + last_lop_pos; + SvLEN(linestr) = SvCUR(linestr); + SvCUR(linestr) = s-SvPVX(linestr); + PL_parser->filtered = 1; + break; + } + s++; + } + } return(datasv); } @@ -3892,7 +3935,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) /* This API is bad. It should have been using unsigned int for maxlen. Not sure if we want to change the API, but if not we should sanity check the value here. */ - const unsigned int correct_length + unsigned int correct_length = maxlen < 0 ? #ifdef PERL_MICRO 0x7FFFFFFF @@ -3944,6 +3987,31 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) idx)); return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ } + if (SvTYPE(datasv) != SVt_PVIO) { + if (correct_length) { + /* Want a block */ + const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); + if (!remainder) return 0; /* eof */ + if (correct_length > remainder) correct_length = remainder; + sv_catpvn(buf_sv, SvEND(datasv), correct_length); + SvCUR_set(datasv, SvCUR(datasv) + correct_length); + } else { + /* Want a line */ + const char *s = SvEND(datasv); + const char *send = SvPVX(datasv) + SvLEN(datasv); + while (s < send) { + if (*s == '\n') { + s++; + break; + } + s++; + } + if (s == send) return 0; /* eof */ + sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); + SvCUR_set(datasv, s-SvPVX(datasv)); + } + return SvCUR(buf_sv); + } /* Get function pointer hidden within datasv */ funcp = DPTR2FPTR(filter_t, IoANY(datasv)); DEBUG_P(PerlIO_printf(Perl_debug_log, @@ -4096,7 +4164,7 @@ Perl_madlex(pTHX) } /* put off final whitespace till peg */ - if (optype == ';' && !PL_rsfp) { + if (optype == ';' && !PL_rsfp && !PL_parser->filtered) { PL_nextwhite = PL_thiswhite; PL_thiswhite = 0; } @@ -4680,7 +4748,7 @@ Perl_yylex(pTHX) if (PL_madskills) PL_faketokens = 0; #endif - if (!PL_rsfp) { + if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) { PL_last_uni = 0; PL_last_lop = 0; if (PL_lex_brackets && @@ -4827,7 +4895,7 @@ Perl_yylex(pTHX) PL_parser->in_pod = 0; } } - if (PL_rsfp) + if (PL_rsfp || PL_parser->filtered) incline(s); } while (PL_parser->in_pod); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; @@ -5053,15 +5121,17 @@ Perl_yylex(pTHX) if (PL_madskills) PL_faketokens = 0; #endif - if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) { - if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) { + if (PL_lex_state != LEX_NORMAL || + (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) { + if (*s == '#' && s == PL_linestart && PL_in_eval + && !PL_rsfp && !PL_parser->filtered) { /* handle eval qq[#line 1 "foo"\n ...] */ CopLINE_dec(PL_curcop); incline(s); } if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) { s = SKIPSPACE0(s); - if (!PL_in_eval || PL_rsfp) + if (!PL_in_eval || PL_rsfp || PL_parser->filtered) incline(s); } else { @@ -5838,7 +5908,7 @@ Perl_yylex(pTHX) if (PL_expect == XSTATE && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) { - if (PL_in_eval && !PL_rsfp) { + if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) { d = PL_bufend; while (s < d) { if (*s++ == '\n') { @@ -7244,6 +7314,10 @@ Perl_yylex(pTHX) UNIBRACK(OP_ENTEREVAL); } + case KEY_evalbytes: + PL_expect = XTERM; + UNIBRACK(-OP_ENTEREVAL); + case KEY_eof: UNI(OP_EOF); @@ -9251,7 +9325,8 @@ S_scan_heredoc(pTHX_ register char *s) register char *d; register char *e; char *peek; - const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR)); + const int outer = (PL_rsfp || PL_parser->filtered) + && !(PL_lex_inwhat == OP_SCALAR); #ifdef PERL_MAD I32 stuffstart = s - SvPVX(PL_linestr); char *tstart; @@ -9375,7 +9450,8 @@ S_scan_heredoc(pTHX_ register char *s) PL_multi_start = CopLINE(PL_curcop); PL_multi_open = PL_multi_close = '<'; term = *PL_tokenbuf; - if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) { + if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp + && !PL_parser->filtered) { char * const bufptr = PL_sublex_info.super_bufptr; char * const bufend = PL_sublex_info.super_bufend; char * const olds = s - SvCUR(herewas); @@ -9798,7 +9874,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) char * const svlast = SvEND(sv) - 1; for (; s < ns; s++) { - if (*s == '\n' && !PL_rsfp) + if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) CopLINE_inc(PL_curcop); } if (!found) @@ -9865,7 +9941,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) if (PL_multi_open == PL_multi_close) { for (; s < PL_bufend; s++,to++) { /* embedded newlines increment the current line number */ - if (*s == '\n' && !PL_rsfp) + if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) CopLINE_inc(PL_curcop); /* handle quoted delimiters */ if (*s == '\\' && s+1 < PL_bufend && term != '\\') { @@ -9897,7 +9973,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* read until we run out of string, or we find the terminator */ for (; s < PL_bufend; s++,to++) { /* embedded newlines increment the line count */ - if (*s == '\n' && !PL_rsfp) + if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) CopLINE_inc(PL_curcop); /* backslashes can escape the open or closing characters */ if (*s == '\\' && s+1 < PL_bufend) { @@ -10457,7 +10533,7 @@ S_scan_formline(pTHX_ register char *s) break; } } - if (PL_in_eval && !PL_rsfp) { + if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) { eol = (char *) memchr(s,'\n',PL_bufend-s); if (!eol++) eol = PL_bufend; @@ -10488,7 +10564,7 @@ S_scan_formline(pTHX_ register char *s) break; } s = (char*)eol; - if (PL_rsfp) { + if (PL_rsfp || PL_parser->filtered) { bool got_some; #ifdef PERL_MAD if (PL_madskills) { diff --git a/universal.c b/universal.c index d623a67ed3..b62a92370b 100644 --- a/universal.c +++ b/universal.c @@ -154,7 +154,7 @@ Perl_sv_derived_from_pvn(pTHX_ SV *sv, const char *const name, const STRLEN len, SvGETMAGIC(sv); - if (SvROK(sv)) { /* hugdo: */ + if (SvROK(sv)) { const char *type; sv = SvRV(sv); type = sv_reftype(sv,0); @@ -924,11 +924,11 @@ XS(XS_Internals_SvREFCNT) /* This is dangerous stuff. */ sv = SvRV(svz); if (items == 1) - XSRETURN_IV(SvREFCNT(sv) - 1); /* Minus the ref created for us. */ + XSRETURN_UV(SvREFCNT(sv) - 1); /* Minus the ref created for us. */ else if (items == 2) { /* I hope you really know what you are doing. */ - SvREFCNT(sv) = SvIV(ST(1)); - XSRETURN_IV(SvREFCNT(sv)); + SvREFCNT(sv) = SvUV(ST(1)) + 1; /* we free one ref on exit */ + XSRETURN_UV(SvREFCNT(sv) - 1); } XSRETURN_UNDEF; /* Can't happen. */ } @@ -1314,10 +1314,79 @@ Perl_is_uni_xdigit(pTHX_ UV c) return is_utf8_xdigit(tmpbuf); } +UV +Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_or_s) +{ + /* We have the latin1-range values compiled into the core, so just use + * those, converting the result to utf8. The only difference between upper + * and title case in this range is that LATIN_SMALL_LETTER_SHARP_S is + * either "SS" or "Ss". Which one to use is passed into the routine in + * 'S_or_s' to avoid a test */ + + UV converted = toUPPER_LATIN1_MOD(c); + + PERL_ARGS_ASSERT__TO_UPPER_TITLE_LATIN1; + + assert(S_or_s == 'S' || S_or_s == 's'); + + if (UNI_IS_INVARIANT(converted)) { /* No difference between the two for + characters in this range */ + *p = (U8) converted; + *lenp = 1; + return converted; + } + + /* toUPPER_LATIN1_MOD gives the correct results except for three outliers, + * which it maps to one of them, so as to only have to have one check for + * it in the main case */ + if (UNLIKELY(converted == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) { + switch (c) { + case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS: + converted = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS; + break; + case MICRO_SIGN: + converted = GREEK_CAPITAL_LETTER_MU; + break; + case LATIN_SMALL_LETTER_SHARP_S: + *(p)++ = 'S'; + *p = S_or_s; + *lenp = 2; + return 'S'; + default: + Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect '%c' to map to '%c'", c, LATIN_SMALL_LETTER_Y_WITH_DIAERESIS); + /* NOTREACHED */ + } + } + + *(p)++ = UTF8_TWO_BYTE_HI(converted); + *p = UTF8_TWO_BYTE_LO(converted); + *lenp = 2; + + return converted; +} + +/* Call the function to convert a UTF-8 encoded character to the specified case. + * Note that there may be more than one character in the result. + * INP is a pointer to the first byte of the input character + * OUTP will be set to the first byte of the string of changed characters. It + * needs to have space for UTF8_MAXBYTES_CASE+1 bytes + * LENP will be set to the length in bytes of the string of changed characters + * + * The functions return the ordinal of the first character in the string of OUTP */ +#define CALL_UPPER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_toupper, "ToUc", "utf8::ToSpecUpper") +#define CALL_TITLE_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_totitle, "ToTc", "utf8::ToSpecTitle") +#define CALL_LOWER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tolower, "ToLc", "utf8::ToSpecLower") + +/* This additionally has the input parameter SPECIALS, which if non-zero will + * cause this to use the SPECIALS hash for folding (meaning get full case + * folding); otherwise, when zero, this implies a simple case fold */ +#define CALL_FOLD_CASE(INP, OUTP, LENP, SPECIALS) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tofold, "ToCf", (SPECIALS) ? "utf8::ToSpecFold" : NULL) UV Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp) { + dVAR; + /* Convert the Unicode character whose ordinal is c to its uppercase * version and store that in UTF-8 in p and its length in bytes in lenp. * Note that the p needs to be at least UTF8_MAXBYTES_CASE+1 bytes since @@ -1328,42 +1397,99 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp) PERL_ARGS_ASSERT_TO_UNI_UPPER; + if (c < 256) { + return _to_upper_title_latin1((U8) c, p, lenp, 'S'); + } + uvchr_to_utf8(p, c); - return to_utf8_upper(p, p, lenp); + return CALL_UPPER_CASE(p, p, lenp); } UV Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp) { + dVAR; + PERL_ARGS_ASSERT_TO_UNI_TITLE; + if (c < 256) { + return _to_upper_title_latin1((U8) c, p, lenp, 's'); + } + uvchr_to_utf8(p, c); - return to_utf8_title(p, p, lenp); + return CALL_TITLE_CASE(p, p, lenp); +} + +STATIC U8 +S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp) +{ + /* We have the latin1-range values compiled into the core, so just use + * those, converting the result to utf8. Since the result is always just + * one character, we allow p to be NULL */ + + U8 converted = toLOWER_LATIN1(c); + + if (p != NULL) { + if (UNI_IS_INVARIANT(converted)) { + *p = converted; + *lenp = 1; + } + else { + *p = UTF8_TWO_BYTE_HI(converted); + *(p+1) = UTF8_TWO_BYTE_LO(converted); + *lenp = 2; + } + } + return converted; } UV Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp) { + dVAR; + PERL_ARGS_ASSERT_TO_UNI_LOWER; - if (c > 255) { - uvchr_to_utf8(p, c); - return to_utf8_lower(p, p, lenp); + if (c < 256) { + return to_lower_latin1((U8) c, p, lenp); } - /* We have the latin1-range values compiled into the core, so just use - * those, converting the result to utf8 */ - c = toLOWER_LATIN1(c); - if (UNI_IS_INVARIANT(c)) { - *p = c; + uvchr_to_utf8(p, c); + return CALL_LOWER_CASE(p, p, lenp); +} + +UV +Perl__to_fold_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const U8 flags) +{ + UV converted; + + PERL_ARGS_ASSERT__TO_FOLD_LATIN1; + + if (c == MICRO_SIGN) { + converted = GREEK_SMALL_LETTER_MU; + } + else if (flags && c == LATIN_SMALL_LETTER_SHARP_S) { + *(p)++ = 's'; + *p = 's'; + *lenp = 2; + return 's'; + } + else { /* In this range the fold of all other characters is their lower + case */ + converted = toLOWER_LATIN1(c); + } + + if (UNI_IS_INVARIANT(converted)) { + *p = (U8) converted; *lenp = 1; } else { - *p = UTF8_TWO_BYTE_HI(c); - *(p+1) = UTF8_TWO_BYTE_LO(c); + *(p)++ = UTF8_TWO_BYTE_HI(converted); + *p = UTF8_TWO_BYTE_LO(converted); *lenp = 2; } - return c; + + return converted; } UV @@ -1371,8 +1497,12 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags) { PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS; + if (c < 256) { + return _to_fold_latin1((U8) c, p, lenp, flags); + } + uvchr_to_utf8(p, c); - return _to_utf8_fold_flags(p, p, lenp, flags); + return CALL_FOLD_CASE(p, p, lenp, flags); } /* for now these all assume no locale info available for Unicode > 255 */ @@ -1992,8 +2122,15 @@ Perl_to_utf8_upper(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) PERL_ARGS_ASSERT_TO_UTF8_UPPER; - return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); + if (UTF8_IS_INVARIANT(*p)) { + return _to_upper_title_latin1(*p, ustrp, lenp, 'S'); + } + else if UTF8_IS_DOWNGRADEABLE_START(*p) { + return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)), + ustrp, lenp, 'S'); + } + + return CALL_UPPER_CASE(p, ustrp, lenp); } /* @@ -2016,8 +2153,15 @@ Perl_to_utf8_title(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) PERL_ARGS_ASSERT_TO_UTF8_TITLE; - return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); + if (UTF8_IS_INVARIANT(*p)) { + return _to_upper_title_latin1(*p, ustrp, lenp, 's'); + } + else if UTF8_IS_DOWNGRADEABLE_START(*p) { + return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)), + ustrp, lenp, 's'); + } + + return CALL_TITLE_CASE(p, ustrp, lenp); } /* @@ -2040,8 +2184,14 @@ Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) PERL_ARGS_ASSERT_TO_UTF8_LOWER; - return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); + if (UTF8_IS_INVARIANT(*p)) { + return to_lower_latin1(*p, ustrp, lenp); + } + else if UTF8_IS_DOWNGRADEABLE_START(*p) { + return to_lower_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)), ustrp, lenp); + } + + return CALL_LOWER_CASE(p, ustrp, lenp); } /* @@ -2064,19 +2214,23 @@ The first character of the foldcased version is returned UV Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags) { - const char *specials = (flags) ? "utf8::ToSpecFold" : NULL; - dVAR; PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS; - return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_tofold, "ToFold", specials); + if (UTF8_IS_INVARIANT(*p)) { + return _to_fold_latin1(*p, ustrp, lenp, flags); + } + else if UTF8_IS_DOWNGRADEABLE_START(*p) { + return _to_fold_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)), + ustrp, lenp, flags); + } + + return CALL_FOLD_CASE(p, ustrp, lenp, flags); } /* Note: - * A "swash" is a swatch hash. - * A "swatch" is a bit vector generated by utf8.c:S_swash_get(). + * Returns a "swash" which is a hash described in utf8.c:S_swash_fetch(). * C<pkg> is a pointer to a package name for SWASHNEW, should be "utf8". * For other parameters, see utf8::SWASHNEW in lib/utf8_heavy.pl. */ @@ -2166,6 +2320,34 @@ Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits * of the string C<ptr>. If C<do_utf8> is true, the string C<ptr> is * assumed to be in utf8. If C<do_utf8> is false, the string C<ptr> is * assumed to be in native 8-bit encoding. Caches the swatch in C<swash>. + * + * A "swash" is a hash which contains initially the keys/values set up by + * SWASHNEW. The purpose is to be able to completely represent a Unicode + * property for all possible code points. Things are stored in a compact form + * (see utf8_heavy.pl) so that calculation is required to find the actual + * property value for a given code point. As code points are looked up, new + * key/value pairs are added to the hash, so that the calculation doesn't have + * to ever be re-done. Further, each calculation is done, not just for the + * desired one, but for a whole block of code points adjacent to that one. + * For binary properties on ASCII machines, the block is usually for 64 code + * points, starting with a code point evenly divisible by 64. Thus if the + * property value for code point 257 is requested, the code goes out and + * calculates the property values for all 64 code points between 256 and 319, + * and stores these as a single 64-bit long bit vector, called a "swatch", + * under the key for code point 256. The key is the UTF-8 encoding for code + * point 256, minus the final byte. Thus, if the length of the UTF-8 encoding + * for a code point is 13 bytes, the key will be 12 bytes long. If the value + * for code point 258 is then requested, this code realizes that it would be + * stored under the key for 256, and would find that value and extract the + * relevant bit, offset from 256. + * + * Non-binary properties are stored in as many bits as necessary to represent + * their values (32 currently, though the code is more general than that), not + * as single bits, but the principal is the same: the value for each key is a + * vector that encompasses the property values for all code points whose UTF-8 + * representations are represented by the key. That is, for all code points + * whose UTF-8 representations are length N bytes, and the key is the first N-1 + * bytes of that. */ UV Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8) @@ -2208,19 +2390,6 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8) /* If char is encoded then swatch is for the prefix */ needents = (1 << UTF_ACCUMULATION_SHIFT); off = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK; - if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_NON_UNICODE)) { - const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0); - - /* This outputs warnings for binary properties only, assuming that - * to_utf8_case() will output any for non-binary. Also, surrogates - * aren't checked for, as that would warn on things like - * /\p{Gc=Cs}/ */ - SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE); - if (SvUV(*bitssvp) == 1) { - Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE), - "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", code_point); - } - } } /* @@ -2254,7 +2423,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8) 0 : UTF8_ALLOW_ANY); swatch = swash_get(swash, /* On EBCDIC & ~(0xA0-1) isn't a useful thing to do */ - (klen) ? (code_point & ~(needents - 1)) : 0, + (klen) ? (code_point & ~((UV)needents - 1)) : 0, needents); if (IN_PERL_COMPILETIME) @@ -2277,6 +2446,24 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8) Copy(ptr, PL_last_swash_key, klen, U8); } + if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_NON_UNICODE)) { + SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE); + + /* This outputs warnings for binary properties only, assuming that + * to_utf8_case() will output any for non-binary. Also, surrogates + * aren't checked for, as that would warn on things like /\p{Gc=Cs}/ */ + + if (SvUV(*bitssvp) == 1) { + /* User-defined properties can silently match above-Unicode */ + SV** const user_defined_svp = hv_fetchs(hv, "USER_DEFINED", FALSE); + if (! user_defined_svp || ! SvUV(*user_defined_svp)) { + const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0); + Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE), + "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", code_point); + } + } + } + switch ((int)((slen << 3) / needents)) { case 1: bit = 1 << (off & 7); @@ -2425,7 +2612,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) const STRLEN bits = SvUV(*bitssvp); const STRLEN octets = bits >> 3; /* if bits == 1, then octets == 0 */ const UV none = SvUV(*nonesvp); - const UV end = start + span; + UV end = start + span; PERL_ARGS_ASSERT_SWASH_GET; @@ -2434,6 +2621,12 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) (UV)bits); } + /* If overflowed, use the max possible */ + if (end < start) { + end = UV_MAX; + span = end - start; + } + /* create and initialize $swatch */ scur = octets ? (span * octets) : (span + 7) / 8; swatch = newSV(scur); @@ -2463,7 +2656,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) SvCUR_set(swatch, scur); s = (U8*)SvPVX(swatch); - /* read $swash->{LIST} */ + /* read $swash->{LIST}. XXX Note that this is a linear scan through a + * sorted list. A binary search would be much more efficient */ l = (U8*)SvPV(*listsvp, lcur); lend = l + lcur; while (l < lend) { @@ -2490,6 +2684,10 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) STRLEN offset; if (key >= end) goto go_out_list; + /* XXX If it should ever happen (very unlikely) that we would + * want a non-binary result for the code point at UV_MAX, + * special handling would need to be inserted here, as is done + * below for the binary case */ /* offset must be non-negative (start <= min <= key < end) */ offset = octets * (key - start); if (bits == 8) @@ -2513,6 +2711,15 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) UV key; if (min < start) min = start; + + /* Special case when the upper-end is the highest possible code + * point representable on the platform. Otherwise, the code below + * exits before setting this bit. Done here to avoid testing for + * this extremely unlikely possibility in the loop */ + if (UNLIKELY(end == UV_MAX && max == UV_MAX)) { + const STRLEN offset = (STRLEN)(max - start); + s[offset >> 3] |= 1 << (offset & 7); + } for (key = min; key <= max; key++) { const STRLEN offset = (STRLEN)(key - start); if (key >= end) @@ -3171,9 +3378,9 @@ bool Perl_check_utf8_print(pTHX_ register const U8* s, const STRLEN len) { /* May change: warns if surrogates, non-character code points, or - * non-Unicode code points are in s which has length len. Returns TRUE if - * none found; FALSE otherwise. The only other validity check is to make - * sure that this won't exceed the string's length */ + * non-Unicode code points are in s which has length len bytes. Returns + * TRUE if none found; FALSE otherwise. The only other validity check is + * to make sure that this won't exceed the string's length */ const U8* const e = s + len; bool ok = TRUE; @@ -3186,7 +3393,7 @@ Perl_check_utf8_print(pTHX_ register const U8* s, const STRLEN len) "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print"); return FALSE; } - if (*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE) { + if (UNLIKELY(*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE)) { STRLEN char_len; if (UTF8_IS_SUPER(s)) { if (ckWARN_d(WARN_NON_UNICODE)) { @@ -3389,8 +3596,6 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1 STRLEN n1 = 0, n2 = 0; /* Number of bytes in current char */ U8 foldbuf1[UTF8_MAXBYTES_CASE+1]; U8 foldbuf2[UTF8_MAXBYTES_CASE+1]; - U8 natbuf[2]; /* Holds native 8-bit char converted to utf8; - these always fit in 2 bytes */ PERL_ARGS_ASSERT_FOLDEQ_UTF8_FLAGS; @@ -3497,9 +3702,8 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1 else if (u1) { to_utf8_fold(p1, foldbuf1, &n1); } - else { /* Not utf8, convert to it first and then get fold */ - uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1))); - to_utf8_fold(natbuf, foldbuf1, &n1); + else { /* Not utf8, get utf8 fold */ + to_uni_fold(NATIVE_TO_UNI(*p1), foldbuf1, &n1); } f1 = foldbuf1; } @@ -3546,8 +3750,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1 to_utf8_fold(p2, foldbuf2, &n2); } else { - uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2))); - to_utf8_fold(natbuf, foldbuf2, &n2); + to_uni_fold(NATIVE_TO_UNI(*p2), foldbuf2, &n2); } f2 = foldbuf2; } @@ -60,7 +60,8 @@ EXTCONST unsigned char PL_utf8skip[] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* bogus */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* scripts */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6, /* cjk etc. */ -7,13, /* Perl extended (not UTF-8). Up to 72bit allowed (64-bit + reserved). */ +7,13, /* Perl extended (not official UTF-8). Up to 72bit allowed (64-bit + + reserved). */ }; #else EXTCONST unsigned char PL_utf8skip[]; @@ -6523,6 +6523,19 @@ long _ftol( double ); /* Defined by VC6 C libs. */ long _ftol2( double dblSource ) { return _ftol( dblSource ); } #endif +PERL_STATIC_INLINE bool +S_gv_has_usable_name(pTHX_ GV *gv) +{ + GV **gvp; + return GvSTASH(gv) + && HvENAME(GvSTASH(gv)) + && (gvp = (GV **)hv_fetch( + GvSTASH(gv), GvNAME(gv), + GvNAMEUTF8(gv) ? -GvNAMELEN(gv) : GvNAMELEN(gv), 0 + )) + && *gvp == gv; +} + void Perl_get_db_sub(pTHX_ SV **svp, CV *cv) { @@ -6530,7 +6543,8 @@ Perl_get_db_sub(pTHX_ SV **svp, CV *cv) SV * const dbsv = GvSVn(PL_DBsub); const bool save_taint = PL_tainted; - /* We do not care about using sv to call CV; + /* When we are called from pp_goto (svp is null), + * we do not care about using dbsv to call CV; * it's for informational purposes only. */ @@ -6541,23 +6555,33 @@ Perl_get_db_sub(pTHX_ SV **svp, CV *cv) if (!PERLDB_SUB_NN) { GV *gv = CvGV(cv); - if ( svp && ((CvFLAGS(cv) & (CVf_ANON | CVf_CLONED)) + if (!svp) { + gv_efullname3(dbsv, gv, NULL); + } + else if ( (CvFLAGS(cv) & (CVf_ANON | CVf_CLONED)) || strEQ(GvNAME(gv), "END") - || ((GvCV(gv) != cv) && /* Could be imported, and old sub redefined. */ + || ( /* Could be imported, and old sub redefined. */ + (GvCV(gv) != cv || !S_gv_has_usable_name(aTHX_ gv)) + && !( (SvTYPE(*svp) == SVt_PVGV) && (GvCV((const GV *)*svp) == cv) - && (gv = (GV *)*svp) + /* Use GV from the stack as a fallback. */ + && S_gv_has_usable_name(aTHX_ gv = (GV *)*svp) ) ) - )) { - /* Use GV from the stack as a fallback. */ + ) { /* GV is potentially non-unique, or contain different CV. */ SV * const tmp = newRV(MUTABLE_SV(cv)); sv_setsv(dbsv, tmp); SvREFCNT_dec(tmp); } else { - gv_efullname3(dbsv, gv, NULL); + sv_sethek(dbsv, HvENAME_HEK(GvSTASH(gv))); + sv_catpvs(dbsv, "::"); + sv_catpvn_flags( + dbsv, GvNAME(gv), GvNAMELEN(gv), + GvNAMEUTF8(gv) ? SV_CATUTF8 : SV_CATBYTES + ); } } else { diff --git a/utils/h2xs.PL b/utils/h2xs.PL index 634e891f84..6239f8f26f 100644 --- a/utils/h2xs.PL +++ b/utils/h2xs.PL @@ -789,7 +789,7 @@ if( @path_h ){ } else { @paths = (File::Spec->curdir(), $Config{usrinc}, - (split ' ', $Config{locincpth}), '/usr/include'); + (split / +/, $Config{locincpth} // ""), '/usr/include'); } foreach my $path_h (@path_h) { $name ||= $path_h; diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template index 6247e92fab..7d140656f1 100644 --- a/vms/descrip_mms.template +++ b/vms/descrip_mms.template @@ -345,7 +345,7 @@ x2p : [.x2p]$(DBG)a2p$(E) [.x2p]s2p.com [.x2p]find2perl.com extra.pods : miniperl @ @extra_pods.com -PERLDELTA_CURRENT = [.pod]perl5155delta.pod +PERLDELTA_CURRENT = [.pod]perl5156delta.pod $(PERLDELTA_CURRENT) : [.pod]perldelta.pod Copy/NoConfirm/Log $(MMS$SOURCE) $(MMS$TARGET) @@ -361,7 +361,7 @@ $(PERLDELTA_CURRENT) : [.pod]perldelta.pod [.pod]perltoc.pod : $(PERLDELTA_CURRENT) [.pod]perlapi.pod [.pod]perlintern.pod [.pod]perlmodlib.pod extra.pods $(PERL_EXE) @ define/user_mode $(DBG)PERLSHR SYS$DISK:[]$(DBG)perlshr$(E) - $(PERL) "-f" [.pod]buildtoc "-q" "--build-toc" + $(PERL) "-f" [.pod]buildtoc "-q" archcorefiles : $(ac) $(ARCHAUTO)time.stamp @ $(NOOP) @@ -1181,8 +1181,7 @@ Perl_my_getenv(pTHX_ const char *lnm, bool sys) * off and make sure we only retrieve the equivalence name for * that index. */ if ((cp2 = strchr(lnm,';')) != NULL) { - strcpy(uplnm,lnm); - uplnm[cp2-lnm] = '\0'; + my_strlcpy(uplnm, lnm, cp2 - lnm + 1); idx = strtoul(cp2+1,NULL,0); lnm = uplnm; flags &= ~PERL__TRNENV_JOIN_SEARCHLIST; @@ -1274,8 +1273,7 @@ Perl_my_getenv_len(pTHX_ const char *lnm, unsigned long *len, bool sys) flags |= PERL__TRNENV_JOIN_SEARCHLIST; if ((cp2 = strchr(lnm,';')) != NULL) { - strcpy(buf,lnm); - buf[cp2-lnm] = '\0'; + my_strlcpy(buf, lnm, cp2 - lnm + 1); idx = strtoul(cp2+1,NULL,0); lnm = buf; flags &= ~PERL__TRNENV_JOIN_SEARCHLIST; @@ -1400,19 +1398,18 @@ prime_env_iter(void) } else if ((tmpdsc.dsc$a_pointer = env_tables[i]->dsc$a_pointer) && !str$case_blind_compare(&tmpdsc,&clisym)) { - strcpy(cmd,"Show Symbol/Global *"); + my_strlcpy(cmd, "Show Symbol/Global *", sizeof(cmd)); cmddsc.dsc$w_length = 20; if (env_tables[i]->dsc$w_length == 12 && (tmpdsc.dsc$a_pointer = env_tables[i]->dsc$a_pointer + 6) && - !str$case_blind_compare(&tmpdsc,&local)) strcpy(cmd+12,"Local *"); + !str$case_blind_compare(&tmpdsc,&local)) my_strlcpy(cmd+12, "Local *", sizeof(cmd)-12); flags = defflags | CLI$M_NOLOGNAM; } else { - strcpy(cmd,"Show Logical *"); + my_strlcpy(cmd, "Show Logical *", sizeof(cmd)); if (str$case_blind_compare(env_tables[i],&fildevdsc)) { - strcat(cmd," /Table="); - strncat(cmd,env_tables[i]->dsc$a_pointer,env_tables[i]->dsc$w_length); - cmddsc.dsc$w_length = strlen(cmd); + my_strlcat(cmd," /Table=", sizeof(cmd)); + cmddsc.dsc$w_length = my_strlcat(cmd, env_tables[i]->dsc$a_pointer, env_tables[i]->dsc$w_length + 1); } else cmddsc.dsc$w_length = 14; /* N.B. We test this below */ flags = defflags | CLI$M_NOCLISYM; @@ -2182,7 +2179,7 @@ Perl_my_chdir(pTHX_ const char *dir) newdir = PerlMem_malloc(dirlen); if (newdir ==NULL) _ckvmssts_noperl(SS$_INSFMEM); - strncpy(newdir, dir1, dirlen-1); + memcpy(newdir, dir1, dirlen-1); newdir[dirlen-1] = '\0'; ret = chdir(newdir); PerlMem_free(newdir); @@ -3642,7 +3639,7 @@ store_pipelocs(pTHX) #else if (PL_origargv && PL_origargv[0]) { /* maybe nul if embedded Perl */ #endif - strcpy(temp, PL_origargv[0]); + my_strlcpy(temp, PL_origargv[0], sizeof(temp)); x = strrchr(temp,']'); if (x == NULL) { x = strrchr(temp,'>'); @@ -3664,8 +3661,7 @@ store_pipelocs(pTHX) if (p == NULL) _ckvmssts_noperl(SS$_INSFMEM); p->next = head_PLOC; head_PLOC = p; - strncpy(p->dir,unixdir,sizeof(p->dir)-1); - p->dir[NAM$C_MAXRSS] = '\0'; + my_strlcpy(p->dir, unixdir, sizeof(p->dir)); } } @@ -3688,8 +3684,7 @@ store_pipelocs(pTHX) p = (pPLOC) PerlMem_malloc(sizeof(PLOC)); p->next = head_PLOC; head_PLOC = p; - strncpy(p->dir,unixdir,sizeof(p->dir)-1); - p->dir[NAM$C_MAXRSS] = '\0'; + my_strlcpy(p->dir, unixdir, sizeof(p->dir)); } /* most likely spot (ARCHLIB) put first in the list */ @@ -3700,8 +3695,7 @@ store_pipelocs(pTHX) if (p == NULL) _ckvmssts_noperl(SS$_INSFMEM); p->next = head_PLOC; head_PLOC = p; - strncpy(p->dir,unixdir,sizeof(p->dir)-1); - p->dir[NAM$C_MAXRSS] = '\0'; + my_strlcpy(p->dir, unixdir, sizeof(p->dir)); } #endif PerlMem_free(unixdir); @@ -3742,10 +3736,8 @@ find_vmspipe(pTHX) while (p) { char * exp_res; int dirlen; - strcpy(file, p->dir); - dirlen = strlen(file); - strncat(file, "vmspipe.com",NAM$C_MAXRSS - dirlen); - file[NAM$C_MAXRSS] = '\0'; + dirlen = my_strlcpy(file, p->dir, sizeof(file)); + my_strlcat(file, "vmspipe.com", sizeof(file)); p = p->next; exp_res = int_rmsexpand_tovms(file, vmspipe_file, 0); @@ -4201,7 +4193,7 @@ safe_popen(pTHX_ const char *cmd, const char *in_mode, int *psts) tfilebuf[0] = '@'; vmspipe = find_vmspipe(aTHX); if (vmspipe) { - strcpy(tfilebuf+1,vmspipe); + vmspipedsc.dsc$w_length = my_strlcpy(tfilebuf+1, vmspipe, sizeof(tfilebuf)-1) + 1; } else { /* uh, oh...we're in tempfile hell */ tpipe = vmspipe_tempfile(aTHX); if (!tpipe) { /* a fish popular in Boston */ @@ -4211,9 +4203,9 @@ safe_popen(pTHX_ const char *cmd, const char *in_mode, int *psts) return NULL; } fgetname(tpipe,tfilebuf+1,1); + vmspipedsc.dsc$w_length = strlen(tfilebuf); } vmspipedsc.dsc$a_pointer = tfilebuf; - vmspipedsc.dsc$w_length = strlen(tfilebuf); sts = setup_cmddsc(aTHX_ cmd,0,0,&vmscmd); if (!(sts & 1)) { @@ -4245,7 +4237,7 @@ safe_popen(pTHX_ const char *cmd, const char *in_mode, int *psts) n = sizeof(Info); _ckvmssts_noperl(lib$get_vm(&n, &info)); - strcpy(mode,in_mode); + my_strlcpy(mode, in_mode, sizeof(mode)); info->mode = *mode; info->done = FALSE; info->completion = 0; @@ -4400,18 +4392,13 @@ safe_popen(pTHX_ const char *cmd, const char *in_mode, int *psts) } } - symbol[MAX_DCL_SYMBOL] = '\0'; - - strncpy(symbol, in, MAX_DCL_SYMBOL); - d_symbol.dsc$w_length = strlen(symbol); + d_symbol.dsc$w_length = my_strlcpy(symbol, in, sizeof(symbol)); _ckvmssts_noperl(lib$set_symbol(&d_sym_in, &d_symbol, &table)); - strncpy(symbol, err, MAX_DCL_SYMBOL); - d_symbol.dsc$w_length = strlen(symbol); + d_symbol.dsc$w_length = my_strlcpy(symbol, err, sizeof(symbol)); _ckvmssts_noperl(lib$set_symbol(&d_sym_err, &d_symbol, &table)); - strncpy(symbol, out, MAX_DCL_SYMBOL); - d_symbol.dsc$w_length = strlen(symbol); + d_symbol.dsc$w_length = my_strlcpy(symbol, out, sizeof(symbol)); _ckvmssts_noperl(lib$set_symbol(&d_sym_out, &d_symbol, &table)); /* Done with the names for the pipes */ @@ -4428,8 +4415,7 @@ safe_popen(pTHX_ const char *cmd, const char *in_mode, int *psts) sprintf(cmd_sym_name,"PERL_POPEN_CMD%d",j); d_sym_cmd.dsc$w_length = strlen(cmd_sym_name); - strncpy(symbol, p, MAX_DCL_SYMBOL); - d_symbol.dsc$w_length = strlen(symbol); + d_symbol.dsc$w_length = my_strlcpy(symbol, p, sizeof(symbol)); _ckvmssts_noperl(lib$set_symbol(&d_sym_cmd, &d_symbol, &table)); if (strlen(p) > MAX_DCL_SYMBOL) { @@ -5787,7 +5773,7 @@ int_expanded: /* VMS file specs are not in UTF-8 */ if (fs_utf8 != NULL) *fs_utf8 = 0; - strcpy(outbuf, spec_buf); + my_strlcpy(outbuf, spec_buf, VMS_MAXRSS); ret_spec = outbuf; } } @@ -5801,7 +5787,7 @@ int_expanded: char * new_src = NULL; if (spec_buf == outbuf) { new_src = PerlMem_malloc(VMS_MAXRSS); - strcpy(new_src, spec_buf); + my_strlcpy(new_src, spec_buf, VMS_MAXRSS); } else { src = spec_buf; } @@ -5816,7 +5802,7 @@ int_expanded: /* Copy the buffer if needed */ if (outbuf != spec_buf) - strcpy(outbuf, spec_buf); + my_strlcpy(outbuf, spec_buf, VMS_MAXRSS); ret_spec = outbuf; } } @@ -5986,7 +5972,7 @@ int_fileify_dirspec(const char *dir, char *buf, int *utf8_fl) dirlen = strlen(trndir); } else { - strncpy(trndir,dir,dirlen); + memcpy(trndir, dir, dirlen); trndir[dirlen] = '\0'; } @@ -6048,7 +6034,7 @@ int_fileify_dirspec(const char *dir, char *buf, int *utf8_fl) trndir[trndir_len] = '\0'; } } - strcpy(buf, trndir); + my_strlcpy(buf, trndir, VMS_MAXRSS); PerlMem_free(trndir); PerlMem_free(vmsdir); return buf; @@ -6345,7 +6331,7 @@ int_fileify_dirspec(const char *dir, char *buf, int *utf8_fl) if (rms_is_nam_fnb(dirnam, NAM$M_EXP_NAME)) { /* They provided at least the name; we added the type, if necessary, */ - strcpy(buf, my_esa); + my_strlcpy(buf, my_esa, VMS_MAXRSS); sts = rms_free_search_context(&dirfab); PerlMem_free(trndir); PerlMem_free(esa); @@ -6390,7 +6376,7 @@ int_fileify_dirspec(const char *dir, char *buf, int *utf8_fl) if ((cp1) != NULL) { /* There's more than one directory in the path. Just roll back. */ *cp1 = term; - strcpy(buf, my_esa); + my_strlcpy(buf, my_esa, VMS_MAXRSS); } else { if (rms_is_nam_fnb(dirnam, NAM$M_ROOT_DIR)) { @@ -6557,10 +6543,10 @@ static char * int_pathify_dirspec_simple(const char * dir, char * buf, int len; len = v_len + r_len + d_len - 1; char dclose = d_spec[d_len - 1]; - strncpy(buf, dir, len); + memcpy(buf, dir, len); buf[len] = '.'; len++; - strncpy(&buf[len], n_spec, n_len); + memcpy(&buf[len], n_spec, n_len); len += n_len; buf[len] = dclose; buf[len + 1] = '\0'; @@ -6575,16 +6561,16 @@ static char * int_pathify_dirspec_simple(const char * dir, char * buf, int len; len = v_len + r_len + d_len - 1; char dclose = d_spec[d_len - 1]; - strncpy(buf, dir, len); + memcpy(buf, dir, len); buf[len] = '.'; len++; - strncpy(&buf[len], n_spec, n_len); + memcpy(&buf[len], n_spec, n_len); len += n_len; if (e_len > 0) { if (decc_efs_charset) { buf[len] = '^'; len++; - strncpy(&buf[len], e_spec, e_len); + memcpy(&buf[len], e_spec, e_len); len += e_len; } else { set_vaxc_errno(RMS$_DIR); @@ -6645,7 +6631,7 @@ static char *int_pathify_dirspec(const char *dir, char *buf) /* If no directory specified use the current default */ if (*dir) - strcpy(trndir, dir); + my_strlcpy(trndir, dir, VMS_MAXRSS); else { getcwd(trndir, VMS_MAXRSS - 1); need_to_lower = 1; @@ -6663,7 +6649,7 @@ static char *int_pathify_dirspec(const char *dir, char *buf) /* Trap simple rooted lnms, and return lnm:[000000] */ if (!strcmp(trndir+trnlen-2,".]")) { - strcpy(buf, dir); + my_strlcpy(buf, dir, VMS_MAXRSS); strcat(buf, ":[000000]"); PerlMem_free(trndir); @@ -6717,7 +6703,7 @@ static char *int_pathify_dirspec(const char *dir, char *buf) /* Traditional mode, assume .DIR is directory */ buf[0] = '['; buf[1] = '.'; - strncpy(&buf[2], n_spec, n_len); + memcpy(&buf[2], n_spec, n_len); buf[n_len + 2] = ']'; buf[n_len + 3] = '\0'; PerlMem_free(trndir); @@ -6846,7 +6832,7 @@ static char *int_pathify_dirspec(const char *dir, char *buf) } } - strcpy(buf, trndir); + my_strlcpy(buf, trndir, VMS_MAXRSS); if (buf[dir_len - 1] != '/') { buf[dir_len] = '/'; buf[dir_len + 1] = '\0'; @@ -6998,8 +6984,7 @@ static char *int_tounixspec(const char *spec, char *rslt, int * utf8_fl) tunix = PerlMem_malloc(VMS_MAXRSS); if (tunix == NULL) _ckvmssts_noperl(SS$_INSFMEM); - strcpy(tunix, spec); - tunix_len = strlen(tunix); + tunix_len = my_strlcpy(tunix, spec, VMS_MAXRSS); nl_flag = 0; if (tunix[tunix_len - 1] == '\n') { tunix[tunix_len - 1] = '\"'; @@ -7010,13 +6995,13 @@ static char *int_tounixspec(const char *spec, char *rslt, int * utf8_fl) uspec = decc$translate_vms(tunix); PerlMem_free(tunix); if ((int)uspec > 0) { - strcpy(rslt,uspec); + my_strlcpy(rslt, uspec, VMS_MAXRSS); if (nl_flag) { strcat(rslt,"\n"); } else { /* If we can not translate it, makemaker wants as-is */ - strcpy(rslt, spec); + my_strlcpy(rslt, spec, VMS_MAXRSS); } return rslt; } @@ -7057,7 +7042,7 @@ static char *int_tounixspec(const char *spec, char *rslt, int * utf8_fl) } /* This is already UNIX or at least nothing VMS understands */ if (cmp_rslt) { - strcpy(rslt,spec); + my_strlcpy(rslt, spec, VMS_MAXRSS); if (vms_debug_fileify) { fprintf(stderr, "int_tounixspec: rslt = %s\n", rslt); } @@ -7430,7 +7415,7 @@ int unixlen; else { /* This is already a VMS specification, no conversion */ unixlen--; - strncpy(vmspath,unixpath, vmspath_len); + my_strlcpy(vmspath, unixpath, vmspath_len + 1); } } else @@ -7542,7 +7527,7 @@ int unixlen; if (strncmp(unixpath,"\"^UP^",5) != 0) sprintf(vmspath,"\"^UP^%s\"",unixpath); else - strcpy(vmspath, unixpath); + my_strlcpy(vmspath, unixpath, vmspath_len + 1); } else { vmspath[specdsc.dsc$w_length] = 0; @@ -7752,12 +7737,11 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; if (nextslash != NULL) { seg_len = nextslash - unixptr; - strncpy(esa, unixptr, seg_len); + memcpy(esa, unixptr, seg_len); esa[seg_len] = 0; } else { - strcpy(esa, unixptr); - seg_len = strlen(unixptr); + seg_len = my_strlcpy(esa, unixptr, sizeof(esa)); } /* trnlnm(section) */ islnm = vmstrnenv(esa, trn, 0, fildev, 0); @@ -7800,8 +7784,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; if ((unixptr[seg_len] == '/') || (dir_flag != 0)) { /* This must be a directory */ if (((n_len + e_len) == 0)&&(seg_len <= vmspath_len)) { - strcpy(vmsptr, esa); - vmslen=strlen(vmsptr); + vmslen = my_strlcpy(vmsptr, esa, vmspath_len - 1); vmsptr[vmslen] = ':'; vmslen++; vmsptr[vmslen] = '\0'; @@ -7818,7 +7801,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; /* transfer the volume */ if (v_len > 0 && ((v_len + vmslen) < vmspath_len)) { - strncpy(vmsptr, v_spec, v_len); + memcpy(vmsptr, v_spec, v_len); vmsptr += v_len; vmsptr[0] = '\0'; vmslen += v_len; @@ -7841,7 +7824,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; } } if (r_len > 0) { - strncpy(vmsptr, r_spec, r_len); + memcpy(vmsptr, r_spec, r_len); vmsptr += r_len; vmslen += r_len; vmsptr[0] = '\0'; @@ -7872,7 +7855,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; d_spec++; d_len--; } - strncpy(vmsptr, d_spec, d_len); + memcpy(vmsptr, d_spec, d_len); vmsptr += d_len; vmslen += d_len; vmsptr[0] = '\0'; @@ -7964,8 +7947,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; if (nextslash != NULL) { int cmp; seg_len = nextslash - &unixptr[1]; - strncpy(vmspath, unixptr, seg_len + 1); - vmspath[seg_len+1] = 0; + my_strlcpy(vmspath, unixptr, seg_len + 1); cmp = 1; if (seg_len == 3) { cmp = strncmp(vmspath, "dev", 4); @@ -7983,8 +7965,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; sts = posix_root_to_vms(esa, vmspath_len, "/", NULL); if ($VMS_STATUS_SUCCESS(sts)) { - strcpy(vmspath, esa); - vmslen = strlen(vmspath); + vmslen = my_strlcpy(vmspath, esa, vmspath_len + 1); vmsptr = vmspath + vmslen; unixptr++; if (unixptr < lastslash) { @@ -8029,9 +8010,7 @@ int sts, v_len, r_len, d_len, n_len, e_len, vs_len; */ /* Posix to VMS destroyed this, so copy it again */ - strncpy(vmspath, &unixptr[1], seg_len); - vmspath[seg_len] = 0; - vmslen = seg_len; + vmslen = my_strlcpy(vmspath, &unixptr[1], seg_len); vmsptr = &vmsptr[vmslen]; islnm = 0; @@ -8469,7 +8448,7 @@ static char *int_tovmsspec if ((v_len != 0) || (r_len != 0) || (d_len != 0) || (vs_len != 0)) { if (utf8_flag != NULL) *utf8_flag = 0; - strcpy(rslt, path); + my_strlcpy(rslt, path, VMS_MAXRSS); if (vms_debug_fileify) { fprintf(stderr, "int_tovmsspec: rslt = %s\n", rslt); } @@ -8490,7 +8469,7 @@ static char *int_tovmsspec */ if (utf8_flag != NULL) *utf8_flag = 0; - strcpy(rslt, path); + my_strlcpy(rslt, path, VMS_MAXRSS); if (vms_debug_fileify) { fprintf(stderr, "int_tovmsspec: rslt = %s\n", rslt); } @@ -8521,7 +8500,7 @@ static char *int_tovmsspec } } if ((decc_efs_charset == 0) || (has_macro)) { - strcpy(rslt, path); + my_strlcpy(rslt, path, VMS_MAXRSS); if (vms_debug_fileify) { fprintf(stderr, "int_tovmsspec: rslt = %s\n", rslt); } @@ -8625,7 +8604,7 @@ static char *int_tovmsspec } else { if (cp2 != dirend) { - strcpy(rslt,trndev); + my_strlcpy(rslt, trndev, VMS_MAXRSS); cp1 = rslt + trnend; if (*cp2 != 0) { *(cp1++) = '.'; @@ -8968,7 +8947,7 @@ static char *mp_do_tovmspath(pTHX_ const char *path, char *buf, int ts, int * ut return cp; } else { - strcpy(__tovmspath_retbuf,vmsified); + my_strlcpy(__tovmspath_retbuf, vmsified, sizeof(__tovmspath_retbuf)); Safefree(vmsified); return __tovmspath_retbuf; } @@ -9022,7 +9001,7 @@ static char *mp_do_tounixpath(pTHX_ const char *path, char *buf, int ts, int * u return cp; } else { - strcpy(__tounixpath_retbuf,unixified); + my_strlcpy(__tounixpath_retbuf, unixified, sizeof(__tounixpath_retbuf)); Safefree(unixified); return __tounixpath_retbuf; } @@ -9427,15 +9406,14 @@ int rms_sts; string = PerlMem_malloc(resultspec.dsc$w_length+1); if (string == NULL) _ckvmssts_noperl(SS$_INSFMEM); - strncpy(string, resultspec.dsc$a_pointer, resultspec.dsc$w_length); - string[resultspec.dsc$w_length] = '\0'; + my_strlcpy(string, resultspec.dsc$a_pointer, resultspec.dsc$w_length); if (NULL == had_version) *(strrchr(string, ';')) = '\0'; if ((!had_directory) && (had_device == NULL)) { if (NULL == (devdir = strrchr(string, ']'))) devdir = strrchr(string, '>'); - strcpy(string, devdir + 1); + my_strlcpy(string, devdir + 1, resultspec.dsc$w_length); } /* * Be consistent with what the C RTL has already done to the rest of @@ -9581,14 +9559,12 @@ int pid; unsigned long int flags = 17, one = 1, retsts; int len; - strcat(command, argv[0]); - len = strlen(command); + len = my_strlcat(command, argv[0], sizeof(command)); while (--argc && (len < MAX_DCL_SYMBOL)) { - strcat(command, " \""); - strcat(command, *(++argv)); - strcat(command, "\""); - len = strlen(command); + my_strlcat(command, " \"", sizeof(command)); + my_strlcat(command, *(++argv), sizeof(command)); + len = my_strlcat(command, "\"", sizeof(command)); } value.dsc$a_pointer = command; value.dsc$w_length = strlen(value.dsc$a_pointer); @@ -9857,8 +9833,7 @@ Perl_trim_unixpath(pTHX_ char *fspec, const char *wildspec, int opts) } } else { - strncpy(unixwild, wildspec, VMS_MAXRSS-1); - unixwild[VMS_MAXRSS-1] = 0; + my_strlcpy(unixwild, wildspec, VMS_MAXRSS); } unixified = PerlMem_malloc(VMS_MAXRSS); if (unixified == NULL) _ckvmssts_noperl(SS$_INSFMEM); @@ -10205,7 +10180,7 @@ collectversions(pTHX_ DIR *dd) /* Add the version wildcard, ignoring the "*.*" put on before */ i = strlen(dd->pattern); Newx(text,i + e->d_namlen + 3,char); - strcpy(text, dd->pattern); + my_strlcpy(text, dd->pattern, i + 1); sprintf(&text[i - 3], "%s;*", e->d_name); /* Set up the pattern descriptor. */ @@ -10343,7 +10318,7 @@ Perl_readdir(pTHX_ DIR *dd) } } - strncpy(dd->entry.d_name, n_spec, n_len + e_len); + memcpy(dd->entry.d_name, n_spec, n_len + e_len); dd->entry.d_name[n_len + e_len] = '\0'; dd->entry.d_namlen = strlen(dd->entry.d_name); @@ -10368,8 +10343,7 @@ Perl_readdir(pTHX_ DIR *dd) /* counted strings apparently with a Unicode flag */ } *q = 0; - strcpy(dd->entry.d_name, new_name); - dd->entry.d_namlen = strlen(dd->entry.d_name); + dd->entry.d_namlen = my_strlcpy(dd->entry.d_name, new_name, sizeof(dd->entry.d_name)); } } @@ -10522,7 +10496,7 @@ setup_argstr(pTHX_ SV *really, SV **mark, SV **sp) Newx(PL_Cmd, cmdlen+1, char); if (tmps && *tmps) { - strcpy(PL_Cmd,tmps); + my_strlcpy(PL_Cmd, tmps, cmdlen + 1); mark++; } else *PL_Cmd = '\0'; @@ -10530,8 +10504,8 @@ setup_argstr(pTHX_ SV *really, SV **mark, SV **sp) if (*mark) { char *s = SvPVx(*mark,n_a); if (!*s) continue; - if (*PL_Cmd) strcat(PL_Cmd," "); - strcat(PL_Cmd,s); + if (*PL_Cmd) my_strlcat(PL_Cmd, " ", cmdlen+1); + my_strlcat(PL_Cmd, s, cmdlen+1); } } return PL_Cmd; @@ -10574,8 +10548,7 @@ setup_cmddsc(pTHX_ const char *incmd, int check_img, int *suggest_quote, cmdlen = strlen(incmd); cmd = PerlMem_malloc(cmdlen+1); if (cmd == NULL) _ckvmssts_noperl(SS$_INSFMEM); - strncpy(cmd, incmd, cmdlen); - cmd[cmdlen] = 0; + my_strlcpy(cmd, incmd, cmdlen + 1); image_name[0] = 0; image_argv[0] = 0; @@ -10784,7 +10757,7 @@ setup_cmddsc(pTHX_ const char *incmd, int check_img, int *suggest_quote, else { tchr = tmpspec; } - strcpy(image_name, tchr); + my_strlcpy(image_name, tchr, sizeof(image_name)); } } } @@ -10803,30 +10776,30 @@ setup_cmddsc(pTHX_ const char *incmd, int check_img, int *suggest_quote, vmscmd->dsc$a_pointer = PerlMem_malloc(MAX_DCL_LINE_LENGTH); if (vmscmd->dsc$a_pointer == NULL) _ckvmssts_noperl(SS$_INSFMEM); if (!isdcl) { - strcpy(vmscmd->dsc$a_pointer,"$ MCR "); + my_strlcpy(vmscmd->dsc$a_pointer,"$ MCR ", MAX_DCL_LINE_LENGTH); if (image_name[0] != 0) { - strcat(vmscmd->dsc$a_pointer, image_name); - strcat(vmscmd->dsc$a_pointer, " "); + my_strlcat(vmscmd->dsc$a_pointer, image_name, MAX_DCL_LINE_LENGTH); + my_strlcat(vmscmd->dsc$a_pointer, " ", MAX_DCL_LINE_LENGTH); } } else if (image_name[0] != 0) { - strcpy(vmscmd->dsc$a_pointer, image_name); - strcat(vmscmd->dsc$a_pointer, " "); + my_strlcpy(vmscmd->dsc$a_pointer, image_name, MAX_DCL_LINE_LENGTH); + my_strlcat(vmscmd->dsc$a_pointer, " ", MAX_DCL_LINE_LENGTH); } else { - strcpy(vmscmd->dsc$a_pointer,"@"); + my_strlcpy(vmscmd->dsc$a_pointer, "@", MAX_DCL_LINE_LENGTH); } if (suggest_quote) *suggest_quote = 1; /* If there is an image name, use original command */ if (image_name[0] == 0) - strcat(vmscmd->dsc$a_pointer,resspec); + my_strlcat(vmscmd->dsc$a_pointer, resspec, MAX_DCL_LINE_LENGTH); else { rest = cmd; while (*rest && isspace(*rest)) rest++; } if (image_argv[0] != 0) { - strcat(vmscmd->dsc$a_pointer,image_argv); - strcat(vmscmd->dsc$a_pointer, " "); + my_strlcat(vmscmd->dsc$a_pointer, image_argv, MAX_DCL_LINE_LENGTH); + my_strlcat(vmscmd->dsc$a_pointer, " ", MAX_DCL_LINE_LENGTH); } if (rest) { int rest_len; @@ -10835,7 +10808,7 @@ setup_cmddsc(pTHX_ const char *incmd, int check_img, int *suggest_quote, rest_len = strlen(rest); vmscmd_len = strlen(vmscmd->dsc$a_pointer); if ((rest_len + vmscmd_len) < MAX_DCL_LINE_LENGTH) - strcat(vmscmd->dsc$a_pointer,rest); + my_strlcat(vmscmd->dsc$a_pointer, rest, MAX_DCL_LINE_LENGTH); else retsts = CLI$_BUFOVF; } @@ -10853,8 +10826,7 @@ setup_cmddsc(pTHX_ const char *incmd, int check_img, int *suggest_quote, vmscmd->dsc$w_length = strlen(cmd); vmscmd->dsc$a_pointer = PerlMem_malloc(vmscmd->dsc$w_length + 1); - strncpy(vmscmd->dsc$a_pointer,cmd,vmscmd->dsc$w_length); - vmscmd->dsc$a_pointer[vmscmd->dsc$w_length] = 0; + my_strlcpy(vmscmd->dsc$a_pointer, cmd, vmscmd->dsc$w_length + 1); PerlMem_free(cmd); PerlMem_free(resspec); @@ -11217,8 +11189,8 @@ Perl_my_fgetname(FILE *fp, char * buf) { } /* Convert this to Unix format */ - vms_name = PerlMem_malloc(VMS_MAXRSS + 1); - strcpy(vms_name, retname); + vms_name = PerlMem_malloc(VMS_MAXRSS); + my_strlcpy(vms_name, retname, VMS_MAXRSS); retname = int_tounixspec(vms_name, buf, NULL); PerlMem_free(vms_name); @@ -11358,7 +11330,7 @@ static int fillpasswd (pTHX_ const char *name, struct passwd *pwd) if (pwd->pw_unixdir[ldir]=='/') pwd->pw_unixdir[ldir]= '\0'; } else - strcpy(pwd->pw_unixdir, pwd->pw_dir); + my_strlcpy(pwd->pw_unixdir, pwd->pw_dir, sizeof(pwd->pw_unixdir)); if (!decc_efs_case_preserve) __mystrtolower(pwd->pw_unixdir); return 1; @@ -11394,8 +11366,7 @@ struct passwd *Perl_my_getpwnam(pTHX_ const char *name) else { _ckvmssts(sts); } } } - strncpy(__pw_namecache, name, sizeof(__pw_namecache)); - __pw_namecache[sizeof __pw_namecache - 1] = '\0'; + my_strlcpy(__pw_namecache, name, sizeof(__pw_namecache)); __pwdcache.pw_name= __pw_namecache; return &__pwdcache; } /* end of my_getpwnam() */ @@ -11834,9 +11805,9 @@ tz_parse(pTHX_ time_t *w, int *dst, char *zone, int *gmtoff) reversed = -1; /* flag need to check */ envtz[0] = ucxtz[0] = '\0'; tz = my_getenv("TZ",0); - if (tz) strcpy(envtz, tz); + if (tz) my_strlcpy(envtz, tz, sizeof(envtz)); tz = my_getenv("UCX$TZ",0); - if (tz) strcpy(ucxtz, tz); + if (tz) my_strlcpy(ucxtz, tz, sizeof(ucxtz)); if (!envtz[0] && !ucxtz[0]) return 0; /* we give up */ } tz = envtz; @@ -12431,7 +12402,7 @@ Perl_cando_by_name_int fileified = PerlMem_malloc(VMS_MAXRSS); if (fileified == NULL) _ckvmssts_noperl(SS$_INSFMEM); if (!strpbrk(fname,"/]>:")) { - strcpy(fileified,fname); + my_strlcpy(fileified, fname, VMS_MAXRSS); trnlnm_iter_count = 0; while (!strpbrk(fileified,"/]>:") && my_trnlnm(fileified,fileified,0)) { trnlnm_iter_count++; @@ -12451,7 +12422,7 @@ Perl_cando_by_name_int } } else { - strcpy(vmsname,fname); + my_strlcpy(vmsname, fname, VMS_MAXRSS); } /* sys$check_access needs a file spec, not a directory spec. @@ -12698,7 +12669,7 @@ Perl_flex_stat_int(pTHX_ const char *fspec, Stat_t *statbufp, int lstat_flag) if (temp_fspec == NULL) _ckvmssts_noperl(SS$_INSFMEM); - strcpy(temp_fspec, fspec); + my_strlcpy(temp_fspec, fspec, VMS_MAXRSS); SAVE_ERRNO; @@ -13487,14 +13458,14 @@ mod2fname(pTHX_ CV *cv) for(counter = 0; counter <= num_entries; counter++) { /* If it's not the first name then tack on a __ */ if (counter) { - strcat(work_name, "__"); + my_strlcat(work_name, "__", sizeof(work_name)); } - strcat(work_name, SvPV_nolen(*av_fetch(in_array, counter, FALSE))); + my_strlcat(work_name, SvPV_nolen(*av_fetch(in_array, counter, FALSE)), sizeof(work_name)); } /* Check to see if we actually have to bother...*/ if (strlen(work_name) + 3 <= max_name_len) { - strcat(ultimate_name, work_name); + my_strlcat(ultimate_name, work_name, sizeof(ultimate_name)); } else { /* It's too darned big, so we need to go strip. We use the same */ /* algorithm as xsubpp does. First, strip out doubled __ */ @@ -13509,7 +13480,7 @@ mod2fname(pTHX_ CV *cv) last = *source; } /* Go put it back */ - strcpy(work_name, workbuff); + my_strlcpy(work_name, workbuff, sizeof(work_name)); /* Is it still too big? */ if (strlen(work_name) + 3 > max_name_len) { /* Strip duplicate letters */ @@ -13522,7 +13493,7 @@ mod2fname(pTHX_ CV *cv) *dest++ = *source; last = toupper(*source); } - strcpy(work_name, workbuff); + my_strlcpy(work_name, workbuff, sizeof(work_name)); } /* Is it *still* too big? */ @@ -13530,7 +13501,7 @@ mod2fname(pTHX_ CV *cv) /* Too bad, we truncate */ work_name[max_name_len - 2] = 0; } - strcat(ultimate_name, work_name); + my_strlcat(ultimate_name, work_name, sizeof(ultimate_name)); } /* Okay, return it */ @@ -13808,7 +13779,7 @@ Perl_vms_start_glob if (!found) { /* Be POSIXish: return the input pattern when no matches */ - strcpy(rstr,SvPVX(tmpglob)); + my_strlcpy(rstr, SvPVX(tmpglob), VMS_MAXRSS); strcat(rstr,"\n"); ok = (PerlIO_puts(tmpfp,rstr) != EOF); } @@ -14291,7 +14262,7 @@ mp_do_vms_realpath(pTHX_ const char *filespec, char *outbuf, int dir_len = v_len + r_len + d_len + n_len; if (dir_len > 0) { - strncpy(dir_name, filespec, dir_len); + memcpy(dir_name, filespec, dir_len); dir_name[dir_len] = '\0'; file_name = (char *)&filespec[dir_len + 1]; } @@ -14303,7 +14274,7 @@ mp_do_vms_realpath(pTHX_ const char *filespec, char *outbuf, if (tchar != NULL) { int dir_len = tchar - filespec; - strncpy(dir_name, filespec, dir_len); + memcpy(dir_name, filespec, dir_len); dir_name[dir_len] = '\0'; file_name = (char *) &filespec[dir_len + 1]; } @@ -14327,7 +14298,7 @@ mp_do_vms_realpath(pTHX_ const char *filespec, char *outbuf, /* And now add the original filespec to it */ if (file_name != NULL) { - strcat(outbuf, file_name); + my_strlcat(outbuf, file_name, VMS_MAXRSS); } return outbuf; } diff --git a/vos/syslog.h b/vos/syslog.h deleted file mode 100644 index 1916fc1b34..0000000000 --- a/vos/syslog.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Beginning of modification history */ -/* Written 02-08-13 by PG */ -/* End of modification history */ - -/* This header conforms to IEEE Std 1003.1-2001 */ - -#ifndef _INCLUDED_SYSLOG_H -#define _INCLUDED_SYSLOG_H - -/* values of the "logopt" option of openlog */ - -#define LOG_PID 1 -#define LOG_CONS 2 -#define LOG_NDELAY 4 -#define LOG_ODELAY 8 -#define LOG_NOWAIT 16 - -/* values of the "facility" argument of openlog - and of the "priority" argument of syslog */ - -#define LOG_KERN 0 -#define LOG_USER (1<<3) -#define LOG_MAIL (2<<3) -#define LOG_NEWS (3<<3) -#define LOG_UUCP (4<<3) -#define LOG_DAEMON (5<<3) -#define LOG_AUTH (6<<3) -#define LOG_CRON (7<<3) -#define LOG_LPR (8<<3) -#define LOG_LOCAL0 (9<<3) -#define LOG_LOCAL1 (10<<3) -#define LOG_LOCAL2 (11<<3) -#define LOG_LOCAL3 (12<<3) -#define LOG_LOCAL4 (13<<3) -#define LOG_LOCAL5 (14<<3) -#define LOG_LOCAL6 (15<<3) -#define LOG_LOCAL7 (16<<3) - -/* macro for constructing "maskpri" arg to setlogmask */ - -#define LOG_MASK(p) (1 << (p)) - -/* values of the "priority" argument of syslog */ - -#define LOG_EMERG 0 -#define LOG_ALERT 1 -#define LOG_CRIT 2 -#define LOG_ERR 3 -#define LOG_WARNING 4 -#define LOG_NOTICE 5 -#define LOG_INFO 6 -#define LOG_DEBUG 7 - -#undef __P -#ifdef __PROTOTYPES__ -#define __P(args) args -#else -#define __P(args) () -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -extern void closelog __P((void)); -extern void openlog __P((const char *ident, int logopt, - int facility)); -extern int setlogmask __P((int maskpri)); -extern void syslog __P((int priority, const char * message, ...)); - -#ifdef __cplusplus -} -#endif - -#endif /* _INCLUDED_SYSLOG_H */ @@ -8,6 +8,8 @@ add syslog entries. */ /* Modified 08-02-04 by Paul Green (Paul.Green@stratus.com) to open the syslog file in the working dir. */ +/* Modified 11-10-17 by Paul Green to remove the dummy copies + of socketpair() and the syslog functions. */ /* End of modification history */ #include <errno.h> @@ -18,8 +20,6 @@ #include <sys/types.h> #include <unistd.h> -#include "vos/syslog.h" - /* VOS doesn't supply a truncate function, so we build one up from the available POSIX functions. */ @@ -35,18 +35,6 @@ truncate(const char *path, off_t len) return code; } -/* VOS doesn't implement AF_UNIX (AF_LOCAL) style sockets, and - the perl emulation of them hangs on VOS (due to stcp-1257), - so we supply this version that always fails. */ - -int -socketpair (int family, int type, int protocol, int fd[2]) { - fd[0] = 0; - fd[1] = 0; - errno = ENOSYS; - return -1; -} - /* Supply a private version of the power function that returns 1 for x**0. This avoids c-1471. Abigail's Japh tests depend on this fix. We leave all the other cases to the VOS C @@ -65,228 +53,3 @@ double x, y; return(s_crt_pow(&x,&y)); } - -/* entries */ - -extern void s$log_system_message ( -/* char_varying (256) *message_text, - char_varying (66) *module_name, - short int *error_code */ ); - -/* constants */ - -#define ALL_PRIORITIES 255 /* 8 priorities, all enabled */ -#define BUFFER_LEN 256 -#define IDENT_LEN 64 -#define MSG_LEN 256 -#define PATH_LEN 257 - -/* static */ - -int vos_syslog_facility = LOG_USER>>3; -int vos_syslog_fd = -1; -int vos_syslog_logopt = 0; -char vos_syslog_ident[IDENT_LEN] = ""; -int vos_syslog_ident_len = 0; -int vos_syslog_mask = ALL_PRIORITIES; -char vos_syslog_path[PATH_LEN] = "syslog"; - -char vos_syslog_facility_name [17][10] = { - "[KERN] ", /* LOG_KERN */ - "[USER] ", /* LOG_USER */ - "[MAIL] ", /* LOG_MAIL */ - "[NEWS] ", /* LOG_NEWS */ - "[UUCP] ", /* LOG_UUCP */ - "[DAEMON] ", /* LOG_DAEMON */ - "[AUTH] ", /* LOG_AUTH */ - "[CRON] ", /* LOG_CRON */ - "[LPR] ", /* LOG_LPR */ - "[LOCAL0] ", /* LOG_LOCAL0 */ - "[LOCAL1] ", /* LOG_LOCAL1 */ - "[LOCAL2] ", /* LOG_LOCAL2 */ - "[LOCAL3] ", /* LOG_LOCAL3 */ - "[LOCAL4] ", /* LOG_LOCAL4 */ - "[LOCAL5] ", /* LOG_LOCAL5 */ - "[LOCAL6] ", /* LOG_LOCAL6 */ - "[LOCAL7] "}; /* LOG_LOCAL7 */ - -/* syslog functions */ - -static void open_syslog (void) -{ - if (vos_syslog_fd >= 0) - return; - - vos_syslog_fd = open (vos_syslog_path, O_RDWR | O_CREAT | O_APPEND, 0777); - if (vos_syslog_fd < 0) - fprintf (stderr, "Unable to open %s (errno=%d, os_errno=%d)\n", - vos_syslog_path, errno, os_errno); -} - -void closelog (void) -{ - if (vos_syslog_fd >= 0) - close (vos_syslog_fd); - - vos_syslog_facility = LOG_USER>>3; - vos_syslog_fd = -1; - vos_syslog_logopt = 0; - vos_syslog_ident[0] = '\0'; - vos_syslog_ident_len = 0; - vos_syslog_mask = ALL_PRIORITIES; - return; -} - -void openlog (const char *ident, int logopt, int facility) -{ -int n; - - if (ident != NULL) - { - strncpy (vos_syslog_ident, ident, sizeof (vos_syslog_ident)); - n = IDENT_LEN - - strnlen (vos_syslog_ident, sizeof (vos_syslog_ident)); - strncat (vos_syslog_ident, ": ", n); - vos_syslog_ident_len = strnlen (vos_syslog_ident, - sizeof (vos_syslog_ident)); - } - - vos_syslog_logopt = logopt; - vos_syslog_facility = facility>>3; - - if ((logopt & LOG_NDELAY) == LOG_NDELAY) - open_syslog (); - - return; -} - -int setlogmask (int maskpri) -{ -int old_mask; - - old_mask = vos_syslog_mask; - - if (maskpri > 0) - vos_syslog_mask = maskpri; - - return old_mask; -} - -void syslog (int priority, const char *format, ...) -{ -va_list ap; -int bare_facility; -int bare_priority; -int buffer_n; -char buffer[BUFFER_LEN]; -short int code; -char_varying(MSG_LEN) message; -char_varying(66) module_name; -int n; -int pid_n; -char pid_string[32]; -int r; -int user_n; -char user_string[256]; - - /* Calculate priority and facility value. */ - - bare_priority = priority & 3; - bare_facility = priority >> 3; - - /* If the priority is not set in the mask, do not log the - message. */ - - if ((vos_syslog_mask & LOG_MASK(bare_priority)) == 0) - return; - - /* Output facility name. */ - - if (bare_facility == 0) - bare_facility = vos_syslog_facility; - - strcpy (buffer, vos_syslog_facility_name[bare_facility]); - - /* Output priority value. */ - - /* TBD */ - - /* Output identity string. */ - - buffer_n = BUFFER_LEN - strlen (buffer); - strncat (buffer, vos_syslog_ident, buffer_n); - - /* Output process ID. */ - - if ((vos_syslog_logopt & LOG_PID) == LOG_PID) - { - pid_n = snprintf (pid_string, sizeof (pid_string), - "PID=0x%x ", getpid ()); - if (pid_n) - { - buffer_n = BUFFER_LEN - strlen (buffer); - strncat (buffer, pid_string, buffer_n); - } - } - - /* Output formatted message. */ - - va_start (ap, format); - user_n = vsnprintf (user_string, sizeof (user_string), format, ap); - va_end (ap); - - /* Ensure string ends in a newline. */ - - if (user_n > 0) - { - if (user_n >= sizeof (user_string)) - user_n = sizeof (user_string) - 1; - - /* arrays are zero-origin.... */ - - if (user_string [user_n-1] != '\n') - { - user_string [user_n-1] = '\n'; - user_string [user_n++] = '\0'; - } - } - else - { - user_string [0] = '\n'; - user_string [1] = '\0'; - user_n = 1; - } - - buffer_n = BUFFER_LEN - strnlen (buffer, sizeof (buffer)); - strncat (buffer, user_string, buffer_n); - - /* If the log is not open, try to open it now. */ - - if (vos_syslog_fd < 0) - open_syslog (); - - /* Try to write the message to the syslog file. */ - - if (vos_syslog_fd < 0) - r = -1; - else - { - buffer_n = strnlen (buffer, sizeof (buffer)); - r = write (vos_syslog_fd, buffer, buffer_n); - } - - /* If we were unable to write to the log and if LOG_CONS is - set, send it to the console. */ - - if (r < 0) - if ((vos_syslog_logopt & LOG_CONS) == LOG_CONS) - { - strcpy_vstr_nstr (&message, "syslog: "); - n = MSG_LEN - sizeof ("syslog: "); - strncat_vstr_nstr (&message, buffer, n); - strcpy_vstr_nstr (&module_name, ""); - s$log_system_message (&message, &module_name, &code); - } - - return; -} diff --git a/vos/vosish.h b/vos/vosish.h index c9e37654e0..36b3b410a4 100644 --- a/vos/vosish.h +++ b/vos/vosish.h @@ -8,11 +8,5 @@ is a work-around for posix-1302. */ #undef SA_SIGINFO -/* The following declaration is an avoidance for posix-950. */ -extern int ioctl (int fd, int request, ...); - /* Specify a prototype for truncate() since we are supplying one. */ extern int truncate (const char *path, off_t len); - -/* Specify a prototype for socketpair() since we supplying one. */ -extern int socketpair (int family, int type, int protocol, int fd[2]); diff --git a/win32/Makefile b/win32/Makefile index 63ea91925f..0fdbdd362b 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -37,7 +37,7 @@ INST_TOP = $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER = \5.15.4 +#INST_VER = \5.15.5 # # Comment this out if you DON'T want your perl installation to have @@ -540,7 +540,8 @@ PERLSTATIC = FIRSTUNIFILE = ..\lib\unicore\Decomposition.pl UNIDATAFILES = ..\lib\unicore\Decomposition.pl \ ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ - ..\lib\unicore\Heavy.pl ..\lib\unicore\mktables.lst \ + ..\lib\unicore\Heavy.pl ..\lib\unicore\mktables.lst \ + ..\lib\unicore\UCD.pl ..\lib\unicore\Name.pm \ ..\lib\unicore\TestProp.pl # Directories of Unicode data files generated by mktables @@ -1073,14 +1074,14 @@ utils: $(PERLEXE) $(X2P) copy ..\README.vmesa ..\pod\perlvmesa.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5155delta.pod + copy ..\pod\perldelta.pod ..\pod\perl5156delta.pod cd ..\win32 $(PERLEXE) $(PL2BAT) $(UTILS) $(PERLEXE) $(ICWD) ..\autodoc.pl .. $(PERLEXE) $(ICWD) ..\pod\perlmodlib.pl -q ..\pod\perltoc.pod: $(PERLEXE) Extensions Extensions_nonxs - $(PERLEXE) -f ..\pod\buildtoc --build-toc -q + $(PERLEXE) -f ..\pod\buildtoc -q # Note that the pod cleanup in this next section is parsed (and regenerated # by pod/buildtoc so please check that script before making changes here @@ -1166,7 +1167,7 @@ distclean: realclean -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -cd $(PODDIR) && del /f *.html *.bat \ - perl5155delta.pod perlaix.pod perlamiga.pod perlapi.pod \ + perl5156delta.pod perlaix.pod perlamiga.pod perlapi.pod \ perlbeos.pod perlbs2000.pod perlce.pod perlcn.pod \ perlcygwin.pod perldgux.pod perldos.pod perlepoc.pod \ perlfreebsd.pod perlhaiku.pod perlhpux.pod perlhurd.pod \ diff --git a/win32/Makefile.ce b/win32/Makefile.ce index f50b9aa0bd..70f7ef0bfa 100644 --- a/win32/Makefile.ce +++ b/win32/Makefile.ce @@ -6,7 +6,7 @@ SRCDIR = .. PV = 59 -INST_VER = 5.15.4 +INST_VER = 5.15.5 # INSTALL_ROOT specifies a path where this perl will be installed on CE device INSTALL_ROOT=/netzwerk/sprache/perl @@ -538,6 +538,7 @@ MINIMOD = ..\lib\ExtUtils\Miniperl.pm UNIDATAFILES = ..\lib\unicore\Decomposition.pl \ ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ ..\lib\unicore\Heavy.pl ..\lib\unicore\mktables.lst \ + ..\lib\unicore\UCD.pl ..\lib\unicore\Name.pm \ ..\lib\unicore\TestProp.pl # Directories of Unicode data files generated by mktables diff --git a/win32/include/sys/socket.h b/win32/include/sys/socket.h index 3b92b13d54..26a92c78dd 100644 --- a/win32/include/sys/socket.h +++ b/win32/include/sys/socket.h @@ -30,11 +30,17 @@ # include <ws2tcpip.h> # ifndef SIO_GET_INTERFACE_LIST_EX + +# ifndef MSG_WAITALL +# define MSG_WAITALL 0x8 +# endif + /* The ws2tcpip.h header included in VC6 doesn't define the * sin6_scope_id member of sockaddr_in6. We define our own * version and redefine sockaddr_in6 to point to this one * instead for compiling e.g. Socket.xs. */ + struct my_sockaddr_in6 { short sin6_family; /* AF_INET6 */ u_short sin6_port; /* Transport level port number */ @@ -71,11 +77,106 @@ (x)->sin6_scope_id = 0; \ } +# ifndef IPV6_HDRINCL +# define IPV6_HDRINCL 2 +# endif +# ifndef IPV6_UNICAST_HOPS +# define IPV6_UNICAST_HOPS 4 +# endif +# ifndef IPV6_MULTICAST_IF +# define IPV6_MULTICAST_IF 9 +# endif +# ifndef IPV6_MULTICAST_HOPS +# define IPV6_MULTICAST_HOPS 10 +# endif +# ifndef IPV6_MULTICAST_LOOP +# define IPV6_MULTICAST_LOOP 11 +# endif +# ifndef IPV6_ADD_MEMBERSHIP +# define IPV6_ADD_MEMBERSHIP 12 +# endif +# ifndef IPV6_DROP_MEMBERSHIP +# define IPV6_DROP_MEMBERSHIP 13 +# endif +# ifndef IPV6_JOIN_GROUP +# define IPV6_JOIN_GROUP IPV6_ADD_MEMBERSHIP +# endif +# ifndef IPV6_LEAVE_GROUP +# define IPV6_LEAVE_GROUP IPV6_DROP_MEMBERSHIP +# endif +# ifndef IPV6_PKTINFO +# define IPV6_PKTINFO 19 +# endif +# ifndef IPV6_HOPLIMIT +# define IPV6_HOPLIMIT 21 +# endif +# ifndef IPV6_PROTECTION_LEVEL +# define IPV6_PROTECTION_LEVEL 23 +# endif + + /* The ws2tcpip.h header included in MinGW includes ipv6_mreq already */ +# ifndef __GNUC__ + typedef struct ipv6_mreq { + struct in_addr6 ipv6mr_multiaddr; + unsigned int ipv6mr_interface; + } IPV6_MREQ; +# endif + +# ifndef EAI_AGAIN +# define EAI_AGAIN WSATRY_AGAIN +# endif +# ifndef EAI_BADFLAGS +# define EAI_BADFLAGS WSAEINVAL +# endif +# ifndef EAI_FAIL +# define EAI_FAIL WSANO_RECOVERY +# endif +# ifndef EAI_FAMILY +# define EAI_FAMILY WSAEAFNOSUPPORT +# endif +# ifndef EAI_MEMORY +# define EAI_MEMORY WSA_NOT_ENOUGH_MEMORY +# endif +# ifndef EAI_NODATA +# define EAI_NODATA WSANO_DATA +# endif +# ifndef EAI_NONAME +# define EAI_NONAME WSAHOST_NOT_FOUND +# endif +# ifndef EAI_SERVICE +# define EAI_SERVICE WSATYPE_NOT_FOUND +# endif +# ifndef EAI_SOCKTYPE +# define EAI_SOCKTYPE WSAESOCKTNOSUPPORT +# endif + +# ifndef NI_NOFQDN +# define NI_NOFQDN 0x01 +# endif +# ifndef NI_NUMERICHOST +# define NI_NUMERICHOST 0x02 +# endif +# ifndef NI_NAMEREQD +# define NI_NAMEREQD 0x04 +# endif +# ifndef NI_NUMERICSERV +# define NI_NUMERICSERV 0x08 +# endif +# ifndef NI_DGRAM +# define NI_DGRAM 0x10 +# endif + # endif # endif #endif +/* Early Platform SDKs have an incorrect definition of EAI_NODATA */ +#if (EAI_NODATA == EAI_NONAME) +# undef EAI_NODATA +# define EAI_NODATA WSANO_DATA +#endif + #include "win32.h" #ifdef __cplusplus diff --git a/win32/makefile.mk b/win32/makefile.mk index 1a3daf2f4f..2f6550991b 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -38,7 +38,7 @@ INST_TOP *= $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER *= \5.15.4 +#INST_VER *= \5.15.5 # # Comment this out if you DON'T want your perl installation to have @@ -632,6 +632,7 @@ PERLSTATIC = # Unicode data files generated by mktables UNIDATAFILES = ..\lib\unicore\Decomposition.pl ..\lib\unicore\TestProp.pl \ ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ + ..\lib\unicore\UCD.pl ..\lib\unicore\Name.pm \ ..\lib\unicore\Heavy.pl ..\lib\unicore\mktables.lst # Directories of Unicode data files generated by mktables @@ -1259,13 +1260,13 @@ utils: $(PERLEXE) $(X2P) copy ..\README.vmesa ..\pod\perlvmesa.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5155delta.pod + copy ..\pod\perldelta.pod ..\pod\perl5156delta.pod $(PERLEXE) $(PL2BAT) $(UTILS) $(PERLEXE) $(ICWD) ..\autodoc.pl .. $(PERLEXE) $(ICWD) ..\pod\perlmodlib.pl -q ..\pod\perltoc.pod: $(PERLEXE) Extensions Extensions_nonxs - $(PERLEXE) -f ..\pod\buildtoc --build-toc -q + $(PERLEXE) -f ..\pod\buildtoc -q # Note that the pod cleanup in this next section is parsed (and regenerated # by pod/buildtoc so please check that script before making changes here @@ -1351,7 +1352,7 @@ distclean: realclean -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -cd $(PODDIR) && del /f *.html *.bat \ - perl5155delta.pod perlaix.pod perlamiga.pod perlapi.pod \ + perl5156delta.pod perlaix.pod perlamiga.pod perlapi.pod \ perlbeos.pod perlbs2000.pod perlce.pod perlcn.pod \ perlcygwin.pod perldgux.pod perldos.pod perlepoc.pod \ perlfreebsd.pod perlhaiku.pod perlhpux.pod perlhurd.pod \ diff --git a/win32/pod.mak b/win32/pod.mak index 6fb176de00..94c1631d04 100644 --- a/win32/pod.mak +++ b/win32/pod.mak @@ -34,6 +34,7 @@ POD = perl.pod \ perl5153delta.pod \ perl5154delta.pod \ perl5155delta.pod \ + perl5156delta.pod \ perl561delta.pod \ perl56delta.pod \ perl581delta.pod \ @@ -157,6 +158,7 @@ MAN = perl.man \ perl5153delta.man \ perl5154delta.man \ perl5155delta.man \ + perl5156delta.man \ perl561delta.man \ perl56delta.man \ perl581delta.man \ @@ -280,6 +282,7 @@ HTML = perl.html \ perl5153delta.html \ perl5154delta.html \ perl5155delta.html \ + perl5156delta.html \ perl561delta.html \ perl56delta.html \ perl581delta.html \ @@ -403,6 +406,7 @@ TEX = perl.tex \ perl5153delta.tex \ perl5154delta.tex \ perl5155delta.tex \ + perl5156delta.tex \ perl561delta.tex \ perl56delta.tex \ perl581delta.tex \ |