diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-01-07 12:10:41 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-09 10:13:57 -0700 |
commit | 9d9177bec752277fb0bb090203f47d85c3aba878 (patch) | |
tree | f312c12f5dd773346d960b45315375b032d72ce8 | |
parent | f59ff1943250231e4a9af32ff93e52cdfba13134 (diff) | |
download | perl-9d9177bec752277fb0bb090203f47d85c3aba878.tar.gz |
Add regen/mk_invlists.pl, charclass_invlists.h
This will be used to generate compile-time inversion lists in a C header
file that can be included in programs to speed up initialization.
Three simple inversion lists are included in this initial commit.
-rw-r--r-- | MANIFEST | 2 | ||||
-rwxr-xr-x | Makefile.SH | 3 | ||||
-rw-r--r-- | charclass_invlists.h | 39 | ||||
-rw-r--r-- | lib/unicore/README.perl | 4 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 83 |
5 files changed, 130 insertions, 1 deletions
@@ -8,6 +8,7 @@ beos/beosish.h BeOS port beos/nm.c BeOS port cflags.SH A script that emits C compilation flags per file Changes Describe how to peruse changes between releases +charclass_invlists.h Compiled-in inversion lists config_h.SH Produces config.h configpm Produces lib/Config.pm Configure Portability tool @@ -4862,6 +4863,7 @@ regen/embed.pl Produces {embed,embedvar,proto}.h regen/feature.pl Generates feature.pm regen/keywords.pl Program to write keywords.h regen/mg_vtable.pl generate mg_vtable.h +regen/mk_invlists.pl Generates charclass_invlists.h regen/mk_PL_charclass.pl Populate the PL_charclass table regen/opcode.pl Opcode header generator regen/opcodes Opcode data diff --git a/Makefile.SH b/Makefile.SH index b8845da8cb..5cd7179237 100755 --- a/Makefile.SH +++ b/Makefile.SH @@ -458,7 +458,8 @@ h2 = embed.h form.h gv.h handy.h hv.h keywords.h mg.h op.h opcode.h h3 = pad.h patchlevel.h perl.h perlapi.h perly.h pp.h proto.h regcomp.h h4 = regexp.h scope.h sv.h unixish.h util.h iperlsys.h thread.h h5 = utf8.h warnings.h mydtrace.h op_reg_common.h l1_char_class_tab.h -h = $(h1) $(h2) $(h3) $(h4) $(h5) +h6 = charclass_invlists.h +h = $(h1) $(h2) $(h3) $(h4) $(h5) $(h6) c1 = av.c scope.c op.c doop.c doio.c dump.c gv.c hv.c mg.c reentr.c mro.c perl.c c2 = perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c regcomp.c regexec.c utf8.c sv.c diff --git a/charclass_invlists.h b/charclass_invlists.h new file mode 100644 index 0000000000..dbb32d857b --- /dev/null +++ b/charclass_invlists.h @@ -0,0 +1,39 @@ +/* -*- buffer-read-only: t -*- + * !!!!!!! DO NOT EDIT THIS FILE !!!!!!! + * This file is built by regen/mk_invlists.pl from Unicode::UCD. + * Any changes made here will be lost! 
+ */ + +/* See the generating file for comments */ + + +UV Latin1_invlist[] = { + 2, /* Number of elements */ + 0, /* Current iteration position */ + 1064334010, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 256, + 0 +}; + +UV AboveLatin1_invlist[] = { + 1, /* Number of elements */ + 0, /* Current iteration position */ + 1064334010, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 256 +}; + +UV ASCII_invlist[] = { + 2, /* Number of elements */ + 0, /* Current iteration position */ + 1064334010, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 128, + 0 +}; + +/* ex: set ro: */ diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl index 2518a604a5..88152d2ccd 100644 --- a/lib/unicore/README.perl +++ b/lib/unicore/README.perl @@ -114,6 +114,10 @@ Also, you should regen l1_char_class_tab.h, by perl regen/mk_L_charclass.pl +and, regen charclass_invlists.h by + +perl regen/mk_invlists.pl + Finally: p4 submit diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl new file mode 100644 index 0000000000..5b69708b46 --- /dev/null +++ b/regen/mk_invlists.pl @@ -0,0 +1,83 @@ +#!perl -w +use 5.015; +use strict; +use warnings; +use Unicode::UCD "prop_invlist"; +require 'regen/regen_lib.pl'; + +# This program outputs charclass_invlists.h, which contains various inversion +# lists in the form of C arrays that are to be used as-is for inversion lists. +# Thus, the lists it contains are essentially pre-compiled, and need only a +# light-weight fast wrapper to make them usable at run-time. + +# As such, this code knows about the internal structure of these lists, and +# any change made to that has to be done here as well. 
A random number stored +# in the headers is used to minimize the possibility of things getting +# out-of-sync, or the wrong data structure being passed. Currently that +# random number is: +my $VERSION_DATA_STRUCTURE_TYPE = 1064334010; + +my $out_fh = open_new('charclass_invlists.h', '>', + {style => '*', by => $0, + from => "Unicode::UCD"}); + +print $out_fh "/* See the generating file for comments */\n\n"; + +sub output_invlist ($$) { + my $name = shift; + my $invlist = shift; # Reference to inversion list array + + # Output the inversion list $invlist using the name $name for it. + # It is output in the exact internal form for inversion lists. + + my $zero_or_one; # Is the last element of the header 0, or 1 ? + + # If the first element is 0, it goes in the header, instead of the body + if ($invlist->[0] == 0) { + shift @$invlist; + + $zero_or_one = 0; + + # Add a dummy 0 at the end so that the length is constant. inversion + # lists are always stored with enough room so that if they change from + # beginning with 0, they don't have to grow. + push @$invlist, 0; + } + else { + $zero_or_one = 1; + } + + print $out_fh "\nUV ${name}_invlist[] = {\n"; + + print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n"; + print $out_fh "\t0,\t/* Current iteration position */\n"; + print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n"; + print $out_fh "\t", $zero_or_one, + ",\t/* 0 if this is the first element of the list proper;", + "\n\t\t 1 if the next element is the first */\n"; + + # The main body are the UVs passed in to this routine. Do the final + # element separately + for my $i (0 .. @$invlist - 1 - 1) { + print $out_fh "\t$invlist->[$i],\n"; + } + + # The final element does not have a trailing comma, as C can't handle it. 
+ print $out_fh "\t$invlist->[-1]\n"; + + print $out_fh "};\n"; +} + +output_invlist("Latin1", [ 0, 256 ]); +output_invlist("AboveLatin1", [ 256 ]); + +for my $prop (qw( + ASCII + ) +) { + + my @invlist = prop_invlist($prop); + output_invlist($prop, \@invlist); +} + +read_only_bottom_close_and_rename($out_fh) |