diff options
author | Charles Bailey <bailey@newman.upenn.edu> | 2000-10-20 04:44:37 +0000 |
---|---|---|
committer | bailey <bailey@newman.upenn.edu> | 2000-10-20 04:44:37 +0000 |
commit | 22d4bb9ccb8701e68f9243547d7e3a3c55f70908 (patch) | |
tree | d2e7714cc660a21e8bf2624c99646b0cb001e40c /lib | |
parent | 4b19af017623bfa3bb72bb164598a517f586e0d3 (diff) | |
download | perl-22d4bb9ccb8701e68f9243547d7e3a3c55f70908.tar.gz |
SYN SYN
p4raw-id: //depot/vmsperl@7375
Diffstat (limited to 'lib')
342 files changed, 8178 insertions, 5184 deletions
diff --git a/lib/AutoLoader.pm b/lib/AutoLoader.pm index c26db72394..af33ee80f8 100644 --- a/lib/AutoLoader.pm +++ b/lib/AutoLoader.pm @@ -4,6 +4,7 @@ use 5.005_64; our(@EXPORT, @EXPORT_OK, $VERSION); my $is_dosish; +my $is_epoc; my $is_vms; BEGIN { @@ -11,6 +12,7 @@ BEGIN { @EXPORT = @EXPORT = (); @EXPORT_OK = @EXPORT_OK = qw(AUTOLOAD); $is_dosish = $^O eq 'dos' || $^O eq 'os2' || $^O eq 'MSWin32'; + $is_epoc = $^O eq 'epoc'; $is_vms = $^O eq 'VMS'; $VERSION = '5.57'; } @@ -51,7 +53,11 @@ AUTOLOAD { $filename = "./$filename"; } } - elsif ($is_vms) { + elsif ($is_epoc) { + unless ($filename =~ m{^([a-z?]:)?[\\/]}is) { + $filename = "./$filename"; + } + }elsif ($is_vms) { # XXX todo by VMSmiths $filename = "./$filename"; } diff --git a/lib/Benchmark.pm b/lib/Benchmark.pm index 3c10a5bc52..b557be3cc7 100644 --- a/lib/Benchmark.pm +++ b/lib/Benchmark.pm @@ -552,7 +552,9 @@ sub countit { # accuracy since we're not couting these times. $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation. my $td = timeit($n, $code); - $tc = $td->[1] + $td->[2]; + my $new_tc = $td->[1] + $td->[2]; + # Make sure we are making progress. + $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc; } # Now, do the 'for real' timing(s), repeating until we exceed @@ -581,6 +583,7 @@ sub countit { $ttot = $utot + $stot; last if $ttot >= $tmax; + $ttot = 0.01 if $ttot < 0.01; my $r = $tmax / $ttot - 1; # Linear approximation. $n = int( $r * $ntot ); $n = $nmin if $n < $nmin; diff --git a/lib/CGI.pm b/lib/CGI.pm index 63805544f8..fd06f64e41 100644 --- a/lib/CGI.pm +++ b/lib/CGI.pm @@ -17,18 +17,24 @@ require 5.004; # The most recent version and complete docs are available at: # http://stein.cshl.org/WWW/software/CGI/ -$CGI::revision = '$Id: CGI.pm,v 1.30 2000/03/28 21:31:40 lstein Exp $'; -$CGI::VERSION='2.66'; +$CGI::revision = '$Id: CGI.pm,v 1.45 2000/09/13 02:55:41 lstein Exp $'; +$CGI::VERSION='2.74'; # HARD-CODED LOCATION FOR FILE UPLOAD TEMPORARY FILES. 
# UNCOMMENT THIS ONLY IF YOU KNOW WHAT YOU'RE DOING. # $TempFile::TMPDIRECTORY = '/usr/tmp'; use CGI::Util qw(rearrange make_attributes unescape escape expires); +use constant XHTML_DTD => ['-//W3C//DTD XHTML 1.0 Transitional//EN', + 'DTD/xhtml1-transitional.dtd']; + # >>>>> Here are some globals that you might want to adjust <<<<<< sub initialize_globals { # Set this to 1 to enable copious autoloader debugging messages $AUTOLOAD_DEBUG = 0; + + # Set this to 1 to generate XTML-compatible output + $XHTML = 1; # Change this to the preferred DTD to print in start_html() # or use default_dtd('text of DTD to use'); @@ -80,6 +86,8 @@ sub initialize_globals { $BEEN_THERE = 0; undef @QUERY_PARAM; undef %EXPORT; + undef $QUERY_CHARSET; + undef %QUERY_FIELDNAMES; # prevent complaints by mod_perl 1; @@ -344,10 +352,12 @@ sub init { # if we get called more than once, we want to initialize # ourselves from the original query (which may be gone # if it was read from STDIN originally.) - if (@QUERY_PARAM && !defined($initializer)) { + if (defined(@QUERY_PARAM) && !defined($initializer)) { foreach (@QUERY_PARAM) { $self->param('-name'=>$_,'-value'=>$QUERY_PARAM{$_}); } + $self->charset($QUERY_CHARSET); + $self->{'.fieldnames'} = {%QUERY_FIELDNAMES}; return; } @@ -356,6 +366,9 @@ sub init { $fh = to_filehandle($initializer) if $initializer; + # set charset to the safe ISO-8859-1 + $self->charset('ISO-8859-1'); + METHOD: { # avoid unreasonably large postings @@ -468,8 +481,6 @@ sub init { $self->delete('.submit'); $self->delete('.cgifields'); - # set charset to the safe ISO-8859-1 - $self->charset('ISO-8859-1'); $self->save_request unless $initializer; } @@ -519,6 +530,8 @@ sub save_request { next unless defined $_; $QUERY_PARAM{$_}=$self->{$_}; } + $QUERY_CHARSET = $self->charset; + %QUERY_FIELDNAMES = %{$self->{'.fieldnames'}}; } sub parse_params { @@ -569,13 +582,13 @@ sub _make_tag_func { } ); if ($tagname=~/start_(\w+)/i) { - $func .= qq! 
return "<\U$1\E\$attr>";} !; + $func .= qq! return "<\L$1\E\$attr>";} !; } elsif ($tagname=~/end_(\w+)/i) { - $func .= qq! return "<\U/$1\E>"; } !; + $func .= qq! return "<\L/$1\E>"; } !; } else { $func .= qq# - my(\$tag,\$untag) = ("\U<$tagname\E\$attr>","\U</$tagname>\E"); - return \$tag unless \@_; + return \$XHTML ? "\L<$tagname\E\$attr />" : "\L<$tagname\E\$attr>" unless \@_; + my(\$tag,\$untag) = ("\L<$tagname\E\$attr>","\L</$tagname>\E"); my \@result = map { "\$tag\$_\$untag" } (ref(\$_[0]) eq 'ARRAY') ? \@{\$_[0]} : "\@_"; return "\@result"; @@ -644,6 +657,8 @@ sub _setup_symbols { $DEBUG=0, next if /^[:-]no_?[Dd]ebug$/; $DEBUG=2, next if /^[:-][Dd]ebug$/; $USE_PARAM_SEMICOLONS++, next if /^[:-]newstyle_urls$/; + $XHTML++, next if /^[:-]xhtml$/; + $XHTML=0, next if /^[:-]no_?xhtml$/; $USE_PARAM_SEMICOLONS=0, next if /^[:-]oldstyle_urls$/; $PRIVATE_TEMPFILES++, next if /^[:-]private_tempfiles$/; $EXPORT{$_}++, next if /^[:-]any$/; @@ -717,7 +732,8 @@ END_OF_FUNC # Deletes the named parameter entirely. #### sub delete { - my($self,$name) = self_or_default(@_); + my($self,@p) = self_or_default(@_); + my($name) = rearrange([NAME],@p); CORE::delete $self->{$name}; CORE::delete $self->{'.fieldnames'}->{$name}; @{$self->{'.parameters'}}=grep($_ ne $name,$self->param()); @@ -851,7 +867,8 @@ END_OF_FUNC sub STORE { my $self = shift; my $tag = shift; - my @vals = split("\0",shift); + my $vals = shift; + my @vals = index($vals,"\0")!=-1 ? 
split("\0",$vals) : $vals; $self->param(-name=>$tag,-value=>\@vals); } END_OF_FUNC @@ -1042,6 +1059,9 @@ sub save { print $filehandle "$escaped_param=",escape("$value"),"\n"; } } + foreach (keys %{$self->{'.fieldnames'}}) { + print $filehandle ".cgifields=",escape("$_"),"\n"; + } print $filehandle "=\n"; # end of record } END_OF_FUNC @@ -1135,10 +1155,11 @@ sub header { return undef if $self->{'.header_printed'}++ and $HEADERS_ONCE; - my($type,$status,$cookie,$target,$expires,$nph,$charset,@other) = + my($type,$status,$cookie,$target,$expires,$nph,$charset,$attachment,@other) = rearrange([['TYPE','CONTENT_TYPE','CONTENT-TYPE'], 'STATUS',['COOKIE','COOKIES'],'TARGET', - 'EXPIRES','NPH','CHARSET'],@p); + 'EXPIRES','NPH','CHARSET', + 'ATTACHMENT'],@p); $nph ||= $NPH; if (defined $charset) { @@ -1151,11 +1172,11 @@ sub header { # need to fix it up a little. foreach (@other) { next unless my($header,$value) = /([^\s=]+)=\"?(.+?)\"?$/; - ($_ = $header) =~ s/^(\w)(.*)/$1 . lc ($2) . ": $value"/e; + ($_ = $header) =~ s/^(\w)(.*)/$1 . lc ($2) . ': '.$self->unescapeHTML($value)/e; } $type ||= 'text/html' unless defined($type); - $type .= "; charset=$charset" if $type ne '' and $type !~ /\bcharset\b/; + $type .= "; charset=$charset" if $type ne '' and $type =~ m!^text/! and $type !~ /\bcharset\b/; # Maybe future compatibility. Maybe not. my $protocol = $ENV{SERVER_PROTOCOL} || 'HTTP/1.0'; @@ -1178,6 +1199,7 @@ sub header { if $expires; push(@header,"Date: " . 
expires(0,'http')) if $expires || $cookie; push(@header,"Pragma: no-cache") if $self->cache(); + push(@header,"Content-Disposition: attachment; filename=\"$attachment\"") if $attachment; push(@header,@other); push(@header,"Content-Type: $type") if $type ne ''; @@ -1216,7 +1238,7 @@ END_OF_FUNC sub redirect { my($self,@p) = self_or_default(@_); my($url,$target,$cookie,$nph,@other) = rearrange([[LOCATION,URI,URL],TARGET,COOKIE,NPH],@p); - $url = $url || $self->self_url; + $url ||= $self->self_url; my(@o); foreach (@other) { tr/\"//d; push(@o,split("=",$_,2)); } unshift(@o, @@ -1253,39 +1275,45 @@ END_OF_FUNC 'start_html' => <<'END_OF_FUNC', sub start_html { my($self,@p) = &self_or_default(@_); - my($title,$author,$base,$xbase,$script,$noscript,$target,$meta,$head,$style,$dtd,@other) = - rearrange([TITLE,AUTHOR,BASE,XBASE,SCRIPT,NOSCRIPT,TARGET,META,HEAD,STYLE,DTD],@p); + my($title,$author,$base,$xbase,$script,$noscript,$target,$meta,$head,$style,$dtd,$lang,@other) = + rearrange([TITLE,AUTHOR,BASE,XBASE,SCRIPT,NOSCRIPT,TARGET,META,HEAD,STYLE,DTD,LANG],@p); # strangely enough, the title needs to be escaped as HTML # while the author needs to be escaped as a URL $title = $self->escapeHTML($title || 'Untitled Document'); $author = $self->escape($author); + $lang ||= 'en-US'; my(@result); if ($dtd) { - if (ref $dtd && $ref eq 'ARRAY') { + if (defined(ref($dtd)) and (ref($dtd) eq 'ARRAY')) { $dtd = $DEFAULT_DTD unless $dtd->[0] =~ m|^-//|; } else { $dtd = $DEFAULT_DTD unless $dtd =~ m|^-//|; } } else { - $dtd = $DEFAULT_DTD; + $dtd = $XHTML ? 
XHTML_DTD : $DEFAULT_DTD; } if (ref($dtd) && ref($dtd) eq 'ARRAY') { - push(@result,qq(<!DOCTYPE HTML PUBLIC "$dtd->[0]"\n\t"$dtd->[1]">)); + push(@result,qq(<!DOCTYPE html\n\tPUBLIC "$dtd->[0]"\n\t"$dtd->[1]">)); } else { - push(@result,qq(<!DOCTYPE HTML PUBLIC "$dtd">)); + push(@result,qq(<!DOCTYPE html\n\tPUBLIC "$dtd">)); } - push(@result,"<HTML><HEAD><TITLE>$title</TITLE>"); - push(@result,"<LINK REV=MADE HREF=\"mailto:$author\">") if defined $author; + push(@result,$XHTML ? qq(<html xmlns="http://www.w3.org/1999/xhtml" lang="$lang"><head><title>$title</title>) + : qq(<html lang="$lang"><head><title>$title</title>)); + if (defined $author) { + push(@result,$XHTML ? "<link rev=\"made\" href=\"mailto:$author\" />" + : "<link rev=\"made\" href=\"mailto:$author\">"); + } if ($base || $xbase || $target) { my $href = $xbase || $self->url('-path'=>1); - my $t = $target ? qq/ TARGET="$target"/ : ''; - push(@result,qq/<BASE HREF="$href"$t>/); + my $t = $target ? qq/ target="$target"/ : ''; + push(@result,$XHTML ? qq(<base href="$href"$t />) : qq(<base href="$href"$t>)); } if ($meta && ref($meta) && (ref($meta) eq 'HASH')) { - foreach (keys %$meta) { push(@result,qq(<META NAME="$_" CONTENT="$meta->{$_}">)); } + foreach (keys %$meta) { push(@result,$XHTML ? qq(<meta name="$_" content="$meta->{$_}" />) + : qq(<meta name="$_" content="$meta->{$_}">)); } } push(@result,ref($head) ? @$head : $head) if $head; @@ -1296,13 +1324,13 @@ sub start_html { # handle -noscript parameter push(@result,<<END) if $noscript; -<NOSCRIPT> +<noscript> $noscript -</NOSCRIPT> +</noscript> END ; my($other) = @other ? " @other" : ''; - push(@result,"</HEAD><BODY$other>"); + push(@result,"</head><body$other>"); return join("\n",@result); } END_OF_FUNC @@ -1315,26 +1343,40 @@ sub _style { my ($self,$style) = @_; my (@result); my $type = 'text/css'; + + my $cdata_start = $XHTML ? "\n<!--/* <![CDATA[ */" : "\n<!-- "; + my $cdata_end = $XHTML ? 
"\n/* ]]> */-->\n" : " -->\n"; + if (ref($style)) { - my($src,$code,$stype,@other) = - rearrange([SRC,CODE,TYPE], - '-foo'=>'bar', # a trick to allow the '-' to be omitted - ref($style) eq 'ARRAY' ? @$style : %$style); - $type = $stype if $stype; - push(@result,qq/<LINK REL="stylesheet" TYPE="$type" HREF="$src">/) if $src; - push(@result,style({'type'=>$type},"<!--\n$code\n-->")) if $code; + my($src,$code,$stype,@other) = + rearrange([SRC,CODE,TYPE], + '-foo'=>'bar', # a trick to allow the '-' to be omitted + ref($style) eq 'ARRAY' ? @$style : %$style); + $type = $stype if $stype; + if (ref($src) eq "ARRAY") # Check to see if the $src variable is an array reference + { # If it is, push a LINK tag for each one. + foreach $src (@$src) + { + push(@result,qq/<link rel="stylesheet" type="$type" href="$src">/) if $src; + } + } + else + { # Otherwise, push the single -src, if it exists. + push(@result,qq/<link rel="stylesheet" type="$type" href="$src">/) if $src; + } + push(@result,style({'type'=>$type},"$cdata_start\n$code\n$cdata_end")) if $code; } else { - push(@result,style({'type'=>$type},"<!--\n$style\n-->")); + push(@result,style({'type'=>$type},"$cdata_start\n$style\n$cdata_end")); } @result; } END_OF_FUNC - '_script' => <<'END_OF_FUNC', sub _script { my ($self,$script) = @_; my (@result); + my (@scripts) = ref($script) eq 'ARRAY' ? @$script : ($script); foreach $script (@scripts) { my($src,$code,$language); @@ -1353,18 +1395,21 @@ sub _script { } else { ($src,$code,$language, $type) = ('',$script,'JavaScript', 'text/javascript'); } + + my $comment = '//'; # javascript by default + $comment = '#' if $type=~/perl|tcl/i; + $comment = "'" if $type=~/vbscript/i; + + my $cdata_start = "\n<!-- Hide script\n"; + $cdata_start .= "$comment<![CDATA[\n" if $XHTML; + my $cdata_end = $XHTML ? 
"\n$comment]]>" : $comment; + $cdata_end .= " End script hiding -->\n"; + my(@satts); push(@satts,'src'=>$src) if $src; push(@satts,'language'=>$language); push(@satts,'type'=>$type); - $code = "<!-- Hide script\n$code\n// End script hiding -->" - if $code && $type=~/javascript/i; - $code = "<!-- Hide script\n$code\n\# End script hiding -->" - if $code && $type=~/perl/i; - $code = "<!-- Hide script\n$code\n\# End script hiding -->" - if $code && $type=~/tcl/i; - $code = "<!-- Hide script\n$code\n' End script hiding -->" - if $code && $type=~/vbscript/i; + $code = "$cdata_start$code$cdata_end"; push(@result,script({@satts},$code || '')); } @result; @@ -1377,7 +1422,7 @@ END_OF_FUNC #### 'end_html' => <<'END_OF_FUNC', sub end_html { - return "</BODY></HTML>"; + return "</body></html>"; } END_OF_FUNC @@ -1396,9 +1441,9 @@ END_OF_FUNC sub isindex { my($self,@p) = self_or_default(@_); my($action,@other) = rearrange([ACTION],@p); - $action = qq/ACTION="$action"/ if $action; + $action = qq/action="$action"/ if $action; my($other) = @other ? " @other" : ''; - return "<ISINDEX $action$other>"; + return $XHTML ? "<isindex $action$other />" : "<isindex $action$other>"; } END_OF_FUNC @@ -1416,13 +1461,16 @@ sub startform { my($method,$action,$enctype,@other) = rearrange([METHOD,ACTION,ENCTYPE],@p); - $method = $method || 'POST'; + $method = lc($method) || 'post'; $enctype = $enctype || &URL_ENCODED; - $action = $action ? qq/ACTION="$action"/ : $method eq 'GET' ? - 'ACTION="'.$self->script_name.'"' : ''; + unless (defined $action) { + $action = $self->url(-absolute=>1,-path=>1); + $action .= "?$ENV{QUERY_STRING}" if $ENV{QUERY_STRING}; + } + $action = qq(action="$action"); my($other) = @other ? 
" @other" : ''; $self->{'.parametersToAdd'}={}; - return qq/<FORM METHOD="$method" $action ENCTYPE="$enctype"$other>\n/; + return qq/<form method="$method" $action enctype="$enctype"$other>\n/; } END_OF_FUNC @@ -1465,10 +1513,10 @@ END_OF_FUNC sub endform { my($self,@p) = self_or_default(@_); if ( $NOSTICKY ) { - return wantarray ? ("</FORM>") : "\n</FORM>"; + return wantarray ? ("</form>") : "\n</form>"; } else { - return wantarray ? ($self->get_fields,"</FORM>") : - $self->get_fields ."\n</FORM>"; + return wantarray ? ($self->get_fields,"</form>") : + $self->get_fields ."\n</form>"; } } END_OF_FUNC @@ -1492,15 +1540,16 @@ sub _textfield { my $current = $override ? $default : (defined($self->param($name)) ? $self->param($name) : $default); - $current = defined($current) ? $self->escapeHTML($current) : ''; + $current = defined($current) ? $self->escapeHTML($current,1) : ''; $name = defined($name) ? $self->escapeHTML($name) : ''; - my($s) = defined($size) ? qq/ SIZE=$size/ : ''; - my($m) = defined($maxlength) ? qq/ MAXLENGTH=$maxlength/ : ''; + my($s) = defined($size) ? qq/ size=$size/ : ''; + my($m) = defined($maxlength) ? qq/ maxlength=$maxlength/ : ''; my($other) = @other ? " @other" : ''; # this entered at cristy's request to fix problems with file upload fields # and WebTV -- not sure it won't break stuff - my($value) = $current ne '' ? qq(VALUE="$current") : ''; - return qq/<INPUT TYPE="$tag" NAME="$name" $value$s$m$other>/; + my($value) = $current ne '' ? qq(value="$current") : ''; + return $XHTML ? qq(<input type="$tag" name="$name" $value$s$m$other />) + : qq/<input type="$tag" name="$name" $value$s$m$other>/; } END_OF_FUNC @@ -1578,10 +1627,10 @@ sub textarea { $name = defined($name) ? $self->escapeHTML($name) : ''; $current = defined($current) ? $self->escapeHTML($current) : ''; - my($r) = $rows ? " ROWS=$rows" : ''; - my($c) = $cols ? " COLS=$cols" : ''; + my($r) = $rows ? " rows=$rows" : ''; + my($c) = $cols ? " cols=$cols" : ''; my($other) = @other ? 
" @other" : ''; - return qq{<TEXTAREA NAME="$name"$r$c$other>$current</TEXTAREA>}; + return qq{<textarea name="$name"$r$c$other>$current</textarea>}; } END_OF_FUNC @@ -1604,17 +1653,18 @@ sub button { [ONCLICK,SCRIPT]],@p); $label=$self->escapeHTML($label); - $value=$self->escapeHTML($value); + $value=$self->escapeHTML($value,1); $script=$self->escapeHTML($script); my($name) = ''; $name = qq/ NAME="$label"/ if $label; $value = $value || $label; my($val) = ''; - $val = qq/ VALUE="$value"/ if $value; - $script = qq/ ONCLICK="$script"/ if $script; + $val = qq/ value="$value"/ if $value; + $script = qq/ onclick="$script"/ if $script; my($other) = @other ? " @other" : ''; - return qq/<INPUT TYPE="button"$name$val$script$other>/; + return $XHTML ? qq(<input type="button"$name$val$script$other />) + : qq/<input type="button"$name$val$script$other>/; } END_OF_FUNC @@ -1635,15 +1685,16 @@ sub submit { my($label,$value,@other) = rearrange([NAME,[VALUE,LABEL]],@p); $label=$self->escapeHTML($label); - $value=$self->escapeHTML($value); + $value=$self->escapeHTML($value,1); - my($name) = ' NAME=".submit"' unless $NOSTICKY; - $name = qq/ NAME="$label"/ if defined($label); + my($name) = ' name=".submit"' unless $NOSTICKY; + $name = qq/ name="$label"/ if defined($label); $value = defined($value) ? $value : $label; my($val) = ''; - $val = qq/ VALUE="$value"/ if defined($value); + $val = qq/ value="$value"/ if defined($value); my($other) = @other ? " @other" : ''; - return qq/<INPUT TYPE="submit"$name$val$other>/; + return $XHTML ? qq(<input type="submit"$name$val$other />) + : qq/<input type="submit"$name$val$other>/; } END_OF_FUNC @@ -1660,9 +1711,10 @@ sub reset { my($self,@p) = self_or_default(@_); my($label,@other) = rearrange([NAME],@p); $label=$self->escapeHTML($label); - my($value) = defined($label) ? qq/ VALUE="$label"/ : ''; + my($value) = defined($label) ? qq/ value="$label"/ : ''; my($other) = @other ? 
" @other" : ''; - return qq/<INPUT TYPE="reset"$value$other>/; + return $XHTML ? qq(<input type="reset"$value$other />) + : qq/<input type="reset"$value$other>/; } END_OF_FUNC @@ -1684,11 +1736,12 @@ sub defaults { my($label,@other) = rearrange([[NAME,VALUE]],@p); - $label=$self->escapeHTML($label); + $label=$self->escapeHTML($label,1); $label = $label || "Defaults"; - my($value) = qq/ VALUE="$label"/; + my($value) = qq/ value="$label"/; my($other) = @other ? " @other" : ''; - return qq/<INPUT TYPE="submit" NAME=".defaults"$value$other>/; + return $XHTML ? qq(<input type="submit" name=".defaults"$value$other />) + : qq/<input type="submit" NAME=".defaults"$value$other>/; } END_OF_FUNC @@ -1726,17 +1779,18 @@ sub checkbox { if (!$override && ($self->{'.fieldnames'}->{$name} || defined $self->param($name))) { - $checked = grep($_ eq $value,$self->param($name)) ? ' CHECKED' : ''; + $checked = grep($_ eq $value,$self->param($name)) ? ' checked' : ''; } else { - $checked = $checked ? ' CHECKED' : ''; + $checked = $checked ? qq/ checked/ : ''; } my($the_label) = defined $label ? $label : $name; $name = $self->escapeHTML($name); - $value = $self->escapeHTML($value); + $value = $self->escapeHTML($value,1); $the_label = $self->escapeHTML($the_label); my($other) = @other ? " @other" : ''; $self->register_parameter($name); - return qq{<INPUT TYPE="checkbox" NAME="$name" VALUE="$value"$checked$other>$the_label}; + return $XHTML ? qq{<input type="checkbox" name="$name" value="$value"$checked$other />$the_label} + : qq{<input type="checkbox" name="$name" value="$value"$checked$other>$the_label}; } END_OF_FUNC @@ -1777,7 +1831,12 @@ sub checkbox_group { my(%checked) = $self->previous_or_default($name,$defaults,$override); - $break = $linebreak ? "<BR>" : ''; + if ($linebreak) { + $break = $XHTML ? "<br />" : "<br>"; + } + else { + $break = ''; + } $name=$self->escapeHTML($name); # Create the elements @@ -1787,15 +1846,16 @@ sub checkbox_group { my($other) = @other ? 
" @other" : ''; foreach (@values) { - $checked = $checked{$_} ? ' CHECKED' : ''; + $checked = $checked{$_} ? qq/ checked/ : ''; $label = ''; unless (defined($nolabels) && $nolabels) { $label = $_; $label = $labels->{$_} if defined($labels) && defined($labels->{$_}); $label = $self->escapeHTML($label); } - $_ = $self->escapeHTML($_); - push(@elements,qq/<INPUT TYPE="checkbox" NAME="$name" VALUE="$_"$checked$other>${label}${break}/); + $_ = $self->escapeHTML($_,1); + push(@elements,$XHTML ? qq(<input type="checkbox" name="$name" value="$_"$checked$other />${label}${break}) + : qq/<input type="checkbox" name="$name" value="$_"$checked$other>${label}${break}/); } $self->register_parameter($name); return wantarray ? @elements : join(' ',@elements) @@ -1807,32 +1867,34 @@ END_OF_FUNC # Escape HTML -- used internally 'escapeHTML' => <<'END_OF_FUNC', sub escapeHTML { - my ($self,$toencode) = self_or_default(@_); - return undef unless defined($toencode); - return $toencode if ref($self) && $self->{'dontescape'}; - if (uc $self->{'.charset'} eq 'ISO-8859-1') { - # fix non-compliant bug in IE and Netscape - $toencode =~ s{(.)}{ - if ($1 eq '<') { '<' } - elsif ($1 eq '>') { '>' } - elsif ($1 eq '&') { '&' } - elsif ($1 eq '"') { '"' } - elsif ($1 eq "\x8b") { '‹' } - elsif ($1 eq "\x9b") { '›' } - else { $1 } - }gsex; - } else { - $toencode =~ s/(.)/'&#'.ord($1).';'/gsex; - } - return $toencode; + my ($self,$toencode,$newlinestoo) = CGI::self_or_default(@_); + return undef unless defined($toencode); + return $toencode if ref($self) && $self->{'dontescape'}; + $toencode =~ s{&}{&}gso; + $toencode =~ s{<}{<}gso; + $toencode =~ s{>}{>}gso; + $toencode =~ s{"}{"}gso; + my $latin = uc $self->{'.charset'} eq 'ISO-8859-1' || + uc $self->{'.charset'} eq 'WINDOWS-1252'; + if ($latin) { # bug in some browsers + $toencode =~ s{\x8b}{‹}gso; + $toencode =~ s{\x9b}{›}gso; + if (defined $newlinestoo && $newlinestoo) { + $toencode =~ s{\012}{ }gso; + $toencode =~ s{\015}{ }gso; + } + } + 
return $toencode; } END_OF_FUNC # unescape HTML -- used internally 'unescapeHTML' => <<'END_OF_FUNC', sub unescapeHTML { - my $string = ref($_[0]) ? $_[1] : $_[0]; + my ($self,$string) = CGI::self_or_default(@_); return undef unless defined($string); + my $latin = defined $self->{'.charset'} ? $self->{'.charset'} =~ /^(ISO-8859-1|WINDOWS-1252)$/i + : 1; # thanks to Randal Schwartz for the correct solution to this one $string=~ s[&(.*?);]{ local $_ = $1; @@ -1840,8 +1902,8 @@ sub unescapeHTML { /^quot$/i ? '"' : /^gt$/i ? ">" : /^lt$/i ? "<" : - /^#(\d+)$/ ? chr($1) : - /^#x([0-9a-f]+)$/i ? chr(hex($1)) : + /^#(\d+)$/ && $latin ? chr($1) : + /^#x([0-9a-f]+)$/i && $latin ? chr(hex($1)) : $_ }gex; return $string; @@ -1852,6 +1914,8 @@ END_OF_FUNC '_tableize' => <<'END_OF_FUNC', sub _tableize { my($rows,$columns,$rowheaders,$colheaders,@elements) = @_; + $rowheaders = [] unless defined $rowheaders; + $colheaders = [] unless defined $colheaders; my($result); if (defined($columns)) { @@ -1862,23 +1926,23 @@ sub _tableize { } # rearrange into a pretty table - $result = "<TABLE>"; + $result = "<table>"; my($row,$column); unshift(@$colheaders,'') if @$colheaders && @$rowheaders; - $result .= "<TR>" if @{$colheaders}; + $result .= "<tr>" if @{$colheaders}; foreach (@{$colheaders}) { - $result .= "<TH>$_</TH>"; + $result .= "<th>$_</th>"; } for ($row=0;$row<$rows;$row++) { - $result .= "<TR>"; - $result .= "<TH>$rowheaders->[$row]</TH>" if @$rowheaders; + $result .= "<tr>"; + $result .= "<th>$rowheaders->[$row]</th>" if @$rowheaders; for ($column=0;$column<$columns;$column++) { - $result .= "<TD>" . $elements[$column*$rows + $row] . "</TD>" + $result .= "<td>" . $elements[$column*$rows + $row] . "</td>" if defined($elements[$column*$rows + $row]); } - $result .= "</TR>"; + $result .= "</tr>"; } - $result .= "</TABLE>"; + $result .= "</table>"; return $result; } END_OF_FUNC @@ -1927,16 +1991,23 @@ sub radio_group { my($other) = @other ? 
" @other" : ''; foreach (@values) { - my($checkit) = $checked eq $_ ? ' CHECKED' : ''; - my($break) = $linebreak ? '<BR>' : ''; + my($checkit) = $checked eq $_ ? qq/ checked/ : ''; + my($break); + if ($linebreak) { + $break = $XHTML ? "<br />" : "<br>"; + } + else { + $break = ''; + } my($label)=''; unless (defined($nolabels) && $nolabels) { $label = $_; $label = $labels->{$_} if defined($labels) && defined($labels->{$_}); - $label = $self->escapeHTML($label); + $label = $self->escapeHTML($label,1); } $_=$self->escapeHTML($_); - push(@elements,qq/<INPUT TYPE="radio" NAME="$name" VALUE="$_"$checkit$other>${label}${break}/); + push(@elements,$XHTML ? qq(<input type="radio" name="$name" value="$_"$checkit$other />${label}${break}) + : qq/<input type="radio" name="$name" value="$_"$checkit$other>${label}${break}/); } $self->register_parameter($name); return wantarray ? @elements : join(' ',@elements) @@ -1979,17 +2050,17 @@ sub popup_menu { my(@values); @values = $self->_set_values_and_labels($values,\$labels,$name); - $result = qq/<SELECT NAME="$name"$other>\n/; + $result = qq/<select name="$name"$other>\n/; foreach (@values) { - my($selectit) = defined($selected) ? ($selected eq $_ ? 'SELECTED' : '' ) : ''; + my($selectit) = defined($selected) ? ($selected eq $_ ? qq/selected/ : '' ) : ''; my($label) = $_; $label = $labels->{$_} if defined($labels) && defined($labels->{$_}); my($value) = $self->escapeHTML($_); - $label=$self->escapeHTML($label); - $result .= "<OPTION $selectit VALUE=\"$value\">$label\n"; + $label=$self->escapeHTML($label,1); + $result .= "<option $selectit value=\"$value\">$label</option>\n"; } - $result .= "</SELECT>\n"; + $result .= "</select>\n"; return $result; } END_OF_FUNC @@ -2028,21 +2099,21 @@ sub scrolling_list { $size = $size || scalar(@values); my(%selected) = $self->previous_or_default($name,$defaults,$override); - my($is_multiple) = $multiple ? ' MULTIPLE' : ''; - my($has_size) = $size ? 
" SIZE=$size" : ''; + my($is_multiple) = $multiple ? qq/ multiple/ : ''; + my($has_size) = $size ? qq/ size="$size"/: ''; my($other) = @other ? " @other" : ''; $name=$self->escapeHTML($name); - $result = qq/<SELECT NAME="$name"$has_size$is_multiple$other>\n/; + $result = qq/<select name="$name"$has_size$is_multiple$other>\n/; foreach (@values) { - my($selectit) = $selected{$_} ? 'SELECTED' : ''; + my($selectit) = $selected{$_} ? qq/selected/ : ''; my($label) = $_; $label = $labels->{$_} if defined($labels) && defined($labels->{$_}); $label=$self->escapeHTML($label); - my($value)=$self->escapeHTML($_); - $result .= "<OPTION $selectit VALUE=\"$value\">$label</OPTION>\n"; + my($value)=$self->escapeHTML($_,1); + $result .= "<option $selectit value=\"$value\">$label</option>\n"; } - $result .= "</SELECT>\n"; + $result .= "</select>\n"; $self->register_parameter($name); return $result; } @@ -2084,8 +2155,9 @@ sub hidden { $name=$self->escapeHTML($name); foreach (@value) { - $_ = defined($_) ? $self->escapeHTML($_) : ''; - push(@result,qq/<INPUT TYPE="hidden" NAME="$name" VALUE="$_">/); + $_ = defined($_) ? $self->escapeHTML($_,1) : ''; + push @result,$XHTMl ? qq(<input type="hidden" name="$name" value="$_" />) + : qq(<input type="hidden" name="$name" value="$_">); } return wantarray ? @result : join('',@result); } @@ -2107,10 +2179,11 @@ sub image_button { my($name,$src,$alignment,@other) = rearrange([NAME,SRC,ALIGN],@p); - my($align) = $alignment ? " ALIGN=\U$alignment" : ''; + my($align) = $alignment ? " align=\U$alignment" : ''; my($other) = @other ? " @other" : ''; $name=$self->escapeHTML($name); - return qq/<INPUT TYPE="image" NAME="$name" SRC="$src"$align$other>/; + return $XHTML ? 
qq(<input type="image" name="$name" src="$src"$align$other />) + : qq/<input type="image" name="$name" src="$src"$align$other>/; } END_OF_FUNC @@ -2145,26 +2218,28 @@ END_OF_FUNC 'url' => <<'END_OF_FUNC', sub url { my($self,@p) = self_or_default(@_); - my ($relative,$absolute,$full,$path_info,$query) = - rearrange(['RELATIVE','ABSOLUTE','FULL',['PATH','PATH_INFO'],['QUERY','QUERY_STRING']],@p); + my ($relative,$absolute,$full,$path_info,$query,$base) = + rearrange(['RELATIVE','ABSOLUTE','FULL',['PATH','PATH_INFO'],['QUERY','QUERY_STRING'],'BASE'],@p); my $url; - $full++ if !($relative || $absolute); + $full++ if $base || !($relative || $absolute); my $path = $self->path_info; - my $script_name; - if (exists($ENV{REQUEST_URI})) { - my $index; - $script_name = $ENV{REQUEST_URI}; - # strip query string - substr($script_name,$index) = '' if ($index = index($script_name,'?')) >= 0; - # and path - if (exists($ENV{PATH_INFO})) { - my $decoded_path = unescape($ENV{PATH_INFO}); - substr($script_name,$index) = '' if ($index = rindex($script_name,$decoded_path)) >= 0; - } - } else { - $script_name = $self->script_name; - } + my $script_name = $self->script_name; + +# If anybody knows why I ever wrote this please tell me! 
+# if (exists($ENV{REQUEST_URI})) { +# my $index; +# $script_name = $ENV{REQUEST_URI}; +# # strip query string +# substr($script_name,$index) = '' if ($index = index($script_name,'?')) >= 0; +# # and path +# if (exists($ENV{PATH_INFO})) { +# (my $encoded_path = $ENV{PATH_INFO}) =~ s!([^a-zA-Z0-9_./-])!uc sprintf("%%%02x",ord($1))!eg;; +# substr($script_name,$index) = '' if ($index = rindex($script_name,$encoded_path)) >= 0; +# } +# } else { +# $script_name = $self->script_name; +# } if ($full) { my $protocol = $self->protocol(); @@ -2179,12 +2254,14 @@ sub url { unless (lc($protocol) eq 'http' && $port == 80) || (lc($protocol) eq 'https' && $port == 443); } + return $url if $base; $url .= $script_name; } elsif ($relative) { ($url) = $script_name =~ m!([^/]+)$!; } elsif ($absolute) { $url = $script_name; } + $url .= $path if $path_info and defined $path; $url .= "?" . $self->query_string if $query and $self->query_string; $url = '' unless defined $url; @@ -2239,7 +2316,7 @@ sub cookie { push(@param,'-expires'=>$expires) if $expires; push(@param,'-secure'=>$secure) if $secure; - return CGI::Cookie->new(@param); + return new CGI::Cookie(@param); } END_OF_FUNC @@ -2338,6 +2415,9 @@ sub query_string { push(@pairs,"$eparam=$value"); } } + foreach (keys %{$self->{'.fieldnames'}}) { + push(@pairs,".cgifields=".escape("$_")); + } return join($USE_PARAM_SEMICOLONS ? ';' : '&',@pairs); } END_OF_FUNC @@ -2762,7 +2842,7 @@ sub read_multipart { my($param)= $header{'Content-Disposition'}=~/ name="?([^\";]*)"?/; # Bug: Netscape doesn't escape quotation marks in file names!!! - my($filename) = $header{'Content-Disposition'}=~/ filename="?([^\";]*)"?/; + my($filename) = $header{'Content-Disposition'}=~/ filename="?([^\"]*)"?/; # add this parameter to our list $self->add_parameter($param); @@ -2920,12 +3000,11 @@ END_OF_FUNC sub new { my($pack,$name,$file,$delete) = @_; require Fcntl unless defined &Fcntl::O_RDWR; - my $fv = ('Fh::' . ++$FH . 
quotemeta($name)); - warn unless *{$fv}; - my $ref = \*{$fv}; + my $fv = ++$FH . quotemeta($name); + my $ref = \*{"Fh::$fv"}; sysopen($ref,$file,Fcntl::O_RDWR()|Fcntl::O_CREAT()|Fcntl::O_EXCL(),0600) || return; unlink($file) if $delete; - CORE::delete $Fh::{$FH}; + CORE::delete $Fh::{$fv}; return bless $ref,$pack; } END_OF_FUNC @@ -3099,8 +3178,7 @@ sub read { die "Malformed multipart POST\n" unless ($start >= 0) || ($self->{LENGTH} > 0); # If the boundary begins the data, then skip past it - # and return undef. The +2 here is a fiendish plot to - # remove the CR/LF pair at the end of the boundary. + # and return undef. if ($start == 0) { # clear us out completely if we've hit the last boundary. @@ -3111,7 +3189,8 @@ sub read { } # just remove the boundary. - substr($self->{BUFFER},0,length($self->{BOUNDARY})+2)=''; + substr($self->{BUFFER},0,length($self->{BOUNDARY}))=''; + $self->{BUFFER} =~ s/^\012\015?//; return undef; } @@ -3678,13 +3757,13 @@ the keys are the names of the CGI parameters, and the values are the parameters' values. The Vars() method does this. Called in a scalar context, it returns the parameter list as a tied hash reference. Changing a key changes the value of the parameter in the underlying -CGI parameter list. Called in an array context, it returns the +CGI parameter list. Called in a list context, it returns the parameter list as an ordinary hash. This allows you to read the contents of the parameter list, but not to change it. When using this, the thing you must watch out for are multivalued CGI parameters. Because a hash cannot distinguish between scalar and -array context, multivalued parameters will be returned as a packed +list context, multivalued parameters will be returned as a packed string, separated by the "\0" (null) character. You must split this packed string in order to get at the individual values. 
This is the convention introduced long ago by Steve Brenner in his cgi-lib.pl @@ -3942,6 +4021,13 @@ have the hidden fields appear in the querystring in a GET method. For example, a search script generated this way will have a very nice url with search parameters for bookmarking. +=item -no_xhtml + +By default, CGI.pm versions 2.69 and higher emit XHTML +(http://www.w3.org/TR/xhtml1/). The -no_xhtml pragma disables this +feature. Thanks to Michalis Kabrianis <kabrianis@hellug.gr> for this +feature. + =item -nph This makes CGI.pm produce a header appropriate for an NPH (no @@ -4114,6 +4200,7 @@ pages. -expires=>'+3d', -cookie=>$cookie, -charset=>'utf-7', + -attachment=>'foo.gif', -Cost=>'$2.00'); header() returns the Content-type: header. You can provide your own @@ -4162,6 +4249,12 @@ The B<-charset> parameter can be used to control the character set sent to the browser. If not provided, defaults to ISO-8859-1. As a side effect, this sets the charset() method as well. +The B<-attachment> parameter can be used to turn the page into an +attachment. Instead of displaying the page, some browsers will prompt +the user to save it to disk. The value of the argument is the +suggested name for the saved file. In order for this to work, you may +have to set the B<-type> to "application/octet-stream". + =head2 GENERATING A REDIRECTION HEADER print $query->redirect('http://somewhere.else/in/movie/land'); @@ -4172,9 +4265,7 @@ time of day or the identity of the user. The redirect() function redirects the browser to a different URL. If you use redirection like this, you should B<not> print out a header as -well. As of version 2.0, we produce both the unofficial Location: -header and the official URI: header. This should satisfy most servers -and browsers. +well. One hint I can offer is that relative links may not work correctly when you generate a redirection to another document on your site. @@ -4210,10 +4301,11 @@ page's appearance and behavior. 
This method returns a canned HTML header and the opening <BODY> tag. All parameters are optional. In the named parameter form, recognized -parameters are -title, -author, -base, -xbase and -target (see below -for the explanation). Any additional parameters you provide, such as -the Netscape unofficial BGCOLOR attribute, are added to the <BODY> -tag. Additional parameters must be proceeded by a hyphen. +parameters are -title, -author, -base, -xbase, -dtd, -lang and -target +(see below for the explanation). Any additional parameters you +provide, such as the Netscape unofficial BGCOLOR attribute, are added +to the <BODY> tag. Additional parameters must be proceeded by a +hyphen. The argument B<-xbase> allows you to provide an HREF for the <BASE> tag different from the current location, as in @@ -4239,22 +4331,25 @@ into a series of header <META> tags that look something like this: <META NAME="keywords" CONTENT="pharaoh secret mummy"> <META NAME="description" CONTENT="copyright 1996 King Tut"> -There is no direct support for the HTTP-EQUIV type of <META> tag. -This is because you can modify the HTTP header directly with the -B<header()> method. For example, if you want to send the Refresh: -header, do it in the header() method: +To create an HTTP-EQUIV type of <META> tag, use B<-head>, described +below. + +The B<-style> argument is used to incorporate cascading stylesheets +into your code. See the section on CASCADING STYLESHEETS for more +information. - print $q->header(-Refresh=>'10; URL=http://www.capricorn.com'); +The B<-lang> argument is used to incorporate a language attribute into +the <HTML> tag. The default if not specified is "en-US" for US +English. For example: -The B<-style> tag is used to incorporate cascading stylesheets into -your code. See the section on CASCADING STYLESHEETS for more information. + print $q->header(-lang=>'fr-CA'); You can place other arbitrary HTML elements to the <HEAD> section with the B<-head> tag. 
For example, to place the rarely-used <LINK> element in the head section, use this: print start_html(-head=>Link({-rel=>'next', - -href=>'http://www.capricorn.com/s2.html'})); + -href=>'http://www.capricorn.com/s2.html'})); To incorporate multiple HTML elements into the <HEAD> section, just pass an array reference: @@ -4267,6 +4362,12 @@ array reference: ] ); +And here's how to create an HTTP-EQUIV <META> tag: + + print header(-head=>meta({-http_equiv => 'Content-Type', + -content => 'text/html'})) + + JAVASCRIPTING: The B<-script>, B<-noScript>, B<-onLoad>, B<-onMouseOver>, B<-onMouseOut> and B<-onUnload> parameters are used to add Netscape JavaScript calls to your pages. B<-script> should @@ -4423,6 +4524,7 @@ You can also retrieve the unprocessed query string with query_string(): $absolute_url = $query->url(-absolute=>1); $url_with_path = $query->url(-path_info=>1); $url_with_path_and_query = $query->url(-path_info=>1,-query=>1); + $netloc = $query->url(-base => 1); B<url()> returns the script's URL in a variety of formats. Called without any arguments, it returns the full form of the URL, including @@ -4464,6 +4566,10 @@ Append the query string to the URL. This can be combined with B<-full>, B<-absolute> or B<-relative>. B<-query_string> is provided as a synonym. +=item B<-base> + +Generate just the protocol and net location, as in http://www.foo.com:8000 + =back =head2 MIXING POST AND URL PARAMETERS @@ -4579,7 +4685,7 @@ element of the list. For example, here's one way to make an ordered list: print ul( - li({-type=>'disc'},['Sneezy','Doc','Sleepy','Happy']); + li({-type=>'disc'},['Sneezy','Doc','Sleepy','Happy']) ); This example will result in HTML output that looks like this: @@ -4793,7 +4899,7 @@ This is the older type of encoding used by all browsers prior to Netscape 2.0. It is compatible with many CGI scripts and is suitable for short fields containing text data. For your convenience, CGI.pm stores the name of this encoding -type in B<$CGI::URL_ENCODED>. 
+type in B<&CGI::URL_ENCODED>. =item B<multipart/form-data> @@ -4947,7 +5053,7 @@ recognized. See textfield(). filefield() will return a file upload field for Netscape 2.0 browsers. In order to take full advantage of this I<you must use the new multipart encoding scheme> for the form. You can do this either -by calling B<start_form()> with an encoding type of B<$CGI::MULTIPART>, +by calling B<start_form()> with an encoding type of B<&CGI::MULTIPART>, or by calling the new method B<start_multipart_form()> instead of vanilla B<start_form()>. @@ -5711,13 +5817,17 @@ To create multiple cookies, give header() an array reference: -value=>\%answers); print $query->header(-cookie=>[$cookie1,$cookie2]); -To retrieve a cookie, request it by name by calling cookie() -method without the B<-value> parameter: +To retrieve a cookie, request it by name by calling cookie() method +without the B<-value> parameter: use CGI; $query = new CGI; - %answers = $query->cookie(-name=>'answers'); - # $query->cookie('answers') will work too! + $riddle = $query->cookie('riddle_name'); + %answers = $query->cookie('answers'); + +Cookies created with a single scalar value, such as the "riddle_name" +cookie, will be returned in that form. Cookies with array and hash +values can also be retrieved. The cookie and CGI namespaces are separate. If you have a parameter named 'answers' and a cookie named 'answers', the values retrieved by @@ -5854,6 +5964,9 @@ http://www.w3.org/pub/WWW/TR/Wd-css-1.html for more information. ); print end_html; +Pass an array reference to B<-style> in order to incorporate multiple +stylesheets into your document. + =head1 DEBUGGING If you are running the script from the command line or in the perl @@ -5962,9 +6075,8 @@ like $query->user_agent(netscape); =item B<path_info()> Returns additional path information from the script URL. -E.G. fetching /cgi-bin/your_script/additional/stuff will -result in $query->path_info() returning -"additional/stuff". +E.G. 
fetching /cgi-bin/your_script/additional/stuff will result in +$query->path_info() returning "/additional/stuff". NOTE: The Microsoft Internet Information Server is broken with respect to additional path information. If @@ -6014,6 +6126,10 @@ name. When using virtual hosts, returns the name of the host that the browser attempted to contact +=item B<server_port ()> + +Return the port that the server is listening on. + =item B<server_software ()> Returns the server software and version number. diff --git a/lib/CGI/Carp.pm b/lib/CGI/Carp.pm index 0a5c1218ee..5aea1985ec 100644 --- a/lib/CGI/Carp.pm +++ b/lib/CGI/Carp.pm @@ -142,6 +142,33 @@ of the error message that caused the script to die. Example: In order to correctly intercept compile-time errors, you should call set_message() from within a BEGIN{} block. +=head1 MAKING WARNINGS APPEAR AS HTML COMMENTS + +It is now also possible to make non-fatal errors appear as HTML +comments embedded in the output of your program. To enable this +feature, export the new "warningsToBrowser" subroutine. Since sending +warnings to the browser before the HTTP headers have been sent would +cause an error, any warnings are stored in an internal buffer until +you call the warningsToBrowser() subroutine with a true argument: + + use CGI::Carp qw(fatalsToBrowser warningsToBrowser); + use CGI qw(:standard); + print header(); + warningsToBrowser(1); + +You may also give a false argument to warningsToBrowser() to prevent +warnings from being sent to the browser while you are printing some +content where HTML comments are not allowed: + + warningsToBrowser(0); # disable warnings + print "<SCRIPT type=javascript><!--\n"; + print_some_javascript_code(); + print "//--></SCRIPT>\n"; + warningsToBrowser(1); # re-enable warnings + +Note: In this respect warningsToBrowser() differs fundamentally from +fatalsToBrowser(), which you should never call yourself! 
+ =head1 CHANGE LOG 1.05 carpout() added and minor corrections by Marc Hedlund @@ -166,7 +193,11 @@ set_message() from within a BEGIN{} block. 1.12 Changed die() on line 217 to CORE::die to avoid B<-w> warning. 1.13 Added cluck() to make the module orthogonal with Carp. - More mod_perl related fixes. + More mod_perl related fixes. + +1.20 Patch from Ilmari Karonen (perl@itz.pp.sci.fi): Added + warningsToBrowser(). Replaced <CODE> tags with <PRE> in + fatalsToBrowser() output. =head1 AUTHORS @@ -190,18 +221,11 @@ use Carp; @ISA = qw(Exporter); @EXPORT = qw(confess croak carp); -@EXPORT_OK = qw(carpout fatalsToBrowser wrap set_message cluck); - -BEGIN { - $] >= 5.005 - ? eval q#sub ineval { defined $^S ? $^S : _longmess() =~ /eval [\{\']/m }# - : eval q#sub ineval { _longmess() =~ /eval [\{\']/m }#; - $@ and die; -} +@EXPORT_OK = qw(carpout fatalsToBrowser warningsToBrowser wrap set_message cluck); $main::SIG{__WARN__}=\&CGI::Carp::warn; $main::SIG{__DIE__}=\&CGI::Carp::die; -$CGI::Carp::VERSION = '1.16'; +$CGI::Carp::VERSION = '1.20'; $CGI::Carp::CUSTOM_MSG = undef; # fancy import routine detects and handles 'errorWrap' specially. 
@@ -210,6 +234,7 @@ sub import { my(%routines); grep($routines{$_}++,@_,@EXPORT); $WRAP++ if $routines{'fatalsToBrowser'} || $routines{'wrap'}; + $WARN++ if $routines{'warningsToBrowser'}; my($oldlevel) = $Exporter::ExportLevel; $Exporter::ExportLevel = 1; Exporter::import($pkg,keys %routines); @@ -223,7 +248,7 @@ sub realdie { CORE::die(@_); } sub id { my $level = shift; my($pack,$file,$line,$sub) = caller($level); - my($id) = $file=~m|([^/]+)\z|; + my($id) = $file=~m|([^/]+)$|; return ($file,$line,$id); } @@ -235,7 +260,7 @@ sub stamp { $id = $file; ($pack,$file) = caller($frame++); } until !$file; - ($id) = $id=~m|([^/]+)\z|; + ($id) = $id=~m|([^/]+)$|; return "[$time] $id: "; } @@ -243,23 +268,40 @@ sub warn { my $message = shift; my($file,$line,$id) = id(1); $message .= " at $file line $line.\n" unless $message=~/\n$/; + _warn($message) if $WARN; my $stamp = stamp; $message=~s/^/$stamp/gm; realwarn $message; } +sub _warn { + my $msg = shift; + if ($EMIT_WARNINGS) { + # We need to mangle the message a bit to make it a valid HTML + # comment. This is done by substituting similar-looking ISO + # 8859-1 characters for <, > and -. This is a hack. + $msg =~ tr/<>-/\253\273\255/; + chomp $msg; + print STDOUT "<!-- warning: $msg -->\n"; + } else { + push @WARNINGS, $msg; + } +} + +sub ineval { _longmess() =~ /eval [\{\']/m } + # The mod_perl package Apache::Registry loads CGI programs by calling # eval. These evals don't count when looking at the stack backtrace. sub _longmess { my $message = Carp::longmess(); my $mod_perl = exists $ENV{MOD_PERL}; $message =~ s,eval[^\n]+Apache/Registry\.pm.*,,s if $mod_perl; - return( $message ); + return $message; } sub die { realdie @_ if ineval; - my $message = shift; + my ($message) = @_; my $time = scalar(localtime); my($file,$line,$id) = id(1); $message .= " at $file line $line." 
unless $message=~/\n$/; @@ -299,6 +341,11 @@ sub carpout { ( print SAVEERR "Unable to redirect STDERR: $!\n" and exit(1) ); } +sub warningsToBrowser { + $EMIT_WARNINGS = @_ ? shift : 1; + _warn(shift @WARNINGS) while $EMIT_WARNINGS and @WARNINGS; +} + # headers sub fatalsToBrowser { my($msg) = @_; @@ -318,6 +365,8 @@ END print STDOUT "Content-type: text/html\n\n" unless $mod_perl; + warningsToBrowser(1); # emit warnings before dying + if ($CUSTOM_MSG) { if (ref($CUSTOM_MSG) eq 'CODE') { &$CUSTOM_MSG($msg); # nicer to perl 5.003 users @@ -329,7 +378,7 @@ END my $mess = <<END; <H1>Software error:</H1> -<CODE>$msg</CODE> +<PRE>$msg</PRE> <P> $outer_message END diff --git a/lib/CGI/Cookie.pm b/lib/CGI/Cookie.pm index 9e5a14b47b..6737832080 100644 --- a/lib/CGI/Cookie.pm +++ b/lib/CGI/Cookie.pm @@ -40,17 +40,18 @@ sub raw_fetch { my %results; my($key,$value); - my(@pairs) = split("; ",$raw_cookie); + my(@pairs) = split("; ?",$raw_cookie); foreach (@pairs) { - if (/^([^=]+)=(.*)/) { - $key = $1; - $value = $2; - } - else { - $key = $_; - $value = ''; - } - $results{$key} = $value; + s/\s*(.*?)\s*/$1/; + if (/^([^=]+)=(.*)/) { + $key = $1; + $value = $2; + } + else { + $key = $_; + $value = ''; + } + $results{$key} = $value; } return \%results unless wantarray; return %results; @@ -60,17 +61,18 @@ sub parse { my ($self,$raw_cookie) = @_; my %results; - my(@pairs) = split("; ",$raw_cookie); + my(@pairs) = split("; ?",$raw_cookie); foreach (@pairs) { - my($key,$value) = split("="); - my(@values) = map unescape($_),split('&',$value); - $key = unescape($key); - # Some foreign cookies are not in name=value format, so ignore - # them. - next if !defined($value); - # A bug in Netscape can cause several cookies with same name to - # appear. The FIRST one in HTTP_COOKIE is the most recent version. 
- $results{$key} ||= $self->new(-name=>$key,-value=>\@values); + s/\s*(.*?)\s*/$1/; + my($key,$value) = split("="); + my(@values) = map unescape($_),split('&',$value); + $key = unescape($key); + # Some foreign cookies are not in name=value format, so ignore + # them. + next if !defined($value); + # A bug in Netscape can cause several cookies with same name to + # appear. The FIRST one in HTTP_COOKIE is the most recent version. + $results{$key} ||= $self->new(-name=>$key,-value=>\@values); } return \%results unless wantarray; return %results; @@ -382,7 +384,7 @@ Get or set the cookie's value. Example: $value = $c->value; @new_value = $c->value(['a','b','c','d']); -B<value()> is context sensitive. In an array context it will return +B<value()> is context sensitive. In a list context it will return the current value of the cookie as an array. In a scalar context it will return the B<first> value of a multivalued cookie. diff --git a/lib/CGI/Pretty.pm b/lib/CGI/Pretty.pm index 20173f9acf..d348807d68 100644 --- a/lib/CGI/Pretty.pm +++ b/lib/CGI/Pretty.pm @@ -72,7 +72,7 @@ sub _make_tag_func { \$attr = " \@attr" if \@attr; } - my(\$tag,\$untag) = ("\U<$tagname\E\$attr>","\U</$tagname>\E"); + my(\$tag,\$untag) = ("\L<$tagname\E\$attr>","\L</$tagname>\E"); return \$tag unless \@_; my \@result; @@ -128,7 +128,7 @@ sub initialize_globals { $CGI::Pretty::LINEBREAK = "\n"; # These tags are not prettify'd. 
- @CGI::Pretty::AS_IS = qw( A PRE CODE SCRIPT TEXTAREA ); + @CGI::Pretty::AS_IS = qw( a pre code script textarea ); 1; } diff --git a/lib/CGI/Push.pm b/lib/CGI/Push.pm index 6b8e012a15..83002f2336 100644 --- a/lib/CGI/Push.pm +++ b/lib/CGI/Push.pm @@ -16,7 +16,7 @@ package CGI::Push; # The most recent version and complete docs are available at: # http://stein.cshl.org/WWW/software/CGI/ -$CGI::Push::VERSION='1.02'; +$CGI::Push::VERSION='1.03'; use CGI; use CGI::Util 'rearrange'; @ISA = ('CGI'); @@ -60,7 +60,7 @@ sub do_push { while (1) { last unless (@contents = &$callback($self,++$COUNTER)) && defined($contents[0]); print "Content-type: ${type}$CGI::CRLF$CGI::CRLF" - unless $type eq 'dynamic'; + unless $type =~ /^dynamic|heterogeneous$/i; print @contents,"$CGI::CRLF"; print "${boundary}$CGI::CRLF"; do_sleep($self->push_delay()) if $self->push_delay(); diff --git a/lib/CGI/Util.pm b/lib/CGI/Util.pm index cb6dd8a9e2..ac7376d41a 100644 --- a/lib/CGI/Util.pm +++ b/lib/CGI/Util.pm @@ -1,13 +1,5 @@ package CGI::Util; -=pod - -=head1 NAME - -CGI::Util - various utilities - -=cut - use strict; use vars '$VERSION','@EXPORT_OK','@ISA','$EBCDIC','@A2E'; require Exporter; @@ -56,14 +48,14 @@ sub rearrange { my ($i,%pos); $i = 0; foreach (@$order) { - foreach (ref($_) eq 'ARRAY' ? @$_ : $_) { $pos{$_} = $i; } + foreach (ref($_) eq 'ARRAY' ? 
@$_ : $_) { $pos{lc($_)} = $i; } $i++; } my (@result,%leftover); $#result = $#$order; # preextend while (@param) { - my $key = uc(shift(@param)); + my $key = lc(shift(@param)); $key =~ s/^\-//; if (exists $pos{$key}) { $result[$pos{$key}] = shift(@param); @@ -72,7 +64,7 @@ sub rearrange { } } - push (@result,make_attributes(\%leftover)) if %leftover; + push (@result,make_attributes(\%leftover,1)) if %leftover; @result; } @@ -84,7 +76,7 @@ sub make_attributes { foreach (keys %{$attr}) { my($key) = $_; $key=~s/^\-//; # get rid of initial - if present - $key=~tr/a-z_/A-Z-/; # parameters are upper case, use dashes + $key=~tr/A-Z_/a-z-/; # parameters are lower case, use dashes my $value = $escape ? simple_escape($attr->{$_}) : $attr->{$_}; push(@att,defined($attr->{$_}) ? qq/$key="$value"/ : qq/$key/); } @@ -92,16 +84,14 @@ sub make_attributes { } sub simple_escape { - return unless defined (my $toencode = shift); - $toencode =~ s{(.)}{ - if ($1 eq '<') { '<' } - elsif ($1 eq '>') { '>' } - elsif ($1 eq '&') { '&' } - elsif ($1 eq '"') { '"' } - elsif ($1 eq "\x8b") { '‹' } - elsif ($1 eq "\x9b") { '›' } - else { $1 } - }gsex; + return unless defined(my $toencode = shift); + $toencode =~ s{&}{&}gso; + $toencode =~ s{<}{<}gso; + $toencode =~ s{>}{>}gso; + $toencode =~ s{\"}{"}gso; +# Doesn't work. Can't work. forget it. +# $toencode =~ s{\x8b}{‹}gso; +# $toencode =~ s{\x9b}{›}gso; $toencode; } diff --git a/lib/CGI/eg/RunMeFirst b/lib/CGI/eg/RunMeFirst new file mode 100755 index 0000000000..018b11b718 --- /dev/null +++ b/lib/CGI/eg/RunMeFirst @@ -0,0 +1,36 @@ +#!/usr/local/bin/perl + +# Make a world-writeable directory for saving state. +$ww = 'WORLD_WRITABLE'; +unless (-w $ww) { + $u = umask 0; + mkdir $ww, 0777; + umask $u; +} + +# Decode the sample image. 
+for $uu (<*.uu>) { + unless (open UU, "<$uu") { warn "Can't open $uu: $!\n"; next } + while (<UU>) { + chomp; + if (/^begin\s+\d+\s+(.+)$/) { + $bin = $1; + last; + } + } + unless (open BIN, "> $bin") { warn "Can't create $bin: $!\n"; next } + binmode BIN; + while (<UU>) { + chomp; + last if /^end/; + print BIN unpack "u", $_; + } + close BIN; + close UU; +} + +# Create symlinks from *.txt to *.cgi for documentation purposes. +foreach (<*.cgi>) { + ($target = $_) =~ s/cgi$/txt/i; + symlink $_, $target unless -e $target; +} diff --git a/lib/CGI/eg/caution.xbm b/lib/CGI/eg/caution.xbm new file mode 100644 index 0000000000..87fcdbef8a --- /dev/null +++ b/lib/CGI/eg/caution.xbm @@ -0,0 +1,12 @@ +#define caution_width 32 +#define caution_height 32 +static char caution_bits[] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x10,0x01, + 0x00,0x00,0x08,0x07,0x00,0x00,0x08,0x0e,0x00,0x00,0x04,0x0e,0x00,0x00,0x04, + 0x1c,0x00,0x00,0x02,0x1c,0x00,0x00,0xe2,0x38,0x00,0x00,0xf1,0x39,0x00,0x00, + 0xf1,0x71,0x00,0x80,0xf0,0x71,0x00,0x80,0xf0,0xe1,0x00,0x40,0xf0,0xe1,0x00, + 0x40,0xf0,0xc1,0x01,0x20,0xf0,0xc1,0x01,0x20,0xf0,0x81,0x03,0x10,0xe0,0x80, + 0x03,0x10,0xe0,0x00,0x07,0x08,0xe0,0x00,0x07,0x08,0xe0,0x00,0x0e,0x04,0x00, + 0x00,0x0e,0x04,0xe0,0x00,0x1c,0x02,0xf0,0x01,0x1c,0x02,0xf0,0x01,0x38,0x01, + 0xe0,0x00,0x38,0x01,0x00,0x00,0x70,0x01,0x00,0x00,0x70,0xff,0xff,0xff,0x7f, + 0xf8,0xff,0xff,0x3f,0x00,0x00,0x00,0x00}; diff --git a/lib/CGI/eg/clickable_image.cgi b/lib/CGI/eg/clickable_image.cgi new file mode 100644 index 0000000000..81daf09690 --- /dev/null +++ b/lib/CGI/eg/clickable_image.cgi @@ -0,0 +1,26 @@ +#!/usr/local/bin/perl + +use CGI; +$query = new CGI; +print $query->header; +print $query->start_html("A Clickable Image"); +print <<END; +<H1>A Clickable Image</H1> +</A> +END +print "Sorry, this isn't very exciting!\n"; + +print $query->startform; +print $query->image_button('picture',"./wilogo.gif"); +print "Give me a: 
",$query->popup_menu('letter',['A','B','C','D','E','W']),"\n"; # +print "<P>Magnification: ",$query->radio_group('magnification',['1X','2X','4X','20X']),"\n"; +print "<HR>\n"; + +if ($query->param) { + print "<P>Magnification, <EM>",$query->param('magnification'),"</EM>\n"; + print "<P>Selected Letter, <EM>",$query->param('letter'),"</EM>\n"; + ($x,$y) = ($query->param('picture.x'),$query->param('picture.y')); + print "<P>Selected Position <EM>($x,$y)</EM>\n"; +} + +print $query->end_html; diff --git a/lib/CGI/eg/cookie.cgi b/lib/CGI/eg/cookie.cgi new file mode 100644 index 0000000000..98adda196e --- /dev/null +++ b/lib/CGI/eg/cookie.cgi @@ -0,0 +1,88 @@ +#!/usr/local/bin/perl + +use CGI qw(:standard); + +@ANIMALS=sort qw/lion tiger bear pig porcupine ferret zebra gnu ostrich + emu moa goat weasel yak chicken sheep hyena dodo lounge-lizard + squirrel rat mouse hedgehog racoon baboon kangaroo hippopotamus + giraffe/; + +# Recover the previous animals from the magic cookie. +# The cookie has been formatted as an associative array +# mapping animal name to the number of animals. +%zoo = cookie('animals'); + +# Recover the new animal(s) from the parameter 'new_animal' +@new = param('new_animals'); + +# If the action is 'add', then add new animals to the zoo. Otherwise +# delete them. +foreach (@new) { + if (param('action') eq 'Add') { + $zoo{$_}++; + } elsif (param('action') eq 'Delete') { + $zoo{$_}-- if $zoo{$_}; + delete $zoo{$_} unless $zoo{$_}; + } +} + +# Add new animals to old, and put them in a cookie +$the_cookie = cookie(-name=>'animals', + -value=>\%zoo, + -expires=>'+1h'); + +# Print the header, incorporating the cookie and the expiration date... +print header(-cookie=>$the_cookie); + +# Now we're ready to create our HTML page. +print start_html('Animal crackers'); + +print <<EOF; +<h1>Animal Crackers</h1> +Choose the animals you want to add to the zoo, and click "add". 
+Come back to this page any time within the next hour and the list of +animals in the zoo will be resurrected. You can even quit Netscape +completely! +<p> +Try adding the same animal several times to the list. Does this +remind you vaguely of a shopping cart? +<p> +<em>This script only works with Netscape browsers</em> +<p> +<center> +<table border> +<tr><th>Add/Delete<th>Current Contents +EOF + ; + +print "<tr><td>",start_form; +print scrolling_list(-name=>'new_animals', + -values=>[@ANIMALS], + -multiple=>1, + -override=>1, + -size=>10),"<br>"; +print submit(-name=>'action',-value=>'Delete'), + submit(-name=>'action',-value=>'Add'); +print end_form; + +print "<td>"; +if (%zoo) { # make a table + print "<ul>\n"; + foreach (sort keys %zoo) { + print "<li>$zoo{$_} $_\n"; + } + print "</ul>\n"; +} else { + print "<strong>The zoo is empty.</strong>\n"; +} +print "</table></center>"; + +print <<EOF; +<hr> +<ADDRESS>Lincoln D. Stein</ADDRESS><BR> +<A HREF="./">More Examples</A> +EOF + ; +print end_html; + + diff --git a/lib/CGI/eg/crash.cgi b/lib/CGI/eg/crash.cgi new file mode 100644 index 0000000000..64f03c7b3d --- /dev/null +++ b/lib/CGI/eg/crash.cgi @@ -0,0 +1,6 @@ +#!/usr/local/bin/perl + +use CGI::Carp qw(fatalsToBrowser); + +# This line invokes a fatal error message at compile time. +foo bar baz; diff --git a/lib/CGI/eg/customize.cgi b/lib/CGI/eg/customize.cgi new file mode 100644 index 0000000000..c1c8187514 --- /dev/null +++ b/lib/CGI/eg/customize.cgi @@ -0,0 +1,92 @@ +#!/usr/local/bin/perl + +use CGI qw(:standard :html3); + +# Some constants to use in our form. +@colors=qw/aqua black blue fuschia gray green lime maroon navy olive + purple red silver teal white yellow/; +@sizes=("<default>",1..7); + +# recover the "preferences" cookie. +%preferences = cookie('preferences'); + +# If the user wants to change the background color or her +# name, they will appear among our CGI parameters. 
+foreach ('text','background','name','size') { + $preferences{$_} = param($_) || $preferences{$_}; +} + +# Set some defaults +$preferences{'background'} = $preferences{'background'} || 'silver'; +$preferences{'text'} = $preferences{'text'} || 'black'; + +# Refresh the cookie so that it doesn't expire. This also +# makes any changes the user made permanent. +$the_cookie = cookie(-name=>'preferences', + -value=>\%preferences, + -expires=>'+30d'); +print header(-cookie=>$the_cookie); + +# Adjust the title to incorporate the user's name, if provided. +$title = $preferences{'name'} ? + "Welcome back, $preferences{name}!" : "Customizable Page"; + +# Create the HTML page. We use several of Netscape's +# extended tags to control the background color and the +# font size. It's safe to use Netscape features here because +# cookies don't work anywhere else anyway. +print start_html(-title=>$title, + -bgcolor=>$preferences{'background'}, + -text=>$preferences{'text'} + ); + +print basefont({SIZE=>$preferences{size}}) if $preferences{'size'} > 0; + +print h1($title),<<END; +You can change the appearance of this page by submitting +the fill-out form below. If you return to this page any time +within 30 days, your preferences will be restored. 
+END + ; + +# Create the form +print hr(), + start_form, + + "Your first name: ", + textfield(-name=>'name', + -default=>$preferences{'name'}, + -size=>30),br, + + table( + TR( + td("Preferred"), + td("Page color:"), + td(popup_menu(-name=>'background', + -values=>\@colors, + -default=>$preferences{'background'}) + ), + ), + TR( + td(''), + td("Text color:"), + td(popup_menu(-name=>'text', + -values=>\@colors, + -default=>$preferences{'text'}) + ) + ), + TR( + td(''), + td("Font size:"), + td(popup_menu(-name=>'size', + -values=>\@sizes, + -default=>$preferences{'size'}) + ) + ) + ), + + submit(-label=>'Set preferences'), + hr; + +print a({HREF=>"/"},'Go to the home page'); +print end_html; diff --git a/lib/CGI/eg/diff_upload.cgi b/lib/CGI/eg/diff_upload.cgi new file mode 100644 index 0000000000..913f9ca179 --- /dev/null +++ b/lib/CGI/eg/diff_upload.cgi @@ -0,0 +1,68 @@ +#!/usr/local/bin/perl + +$DIFF = "/usr/bin/diff"; +$PERL = "/usr/bin/perl"; + +use CGI qw(:standard); +use CGI::Carp; + +print header; +print start_html("File Diff Example"); +print "<strong>Version </strong>$CGI::VERSION<p>"; + +print <<EOF; +<H1>File Diff Example</H1> +Enter two files. When you press "submit" their diff will be +produced. +EOF + ; + +# Start a multipart form. 
+print start_multipart_form; +print "File #1:",filefield(-name=>'file1',-size=>45),"<BR>\n"; +print "File #2:",filefield(-name=>'file2',-size=>45),"<BR>\n"; +print "Diff type: ",radio_group(-name=>'type', + -value=>['context','normal']),"<br>\n"; +print reset,submit(-name=>'submit',-value=>'Do Diff'); +print endform; + +# Process the form if there is a file name entered +$file1 = param('file1'); +$file2 = param('file2'); + +$|=1; # for buffering +if ($file1 && $file2) { + $realfile1 = tmpFileName($file1); + $realfile2 = tmpFileName($file2); + print "<HR>\n"; + print "<H2>$file1 vs $file2</H2>\n"; + + print "<PRE>\n"; + $options = "-c" if param('type') eq 'context'; + system "$DIFF $options $realfile1 $realfile2 | $PERL -pe 's/>/>/g; s/</</g;'"; + close $file1; + close $file2; + print "</PRE>\n"; +} + +print <<EOF; +<HR> +<A HREF="../cgi_docs.html">CGI documentation</A> +<HR> +<ADDRESS> +<A HREF="/~lstein">Lincoln D. Stein</A> +</ADDRESS><BR> +Last modified 17 July 1996 +EOF + ; +print end_html; + +sub sanitize { + my $name = shift; + my($safe) = $name=~/([a-zA-Z0-9._~#,]+)/; + unless ($safe) { + print "<strong>$name is not a valid Unix filename -- sorry</strong>"; + exit 0; + } + return $safe; +} diff --git a/lib/CGI/eg/dna_small_gif.uu b/lib/CGI/eg/dna_small_gif.uu new file mode 100644 index 0000000000..1745c73761 --- /dev/null +++ b/lib/CGI/eg/dna_small_gif.uu @@ -0,0 +1,63 @@ +begin 444 dna_small.gif +M1TE&.#=A)0`J`.<``+9%&Y<R0M<F'ID\,!<07%<G1:P<0Q`A2Q`P;"L9/L$: +M,"480N5"&RL7:4LD0T,G144[7BHL2B4?3\0I+"</)BQ.9KD0/S878\96$Z\@ +M(:\1*RL:3L0W&QL?2#4?9>@_&A$_5<I"&C`A3*,3-A`//9X<)\@Q(L`@.#\E +M7K,R*R\T6)H++1L72T8=4207:T`G=JX..MD^&!$_;^)2$#T=7S`79AL7.A$3 +M1-=%#^,Q&QT_:C8=1!L86]\R#4M":4H76R,515HZ4"477G@T,J\;(X(@/$\7 +M."4A2N9;$"DZ6RL34-8I$34A73P86I84/\87%1`0/V,B2"0<0N!(%QPH91<6 +M2=!5$3(=73(E23`/,!L4.=$Z'-MH%>`Y#3$=2"=#59M((H88,GP\/]X^&+$R +M$"(79"DF33(93"$86=%;&"T4/=Y"&\\A$Q4Z4!`62*T4-Q4B9+X1*BH96SP? 
+M1<0D/3(F<-TM$!`=5:H.(!<64C$W?#8J3*`S(S<@3=8V%K$](QT.6Q`43AL4 +M1:$D,;<0,\X9&,8])\@=$A(U244S5U,D6Q<V;]\I&&<K/8!*.R044>=/#YH> +M08$I1B,09S$35R(:4C0?<19$7<D^#Q`>5!()-;4702M`=;56)A`25,0K%"X< +M83`N>K`H'HDS*1`40,M&%!<@7M,_$A<N2L)%%18E4^<M$A@=5=0Q$Q`E311* +M2L8E&2D<7Q\A7P\80B48,%E/8[1-'J4/-"H<3"PA1$,T?Q<15R44,R,A4AL/ +M,^M4%2,07!L+,[\[&!P4520I:C(9.=54$[,2/M-)&RP?7M=(%1$J8<-,%2H@ +M4B,=51436^]2"!X<1A<44RD</>E+#%0>0S0/:4PJ9I,F-6DG420Y<"@09R,0 +M-V$R/R4M<:\H0!(H410Z:AT27&4Q9"L@;=1%(MA-$M5+#RL4+M8Z&AT<7[<: +M(-QA&2,36R$++B(84!0:5ALO:K$A.]X?&!$84V@;/SX<6Q`;2$(B2QD21W4J +M1=TW&@```````````````"P`````)0`J```(_@!]:(N'18\W%15T$1N'C)V? +M0_HH,<$G9YBX!C6<3;+T@AZ-)K@\O*"!KP^$>"KVA`L3*8P3)^5ZA)@B:AH+ +M9];F7.$Q+!`!0=*<E3'5K(:EHS2N%"AP*Y(\&##H6(!A:!XU99B*H?%4IDF? +M!8'<"-F%9I*TH8A83:*TXYD!J83D$0ICI-NE(:O8I6&FJ$64$M5$M%!3PU,F +M2UJD"9I41E824WT2G?BV-!$(!`=,+<IF!P>.%",&P7J"9XB82L5,48F5K,:" +M'94FU='6;!*R3T->E"%&95DR"`/6P,I0PDF)0SG8($$RJN0R`FI(H7$A2]VD +M*^^L_6(T3U*V%XH"_AB:HDS8H$AO5@R:P,8.I%^.:*AC(>Z!'":6RC`1L@W- +M%1Q/,%(.'5+1`<D^74R@P2#?T*'/$"%@\(L5Y?RAB!SO3#*,-6CL44\4]C1# +MR0Q3C&&"!AH08@(#:Y2C@`&1---,.#)\T84=R##`0",H++8#)34,@X80R:"` +M0A<]?'#.!";`TDH.8QBB0354A!,*)WI0\<D\YJRQ3PE[V",(.D#48$T9GO22 +MA0.BC,)'*]3TP,`^\_#QA0P@>`/"*]=<HX\3"C!BAQV%5",$+Z0,0X,UL3SB +M#B[NN$'/,A&,$L,Y[-BASR(@<**("E^,0(<&;[QA0@^,3%",#2S8H,01_LX0 +M`(0:W%`@13#TN+'#&LK@0$T0VER##50*Q/%-%%&H$`0#K7"`!B^X/`),()F0 +M0PNT[K#BRBS1-!("`^=,$8`D="B`QRW?1**+-I?(X$@(L(R2AQ8+Y,*#-*YX +MH(00'1Q!CSI,U'!%)^_%4(X&FUB@01$.@*!#$I?4D48I&1BA!2*HD%))(.+T +MYTEOT3B#PB/U4(,$%QD0(<\QX^CRBC=)@%`!&+:<<TH&M*C1"RJX'-'.'`YX +M<H0@<)#"0CX+I%`*-%:4L@XFS`#[11M?U-))"&RP84LWZL!A1A^YH.*)!)3< +MHXX6L]P3"!I`H/!`*>90`PD.F'3SR@-AG)`&_A=3'"**#[J(P<<I@YCA2@32 +M!,*+Q9H`(XT2'O2RS"CGS.U9!K>0^C0&S/#1""(ZI)'&*6,,$842N%Q!@3NK +M"R'%%4KP0D(Q?"`S!3)<I#!!".68$T(IMHP#>BVE(/$+)#-80<PTLBRP^@$> +M<+,`-[QH$LP+0_P!1C>=S("!,N>8XP<8H6131!B;3`#&#)"DTH(L5#P0S#W+ +MQ)*`$K1HHD4^;E!2QP%:B``&4H$,/Y2C$Y*(1%0(00A)@```77B"*$0@#3>P 
+MP&PU4`,^H!","P!!$')0P^H6((I5L"$'.;``';[Q!@-\PP"<B$<]EG"(#*C" +M%-(@0":\L0U?D&`#_L50@33.(`4M4``=&4@'+-9!!P9JP`#9T$4H=*$+!#1B +M'3A(`SYH`81'B.,(XJ@"":0(CDRPH@/<"$88V+"%$N#A&)MPX0N2H`L]C&,< +MEW"$%6:0BCV@XPIH>$0?)M$`<92!%OG8Q3#@(`U2C*)%9-!`&`K`!!EP0@80 +M2,0M;I$.:"C#"D.@P#UJ``]GT"*,K%B&*0+!@TFH8VV*0$(:_/`)?7@#'.'0 +M@R[H8``\:&`,=NC"*"B@!2TTHP5]N((TEH&`<)B"!PU`0RQ(P8MNS``9K6@% +M'W31AG%$(BZ2&((PA-&#'I"A&`F8!DD\\`H6-,.,X'B'%,+R#FX4`PS&_L!$ +M.4R0#7W<0@%O2,07$($#9@C3"%C8P!5H88\]J$,"-E!;(.X@C5T<`!65V,40 +M,'&(0C!"`1JP@"3TH`T9=&,5JRC$-]+1`U%TP@@[^,$>Z$&*0%1A%XFK`BFD +MX0L6Y"(#A_A$#[JPC@%\@1@02`(SJ#&&.`A@'\&$!AALX(4Y="`/TA"",UA! +M@%W<8!*\$`$@C%""+G1A"9AP!"<>9@ME*)$0F\C!$I8`B3\TP@%'@$(QH$`" +M)8@#'(B0ACB`P`,@%&,'9"##)S`1!&)\`0)^&,,^+$`("QC"#D\X!":HX0!< +ML(`&-O!%,&H@BS#F0AI'`!LO^"`,/OS!%KJ0_H0HEA`E27Q#%T%X0"%"@`1E +M2,`!:F!%+Q#A!G)40A!HP`4OFK`#-13#`?I(QCW38(Q/9&``%0#!);01BE>$ +M@!TZNL81$G`!-]2!&&5@A1N&T0Q<,$$$%*!``BR1A5$PPPH<74$%]*`#$.RA +M`(VP@#`X$(`7``(%T=@`.5Q`C@:0`P[)D$4'$H`."MB#"908Q=X@,01B9$(7 +MG'A%`#8!"QSL@PUAL,0==M"")CQCP4)0QQUDP806H&,!T]A!`EH@AC2P`P,^ +MT`,*NN%-&&T!&L@X!!Y^<(4.D(,%E>A`,T20C#IXL`6-:,`5F+"#!W"@'%OH +MA`IJ$5(#;`(;AV/'FA103(EV+(,>GHB&&ZJ`!D'<X0[-N$(N@)$+4$C@`<(` +M$(&/<8Q;7$,1UT"``+>P!57`X1F9D`4<W$`"4MP@$ZYX\B-"004Y2$`.HT@% +M)@IAB#><0!]FB(<I.(&`(7P"%GPPQ3)F`0YB0"$0@8"")L!Q!RH`8A&AL,8L +BR!$$,@@#&5OX!1V.\85XA.,.,A##!T2!AP@LXP#;"`@`.P`` +end diff --git a/lib/CGI/eg/file_upload.cgi b/lib/CGI/eg/file_upload.cgi new file mode 100644 index 0000000000..3037de7b14 --- /dev/null +++ b/lib/CGI/eg/file_upload.cgi @@ -0,0 +1,71 @@ +#!/usr/local/bin/perl -w + +use strict 'refs'; +use lib '..'; +use CGI qw(:standard); +use CGI::Carp qw/fatalsToBrowser/; + +print header(); +print start_html("File Upload Example"); +print strong("Version "),$CGI::VERSION,p; + +print h1("File Upload Example"), + 'This example demonstrates how to prompt the remote user to + select a remote file for uploading. 
', + strong("This feature only works with Netscape 2.0 or greater, or IE 4.0 or greater."), + p, + 'Select the ',cite('browser'),' button to choose a text file + to upload. When you press the submit button, this script + will count the number of lines, words, and characters in + the file.'; + +my @types = ('count lines','count words','count characters'); + +# Start a multipart form. +print start_multipart_form(), + "Enter the file to process:", + filefield('filename','',45), + br, + checkbox_group('count',\@types,\@types), + p, + reset,submit('submit','Process File'), + endform; + +# Process the form if there is a file name entered +if (my $file = param('filename')) { + my %stats; + my $tmpfile=tmpFileName($file); + my $mimetype = uploadInfo($file)->{'Content-Type'} || ''; + print hr(), + h2($file), + h3($tmpfile), + h4("MIME Type:",em($mimetype)); + + my($lines,$words,$characters,@words) = (0,0,0,0); + while (<$file>) { + $lines++; + $words += @words=split(/\s+/); + $characters += length($_); + } + close $file; + grep($stats{$_}++,param('count')); + if (%stats) { + print strong("Lines: "),$lines,br if $stats{'count lines'}; + print strong("Words: "),$words,br if $stats{'count words'}; + print strong("Characters: "),$characters,br if $stats{'count characters'}; + } else { + print strong("No statistics selected."); + } +} + +# print cite("URL parameters: "),url_param(); + +print hr(), + a({href=>"../cgi_docs.html"},"CGI documentation"), + hr, + address( + a({href=>'/~lstein'},"Lincoln D. 
Stein")), + br, + 'Last modified July 17, 1996', + end_html; + diff --git a/lib/CGI/eg/frameset.cgi b/lib/CGI/eg/frameset.cgi new file mode 100644 index 0000000000..fc86e92e9a --- /dev/null +++ b/lib/CGI/eg/frameset.cgi @@ -0,0 +1,81 @@ +#!/usr/local/bin/perl + +use CGI; +$query = new CGI; +print $query->header; +$TITLE="Frameset Example"; + +# We use the path information to distinguish between calls +# to the script to: +# (1) create the frameset +# (2) create the query form +# (3) create the query response + +$path_info = $query->path_info; + +# If no path information is provided, then we create +# a side-by-side frame set +if (!$path_info) { + &print_frameset; + exit 0; +} + +# If we get here, then we either create the query form +# or we create the response. +&print_html_header; +&print_query if $path_info=~/query/; +&print_response if $path_info=~/response/; +&print_end; + + +# Create the frameset +sub print_frameset { + $script_name = $query->script_name; + print <<EOF; +<html><head><title>$TITLE</title></head> +<frameset cols="50,50"> +<frame src="$script_name/query" name="query"> +<frame src="$script_name/response" name="response"> +</frameset> +EOF + ; + exit 0; +} + +sub print_html_header { + print $query->start_html($TITLE); +} + +sub print_end { + print qq{<P><hr><A HREF="../index.html" TARGET="_top">More Examples</A>}; + print $query->end_html; +} + +sub print_query { + $script_name = $query->script_name; + print "<H1>Frameset Query</H1>\n"; + print $query->startform(-action=>"$script_name/response",-TARGET=>"response"); + print "What's your name? ",$query->textfield('name'); + print "<P>What's the combination?<P>", + $query->checkbox_group(-name=>'words', + -values=>['eenie','meenie','minie','moe']); + + print "<P>What's your favorite color? 
", + $query->popup_menu(-name=>'color', + -values=>['red','green','blue','chartreuse']), + "<P>"; + print $query->submit; + print $query->endform; +} + +sub print_response { + print "<H1>Frameset Result</H1>\n"; + unless ($query->param) { + print "<b>No query submitted yet.</b>"; + return; + } + print "Your name is <EM>",$query->param(name),"</EM>\n"; + print "<P>The keywords are: <EM>",join(", ",$query->param(words)),"</EM>\n"; + print "<P>Your favorite color is <EM>",$query->param(color),"</EM>\n"; +} + diff --git a/lib/CGI/eg/index.html b/lib/CGI/eg/index.html new file mode 100644 index 0000000000..133ecc4a16 --- /dev/null +++ b/lib/CGI/eg/index.html @@ -0,0 +1,119 @@ +<HTML> <HEAD> +<TITLE>More Examples of Scripts Created with CGI.pm</TITLE> +</HEAD> + +<BODY> +<H1>More Examples of Scripts Created with CGI.pm</H1> + +<H2> Basic Non Sequitur Questionnaire</H2> +<UL> + <LI> <A HREF="tryit.cgi">Try the script</A> + <LI> <A HREF="tryit.txt">Look at its source code</A> +</UL> + +<H2> Advanced Non Sequitur Questionnaire</H2> +<UL> + <LI> <A HREF="monty.cgi">Try the script</A> + <LI> <A HREF="monty.txt">Look at its source code</A> +</UL> + +<H2> Save and restore the state of a form to a file</H2> +<UL> + <LI> <A HREF="save_state.cgi">Try the script</A> + <LI> <A HREF="save_state.txt">Look at its source code</A> +</UL> + +<H2> Server Push</H2> +<ul> + <li><a href="nph-multipart.cgi">Try the script</a> + <li><a href="nph-multipart.txt">Look at its source code</a> +</ul> + +<H2> Read the coordinates from a clickable image map</H2> +<UL> + <LI> <A HREF="clickable_image.cgi">Try the script</A> + <LI> <A HREF="clickable_image.txt">Look at its source code</A> +</UL> + +<H2> Multiple independent forms on the same page</H2> +<UL> + <LI> <A HREF="multiple_forms.cgi">Try the script</A> + <LI> <A HREF="multiple_forms.txt">Look at its source code</A> +</UL> + +<H2> How to maintain state on a page with internal links</H2> +<UL> + <LI> <A HREF="internal_links.cgi">Try the script</A> 
+ <LI> <A HREF="internal_links.txt">Look at its source code</A> +</UL> + +<h2>Echo fatal script errors to the browser</h2> +<em>This script deliberately generates a compile-time error.</em> +<ul> + <li><a href="crash.cgi">Try the script</a> + <li><a href="crash.txt">Look at its source code</a> +</ul> + +<EM>The Following Scripts Work with Netscape Navigator 2.0 and higher, +or Internet Explorer 3.0 and higher</EM> + +<H2> Prompt for a file to upload and process it</H2> +<UL> + <LI> <A HREF="file_upload.cgi">Try the script</A> + <LI> <A HREF="file_upload.txt">Look at its source code</A> +</UL> + +<h2> A Continuously-Updated Page using Server Push</h2> +<ul> + <li><a href="nph-clock.cgi">Try the script</a> + <li><a href="nph-clock.txt">Look at its source code</a> +</ul> + +<h2>Compute the "diff" between two uploaded files</h2> +<ul> + <li><a href="diff_upload.cgi">Try the script</a> + <li><a href="diff_upload.txt">Look at its source code</a> +</ul> + +<h2>Maintain state over a long period with a cookie</h2> +<ul> + <li><a href="cookie.cgi">Try the script</a> + <li><a href="cookie.txt">Look at its source code</a> +</ul> + +<h2>Permanently customize the appearance of a page with a cookie</h2> +<ul> + <li><a href="customize.cgi">Try the script</a> + <li><a href="customize.txt">Look at its source code</a> +</ul> + +<h2> Popup the response in a new window</h2> +<ul> + <li><a href="popup.cgi">Try the script</a> + <li><a href="popup.txt">Look at its source code</a> +</ul> + +<h2> Side-by-side form and response using frames</h2> +<ul> + <li><a href="frameset.cgi">Try the script</a> + <li><a href="frameset.txt">Look at its source code</a> +</ul> + +<h2>Verify the Contents of a fill-out form with JavaScript</h2> +<ul> + <li><a href="javascript.cgi">Try the script</a> + <li><a href="javascript.txt">Look at its source code</a> +</ul> + +<HR> +<MENU> + <LI> <A HREF="../cgi_docs.html">CGI.pm documentation</A> + <LI> <A HREF="../CGI.pm.tar.gz">Download the CGI.pm distribution</A> 
+</MENU> +<HR> +<ADDRESS>Lincoln D. Stein, lstein@genome.wi.mit.edu<br> +<a href="/">Whitehead Institute/MIT Center for Genome Research</a></ADDRESS> +<!-- hhmts start --> +Last modified: Wed Jun 23 15:31:47 EDT 1999 +<!-- hhmts end --> +</BODY> </HTML> diff --git a/lib/CGI/eg/internal_links.cgi b/lib/CGI/eg/internal_links.cgi new file mode 100644 index 0000000000..4806966842 --- /dev/null +++ b/lib/CGI/eg/internal_links.cgi @@ -0,0 +1,33 @@ +#!/usr/local/bin/perl + +use CGI; +$query = new CGI; + +# We generate a regular HTML file containing a very long list +# and a popup menu that does nothing except to show that we +# don't lose the state information. +print $query->header; +print $query->start_html("Internal Links Example"); +print "<H1>Internal Links Example</H1>\n"; +print "Click <cite>Submit Query</cite> to create a state. Then scroll down and", + " click on any of the <cite>Jump to top</cite> links. This is not very exciting."; + +print "<A NAME=\"start\"></A>\n"; # an anchor point at the top + +# pick a default starting value; +$query->param('amenu','FOO1') unless $query->param('amenu'); + +print $query->startform; +print $query->popup_menu('amenu',[('FOO1'..'FOO9')]); +print $query->submit,$query->endform; + +# We create a long boring list for the purposes of illustration. +$myself = $query->self_url; +print "<OL>\n"; +for (1..100) { + print qq{<LI>List item #$_ <A HREF="$myself#start">Jump to top</A>\n}; +} +print "</OL>\n"; + +print $query->end_html; + diff --git a/lib/CGI/eg/javascript.cgi b/lib/CGI/eg/javascript.cgi new file mode 100644 index 0000000000..91c2b9e648 --- /dev/null +++ b/lib/CGI/eg/javascript.cgi @@ -0,0 +1,105 @@ +#!/usr/local/bin/perl + +# This script illustrates how to use JavaScript to validate fill-out +# forms. +use CGI qw(:standard); + +# Here's the javascript code that we include in the document. +$JSCRIPT=<<EOF; + // validate that the user is the right age. Return + // false to prevent the form from being submitted. 
+ function validateForm() { + var today = new Date(); + var birthday = validateDate(document.form1.birthdate); + if (birthday == 0) { + document.form1.birthdate.focus() + document.form1.birthdate.select(); + return false; + } + var milliseconds = today.getTime()-birthday; + var years = milliseconds/(1000 * 60 * 60 * 24 * 365.25); + if ((years > 20) || (years < 5)) { + alert("You must be between the ages of 5 and 20 to submit this form"); + document.form1.birthdate.focus(); + document.form1.birthdate.select(); + return false; + } + // Since we've calculated the age in years already, + // we might as well send it up to our CGI script. + document.form1.age.value=Math.floor(years); + return true; + } + + // make sure that the contents of the supplied + // field contain a valid date. + function validateDate(element) { + var date = Date.parse(element.value); + if (0 == date) { + alert("Please enter date in format MMM DD, YY"); + element.focus(); + element.select(); + } + return date; + } + + // Compliments, compliments + function doPraise(element) { + if (element.checked) { + self.status=element.value + " is an excellent choice!"; + return true; + } else { + return false; + } + } + + function checkColor(element) { + var color = element.options[element.selectedIndex].text; + if (color == "blonde") { + if (confirm("Is it true that blondes have more fun?")) + alert("Darn. That leaves me out."); + } else + alert(color + " is a fine choice!"); + } +EOF + ; + +# here's where the execution begins +print header; +print start_html(-title=>'Personal Profile',-script=>$JSCRIPT); + +print h1("Big Brother Wants to Know All About You"), + strong("Note: "),"This page uses JavaScript and requires ", + "Netscape 2.0 or higher to do anything special."; + +&print_prompt(); +print hr; +&print_response() if param; +print end_html; + +sub print_prompt { + print start_form(-name=>'form1', + -onSubmit=>"return validateForm()"),"\n"; + print "Birthdate (e.g. 
Jan 3, 1972): ", + textfield(-name=>'birthdate', + -onBlur=>"validateDate(this)"),"<p>\n"; + print "Sex: ",radio_group(-name=>'gender', + -value=>[qw/male female/], + -onClick=>"doPraise(this)"),"<p>\n"; + print "Hair color: ",popup_menu(-name=>'color', + -value=>[qw/brunette blonde red gray/], + -default=>'red', + -onChange=>"checkColor(this)"),"<p>\n"; + print hidden(-name=>'age',-value=>0); + print submit(); + print end_form; +} + +sub print_response { + import_names('Q'); + print h2("Your profile"), + "You claim to be a ",b($Q::age)," year old ",b($Q::color,$Q::gender),".", + "You should be ashamed of yourself for lying so ", + "blatantly to big brother!", + hr; +} + diff --git a/lib/CGI/eg/make_links.pl b/lib/CGI/eg/make_links.pl new file mode 100755 index 0000000000..a0aa824556 --- /dev/null +++ b/lib/CGI/eg/make_links.pl @@ -0,0 +1,8 @@ +#!/usr/local/bin/perl + +# this is just a utility for creating symlinks from *.txt to *.cgi +# for documentation purposes. +foreach (<*.cgi>) { + ($target=$_)=~s/cgi$/txt/; + symlink $_,$target +} diff --git a/lib/CGI/eg/monty.cgi b/lib/CGI/eg/monty.cgi new file mode 100644 index 0000000000..693c2586fc --- /dev/null +++ b/lib/CGI/eg/monty.cgi @@ -0,0 +1,84 @@ +#!/usr/local/bin/perl + +use CGI; +use CGI::Carp qw/fatalsToBrowser/; + +$query = new CGI; + +print $query->header; +print $query->start_html("Example CGI.pm Form"); +print "<H1> Example CGI.pm Form</H1>\n"; +&print_prompt($query); +&do_work($query); +&print_tail; +print $query->end_html; + +sub print_prompt { + my($query) = @_; + + print $query->start_form; + print "<EM>What's your name?</EM><BR>"; + print $query->textfield('name'); + print $query->checkbox('Not my real name'); + + print "<P><EM>Where can you find English Sparrows?</EM><BR>"; + print $query->checkbox_group( + -name=>'Sparrow locations', + -Values=>[England,France,Spain,Asia,Hoboken], + -linebreak=>'yes', + -defaults=>[England,Asia]); + + print "<P><EM>How far can they fly?</EM><BR>", + 
$query->radio_group( + -name=>'how far', + -Values=>['10 ft','1 mile','10 miles','real far'], + -default=>'1 mile'); + + print "<P><EM>What's your favorite color?</EM> "; + print $query->popup_menu(-name=>'Color', + -Values=>['black','brown','red','yellow'], + -default=>'red'); + + print $query->hidden('Reference','Monty Python and the Holy Grail'); + + print "<P><EM>What have you got there?</EM><BR>"; + print $query->scrolling_list( + -name=>'possessions', + -Values=>['A Coconut','A Grail','An Icon', + 'A Sword','A Ticket'], + -size=>5, + -multiple=>'true'); + + print "<P><EM>Any parting comments?</EM><BR>"; + print $query->textarea(-name=>'Comments', + -rows=>10, + -columns=>50); + + print "<P>",$query->reset; + print $query->submit('Action','Shout'); + print $query->submit('Action','Scream'); + print $query->endform; + print "<HR>\n"; + } + +sub do_work { + my($query) = @_; + my(@values,$key); + + print "<H2>Here are the current settings in this form</H2>"; + + foreach $key ($query->param) { + print "<STRONG>$key</STRONG> -> "; + @values = $query->param($key); + print join(", ",@values),"<BR>\n"; + } +} + +sub print_tail { + print <<END; +<HR> +<ADDRESS>Lincoln D. Stein</ADDRESS><BR> +<A HREF="/">Home Page</A> +END + ; +} diff --git a/lib/CGI/eg/multiple_forms.cgi b/lib/CGI/eg/multiple_forms.cgi new file mode 100644 index 0000000000..b38bf93e96 --- /dev/null +++ b/lib/CGI/eg/multiple_forms.cgi @@ -0,0 +1,54 @@ +#!/usr/local/bin/perl + +use CGI; + +$query = new CGI; +print $query->header; +print $query->start_html('Multiple Forms'); +print "<H1>Multiple Forms</H1>\n"; + +# Print the first form +print $query->startform; +$name = $query->remote_user || 'anonymous@' . $query->remote_host; + +print "What's your name? ",$query->textfield('name',$name,50); +print "<P>What's the combination?<P>", + $query->checkbox_group('words',['eenie','meenie','minie','moe']); +print "<P>What's your favorite color? 
", + $query->popup_menu('color',['red','green','blue','chartreuse']), + "<P>"; +print $query->submit('form_1','Send Form 1'); +print $query->endform; + +# Print the second form +print "<HR>\n"; +print $query->startform; +print "Some radio buttons: ",$query->radio_group('radio buttons', + [qw{one two three four five}],'three'),"\n"; +print "<P>What's the password? ",$query->password_field('pass','secret'); +print $query->defaults,$query->submit('form_2','Send Form 2'),"\n"; +print $query->endform; + +print "<HR>\n"; + +$query->import_names('Q'); +if ($Q::form_1) { + print "<H2>Form 1 Submitted</H2>\n"; + print "Your name is <EM>$Q::name</EM>\n"; + print "<P>The combination is: <EM>{",join(",",@Q::words),"}</EM>\n"; + print "<P>Your favorite color is <EM>$Q::color</EM>\n"; +} elsif ($Q::form_2) { + print <<EOF; +<H2>Form 2 Submitted</H2> +<P>The value of the radio buttons is <EM>$Q::radio_buttons</EM> +<P>The secret password is <EM>$Q::pass</EM> +EOF + ; +} +print qq{<P><A HREF="./">Other examples</A>}; +print qq{<P><A HREF="../cgi_docs.html">Go to the documentation</A>}; + +print $query->end_html; + + + diff --git a/lib/CGI/eg/nph-clock.cgi b/lib/CGI/eg/nph-clock.cgi new file mode 100644 index 0000000000..55a2fbe545 --- /dev/null +++ b/lib/CGI/eg/nph-clock.cgi @@ -0,0 +1,18 @@ +#!/usr/local/bin/perl -w + +use CGI::Push qw(:standard :html3); + +do_push(-next_page=>\&draw_time,-delay=>1); + +sub draw_time { + my $time = `/bin/date`; + return start_html('Tick Tock'), + div({-align=>CENTER}, + h1('Virtual Clock'), + h2($time) + ), + hr, + a({-href=>'index.html'},'More examples'), + end_html(); +} + diff --git a/lib/CGI/eg/nph-multipart.cgi b/lib/CGI/eg/nph-multipart.cgi new file mode 100755 index 0000000000..f8cea59a87 --- /dev/null +++ b/lib/CGI/eg/nph-multipart.cgi @@ -0,0 +1,10 @@ +#!/usr/local/bin/perl +use CGI qw/:push -nph/; +$| = 1; +print multipart_init(-boundary=>'----------------here we go!'); +while (1) { + print multipart_start(-type=>'text/plain'), + "The 
current time is ",scalar(localtime),"\n", + multipart_end; + sleep 1; +} diff --git a/lib/CGI/eg/popup.cgi b/lib/CGI/eg/popup.cgi new file mode 100644 index 0000000000..88cea1da9c --- /dev/null +++ b/lib/CGI/eg/popup.cgi @@ -0,0 +1,32 @@ +#!/usr/local/bin/perl + +use CGI; +$query = new CGI; +print $query->header; +print $query->start_html('Popup Window'); + + +if (!$query->param) { + print "<H1>Ask your Question</H1>\n"; + print $query->startform(-target=>'_new'); + print "What's your name? ",$query->textfield('name'); + print "<P>What's the combination?<P>", + $query->checkbox_group(-name=>'words', + -values=>['eenie','meenie','minie','moe'], + -defaults=>['eenie','moe']); + + print "<P>What's your favorite color? ", + $query->popup_menu(-name=>'color', + -values=>['red','green','blue','chartreuse']), + "<P>"; + print $query->submit; + print $query->endform; + +} else { + print "<H1>And the Answer is...</H1>\n"; + print "Your name is <EM>",$query->param(name),"</EM>\n"; + print "<P>The keywords are: <EM>",join(", ",$query->param(words)),"</EM>\n"; + print "<P>Your favorite color is <EM>",$query->param(color),"</EM>\n"; +} +print qq{<P><A HREF="cgi_docs.html">Go to the documentation</A>}; +print $query->end_html; diff --git a/lib/CGI/eg/save_state.cgi b/lib/CGI/eg/save_state.cgi new file mode 100644 index 0000000000..85bacaf59a --- /dev/null +++ b/lib/CGI/eg/save_state.cgi @@ -0,0 +1,67 @@ +#!/usr/local/bin/perl + +use CGI; +$query = new CGI; + +print $query->header; +print $query->start_html("Save and Restore Example"); +print "<H1>Save and Restore Example</H1>\n"; + +# Here's where we take action on the previous request +&save_parameters($query) if $query->param('action') eq 'SAVE'; +$query = &restore_parameters($query) if $query->param('action') eq 'RESTORE'; + +# Here's where we create the form +print $query->start_multipart_form; +print "Popup 1: ",$query->popup_menu('popup1',[qw{red green purple magenta orange chartreuse brown}]),"\n"; +print "Popup 2: 
",$query->popup_menu('popup2',[qw{lion tiger bear zebra potto wildebeest frog emu gazelle}]),"\n"; +print "<P>"; +$default_name = $query->remote_addr . '.sav'; +print "Save/restore state from file: ",$query->textfield('savefile',$default_name),"\n"; +print "<P>"; +print $query->submit('action','SAVE'),$query->submit('action','RESTORE'); +print "<P>",$query->defaults; +print $query->endform; + +# Here we print out a bit at the end +print $query->end_html; + +sub save_parameters { + local($query) = @_; + local($filename) = &clean_name($query->param('savefile')); + if (open(FILE,">$filename")) { + $query->save(FILE); + close FILE; + print "<STRONG>State has been saved to file $filename</STRONG>\n"; + print "<P>If you remember this name you can restore the state later.\n"; + } else { + print "<STRONG>Error:</STRONG> couldn't write to file $filename: $!\n"; + } +} + +sub restore_parameters { + local($query) = @_; + local($filename) = &clean_name($query->param('savefile')); + if (open(FILE,$filename)) { + $query = new CGI(FILE); # Throw out the old query, replace it with a new one + close FILE; + print "<STRONG>State has been restored from file $filename</STRONG>\n"; + } else { + print "<STRONG>Error:</STRONG> couldn't restore file $filename: $!\n"; + } + return $query; +} + + +# Very important subroutine -- get rid of all the naughty +# metacharacters from the file name. If there are, we +# complain bitterly and die. +sub clean_name { + local($name) = @_; + unless ($name=~/^[\w\._\-]+$/) { + print "<STRONG>$name has naughty characters. Only "; + print "alphanumerics are allowed. 
You can't use absolute names.</STRONG>"; + die "Attempt to use naughty characters"; + } + return "WORLD_WRITABLE/$name"; +} diff --git a/lib/CGI/eg/tryit.cgi b/lib/CGI/eg/tryit.cgi new file mode 100644 index 0000000000..83c620c3e4 --- /dev/null +++ b/lib/CGI/eg/tryit.cgi @@ -0,0 +1,37 @@ +#!/usr/local/bin/perl + +use CGI ':standard'; + +print header; +print start_html('A Simple Example'), + h1('A Simple Example'), + start_form, + "What's your name? ",textfield('name'), + p, + "What's the combination?", + p, + checkbox_group(-name=>'words', + -values=>['eenie','meenie','minie','moe'], + -defaults=>['eenie','minie']), + p, + "What's your favorite color? ", + popup_menu(-name=>'color', + -values=>['red','green','blue','chartreuse']), + p, + submit, + end_form, + hr; + +if (param()) { + print + "Your name is: ",em(param('name')), + p, + "The keywords are: ",em(join(", ",param('words'))), + p, + "Your favorite color is: ",em(param('color')), + hr; +} +print a({href=>'../cgi_docs.html'},'Go to the documentation'); +print end_html; + + diff --git a/lib/CGI/eg/wilogo_gif.uu b/lib/CGI/eg/wilogo_gif.uu new file mode 100644 index 0000000000..c5d10423b4 --- /dev/null +++ b/lib/CGI/eg/wilogo_gif.uu @@ -0,0 +1,13 @@ +begin 444 wilogo.gif +M1TE&.#=A7@!$`(```'X2F?___RP`````7@!$```"_D2.J<#MKF)ZU,A3,[OO +M(IUY']A%9"6AW$F)+#2]Y:BNLF6_\;WMH<?#I72^VP+D"@*)F&"O25KRDM&B +M[%C-7;4_J)*6'4ZE&O`W8"1OQ5UGPWRBIKDPM!MW9J]-[;LUKL;$5W.'YQ3( +M(O<&-^>F*(A55\BX%UEI^;<VB0BH1RFX2=<IELE4^*0'N?-I>OJ8N%(*Z^4G +M.OJJ>8HZ.(>;JRMD><E[!KQHB^3;:APL6Z8\RKPK/)O:*-WLW&7]*\UYR]J) +M?<P=1MR-_6VN76,WGAV^32W^3CZ_SCY3;W__C-R^CU^\%M#T!9PVL(ZZ&>X" +M%A1XSM]!A?T8/C0T$1XMJG\B&G+,"-&C/(VS0(842;`)M'S>_OE8F#"=2S#* +M8LHLAS'D1Y,42UGY9O,F-T:X@@JEE@D1RW>/D@8R.DZ-+*E0CQ:9JJ5JU!SQ +MR&BU2D.;E*4'ER0TNY%G2A/Y.G[=VG%81+5K_UG$21<A6;=YP9'5B++O7:@7 +M\]J5]]?DX7:)%<]5%=B/55>-GQW55;$8L\RW6J8-9>QM7<^A/SMZK!ESY$,+ +(KPA.EJ```#L` +end diff --git a/lib/CPAN.pm b/lib/CPAN.pm index 84dfd31a2b..aeb6a57958 100644 --- a/lib/CPAN.pm +++ 
b/lib/CPAN.pm @@ -1,18 +1,12 @@ +# -*- Mode: cperl; coding: utf-8; cperl-indent-level: 4 -*- package CPAN; -use vars qw{$Try_autoload - $Revision - $META $Signal $Cwd $End - $Suppress_readline %Dontload - $Frontend $Defaultsite - }; #}; +$VERSION = '1.57_68RC'; -$VERSION = '1.52'; - -# $Id: CPAN.pm,v 1.276 2000/01/08 15:29:46 k Exp $ +# $Id: CPAN.pm,v 1.354 2000/10/08 14:20:57 k Exp $ # only used during development: $Revision = ""; -# $Revision = "[".substr(q$Revision: 1.276 $, 10)."]"; +# $Revision = "[".substr(q$Revision: 1.354 $, 10)."]"; use Carp (); use Config (); @@ -29,6 +23,8 @@ use Safe (); use Text::ParseWords (); use Text::Wrap; use File::Spec; +no lib "."; # we need to run chdir all over and we would get at wrong + # libraries there END { $End++; &cleanup; } @@ -47,6 +43,8 @@ END { $End++; &cleanup; } Eval 2048 Config 4096 Tarzip 8192 + Version 16384 + Queue 32768 ]; $CPAN::DEBUG ||= 0; @@ -55,9 +53,12 @@ $CPAN::Frontend ||= "CPAN::Shell"; $CPAN::Defaultsite ||= "ftp://ftp.perl.org/pub/CPAN"; package CPAN; -use vars qw($VERSION @EXPORT $AUTOLOAD $DEBUG $META $term); use strict qw(vars); +use vars qw($VERSION @EXPORT $AUTOLOAD $DEBUG $META $HAS_USABLE $term + $Revision $Signal $Cwd $End $Suppress_readline $Frontend + $Defaultsite $Have_warned); + @CPAN::ISA = qw(CPAN::Debug Exporter); @EXPORT = qw( @@ -75,12 +76,6 @@ sub AUTOLOAD { if (exists $EXPORT{$l}){ CPAN::Shell->$l(@_); } else { - my $ok = CPAN::Shell->try_dot_al($AUTOLOAD); - if ($ok) { - goto &$AUTOLOAD; -# } else { -# $CPAN::Frontend->mywarn("Could not autoload $AUTOLOAD"); - } $CPAN::Frontend->mywarn(qq{Unknown command "$AUTOLOAD". }. qq{Type ? for help. }); @@ -93,6 +88,8 @@ sub shell { $Suppress_readline = ! 
-t STDIN unless defined $Suppress_readline; CPAN::Config->load unless $CPAN::Config_loaded++; + CPAN::Index->read_metadata_cache; + my $prompt = "cpan> "; local($^W) = 1; unless ($Suppress_readline) { @@ -121,7 +118,7 @@ sub shell { select $odef; } - no strict; + # no strict; # I do not recall why no strict was here (2000-09-03) $META->checklock(); my $getcwd; $getcwd = $CPAN::Config->{'getcwd'} || 'cwd'; @@ -130,14 +127,19 @@ sub shell { $try_detect_readline = $term->ReadLine eq "Term::ReadLine::Stub" if $term; my $rl_avail = $Suppress_readline ? "suppressed" : ($term->ReadLine ne "Term::ReadLine::Stub") ? "enabled" : - "available (try ``install Bundle::CPAN'')"; + "available (try 'install Bundle::CPAN')"; $CPAN::Frontend->myprint( - qq{ -cpan shell -- CPAN exploration and modules installation (v$CPAN::VERSION$CPAN::Revision) -ReadLine support $rl_avail + sprintf qq{ +cpan shell -- CPAN exploration and modules installation (v%s%s) +ReadLine support %s -}) unless $CPAN::Config->{'inhibit_startup_message'} ; +}, + $CPAN::VERSION, + $CPAN::Revision, + $rl_avail + ) + unless $CPAN::Config->{'inhibit_startup_message'} ; my($continuation) = ""; while () { if ($Suppress_readline) { @@ -180,7 +182,7 @@ ReadLine support $rl_avail my $command = shift @line; eval { CPAN::Shell->$command(@line) }; warn $@ if $@; - chdir $cwd; + chdir $cwd or $CPAN::Frontend->mydie(qq{Could not chdir to "$cwd": $!}); $CPAN::Frontend->myprint("\n"); $continuation = ""; $prompt = "cpan> "; @@ -194,8 +196,8 @@ ReadLine support $rl_avail $CPAN::META->has_inst("Term::ReadLine::Perl") ) { delete $INC{"Term/ReadLine.pm"}; - my $redef; - local($SIG{__WARN__}) = CPAN::Shell::dotdot_onreload(\$redef); + my $redef = 0; + local($SIG{__WARN__}) = CPAN::Shell::paintdots_onreload(\$redef); require Term::ReadLine; $CPAN::Frontend->myprint("\n$redef subroutines in ". 
"Term::ReadLine redefined\n"); @@ -210,7 +212,6 @@ package CPAN::CacheMgr; use File::Find; package CPAN::Config; -import ExtUtils::MakeMaker 'neatvalue'; use vars qw(%can $dot_cpan); %can = ( @@ -231,6 +232,8 @@ use vars qw($last_time $date_of_03); @CPAN::Index::ISA = qw(CPAN::Debug); $last_time ||= 0; $date_of_03 ||= 0; +# use constant PROTOCOL => "2.0"; # outcommented to avoid warning on upgrade from 1.57 +sub PROTOCOL { 2.0 } package CPAN::InfoObj; @CPAN::InfoObj::ISA = qw(CPAN::Debug); @@ -248,7 +251,7 @@ package CPAN::Module; @CPAN::Module::ISA = qw(CPAN::InfoObj); package CPAN::Shell; -use vars qw($AUTOLOAD $redef @ISA); +use vars qw($AUTOLOAD @ISA); @CPAN::Shell::ISA = qw(CPAN::Debug); #-> sub CPAN::Shell::AUTOLOAD ; @@ -269,86 +272,12 @@ For this you just need to type }); } } else { - my $ok = CPAN::Shell->try_dot_al($AUTOLOAD); - if ($ok) { - goto &$AUTOLOAD; -# } else { -# $CPAN::Frontend->mywarn("Could not autoload $autoload"); - } $CPAN::Frontend->mywarn(qq{Unknown command '$autoload'. }. qq{Type ? for help. }); } } -#-> CPAN::Shell::try_dot_al -sub try_dot_al { - my($class,$autoload) = @_; - return unless $CPAN::Try_autoload; - # I don't see how to re-use that from the AutoLoader... - my($name,$ok); - # Braces used to preserve $1 et al. 
- { - my ($pkg,$func) = $autoload =~ /(.*)::([^:]+)$/; - $pkg =~ s|::|/|g; - if (defined($name=$INC{"$pkg.pm"})) - { - $name =~ s|^(.*)$pkg\.pm\z|$1auto/$pkg/$func.al|s; - $name = undef unless (-r $name); - } - unless (defined $name) - { - $name = "auto/$autoload.al"; - $name =~ s|::|/|g; - } - } - my $save = $@; - eval {local $SIG{__DIE__};require $name}; - if ($@) { - if (substr($autoload,-9) eq '::DESTROY') { - *$autoload = sub {}; - $ok = 1; - } else { - if ($name =~ s{(\w{12,})\.al\z}{substr($1,0,11).".al"}e){ - eval {local $SIG{__DIE__};require $name}; - } - if ($@){ - $@ =~ s/ at .*\n//; - Carp::croak $@; - } else { - $ok = 1; - } - } - } else { - - $ok = 1; - - } - $@ = $save; -# my $lm = Carp::longmess(); -# warn "ok[$ok] autoload[$autoload] longmess[$lm]"; # debug - return $ok; -} - -#### autoloader is experimental -#### to try it we have to set $Try_autoload and uncomment -#### the use statement and uncomment the __END__ below -#### You also need AutoSplit 1.01 available. MakeMaker will -#### then build CPAN with all the AutoLoad stuff. 
-# use AutoLoader; -# $Try_autoload = 1; - -if ($CPAN::Try_autoload) { - my $p; - for $p (qw( - CPAN::Author CPAN::Bundle CPAN::CacheMgr CPAN::Complete - CPAN::Config CPAN::Debug CPAN::Distribution CPAN::FTP - CPAN::FTP::netrc CPAN::Index CPAN::InfoObj CPAN::Module - )) { - *{"$p\::AUTOLOAD"} = \&AutoLoader::AUTOLOAD; - } -} - package CPAN::Tarzip; use vars qw($AUTOLOAD @ISA); @CPAN::Tarzip::ISA = qw(CPAN::Debug); @@ -402,70 +331,81 @@ package CPAN::Queue; use vars qw{ @All }; +# CPAN::Queue::new ; sub new { - my($class,$mod) = @_; - my $self = bless {mod => $mod}, $class; + my($class,$s) = @_; + my $self = bless { qmod => $s }, $class; push @All, $self; - # my @all = map { $_->{mod} } @All; - # warn "Adding Queue object for mod[$mod] all[@all]"; return $self; } +# CPAN::Queue::first ; sub first { my $obj = $All[0]; - $obj->{mod}; + $obj->{qmod}; } +# CPAN::Queue::delete_first ; sub delete_first { my($class,$what) = @_; my $i; for my $i (0..$#All) { - if ( $All[$i]->{mod} eq $what ) { + if ( $All[$i]->{qmod} eq $what ) { splice @All, $i, 1; return; } } } +# CPAN::Queue::jumpqueue ; sub jumpqueue { - my $class = shift; - my @what = @_; - my $obj; + my $class = shift; + my @what = @_; + CPAN->debug(sprintf("before jumpqueue All[%s] what[%s]", + join(",",map {$_->{qmod}} @All), + join(",",@what) + )) if $CPAN::DEBUG; WHAT: for my $what (reverse @what) { - my $jumped = 0; - for (my $i=0; $i<$#All;$i++) { #prevent deep recursion - if ($All[$i]->{mod} eq $what){ - $jumped++; - if ($jumped > 100) { # one's OK if e.g. just processing now; - # more are OK if user typed it several - # times - $CPAN::Frontend->mywarn( + my $jumped = 0; + for (my $i=0; $i<$#All;$i++) { #prevent deep recursion + CPAN->debug("i[$All[$i]]what[$what]") if $CPAN::DEBUG; + if ($All[$i]->{qmod} eq $what){ + $jumped++; + if ($jumped > 100) { # one's OK if e.g. 
just + # processing now; more are OK if + # user typed it several times + $CPAN::Frontend->mywarn( qq{Object [$what] queued more than 100 times, ignoring} ); - next WHAT; - } - } + next WHAT; + } + } + } + my $obj = bless { qmod => $what }, $class; + unshift @All, $obj; } - my $obj = bless { mod => $what }, $class; - unshift @All, $obj; - } + CPAN->debug(sprintf("after jumpqueue All[%s] what[%s]", + join(",",map {$_->{qmod}} @All), + join(",",@what) + )) if $CPAN::DEBUG; } +# CPAN::Queue::exists ; sub exists { my($self,$what) = @_; - my @all = map { $_->{mod} } @All; - my $exists = grep { $_->{mod} eq $what } @All; - # warn "Checking exists in Queue object for mod[$what] all[@all] exists[$exists]"; + my @all = map { $_->{qmod} } @All; + my $exists = grep { $_->{qmod} eq $what } @All; + # warn "in exists what[$what] all[@all] exists[$exists]"; $exists; } +# CPAN::Queue::delete ; sub delete { my($self,$mod) = @_; - @All = grep { $_->{mod} ne $mod } @All; - # my @all = map { $_->{mod} } @All; - # warn "Deleting Queue object for mod[$mod] all[@all]"; + @All = grep { $_->{qmod} ne $mod } @All; } +# CPAN::Queue::nullify_queue ; sub nullify_queue { @All = (); } @@ -476,44 +416,31 @@ package CPAN; $META ||= CPAN->new; # In case we re-eval ourselves we need the || -1; +# from here on only subs. 
+################################################################################ -# __END__ # uncomment this and AutoSplit version 1.01 will split it - -#-> sub CPAN::autobundle ; -sub autobundle; -#-> sub CPAN::bundle ; -sub bundle; -#-> sub CPAN::expand ; -sub expand; -#-> sub CPAN::force ; -sub force; -#-> sub CPAN::install ; -sub install; -#-> sub CPAN::make ; -sub make; -#-> sub CPAN::clean ; -sub clean; -#-> sub CPAN::test ; -sub test; - -#-> sub CPAN::all ; +#-> sub CPAN::all_objects ; sub all_objects { my($mgr,$class) = @_; CPAN::Config->load unless $CPAN::Config_loaded++; CPAN->debug("mgr[$mgr] class[$class]") if $CPAN::DEBUG; CPAN::Index->reload; - values %{ $META->{$class} }; + values %{ $META->{readwrite}{$class} }; # unsafe meta access, ok } *all = \&all_objects; -# Called by shell, not in batch mode. Not clean XXX +# Called by shell, not in batch mode. In batch mode I see no risk in +# having many processes updating something as installations are +# continually checked at runtime. In shell mode I suspect it is +# unintentional to open more than one shell at a time + #-> sub CPAN::checklock ; sub checklock { my($self) = @_; my $lockfile = MM->catfile($CPAN::Config->{cpan_home},".lock"); if (-f $lockfile && -M _ > 0) { - my $fh = FileHandle->new($lockfile); + my $fh = FileHandle->new($lockfile) or + $CPAN::Frontend->mydie("Could not open $lockfile: $!"); my $other = <$fh>; $fh->close; if (defined $other && $other) { @@ -545,7 +472,11 @@ You may want to kill it and delete the lockfile, maybe. On UNIX try: qq{ and then rerun us.\n} ); } - } + } else { + $CPAN::Frontend->mydie(sprintf("CPAN.pm panic: Lockfile $lockfile ". + "reports other process with ID ". + "$other. 
Cannot proceed.\n")); + } } my $dotcpan = $CPAN::Config->{cpan_home}; eval { File::Path::mkpath($dotcpan);}; @@ -610,11 +541,11 @@ or $fh->print($$, "\n"); $self->{LOCK} = $lockfile; $fh->close; - $SIG{'TERM'} = sub { + $SIG{TERM} = sub { &cleanup; $CPAN::Frontend->mydie("Got SIGTERM, leaving"); }; - $SIG{'INT'} = sub { + $SIG{INT} = sub { # no blocks!!! &cleanup if $Signal; $CPAN::Frontend->mydie("Got another SIGINT") if $Signal; @@ -642,7 +573,8 @@ or # # Larry - $SIG{'__DIE__'} = \&cleanup; + # global backstop to cleanup if we should really die + $SIG{__DIE__} = \&cleanup; $self->debug("Signal handler set.") if $CPAN::DEBUG; } @@ -663,13 +595,51 @@ sub exists { CPAN::Index->reload; ### Carp::croak "exists called without class argument" unless $class; $id ||= ""; - exists $META->{$class}{$id}; + exists $META->{readonly}{$class}{$id} or + exists $META->{readwrite}{$class}{$id}; # unsafe meta access, ok } #-> sub CPAN::delete ; sub delete { my($mgr,$class,$id) = @_; - delete $META->{$class}{$id}; + delete $META->{readonly}{$class}{$id}; # unsafe meta access, ok + delete $META->{readwrite}{$class}{$id}; # unsafe meta access, ok +} + +#-> sub CPAN::has_usable +# has_inst is sometimes too optimistic, we should replace it with this +# has_usable whenever a case is given +sub has_usable { + my($self,$mod,$message) = @_; + return 1 if $HAS_USABLE->{$mod}; + my $has_inst = $self->has_inst($mod,$message); + return unless $has_inst; + my $usable; + $usable = { + LWP => [ # we frequently had "Can't locate object + # method "new" via package "LWP::UserAgent" at + # (eval 69) line 2006 + sub {require LWP}, + sub {require LWP::UserAgent}, + sub {require HTTP::Request}, + sub {require URI::URL}, + ], + Net::FTP => [ + sub {require Net::FTP}, + sub {require Net::Config}, + ] + }; + if ($usable->{$mod}) { + for my $c (0..$#{$usable->{$mod}}) { + my $code = $usable->{$mod}[$c]; + my $ret = eval { &$code() }; + if ($@) { + warn "DEBUG: c[$c]\$\@[$@]ret[$ret]"; + return; + } + } + } 
+ return $HAS_USABLE->{$mod} = 1; } #-> sub CPAN::has_inst @@ -677,11 +647,14 @@ sub has_inst { my($self,$mod,$message) = @_; Carp::croak("CPAN->has_inst() called without an argument") unless defined $mod; - if (defined $message && $message eq "no") { - $Dontload{$mod}||=1; - return 0; - } elsif (exists $Dontload{$mod}) { - return 0; + if (defined $message && $message eq "no" + || + exists $CPAN::META->{dontload_hash}{$mod} # unsafe meta access, ok + || + exists $CPAN::Config->{dontload_hash}{$mod} + ) { + $CPAN::META->{dontload_hash}{$mod}||=1; # unsafe meta access, ok + return 0; } my $file = $mod; my $obj; @@ -707,13 +680,13 @@ sub has_inst { } return 1; } elsif ($mod eq "Net::FTP") { - warn qq{ + $CPAN::Frontend->mywarn(qq{ Please, install Net::FTP as soon as possible. CPAN.pm installs it for you if you just type install Bundle::libnet -}; - sleep 2; +}) unless $Have_warned->{"Net::FTP"}++; + sleep 3; } elsif ($mod eq "MD5"){ $CPAN::Frontend->myprint(qq{ CPAN: MD5 security checks disabled because MD5 not installed. @@ -732,7 +705,9 @@ sub instance { my($mgr,$class,$id) = @_; CPAN::Index->reload; $id ||= ""; - $META->{$class}{$id} ||= $class->new(ID => $id ); + # unsafe meta access, ok? 
+ return $META->{readwrite}{$class}{$id} if exists $META->{readwrite}{$class}{$id}; + $META->{readwrite}{$class}{$id} ||= $class->new(ID => $id); } #-> sub CPAN::new ; @@ -760,9 +735,9 @@ sub cleanup { } } return if $ineval && !$End; - return unless defined $META->{'LOCK'}; - return unless -f $META->{'LOCK'}; - unlink $META->{'LOCK'}; + return unless defined $META->{LOCK}; # unsafe meta access, ok + return unless -f $META->{LOCK}; # unsafe meta access, ok + unlink $META->{LOCK}; # unsafe meta access, ok # require Carp; # Carp::cluck("DEBUGGING"); $CPAN::Frontend->mywarn("Lockfile removed.\n"); @@ -785,6 +760,7 @@ sub cachesize { shift->{DU}; } +#-> sub CPAN::CacheMgr::tidyup ; sub tidyup { my($self) = @_; return unless -d $self->{ID}; @@ -942,49 +918,85 @@ sub debug { package CPAN::Config; #-> sub CPAN::Config::edit ; +# returns true on successful action sub edit { - my($class,@args) = @_; + my($self,@args) = @_; return unless @args; - CPAN->debug("class[$class]args[".join(" | ",@args)."]"); + CPAN->debug("self[$self]args[".join(" | ",@args)."]"); my($o,$str,$func,$args,$key_exists); $o = shift @args; if($can{$o}) { - $class->$o(@args); + $self->$o(@args); return 1; } else { - if (ref($CPAN::Config->{$o}) eq ARRAY) { + CPAN->debug("o[$o]") if $CPAN::DEBUG; + if ($o =~ /list$/) { $func = shift @args; $func ||= ""; + CPAN->debug("func[$func]") if $CPAN::DEBUG; + my $changed; # Let's avoid eval, it's easier to comprehend without. 
if ($func eq "push") { push @{$CPAN::Config->{$o}}, @args; + $changed = 1; } elsif ($func eq "pop") { pop @{$CPAN::Config->{$o}}; + $changed = 1; } elsif ($func eq "shift") { shift @{$CPAN::Config->{$o}}; + $changed = 1; } elsif ($func eq "unshift") { unshift @{$CPAN::Config->{$o}}, @args; + $changed = 1; } elsif ($func eq "splice") { splice @{$CPAN::Config->{$o}}, @args; + $changed = 1; } elsif (@args) { $CPAN::Config->{$o} = [@args]; + $changed = 1; } else { - $CPAN::Frontend->myprint( - join "", - " $o ", - ExtUtils::MakeMaker::neatvalue($CPAN::Config->{$o}), - "\n" - ); + $self->prettyprint($o); } + if ($o eq "urllist" && $changed) { + # reset the cached values + undef $CPAN::FTP::Thesite; + undef $CPAN::FTP::Themethod; + } + return $changed; } else { $CPAN::Config->{$o} = $args[0] if defined $args[0]; - $CPAN::Frontend->myprint(" $o " . - (defined $CPAN::Config->{$o} ? - $CPAN::Config->{$o} : "UNDEFINED")); + $self->prettyprint($o); } } } +sub prettyprint { + my($self,$k) = @_; + my $v = $CPAN::Config->{$k}; + if (ref $v) { + my(@report) = ref $v eq "ARRAY" ? + @$v : + map { sprintf(" %-18s => %s\n", + $_, + defined $v->{$_} ? $v->{$_} : "UNDEFINED" + )} keys %$v; + $CPAN::Frontend->myprint( + join( + "", + sprintf( + " %-18s\n", + $k + ), + map {"\t$_\n"} @report + ) + ); + } elsif (defined $v) { + $CPAN::Frontend->myprint(sprintf " %-18s %s\n", $k, $v); + } else { + $CPAN::Frontend->myprint(sprintf " %-18s %s\n", $k, "UNDEFINED"); + } +} + #-> sub CPAN::Config::commit ; sub commit { my($self,$configpm) = @_; @@ -1005,7 +1017,8 @@ Please specify a filename where to save the configuration or try } } - my $msg = <<EOF unless $configpm =~ /MyConfig/; + my $msg; + $msg = <<EOF unless $configpm =~ /MyConfig/; # This is CPAN.pm's systemwide configuration file. 
This file provides # defaults for users, and the values can be changed in a per-user @@ -1016,7 +1029,8 @@ EOF $msg ||= "\n"; my($fh) = FileHandle->new; rename $configpm, "$configpm~" if -f $configpm; - open $fh, ">$configpm" or warn "Couldn't open >$configpm: $!"; + open $fh, ">$configpm" or + $CPAN::Frontend->mywarn("Couldn't open >$configpm: $!"); $fh->print(qq[$msg\$CPAN::Config = \{\n]); foreach (sort keys %$CPAN::Config) { $fh->print( @@ -1069,8 +1083,8 @@ sub load { # system wide settings shift @INC; } - return unless @miss = $self->not_loaded; - # XXX better check for arrayrefs too + return unless @miss = $self->missing_config_data; + require CPAN::FirstTime; my($configpm,$fh,$redo,$theycalled); $redo ||= ""; @@ -1137,15 +1151,18 @@ $configpm initialized. CPAN::FirstTime::init($configpm); } -#-> sub CPAN::Config::not_loaded ; -sub not_loaded { +#-> sub CPAN::Config::missing_config_data ; +sub missing_config_data { my(@miss); - for (qw( - cpan_home keep_source_where build_dir build_cache scan_cache - index_expire gzip tar unzip make pager makepl_arg make_arg - make_install_arg urllist inhibit_startup_message - ftp_proxy http_proxy no_proxy prerequisites_policy - )) { + for ( + "cpan_home", "keep_source_where", "build_dir", "build_cache", + "scan_cache", "index_expire", "gzip", "tar", "unzip", "make", + "pager", + "makepl_arg", "make_arg", "make_install_arg", "urllist", + "inhibit_startup_message", "ftp_proxy", "http_proxy", "no_proxy", + "prerequisites_policy", + "cache_metadata", + ) { push @miss, $_ unless defined $CPAN::Config->{$_}; } return @miss; @@ -1241,11 +1258,19 @@ Other *help = \&h; #-> sub CPAN::Shell::a ; -sub a { $CPAN::Frontend->myprint(shift->format_result('Author',@_));} -#-> sub CPAN::Shell::b ; -sub b { +sub a { + my($self,@arg) = @_; + # authors are always UPPERCASE + for (@arg) { + $_ = uc $_; + } + $CPAN::Frontend->myprint($self->format_result('Author',@arg)); +} + +#-> sub CPAN::Shell::local_bundles ; + +sub local_bundles { 
my($self,@which) = @_; - CPAN->debug("which[@which]") if $CPAN::DEBUG; my($incdir,$bdir,$dh); foreach $incdir ($CPAN::Config->{'cpan_home'},@INC) { $bdir = MM->catdir($incdir,"Bundle"); @@ -1253,15 +1278,24 @@ sub b { my($entry); for $entry ($dh->read) { next if -d MM->catdir($bdir,$entry); - next unless $entry =~ s/\.pm\z//; + next unless $entry =~ s/\.pm(?!\n)\Z//; $CPAN::META->instance('CPAN::Bundle',"Bundle::$entry"); } } } +} + +#-> sub CPAN::Shell::b ; +sub b { + my($self,@which) = @_; + CPAN->debug("which[@which]") if $CPAN::DEBUG; + $self->local_bundles; $CPAN::Frontend->myprint($self->format_result('Bundle',@which)); } + #-> sub CPAN::Shell::d ; sub d { $CPAN::Frontend->myprint(shift->format_result('Distribution',@_));} + #-> sub CPAN::Shell::m ; sub m { # emacs confused here }; sub mimimimimi { # emacs in sync here $CPAN::Frontend->myprint(shift->format_result('Module',@_)); @@ -1286,13 +1320,16 @@ sub i { } #-> sub CPAN::Shell::o ; + +# CPAN::Shell::o and CPAN::Config::edit are closely related. 
'o conf' +# should have been called set and 'o debug' maybe 'set debug' sub o { my($self,$o_type,@o_what) = @_; $o_type ||= ""; CPAN->debug("o_type[$o_type] o_what[".join(" | ",@o_what)."]\n"); if ($o_type eq 'conf') { shift @o_what if @o_what && $o_what[0] eq 'help'; - if (!@o_what) { + if (!@o_what) { # print all things, "o conf" my($k,$v); $CPAN::Frontend->myprint("CPAN::Config options"); if (exists $INC{'CPAN/Config.pm'}) { @@ -1308,25 +1345,12 @@ sub o { } $CPAN::Frontend->myprint("\n"); for $k (sort keys %$CPAN::Config) { - $v = $CPAN::Config->{$k}; - if (ref $v) { - $CPAN::Frontend->myprint( - join( - "", - sprintf( - " %-18s\n", - $k - ), - map {"\t$_\n"} @{$v} - ) - ); - } else { - $CPAN::Frontend->myprint(sprintf " %-18s %s\n", $k, $v); - } + CPAN::Config->prettyprint($k); } $CPAN::Frontend->myprint("\n"); } elsif (!CPAN::Config->edit(@o_what)) { - $CPAN::Frontend->myprint(qq[Type 'o conf' to view configuration edit options\n\n]); + $CPAN::Frontend->myprint(qq{Type 'o conf' to view configuration }. 
+ qq{edit options\n\n}); } } elsif ($o_type eq 'debug') { my(%valid); @@ -1369,7 +1393,8 @@ sub o { my($k,$v); for $k (sort {$CPAN::DEBUG{$a} <=> $CPAN::DEBUG{$b}} keys %CPAN::DEBUG) { $v = $CPAN::DEBUG{$k}; - $CPAN::Frontend->myprint(sprintf " %-14s(%s)\n", $k, $v) if $v & $CPAN::DEBUG; + $CPAN::Frontend->myprint(sprintf " %-14s(%s)\n", $k, $v) + if $v & $CPAN::DEBUG; } } else { $CPAN::Frontend->myprint("Debugging turned off completely.\n"); @@ -1383,10 +1408,10 @@ Known options: } } -sub dotdot_onreload { +sub paintdots_onreload { my($ref) = shift; sub { - if ( $_[0] =~ /Subroutine (\w+) redefined/ ) { + if ( $_[0] =~ /[Ss]ubroutine (\w+) redefined/ ) { my($subr) = $1; ++$$ref; local($|) = 1; @@ -1407,8 +1432,8 @@ sub reload { CPAN->debug("reloading the whole CPAN.pm") if $CPAN::DEBUG; my $fh = FileHandle->new($INC{'CPAN.pm'}); local($/); - $redef = 0; - local($SIG{__WARN__}) = dotdot_onreload(\$redef); + my $redef = 0; + local($SIG{__WARN__}) = paintdots_onreload(\$redef); eval <$fh>; warn $@ if $@; $CPAN::Frontend->myprint("\n$redef subroutines redefined\n"); @@ -1424,12 +1449,12 @@ index re-reads the index files\n}); sub _binary_extensions { my($self) = shift @_; my(@result,$module,%seen,%need,$headerdone); - my $isaperl = q{perl5[._-]\\d{3}(_[0-4][0-9])?\\.tar[._-]gz\z}; for $module ($self->expand('Module','/./')) { my $file = $module->cpan_file; next if $file eq "N/A"; next if $file =~ /^Contact Author/; - next if $file =~ / $isaperl /xo; + my $dist = $CPAN::META->instance('CPAN::Distribution',$file); + next if $dist->isa_perl; next unless $module->xs_file; local($|) = 1; $CPAN::Frontend->myprint("."); @@ -1467,15 +1492,21 @@ sub _u_r_common { my($self) = shift @_; my($what) = shift @_; CPAN->debug("self[$self] what[$what] args[@_]") if $CPAN::DEBUG; - Carp::croak "Usage: \$obj->_u_r_common($what)" unless defined $what; - Carp::croak "Usage: \$obj->_u_r_common(a|r|u)" unless $what =~ /^[aru]$/; + Carp::croak "Usage: \$obj->_u_r_common(a|r|u)" unless + $what 
&& $what =~ /^[aru]$/; my(@args) = @_; @args = '/./' unless @args; my(@result,$module,%seen,%need,$headerdone, $version_undefs,$version_zeroes); $version_undefs = $version_zeroes = 0; my $sprintf = "%-25s %9s %9s %s\n"; - for $module ($self->expand('Module',@args)) { + my @expand = $self->expand('Module',@args); + my $expand = scalar @expand; + if (0) { # Looks like noise to me, was very useful for debugging + # for metadata cache + $CPAN::Frontend->myprint(sprintf "%d matches in the database\n", $expand); + } + for $module (@expand) { my $file = $module->cpan_file; next unless defined $file; # ?? my($latest) = $module->cpan_version; @@ -1493,7 +1524,7 @@ sub _u_r_common { } elsif ($have == 0){ $version_zeroes++; } - next if $have >= $latest; + next unless CPAN::Version->vgt($latest, $have); # to be pedantic we should probably say: # && !($have eq "undef" && $latest ne "undef" && $latest gt ""); # to catch the case where CPAN has a version 0 and we have a version undef @@ -1531,9 +1562,11 @@ sub _u_r_common { "in CPAN file" )); } - $latest = substr($latest,0,8) if length($latest) > 8; - $have = substr($have,0,8) if length($have) > 8; - $CPAN::Frontend->myprint(sprintf $sprintf, $module->id, $have, $latest, $file); + $CPAN::Frontend->myprint(sprintf $sprintf, + $module->id, + $have, + $latest, + $file); $need{$module->id}++; } unless (%need) { @@ -1615,47 +1648,80 @@ sub autobundle { $to\n\n"); } +#-> sub CPAN::Shell::expandany ; +sub expandany { + my($self,$s) = @_; + CPAN->debug("s[$s]") if $CPAN::DEBUG; + if ($s =~ m|/|) { # looks like a file + return $CPAN::META->instance('CPAN::Distribution',$s); + # Distributions spring into existence, not expand + } elsif ($s =~ m|^Bundle::|) { + $self->local_bundles; # scanning so late for bundles seems + # both attractive and crumpy: always + # current state but easy to forget + # somewhere + return $self->expand('Bundle',$s); + } else { + return $self->expand('Module',$s) + if $CPAN::META->exists('CPAN::Module',$s); + } + 
return; +} + #-> sub CPAN::Shell::expand ; sub expand { shift; my($type,@args) = @_; my($arg,@m); for $arg (@args) { - my $regex; + my($regex,$command); if ($arg =~ m|^/(.*)/$|) { $regex = $1; - } + } elsif ($arg =~ m/^=/) { + $command = substr($arg,1); + } my $class = "CPAN::$type"; my $obj; if (defined $regex) { - for $obj ( - sort - {$a->id cmp $b->id} - $CPAN::META->all_objects($class) - ) { - unless ($obj->id){ - # BUG, we got an empty object somewhere - CPAN->debug(sprintf( - "Empty id on obj[%s]%%[%s]", - $obj, - join(":", %$obj) - )) if $CPAN::DEBUG; - next; - } - push @m, $obj - if $obj->id =~ /$regex/i - or - ( - ( - $] < 5.00303 ### provide sort of - ### compatibility with 5.003 - || - $obj->can('name') - ) - && - $obj->name =~ /$regex/i - ); - } + for $obj ( + sort + {$a->id cmp $b->id} + $CPAN::META->all_objects($class) + ) { + unless ($obj->id){ + # BUG, we got an empty object somewhere + CPAN->debug(sprintf( + "Empty id on obj[%s]%%[%s]", + $obj, + join(":", %$obj) + )) if $CPAN::DEBUG; + next; + } + push @m, $obj + if $obj->id =~ /$regex/i + or + ( + ( + $] < 5.00303 ### provide sort of + ### compatibility with 5.003 + || + $obj->can('name') + ) + && + $obj->name =~ /$regex/i + ); + } + } elsif ($command) { + die "leading equal sign in command disabled, ". + "please edit CPAN.pm to enable eval() or ". + "do not use = on argument list"; + for my $self ( + sort + {$a->id cmp $b->id} + $CPAN::META->all_objects($class) + ) { + push @m, $self if eval $command; + } } else { my($xarg) = $arg; if ( $type eq 'Bundle' ) { @@ -1770,42 +1836,41 @@ sub rematein { } setup_output(); CPAN->debug("pragma[$pragma]meth[$meth] some[@some]") if $CPAN::DEBUG; - my($s,@s); + + # Here is the place to set "test_count" on all involved parties to + # 0. We then can pass this counter on to the involved + # distributions and those can refuse to test if test_count > X. In + # the first stab at it we could use a 1 for "X". 
+ + # But when do I reset the distributions to start with 0 again? + # Jost suggested to have a random or cycling interaction ID that + # we pass through. But the ID is something that is just left lying + # around in addition to the counter, so I'd prefer to set the + # counter to 0 now, and repeat at the end of the loop. But what + # about dependencies? They appear later and are not reset, they + # enter the queue but not its copy. How do they get a sensible + # test_count? + + # construct the queue + my($s,@s,@qcopy); foreach $s (@some) { - CPAN::Queue->new($s); - } - while ($s = CPAN::Queue->first) { my $obj; if (ref $s) { + CPAN->debug("s is an object[$s]") if $CPAN::DEBUG; $obj = $s; - } elsif ($s =~ m|/|) { # looks like a file - $obj = $CPAN::META->instance('CPAN::Distribution',$s); - } elsif ($s =~ m|^Bundle::|) { - $obj = $CPAN::META->instance('CPAN::Bundle',$s); + } elsif ($s =~ m|^/|) { # looks like a regexp + $CPAN::Frontend->mywarn("Sorry, $meth with a regular expression is ". + "not supported\n"); + sleep 2; + next; } else { - $obj = $CPAN::META->instance('CPAN::Module',$s) - if $CPAN::META->exists('CPAN::Module',$s); + CPAN->debug("calling expandany [$s]") if $CPAN::DEBUG; + $obj = CPAN::Shell->expandany($s); } if (ref $obj) { - CPAN->debug( - qq{pragma[$pragma]meth[$meth]obj[$obj]as_string\[}. - $obj->as_string. 
- qq{\]} - ) if $CPAN::DEBUG; - $obj->$pragma() - if - $pragma - && - ($] < 5.00303 || $obj->can($pragma)); ### - ### compatibility - ### with - ### 5.003 - if ($]>=5.00303 && $obj->can('called_for')) { - $obj->called_for($s); - } - CPAN::Queue->delete($s) if $obj->$meth(); # if it is more - # than once in - # the queue + $obj->color_cmd_tmps(0,1); + CPAN::Queue->new($s); + push @qcopy, $obj; } elsif ($CPAN::META->exists('CPAN::Author',$s)) { $obj = $CPAN::META->instance('CPAN::Author',$s); $CPAN::Frontend->myprint( @@ -1814,6 +1879,7 @@ sub rematein { $obj->fullname, " ;-)\n" ); + sleep 2; } else { $CPAN::Frontend ->myprint(qq{Warning: Cannot $meth $s, }. @@ -1822,13 +1888,55 @@ Try the command i /$s/ -to find objects with similar identifiers. +to find objects with matching identifiers. }); + sleep 2; } + } + + # queuerunner (please be warned: when I started to change the + # queue to hold objects instead of names, I made one or two + # mistakes and never found which. I reverted back instead) + while ($s = CPAN::Queue->first) { + my $obj; + if (ref $s) { + $obj = $s; # I do not believe, we would survive if this happened + } else { + $obj = CPAN::Shell->expandany($s); + } + if ($pragma + && + ($] < 5.00303 || $obj->can($pragma))){ + ### compatibility with 5.003 + $obj->$pragma($meth); # the pragma "force" in + # "CPAN::Distribution" must know + # what we are intending + } + if ($]>=5.00303 && $obj->can('called_for')) { + $obj->called_for($s); + } + CPAN->debug( + qq{pragma[$pragma]meth[$meth]obj[$obj]as_string\[}. + $obj->as_string. 
+ qq{\]} + ) if $CPAN::DEBUG; + + if ($obj->$meth()){ + CPAN::Queue->delete($s); + } else { + CPAN->debug("failed"); + } + + $obj->undelay; CPAN::Queue->delete_first($s); } + for my $obj (@qcopy) { + $obj->color_cmd_tmps(0,0); + } } +#-> sub CPAN::Shell::dump ; +sub dump { shift->rematein('dump',@_); } #-> sub CPAN::Shell::force ; sub force { shift->rematein('force',@_); } #-> sub CPAN::Shell::get ; @@ -1860,7 +1968,7 @@ sub ftp_get { my $ftp = Net::FTP->new($host); return 0 unless defined $ftp; $ftp->debug(1) if $CPAN::DEBUG{'FTP'} & $CPAN::DEBUG; - $class->debug(qq[Going to ->login("anonymous","$Config::Config{'cf_email'}")\n]); + $class->debug(qq[Going to login("anonymous","$Config::Config{cf_email}")]); unless ( $ftp->login("anonymous",$Config::Config{'cf_email'}) ){ warn "Couldn't login on $host"; return; @@ -1881,61 +1989,33 @@ sub ftp_get { # If more accuracy is wanted/needed, Chris Leach sent me this patch... - # leach,> *** /install/perl/live/lib/CPAN.pm- Wed Sep 24 13:08:48 1997 - # leach,> --- /tmp/cp Wed Sep 24 13:26:40 1997 - # leach,> *************** - # leach,> *** 1562,1567 **** - # leach,> --- 1562,1580 ---- - # leach,> return 1 if substr($url,0,4) eq "file"; - # leach,> return 1 unless $url =~ m|://([^/]+)|; - # leach,> my $host = $1; - # leach,> + my $proxy = $CPAN::Config->{'http_proxy'} || $ENV{'http_proxy'}; - # leach,> + if ($proxy) { - # leach,> + $proxy =~ m|://([^/:]+)|; - # leach,> + $proxy = $1; - # leach,> + my $noproxy = $CPAN::Config->{'no_proxy'} || $ENV{'no_proxy'}; - # leach,> + if ($noproxy) { - # leach,> + if ($host !~ /$noproxy$/) { - # leach,> + $host = $proxy; - # leach,> + } - # leach,> + } else { - # leach,> + $host = $proxy; - # leach,> + } - # leach,> + } - # leach,> require Net::Ping; - # leach,> return 1 unless $Net::Ping::VERSION >= 2; - # leach,> my $p; - - -# this is quite optimistic and returns one on several occasions where -# inappropriate. But this does no harm. 
It would do harm if we were -# too pessimistic (as I was before the http_proxy -sub is_reachable { - my($self,$url) = @_; - return 1; # we can't simply roll our own, firewalls may break ping - return 0 unless $url; - return 1 if substr($url,0,4) eq "file"; - return 1 unless $url =~ m|^(\w+)://([^/]+)|; - my $proxytype = $1 . "_proxy"; # ftp_proxy or http_proxy - my $host = $2; - return 1 if $CPAN::Config->{$proxytype} || $ENV{$proxytype}; - require Net::Ping; - return 1 unless $Net::Ping::VERSION >= 2; - my $p; - # 1.3101 had it different: only if the first eval raised an - # exception we tried it with TCP. Now we are happy if icmp wins - # the order and return, we don't even check for $@. Thanks to - # thayer@uis.edu for the suggestion. - eval {$p = Net::Ping->new("icmp");}; - return 1 if $p && ref($p) && $p->ping($host, 10); - eval {$p = Net::Ping->new("tcp");}; - $CPAN::Frontend->mydie($@) if $@; - return $p->ping($host, 10); -} + # > *** /install/perl/live/lib/CPAN.pm- Wed Sep 24 13:08:48 1997 + # > --- /tmp/cp Wed Sep 24 13:26:40 1997 + # > *************** + # > *** 1562,1567 **** + # > --- 1562,1580 ---- + # > return 1 if substr($url,0,4) eq "file"; + # > return 1 unless $url =~ m|://([^/]+)|; + # > my $host = $1; + # > + my $proxy = $CPAN::Config->{'http_proxy'} || $ENV{'http_proxy'}; + # > + if ($proxy) { + # > + $proxy =~ m|://([^/:]+)|; + # > + $proxy = $1; + # > + my $noproxy = $CPAN::Config->{'no_proxy'} || $ENV{'no_proxy'}; + # > + if ($noproxy) { + # > + if ($host !~ /$noproxy$/) { + # > + $host = $proxy; + # > + } + # > + } else { + # > + $host = $proxy; + # > + } + # > + } + # > require Net::Ping; + # > return 1 unless $Net::Ping::VERSION >= 2; + # > my $p; + #-> sub CPAN::FTP::localize ; -# sorry for the ugly code here, I'll clean it up as soon as Net::FTP -# is in the core sub localize { my($self,$file,$aslocal,$force) = @_; $force ||= 0; @@ -1945,9 +2025,19 @@ sub localize { if $CPAN::DEBUG; if ($^O eq 'MacOS') { + # Comment by AK on 2000-09-03: 
Uniq short filenames would be + # available in CHECKSUMS file my($name, $path) = File::Basename::fileparse($aslocal, ''); if (length($name) > 31) { - $name =~ s/(\.(readme(\.(gz|Z))?|(tar\.)?(gz|Z)|tgz|zip|pm\.(gz|Z)))$//; + $name =~ s/( + \.( + readme(\.(gz|Z))? | + (tar\.)?(gz|Z) | + tgz | + zip | + pm\.(gz|Z) + ) + )$//x; my $suf = $1; my $size = 31 - length($suf); while (length($name) > $size) { @@ -1973,19 +2063,22 @@ sub localize { to insufficient permissions.\n}) unless -w $aslocal_dir; # Inheritance is not easier to manage than a few if/else branches - if ($CPAN::META->has_inst('LWP::UserAgent')) { - require LWP::UserAgent; + if ($CPAN::META->has_usable('LWP::UserAgent')) { unless ($Ua) { $Ua = LWP::UserAgent->new; my($var); $Ua->proxy('ftp', $var) - if $var = $CPAN::Config->{'ftp_proxy'} || $ENV{'ftp_proxy'}; + if $var = $CPAN::Config->{ftp_proxy} || $ENV{ftp_proxy}; $Ua->proxy('http', $var) - if $var = $CPAN::Config->{'http_proxy'} || $ENV{'http_proxy'}; + if $var = $CPAN::Config->{http_proxy} || $ENV{http_proxy}; $Ua->no_proxy($var) - if $var = $CPAN::Config->{'no_proxy'} || $ENV{'no_proxy'}; + if $var = $CPAN::Config->{no_proxy} || $ENV{no_proxy}; } } + $ENV{ftp_proxy} = $CPAN::Config->{ftp_proxy} if $CPAN::Config->{ftp_proxy}; + $ENV{http_proxy} = $CPAN::Config->{http_proxy} + if $CPAN::Config->{http_proxy}; + $ENV{no_proxy} = $CPAN::Config->{no_proxy} if $CPAN::Config->{no_proxy}; # Try the list of urls for each single object. We keep a record # where we did get a file from @@ -2008,14 +2101,16 @@ sub localize { ($a == $Thesite) } 0..$last; } - my($level,@levels); + my(@levels); if ($Themethod) { @levels = ($Themethod, grep {$_ ne $Themethod} qw/easy hard hardest/); } else { @levels = qw/easy hard hardest/; } @levels = qw/easy/ if $^O eq 'MacOS'; - for $level (@levels) { + my($levelno); + for $levelno (0..$#levels) { + my $level = $levels[$levelno]; my $method = "host$level"; my @host_seq = $level eq "easy" ? 
@reordered : 0..$last; # reordered has CDROM up front @@ -2030,17 +2125,20 @@ sub localize { return $ret; } else { unlink $aslocal; + last if $CPAN::Signal; # need to cleanup } } - my(@mess); - push @mess, - qq{Please check, if the URLs I found in your configuration file \(}. - join(", ", @{$CPAN::Config->{urllist}}). - qq{\) are valid. The urllist can be edited.}, - qq{E.g. with ``o conf urllist push ftp://myurl/''}; - $CPAN::Frontend->myprint(Text::Wrap::wrap("","",@mess). "\n\n"); - sleep 2; - $CPAN::Frontend->myprint("Cannot fetch $file\n\n"); + unless ($CPAN::Signal) { + my(@mess); + push @mess, + qq{Please check, if the URLs I found in your configuration file \(}. + join(", ", @{$CPAN::Config->{urllist}}). + qq{\) are valid. The urllist can be edited.}, + qq{E.g. with 'o conf urllist push ftp://myurl/'}; + $CPAN::Frontend->myprint(Text::Wrap::wrap("","",@mess). "\n\n"); + sleep 2; + $CPAN::Frontend->myprint("Cannot fetch $file\n\n"); + } if ($restore) { rename "$aslocal.bak", $aslocal; $CPAN::Frontend->myprint("Trying to get away with old file:\n" . 
@@ -2054,19 +2152,13 @@ sub hosteasy { my($self,$host_seq,$file,$aslocal) = @_; my($i); HOSTEASY: for $i (@$host_seq) { - my $url = $CPAN::Config->{urllist}[$i] || $CPAN::Defaultsite; - unless ($self->is_reachable($url)) { - $CPAN::Frontend->myprint("Skipping $url (seems to be not reachable)\n"); - sleep 2; - next; - } + my $url = $CPAN::Config->{urllist}[$i] || $CPAN::Defaultsite; $url .= "/" unless substr($url,-1) eq "/"; $url .= $file; $self->debug("localizing perlish[$url]") if $CPAN::DEBUG; if ($url =~ /^file:/) { my $l; - if ($CPAN::META->has_inst('LWP')) { - require URI::URL; + if ($CPAN::META->has_inst('URI::URL')) { my $u = URI::URL->new($url); $l = $u->path; } else { # works only on Unix, is poorly constructed, but @@ -2095,7 +2187,7 @@ sub hosteasy { } } } - if ($CPAN::META->has_inst('LWP')) { + if ($CPAN::META->has_usable('LWP')) { $CPAN::Frontend->myprint("Fetching with LWP: $url "); @@ -2110,7 +2202,7 @@ sub hosteasy { utime $now, $now, $aslocal; # download time is more # important than upload time return $aslocal; - } elsif ($url !~ /\.gz\z/) { + } elsif ($url !~ /\.gz(?!\n)\Z/) { my $gzurl = "$url.gz"; $CPAN::Frontend->myprint("Fetching with LWP: $gzurl @@ -2121,22 +2213,20 @@ sub hosteasy { ) { $Thesite = $i; return $aslocal; - } else { - # next HOSTEASY ; } } else { - # Alan Burlison informed me that in firewall envs Net::FTP - # can still succeed where LWP fails. So we do not skip - # Net::FTP anymore when LWP is available. - # next HOSTEASY ; + # Alan Burlison informed me that in firewall environments + # Net::FTP can still succeed where LWP fails. So we do not + # skip Net::FTP anymore when LWP is available. 
} } else { $self->debug("LWP not installed") if $CPAN::DEBUG; } + return if $CPAN::Signal; if ($url =~ m|^ftp://(.*?)/(.*)/(.*)|) { # that's the nice and easy way thanks to Graham my($host,$dir,$getfile) = ($1,$2,$3); - if ($CPAN::META->has_inst('Net::FTP')) { + if ($CPAN::META->has_usable('Net::FTP')) { $dir =~ s|/+|/|g; $CPAN::Frontend->myprint("Fetching with Net::FTP: $url @@ -2147,7 +2237,7 @@ sub hosteasy { $Thesite = $i; return $aslocal; } - if ($aslocal !~ /\.gz\z/) { + if ($aslocal !~ /\.gz(?!\n)\Z/) { my $gz = "$aslocal.gz"; $CPAN::Frontend->myprint("Fetching with Net::FTP $url.gz @@ -2165,6 +2255,7 @@ sub hosteasy { # next HOSTEASY; } } + return if $CPAN::Signal; } } @@ -2182,10 +2273,6 @@ sub hosthard { File::Path::mkpath($aslocal_dir); HOSTHARD: for $i (@$host_seq) { my $url = $CPAN::Config->{urllist}[$i] || $CPAN::Defaultsite; - unless ($self->is_reachable($url)) { - $CPAN::Frontend->myprint("Skipping $url (not reachable)\n"); - next; - } $url .= "/" unless substr($url,-1) eq "/"; $url .= $file; my($proto,$host,$dir,$getfile); @@ -2199,6 +2286,8 @@ sub hosthard { } else { next HOSTHARD; # who said, we could ftp anything except ftp? } + next HOSTHARD if $proto eq "file"; # file URLs would have had + # success above. 
Likely a bogus URL $self->debug("localizing funkyftpwise[$url]") if $CPAN::DEBUG; my($f,$funkyftp); @@ -2207,83 +2296,78 @@ sub hosthard { $funkyftp = $CPAN::Config->{$f}; next unless defined $funkyftp; next if $funkyftp =~ /^\s*$/; - my($want_compressed); - my $aslocal_uncompressed; - ($aslocal_uncompressed = $aslocal) =~ s/\.gz//; - my($source_switch) = ""; + my($asl_ungz, $asl_gz); + ($asl_ungz = $aslocal) =~ s/\.gz//; + $asl_gz = "$asl_ungz.gz"; + my($src_switch) = ""; if ($f eq "lynx"){ - $source_switch = " -source"; + $src_switch = " -source"; } elsif ($f eq "ncftp"){ - $source_switch = " -c"; + $src_switch = " -c"; } my($chdir) = ""; - my($stdout_redir) = " > $aslocal_uncompressed"; + my($stdout_redir) = " > $asl_ungz"; if ($f eq "ncftpget"){ $chdir = "cd $aslocal_dir && "; $stdout_redir = ""; } $CPAN::Frontend->myprint( qq[ -Trying with "$funkyftp$source_switch" to get +Trying with "$funkyftp$src_switch" to get $url ]); my($system) = - "$chdir$funkyftp$source_switch '$url' $devnull$stdout_redir"; + "$chdir$funkyftp$src_switch '$url' $devnull$stdout_redir"; $self->debug("system[$system]") if $CPAN::DEBUG; my($wstatus); if (($wstatus = system($system)) == 0 && ($f eq "lynx" ? - -s $aslocal_uncompressed # lynx returns 0 on my - # system even if it fails + -s $asl_ungz # lynx returns 0 when it fails somewhere : 1 ) ) { if (-s $aslocal) { # Looks good - } elsif ($aslocal_uncompressed ne $aslocal) { + } elsif ($asl_ungz ne $aslocal) { # test gzip integrity - if ( - CPAN::Tarzip->gtest($aslocal_uncompressed) - ) { - rename $aslocal_uncompressed, $aslocal; + if (CPAN::Tarzip->gtest($asl_ungz)) { + # e.g. 
foo.tar is gzipped --> foo.tar.gz + rename $asl_ungz, $aslocal; } else { - CPAN::Tarzip->gzip($aslocal_uncompressed, - "$aslocal_uncompressed.gz"); + CPAN::Tarzip->gzip($asl_ungz,$asl_gz); } } $Thesite = $i; return $aslocal; - } elsif ($url !~ /\.gz\z/) { - unlink $aslocal_uncompressed if - -f $aslocal_uncompressed && -s _ == 0; + } elsif ($url !~ /\.gz(?!\n)\Z/) { + unlink $asl_ungz if + -f $asl_ungz && -s _ == 0; my $gz = "$aslocal.gz"; my $gzurl = "$url.gz"; $CPAN::Frontend->myprint( qq[ -Trying with "$funkyftp$source_switch" to get +Trying with "$funkyftp$src_switch" to get $url.gz ]); - my($system) = "$funkyftp$source_switch '$url.gz' $devnull > ". - "$aslocal_uncompressed.gz"; + my($system) = "$funkyftp$src_switch '$url.gz' $devnull > $asl_gz"; $self->debug("system[$system]") if $CPAN::DEBUG; my($wstatus); if (($wstatus = system($system)) == 0 && - -s "$aslocal_uncompressed.gz" + -s $asl_gz ) { # test gzip integrity - if (CPAN::Tarzip->gtest("$aslocal_uncompressed.gz")) { - CPAN::Tarzip->gunzip("$aslocal_uncompressed.gz", - $aslocal); + if (CPAN::Tarzip->gtest($asl_gz)) { + CPAN::Tarzip->gunzip($asl_gz,$aslocal); } else { - rename $aslocal_uncompressed, $aslocal; + # somebody uncompressed file for us? 
+ rename $asl_ungz, $aslocal; } $Thesite = $i; return $aslocal; } else { - unlink "$aslocal_uncompressed.gz" if - -f "$aslocal_uncompressed.gz"; + unlink $asl_gz if -f $asl_gz; } } else { my $estatus = $wstatus >> 8; @@ -2295,8 +2379,9 @@ System call "$system" returned status $estatus (wstat $wstatus)$size }); } - } - } + return if $CPAN::Signal; + } # lynx,ncftpget,ncftp + } # host } sub hosthardest { @@ -2311,10 +2396,6 @@ sub hosthardest { last HOSTHARDEST; } my $url = $CPAN::Config->{urllist}[$i] || $CPAN::Defaultsite; - unless ($self->is_reachable($url)) { - $CPAN::Frontend->myprint("Skipping $url (not reachable)\n"); - next; - } $url .= "/" unless substr($url,-1) eq "/"; $url .= $file; $self->debug("localizing ftpwise[$url]") if $CPAN::DEBUG; @@ -2367,6 +2448,7 @@ sub hosthardest { } else { $CPAN::Frontend->myprint("Hmm... Still failed!\n"); } + return if $CPAN::Signal; } else { $CPAN::Frontend->mywarn(qq{Your $netrcfile is not }. qq{correctly protected.\n}); @@ -2396,9 +2478,10 @@ sub hosthardest { } else { $CPAN::Frontend->myprint("Bad luck... Still failed!\n"); } + return if $CPAN::Signal; $CPAN::Frontend->myprint("Can't access URL $url.\n\n"); sleep 2; - } + } # host } sub talk_ftp { @@ -2576,8 +2659,8 @@ sub cpl { @return = grep( /^$word/, sort qw( - ! a b d h i m o q r u autobundle clean - make test install force reload look cvs_import + ! 
a b d h i m o q r u autobundle clean dump + make test install force readme reload look cvs_import ) ); } elsif ( $line !~ /^[\!abcdhimorutl]/ ) { @@ -2588,7 +2671,9 @@ sub cpl { @return = cplx('CPAN::Bundle',$word); } elsif ($line =~ /^d\s/) { @return = cplx('CPAN::Distribution',$word); - } elsif ($line =~ /^([mru]|make|clean|test|install|readme|look|cvs_import)\s/ ) { + } elsif ($line =~ m/^( + [mru]|make|clean|dump|test|install|readme|look|cvs_import + )\s/x ) { @return = (cplx('CPAN::Module',$word),cplx('CPAN::Bundle',$word)); } elsif ($line =~ /^i\s/) { @return = cpl_any($word); @@ -2605,7 +2690,11 @@ sub cpl { #-> sub CPAN::Complete::cplx ; sub cplx { my($class, $word) = @_; - grep /^\Q$word\E/, map { $_->id } $CPAN::META->all_objects($class); + # I believed for many years that this was sorted, today I + # realized, it wasn't sorted anymore. Now (rev 1.301 / v 1.55) I + # make it sorted again. Maybe sort was dropped when GNU-readline + # support came in? The RCS file is difficult to read on that:-( + sort grep /^\Q$word\E/, map { $_->id } $CPAN::META->all_objects($class); } #-> sub CPAN::Complete::cpl_any ; @@ -2668,51 +2757,63 @@ sub reload { for ($CPAN::Config->{index_expire}) { $_ = 0.001 unless $_ && $_ > 0.001; } + $CPAN::META->{PROTOCOL} ||= "1.0"; + if ( $CPAN::META->{PROTOCOL} < PROTOCOL ) { + # warn "Setting last_time to 0"; + $last_time = 0; # No warning necessary + } return if $last_time + $CPAN::Config->{index_expire}*86400 > $time and ! $force; - ## IFF we are developing, it helps to wipe out the memory between - ## reloads, otherwise it is not what a user expects. - - ## undef $CPAN::META; # Neue Gruendlichkeit since v1.52(r1.274) - ## $CPAN::META = CPAN->new; - my($debug,$t2); + if (0) { + # IFF we are developing, it helps to wipe out the memory + # between reloads, otherwise it is not what a user expects. 
+ undef $CPAN::META; # Neue Gruendlichkeit since v1.52(r1.274) + $CPAN::META = CPAN->new; + } + { + my($debug,$t2); + local $last_time = $time; + local $CPAN::META->{PROTOCOL} = PROTOCOL; + + my $needshort = $^O eq "dos"; + + $cl->rd_authindex($cl + ->reload_x( + "authors/01mailrc.txt.gz", + $needshort ? + File::Spec->catfile('authors', '01mailrc.gz') : + File::Spec->catfile('authors', '01mailrc.txt.gz'), + $force)); + $t2 = time; + $debug = "timing reading 01[".($t2 - $time)."]"; + $time = $t2; + return if $CPAN::Signal; # this is sometimes lengthy + $cl->rd_modpacks($cl + ->reload_x( + "modules/02packages.details.txt.gz", + $needshort ? + File::Spec->catfile('modules', '02packag.gz') : + File::Spec->catfile('modules', '02packages.details.txt.gz'), + $force)); + $t2 = time; + $debug .= "02[".($t2 - $time)."]"; + $time = $t2; + return if $CPAN::Signal; # this is sometimes lengthy + $cl->rd_modlist($cl + ->reload_x( + "modules/03modlist.data.gz", + $needshort ? + File::Spec->catfile('modules', '03mlist.gz') : + File::Spec->catfile('modules', '03modlist.data.gz'), + $force)); + $cl->write_metadata_cache; + $t2 = time; + $debug .= "03[".($t2 - $time)."]"; + $time = $t2; + CPAN->debug($debug) if $CPAN::DEBUG; + } $last_time = $time; - - my $needshort = $^O eq "dos"; - - $cl->rd_authindex($cl - ->reload_x( - "authors/01mailrc.txt.gz", - $needshort ? - File::Spec->catfile('authors', '01mailrc.gz') : - File::Spec->catfile('authors', '01mailrc.txt.gz'), - $force)); - $t2 = time; - $debug = "timing reading 01[".($t2 - $time)."]"; - $time = $t2; - return if $CPAN::Signal; # this is sometimes lengthy - $cl->rd_modpacks($cl - ->reload_x( - "modules/02packages.details.txt.gz", - $needshort ? 
- File::Spec->catfile('modules', '02packag.gz') : - File::Spec->catfile('modules', '02packages.details.txt.gz'), - $force)); - $t2 = time; - $debug .= "02[".($t2 - $time)."]"; - $time = $t2; - return if $CPAN::Signal; # this is sometimes lengthy - $cl->rd_modlist($cl - ->reload_x( - "modules/03modlist.data.gz", - $needshort ? - File::Spec->catfile('modules', '03mlist.gz') : - File::Spec->catfile('modules', '03modlist.data.gz'), - $force)); - $t2 = time; - $debug .= "03[".($t2 - $time)."]"; - $time = $t2; - CPAN->debug($debug) if $CPAN::DEBUG; + $CPAN::META->{PROTOCOL} = PROTOCOL; } #-> sub CPAN::Index::reload_x ; @@ -2773,7 +2874,7 @@ sub userid { #-> sub CPAN::Index::rd_modpacks ; sub rd_modpacks { - my($cl, $index_target) = @_; + my($self, $index_target) = @_; my @lines; return unless defined $index_target; $CPAN::Frontend->myprint("Going to read $index_target\n"); @@ -2785,16 +2886,43 @@ sub rd_modpacks { unshift @ls, "\n" x length($1) if /^(\n+)/; push @lines, @ls; } + # read header + my $line_count; while (@lines) { my $shift = shift(@lines); + $shift =~ /^Line-Count:\s+(\d+)/; + $line_count = $1 if $1; last if $shift =~ /^\s*$/; } + if (not defined $line_count) { + + warn qq{Warning: Your $index_target does not contain a Line-Count header. +Please check the validity of the index file by comparing it to more +than one CPAN mirror. I'll continue but problems seem likely to +happen.\a +}; + + sleep 5; + } elsif ($line_count != scalar @lines) { + + warn sprintf qq{Warning: Your %s +contains a Line-Count header of %d but I see %d lines there. Please +check the validity of the index file by comparing it to more than one +CPAN mirror. 
I'll continue but problems seem likely to happen.\a\n}, +$index_target, $line_count, scalar(@lines); + + } + # A necessity since we have metadata_cache: delete what isn't + # there anymore + my $secondtime = $CPAN::META->exists("CPAN::Module","CPAN"); + CPAN->debug("secondtime[$secondtime]") if $CPAN::DEBUG; + my(%exists); foreach (@lines) { chomp; - my($mod,$version,$dist) = split; -### $version =~ s/^\+//; - - # if it is a bundle, instantiate a bundle object + # before 1.56 we split into 3 and discarded the rest. From + # 1.57 we assign remaining text to $comment thus allowing to + # influence isa_perl + my($mod,$version,$dist,$comment) = split " ", $_, 4; my($bundle,$id,$userid); if ($mod eq 'CPAN' && @@ -2803,18 +2931,18 @@ sub rd_modpacks { CPAN::Queue->exists('CPAN') ) ) { - local($^W)= 0; - if ($version > $CPAN::VERSION){ - $CPAN::Frontend->myprint(qq{ - There\'s a new CPAN.pm version (v$version) available! + local($^W)= 0; + if ($version > $CPAN::VERSION){ + $CPAN::Frontend->myprint(qq{ + There's a new CPAN.pm version (v$version) available! [Current version is v$CPAN::VERSION] You might want to try install Bundle::CPAN reload cpan without quitting the current session. It should be a seamless upgrade while we are running... -}); - sleep 2; +}); #}); + sleep 2; $CPAN::Frontend->myprint(qq{\n}); } last if $CPAN::Signal; @@ -2824,29 +2952,29 @@ sub rd_modpacks { if ($bundle){ $id = $CPAN::META->instance('CPAN::Bundle',$mod); - # warn "made mod[$mod]a bundle"; # Let's make it a module too, because bundles have so much - # in common with modules - $CPAN::META->instance('CPAN::Module',$mod); - # warn "made mod[$mod]a module"; + # in common with modules. -# This "next" makes us faster but if the job is running long, we ignore -# rereads which is bad. So we have to be a bit slower again. 
-# } elsif ($CPAN::META->exists('CPAN::Module',$mod)) { -# next; + # Changed in 1.57_63: seems like memory bloat now without + # any value, so commented out + + # $CPAN::META->instance('CPAN::Module',$mod); + + } else { - } - else { # instantiate a module object $id = $CPAN::META->instance('CPAN::Module',$mod); + } - if ($id->cpan_file ne $dist){ - $userid = $cl->userid($dist); + if ($id->cpan_file ne $dist){ # update only if file is + # different. CPAN prohibits same + # name with different version + $userid = $self->userid($dist); $id->set( 'CPAN_USERID' => $userid, 'CPAN_VERSION' => $version, - 'CPAN_FILE' => $dist + 'CPAN_FILE' => $dist, ); } @@ -2863,13 +2991,29 @@ sub rd_modpacks { $CPAN::META->instance( 'CPAN::Distribution' => $dist )->set( - 'CPAN_USERID' => $userid + 'CPAN_USERID' => $userid, + 'CPAN_COMMENT' => $comment, ); } - + if ($secondtime) { + for my $name ($mod,$dist) { + CPAN->debug("exists name[$name]") if $CPAN::DEBUG; + $exists{$name} = undef; + } + } return if $CPAN::Signal; } undef $fh; + if ($secondtime) { + for my $class (qw(CPAN::Module CPAN::Bundle CPAN::Distribution)) { + for my $o ($CPAN::META->all_objects($class)) { + next if exists $exists{$o->{ID}}; + $CPAN::META->delete($class,$o->{ID}); + CPAN->debug("deleting ID[$o->{ID}] in class[$class]") + if $CPAN::DEBUG; + } + } + } } #-> sub CPAN::Index::rd_modlist ; @@ -2904,25 +3048,123 @@ sub rd_modlist { return if $CPAN::Signal; for (keys %$ret) { my $obj = $CPAN::META->instance(CPAN::Module,$_); + delete $ret->{$_}{modid}; # not needed here, maybe elsewhere $obj->set(%{$ret->{$_}}); return if $CPAN::Signal; } } +#-> sub CPAN::Index::write_metadata_cache ; +sub write_metadata_cache { + my($self) = @_; + return unless $CPAN::Config->{'cache_metadata'}; + return unless $CPAN::META->has_usable("Storable"); + my $cache; + foreach my $k (qw(CPAN::Bundle CPAN::Author CPAN::Module + CPAN::Distribution)) { + $cache->{$k} = $CPAN::META->{readonly}{$k}; # unsafe meta access, ok + } + my 
$metadata_file = MM->catfile($CPAN::Config->{cpan_home},"Metadata"); + $cache->{last_time} = $last_time; + $cache->{PROTOCOL} = PROTOCOL; + $CPAN::Frontend->myprint("Going to write $metadata_file\n"); + eval { Storable::nstore($cache, $metadata_file) }; + $CPAN::Frontend->mywarn($@) if $@; +} + +#-> sub CPAN::Index::read_metadata_cache ; +sub read_metadata_cache { + my($self) = @_; + return unless $CPAN::Config->{'cache_metadata'}; + return unless $CPAN::META->has_usable("Storable"); + my $metadata_file = MM->catfile($CPAN::Config->{cpan_home},"Metadata"); + return unless -r $metadata_file and -f $metadata_file; + $CPAN::Frontend->myprint("Going to read $metadata_file\n"); + my $cache; + eval { $cache = Storable::retrieve($metadata_file) }; + $CPAN::Frontend->mywarn($@) if $@; + if (!$cache || ref $cache ne 'HASH'){ + $last_time = 0; + return; + } + if (exists $cache->{PROTOCOL}) { + if (PROTOCOL > $cache->{PROTOCOL}) { + $CPAN::Frontend->mywarn(sprintf("Ignoring Metadata cache written ". + "with protocol v%s, requiring v%s", + $cache->{PROTOCOL}, + PROTOCOL) + ); + return; + } + } else { + $CPAN::Frontend->mywarn("Ignoring Metadata cache written ". + "with protocol v1.0"); + return; + } + my $clcnt = 0; + my $idcnt = 0; + while(my($class,$v) = each %$cache) { + next unless $class =~ /^CPAN::/; + $CPAN::META->{readonly}{$class} = $v; # unsafe meta access, ok + while (my($id,$ro) = each %$v) { + $CPAN::META->{readwrite}{$class}{$id} ||= + $class->new(ID=>$id, RO=>$ro); + $idcnt++; + } + $clcnt++; + } + unless ($clcnt) { # sanity check + $CPAN::Frontend->myprint("Warning: Found no data in $metadata_file\n"); + return; + } + if ($idcnt < 1000) { + $CPAN::Frontend->myprint("Warning: Found only $idcnt objects ". 
+ "in $metadata_file\n"); + return; + } + $CPAN::META->{PROTOCOL} ||= + $cache->{PROTOCOL}; # reading does not up or downgrade, but it + # does initialize to some protocol + $last_time = $cache->{last_time}; +} + package CPAN::InfoObj; +# Accessors +sub cpan_userid { shift->{RO}{CPAN_USERID} } +sub id { shift->{ID} } + #-> sub CPAN::InfoObj::new ; -sub new { my $this = bless {}, shift; %$this = @_; $this } +sub new { + my $this = bless {}, shift; + %$this = @_; + $this +} + +# The set method may only be used by code that reads index data or +# otherwise "objective" data from the outside world. All session +# related material may do anything else with instance variables but +# must not touch the hash under the RO attribute. The reason is that +# the RO hash gets written to Metadata file and is thus persistent. #-> sub CPAN::InfoObj::set ; sub set { my($self,%att) = @_; - my(%oldatt) = %$self; - %$self = (%oldatt, %att); -} + my $class = ref $self; + + # This must be ||=, not ||, because only if we write an empty + # reference, only then the set method will write into the readonly + # area. But for Distributions that spring into existence, maybe + # because of a typo, we do not like it that they are written into + # the readonly area and made permanent (at least for a while) and + # that is why we do not "allow" other places to call ->set. + my $ro = $self->{RO} = + $CPAN::META->{readonly}{$class}{$self->id} ||= {}; -#-> sub CPAN::InfoObj::id ; -sub id { shift->{'ID'} } + while (my($k,$v) = each %att) { + $ro->{$k} = $v; + } +} #-> sub CPAN::InfoObj::as_glimpse ; sub as_glimpse { @@ -2941,31 +3183,36 @@ sub as_string { my $class = ref($self); $class =~ s/^CPAN:://; push @m, $class, " id = $self->{ID}\n"; - for (sort keys %$self) { - next if $_ eq 'ID'; + for (sort keys %{$self->{RO}}) { + # next if m/^(ID|RO)$/; my $extra = ""; if ($_ eq "CPAN_USERID") { $extra .= " (".$self->author; my $email; # old perls! 
if ($email = $CPAN::META->instance(CPAN::Author, - $self->{$_} - )->email) { + $self->cpan_userid + )->email) { $extra .= " <$email>"; } else { $extra .= " <no email>"; } $extra .= ")"; } - if (ref($self->{$_}) eq "ARRAY") { # language interface? XXX - push @m, sprintf " %-12s %s%s\n", $_, "@{$self->{$_}}", $extra; + next unless defined $self->{RO}{$_}; + push @m, sprintf " %-12s %s%s\n", $_, $self->{RO}{$_}, $extra; + } + for (sort keys %$self) { + next if m/^(ID|RO)$/; + if (ref($self->{$_}) eq "ARRAY") { + push @m, sprintf " %-12s %s\n", $_, "@{$self->{$_}}"; } elsif (ref($self->{$_}) eq "HASH") { push @m, sprintf( - " %-12s %s%s\n", + " %-12s %s\n", $_, join(" ",keys %{$self->{$_}}), - $extra); + ); } else { - push @m, sprintf " %-12s %s%s\n", $_, $self->{$_}, $extra; + push @m, sprintf " %-12s %s\n", $_, $self->{$_}; } } join "", @m, "\n"; @@ -2974,13 +3221,14 @@ sub as_string { #-> sub CPAN::InfoObj::author ; sub author { my($self) = @_; - $CPAN::META->instance(CPAN::Author,$self->{CPAN_USERID})->fullname; + $CPAN::META->instance(CPAN::Author,$self->cpan_userid)->fullname; } +#-> sub CPAN::InfoObj::dump ; sub dump { my($self) = @_; require Data::Dumper; - Data::Dumper::Dumper($self); + print Data::Dumper::Dumper($self); } package CPAN::Author; @@ -2995,21 +3243,53 @@ sub as_glimpse { join "", @m; } -# Dead code, I would have liked to have,,, but it was never reached,,, -#sub make { -# my($self) = @_; -# return "Don't be silly, you can't make $self->{FULLNAME} ;-)\n"; -#} - #-> sub CPAN::Author::fullname ; -sub fullname { shift->{'FULLNAME'} } +sub fullname { shift->{RO}{FULLNAME} } *name = \&fullname; #-> sub CPAN::Author::email ; -sub email { shift->{'EMAIL'} } +sub email { shift->{RO}{EMAIL} } package CPAN::Distribution; +# Accessors +sub cpan_comment { shift->{RO}{CPAN_COMMENT} } + +sub undelay { + my $self = shift; + delete $self->{later}; +} + +#-> sub CPAN::Distribution::color_cmd_tmps ; +sub color_cmd_tmps { + my($self) = shift; + my($depth) = shift || 
0; + my($color) = shift || 0; + # a distribution needs to recurse into its prereq_pms + + return if exists $self->{incommandcolor} + && $self->{incommandcolor}==$color; + $CPAN::Frontend->mydie(sprintf("CPAN.pm panic: deep recursion in ". + "color_cmd_tmps depth[%s] self[%s] id[%s]", + $depth, + $self, + $self->id + )) if $depth>=100; + ##### warn "color_cmd_tmps $depth $color " . $self->id; # sleep 1; + my $prereq_pm = $self->prereq_pm; + if (defined $prereq_pm) { + for my $pre (keys %$prereq_pm) { + my $premo = CPAN::Shell->expand("Module",$pre); + $premo->color_cmd_tmps($depth+1,$color); + } + } + if ($color==0) { + delete $self->{sponsored_mods}; + delete $self->{badtestcnt}; + } + $self->{incommandcolor} = $color; +} + #-> sub CPAN::Distribution::as_string ; sub as_string { my $self = shift; @@ -3022,9 +3302,11 @@ sub containsmods { my $self = shift; return if exists $self->{CONTAINSMODS}; for my $mod ($CPAN::META->all_objects("CPAN::Module")) { - my $mod_file = $mod->{CPAN_FILE} or next; + my $mod_file = $mod->cpan_file or next; my $dist_id = $self->{ID} or next; my $mod_id = $mod->{ID} or next; + # warn "mod_file[$mod_file] dist_id[$dist_id] mod_id[$mod_id]"; + # sleep 1; $self->{CONTAINSMODS}{$mod_id} = undef if $mod_file eq $dist_id; } } @@ -3032,8 +3314,8 @@ sub containsmods { #-> sub CPAN::Distribution::called_for ; sub called_for { my($self,$id) = @_; - $self->{'CALLED_FOR'} = $id if defined $id; - return $self->{'CALLED_FOR'}; + $self->{CALLED_FOR} = $id if defined $id; + return $self->{CALLED_FOR}; } #-> sub CPAN::Distribution::get ; @@ -3042,30 +3324,32 @@ sub get { EXCUSE: { my @e; exists $self->{'build_dir'} and push @e, - "Unwrapped into directory $self->{'build_dir'}"; + "Is already unwrapped into directory $self->{'build_dir'}"; $CPAN::Frontend->myprint(join "", map {" $_\n"} @e) and return if @e; } my($local_file); my($local_wanted) = - MM->catfile( - $CPAN::Config->{keep_source_where}, - "authors", - "id", - split("/",$self->{ID}) - ); + 
MM->catfile( + $CPAN::Config->{keep_source_where}, + "authors", + "id", + split("/",$self->id) + ); $self->debug("Doing localize") if $CPAN::DEBUG; $local_file = CPAN::FTP->localize("authors/id/$self->{ID}", $local_wanted) or $CPAN::Frontend->mydie("Giving up on '$local_wanted'\n"); + return if $CPAN::Signal; $self->{localfile} = $local_file; - my $builddir = $CPAN::META->{cachemgr}->dir; + $CPAN::META->{cachemgr} ||= CPAN::CacheMgr->new(); # unsafe meta access, ok + my $builddir = $CPAN::META->{cachemgr}->dir; # unsafe meta access, ok $self->debug("doing chdir $builddir") if $CPAN::DEBUG; chdir $builddir or Carp::croak("Couldn't chdir $builddir: $!"); my $packagedir; $self->debug("local_file[$local_file]") if $CPAN::DEBUG; - if ($CPAN::META->has_inst('MD5')) { + if ($CPAN::META->has_inst("MD5")) { $self->debug("MD5 is installed, verifying"); $self->verifyMD5; } else { @@ -3074,74 +3358,96 @@ sub get { $self->debug("Removing tmp") if $CPAN::DEBUG; File::Path::rmtree("tmp"); mkdir "tmp", 0755 or Carp::croak "Couldn't mkdir tmp: $!"; - chdir "tmp"; + chdir "tmp" or $CPAN::Frontend->mydie(qq{Could not chdir to "tmp": $!});; $self->debug("Changed directory to tmp") if $CPAN::DEBUG; + return if $CPAN::Signal; if (! 
$local_file) { Carp::croak "bad download, can't do anything :-(\n"; - } elsif ($local_file =~ /(\.tar\.(gz|Z)|\.tgz)\z/i){ + } elsif ($local_file =~ /(\.tar\.(gz|Z)|\.tgz)(?!\n)\Z/i){ + $self->{was_uncompressed}++ unless CPAN::Tarzip->gtest($local_file); $self->untar_me($local_file); - } elsif ( $local_file =~ /\.zip\z/i ) { + } elsif ( $local_file =~ /\.zip(?!\n)\Z/i ) { $self->unzip_me($local_file); - } elsif ( $local_file =~ /\.pm\.(gz|Z)\z/) { + } elsif ( $local_file =~ /\.pm\.(gz|Z)(?!\n)\Z/) { + $self->{was_uncompressed}++ unless CPAN::Tarzip->gtest($local_file); $self->pm2dir_me($local_file); } else { $self->{archived} = "NO"; } - chdir File::Spec->updir; + my $cwd = File::Spec->updir; + chdir $cwd or $CPAN::Frontend->mydie(qq{Could not chdir to "": $!}); if ($self->{archived} ne 'NO') { - chdir File::Spec->catdir(File::Spec->curdir, "tmp"); - # Let's check if the package has its own directory. - my $dh = DirHandle->new(File::Spec->curdir) - or Carp::croak("Couldn't opendir .: $!"); - my @readdir = grep $_ !~ /^\.\.?\z/s, $dh->read; ### MAC?? - $dh->close; - my ($distdir,$packagedir); - if (@readdir == 1 && -d $readdir[0]) { - $distdir = $readdir[0]; - $packagedir = MM->catdir($builddir,$distdir); - -d $packagedir and $CPAN::Frontend->myprint("Removing previously used $packagedir\n"); - File::Path::rmtree($packagedir); - rename($distdir,$packagedir) or Carp::confess("Couldn't rename $distdir to $packagedir: $!"); - } else { - my $pragmatic_dir = $self->{'CPAN_USERID'} . '000'; - $pragmatic_dir =~ s/\W_//g; - $pragmatic_dir++ while -d "../$pragmatic_dir"; - $packagedir = MM->catdir($builddir,$pragmatic_dir); - File::Path::mkpath($packagedir); - my($f); - for $f (@readdir) { # is already without "." and ".." - my $to = MM->catdir($packagedir,$f); - rename($f,$to) or Carp::confess("Couldn't rename $f to $to: $!"); - } - } - $self->{'build_dir'} = $packagedir; - chdir File::Spec->updir; - - $self->debug("Changed directory to .. 
(self is $self [".$self->as_string."])") - if $CPAN::DEBUG; - File::Path::rmtree("tmp"); - if ($CPAN::Config->{keep_source_where} =~ /^no/i ){ - $CPAN::Frontend->myprint("Going to unlink $local_file\n"); - unlink $local_file or Carp::carp "Couldn't unlink $local_file"; - } - my($makefilepl) = MM->catfile($packagedir,"Makefile.PL"); - unless (-f $makefilepl) { - my($configure) = MM->catfile($packagedir,"Configure"); - if (-f $configure) { - # do we have anything to do? - $self->{'configure'} = $configure; - } elsif (-f MM->catfile($packagedir,"Makefile")) { - $CPAN::Frontend->myprint(qq{ + $cwd = File::Spec->catdir(File::Spec->curdir, "tmp"); + chdir $cwd or $CPAN::Frontend->mydie(qq{Could not chdir to "$cwd": $!}); + # Let's check if the package has its own directory. + my $dh = DirHandle->new(File::Spec->curdir) + or Carp::croak("Couldn't opendir .: $!"); + my @readdir = grep $_ !~ /^\.\.?(?!\n)\Z/s, $dh->read; ### MAC?? + $dh->close; + my ($distdir,$packagedir); + if (@readdir == 1 && -d $readdir[0]) { + $distdir = $readdir[0]; + $packagedir = MM->catdir($builddir,$distdir); + -d $packagedir and $CPAN::Frontend->myprint("Removing previously used ". + "$packagedir\n"); + File::Path::rmtree($packagedir); + rename($distdir,$packagedir) or + Carp::confess("Couldn't rename $distdir to $packagedir: $!"); + } else { + my $userid = $self->cpan_userid; + unless ($userid) { + CPAN->debug("no userid? self[$self]"); + $userid = "anon"; + } + my $pragmatic_dir = $userid . '000'; + $pragmatic_dir =~ s/\W_//g; + $pragmatic_dir++ while -d "../$pragmatic_dir"; + $packagedir = MM->catdir($builddir,$pragmatic_dir); + File::Path::mkpath($packagedir); + my($f); + for $f (@readdir) { # is already without "." and ".." 
+ my $to = MM->catdir($packagedir,$f); + rename($f,$to) or Carp::confess("Couldn't rename $f to $to: $!"); + } + } + $self->{'build_dir'} = $packagedir; + $cwd = File::Spec->updir; + chdir $cwd or $CPAN::Frontend->mydie(qq{Could not chdir to "$cwd": $!}); + + $self->debug("Changed directory to .. (self[$self]=[". + $self->as_string."])") if $CPAN::DEBUG; + File::Path::rmtree("tmp"); + if ($CPAN::Config->{keep_source_where} =~ /^no/i ){ + $CPAN::Frontend->myprint("Going to unlink $local_file\n"); + unlink $local_file or Carp::carp "Couldn't unlink $local_file"; + } + my($makefilepl) = MM->catfile($packagedir,"Makefile.PL"); + unless (-f $makefilepl) { + my($configure) = MM->catfile($packagedir,"Configure"); + if (-f $configure) { + # do we have anything to do? + $self->{'configure'} = $configure; + } elsif (-f MM->catfile($packagedir,"Makefile")) { + $CPAN::Frontend->myprint(qq{ Package comes with a Makefile and without a Makefile.PL. We\'ll try to build it with that Makefile then. }); - $self->{writemakefile} = "YES"; - sleep 2; - } else { - my $fh = FileHandle->new(">$makefilepl") - or Carp::croak("Could not open >$makefilepl"); - my $cf = $self->called_for || "unknown"; - $fh->print( + $self->{writemakefile} = "YES"; + sleep 2; + } else { + my $cf = $self->called_for || "unknown"; + if ($cf =~ m|/|) { + $cf =~ s|.*/||; + $cf =~ s|\W.*||; + } + $cf =~ s|[/\\:]||g; # risk of filesystem damage + $cf = "unknown" unless length($cf); + $CPAN::Frontend->myprint(qq{Package comes without Makefile.PL. + Writing one on our own (calling it $cf)\n}); + $self->{had_no_makefile_pl}++; + my $fh = FileHandle->new(">$makefilepl") + or Carp::croak("Could not open >$makefilepl"); + $fh->print( qq{# This Makefile.PL has been autogenerated by the module CPAN.pm # because there was no Makefile.PL supplied. 
# Autogenerated on: }.scalar localtime().qq{ @@ -3150,14 +3456,14 @@ use ExtUtils::MakeMaker; WriteMakefile(NAME => q[$cf]); }); - $CPAN::Frontend->myprint(qq{Package comes without Makefile.PL. - Writing one on our own (calling it $cf)\n}); - } - } + $fh->close; + } + } } return $self; } +# CPAN::Distribution::untar_me ; sub untar_me { my($self,$local_file) = @_; $self->{archived} = "tar"; @@ -3168,22 +3474,23 @@ sub untar_me { } } +# CPAN::Distribution::unzip_me ; sub unzip_me { my($self,$local_file) = @_; $self->{archived} = "zip"; - my $system = "$CPAN::Config->{unzip} $local_file"; - if (system($system) == 0) { + if (CPAN::Tarzip->unzip($local_file)) { $self->{unwrapped} = "YES"; } else { $self->{unwrapped} = "NO"; } + return; } sub pm2dir_me { my($self,$local_file) = @_; $self->{archived} = "pm"; my $to = File::Basename::basename($local_file); - $to =~ s/\.(gz|Z)\z//; + $to =~ s/\.(gz|Z)(?!\n)\Z//; if (CPAN::Tarzip->gunzip($local_file,$to)) { $self->{unwrapped} = "YES"; } else { @@ -3195,7 +3502,7 @@ sub pm2dir_me { sub new { my($class,%att) = @_; - $CPAN::META->{cachemgr} ||= CPAN::CacheMgr->new(); + # $CPAN::META->{cachemgr} ||= CPAN::CacheMgr->new(); my $this = { %att }; return bless $this, $class; @@ -3227,13 +3534,14 @@ Please define it with "o conf shell <your shell>" my $getcwd; $getcwd = $CPAN::Config->{'getcwd'} || 'cwd'; my $pwd = CPAN->$getcwd(); - chdir($dir); + chdir($dir) or $CPAN::Frontend->mydie(qq{Could not chdir to "$dir": $!}); $CPAN::Frontend->myprint(qq{Working directory is $dir\n}); system($CPAN::Config->{'shell'}) == 0 or $CPAN::Frontend->mydie("Subprocess shell error"); - chdir($pwd); + chdir($pwd) or $CPAN::Frontend->mydie(qq{Could not chdir to "$pwd": $!}); } +# CPAN::Distribution::cvs_import ; sub cvs_import { my($self) = @_; $self->get; @@ -3243,10 +3551,10 @@ sub cvs_import { my $module = $CPAN::META->instance('CPAN::Module', $package); my $version = $module->cpan_version; - my $userid = $self->{CPAN_USERID}; + my $userid = 
$self->cpan_userid; my $cvs_dir = (split '/', $dir)[-1]; - $cvs_dir =~ s/-\d+[^-]+\z//; + $cvs_dir =~ s/-\d+[^-]+(?!\n)\Z//; my $cvs_root = $CPAN::Config->{cvsroot} || $ENV{CVSROOT}; my $cvs_site_perl = @@ -3262,14 +3570,14 @@ sub cvs_import { my $getcwd; $getcwd = $CPAN::Config->{'getcwd'} || 'cwd'; my $pwd = CPAN->$getcwd(); - chdir($dir); + chdir($dir) or $CPAN::Frontend->mydie(qq{Could not chdir to "$dir": $!}); $CPAN::Frontend->myprint(qq{Working directory is $dir\n}); $CPAN::Frontend->myprint(qq{@cmd\n}); - system(@cmd) == 0 or + system(@cmd) == 0 or $CPAN::Frontend->mydie("cvs import failed"); - chdir($pwd); + chdir($pwd) or $CPAN::Frontend->mydie(qq{Could not chdir to "$pwd": $!}); } #-> sub CPAN::Distribution::readme ; @@ -3343,7 +3651,7 @@ sub verifyMD5 { $lc_file = CPAN::FTP->localize("authors/id/@local", "$lc_want.gz",1); if ($lc_file) { - $lc_file =~ s/\.gz\z//; + $lc_file =~ s/\.gz(?!\n)\Z//; CPAN::Tarzip->gunzip("$lc_file.gz",$lc_file); } else { return; @@ -3401,33 +3709,42 @@ sub MD5_check_file { $CPAN::Frontend->myprint("Checksum for $file ok\n"); return $self->{MD5_STATUS} = "OK"; } else { - $CPAN::Frontend->myprint(qq{Checksum mismatch for }. + $CPAN::Frontend->myprint(qq{\nChecksum mismatch for }. qq{distribution file. }. qq{Please investigate.\n\n}. $self->as_string, $CPAN::META->instance( 'CPAN::Author', - $self->{CPAN_USERID} + $self->cpan_userid )->as_string); - my $wrap = qq{I\'d recommend removing $file. It seems to -be a bogus file. Maybe you have configured your \`urllist\' with a -bad URL. Please check this array with \`o conf urllist\', and + + my $wrap = qq{I\'d recommend removing $file. Its MD5 +checksum is incorrect. Maybe you have configured your 'urllist' with +a bad URL. 
Please check this array with 'o conf urllist', and retry.}; - $CPAN::Frontend->myprint(Text::Wrap::wrap("","",$wrap)); - $CPAN::Frontend->myprint("\n\n"); - sleep 3; - return; + + $CPAN::Frontend->mydie(Text::Wrap::wrap("","",$wrap)); + + # former versions just returned here but this seems a + # serious threat that deserves a die + + # $CPAN::Frontend->myprint("\n\n"); + # sleep 3; + # return; } # close $fh if fileno($fh); } else { $self->{MD5_STATUS} ||= ""; if ($self->{MD5_STATUS} eq "NIL") { - $CPAN::Frontend->myprint(qq{ -No md5 checksum for $basename in local $chk_file. -Removing $chk_file + $CPAN::Frontend->mywarn(qq{ +Warning: No md5 checksum for $basename in $chk_file. + +The cause for this may be that the file is very new and the checksum +has not yet been calculated, but it may also be that something is +going awry right now. }); - unlink $chk_file or $CPAN::Frontend->myprint("Could not unlink: $!"); - sleep 1; + my $answer = ExtUtils::MakeMaker::prompt("Proceed?", "yes"); + $answer =~ /^\s*y/i or $CPAN::Frontend->mydie("Aborted."); } $self->{MD5_STATUS} = "NIL"; return; @@ -3449,28 +3766,58 @@ sub eq_MD5 { } #-> sub CPAN::Distribution::force ; + +# Both modules and distributions know if "force" is in effect by +# autoinspection, not by inspecting a global variable. One of the +# reason why this was chosen to work that way was the treatment of +# dependencies. They should not autpomatically inherit the force +# status. But this has the downside that ^C and die() will return to +# the prompt but will not be able to reset the force_update +# attributes. We try to correct for it currently in the read_metadata +# routine, and immediately before we check for a Signal. 
I hope this +# works out in one of v1.57_53ff + sub force { - my($self) = @_; - $self->{'force_update'}++; + my($self, $method) = @_; for my $att (qw( MD5_STATUS archived build_dir localfile make install unwrapped writemakefile )) { delete $self->{$att}; } + if ($method && $method eq "install") { + $self->{"force_update"}++; # name should probably have been force_install + } } +#-> sub CPAN::Distribution::unforce ; +sub unforce { + my($self) = @_; + delete $self->{'force_update'}; +} + +#-> sub CPAN::Distribution::isa_perl ; sub isa_perl { my($self) = @_; my $file = File::Basename::basename($self->id); - return unless $file =~ m{ ^ perl - (5) - ([._-]) - (\d{3}(_[0-4][0-9])?) - \.tar[._-]gz - \z - }xs; - "$1.$3"; + if ($file =~ m{ ^ perl + -? + (5) + ([._-]) + ( + \d{3}(_[0-4][0-9])? + | + \d*[24680]\.\d+ + ) + \.tar[._-]gz + (?!\n)\Z + }xs){ + return "$1.$3"; + } elsif ($self->cpan_comment + && + $self->cpan_comment =~ /isa_perl\(.+?\)/){ + return $1; + } } #-> sub CPAN::Distribution::perl ; @@ -3505,9 +3852,11 @@ sub make { # Emergency brake if they said install Pippi and get newest perl if ($self->isa_perl) { if ( - $self->called_for ne $self->id && ! $self->{'force_update'} + $self->called_for ne $self->id && + ! $self->{force_update} ) { - $CPAN::Frontend->mydie(sprintf qq{ + # if we die here, we break bundles + $CPAN::Frontend->mywarn(sprintf qq{ The most recent version "%s" of the module "%s" comes with the current version of perl (%s). 
I\'ll build that only if you ask for something like @@ -3523,6 +3872,7 @@ or $self->isa_perl, $self->called_for, $self->id); + sleep 5; return; } } $self->get; @@ -3539,7 +3889,10 @@ or $1 || "Had some problem writing Makefile"; defined $self->{'make'} and push @e, - "Has already been processed within this session"; + "Has already been processed within this session"; + + exists $self->{later} and length($self->{later}) and + push @e, $self->{later}; $CPAN::Frontend->myprint(join "", map {" $_\n"} @e) and return if @e; } @@ -3606,6 +3959,7 @@ or } if (-f "Makefile") { $self->{writemakefile} = "YES"; + delete $self->{make_clean}; # if cleaned before, enable next } else { $self->{writemakefile} = qq{NO Makefile.PL refused to write a Makefile.}; @@ -3615,98 +3969,177 @@ or # $self->{writemakefile} .= <$fh>; } } - return if $CPAN::Signal; - if (my @prereq = $self->needs_prereq){ - my $id = $self->id; - $CPAN::Frontend->myprint("---- Dependencies detected ". - "during [$id] -----\n"); + if ($CPAN::Signal){ + delete $self->{force_update}; + return; + } + if (my @prereq = $self->unsat_prereq){ + return 1 if $self->follow_prereqs(@prereq); # signal success to the queuerunner + } + $system = join " ", $CPAN::Config->{'make'}, $CPAN::Config->{make_arg}; + if (system($system) == 0) { + $CPAN::Frontend->myprint(" $system -- OK\n"); + $self->{'make'} = "YES"; + } else { + $self->{writemakefile} ||= "YES"; + $self->{'make'} = "NO"; + $CPAN::Frontend->myprint(" $system -- NOT OK\n"); + } +} + +sub follow_prereqs { + my($self) = shift; + my(@prereq) = @_; + my $id = $self->id; + $CPAN::Frontend->myprint("---- Unsatisfied dependencies detected ". 
+ "during [$id] -----\n"); - for my $p (@prereq) { + for my $p (@prereq) { $CPAN::Frontend->myprint(" $p\n"); - } - my $follow = 0; - if ($CPAN::Config->{prerequisites_policy} eq "follow") { + } + my $follow = 0; + if ($CPAN::Config->{prerequisites_policy} eq "follow") { $follow = 1; - } elsif ($CPAN::Config->{prerequisites_policy} eq "ask") { + } elsif ($CPAN::Config->{prerequisites_policy} eq "ask") { require ExtUtils::MakeMaker; my $answer = ExtUtils::MakeMaker::prompt( "Shall I follow them and prepend them to the queue of modules we are processing right now?", "yes"); $follow = $answer =~ /^\s*y/i; - } else { + } else { local($") = ", "; - $CPAN::Frontend->myprint(" Ignoring dependencies on modules @prereq\n"); - } - if ($follow) { - CPAN::Queue->jumpqueue(@prereq,$id); # requeue yourself - return; - } + $CPAN::Frontend-> + myprint(" Ignoring dependencies on modules @prereq\n"); } - $system = join " ", $CPAN::Config->{'make'}, $CPAN::Config->{make_arg}; - if (system($system) == 0) { - $CPAN::Frontend->myprint(" $system -- OK\n"); - $self->{'make'} = "YES"; - } else { - $self->{writemakefile} ||= "YES"; - $self->{'make'} = "NO"; - $CPAN::Frontend->myprint(" $system -- NOT OK\n"); + if ($follow) { + # color them as dirty + for my $p (@prereq) { + CPAN::Shell->expandany($p)->color_cmd_tmps(0,1); + } + CPAN::Queue->jumpqueue(@prereq,$id); # queue them and requeue yourself + $self->{later} = "Delayed until after prerequisites"; + return 1; # signal success to the queuerunner } } -#-> sub CPAN::Distribution::needs_prereq ; -sub needs_prereq { - my($self) = @_; - return unless -f "Makefile"; # we cannot say much - my $fh = FileHandle->new("<Makefile") or - $CPAN::Frontend->mydie("Couldn't open Makefile: $!"); - local($/) = "\n"; +#-> sub CPAN::Distribution::unsat_prereq ; +sub unsat_prereq { + my($self) = @_; + my $prereq_pm = $self->prereq_pm or return; + my(@need); + NEED: while (my($need_module, $need_version) = each %$prereq_pm) { + my $nmo = 
$CPAN::META->instance("CPAN::Module",$need_module); + # we were too demanding: + next if $nmo->uptodate; + + # if they have not specified a version, we accept any installed one + if (not defined $need_version or + $need_version == 0 or + $need_version eq "undef") { + next if defined $nmo->inst_file; + } - my(@p,@need); - while (<$fh>) { - last if /MakeMaker post_initialize section/; - my($p) = m{^[\#] - \s+PREREQ_PM\s+=>\s+(.+) - }x; - next unless $p; - # warn "Found prereq expr[$p]"; + # We only want to install prereqs if either they're not installed + # or if the installed version is too old. We cannot omit this + # check, because if 'force' is in effect, nobody else will check. + { + local($^W) = 0; + if ( + defined $nmo->inst_file && + ! CPAN::Version->vgt($need_version, $nmo->inst_version) + ){ + CPAN->debug(sprintf "id[%s]inst_file[%s]inst_version[%s]need_version[%s]", + $nmo->id, + $nmo->inst_file, + $nmo->inst_version, + CPAN::Version->readable($need_version) + ); + next NEED; + } + } - while ( $p =~ m/(?:\s)([\w\:]+)=>q\[.*?\],?/g ){ - push @p, $1; + if ($self->{sponsored_mods}{$need_module}++){ + # We have already sponsored it and for some reason it's still + # not available. So we do nothing. Or what should we do? + # if we push it again, we have a potential infinite loop + next; + } + push @need, $need_module; } - last; - } - for my $p (@p) { - my $mo = $CPAN::META->instance("CPAN::Module",$p); - next if $mo->uptodate; - # it's not needed, so don't push it. We cannot omit this step, because - # if 'force' is in effect, nobody else will check. - if ($self->{have_sponsored}{$p}++){ - # We have already sponsored it and for some reason it's still - # not available. So we do nothing. Or what should we do? 
- # if we push it again, we have a potential infinite loop - next; - } - push @need, $p; + @need; +} + +#-> sub CPAN::Distribution::prereq_pm ; +sub prereq_pm { + my($self) = @_; + return $self->{prereq_pm} if + exists $self->{prereq_pm_detected} && $self->{prereq_pm_detected}; + return unless $self->{writemakefile}; # no need to have succeeded + # but we must have run it + my $build_dir = $self->{build_dir} or die "Panic: no build_dir?"; + my $makefile = File::Spec->catfile($build_dir,"Makefile"); + my(%p) = (); + my $fh; + if (-f $makefile + and + $fh = FileHandle->new("<$makefile\0")) { + + local($/) = "\n"; + + # A.Speer @p -> %p, where %p is $p{Module::Name}=Required_Version + while (<$fh>) { + last if /MakeMaker post_initialize section/; + my($p) = m{^[\#] + \s+PREREQ_PM\s+=>\s+(.+) + }x; + next unless $p; + # warn "Found prereq expr[$p]"; + + # Regexp modified by A.Speer to remember actual version of file + # PREREQ_PM hash key wants, then add to + while ( $p =~ m/(?:\s)([\w\:]+)=>q\[(.*?)\],?/g ){ + # In case a prereq is mentioned twice, complain. + if ( defined $p{$1} ) { + warn "Warning: PREREQ_PM mentions $1 more than once, last mention wins"; + } + $p{$1} = $2; + } + last; + } } - return @need; + $self->{prereq_pm_detected}++; + return $self->{prereq_pm} = \%p; } #-> sub CPAN::Distribution::test ; sub test { my($self) = @_; $self->make; - return if $CPAN::Signal; + if ($CPAN::Signal){ + delete $self->{force_update}; + return; + } $CPAN::Frontend->myprint("Running make test\n"); + if (my @prereq = $self->unsat_prereq){ + return 1 if $self->follow_prereqs(@prereq); # signal success to the queuerunner + } EXCUSE: { my @e; - exists $self->{'make'} or push @e, + exists $self->{make} or exists $self->{later} or push @e, "Make had some problems, maybe interrupted? 
Won't test"; exists $self->{'make'} and $self->{'make'} eq 'NO' and - push @e, "Oops, make had returned bad status"; + push @e, "Can't test without successful make"; + + exists $self->{build_dir} or push @e, "Has no own directory"; + $self->{badtestcnt} ||= 0; + $self->{badtestcnt} > 0 and + push @e, "Won't repeat unsuccessful test during this command"; + + exists $self->{later} and length($self->{later}) and + push @e, $self->{later}; - exists $self->{'build_dir'} or push @e, "Has no own directory"; $CPAN::Frontend->myprint(join "", map {" $_\n"} @e) and return if @e; } chdir $self->{'build_dir'} or @@ -3722,9 +4155,10 @@ sub test { my $system = join " ", $CPAN::Config->{'make'}, "test"; if (system($system) == 0) { $CPAN::Frontend->myprint(" $system -- OK\n"); - $self->{'make_test'} = "YES"; + $self->{make_test} = "YES"; } else { - $self->{'make_test'} = "NO"; + $self->{make_test} = "NO"; + $self->{badtestcnt}++; $CPAN::Frontend->myprint(" $system -- NOT OK\n"); } } @@ -3735,7 +4169,9 @@ sub clean { $CPAN::Frontend->myprint("Running make clean\n"); EXCUSE: { my @e; - exists $self->{'build_dir'} or push @e, "Has no own directory"; + exists $self->{make_clean} and $self->{make_clean} eq "YES" and + push @e, "make clean already called once"; + exists $self->{build_dir} or push @e, "Has no own directory"; $CPAN::Frontend->myprint(join "", map {" $_\n"} @e) and return if @e; } chdir $self->{'build_dir'} or @@ -3749,10 +4185,31 @@ sub clean { my $system = join " ", $CPAN::Config->{'make'}, "clean"; if (system($system) == 0) { - $CPAN::Frontend->myprint(" $system -- OK\n"); - $self->force; + $CPAN::Frontend->myprint(" $system -- OK\n"); + + # $self->force; + + # Jost Krieger pointed out that this "force" was wrong because + # it has the effect that the next "install" on this distribution + # will untar everything again. Instead we should bring the + # object's state back to where it is after untarring. 
+ + delete $self->{force_update}; + delete $self->{install}; + delete $self->{writemakefile}; + delete $self->{make}; + delete $self->{make_test}; # no matter if yes or no, tests must be redone + $self->{make_clean} = "YES"; + } else { - # Hmmm, what to do if make clean failed? + # Hmmm, what to do if make clean failed? + + $CPAN::Frontend->myprint(qq{ $system -- NOT OK + +make clean did not succeed, marking directory as unusable for further work. +}); + $self->force("make"); # so that this directory won't be used again + } } @@ -3760,18 +4217,21 @@ sub clean { sub install { my($self) = @_; $self->test; - return if $CPAN::Signal; + if ($CPAN::Signal){ + delete $self->{force_update}; + return; + } $CPAN::Frontend->myprint("Running make install\n"); EXCUSE: { my @e; - exists $self->{'build_dir'} or push @e, "Has no own directory"; + exists $self->{build_dir} or push @e, "Has no own directory"; - exists $self->{'make'} or push @e, + exists $self->{make} or exists $self->{later} or push @e, "Make had some problems, maybe interrupted? Won't install"; exists $self->{'make'} and $self->{'make'} eq 'NO' and - push @e, "Oops, make had returned bad status"; + push @e, "make had returned bad status, install seems impossible"; push @e, "make test had returned bad status, ". "won't install without force" @@ -3783,6 +4243,9 @@ sub install { $self->{'install'} eq "YES" ? 
"Already done" : "Already tried without success"; + exists $self->{later} and length($self->{later}) and + push @e, $self->{later}; + $CPAN::Frontend->myprint(join "", map {" $_\n"} @e) and return if @e; } chdir $self->{'build_dir'} or @@ -3816,6 +4279,7 @@ sub install { qq{to root to install the package\n}); } } + delete $self->{force_update}; } #-> sub CPAN::Distribution::dir ; @@ -3825,10 +4289,49 @@ sub dir { package CPAN::Bundle; +sub undelay { + my $self = shift; + delete $self->{later}; + for my $c ( $self->contains ) { + my $obj = CPAN::Shell->expandany($c) or next; + $obj->undelay; + } +} + +#-> sub CPAN::Bundle::color_cmd_tmps ; +sub color_cmd_tmps { + my($self) = shift; + my($depth) = shift || 0; + my($color) = shift || 0; + # a module needs to recurse to its cpan_file, a distribution needs + # to recurse into its prereq_pms, a bundle needs to recurse into its modules + + return if exists $self->{incommandcolor} + && $self->{incommandcolor}==$color; + $CPAN::Frontend->mydie(sprintf("CPAN.pm panic: deep recursion in ". + "color_cmd_tmps depth[%s] self[%s] id[%s]", + $depth, + $self, + $self->id + )) if $depth>=100; + ##### warn "color_cmd_tmps $depth $color " . 
$self->id; # sleep 1; + + for my $c ( $self->contains ) { + my $obj = CPAN::Shell->expandany($c) or next; + CPAN->debug("c[$c]obj[$obj]") if $CPAN::DEBUG; + $obj->color_cmd_tmps($depth+1,$color); + } + if ($color==0) { + delete $self->{badtestcnt}; + } + $self->{incommandcolor} = $color; +} + #-> sub CPAN::Bundle::as_string ; sub as_string { my($self) = @_; $self->contains; + # following line must be "=", not "||=" because we have a moving target $self->{INST_VERSION} = $self->inst_version; return $self->SUPER::as_string; } @@ -3842,9 +4345,9 @@ sub contains { unless ($parsefile) { # Try to get at it in the cpan directory $self->debug("no parsefile") if $CPAN::DEBUG; - Carp::confess "I don't know a $id" unless $self->{CPAN_FILE}; + Carp::confess "I don't know a $id" unless $self->cpan_file; my $dist = $CPAN::META->instance('CPAN::Distribution', - $self->{CPAN_FILE}); + $self->cpan_file); $dist->get; $self->debug($dist->as_string) if $CPAN::DEBUG; my($todir) = $CPAN::Config->{'cpan_home'}; @@ -3877,7 +4380,7 @@ sub contains { } close $fh; delete $self->{STATUS}; - $self->{CONTAINS} = join ", ", @result; + $self->{CONTAINS} = \@result; $self->debug("CONTAINS[@result]") if $CPAN::DEBUG; unless (@result) { $CPAN::Frontend->mywarn(qq{ @@ -3902,9 +4405,9 @@ sub find_bundle_file { require ExtUtils::Manifest; my $getcwd = $CPAN::Config->{'getcwd'} || 'cwd'; my $cwd = CPAN->$getcwd(); - chdir $where; + chdir $where or $CPAN::Frontend->mydie(qq{Could not chdir to "$where": $!}); ExtUtils::Manifest::mkmanifest(); - chdir $cwd; + chdir $cwd or $CPAN::Frontend->mydie(qq{Could not chdir to "$cwd": $!}); } my $fh = FileHandle->new($manifest) or Carp::croak("Couldn't open $manifest: $!"); @@ -3936,22 +4439,21 @@ sub find_bundle_file { Carp::croak("Couldn't find a Bundle file in $where"); } +# needs to work slightly different from Module::inst_file because of +# cpan_home/Bundle/ directory. 
+ #-> sub CPAN::Bundle::inst_file ; sub inst_file { my($self) = @_; - my($me,$inst_file); - ($me = $self->id) =~ s/.*://; -## my(@me,$inst_file); -## @me = split /::/, $self->id; -## $me[-1] .= ".pm"; - $inst_file = MM->catfile($CPAN::Config->{'cpan_home'}, - "Bundle", "$me.pm"); -## "Bundle", @me); - return $self->{'INST_FILE'} = $inst_file if -f $inst_file; -# $inst_file = + return $self->{INST_FILE} if + exists $self->{INST_FILE} && $self->{INST_FILE}; + my($inst_file); + my(@me); + @me = split /::/, $self->id; + $me[-1] .= ".pm"; + $inst_file = MM->catfile($CPAN::Config->{'cpan_home'}, @me); + return $self->{INST_FILE} = $inst_file if -f $inst_file; $self->SUPER::inst_file; -# return $self->{'INST_FILE'} = $inst_file if -f $inst_file; -# return $self->{'INST_FILE'}; # even if undefined? } #-> sub CPAN::Bundle::rematein ; @@ -3960,7 +4462,7 @@ sub rematein { $self->debug("self[$self] meth[$meth]") if $CPAN::DEBUG; my($id) = $self->id; Carp::croak "Can't $meth $id, don't have an associated bundle file. :-(\n" - unless $self->inst_file || $self->{CPAN_FILE}; + unless $self->inst_file || $self->cpan_file; my($s,%fail); for $s ($self->contains) { my($type) = $s =~ m|/| ? 'CPAN::Distribution' : @@ -3973,14 +4475,36 @@ explicitly a file $s. sleep 3; } # possibly noisy action: + $self->debug("type[$type] s[$s]") if $CPAN::DEBUG; my $obj = $CPAN::META->instance($type,$s); $obj->$meth(); - my $success = $obj->can("uptodate") ? $obj->uptodate : 0; - $success ||= $obj->{'install'} && $obj->{'install'} eq "YES"; - $fail{$s} = 1 unless $success; + if ($obj->isa(CPAN::Bundle) + && + exists $obj->{install_failed} + && + ref($obj->{install_failed}) eq "HASH" + ) { + for (keys %{$obj->{install_failed}}) { + $self->{install_failed}{$_} = undef; # propagate faiure up + # to me in a + # recursive call + $fail{$s} = 1; # the bundle itself may have succeeded but + # not all children + } + } else { + my $success; + $success = $obj->can("uptodate") ? 
$obj->uptodate : 0; + $success ||= $obj->{'install'} && $obj->{'install'} eq "YES"; + if ($success) { + delete $self->{install_failed}{$s}; + } else { + $fail{$s} = 1; + } + } } + # recap with less noise - if ( $meth eq "install") { + if ( $meth eq "install" ) { if (%fail) { require Text::Wrap; my $raw = sprintf(qq{Bundle summary: @@ -3990,9 +4514,21 @@ The following items in bundle %s had installation problems:}, $CPAN::Frontend->myprint(Text::Wrap::fill("","",$raw)); $CPAN::Frontend->myprint("\n"); my $paragraph = ""; + my %reported; for $s ($self->contains) { - $paragraph .= "$s " if $fail{$s}; + if ($fail{$s}){ + $paragraph .= "$s "; + $self->{install_failed}{$s} = undef; + $reported{$s} = undef; + } } + my $report_propagated; + for $s (sort keys %{$self->{install_failed}}) { + next if exists $reported{$s}; + $paragraph .= "and the following items had problems +during recursive bundle calls: " unless $report_propagated++; + $paragraph .= "$s "; + } $CPAN::Frontend->myprint(Text::Wrap::fill(" "," ",$paragraph)); $CPAN::Frontend->myprint("\n"); } else { @@ -4015,7 +4551,11 @@ sub get { shift->rematein('get',@_); } #-> sub CPAN::Bundle::make ; sub make { shift->rematein('make',@_); } #-> sub CPAN::Bundle::test ; -sub test { shift->rematein('test',@_); } +sub test { + my $self = shift; + $self->{badtestcnt} ||= 0; + $self->rematein('test',@_); +} #-> sub CPAN::Bundle::install ; sub install { my $self = shift; @@ -4035,6 +4575,49 @@ No File found for bundle } . $self->id . 
qq{\n}), return; package CPAN::Module; +# Accessors +# sub cpan_userid { shift->{RO}{CPAN_USERID} } +sub userid { + my $self = shift; + return unless exists $self->{RO}{userid}; + $self->{RO}{userid}; +} +sub description { shift->{RO}{description} } + +sub undelay { + my $self = shift; + delete $self->{later}; + if ( my $dist = CPAN::Shell->expand("Distribution", $self->cpan_file) ) { + $dist->undelay; + } +} + +#-> sub CPAN::Module::color_cmd_tmps ; +sub color_cmd_tmps { + my($self) = shift; + my($depth) = shift || 0; + my($color) = shift || 0; + # a module needs to recurse to its cpan_file + + return if exists $self->{incommandcolor} + && $self->{incommandcolor}==$color; + $CPAN::Frontend->mydie(sprintf("CPAN.pm panic: deep recursion in ". + "color_cmd_tmps depth[%s] self[%s] id[%s]", + $depth, + $self, + $self->id + )) if $depth>=100; + ##### warn "color_cmd_tmps $depth $color " . $self->id; # sleep 1; + + if ( my $dist = CPAN::Shell->expand("Distribution", $self->cpan_file) ) { + $dist->color_cmd_tmps($depth+1,$color); + } + if ($color==0) { + delete $self->{badtestcnt}; + } + $self->{incommandcolor} = $color; +} + #-> sub CPAN::Module::as_glimpse ; sub as_glimpse { my($self) = @_; @@ -4056,11 +4639,11 @@ sub as_string { local($^W) = 0; push @m, $class, " id = $self->{ID}\n"; my $sprintf = " %-12s %s\n"; - push @m, sprintf($sprintf, 'DESCRIPTION', $self->{description}) - if $self->{description}; + push @m, sprintf($sprintf, 'DESCRIPTION', $self->description) + if $self->description; my $sprintf2 = " %-12s %s (%s)\n"; my($userid); - if ($userid = $self->{'CPAN_USERID'} || $self->{'userid'}){ + if ($userid = $self->cpan_userid || $self->userid){ my $author; if ($author = CPAN::Shell->expand('Author',$userid)) { my $email = ""; @@ -4076,10 +4659,10 @@ sub as_string { ); } } - push @m, sprintf($sprintf, 'CPAN_VERSION', $self->{CPAN_VERSION}) - if $self->{CPAN_VERSION}; - push @m, sprintf($sprintf, 'CPAN_FILE', $self->{CPAN_FILE}) - if $self->{CPAN_FILE}; + push @m, 
sprintf($sprintf, 'CPAN_VERSION', $self->cpan_version) + if $self->cpan_version; + push @m, sprintf($sprintf, 'CPAN_FILE', $self->cpan_file) + if $self->cpan_file; my $sprintf3 = " %-12s %1s%1s%1s%1s (%s,%s,%s,%s)\n"; my(%statd,%stats,%statl,%stati); @statd{qw,? i c a b R M S,} = qw,unknown idea @@ -4096,24 +4679,28 @@ sub as_string { push @m, sprintf( $sprintf3, 'DSLI_STATUS', - $self->{statd}, - $self->{stats}, - $self->{statl}, - $self->{stati}, - $statd{$self->{statd}}, - $stats{$self->{stats}}, - $statl{$self->{statl}}, - $stati{$self->{stati}} - ) if $self->{statd}; + $self->{RO}{statd}, + $self->{RO}{stats}, + $self->{RO}{statl}, + $self->{RO}{stati}, + $statd{$self->{RO}{statd}}, + $stats{$self->{RO}{stats}}, + $statl{$self->{RO}{statl}}, + $stati{$self->{RO}{stati}} + ) if $self->{RO}{statd}; my $local_file = $self->inst_file; if ($local_file) { $self->{MANPAGE} ||= $self->manpage_headline($local_file); } my($item); - for $item (qw/MANPAGE CONTAINS/) { + for $item (qw/MANPAGE/) { push @m, sprintf($sprintf, $item, $self->{$item}) if exists $self->{$item}; } + for $item (qw/CONTAINS/) { + push @m, sprintf($sprintf, $item, join(" ",@{$self->{$item}})) + if exists $self->{$item} && @{$self->{$item}}; + } push @m, sprintf($sprintf, 'INST_FILE', $local_file || "(not installed)"); push @m, sprintf($sprintf, 'INST_VERSION', @@ -4124,7 +4711,7 @@ sub as_string { sub manpage_headline { my($self,$local_file) = @_; my(@local_file) = $local_file; - $local_file =~ s/\.pm\z/.pod/; + $local_file =~ s/\.pm(?!\n)\Z/.pod/; push @local_file, $local_file; my(@result,$locf); for $locf (@local_file) { @@ -4151,19 +4738,23 @@ sub manpage_headline { #-> sub CPAN::Module::cpan_file ; sub cpan_file { my $self = shift; - CPAN->debug($self->id) if $CPAN::DEBUG; - unless (defined $self->{'CPAN_FILE'}) { + CPAN->debug(sprintf "id[%s]", $self->id) if $CPAN::DEBUG; + unless (defined $self->{RO}{CPAN_FILE}) { CPAN::Index->reload; } - if (exists $self->{'CPAN_FILE'} && defined 
$self->{'CPAN_FILE'}){ - return $self->{'CPAN_FILE'}; - } elsif (exists $self->{'userid'} && defined $self->{'userid'}) { - my $fullname = $CPAN::META->instance(CPAN::Author, - $self->{'userid'})->fullname; - my $email = $CPAN::META->instance(CPAN::Author, - $self->{'userid'})->email; + if (exists $self->{RO}{CPAN_FILE} && defined $self->{RO}{CPAN_FILE}){ + return $self->{RO}{CPAN_FILE}; + } elsif ( defined $self->userid ) { + my $fullname = $CPAN::META->instance("CPAN::Author", + $self->userid)->fullname; + my $email = $CPAN::META->instance("CPAN::Author", + $self->userid)->email; unless (defined $fullname && defined $email) { - return "Contact Author $self->{userid} (Try ``a $self->{userid}'')"; + my $userid = $self->userid; + return sprintf("Contact Author %s (Try 'a %s')", + $userid, + $userid, + ); } return "Contact Author $fullname <$email>"; } else { @@ -4176,17 +4767,14 @@ sub cpan_file { #-> sub CPAN::Module::cpan_version ; sub cpan_version { my $self = shift; - $self->{'CPAN_VERSION'} = 'undef' - unless defined $self->{'CPAN_VERSION'}; # I believe this is - # always a bug in the - # index and should be - # reported as such, - # but usually I find - # out such an error - # and do not want to - # provoke too many - # bugreports - $self->{'CPAN_VERSION'}; + + $self->{RO}{CPAN_VERSION} = 'undef' + unless defined $self->{RO}{CPAN_VERSION}; + # I believe this is always a bug in the index and should be reported + # as such, but usually I find out such an error and do not want to + # provoke too many bugreports + + $self->{RO}{CPAN_VERSION}; } #-> sub CPAN::Module::force ; @@ -4198,7 +4786,9 @@ sub force { #-> sub CPAN::Module::rematein ; sub rematein { my($self,$meth) = @_; - $self->debug($self->id) if $CPAN::DEBUG; + $CPAN::Frontend->myprint(sprintf("Running %s for module %s\n", + $meth, + $self->id)); my $cpan_file = $self->cpan_file; if ($cpan_file eq "N/A" || $cpan_file =~ /^Contact Author/){ $CPAN::Frontend->mywarn(sprintf qq{ @@ -4206,7 +4796,7 @@ sub 
rematein { Either the module has not yet been uploaded to CPAN, or it is temporary unavailable. Please contact the author to find out - more about the status. Try ``i %s''. + more about the status. Try 'i %s'. }, $self->id, $self->id, @@ -4215,8 +4805,9 @@ sub rematein { } my $pack = $CPAN::META->instance('CPAN::Distribution',$cpan_file); $pack->called_for($self->id); - $pack->force if exists $self->{'force_update'}; + $pack->force($meth) if exists $self->{'force_update'}; $pack->$meth(); + $pack->unforce if $pack->can("unforce") && exists $self->{'force_update'}; delete $self->{'force_update'}; } @@ -4229,9 +4820,16 @@ sub cvs_import { shift->rematein('cvs_import') } #-> sub CPAN::Module::get ; sub get { shift->rematein('get',@_); } #-> sub CPAN::Module::make ; -sub make { shift->rematein('make') } +sub make { + my $self = shift; + $self->rematein('make'); +} #-> sub CPAN::Module::test ; -sub test { shift->rematein('test') } +sub test { + my $self = shift; + $self->{badtestcnt} ||= 0; + $self->rematein('test',@_); +} #-> sub CPAN::Module::uptodate ; sub uptodate { my($self) = @_; @@ -4245,9 +4843,11 @@ sub uptodate { local($^W)=0; if ($inst_file && - $have >= $latest + ! CPAN::Version->vgt($latest, $have) ) { - return 1; + CPAN->debug("returning uptodate. inst_file[$inst_file] ". + "latest[$latest] have[$have]") if $CPAN::DEBUG; + return 1; } return; } @@ -4304,14 +4904,49 @@ sub inst_version { my($self) = @_; my $parsefile = $self->inst_file or return; local($^W) = 0 if $] < 5.00303 && $ExtUtils::MakeMaker::VERSION < 5.38; - # warn "HERE"; - my $have = MM->parse_version($parsefile) || "undef"; + my $have; + + # there was a bug in 5.6.0 that let lots of unini warnings out of + # parse_version. Fixed shortly after 5.6.0 by PMQS. We can remove + # the following workaround after 5.6.1 is out. 
+ local($SIG{__WARN__}) = sub { my $w = shift; + return if $w =~ /uninitialized/i; + warn $w; + }; + + $have = MM->parse_version($parsefile) || "undef"; + $have =~ s/^ //; # since the %vd hack these two lines here are needed + $have =~ s/ $//; # trailing whitespace happens all the time + + # My thoughts about why %vd processing should happen here + + # Alt1 maintain it as string with leading v: + # read index files do nothing + # compare it use utility for compare + # print it do nothing + + # Alt2 maintain it as what is is + # read index files convert + # compare it use utility because there's still a ">" vs "gt" issue + # print it use CPAN::Version for print + + # Seems cleaner to hold it in memory as a string starting with a "v" + + # If the author of this module made a mistake and wrote a quoted + # "v1.13" instead of v1.13, we simply leave it at that with the + # effect that *we* will treat it like a v-tring while the rest of + # perl won't. Seems sensible when we consider that any action we + # could take now would just add complexity. 
+ + $have = CPAN::Version->readable($have); + $have =~ s/\s*//g; # stringify to float around floating point issues - $have; + $have; # no stringify needed, \s* above matches always } package CPAN::Tarzip; +# CPAN::Tarzip::gzip sub gzip { my($class,$read,$write) = @_; if ($CPAN::META->has_inst("Compress::Zlib")) { @@ -4326,10 +4961,12 @@ sub gzip { $fhw->close; return 1; } else { - system("$CPAN::Config->{'gzip'} -c $read > $write")==0; + system("$CPAN::Config->{gzip} -c $read > $write")==0; } } + +# CPAN::Tarzip::gunzip sub gunzip { my($class,$read,$write) = @_; if ($CPAN::META->has_inst("Compress::Zlib")) { @@ -4346,26 +4983,43 @@ sub gunzip { $fhw->close; return 1; } else { - system("$CPAN::Config->{'gzip'} -dc $read > $write")==0; + system("$CPAN::Config->{gzip} -dc $read > $write")==0; } } + +# CPAN::Tarzip::gtest sub gtest { my($class,$read) = @_; - if ($CPAN::META->has_inst("Compress::Zlib")) { - my($buffer); + # After I had reread the documentation in zlib.h, I discovered that + # uncompressed files do not lead to an gzerror (anymore?). + if ( $CPAN::META->has_inst("Compress::Zlib") ) { + my($buffer,$len); + $len = 0; my $gz = Compress::Zlib::gzopen($read, "rb") - or $CPAN::Frontend->mydie("Cannot open $read: $!\n"); - 1 while $gz->gzread($buffer) > 0 ; - $CPAN::Frontend->mydie("Error reading from $read: $!\n") - if $gz->gzerror != Compress::Zlib::Z_STREAM_END(); - $gz->gzclose() ; - return 1; + or $CPAN::Frontend->mydie(sprintf("Cannot gzopen %s: %s\n", + $read, + $Compress::Zlib::gzerrno)); + while ($gz->gzread($buffer) > 0 ){ + $len += length($buffer); + $buffer = ""; + } + my $err = $gz->gzerror; + my $success = ! 
$err || $err == Compress::Zlib::Z_STREAM_END(); + if ($len == -s $read){ + $success = 0; + CPAN->debug("hit an uncompressed file") if $CPAN::DEBUG; + } + $gz->gzclose(); + CPAN->debug("err[$err]success[$success]") if $CPAN::DEBUG; + return $success; } else { - return system("$CPAN::Config->{'gzip'} -dt $read")==0; + return system("$CPAN::Config->{gzip} -dt $read")==0; } } + +# CPAN::Tarzip::TIEHANDLE sub TIEHANDLE { my($class,$file) = @_; my $ret; @@ -4375,7 +5029,7 @@ sub TIEHANDLE { die "Could not gzopen $file"; $ret = bless {GZ => $gz}, $class; } else { - my $pipe = "$CPAN::Config->{'gzip'} --decompress --stdout $file |"; + my $pipe = "$CPAN::Config->{gzip} --decompress --stdout $file |"; my $fh = FileHandle->new($pipe) or die "Could pipe[$pipe]: $!"; binmode $fh; $ret = bless {FH => $fh}, $class; @@ -4383,6 +5037,8 @@ sub TIEHANDLE { $ret; } + +# CPAN::Tarzip::READLINE sub READLINE { my($self) = @_; if (exists $self->{GZ}) { @@ -4397,6 +5053,8 @@ sub READLINE { } } + +# CPAN::Tarzip::READ sub READ { my($self,$ref,$length,$offset) = @_; die "read with offset not implemented" if defined $offset; @@ -4410,55 +5068,76 @@ sub READ { } } + +# CPAN::Tarzip::DESTROY sub DESTROY { - my($self) = @_; - if (exists $self->{GZ}) { - my $gz = $self->{GZ}; - $gz->gzclose(); - } else { - my $fh = $self->{FH}; - $fh->close if defined $fh; - } - undef $self; + my($self) = @_; + if (exists $self->{GZ}) { + my $gz = $self->{GZ}; + $gz->gzclose() if defined $gz; # hard to say if it is allowed + # to be undef ever. AK, 2000-09 + } else { + my $fh = $self->{FH}; + $fh->close if defined $fh; + } + undef $self; } + +# CPAN::Tarzip::untar sub untar { my($class,$file) = @_; - # had to disable, because version 0.07 seems to be buggy - if (MM->maybe_command($CPAN::Config->{'gzip'}) + if (0) { # makes changing order easier + } elsif (MM->maybe_command($CPAN::Config->{gzip}) && MM->maybe_command($CPAN::Config->{'tar'})) { - my $system = "$CPAN::Config->{'gzip'} --decompress --stdout " . 
- "< $file | $CPAN::Config->{tar} xvf -"; + my($system); + my $is_compressed = $class->gtest($file); + if ($is_compressed) { + $system = "$CPAN::Config->{gzip} --decompress --stdout " . + "< $file | $CPAN::Config->{tar} xvf -"; + } else { + $system = "$CPAN::Config->{tar} xvf $file"; + } if (system($system) != 0) { - # people find the most curious tar binaries that cannot handle - # pipes - my $system = "$CPAN::Config->{'gzip'} --decompress $file"; - if (system($system)==0) { - $CPAN::Frontend->myprint(qq{Uncompressed $file successfully\n}); - } else { - $CPAN::Frontend->mydie( - qq{Couldn\'t uncompress $file\n} - ); - } - $file =~ s/\.gz\z//; - $system = "$CPAN::Config->{tar} xvf $file"; - $CPAN::Frontend->myprint(qq{Using Tar:$system:\n}); - if (system($system)==0) { - $CPAN::Frontend->myprint(qq{Untarred $file successfully\n}); - } else { - $CPAN::Frontend->mydie(qq{Couldn\'t untar $file\n}); - } - return 1; + # people find the most curious tar binaries that cannot handle + # pipes + if ($is_compressed) { + (my $ungzf = $file) =~ s/\.gz(?!\n)\Z//; + if (CPAN::Tarzip->gunzip($file, $ungzf)) { + $CPAN::Frontend->myprint(qq{Uncompressed $file successfully\n}); + } else { + $CPAN::Frontend->mydie(qq{Couldn\'t uncompress $file\n}); + } + $file = $ungzf; + } + $system = "$CPAN::Config->{tar} xvf $file"; + $CPAN::Frontend->myprint(qq{Using Tar:$system:\n}); + if (system($system)==0) { + $CPAN::Frontend->myprint(qq{Untarred $file successfully\n}); + } else { + $CPAN::Frontend->mydie(qq{Couldn\'t untar $file\n}); + } + return 1; } else { - return 1; + return 1; } } elsif ($CPAN::META->has_inst("Archive::Tar") && $CPAN::META->has_inst("Compress::Zlib") ) { my $tar = Archive::Tar->new($file,1); - $tar->extract($tar->list_files); # I'm pretty sure we have nothing - # that isn't compressed + my $af; # archive file + my @af; + for $af ($tar->list_files) { + if ($af =~ m!^(/|\.\./)!) { + $CPAN::Frontend->mydie("ALERT: Archive contains ". 
+ "illegal member [$af]"); + } + $CPAN::Frontend->myprint("$af\n"); + push @af, $af; + return if $CPAN::Signal; + } + $tar->extract(@af); ExtUtils::MM_MacOS::convert_files([$tar->list_files], 1) if ($^O eq 'MacOS'); @@ -4473,6 +5152,119 @@ is available. Can\'t continue. } } +sub unzip { + my($class,$file) = @_; + if ($CPAN::META->has_inst("Archive::Zip")) { + # blueprint of the code from Archive::Zip::Tree::extractTree(); + my $zip = Archive::Zip->new(); + my $status; + $status = $zip->read($file); + die "Read of file[$file] failed\n" if $status != Archive::Zip::AZ_OK(); + $CPAN::META->debug("Successfully read file[$file]") if $CPAN::DEBUG; + my @members = $zip->members(); + for my $member ( @members ) { + my $af = $member->fileName(); + if ($af =~ m!^(/|\.\./)!) { + $CPAN::Frontend->mydie("ALERT: Archive contains ". + "illegal member [$af]"); + } + my $status = $member->extractToFileNamed( $af ); + $CPAN::META->debug("af[$af]status[$status]") if $CPAN::DEBUG; + die "Extracting of file[$af] from zipfile[$file] failed\n" if + $status != Archive::Zip::AZ_OK(); + return if $CPAN::Signal; + } + return 1; + } else { + my $unzip = $CPAN::Config->{unzip} or + $CPAN::Frontend->mydie("Cannot unzip, no unzip program available"); + my @system = ($unzip, $file); + return system(@system) == 0; + } +} + + +package CPAN::Version; +# CPAN::Version::vcmp courtesy Jost Krieger +sub vcmp { + my($self,$l,$r) = @_; + local($^W) = 0; + CPAN->debug("l[$l] r[$r]") if $CPAN::DEBUG; + + return 0 if $l eq $r; # short circuit for quicker success + + if ($l=~/^v/ <=> $r=~/^v/) { + for ($l,$r) { + next if /^v/; + $_ = $self->float2vv($_); + } + } + + return + ($l ne "undef") <=> ($r ne "undef") || + ($] >= 5.006 && + $l =~ /^v/ && + $r =~ /^v/ && + $self->vstring($l) cmp $self->vstring($r)) || + $l <=> $r || + $l cmp $r; +} + +sub vgt { + my($self,$l,$r) = @_; + $self->vcmp($l,$r) > 0; +} + +sub vstring { + my($self,$n) = @_; + $n =~ s/^v// or die "CPAN::Version::vstring() called with invalid 
arg [$n]"; + pack "U*", split /\./, $n; +} + +# vv => visible vstring +sub float2vv { + my($self,$n) = @_; + my($rev) = int($n); + $rev ||= 0; + my($mantissa) = $n =~ /\.(\d{1,12})/; # limit to 12 digits so that + # architecture cannot + # influnce + $mantissa ||= 0; + $mantissa .= "0" while length($mantissa)%3; + my $ret = "v" . $rev; + while ($mantissa) { + $mantissa =~ s/(\d{1,3})// or + die "Panic: length>0 but not a digit? mantissa[$mantissa]"; + $ret .= ".".int($1); + } + # warn "n[$n]ret[$ret]"; + $ret; +} + +sub readable { + my($self,$n) = @_; + $n =~ /^([\w\-\+\.]+)/; + + return $1 if defined $1 && length($1)>0; + # if the first user reaches version v43, he will be treated as "+". + # We'll have to decide about a new rule here then, depending on what + # will be the prevailing versioning behavior then. + + if ($] < 5.006) { # or whenever v-strings were introduced + # we get them wrong anyway, whatever we do, because 5.005 will + # have already interpreted 0.2.4 to be "0.24". So even if he + # indexer sends us something like "v0.2.4" we compare wrongly. + + # And if they say v1.2, then the old perl takes it as "v12" + + $CPAN::Frontend->mywarn("Suspicious version string seen [$n]"); + return $n; + } + my $better = sprintf "v%vd", $n; + CPAN->debug("n[$n] better[$better]") if $CPAN::DEBUG; + return $better; +} + package CPAN; 1; @@ -4584,10 +5376,10 @@ also is run unconditionally. But for CPAN checks if an install is actually needed for it and prints I<module up to date> in the case that the distribution file containing -the module doesnE<39>t need to be updated. +the module doesn't need to be updated. CPAN also keeps track of what it has done within the current session -and doesnE<39>t try to build a package a second time regardless if it +and doesn't try to build a package a second time regardless if it succeeded or not. 
The C<force> command takes as a first argument the method to invoke (currently: C<make>, C<test>, or C<install>) and executes the command from scratch. @@ -4659,7 +5451,7 @@ installation. You start on one architecture with the help of a Bundle file produced earlier. CPAN installs the whole Bundle for you, but when you try to repeat the job on the second architecture, CPAN responds with a C<"Foo up to date"> message for all modules. So you -invoke CPAN's recompile on the second architecture and youE<39>re done. +invoke CPAN's recompile on the second architecture and you're done. Another popular use for C<recompile> is to act as a rescue in case your perl breaks binary compatibility. If one of the modules that CPAN uses @@ -4704,7 +5496,7 @@ so you would have to say The first example will be driven by an object of the class CPAN::Module, the second by an object of class CPAN::Distribution. -=head2 ProgrammerE<39>s interface +=head2 Programmer's interface If you do not enter the shell, the available shell commands are both available as methods (C<CPAN::Shell-E<gt>install(...)>) and as @@ -4749,8 +5541,11 @@ functionalities that are available in the shell. print "No VERSION in ", $mod->id, "\n"; } + # find out which distribution on CPAN contains a module: + print CPAN::Shell->expand("Module","Apache::Constants")->cpan_file + Or if you want to write a cronjob to watch The CPAN, you could list -all modules that need updating: +all modules that need updating. First a quick and dirty way: perl -e 'use CPAN; CPAN::Shell->r;' @@ -4880,17 +5675,18 @@ enthusiasm). =head2 Debugging -The debugging of this module is pretty difficult, because we have +The debugging of this module is a bit complex, because we have interferences of the software producing the indices on CPAN, of the mirroring process on CPAN, of packaging, of configuration, of synchronicity, and of bugs within CPAN.pm. 
-In interactive mode you can try "o debug" which will list options for -debugging the various parts of the package. The output may not be very -useful for you as it's just a by-product of my own testing, but if you -have an idea which part of the package may have a bug, it's sometimes -worth to give it a try and send me more specific output. You should -know that "o debug" has built-in completion support. +For code debugging in interactive mode you can try "o debug" which +will list options for debugging the various parts of the code. You +should know that "o debug" has built-in completion support. + +For data debugging there is the C<dump> command which takes the same +arguments as make/test/install and outputs the object's Data::Dumper +dump. =head2 Floppy, Zip, Offline Mode @@ -4918,7 +5714,10 @@ defined: build_cache size of cache for directories to build modules build_dir locally accessible directory to build modules index_expire after this many days refetch index files + cache_metadata use serializer to cache metadata cpan_home local directory reserved for this package + dontload_hash anonymous hash: modules in the keys will not be + loaded by the CPAN::has_inst() routine gzip location of external program gzip inactivity_timeout breaks interactive Makefile.PLs after this many seconds inactivity. Set to 0 to never break. @@ -4973,7 +5772,8 @@ works like the corresponding perl commands. =head2 Note on urllist parameter's format urllist parameters are URLs according to RFC 1738. We do a little -guessing if your URL is not compliant, but if you have problems with file URLs, please try the correct format. Either: +guessing if your URL is not compliant, but if you have problems with +file URLs, please try the correct format. Either: file://localhost/whatever/ftp/pub/CPAN/ @@ -5048,7 +5848,13 @@ unattained. 
=head1 WORKING WITH CPAN.pm BEHIND FIREWALLS Thanks to Graham Barr for contributing the following paragraphs about -the interaction between perl, and various firewall configurations. +the interaction between perl, and various firewall configurations. For +further informations on firewalls, it is recommended to consult the +documentation that comes with the ncftp program. If you are unable to +go through the firewall with a simple Perl setup, it is very likely +that you can configure ncftp so that it works for your firewall. + +=head2 Three basic types of firewalls Firewalls can be categorized into three basic types. @@ -5091,7 +5897,7 @@ There are two that I can think off. =item SOCKS If you are using a SOCKS firewall you will need to compile perl and link -it with the SOCKS library, this is what is normally called a ``socksified'' +it with the SOCKS library, this is what is normally called a 'socksified' perl. With this executable you will be able to connect to servers outside the firewall as if it is not there. @@ -5105,12 +5911,120 @@ special compiling is need as you can access hosts directly. =back +=head2 Configuring lynx or ncftp for going through a firewall + +If you can go through your firewall with e.g. lynx, presumably with a +command such as + + /usr/local/bin/lynx -pscott:tiger + +then you would configure CPAN.pm with the command + + o conf lynx "/usr/local/bin/lynx -pscott:tiger" + +That's all. Similarly for ncftp or ftp, you would configure something +like + + o conf ncftp "/usr/bin/ncftp -f /home/scott/ncftplogin.cfg" + +Your milage may vary... + +=head1 FAQ + +=over + +=item 1) I installed a new version of module X but CPAN keeps saying, + I have the old version installed + +Most probably you B<do> have the old version installed. This can +happen if a module installs itself into a different directory in the +@INC path than it was previously installed. 
This is not really a +CPAN.pm problem, you would have the same problem when installing the +module manually. The easiest way to prevent this behaviour is to add +the argument C<UNINST=1> to the C<make install> call, and that is why +many people add this argument permanently by configuring + + o conf make_install_arg UNINST=1 + +=item 2) So why is UNINST=1 not the default? + +Because there are people who have their precise expectations about who +may install where in the @INC path and who uses which @INC array. In +fine tuned environments C<UNINST=1> can cause damage. + +=item 3) When I install bundles or multiple modules with one command + there is too much output to keep track of + +You may want to configure something like + + o conf make_arg "| tee -ai /root/.cpan/logs/make.out" + o conf make_install_arg "| tee -ai /root/.cpan/logs/make_install.out" + +so that STDOUT is captured in a file for later inspection. + + +=item 4) I am not root, how can I install a module in a personal + directory? + +You will most probably like something like this: + + o conf makepl_arg "LIB=~/myperl/lib \ + INSTALLMAN1DIR=~/myperl/man/man1 \ + INSTALLMAN3DIR=~/myperl/man/man3" + install Sybase::Sybperl + +You can make this setting permanent like all C<o conf> settings with +C<o conf commit>. + +You will have to add ~/myperl/man to the MANPATH environment variable +and also tell your perl programs to look into ~/myperl/lib, e.g. by +including + + use lib "$ENV{HOME}/myperl/lib"; + +or setting the PERL5LIB environment variable. + +Another thing you should bear in mind is that the UNINST parameter +should never be set if you are not root. + +=item 5) How to get a package, unwrap it, and make a change before + building it? + + look Sybase::Sybperl + +=item 6) I installed a Bundle and had a couple of fails. When I + retried, everything resolved nicely. Can this be fixed to work + on first try? 
+ +The reason for this is that CPAN does not know the dependencies of all +modules when it starts out. To decide about the additional items to +install, it just uses data found in the generated Makefile. An +undetected missing piece breaks the process. But it may well be that +your Bundle installs some prerequisite later than some depending item +and thus your second try is able to resolve everything. Please note, +CPAN.pm does not know the dependency tree in advance and cannot sort +the queue of things to install in a topologically correct order. It +resolves perfectly well IFF all modules declare the prerequisites +correctly with the PREREQ_PM attribute to MakeMaker. For bundles which +fail and you need to install often, it is recommended sort the Bundle +definition file manually. It is planned to improve the metadata +situation for dependencies on CPAN in general, but this will still +take some time. + +=item 7) In our intranet we have many modules for internal use. How + can I integrate these modules with CPAN.pm but without uploading + the modules to CPAN? + +Have a look at the CPAN::Site module. + +=back + =head1 BUGS We should give coverage for B<all> of the CPAN and not just the PAUSE part, right? In this discussion CPAN and PAUSE have become equal -- -but they are not. PAUSE is authors/ and modules/. CPAN is PAUSE plus -the clpa/, doc/, misc/, ports/, src/, scripts/. +but they are not. PAUSE is authors/, modules/ and scripts/. CPAN is +PAUSE plus the clpa/, doc/, misc/, ports/, and src/. Future development should be directed towards a better integration of the other parts. diff --git a/lib/CPAN/FirstTime.pm b/lib/CPAN/FirstTime.pm index 0e795da4fb..9f8366e073 100644 --- a/lib/CPAN/FirstTime.pm +++ b/lib/CPAN/FirstTime.pm @@ -16,7 +16,7 @@ use FileHandle (); use File::Basename (); use File::Path (); use vars qw($VERSION); -$VERSION = substr q$Revision: 1.38 $, 10; +$VERSION = substr q$Revision: 1.46 $, 10; =head1 NAME @@ -149,7 +149,7 @@ next question. 
print qq{ How big should the disk cache be for keeping the build directories -with all the intermediate files? +with all the intermediate files\? }; @@ -174,6 +174,20 @@ disable the cache scanning with 'never'. } while ($ans ne 'atstart' && $ans ne 'never'); $CPAN::Config->{scan_cache} = $ans; + print qq{ + +To considerably speed up the initial CPAN shell startup, it is +possible to use Storable to create a cache of metadata. If Storable +is not available, the normal index mechanism will be used. + +}; + + defined($default = $CPAN::Config->{cache_metadata}) or $default = 1; + do { + $ans = prompt("Cache metadata (yes/no)?", ($default ? 'yes' : 'no')); + } while ($ans !~ /^\s*[yn]/i); + $CPAN::Config->{cache_metadata} = ($ans =~ /^\s*y/i ? 1 : 0); + # # prerequisites_policy # Do we follow PREREQ_PM? @@ -188,7 +202,7 @@ policy to one of the three values. }; - $default = $CPAN::Config->{prerequisites_policy} || 'follow'; + $default = $CPAN::Config->{prerequisites_policy} || 'ask'; do { $ans = prompt("Policy on building prerequisites (follow, ask or ignore)?", @@ -361,7 +375,8 @@ sub conf_sites { File::Copy::copy($m,$mby) or die "Could not update $mby: $!"; } my $loopcount = 0; - while () { + local $^T = time; + while ($mby) { if ( ! -f $mby ){ print qq{You have no $mby I\'m trying to fetch one @@ -383,6 +398,7 @@ sub conf_sites { } } read_mirrored_by($mby); + bring_your_own(); } sub find_exe { @@ -424,7 +440,7 @@ sub picklist { } sub read_mirrored_by { - my($local) = @_; + my $local = shift or return; my(%all,$url,$expected_size,$default,$ans,$host,$dst,$country,$continent,@location); my $fh = FileHandle->new; $fh->open($local) or die "Couldn't open $local: $!"; @@ -512,25 +528,33 @@ http: -- that host a CPAN mirror. 
@urls = picklist (\@urls, $prompt, $default); foreach (@urls) { s/ \(.*\)//; } - %seen = map (($_ => 1), @urls); + push @{$CPAN::Config->{urllist}}, @urls; +} +sub bring_your_own { + my %seen = map (($_ => 1), @{$CPAN::Config->{urllist}}); + my($ans,@urls); do { - $ans = prompt ("Enter another URL or RETURN to quit:", ""); + my $prompt = "Enter another URL or RETURN to quit:"; + unless (%seen) { + $prompt = qq{CPAN.pm needs at least one URL where it can fetch CPAN files from. + +Please enter your CPAN site:}; + } + $ans = prompt ($prompt, ""); if ($ans) { - $ans =~ s|/?$|/|; # has to end with one slash + $ans =~ s|/?\z|/|; # has to end with one slash $ans = "file:$ans" unless $ans =~ /:/; # without a scheme is a file: if ($ans =~ /^\w+:\/./) { - push @urls, $ans - unless $seen{$ans}; - } - else { + push @urls, $ans unless $seen{$ans}++; + } else { print qq{"$ans" doesn\'t look like an URL at first sight. I\'ll ignore it for now. You can add it to $INC{'CPAN/MyConfig.pm'} later if you\'re sure it\'s right.\n}; } } - } while $ans; + } while $ans || !%seen; push @{$CPAN::Config->{urllist}}, @urls; # xxx delete or comment these out when you're happy that it works diff --git a/lib/Class/Struct.pm b/lib/Class/Struct.pm index 63eddac739..ac1fb4736d 100644 --- a/lib/Class/Struct.pm +++ b/lib/Class/Struct.pm @@ -168,8 +168,7 @@ sub struct { $cnt = 0; foreach $name (@methods){ if ( do { no strict 'refs'; defined &{$class . "::$name"} } ) { - warnings::warn "function '$name' already defined, overrides struct accessor method" - if warnings::enabled(); + warnings::warnif("function '$name' already defined, overrides struct accessor method"); } else { $pre = $pst = $cmt = $sel = ''; diff --git a/lib/Cwd.pm b/lib/Cwd.pm index d86428cdb8..727959132c 100644 --- a/lib/Cwd.pm +++ b/lib/Cwd.pm @@ -66,30 +66,38 @@ kept up to date if all packages which use chdir import it from Cwd. 
=cut -## use strict; +use strict; use Carp; -$VERSION = '2.03'; +our $VERSION = '2.03'; -require Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(cwd getcwd fastcwd fastgetcwd); -@EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath); +use base qw/ Exporter /; +our @EXPORT = qw(cwd getcwd fastcwd fastgetcwd); +our @EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath); # The 'natural and safe form' for UNIX (pwd may be setuid root) sub _backtick_pwd { - my $cwd; - chop($cwd = `pwd`); + my $cwd = `pwd`; + # `pwd` may fail e.g. if the disk is full + chomp($cwd) if defined $cwd; $cwd; } # Since some ports may predefine cwd internally (e.g., NT) # we take care not to override an existing definition for cwd(). -*cwd = \&_backtick_pwd unless defined &cwd; +unless(defined &cwd) { + # The pwd command is not available in some chroot(2)'ed environments + if(grep { -x "$_/pwd" } split(':', $ENV{PATH})) { + *cwd = \&_backtick_pwd; + } + else { + *cwd = \&getcwd; + } +} # By Brandon S. Allbery @@ -156,7 +164,7 @@ sub fastcwd { my $chdir_init = 0; sub chdir_init { - if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos') { + if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') { my($dd,$di) = stat('.'); my($pd,$pi) = stat($ENV{'PWD'}); if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) { @@ -164,10 +172,12 @@ sub chdir_init { } } else { - $ENV{'PWD'} = cwd(); + my $wd = cwd(); + $wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32'; + $ENV{'PWD'} = $wd; } # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar) - if ($ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) { + if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) { my($pd,$pi) = stat($2); my($dd,$di) = stat($1); if (defined $pd and defined $dd and $di == $pi and $dd == $pd) { @@ -178,11 +188,17 @@ sub chdir_init { } sub chdir { - my $newdir = shift || ''; # allow for no arg (chdir to HOME dir) - $newdir =~ s|///*|/|g; + my $newdir = @? ? 
shift : ''; # allow for no arg (chdir to HOME dir) + $newdir =~ s|///*|/|g unless $^O eq 'MSWin32'; chdir_init() unless $chdir_init; return 0 unless CORE::chdir $newdir; - if ($^O eq 'VMS') { return $ENV{'PWD'} = $ENV{'DEFAULT'} } + if ($^O eq 'VMS') { + return $ENV{'PWD'} = $ENV{'DEFAULT'} + } + elsif ($^O eq 'MSWin32') { + $ENV{'PWD'} = Win32::GetFullPathName($newdir); + return 1; + } if ($newdir =~ m#^/#s) { $ENV{'PWD'} = $newdir; @@ -200,46 +216,70 @@ sub chdir { 1; } +# Taken from Cwd.pm It is really getcwd with an optional +# parameter instead of '.' +# -# By Jeff "japhy" Pinyan (07/23/2000) -# usage: abs_path(PATHNAME) -# see the docs - -sub abs_path { - my $base = @_ ? $_[0] : "."; - my $path = ""; - my $file; - - do { - my @devino = (stat($base))[0,1] or - carp("stat($base): $!"), return; - - $base .= "/.."; +sub abs_path +{ + my $start = @_ ? shift : '.'; + my($dotdots, $cwd, @pst, @cst, $dir, @tst); - opendir PREV, $base or carp("opendir($base): $!"), return; - while (defined($file = readdir PREV)) { - next if $file eq "." or $file eq ".."; - my @entry = (lstat("$base/$file"))[0,1] or - carp("lstat($base/$file): $!"), return; - last if $devino[0] == $entry[0] and $devino[1] == $entry[1]; + unless (@cst = stat( $start )) + { + carp "stat($start): $!"; + return ''; } - closedir PREV; - - $path = (defined $file and $file) . 
"/$path"; - } while defined $file; - - length($path) > 1 and chop $path; - return $path; + $cwd = ''; + $dotdots = $start; + do + { + $dotdots .= '/..'; + @pst = @cst; + unless (opendir(PARENT, $dotdots)) + { + carp "opendir($dotdots): $!"; + return ''; + } + unless (@cst = stat($dotdots)) + { + carp "stat($dotdots): $!"; + closedir(PARENT); + return ''; + } + if ($pst[0] == $cst[0] && $pst[1] == $cst[1]) + { + $dir = undef; + } + else + { + do + { + unless (defined ($dir = readdir(PARENT))) + { + carp "readdir($dotdots): $!"; + closedir(PARENT); + return ''; + } + $tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir")) + } + while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] || + $tst[1] != $pst[1]); + } + $cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ; + closedir(PARENT); + } while (defined $dir); + chop($cwd) unless $cwd eq '/'; # drop the trailing / + $cwd; } - # added function alias for those of us more # used to the libc function. --tchrist 27-Jan-00 *realpath = \&abs_path; sub fast_abs_path { my $cwd = getcwd(); - my $path = shift || '.'; + my $path = @_ ? shift : '.'; CORE::chdir($path) || croak "Cannot chdir to $path:$!"; my $realpath = getcwd(); CORE::chdir($cwd) || croak "Cannot chdir back to $cwd:$!"; @@ -308,12 +348,17 @@ sub _qnx_cwd { } sub _qnx_abs_path { - my $path = shift || '.'; + my $path = @_ ? 
shift : '.'; my $realpath=`/usr/bin/fullpath -t $path`; chop $realpath; return $realpath; } +sub _epoc_cwd { + $ENV{'PWD'} = EPOC::getcwd(); + return $ENV{'PWD'}; +} + { no warnings; # assignments trigger 'subroutine redefined' warning @@ -362,6 +407,12 @@ sub _qnx_abs_path { *fastcwd = \&cwd; *abs_path = \&fast_abs_path; } + elsif ($^O eq 'epoc') { + *getcwd = \&_epoc_cwd; + *fastgetcwd = \&_epoc_cwd; + *fastcwd = \&_epoc_cwd; + *abs_path = \&fast_abs_path; + } } # package main; eval join('',<DATA>) || die $@; # quick test diff --git a/lib/ExtUtils/Command.pm b/lib/ExtUtils/Command.pm index bccc76cc19..aec4013d02 100644 --- a/lib/ExtUtils/Command.pm +++ b/lib/ExtUtils/Command.pm @@ -177,7 +177,7 @@ Creates directory, including any parent directories. sub mkpath { - File::Path::mkpath([expand_wildcards()],1,0777); + File::Path::mkpath([expand_wildcards()],0,0777); } =item test_f file diff --git a/lib/ExtUtils/Embed.pm b/lib/ExtUtils/Embed.pm index b649b6b77b..c4167a3026 100644 --- a/lib/ExtUtils/Embed.pm +++ b/lib/ExtUtils/Embed.pm @@ -193,7 +193,7 @@ sub ldopts { @path = $path ? split(/:/, $path) : @INC; push(@potential_libs, @link_args) if scalar @link_args; - push(@potential_libs, $Config{libs}) if defined $std; + push(@potential_libs, $Config{perllibs}) if defined $std; push(@mods, static_ext()) if $std; @@ -484,7 +484,7 @@ B<xsinit()> uses the xsi_* functions to generate most of it's code. =head1 EXAMPLES For examples on how to use B<ExtUtils::Embed> for building C/C++ applications -with embedded perl, see the eg/ directory and L<perlembed>. +with embedded perl, see L<perlembed>. =head1 SEE ALSO diff --git a/lib/ExtUtils/Install.pm b/lib/ExtUtils/Install.pm index 36c72219a9..92db8c908a 100644 --- a/lib/ExtUtils/Install.pm +++ b/lib/ExtUtils/Install.pm @@ -16,6 +16,28 @@ my $splitchar = $^O eq 'VMS' ? '|' : ($^O eq 'os2' || $^O eq 'dos') ? ';' : ':'; my @PERL_ENV_LIB = split $splitchar, defined $ENV{'PERL5LIB'} ? 
$ENV{'PERL5LIB'} : $ENV{'PERLLIB'} || ''; my $Inc_uninstall_warn_handler; +# install relative to here + +my $INSTALL_ROOT = $ENV{PERL_INSTALL_ROOT}; + +use File::Spec; + +sub install_rooted_file { + if (defined $INSTALL_ROOT) { + MY->catfile($INSTALL_ROOT, $_[0]); + } else { + $_[0]; + } +} + +sub install_rooted_dir { + if (defined $INSTALL_ROOT) { + MY->catdir($INSTALL_ROOT, $_[0]); + } else { + $_[0]; + } +} + #our(@EXPORT, @ISA, $Is_VMS); #use strict; @@ -55,8 +77,9 @@ sub install { opendir DIR, $source_dir_or_file or next; for (readdir DIR) { next if $_ eq "." || $_ eq ".." || $_ eq ".exists"; - if (-w $hash{$source_dir_or_file} || - mkpath($hash{$source_dir_or_file})) { + my $targetdir = install_rooted_dir($hash{$source_dir_or_file}); + if (-w $targetdir || + mkpath($targetdir)) { last; } else { warn "Warning: You do not have permissions to " . @@ -66,7 +89,8 @@ sub install { } closedir DIR; } - $packlist->read($pack{"read"}) if (-f $pack{"read"}); + my $tmpfile = install_rooted_file($pack{"read"}); + $packlist->read($tmpfile) if (-f $tmpfile); my $cwd = cwd(); my($source); @@ -80,11 +104,13 @@ sub install { #October 1997: we want to install .pm files into archlib if #there are any files in arch. So we depend on having ./blib/arch #hardcoded here. 
- my $targetroot = $hash{$source}; + + my $targetroot = install_rooted_dir($hash{$source}); + if ($source eq "blib/lib" and exists $hash{"blib/arch"} and directory_not_empty("blib/arch")) { - $targetroot = $hash{"blib/arch"}; + $targetroot = install_rooted_dir($hash{"blib/arch"}); print "Files found in blib/arch: installing files in blib/lib into architecture dependent library tree\n"; } chdir($source) or next; @@ -93,8 +119,9 @@ sub install { $atime,$mtime,$ctime,$blksize,$blocks) = stat; return unless -f _; return if $_ eq ".exists"; - my $targetdir = MY->catdir($targetroot,$File::Find::dir); - my $targetfile = MY->catfile($targetdir,$_); + my $targetdir = MY->catdir($targetroot, $File::Find::dir); + my $origfile = $_; + my $targetfile = MY->catfile($targetdir, $_); my $diff = 0; if ( -f $targetfile && -s _ == $size) { @@ -129,16 +156,16 @@ sub install { } else { inc_uninstall($_,$File::Find::dir,$verbose,0); # nonono set to 0 } - $packlist->{$targetfile}++; + $packlist->{$origfile}++; }, "."); chdir($cwd) or Carp::croak("Couldn't chdir to $cwd: $!"); } if ($pack{'write'}) { - $dir = dirname($pack{'write'}); + $dir = install_rooted_dir(dirname($pack{'write'})); mkpath($dir,0,0755); print "Writing $pack{'write'}\n"; - $packlist->write($pack{'write'}); + $packlist->write(install_rooted_file($pack{'write'})); } } @@ -289,18 +316,20 @@ sub add { } sub DESTROY { - my $self = shift; - my($file,$i,$plural); - foreach $file (sort keys %$self) { - $plural = @{$self->{$file}} > 1 ? "s" : ""; - print "## Differing version$plural of $file found. You might like to\n"; - for (0..$#{$self->{$file}}) { - print "rm ", $self->{$file}[$_], "\n"; - $i++; + unless(defined $INSTALL_ROOT) { + my $self = shift; + my($file,$i,$plural); + foreach $file (sort keys %$self) { + $plural = @{$self->{$file}} > 1 ? "s" : ""; + print "## Differing version$plural of $file found. 
You might like to\n"; + for (0..$#{$self->{$file}}) { + print "rm ", $self->{$file}[$_], "\n"; + $i++; + } + } + $plural = $i>1 ? "all those files" : "this file"; + print "## Running 'make install UNINST=1' will unlink $plural for you.\n"; } - } - $plural = $i>1 ? "all those files" : "this file"; - print "## Running 'make install UNINST=1' will unlink $plural for you.\n"; } 1; diff --git a/lib/ExtUtils/Liblist.pm b/lib/ExtUtils/Liblist.pm index 640978a214..b22363bb5b 100644 --- a/lib/ExtUtils/Liblist.pm +++ b/lib/ExtUtils/Liblist.pm @@ -17,18 +17,18 @@ sub ext { sub _unix_os2_ext { my($self,$potential_libs, $verbose) = @_; - if ($^O =~ 'os2' and $Config{libs}) { + if ($^O =~ 'os2' and $Config{perllibs}) { # Dynamic libraries are not transitive, so we may need including # the libraries linked against perl.dll again. $potential_libs .= " " if $potential_libs; - $potential_libs .= $Config{libs}; + $potential_libs .= $Config{perllibs}; } return ("", "", "", "") unless $potential_libs; warn "Potential libraries are '$potential_libs':\n" if $verbose; my($so) = $Config{'so'}; - my($libs) = $Config{'libs'}; + my($libs) = $Config{'perllibs'}; my $Config_libext = $Config{lib_ext} || ".a"; @@ -198,7 +198,7 @@ sub _win32_ext { my $BC = 1 if $cc =~ /^bcc/i; my $GC = 1 if $cc =~ /^gcc/i; my $so = $Config{'so'}; - my $libs = $Config{'libs'}; + my $libs = $Config{'perllibs'}; my $libpth = $Config{'libpth'}; my $libext = $Config{'lib_ext'} || ".lib"; @@ -342,7 +342,7 @@ sub _vms_ext { $self->{CCFLAS} || $Config{'ccflags'}; @crtls = ( ($dbgqual =~ m-/Debug-i ? $Config{'dbgprefix'} : '') . 'PerlShr/Share' ); - push(@crtls, grep { not /\(/ } split /\s+/, $Config{'libs'}); + push(@crtls, grep { not /\(/ } split /\s+/, $Config{'perllibs'}); push(@crtls, grep { not /\(/ } split /\s+/, $Config{'libc'}); # In general, we pass through the basic libraries from %Config unchanged. 
# The one exception is that if we're building in the Perl source tree, and @@ -628,7 +628,7 @@ Unix-OS/2 version in several respects: =item * If C<$potential_libs> is empty, the return value will be empty. -Otherwise, the libraries specified by C<$Config{libs}> (see Config.pm) +Otherwise, the libraries specified by C<$Config{perllibs}> (see Config.pm) will be appended to the list of C<$potential_libs>. The libraries will be searched for in the directories specified in C<$potential_libs>, C<$Config{libpth}>, and in C<$Config{installarchlib}/CORE>. @@ -672,7 +672,7 @@ Entries in C<$potential_libs> beginning with a colon and followed by alphanumeric characters are treated as flags. Unknown flags will be ignored. An entry that matches C</:nodefault/i> disables the appending of default -libraries found in C<$Config{libs}> (this should be only needed very rarely). +libraries found in C<$Config{perllibs}> (this should be only needed very rarely). An entry that matches C</:nosearch/i> disables all searching for the libraries specified after it. Translation of C<-Lfoo> and @@ -682,7 +682,7 @@ valid files or directories. An entry that matches C</:search/i> reenables searching for the libraries specified after it. You can put it at the end to -enable searching for default libraries specified by C<$Config{libs}>. +enable searching for default libraries specified by C<$Config{perllibs}>. 
=item * diff --git a/lib/ExtUtils/MM_Unix.pm b/lib/ExtUtils/MM_Unix.pm index 8e337d97fa..52862c598e 100644 --- a/lib/ExtUtils/MM_Unix.pm +++ b/lib/ExtUtils/MM_Unix.pm @@ -305,8 +305,8 @@ sub cflags { $libperl ||= $self->{LIBPERL_A} || "libperl$self->{LIB_EXT}" ; $libperl =~ s/\.\$\(A\)$/$self->{LIB_EXT}/; - @cflags{qw(cc ccflags optimize large split shellflags)} - = @Config{qw(cc ccflags optimize large split shellflags)}; + @cflags{qw(cc ccflags optimize shellflags)} + = @Config{qw(cc ccflags optimize shellflags)}; my($optdebug) = ""; $cflags{shellflags} ||= ''; @@ -341,16 +341,12 @@ sub cflags { optimize=\"$cflags{optimize}\" perltype=\"$cflags{perltype}\" optdebug=\"$cflags{optdebug}\" - large=\"$cflags{large}\" - split=\"$cflags{'split'}\" eval '$prog' echo cc=\$cc echo ccflags=\$ccflags echo optimize=\$optimize echo perltype=\$perltype echo optdebug=\$optdebug - echo large=\$large - echo split=\$split `; my($line); foreach $line (@o){ @@ -368,7 +364,7 @@ sub cflags { $cflags{optimize} = $optdebug; } - for (qw(ccflags optimize perltype large split)) { + for (qw(ccflags optimize perltype)) { $cflags{$_} =~ s/^\s+//; $cflags{$_} =~ s/\s+/ /g; $cflags{$_} =~ s/\s+$//; @@ -411,8 +407,6 @@ sub cflags { CCFLAGS = $self->{CCFLAGS} OPTIMIZE = $self->{OPTIMIZE} PERLTYPE = $self->{PERLTYPE} -LARGE = $self->{LARGE} -SPLIT = $self->{SPLIT} MPOLLUTE = $pollute }; @@ -483,7 +477,7 @@ sub const_cccmd { return '' unless $self->needs_linking(); return $self->{CONST_CCCMD} = q{CCCMD = $(CC) -c $(INC) $(CCFLAGS) $(OPTIMIZE) \\ - $(PERLTYPE) $(LARGE) $(SPLIT) $(MPOLLUTE) $(DEFINE_VERSION) \\ + $(PERLTYPE) $(MPOLLUTE) $(DEFINE_VERSION) \\ $(XS_DEFINE_VERSION)}; } @@ -812,7 +806,7 @@ DIST_DEFAULT = $dist_default =item dist_basics (o) -Defines the targets distclean, distcheck, skipcheck, manifest. +Defines the targets distclean, distcheck, skipcheck, manifest, veryclean. 
=cut @@ -840,6 +834,11 @@ manifest : $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Manifest=mkmanifest \\ -e mkmanifest }; + + push @m, q{ +veryclean : realclean + $(RM_F) *~ *.orig */*~ */*.orig +}; join "", @m; } @@ -1071,13 +1070,7 @@ $(INST_DYNAMIC): $(OBJECT) $(MYEXTLIB) $(BOOTSTRAP) $(INST_ARCHAUTODIR)/.exists } $ldfrom = "-all $ldfrom -none" if ($^O eq 'dec_osf'); - # Brain dead solaris linker does not use LD_RUN_PATH? - # This fixes dynamic extensions which need shared libs - my $ldrun = ''; - $ldrun = join ' ', map "-R$_", split /:/, $self->{LD_RUN_PATH} - if ($^O eq 'solaris'); - - # The IRIX linker also doesn't use LD_RUN_PATH + # The IRIX linker doesn't use LD_RUN_PATH $ldrun = qq{-rpath "$self->{LD_RUN_PATH}"} if ($^O eq 'irix' && $self->{LD_RUN_PATH}); @@ -1147,9 +1140,9 @@ in these dirs: @$dirs "; } - foreach $dir (@$dirs){ - next unless defined $dir; # $self->{PERL_SRC} may be undefined - foreach $name (@$names){ + foreach $name (@$names){ + foreach $dir (@$dirs){ + next unless defined $dir; # $self->{PERL_SRC} may be undefined my ($abs, $val); if ($self->file_name_is_absolute($name)) { # /foo/bar $abs = $name; @@ -2371,7 +2364,7 @@ $(MAKE_APERL_FILE) : $(FIRST_MAKEFILE) # The front matter of the linkcommand... $linkcmd = join ' ', "\$(CC)", - grep($_, @Config{qw(large split ldflags ccdlflags)}); + grep($_, @Config{qw(ldflags ccdlflags)}); $linkcmd =~ s/\s+/ /g; $linkcmd =~ s,(perl\.exp),\$(PERL_INC)/$1,; @@ -2454,7 +2447,7 @@ MAP_PERLINC = @{$perlinc || []} MAP_STATIC = ", join(" \\\n\t", reverse sort keys %static), " -MAP_PRELIBS = $Config::Config{libs} $Config::Config{cryptlib} +MAP_PRELIBS = $Config::Config{perllibs} $Config::Config{cryptlib} "; if (defined $libperl) { @@ -2499,11 +2492,6 @@ MAP_LIBPERL = $libperl # SUNOS ld does not take the full path to a shared library my $llibperl = ($libperl)?'$(MAP_LIBPERL)':'-lperl'; - # Brain dead solaris linker does not use LD_RUN_PATH? 
- # This fixes dynamic extensions which need shared libs - my $ldfrom = ($^O eq 'solaris')? - join(' ', map "-R$_", split /:/, $self->{LD_RUN_PATH}):''; - push @m, " \$(MAP_TARGET) :: $tmp/perlmain\$(OBJ_EXT) \$(MAP_LIBPERL) \$(MAP_STATIC) \$(INST_ARCHAUTODIR)/extralibs.all \$(MAP_LINKCMD) -o \$\@ \$(OPTIMIZE) $tmp/perlmain\$(OBJ_EXT) $ldfrom \$(MAP_STATIC) $llibperl `cat \$(INST_ARCHAUTODIR)/extralibs.all` \$(MAP_PRELIBS) @@ -3173,9 +3161,11 @@ form Foo/Bar and replaces the slash with C<::>. Returns the replacement. sub replace_manpage_separator { my($self,$man) = @_; if ($^O eq 'uwin') { - $man =~ s,/+,.,g; + $man =~ s,/+,.,g; + } elsif ($Is_Dos) { + $man =~ s,/+,__,g; } else { - $man =~ s,/+,::,g; + $man =~ s,/+,::,g; } $man; } @@ -3494,7 +3484,7 @@ WARN_IF_OLD_PACKLIST = $(PERL) -we 'exit unless -f $$ARGV[0];' \\ -e 'print "Please make sure the two installations are not conflicting\n";' UNINST=0 -VERBINST=1 +VERBINST=0 MOD_INSTALL = $(PERL) -I$(INST_LIB) -I$(PERL_LIB) -MExtUtils::Install \ -e "install({@ARGV},'$(VERBINST)',0,'$(UNINST)');" diff --git a/lib/ExtUtils/MM_VMS.pm b/lib/ExtUtils/MM_VMS.pm index d21a56acba..d966c7d236 100644 --- a/lib/ExtUtils/MM_VMS.pm +++ b/lib/ExtUtils/MM_VMS.pm @@ -1890,6 +1890,7 @@ $(OBJECT) : $(PERL_INC)iperlsys.h # We do NOT just update config.h because that is not sufficient. # An out of date config.h is not fatal but complains loudly! 
$(PERL_INC)config.h : $(PERL_SRC)config.sh + $(NOOP) $(PERL_ARCHLIB)Config.pm : $(PERL_SRC)config.sh $(NOECHO) Write Sys$Error "$(PERL_ARCHLIB)Config.pm may be out of date with config.h or genconfig.pl" diff --git a/lib/ExtUtils/Manifest.pm b/lib/ExtUtils/Manifest.pm index 8bb3fc8ebd..28b70539fc 100644 --- a/lib/ExtUtils/Manifest.pm +++ b/lib/ExtUtils/Manifest.pm @@ -238,7 +238,7 @@ sub cp { sub ln { my ($srcFile, $dstFile) = @_; - return &cp if $Is_VMS; + return &cp if $Is_VMS or ($^O eq 'MSWin32' and Win32::IsWin95()); link($srcFile, $dstFile); local($_) = $dstFile; # chmod a+r,go-w+X (except "X" only applies to u=x) my $mode= 0444 | (stat)[2] & 0700; diff --git a/lib/ExtUtils/typemap b/lib/ExtUtils/typemap index 0260678570..9961f2d61d 100644 --- a/lib/ExtUtils/typemap +++ b/lib/ExtUtils/typemap @@ -225,13 +225,13 @@ T_U_CHAR T_FLOAT sv_setnv($arg, (double)$var); T_NV - sv_setnv($arg, (double)$var); + sv_setnv($arg, (NV)$var); T_DOUBLE sv_setnv($arg, (double)$var); T_PV sv_setpv((SV*)$arg, $var); T_PTR - sv_setiv($arg, (IV)$var); + sv_setiv($arg, PTR2IV($var)); T_PTRREF sv_setref_pv($arg, Nullch, (void*)$var); T_REF_IV_REF diff --git a/lib/File/Copy.pm b/lib/File/Copy.pm index e6cf786034..8d1d7834c9 100644 --- a/lib/File/Copy.pm +++ b/lib/File/Copy.pm @@ -221,7 +221,7 @@ File::Copy - Copy files or filehandles use POSIX; use File::Copy cp; - $n=FileHandle->new("/dev/null","r"); + $n = FileHandle->new("/a/file","r"); cp($n,"x");' =head1 DESCRIPTION diff --git a/lib/File/Find.pm b/lib/File/Find.pm index a9f190c722..6e6e462767 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -42,6 +42,22 @@ Reports the name of a directory only AFTER all its entries have been reported. Entry point finddepth() is a shortcut for specifying C<{ bydepth => 1 }> in the first argument of find(). +=item C<preprocess> + +The value should be a code reference. 
This code reference is used to +preprocess a directory; it is called after readdir() but before the loop that +calls the wanted() function. It is called with a list of strings and is +expected to return a list of strings. The code can be used to sort the +strings alphabetically, numerically, or to filter out directory entries based +on their name alone. + +=item C<postprocess> + +The value should be a code reference. It is invoked just before leaving the +current directory. It is called in void context with no arguments. The name +of the current directory is in $File::Find::dir. This hook is handy for +summarizing a directory, such as calculating its disk usage. + =item C<follow> Causes symbolic links to be followed. Since directory trees with symbolic @@ -55,7 +71,7 @@ If either I<follow> or I<follow_fast> is in effect: =item * -It is guarantueed that an I<lstat> has been called before the user's +It is guaranteed that an I<lstat> has been called before the user's I<wanted()> function is called. This enables fast file checks involving S< _>. =item * @@ -67,11 +83,10 @@ pathname of the file with all symbolic links resolved =item C<follow_fast> -This is similar to I<follow> except that it may report some files -more than once. It does detect cycles however. -Since only symbolic links have to be hashed, this is -much cheaper both in space and time. -If processing a file more than once (by the user's I<wanted()> function) +This is similar to I<follow> except that it may report some files more +than once. It does detect cycles, however. Since only symbolic links +have to be hashed, this is much cheaper both in space and time. If +processing a file more than once (by the user's I<wanted()> function) is worse than just taking time, the option I<follow> should be used. =item C<follow_skip> @@ -97,14 +112,14 @@ C<$_> will be the same as C<$File::Find::name>. 
If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted before they can be cd'ed to. Therefore they are checked against a regular -expression I<untaint_pattern>. Note, that all names passed to the +expression I<untaint_pattern>. Note that all names passed to the user's I<wanted()> function are still tainted. =item C<untaint_pattern> See above. This should be set using the C<qr> quoting operator. The default is set to C<qr|^([-+@\w./]+)$|>. -Note that the paranthesis which are vital. +Note that the parantheses are vital. =item C<untaint_skip> @@ -116,15 +131,15 @@ are skipped. The default is to 'die' in such a case. The wanted() function does whatever verifications you want. C<$File::Find::dir> contains the current directory name, and C<$_> the current filename within that directory. C<$File::Find::name> contains -the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when -the function is called, unless C<no_chdir> was specified. -When <follow> or <follow_fast> are in effect there is also a -C<$File::Find::fullname>. -The function may set C<$File::Find::prune> to prune the tree -unless C<bydepth> was specified. -Unless C<follow> or C<follow_fast> is specified, for compatibility -reasons (find.pl, find2perl) there are in addition the following globals -available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>, +the complete pathname to the file. You are chdir()'d to +C<$File::Find::dir> when the function is called, unless C<no_chdir> +was specified. When <follow> or <follow_fast> are in effect, there is +also a C<$File::Find::fullname>. The function may set +C<$File::Find::prune> to prune the tree unless C<bydepth> was +specified. 
Unless C<follow> or C<follow_fast> is specified, for +compatibility reasons (find.pl, find2perl) there are in addition the +following globals available: C<$File::Find::topdir>, +C<$File::Find::topdev>, C<$File::Find::topino>, C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C<find2perl> tool, which when fed, @@ -161,7 +176,7 @@ module. =head1 CAVEAT -Be aware that the option to follow symblic links can be dangerous. +Be aware that the option to follow symbolic links can be dangerous. Depending on the structure of the directory tree (including symbolic links to directories) you might traverse a given (physical) directory more than once (only if C<follow_fast> is in effect). @@ -183,7 +198,8 @@ require File::Basename; my %SLnkSeen; my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, - $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat); + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process); sub contract_name { my ($cdir,$fn) = @_; @@ -282,6 +298,8 @@ sub _find_opt { my $cwd_untainted = $cwd; $wanted_callback = $wanted->{wanted}; $bydepth = $wanted->{bydepth}; + $pre_process = $wanted->{preprocess}; + $post_process = $wanted->{postprocess}; $no_chdir = $wanted->{no_chdir}; $full_check = $wanted->{follow}; $follow = $full_check || $wanted->{follow_fast}; @@ -464,6 +482,8 @@ sub _find_dir($$$) { } @filenames = readdir DIR; closedir(DIR); + @filenames = &$pre_process(@filenames) if $pre_process; + push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; if ($nlink == 2 && !$avoid_nlink) { # This dir has no subdirectories. @@ -518,7 +538,11 @@ sub _find_dir($$$) { } $dir_name = ($p_dir eq '/' ? 
"/$dir_rel" : "$p_dir/$dir_rel"); $dir_pref = "$dir_name/"; - if ( $nlink < 0 ) { # must be finddepth, report dirname now + if ( $nlink == -2 ) { + $name = $dir = $p_dir; + $_ = "."; + &$post_process; # End-of-directory processing + } elsif ( $nlink < 0 ) { # must be finddepth, report dirname now $name = $dir_name; if ( substr($name,-2) eq '/.' ) { $name =~ s|/\.$||; diff --git a/lib/File/Temp.pm b/lib/File/Temp.pm index aac8b7a93c..a35104400d 100644 --- a/lib/File/Temp.pm +++ b/lib/File/Temp.pm @@ -4,6 +4,40 @@ package File::Temp; File::Temp - return name and handle of a temporary file safely +=begin __INTERNALS + +=head1 PORTABILITY + +This module is designed to be portable across operating systems +and it currently supports Unix, VMS, DOS, OS/2 and Windows. When +porting to a new OS there are generally three main issues +that have to be solved: + +=over 4 + +=item * + +Can the OS unlink an open file? If it can't then the +C<_can_unlink_opened_file> method should be modified. + +=item * + +Are the return values from C<stat> reliable? By default all the +return values from C<stat> are compared when unlinking a temporary +file using the filename and the handle. Operating systems other than +unix do not always have valid entries in all fields. If C<unlink0> fails +then the C<stat> comparison should be modified accordingly. + +=item * + +Security. Systems that can not support a test for the sticky bit +on a directory can not use the MEDIUM and HIGH security tests. +The C<_can_do_level> method should be modified accordingly. + +=back + +=end __INTERNALS + =head1 SYNOPSIS use File::Temp qw/ tempfile tempdir /; @@ -61,12 +95,12 @@ filehandle of a temporary file. The tempdir() function can be used to create a temporary directory. The security aspect of temporary file creation is emphasized such that -a filehandle and filename are returned together. 
This helps guarantee that -a race condition can not occur where the temporary file is created by another process -between checking for the existence of the file and its -opening. Additional security levels are provided to check, for -example, that the sticky bit is set on world writable directories. -See L<"safe_level"> for more information. +a filehandle and filename are returned together. This helps guarantee +that a race condition can not occur where the temporary file is +created by another process between checking for the existence of the +file and its opening. Additional security levels are provided to +check, for example, that the sticky bit is set on world writable +directories. See L<"safe_level"> for more information. For compatibility with popular C library functions, Perl implementations of the mkstemp() family of functions are provided. These are, mkstemp(), @@ -91,8 +125,9 @@ use File::Spec 0.8; use File::Path qw/ rmtree /; use Fcntl 1.03; use Errno qw( EEXIST ENOENT ENOTDIR EINVAL ); +require VMS::Stdio if $^O eq 'VMS'; -# Need the Symbol package if we are running older perl +# Need the Symbol package if we are running older perl require Symbol if $] < 5.006; @@ -131,7 +166,7 @@ Exporter::export_tags('POSIX','mktemp'); # Version number -$VERSION = '0.09'; +$VERSION = '0.10'; # This is a list of characters that can be used in random filenames @@ -162,12 +197,25 @@ use constant HIGH => 2; my $OPENFLAGS = O_CREAT | O_EXCL | O_RDWR; -for my $oflag (qw/FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT TEMPORARY/) { +for my $oflag (qw/ FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT /) { my ($bit, $func) = (0, "Fcntl::O_" . $oflag); no strict 'refs'; $OPENFLAGS |= $bit if eval { $bit = &$func(); 1 }; } +# On some systems the O_TEMPORARY flag can be used to tell the OS +# to automatically remove the file when it is closed. 
This is fine +# in most cases but not if tempfile is called with UNLINK=>0 and +# the filename is requested -- in the case where the filename is to +# be passed to another routine. This happens on windows. We overcome +# this by using a second open flags variable + +my $OPENTEMPFLAGS = $OPENFLAGS; +for my $oflag (qw/ TEMPORARY /) { + my ($bit, $func) = (0, "Fcntl::O_" . $oflag); + no strict 'refs'; + $OPENTEMPFLAGS |= $bit if eval { $bit = &$func(); 1 }; +} # INTERNAL ROUTINES - not to be used outside of package @@ -190,7 +238,13 @@ for my $oflag (qw/FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT TEMPORARY/) { # default is 0 # "suffixlen" => number of characters at end of PATH to be ignored. # default is 0. +# "unlink_on_close" => indicates that, if possible, the OS should remove +# the file as soon as it is closed. Usually indicates +# use of the O_TEMPORARY flag to sysopen. +# Usually irrelevant on unix + # "open" and "mkdir" can not both be true +# "unlink_on_close" is not used when "mkdir" is true. # The default options are equivalent to mktemp(). @@ -214,6 +268,7 @@ sub _gettemp { "open" => 0, "mkdir" => 0, "suffixlen" => 0, + "unlink_on_close" => 0, ); # Read the template @@ -285,6 +340,7 @@ sub _gettemp { if ($^O eq 'VMS') { # need volume to avoid relative dir spec $parent = File::Spec->catdir($volume, @dirs[0..$#dirs-1]); + $parent = 'sys$disk:[]' if $parent eq ''; } else { # Put it back together without the last one @@ -338,21 +394,6 @@ sub _gettemp { } - # Calculate the flags that we wish to use for the sysopen - # Some of these are not always available -# my $openflags; -# if ($options{"open"}) { - # Default set -# $openflags = O_CREAT | O_EXCL | O_RDWR; - -# for my $oflag (qw/FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT TEMPORARY/) { -# my ($bit, $func) = (0, "Fcntl::O_" . 
$oflag); -# no strict 'refs'; -# $openflags |= $bit if eval { $bit = &$func(); 1 }; -# } - -# } - # Now try MAX_TRIES time to open the file for (my $i = 0; $i < MAX_TRIES; $i++) { @@ -377,7 +418,18 @@ sub _gettemp { umask(066); # Attempt to open the file - if ( sysopen($fh, $path, $OPENFLAGS, 0600) ) { + my $open_success = undef; + if ( $^O eq 'VMS' and $options{"unlink_on_close"} ) { + # make it auto delete on close by setting FAB$V_DLT bit + $fh = VMS::Stdio::vmssysopen($path, $OPENFLAGS, 0600, 'fop=dlt'); + $open_success = $fh; + } else { + my $flags = ( $options{"unlink_on_close"} ? + $OPENTEMPFLAGS : + $OPENFLAGS ); + $open_success = sysopen($fh, $path, $flags, 0600); + } + if ( $open_success ) { # Reset umask umask($umask); @@ -557,8 +609,12 @@ sub _is_safe { # Check to see whether owner is neither superuser (or a system uid) nor me # Use the real uid from the $< variable # UID is in [4] - if ( $info[4] > File::Temp->top_system_uid() && $info[4] != $<) { - carp "Directory owned neither by root nor the current user"; + if ($info[4] > File::Temp->top_system_uid() && $info[4] != $<) { + + Carp::cluck(sprintf "uid=$info[4] topuid=%s \$<=$< path='$path'", + File::Temp->top_system_uid()); + + carp "Directory owned neither by root nor the current user."; return 0; } @@ -657,7 +713,7 @@ sub _is_verysafe { sub _can_unlink_opened_file { - if ($^O eq 'MSWin32' || $^O eq 'os2' || $^O eq 'VMS') { + if ($^O eq 'MSWin32' || $^O eq 'os2' || $^O eq 'VMS' || $^O eq 'dos') { return 0; } else { return 1; @@ -681,7 +737,7 @@ sub _can_do_level { return 1 if $level == STANDARD; # Currently, the systems that can do HIGH or MEDIUM are identical - if ( $^O eq 'MSWin32' || $^O eq 'os2') { + if ( $^O eq 'MSWin32' || $^O eq 'os2' || $^O eq 'cygwin' || $^O eq 'dos') { return 0; } else { return 1; @@ -703,7 +759,7 @@ sub _can_do_level { # - isdir (flag to indicate that we are being given a directory) # [and hence no filehandle] -# Status is not referred to since all the magic is done with 
and END block +# Status is not referred to since all the magic is done with an END block { # Will set up two lexical variables to contain all the files to be @@ -758,10 +814,12 @@ sub _can_do_level { if (-d $fname) { # Directory exists so store it + # first on VMS turn []foo into [.foo] for rmtree + $fname = VMS::Filespec::vmspath($fname) if $^O eq 'VMS'; push (@dirs_to_unlink, $fname); } else { - carp "Request to remove directory $fname could not be completed since it does not exists!\n"; + carp "Request to remove directory $fname could not be completed since it does not exist!\n"; } } else { @@ -818,6 +876,13 @@ But see the WARNING at the end. Translates the template as before except that a directory name is specified. + ($fh, $filename) = tempfile($template, UNLINK => 1); + +Return the filename and filehandle as before except that the file is +automatically removed when the program exits. Default is for the file +to be removed if a file handle is requested and to be kept if the +filename is requested. + If the template is not specified, a template is always automatically generated. This temporary file is placed in tmpdir() (L<File::Spec>) unless a directory is specified explicitly with the @@ -844,6 +909,8 @@ if warnings are turned on. Consider using the tmpnam() and mktemp() functions described elsewhere in this document if opening the file is not required. +Options can be combined as required. 
+ =cut sub tempfile { @@ -854,9 +921,9 @@ sub tempfile { # Default options my %options = ( "DIR" => undef, # Directory prefix - "SUFFIX" => '', # Template suffix - "UNLINK" => 0, # Unlink file on exit - "OPEN" => 1, # Do not open file + "SUFFIX" => '', # Template suffix + "UNLINK" => 0, # Do not unlink file on exit + "OPEN" => 1, # Open file ); # Check to see whether we have an odd or even number of arguments @@ -873,6 +940,12 @@ sub tempfile { } + if ($options{"DIR"} and $^O eq 'VMS') { + + # on VMS turn []foo into [.foo] for concatenation + $options{"DIR"} = VMS::Filespec::vmspath($options{"DIR"}); + } + # Construct the template # Have a choice of trying to work around the mkstemp/mktemp/tmpnam etc @@ -909,8 +982,9 @@ sub tempfile { my ($fh, $path); croak "Error in tempfile() using $template" unless (($fh, $path) = _gettemp($template, - "open" => $options{'OPEN'}, + "open" => $options{'OPEN'}, "mkdir"=> 0 , + "unlink_on_close" => $options{'UNLINK'}, "suffixlen" => length($options{'SUFFIX'}), ) ); @@ -1023,8 +1097,9 @@ sub tempdir { if ($options{'TMPDIR'} || $options{'DIR'}) { # Strip parent directory from the filename - # + # # There is no filename at the end + $template = VMS::Filespec::vmspath($template) if $^O eq 'VMS'; my ($volume, $directories, undef) = File::Spec->splitpath( $template, 1); # Last directory is then our template @@ -1033,7 +1108,7 @@ sub tempdir { # Prepend the supplied directory or temp dir if ($options{"DIR"}) { - $template = File::Spec->catfile($options{"DIR"}, $template); + $template = File::Spec->catdir($options{"DIR"}, $template); } elsif ($options{TMPDIR}) { @@ -1314,7 +1389,7 @@ exits. No access to the filename is provided. sub tmpfile { - # Simply call tmpnam() in an array context + # Simply call tmpnam() in a list context my ($fh, $file) = tmpnam(); # Make sure file is removed when filehandle is closed @@ -1402,8 +1477,8 @@ the time the end block is executed since the deferred removal may not have access to the filehandle). 
Additionally, on Windows NT not all the fields returned by stat() can -be compared. For example, the C<dev> and C<rdev> fields seem to be different -and also. Also, it seems that the size of the file returned by stat() +be compared. For example, the C<dev> and C<rdev> fields seem to be +different. Also, it seems that the size of the file returned by stat() does not always agree, with C<stat(FH)> being more accurate than C<stat(filename)>, presumably because of caching issues even when using autoflush (this is usually overcome by waiting a while after @@ -1456,6 +1531,10 @@ sub unlink0 { @okstat = (1,2,3,4,5,7,8,9,10); } elsif ($^O eq 'os2') { @okstat = (0, 2..$#fh); + } elsif ($^O eq 'VMS') { # device and file ID are sufficient + @okstat = (0, 1); + } elsif ($^O eq 'dos') { + @okstat = (0,2..7,11..$#fh); } # Now compare each entry explicitly by number @@ -1483,7 +1562,9 @@ sub unlink0 { print "Link count = $fh[3] \n" if $DEBUG; # Make sure that the link count is zero - return ( $fh[3] == 0 ? 1 : 0); + # - Cygwin provides deferred unlinking, however, + # on Win9x the link count remains 1 + return ( $fh[3] == 0 or $^O eq 'cygwin' ? 1 : 0); } else { _deferred_unlink($fh, $path, 0); @@ -1653,7 +1734,7 @@ descriptor before passing it to another process. =head1 HISTORY Originally began life in May 1999 as an XS interface to the system -mkstemp() function. In March 2000, the mkstemp() code was +mkstemp() function. In March 2000, the OpenBSD mkstemp() code was translated to Perl for total control of the code's security checking, to ensure the presence of the function regardless of operating system and to help with portability. diff --git a/lib/FileHandle.pm b/lib/FileHandle.pm index 34c3475d9c..5eb3a89adc 100644 --- a/lib/FileHandle.pm +++ b/lib/FileHandle.pm @@ -238,12 +238,12 @@ See L<perlfunc/printf>. 
=item $fh->getline This works like <$fh> described in L<perlop/"I/O Operators"> -except that it's more readable and can be safely called in an -array context but still returns just one line. +except that it's more readable and can be safely called in a +list context but still returns just one line. =item $fh->getlines -This works like <$fh> when called in an array context to +This works like <$fh> when called in a list context to read all the remaining lines in a file, except that it's more readable. It will also croak() if accidentally called in a scalar context. diff --git a/lib/Getopt/Long.pm b/lib/Getopt/Long.pm index f474c7c4a9..2bb0548a2c 100644 --- a/lib/Getopt/Long.pm +++ b/lib/Getopt/Long.pm @@ -2,12 +2,12 @@ package Getopt::Long; -# RCS Status : $Id: GetoptLong.pl,v 2.24 2000-03-14 21:28:52+01 jv Exp $ +# RCS Status : $Id: GetoptLong.pl,v 2.25 2000-08-28 21:45:17+02 jv Exp $ # Author : Johan Vromans # Created On : Tue Sep 11 15:00:12 1990 # Last Modified By: Johan Vromans -# Last Modified On: Tue Mar 14 21:28:40 2000 -# Update Count : 721 +# Last Modified On: Mon Jul 31 21:21:13 2000 +# Update Count : 739 # Status : Released ################ Copyright ################ @@ -36,7 +36,7 @@ BEGIN { require 5.004; use Exporter (); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); - $VERSION = "2.23"; + $VERSION = 2.24; @ISA = qw(Exporter); @EXPORT = qw(&GetOptions $REQUIRE_ORDER $PERMUTE $RETURN_IN_ORDER); @@ -52,7 +52,7 @@ use vars qw($error $debug $major_version $minor_version); use vars qw($autoabbrev $getopt_compat $ignorecase $bundling $order $passthrough); # Official invisible variables. -use vars qw($genprefix $caller); +use vars qw($genprefix $caller $gnu_compat); # Public subroutines. 
sub Configure (@); @@ -89,6 +89,27 @@ sub ConfigDefaults () { $error = 0; # error tally $ignorecase = 1; # ignore case when matching options $passthrough = 0; # leave unrecognized options alone + $gnu_compat = 0; # require --opt=val if value is optional +} + +# Override import. +sub import { + my $pkg = shift; # package + my @syms = (); # symbols to import + my @config = (); # configuration + my $dest = \@syms; # symbols first + for ( @_ ) { + if ( $_ eq ':config' ) { + $dest = \@config; # config next + next; + } + push (@$dest, $_); # push + } + # Hide one level and call super. + local $Exporter::ExportLevel = 1; + $pkg->SUPER::import(@syms); + # And configure. + Configure (@config) if @config; } ################ Initialization ################ @@ -100,6 +121,87 @@ sub ConfigDefaults () { ConfigDefaults(); +################ OO Interface ################ + +package Getopt::Long::Parser; + +# NOTE: The object oriented routines use $error for thread locking. +my $_lock = sub { + lock ($Getopt::Long::error) if $] >= 5.005 +}; + +# Store a copy of the default configuration. Since ConfigDefaults has +# just been called, what we get from Configure is the default. +my $default_config = do { + &$_lock; + Getopt::Long::Configure () +}; + +sub new { + my $that = shift; + my $class = ref($that) || $that; + my %atts = @_; + + # Register the callers package. + my $self = { caller => (caller)[0] }; + + bless ($self, $class); + + # Process config attributes. + if ( defined $atts{config} ) { + &$_lock; + my $save = Getopt::Long::Configure ($default_config, @{$atts{config}}); + $self->{settings} = Getopt::Long::Configure ($save); + delete ($atts{config}); + } + # Else use default config. + else { + $self->{settings} = $default_config; + } + + if ( %atts ) { # Oops + Getopt::Long::Croak(__PACKAGE__.": unhandled attributes: ". + join(" ", sort(keys(%atts)))); + } + + $self; +} + +sub configure { + my ($self) = shift; + + &$_lock; + + # Restore settings, merge new settings in. 
+ my $save = Getopt::Long::Configure ($self->{settings}, @_); + + # Restore orig config and save the new config. + $self->{settings} = Configure ($save); +} + +sub getoptions { + my ($self) = shift; + + &$_lock; + + # Restore config settings. + my $save = Getopt::Long::Configure ($self->{settings}); + + # Call main routine. + my $ret = 0; + $Getopt::Long::caller = $self->{caller}; + eval { $ret = Getopt::Long::GetOptions (@_); }; + + # Restore saved settings. + Getopt::Long::Configure ($save); + + # Handle errors and return value. + die ($@) if $@; + return $ret; +} + +package Getopt::Long; + ################ Package return ################ 1; @@ -108,12 +210,12 @@ __END__ ################ AutoLoading subroutines ################ -# RCS Status : $Id: GetoptLongAl.pl,v 2.27 2000-03-17 09:07:26+01 jv Exp $ +# RCS Status : $Id: GetoptLongAl.pl,v 2.29 2000-08-28 21:56:18+02 jv Exp $ # Author : Johan Vromans # Created On : Fri Mar 27 11:50:30 1998 # Last Modified By: Johan Vromans -# Last Modified On: Fri Mar 17 09:00:09 2000 -# Update Count : 55 +# Last Modified On: Fri Jul 28 19:12:29 2000 +# Update Count : 97 # Status : Released sub GetOptions { @@ -137,13 +239,14 @@ sub GetOptions { print STDERR ("GetOpt::Long $Getopt::Long::VERSION ", "called from package \"$pkg\".", "\n ", - 'GetOptionsAl $Revision: 2.27 $ ', + 'GetOptionsAl $Revision: 2.29 $ ', "\n ", "ARGV: (@ARGV)", "\n ", "autoabbrev=$autoabbrev,". "bundling=$bundling,", "getopt_compat=$getopt_compat,", + "gnu_compat=$gnu_compat,", "order=$order,", "\n ", "ignorecase=$ignorecase,", @@ -200,7 +303,7 @@ sub GetOptions { next; } - # Match option spec. Allow '?' as an alias. + # Match option spec. Allow '?' as an alias only. if ( $opt !~ /^((\w+[-\w]*)(\|(\?|\w[-\w]*)?)*)?([!~+]|[=:][infse][@%]?)?$/ ) { $error .= "Error in option spec: \"$opt\"\n"; next; @@ -208,14 +311,22 @@ sub GetOptions { my ($o, $c, $a) = ($1, $5); $c = '' unless defined $c; + # $linko keeps track of the primary name the user specified. 
+ # This name will be used for the internal or external linkage. + # In other words, if the user specifies "FoO|BaR", it will + # match any case combinations of 'foo' and 'bar', but if a global + # variable needs to be set, it will be $opt_FoO in the exact case + # as specified. + my $linko; + if ( ! defined $o ) { # empty -> '-' option - $opctl{$o = ''} = $c; + $opctl{$linko = $o = ''} = $c; } else { # Handle alias names my @o = split (/\|/, $o); - my $linko = $o = $o[0]; + $linko = $o = $o[0]; # Force an alias if the option name is not locase. $a = $o unless $o eq lc($o); $o = lc ($o) @@ -254,18 +365,18 @@ sub GetOptions { $a = $_; } } - $o = $linko; } # If no linkage is supplied in the @optionlist, copy it from # the userlinkage if available. if ( defined $userlinkage ) { unless ( @optionlist > 0 && ref($optionlist[0]) ) { - if ( exists $userlinkage->{$o} && ref($userlinkage->{$o}) ) { - print STDERR ("=> found userlinkage for \"$o\": ", - "$userlinkage->{$o}\n") + if ( exists $userlinkage->{$linko} && + ref($userlinkage->{$linko}) ) { + print STDERR ("=> found userlinkage for \"$linko\": ", + "$userlinkage->{$linko}\n") if $debug; - unshift (@optionlist, $userlinkage->{$o}); + unshift (@optionlist, $userlinkage->{$linko}); } else { # Do nothing. Being undefined will be handled later. @@ -276,13 +387,13 @@ sub GetOptions { # Copy the linkage. If omitted, link to global variable. 
if ( @optionlist > 0 && ref($optionlist[0]) ) { - print STDERR ("=> link \"$o\" to $optionlist[0]\n") + print STDERR ("=> link \"$linko\" to $optionlist[0]\n") if $debug; if ( ref($optionlist[0]) =~ /^(SCALAR|CODE)$/ ) { - $linkage{$o} = shift (@optionlist); + $linkage{$linko} = shift (@optionlist); } elsif ( ref($optionlist[0]) =~ /^(ARRAY)$/ ) { - $linkage{$o} = shift (@optionlist); + $linkage{$linko} = shift (@optionlist); $opctl{$o} .= '@' if $opctl{$o} ne '' and $opctl{$o} !~ /\@$/; $bopctl{$o} .= '@' @@ -290,7 +401,7 @@ sub GetOptions { $bopctl{$o} ne '' and $bopctl{$o} !~ /\@$/; } elsif ( ref($optionlist[0]) =~ /^(HASH)$/ ) { - $linkage{$o} = shift (@optionlist); + $linkage{$linko} = shift (@optionlist); $opctl{$o} .= '%' if $opctl{$o} ne '' and $opctl{$o} !~ /\%$/; $bopctl{$o} .= '%' @@ -304,22 +415,22 @@ sub GetOptions { else { # Link to global $opt_XXX variable. # Make sure a valid perl identifier results. - my $ov = $o; + my $ov = $linko; $ov =~ s/\W/_/g; if ( $c =~ /@/ ) { - print STDERR ("=> link \"$o\" to \@$pkg","::opt_$ov\n") + print STDERR ("=> link \"$linko\" to \@$pkg","::opt_$ov\n") if $debug; - eval ("\$linkage{\$o} = \\\@".$pkg."::opt_$ov;"); + eval ("\$linkage{\$linko} = \\\@".$pkg."::opt_$ov;"); } elsif ( $c =~ /%/ ) { - print STDERR ("=> link \"$o\" to \%$pkg","::opt_$ov\n") + print STDERR ("=> link \"$linko\" to \%$pkg","::opt_$ov\n") if $debug; - eval ("\$linkage{\$o} = \\\%".$pkg."::opt_$ov;"); + eval ("\$linkage{\$linko} = \\\%".$pkg."::opt_$ov;"); } else { - print STDERR ("=> link \"$o\" to \$$pkg","::opt_$ov\n") + print STDERR ("=> link \"$linko\" to \$$pkg","::opt_$ov\n") if $debug; - eval ("\$linkage{\$o} = \\\$".$pkg."::opt_$ov;"); + eval ("\$linkage{\$linko} = \\\$".$pkg."::opt_$ov;"); } } } @@ -382,7 +493,11 @@ sub GetOptions { next unless defined $opt; if ( defined $arg ) { - $opt = $aliases{$opt} if defined $aliases{$opt}; + if ( defined $aliases{$opt} ) { + print STDERR ("=> alias \"$opt\" -> \"$aliases{$opt}\"\n") + if 
$debug; + $opt = $aliases{$opt}; + } if ( defined $linkage{$opt} ) { print STDERR ("=> ref(\$L{$opt}) -> ", @@ -646,7 +761,7 @@ sub FindOption ($$$$$$$) { } # Apparently valid. $opt = $tryopt; - print STDERR ("=> found \"$type\" for ", $opt, "\n") if $debug; + print STDERR ("=> found \"$type\" for \"", $opt, "\"\n") if $debug; #### Determine argument status #### @@ -675,7 +790,16 @@ sub FindOption ($$$$$$$) { ($mand, $type, $dsttype, $key) = $type =~ /^(.)(.)([@%]?)$/; # Check if there is an option argument available. - if ( defined $optarg ? ($optarg eq '') + if ( $gnu_compat ) { + return (1, $opt, $optarg, $dsttype, $incr, $key) + if defined $optarg; + return (1, $opt, $type eq "s" ? '' : 0, $dsttype, $incr, $key) + if $mand eq ':'; + } + + # Check if there is an option argument available. + if ( defined $optarg + ? ($optarg eq '') : !(defined $rest || @ARGV > 0) ) { # Complain if this option needs an argument. if ( $mand eq "=" ) { @@ -684,10 +808,7 @@ sub FindOption ($$$$$$$) { $error++; undef $opt; } - if ( $mand eq ":" ) { - $arg = $type eq "s" ? '' : 0; - } - return (1, $opt,$arg,$dsttype,$incr,$key); + return (1, $opt, $type eq "s" ? '' : 0, $dsttype, $incr, $key); } # Get (possibly optional) argument. 
@@ -795,12 +916,12 @@ sub Configure (@) { my $prevconfig = [ $error, $debug, $major_version, $minor_version, $autoabbrev, $getopt_compat, $ignorecase, $bundling, $order, - $passthrough, $genprefix ]; + $gnu_compat, $passthrough, $genprefix ]; if ( ref($options[0]) eq 'ARRAY' ) { ( $error, $debug, $major_version, $minor_version, $autoabbrev, $getopt_compat, $ignorecase, $bundling, $order, - $passthrough, $genprefix ) = @{shift(@options)}; + $gnu_compat, $passthrough, $genprefix ) = @{shift(@options)}; } my $opt; @@ -811,8 +932,13 @@ sub Configure (@) { $action = 0; $try = $+; } - if ( $try eq 'default' or $try eq 'defaults' ) { - ConfigDefaults () if $action; + if ( ($try eq 'default' or $try eq 'defaults') && $action ) { + ConfigDefaults (); + } + elsif ( ($try eq 'posix_default' or $try eq 'posix_defaults') ) { + local $ENV{POSIXLY_CORRECT}; + $ENV{POSIXLY_CORRECT} = 1 if $action; + ConfigDefaults (); } elsif ( $try eq 'auto_abbrev' or $try eq 'autoabbrev' ) { $autoabbrev = $action; @@ -820,6 +946,17 @@ sub Configure (@) { elsif ( $try eq 'getopt_compat' ) { $getopt_compat = $action; } + elsif ( $try eq 'gnu_getopt' ) { + if ( $action ) { + $gnu_compat = 1; + $bundling = 1; + $getopt_compat = 0; + $permute = 1; + } + } + elsif ( $try eq 'gnu_compat' ) { + $gnu_compat = $action; + } elsif ( $try eq 'ignorecase' or $try eq 'ignore_case' ) { $ignorecase = $action; } @@ -841,14 +978,14 @@ sub Configure (@) { elsif ( $try eq 'pass_through' or $try eq 'passthrough' ) { $passthrough = $action; } - elsif ( $try =~ /^prefix=(.+)$/ ) { + elsif ( $try =~ /^prefix=(.+)$/ && $action ) { $genprefix = $1; # Turn into regexp. Needs to be parenthesized! $genprefix = "(" . quotemeta($genprefix) . ")"; eval { '' =~ /$genprefix/; }; Croak ("Getopt::Long: invalid pattern \"$genprefix\"") if $@; } - elsif ( $try =~ /^prefix_pattern=(.+)$/ ) { + elsif ( $try =~ /^prefix_pattern=(.+)$/ && $action ) { $genprefix = $1; # Parenthesize if needed. $genprefix = "(" . $genprefix . 
")" @@ -930,7 +1067,7 @@ could use the more descriptive C<--long>. To distinguish between a bundle of single-character options and a long one, two dashes are used to precede the option name. Early implementations of long options used a plus C<+> instead. Also, option values could be specified either -like +like --size=24 @@ -943,7 +1080,7 @@ The C<+> form is now obsolete and strongly deprecated. =head1 Getting Started with Getopt::Long Getopt::Long is the Perl5 successor of C<newgetopt.pl>. This was -the firs Perl module that provided support for handling the new style +the first Perl module that provided support for handling the new style of command line options, hence the name Getopt::Long. This module also supports single-character options and bundling. In this case, the options are restricted to alphabetic characters only, and the @@ -1166,11 +1303,11 @@ requires a least C<--hea> and C<--hei> for the head and height options. =head2 Summary of Option Specifications Each option specifier consists of two parts: the name specification -and the argument specification. +and the argument specification. The name specification contains the name of the option, optionally followed by a list of alternative names separated by vertical bar -characters. +characters. length option name is "length" length|size|l name is "length", aliases are "size" and "l" @@ -1243,6 +1380,24 @@ considered an option on itself. =head1 Advanced Possibilities +=head2 Object oriented interface + +Getopt::Long can be used in an object oriented way as well: + + use Getopt::Long; + $p = new Getopt::Long::Parser; + $p->configure(...configuration options...); + if ($p->getoptions(...options descriptions...)) ... + +Configuration options can be passed to the constructor: + + $p = new Getopt::Long::Parser + config => [...configuration options...]; + +For thread safety, each method call will acquire an exclusive lock to +the Getopt::Long module. So don't call these methods from a callback +routine! 
+ =head2 Documentation and help texts Getopt::Long encourages the use of Pod::Usage to produce help @@ -1365,7 +1520,7 @@ options, -vax -would set C<a>, C<v> and C<x>, but +would set C<a>, C<v> and C<x>, but --vax @@ -1423,8 +1578,8 @@ When applied to the following command line: arg1 --width=72 arg2 --width=60 arg3 -This will call -C<process("arg1")> while C<$width> is C<80>, +This will call +C<process("arg1")> while C<$width> is C<80>, C<process("arg2")> while C<$width> is C<72>, and C<process("arg3")> while C<$width> is C<60>. @@ -1436,10 +1591,15 @@ L<Configuring Getopt::Long>. Getopt::Long can be configured by calling subroutine Getopt::Long::Configure(). This subroutine takes a list of quoted -strings, each specifying a configuration option to be set, e.g. -C<ignore_case>, or reset, e.g. C<no_ignore_case>. Case does not +strings, each specifying a configuration option to be enabled, e.g. +C<ignore_case>, or disabled, e.g. C<no_ignore_case>. Case does not matter. Multiple calls to Configure() are possible. +Alternatively, as of version 2.24, the configuration options may be +passed together with the C<use> statement: + + use Getopt::Long qw(:config no_ignore_case bundling); + The following options are available: =over 12 @@ -1449,34 +1609,53 @@ The following options are available: This option causes all configuration options to be reset to their default values. +=item posix_default + +This option causes all configuration options to be reset to their +default values as if the environment variable POSIXLY_CORRECT had +been set. + =item auto_abbrev Allow option names to be abbreviated to uniqueness. -Default is set unless environment variable -POSIXLY_CORRECT has been set, in which case C<auto_abbrev> is reset. +Default is enabled unless environment variable +POSIXLY_CORRECT has been set, in which case C<auto_abbrev> is disabled. =item getopt_compat Allow C<+> to start options. 
-Default is set unless environment variable -POSIXLY_CORRECT has been set, in which case C<getopt_compat> is reset. +Default is enabled unless environment variable +POSIXLY_CORRECT has been set, in which case C<getopt_compat> is disabled. + +=item gnu_compat + +C<gnu_compat> controls whether C<--opt=> is allowed, and what it should +do. Without C<gnu_compat>, C<--opt=> gives an error. With C<gnu_compat>, +C<--opt=> will give option C<opt> and empty value. +This is the way GNU getopt_long() does it. + +=item gnu_getopt + +This is a short way of setting C<gnu_compat> C<bundling> C<permute> +C<no_getopt_compat>. With C<gnu_getopt>, command line handling should be +fully compatible with GNU getopt_long(). =item require_order Whether command line arguments are allowed to be mixed with options. -Default is set unless environment variable -POSIXLY_CORRECT has been set, in which case C<require_order> is reset. +Default is disabled unless environment variable +POSIXLY_CORRECT has been set, in which case C<require_order> is enabled. See also C<permute>, which is the opposite of C<require_order>. =item permute Whether command line arguments are allowed to be mixed with options. -Default is set unless environment variable -POSIXLY_CORRECT has been set, in which case C<permute> is reset. +Default is enabled unless environment variable +POSIXLY_CORRECT has been set, in which case C<permute> is disabled. Note that C<permute> is the opposite of C<require_order>. -If C<permute> is set, this means that +If C<permute> is enabled, this means that --foo arg1 --bar arg2 arg3 @@ -1493,7 +1672,7 @@ processed. The only exception is when C<--> is used: will call the call-back routine for arg1 and arg2, and terminate GetOptions() leaving C<"arg2"> in C<@ARGV>. -If C<require_order> is set, options processing +If C<require_order> is enabled, options processing terminates when the first non-option is encountered. 
--foo arg1 --bar arg2 arg3 @@ -1502,40 +1681,40 @@ is equivalent to --foo -- arg1 --bar arg2 arg3 -=item bundling (default: reset) +=item bundling (default: disabled) -Setting this option will allow single-character options to be bundled. +Enabling this option will allow single-character options to be bundled. To distinguish bundles from long option names, long options I<must> be introduced with C<--> and single-character options (and bundles) with C<->. -Note: resetting C<bundling> also resets C<bundling_override>. +Note: disabling C<bundling> also disables C<bundling_override>. -=item bundling_override (default: reset) +=item bundling_override (default: disabled) -If C<bundling_override> is set, bundling is enabled as with -C<bundling> but now long option names override option bundles. +If C<bundling_override> is enabled, bundling is enabled as with +C<bundling> but now long option names override option bundles. -Note: resetting C<bundling_override> also resets C<bundling>. +Note: disabling C<bundling_override> also disables C<bundling>. B<Note:> Using option bundling can easily lead to unexpected results, especially when mixing long options and bundles. Caveat emptor. -=item ignore_case (default: set) +=item ignore_case (default: enabled) -If set, case is ignored when matching long option names. Single +If enabled, case is ignored when matching long option names. Single character options will be treated case-sensitive. -Note: resetting C<ignore_case> also resets C<ignore_case_always>. +Note: disabling C<ignore_case> also disables C<ignore_case_always>. -=item ignore_case_always (default: reset) +=item ignore_case_always (default: disabled) When bundling is in effect, case is ignored on single-character -options also. +options also. -Note: resetting C<ignore_case_always> also resets C<ignore_case>. +Note: disabling C<ignore_case_always> also disables C<ignore_case>. 
-=item pass_through (default: reset) +=item pass_through (default: disabled) Options that are unknown, ambiguous or supplied with an invalid option value are passed through in C<@ARGV> instead of being flagged as @@ -1543,7 +1722,7 @@ errors. This makes it possible to write wrapper scripts that process only part of the user supplied command line arguments, and pass the remaining options to some other program. -This can be very confusing, especially when C<permute> is also set. +This can be very confusing, especially when C<permute> is also enabled. =item prefix @@ -1556,9 +1735,9 @@ A Perl pattern that identifies the strings that introduce options. Default is C<(--|-|\+)> unless environment variable POSIXLY_CORRECT has been set, in which case it is C<(--|-)>. -=item debug (default: reset) +=item debug (default: disabled) -Enable copious debugging output. +Enable debugging output. =back @@ -1569,11 +1748,10 @@ signalled using die() and will terminate the calling program unless the call to Getopt::Long::GetOptions() was embedded in C<eval { ... }>, or die() was trapped using C<$SIG{__DIE__}>. -A return value of 1 (true) indicates success. - -A return status of 0 (false) indicates that the function detected one -or more errors during option parsing. These errors are signalled using -warn() and can be trapped with C<$SIG{__WARN__}>. +GetOptions returns true to indicate success. +It returns false when the function detected one or more errors during +option parsing. These errors are signalled using warn() and can be +trapped with C<$SIG{__WARN__}>. Errors that can't happen are signalled using Carp::croak(). @@ -1629,21 +1807,44 @@ Now the command line may look like: Note that to terminate options processing still requires a double dash C<-->. -GetOptions() will not interpret a leading C<"<>"> as option starters -if the next argument is a reference. To force C<"<"> and C<">"> as -option starters, use C<"><">. Confusing? 
Well, B<using a starter +GetOptions() will not interpret a leading C<< "<>" >> as option starters +if the next argument is a reference. To force C<< "<" >> and C<< ">" >> as +option starters, use C<< "><" >>. Confusing? Well, B<using a starter argument is strongly deprecated> anyway. =head2 Configuration variables Previous versions of Getopt::Long used variables for the purpose of -configuring. Although manipulating these variables still work, it -is strongly encouraged to use the new C<config> routine. Besides, it -is much easier. +configuring. Although manipulating these variables still work, it is +strongly encouraged to use the C<Configure> routine that was introduced +in version 2.17. Besides, it is much easier. + +=head1 Trouble Shooting + +=head2 Warning: Ignoring '!' modifier for short option + +This warning is issued when the '!' modifier is applied to a short +(one-character) option and bundling is in effect. E.g., + + Getopt::Long::Configure("bundling"); + GetOptions("foo|f!" => \$foo); + +Note that older Getopt::Long versions did not issue a warning, because +the '!' modifier was applied to the first name only. This bug was +fixed in 2.22. + +Solution: separate the long and short names and apply the '!' to the +long names only, e.g., + + GetOptions("foo!" => \$foo, "f" => \$foo); + +=head2 GetOptions does not return a false result when an option is not supplied + +That's why they're called 'options'. =head1 AUTHOR -Johan Vromans E<lt>jvromans@squirrel.nlE<gt> +Johan Vromans <jvromans@squirrel.nl> =head1 COPYRIGHT AND DISCLAIMER @@ -1660,7 +1861,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. If you do not have a copy of the GNU General Public License write to -the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, +the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
=cut diff --git a/lib/Math/BigFloat.pm b/lib/Math/BigFloat.pm index d8d643ca3e..74a023e0d8 100644 --- a/lib/Math/BigFloat.pm +++ b/lib/Math/BigFloat.pm @@ -4,6 +4,7 @@ use Math::BigInt; use Exporter; # just for use to be happy @ISA = (Exporter); +$VERSION = '0.01'; # never had version before use overload '+' => sub {new Math::BigFloat &fadd}, @@ -85,6 +86,7 @@ sub fnorm { #(string) return fnum_str # normalize number -- for internal use sub norm { #(mantissa, exponent) return fnum_str local($_, $exp) = @_; + $exp = 0 unless defined $exp; if ($_ eq 'NaN') { 'NaN'; } else { @@ -174,12 +176,14 @@ sub round { #(int_str, int_str, int_str) return int_str } else { local($cmp) = Math::BigInt::bcmp(Math::BigInt::bmul($r,'+2'),$base); if ( $cmp < 0 || - ($cmp == 0 && - ( $rnd_mode eq 'zero' || + ($cmp == 0 && ( + ($rnd_mode eq 'zero' ) || ($rnd_mode eq '-inf' && (substr($q,$[,1) eq '+')) || ($rnd_mode eq '+inf' && (substr($q,$[,1) eq '-')) || - ($rnd_mode eq 'even' && $q =~ /[24680]$/) || - ($rnd_mode eq 'odd' && $q =~ /[13579]$/) )) ) { + ($rnd_mode eq 'even' && $q =~ /[13579]$/ ) || + ($rnd_mode eq 'odd' && $q =~ /[24680]$/ ) ) + ) + ) { $q; # round down } else { Math::BigInt::badd($q, ((substr($q,$[,1) eq '-') ? 
'-1' : '+1')); @@ -244,9 +248,17 @@ sub fcmp #(fnum_str, fnum_str) return cond_code if ($xm eq '+0' || $ym eq '+0') { return $xm <=> $ym; } - ord($y) <=> ord($x) - || ($xe <=> $ye) * (substr($x,$[,1).'1') - || Math::BigInt::cmp($xm,$ym); + if ( $xe < $ye ) # adjust the exponents to be equal + { + $ym .= '0' x ($ye - $xe); + $ye = $xe; + } + elsif ( $ye < $xe ) # same here + { + $xm .= '0' x ($xe - $ye); + $xe = $ye; + } + return Math::BigInt::cmp($xm,$ym); } } diff --git a/lib/Math/BigInt.pm b/lib/Math/BigInt.pm index a43969c2b2..808e5522b9 100644 --- a/lib/Math/BigInt.pm +++ b/lib/Math/BigInt.pm @@ -1,4 +1,5 @@ package Math::BigInt; +$VERSION='0.01'; use overload '+' => sub {new Math::BigInt &badd}, diff --git a/lib/Math/Complex.pm b/lib/Math/Complex.pm index e5434f48d7..e7a071ab7f 100644 --- a/lib/Math/Complex.pm +++ b/lib/Math/Complex.pm @@ -7,15 +7,18 @@ package Math::Complex; -$VERSION = "1.30"; - our($VERSION, @ISA, @EXPORT, %EXPORT_TAGS, $Inf); +$VERSION = 1.31; + BEGIN { - my $e = $!; - $Inf = CORE::exp(CORE::exp(30)); # We do want an arithmetic overflow. - $! = $e; # Clear ERANGE. - undef $Inf unless $Inf =~ /^inf(?:inity)?$/i; # Inf INF inf Infinity + unless ($^O eq 'unicosmk') { + my $e = $!; + # We do want an arithmetic overflow. + eval '$Inf = CORE::exp(CORE::exp(30))'; + $! = $e; # Clear ERANGE. + undef $Inf unless $Inf =~ /^inf(?:inity)?$/i; # Inf INF inf Infinity + } $Inf = "Inf" if !defined $Inf || !($Inf > 0); # Desperation. 
} @@ -1250,23 +1253,15 @@ sub display_format { my %obj = %{$self->{display_format}}; @display_format{keys %obj} = values %obj; } - if (@_ == 1) { - $display_format{style} = shift; - } else { - my %new = @_; - @display_format{keys %new} = values %new; - } - } else { # Called as a class method - if (@_ = 1) { - $display_format{style} = $self; - } else { - my %new = @_; - @display_format{keys %new} = values %new; - } - undef $self; + } + if (@_ == 1) { + $display_format{style} = shift; + } else { + my %new = @_; + @display_format{keys %new} = values %new; } - if (defined $self) { + if (ref $self) { # Called as an object method $self->{display_format} = { %display_format }; return wantarray ? @@ -1274,6 +1269,7 @@ sub display_format { $self->{display_format}->{style}; } + # Called as a class method %DISPLAY_FORMAT = %display_format; return wantarray ? @@ -1393,11 +1389,11 @@ sub stringify_polar { $t -= int(CORE::abs($t) / pit2) * pit2; - if ($format{polar_pretty_print}) { + if ($format{polar_pretty_print} && $t) { my ($a, $b); for $a (2..9) { $b = $t * $a / pi; - if (int($b) == $b) { + if ($b =~ /^-?\d+$/) { $b = $b < 0 ? "-" : "" if CORE::abs($b) == 1; $theta = "${b}pi/$a"; last; diff --git a/lib/Math/Trig.pm b/lib/Math/Trig.pm index 492706cd6a..b28f150798 100644 --- a/lib/Math/Trig.pm +++ b/lib/Math/Trig.pm @@ -36,14 +36,15 @@ my @rdlcnv = qw(cartesian_to_cylindrical %EXPORT_TAGS = ('radial' => [ @rdlcnv ]); -sub pi2 () { 2 * pi } # use constant generates warning -sub pip2 () { pi / 2 } # use constant generates warning -use constant DR => pi2/360; -use constant RD => 360/pi2; -use constant DG => 400/360; -use constant GD => 360/400; -use constant RG => 400/pi2; -use constant GR => pi2/400; +sub pi2 () { 2 * pi } +sub pip2 () { pi / 2 } + +sub DR () { pi2/360 } +sub RD () { 360/pi2 } +sub DG () { 400/360 } +sub GD () { 360/400 } +sub RG () { 400/pi2 } +sub GR () { pi2/400 } # # Truncating remainder. @@ -58,17 +59,23 @@ sub remt ($$) { # Angle conversions. 
# -sub rad2deg ($) { remt(RD * $_[0], 360) } +sub rad2rad($) { remt($_[0], pi2) } + +sub deg2deg($) { remt($_[0], 360) } + +sub grad2grad($) { remt($_[0], 400) } -sub deg2rad ($) { remt(DR * $_[0], pi2) } +sub rad2deg ($;$) { my $d = RD * $_[0]; $_[1] ? $d : deg2deg($d) } -sub grad2deg ($) { remt(GD * $_[0], 360) } +sub deg2rad ($;$) { my $d = DR * $_[0]; $_[1] ? $d : rad2rad($d) } -sub deg2grad ($) { remt(DG * $_[0], 400) } +sub grad2deg ($;$) { my $d = GD * $_[0]; $_[1] ? $d : deg2deg($d) } -sub rad2grad ($) { remt(RG * $_[0], 400) } +sub deg2grad ($;$) { my $d = DG * $_[0]; $_[1] ? $d : grad2grad($d) } -sub grad2rad ($) { remt(GR * $_[0], pi2) } +sub rad2grad ($;$) { my $d = RG * $_[0]; $_[1] ? $d : grad2grad($d) } + +sub grad2rad ($;$) { my $d = GR * $_[0]; $_[1] ? $d : rad2rad($d) } sub cartesian_to_spherical { my ( $x, $y, $z ) = @_; @@ -280,6 +287,14 @@ and the imaginary part of approximately C<-1.317>. $gradians = rad2grad($radians); The full circle is 2 I<pi> radians or I<360> degrees or I<400> gradians. +The result is by default wrapped to be inside the [0, {2pi,360,400}[ circle. +If you don't want this, supply a true second argument: + + $zillions_of_radians = deg2rad($zillions_of_degrees, 1); + $negative_degrees = rad2deg($negative_radians, 1); + +You can also do the wrapping explicitly by rad2rad(), deg2deg(), and +grad2grad(). 
=head1 RADIAL COORDINATE CONVERSIONS diff --git a/lib/Net/protoent.pm b/lib/Net/protoent.pm index 334af78914..6aad940903 100644 --- a/lib/Net/protoent.pm +++ b/lib/Net/protoent.pm @@ -5,7 +5,7 @@ use 5.005_64; our(@EXPORT, @EXPORT_OK, %EXPORT_TAGS); BEGIN { use Exporter (); - @EXPORT = qw(getprotobyname getprotobynumber getprotoent); + @EXPORT = qw(getprotobyname getprotobynumber getprotoent getproto); @EXPORT_OK = qw( $p_name @p_aliases $p_proto ); %EXPORT_TAGS = ( FIELDS => [ @EXPORT_OK, @EXPORT ] ); } diff --git a/lib/Pod/Checker.pm b/lib/Pod/Checker.pm index ae32677db1..37ed68f873 100644 --- a/lib/Pod/Checker.pm +++ b/lib/Pod/Checker.pm @@ -10,7 +10,7 @@ package Pod::Checker; use vars qw($VERSION); -$VERSION = 1.098; ## Current version of this package +$VERSION = 1.2; ## Current version of this package require 5.005; ## requires this Perl version or later use Pod::ParseUtils; ## for hyperlinks and lists @@ -44,7 +44,8 @@ This function can take a hash of options: =item B<-warnings> =E<gt> I<val> -Turn warnings on/off. See L<"Warnings">. +Turn warnings on/off. I<val> is usually 1 for on, but higher values +trigger additional warnings. See L<"Warnings">. =back @@ -212,11 +213,15 @@ There is some whitespace on a seemingly empty line. POD is very sensitive to such things, so this is flagged. B<vi> users switch on the B<list> option to avoid this problem. +=begin _disabled_ + =item * file does not start with =head The file starts with a different POD directive than head. This is most probably something you do not want. +=end _disabled_ + =item * No numeric argument for =over The C<=over> command is supposed to have a numeric argument (the @@ -243,7 +248,8 @@ type of the I<first> C<=item> determines the type of the list. Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> can potentially cause errors as they could be misinterpreted as -markup commands. +markup commands. This is only printed when the -warnings level is +greater than 1. 
=item * Unknown entity @@ -273,11 +279,41 @@ The NAME section (C<=head1 NAME>) should consist of a single paragraph with the script/module name, followed by a dash `-' and a very short description of what the thing is good for. -=item * Hyperlinks +=back + +=head2 Hyperlinks + +There are some warnings wrt. malformed hyperlinks. + +=over 4 + +=item * collapsing newlines to blanks + +A hyperlink LE<lt>...E<gt> spans more than one line. This may indicate +and error. -There are some warnings wrt. hyperlinks: -Leading/trailing whitespace, newlines in hyperlinks, -brackets C<()>. +=item * ignoring leading/trailing whitespace in link + +There is whitespace at the beginning or the end of the contents of +LE<lt>...E<gt>. + +=item * (section) in '$page' deprecated + +There is a section detected in the page name of LE<lt>...E<gt>, e.g. +C<LE<gt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only. +Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able +to expand this to appropriate code. For links to (builtin) functions, +please say C<LE<lt>perlfunc/mkdirE<gt>>, without (). + +=item * alternative text/node '%s' contains non-escaped | or / + +The characters C<|> and C</> are special in the LE<lt>...E<gt> context. 
+Although the hyperlink parser does its best to determine which "/" is +text and which is a delimiter in case of doubt, one ought to escape +these literal characters like this: + + / E<sol> + | E<verbar> =back @@ -307,7 +343,6 @@ use strict; use Carp; use Exporter; use Pod::Parser; -require VMS::Filespec if $^O eq 'VMS'; use vars qw(@ISA @EXPORT); @ISA = qw(Pod::Parser); @@ -471,7 +506,6 @@ sub podchecker( $ ; $ % ) { ## Now create a pod checker my $checker = new Pod::Checker(%options); - $checker->parseopts(-process_cut_cmd => 1, -warnings => 1); ## Now check the pod document for errors $checker->parse_from_file($infile, $outfile); @@ -486,6 +520,27 @@ sub podchecker( $ ; $ % ) { ## Method definitions begin here ##------------------------------- +################################## + +=over 4 + +=item C<Pod::Checker-E<gt>new( %options )> + +Return a reference to a new Pod::Checker object that inherits from +Pod::Parser and is used for calling the required methods later. The +following options are recognized: + +C<-warnings =E<gt> num> + Print warnings if C<num> is true. The higher the value of C<num>, +the more warnings are printed. Currently there are only levels 1 and 2. + +C<-quiet =E<gt> num> + If C<num> is true, do not print any errors/warnings. This is useful +when Pod::Checker is used to munge POD code into plain text from within +POD formatters. + +=cut + ## sub new { ## my $this = shift; ## my $class = ref($this) || $this; @@ -501,7 +556,9 @@ sub initialize { ## Initialize number of errors, and setup an error function to ## increment this number and then print to the designated output. $self->{_NUM_ERRORS} = 0; - $self->errorsub('poderror'); # set the error handling subroutine + $self->{-quiet} ||= 0; + # set the error handling subroutine + $self->errorsub($self->{-quiet} ? 
sub { 1; } : 'poderror'); $self->{_commands} = 0; # total number of POD commands encountered $self->{_list_stack} = []; # stack for nested lists $self->{_have_begin} = ''; # stores =begin @@ -511,12 +568,11 @@ sub initialize { # print warnings? $self->{-warnings} = 1 unless(defined $self->{-warnings}); $self->{_current_head1} = ''; # the current =head1 block + $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings}); } ################################## -=over 4 - =item C<$checker-E<gt>poderror( @args )> =item C<$checker-E<gt>poderror( {%opts}, @args )> @@ -547,7 +603,6 @@ The error level, should be 'WARNING' or 'ERROR'. sub poderror { my $self = shift; my %opts = (ref $_[0]) ? %{shift()} : (); - $opts{-file} = VMS::Filespec::unixify($opts{-file}) if (exists($opts{-file}) && $^O eq 'VMS'); ## Retrieve options chomp( my $msg = ($opts{-msg} || "")."@_" ); @@ -562,7 +617,7 @@ sub poderror { ## Increment error count and print message " ++($self->{_NUM_ERRORS}) if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); - my $out_fh = $self->output_handle(); + my $out_fh = $self->output_handle() || \*STDERR; print $out_fh ($severity, $msg, $line, $file, "\n") if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); } @@ -672,7 +727,6 @@ sub end_pod { ## print the number of errors found my $self = shift; my $infile = $self->input_file(); - $infile = VMS::Filespec::unixify($infile) if $^O eq 'VMS'; my $out_fh = $self->output_handle(); if(@{$self->{_list_stack}}) { @@ -691,12 +745,15 @@ sub end_pod { my %nodes; foreach($self->node()) { $nodes{$_} = 1; - if(/^(\S+)\s+/) { + if(/^(\S+)\s+\S/) { # we have more than one word. Use the first as a node, too. 
# This is used heavily in perlfunc.pod $nodes{$1} ||= 2; # derived node } } + foreach($self->idx()) { + $nodes{$_} = 3; # index node + } foreach($self->hyperlink()) { my ($line,$link) = @$_; # _TODO_ what if there is a link to the page itself by the name, @@ -746,14 +803,17 @@ sub command { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', -msg => "Unknown command '$cmd'" }); } - else { - # found a valid command - if(!$self->{_commands}++ && $cmd !~ /^head/) { - $self->poderror({ -line => $line, -file => $file, - -severity => 'WARNING', - -msg => "file does not start with =head" }); - } - ## check syntax of particular command + else { # found a valid command + $self->{_commands}++; # delete this line if below is enabled again + + ##### following check disabled due to strong request + #if(!$self->{_commands}++ && $cmd !~ /^head/) { + # $self->poderror({ -line => $line, -file => $file, + # -severity => 'WARNING', + # -msg => "file does not start with =head" }); + #} + + # check syntax of particular command if($cmd eq 'over') { # check for argument $arg = $self->interpolate_and_check($paragraph, $line,$file); @@ -1005,12 +1065,13 @@ sub _check_ptree { unless(ref) { my $count; # count the unescaped angle brackets + # complain only when warning level is greater than 1 my $i = $_; if($count = $i =~ tr/<>/<>/) { $self->poderror({ -line => $line, -file => $file, -severity => 'WARNING', -msg => "$count unescaped <> in paragraph" }) - if($self->{-warnings}); + if($self->{-warnings} && $self->{-warnings}>1); } $text .= $i; next; diff --git a/lib/Pod/Find.pm b/lib/Pod/Find.pm index 8de197b71d..516a624fce 100644 --- a/lib/Pod/Find.pm +++ b/lib/Pod/Find.pm @@ -13,8 +13,9 @@ package Pod::Find; use vars qw($VERSION); -$VERSION = 0.12; ## Current version of this package -require 5.005; ## requires this Perl version or later +$VERSION = 0.21; ## Current version of this package +require 5.005; ## requires this Perl version or later +use Carp; 
############################################################################# @@ -32,12 +33,38 @@ Pod::Find - find POD documents in directory trees print "podname=",simplify_name('a/b/c/mymodule.pod'),"\n"; + $location = pod_where( { -inc => 1 }, "Pod::Find" ); + =head1 DESCRIPTION -B<Pod::Find> provides a function B<pod_find> that searches for POD -documents in a given set of files and directories. It returns a hash -with the file names as keys and the POD name as value. The POD name -is derived from the file name and its position in the directory tree. +B<Pod::Find> provides a set of functions to locate POD files. Note that +no function is exported by default to avoid pollution of your namespace, +so be sure to specify them in the B<use> statement if you need them: + + use Pod::Find qw(pod_find); + +=cut + +use strict; +#use diagnostics; +use Exporter; +use File::Spec; +use File::Find; +use Cwd; + +use vars qw(@ISA @EXPORT_OK $VERSION); +@ISA = qw(Exporter); +@EXPORT_OK = qw(&pod_find &simplify_name &pod_where &contains_pod); + +# package global variables +my $SIMPLIFY_RX; + +=head2 C<pod_find( { %opts } , @directories )> + +The function B<pod_find> searches for POD documents in a given set of +files and/or directories. It returns a hash with the file names as keys +and the POD name as value. The POD name is derived from the file name +and its position in the directory tree. E.g. when searching in F<$HOME/perl5lib>, the file F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>, @@ -51,73 +78,39 @@ A warning is printed if more than one POD file with the same POD name is found, e.g. F<CPAN.pm> in different directories. This usually indicates duplicate occurrences of modules in the I<@INC> search path. -The function B<simplify_name> is equivalent to B<basename>, but also -strips Perl-like extensions (.pm, .pl, .pod) and extensions like -F<.bat>, F<.cmd> on Win32 and OS/2, respectively. 
- -Note that neither B<pod_find> nor B<simplify_name> are exported by -default so be sure to specify them in the B<use> statement if you need -them: - - use Pod::Find qw(pod_find simplify_name); - -=head1 OPTIONS - -The first argument for B<pod_find> may be a hash reference with options. -The rest are either directories that are searched recursively or files. -The POD names of files are the plain basenames with any Perl-like extension -(.pm, .pl, .pod) stripped. +B<OPTIONS> The first argument for B<pod_find> may be a hash reference +with options. The rest are either directories that are searched +recursively or files. The POD names of files are the plain basenames +with any Perl-like extension (.pm, .pl, .pod) stripped. =over 4 -=item B<-verbose> +=item C<-verbose =E<gt> 1> Print progress information while scanning. -=item B<-perl> +=item C<-perl =E<gt> 1> Apply Perl-specific heuristics to find the correct PODs. This includes stripping Perl-like extensions, omitting subdirectories that are numeric but do I<not> match the current Perl interpreter's version id, suppressing F<site_perl> as a module hierarchy name etc. -=item B<-script> +=item C<-script =E<gt> 1> Search for PODs in the current Perl interpreter's installation B<scriptdir>. This is taken from the local L<Config|Config> module. -=item B<-inc> +=item C<-inc =E<gt> 1> Search for PODs in the current Perl interpreter's I<@INC> paths. This -automatically considers paths specified in the C<PERL5LIB> environment. +automatically considers paths specified in the C<PERL5LIB> environment +as this is prepended to I<@INC> by the Perl interpreter itself. =back -=head1 AUTHOR - -Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, -heavily borrowing code from Nick Ing-Simmons' PodToHtml. 
- -=head1 SEE ALSO - -L<Pod::Parser>, L<Pod::Checker> - =cut -use strict; -#use diagnostics; -use Exporter; -use File::Spec; -use File::Find; -use Cwd; - -use vars qw(@ISA @EXPORT_OK $VERSION); -@ISA = qw(Exporter); -@EXPORT_OK = qw(&pod_find &simplify_name); - -# package global variables -my $SIMPLIFY_RX; - # return a hash of the POD files found # first argument may be a hashref (options), # rest is a list of directories to search recursively @@ -167,7 +160,9 @@ sub pod_find $try = File::Spec->catfile($pwd,$try); } # simplify path - $try = File::Spec->canonpath($try); + # on VMS canonpath will vmsify:[the.path], but File::Find::find + # wants /unixy/paths + $try = File::Spec->canonpath($try) if ($^O ne 'VMS'); my $name; if(-f $try) { if($name = _check_and_extract_name($try, $opts{-verbose})) { @@ -222,27 +217,14 @@ sub _check_and_extract_name { # check extension or executable flag # this involves testing the .bat extension on Win32! - unless($file =~ /\.(pod|pm|plx?)\z/i || (-f $file && -x _ && -T _)) { - return undef; + unless(-f $file && -T _ && ($file =~ /\.(pod|pm|plx?)\z/i || -x _ )) { + return undef; } - # check for one line of POD - unless(open(POD,"<$file")) { - warn "Error: $file is unreadable: $!\n"; - return undef; - } - local $/ = undef; - my $pod = <POD>; - close(POD); - unless($pod =~ /\n=(head\d|pod|over|item)\b/) { - warn "No POD in $file, skipping.\n" - if($verbose); - return; - } - undef $pod; + return undef unless contains_pod($file,$verbose); # strip non-significant path components - # _TODO_ what happens on e.g. Win32? + # TODO what happens on e.g. Win32? my $name = $file; if(defined $root_rx) { $name =~ s!$root_rx!!s; @@ -256,6 +238,14 @@ sub _check_and_extract_name { $name; } +=head2 C<simplify_name( $str )> + +The function B<simplify_name> is equivalent to B<basename>, but also +strips Perl-like extensions (.pm, .pl, .pod) and extensions like +F<.bat>, F<.cmd> on Win32 and OS/2, respectively. 
+ +=cut + # basic simplification of the POD name: # basename & strip extension sub simplify_name { @@ -274,5 +264,180 @@ sub _simplify { $_[0] =~ s/\.(bat|exe|cmd)\z//i if($^O =~ /win|os2/i); } +# contribution from Tim Jenness <t.jenness@jach.hawaii.edu> + +=head2 C<pod_where( { %opts }, $pod )> + +Returns the location of a pod document given a search directory +and a module (e.g. C<File::Find>) or script (e.g. C<perldoc>) name. + +Options: + +=over 4 + +=item C<-inc =E<gt> 1> + +Search @INC for the pod and also the C<scriptdir> defined in the +L<Config|Config> module. + +=item C<-dirs =E<gt> [ $dir1, $dir2, ... ]> + +Reference to an array of search directories. These are searched in order +before looking in C<@INC> (if B<-inc>). Current directory is used if +none are specified. + +=item C<-verbose =E<gt> 1> + +List directories as they are searched + +=back + +Returns the full path of the first occurence to the file. +Package names (eg 'A::B') are automatically converted to directory +names in the selected directory. (eg on unix 'A::B' is converted to +'A/B'). Additionally, '.pm', '.pl' and '.pod' are appended to the +search automatically if required. + +A subdirectory F<pod/> is also checked if it exists in any of the given +search directories. This ensures that e.g. L<perlfunc|perlfunc> is +found. + +It is assumed that if a module name is supplied, that that name +matches the file name. Pods are not opened to check for the 'NAME' +entry. + +A check is made to make sure that the file that is found does +contain some pod documentation. + +=cut + +sub pod_where { + + # default options + my %options = ( + '-inc' => 0, + '-verbose' => 0, + '-dirs' => [ '.' 
], + ); + + # Check for an options hash as first argument + if (defined $_[0] && ref($_[0]) eq 'HASH') { + my $opt = shift; + + # Merge default options with supplied options + %options = (%options, %$opt); + } + + # Check usage + carp 'Usage: pod_where({options}, $pod)' unless (scalar(@_)); + + # Read argument + my $pod = shift; + + # Split on :: and then join the name together using File::Spec + my @parts = split (/::/, $pod); + + # Get full directory list + my @search_dirs = @{ $options{'-dirs'} }; + + if ($options{'-inc'}) { + + require Config; + + # Add @INC + push (@search_dirs, @INC) if $options{'-inc'}; + + # Add location of pod documentation for perl man pages (eg perlfunc) + # This is a pod directory in the private install tree + #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'}, + # 'pod'); + #push (@search_dirs, $perlpoddir) + # if -d $perlpoddir; + + # Add location of binaries such as pod2text + push (@search_dirs, $Config::Config{'scriptdir'}) + if -d $Config::Config{'scriptdir'}; + } + + # Loop over directories + Dir: foreach my $dir ( @search_dirs ) { + + # Don't bother if cant find the directory + if (-d $dir) { + warn "Looking in directory $dir\n" + if $options{'-verbose'}; + + # Now concatenate this directory with the pod we are searching for + my $fullname = File::Spec->catfile($dir, @parts); + warn "Filename is now $fullname\n" + if $options{'-verbose'}; + + # Loop over possible extensions + foreach my $ext ('', '.pod', '.pm', '.pl') { + my $fullext = $fullname . 
$ext; + if (-f $fullext && + contains_pod($fullext, $options{'-verbose'}) ) { + warn "FOUND: $fullext\n" if $options{'-verbose'}; + return $fullext; + } + } + } else { + warn "Directory $dir does not exist\n" + if $options{'-verbose'}; + next Dir; + } + if(-d File::Spec->catdir($dir,'pod')) { + $dir = File::Spec->catdir($dir,'pod'); + redo Dir; + } + } + # No match; + return undef; +} + +=head2 C<contains_pod( $file , $verbose )> + +Returns true if the supplied filename (not POD module) contains some pod +information. + +=cut + +sub contains_pod { + my $file = shift; + my $verbose = 0; + $verbose = shift if @_; + + # check for one line of POD + unless(open(POD,"<$file")) { + warn "Error: $file is unreadable: $!\n"; + return undef; + } + + local $/ = undef; + my $pod = <POD>; + close(POD) || die "Error closing $file: $!\n"; + unless($pod =~ /\n=(head\d|pod|over|item)\b/s) { + warn "No POD in $file, skipping.\n" + if($verbose); + return 0; + } + + return 1; +} + +=head1 AUTHOR + +Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, +heavily borrowing code from Nick Ing-Simmons' PodToHtml. + +Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt> provided +C<pod_where> and C<contains_pod>. 
+ +=head1 SEE ALSO + +L<Pod::Parser>, L<Pod::Checker>, L<perldoc> + +=cut + 1; diff --git a/lib/Pod/Functions.pm b/lib/Pod/Functions.pm index 03cbf711eb..44619d53d8 100644 --- a/lib/Pod/Functions.pm +++ b/lib/Pod/Functions.pm @@ -296,7 +296,7 @@ values HASH return a list of the values in a hash vec Binary test or set particular bits in a string wait Process wait for any child process to die waitpid Process wait for a particular child process to die -wantarray Misc,Flow get list vs array context of current subroutine call +wantarray Misc,Flow get void vs scalar vs list context of current subroutine call warn I/O print debugging info write I/O print a picture record y/// String transliterate a string diff --git a/lib/Pod/Html.pm b/lib/Pod/Html.pm index 346495f3de..f70a42bccc 100644 --- a/lib/Pod/Html.pm +++ b/lib/Pod/Html.pm @@ -893,6 +893,10 @@ sub scan_dir { $pages{$_} = "" unless defined $pages{$_}; $pages{$_} .= "$dir/$_.pod:"; push(@pods, "$dir/$_.pod"); + } elsif (/\.html\z/) { # .html + s/\.html\z//; + $pages{$_} = "" unless defined $pages{$_}; + $pages{$_} .= "$dir/$_.pod:"; } elsif (/\.pm\z/) { # .pm s/\.pm\z//; $pages{$_} = "" unless defined $pages{$_}; diff --git a/lib/Pod/InputObjects.pm b/lib/Pod/InputObjects.pm index 849182bf37..352373b9da 100644 --- a/lib/Pod/InputObjects.pm +++ b/lib/Pod/InputObjects.pm @@ -11,7 +11,7 @@ package Pod::InputObjects; use vars qw($VERSION); -$VERSION = 1.12; ## Current version of this package +$VERSION = 1.13; ## Current version of this package require 5.005; ## requires this Perl version or later ############################################################################# @@ -42,7 +42,7 @@ are defined: =begin __PRIVATE__ -=item B<Pod::InputSource> +=item package B<Pod::InputSource> An object corresponding to a source of POD input text. It is mostly a wrapper around a filehandle or C<IO::Handle>-type object (or anything @@ -51,23 +51,23 @@ additional information relevant to the parsing of PODs. 
=end __PRIVATE__ -=item B<Pod::Paragraph> +=item package B<Pod::Paragraph> An object corresponding to a paragraph of POD input text. It may be a plain paragraph, a verbatim paragraph, or a command paragraph (see L<perlpod>). -=item B<Pod::InteriorSequence> +=item package B<Pod::InteriorSequence> An object corresponding to an interior sequence command from the POD input text (see L<perlpod>). -=item B<Pod::ParseTree> +=item package B<Pod::ParseTree> An object corresponding to a tree of parsed POD text. Each "node" in a parse-tree (or I<ptree>) is either a text-string or a reference to a B<Pod::InteriorSequence> object. The nodes appear in the parse-tree -in they order in which they were parsed from left-to-right. +in the order in which they were parsed from left-to-right. =back @@ -232,7 +232,7 @@ It has the following methods/attributes: ##--------------------------------------------------------------------------- -=head2 B<new()> +=head2 Pod::Paragraph-E<gt>B<new()> my $pod_para1 = Pod::Paragraph->new(-text => $text); my $pod_para2 = Pod::Paragraph->new(-name => $cmd, @@ -284,7 +284,7 @@ sub new { ##--------------------------------------------------------------------------- -=head2 B<cmd_name()> +=head2 $pod_para-E<gt>B<cmd_name()> my $para_cmd = $pod_para->cmd_name(); @@ -303,7 +303,7 @@ sub cmd_name { ##--------------------------------------------------------------------------- -=head2 B<text()> +=head2 $pod_para-E<gt>B<text()> my $para_text = $pod_para->text(); @@ -318,7 +318,7 @@ sub text { ##--------------------------------------------------------------------------- -=head2 B<raw_text()> +=head2 $pod_para-E<gt>B<raw_text()> my $raw_pod_para = $pod_para->raw_text(); @@ -335,7 +335,7 @@ sub raw_text { ##--------------------------------------------------------------------------- -=head2 B<cmd_prefix()> +=head2 $pod_para-E<gt>B<cmd_prefix()> my $prefix = $pod_para->cmd_prefix(); @@ -351,7 +351,7 @@ sub cmd_prefix { 
##--------------------------------------------------------------------------- -=head2 B<cmd_separator()> +=head2 $pod_para-E<gt>B<cmd_separator()> my $separator = $pod_para->cmd_separator(); @@ -367,7 +367,7 @@ sub cmd_separator { ##--------------------------------------------------------------------------- -=head2 B<parse_tree()> +=head2 $pod_para-E<gt>B<parse_tree()> my $ptree = $pod_parser->parse_text( $pod_para->text() ); $pod_para->parse_tree( $ptree ); @@ -387,13 +387,13 @@ sub parse_tree { ##--------------------------------------------------------------------------- -=head2 B<file_line()> +=head2 $pod_para-E<gt>B<file_line()> my ($filename, $line_number) = $pod_para->file_line(); my $position = $pod_para->file_line(); Returns the current filename and line number for the paragraph -object. If called in an array context, it returns a list of two +object. If called in a list context, it returns a list of two elements: first the filename, then the line number. If called in a scalar context, it returns a string containing the filename, followed by a colon (':'), followed by the line number. 
@@ -423,7 +423,7 @@ It has the following methods/attributes: ##--------------------------------------------------------------------------- -=head2 B<new()> +=head2 Pod::InteriorSequence-E<gt>B<new()> my $pod_seq1 = Pod::InteriorSequence->new(-name => $cmd -ldelim => $delimiter); @@ -497,7 +497,7 @@ sub new { ##--------------------------------------------------------------------------- -=head2 B<cmd_name()> +=head2 $pod_seq-E<gt>B<cmd_name()> my $seq_cmd = $pod_seq->cmd_name(); @@ -546,7 +546,7 @@ sub _unset_child2parent_links { ##--------------------------------------------------------------------------- -=head2 B<prepend()> +=head2 $pod_seq-E<gt>B<prepend()> $pod_seq->prepend($text); $pod_seq1->prepend($pod_seq2); @@ -565,7 +565,7 @@ sub prepend { ##--------------------------------------------------------------------------- -=head2 B<append()> +=head2 $pod_seq-E<gt>B<append()> $pod_seq->append($text); $pod_seq1->append($pod_seq2); @@ -584,7 +584,7 @@ sub append { ##--------------------------------------------------------------------------- -=head2 B<nested()> +=head2 $pod_seq-E<gt>B<nested()> $outer_seq = $pod_seq->nested || print "not nested"; @@ -602,7 +602,7 @@ sub nested { ##--------------------------------------------------------------------------- -=head2 B<raw_text()> +=head2 $pod_seq-E<gt>B<raw_text()> my $seq_raw_text = $pod_seq->raw_text(); @@ -623,7 +623,7 @@ sub raw_text { ##--------------------------------------------------------------------------- -=head2 B<left_delimiter()> +=head2 $pod_seq-E<gt>B<left_delimiter()> my $ldelim = $pod_seq->left_delimiter(); @@ -642,7 +642,7 @@ sub left_delimiter { ##--------------------------------------------------------------------------- -=head2 B<right_delimiter()> +=head2 $pod_seq-E<gt>B<right_delimiter()> The rightmost delimiter beginning the argument text to the interior sequence (should be ">"). 
@@ -659,7 +659,7 @@ sub right_delimiter { ##--------------------------------------------------------------------------- -=head2 B<parse_tree()> +=head2 $pod_seq-E<gt>B<parse_tree()> my $ptree = $pod_parser->parse_text($paragraph_text); $pod_seq->parse_tree( $ptree ); @@ -680,13 +680,13 @@ sub parse_tree { ##--------------------------------------------------------------------------- -=head2 B<file_line()> +=head2 $pod_seq-E<gt>B<file_line()> my ($filename, $line_number) = $pod_seq->file_line(); my $position = $pod_seq->file_line(); Returns the current filename and line number for the interior sequence -object. If called in an array context, it returns a list of two +object. If called in a list context, it returns a list of two elements: first the filename, then the line number. If called in a scalar context, it returns a string containing the filename, followed by a colon (':'), followed by the line number. @@ -701,7 +701,7 @@ sub file_line { ##--------------------------------------------------------------------------- -=head2 B<DESTROY()> +=head2 Pod::InteriorSequence::B<DESTROY()> This method performs any necessary cleanup for the interior-sequence. If you override this method then it is B<imperative> that you invoke @@ -738,7 +738,7 @@ itself contain a parse-tree (since interior sequences may be nested). ##--------------------------------------------------------------------------- -=head2 B<new()> +=head2 Pod::ParseTree-E<gt>B<new()> my $ptree1 = Pod::ParseTree->new; my $ptree2 = new Pod::ParseTree; @@ -766,7 +766,7 @@ sub new { ##--------------------------------------------------------------------------- -=head2 B<top()> +=head2 $ptree-E<gt>B<top()> my $top_node = $ptree->top(); $ptree->top( $top_node ); @@ -794,7 +794,7 @@ sub top { ##--------------------------------------------------------------------------- -=head2 B<children()> +=head2 $ptree-E<gt>B<children()> This method gets/sets the children of the top node in the parse-tree. 
If no arguments are given, it returns the list (array) of children @@ -814,7 +814,7 @@ sub children { ##--------------------------------------------------------------------------- -=head2 B<prepend()> +=head2 $ptree-E<gt>B<prepend()> This method prepends the given text or parse-tree to the current parse-tree. If the first item on the parse-tree is text and the argument is also text, @@ -842,7 +842,7 @@ sub prepend { ##--------------------------------------------------------------------------- -=head2 B<append()> +=head2 $ptree-E<gt>B<append()> This method appends the given text or parse-tree to the current parse-tree. If the last item on the parse-tree is text and the argument is also text, @@ -866,7 +866,7 @@ sub append { } } -=head2 B<raw_text()> +=head2 $ptree-E<gt>B<raw_text()> my $ptree_raw_text = $ptree->raw_text(); @@ -902,7 +902,7 @@ sub _set_child2parent_links { ## nothing to do, Pod::ParseTrees cant have parent pointers } -=head2 B<DESTROY()> +=head2 Pod::ParseTree::B<DESTROY()> This method performs any necessary cleanup for the parse-tree. If you override this method then it is B<imperative> diff --git a/lib/Pod/LaTeX.pm b/lib/Pod/LaTeX.pm index 8adb58921c..c909d21f35 100644 --- a/lib/Pod/LaTeX.pm +++ b/lib/Pod/LaTeX.pm @@ -36,7 +36,7 @@ use Carp; use vars qw/ $VERSION %HTML_Escapes @LatexSections /; -$VERSION = '0.52'; +$VERSION = '0.53'; # Definitions of =headN -> latex mapping @LatexSections = (qw/ @@ -57,6 +57,8 @@ $VERSION = '0.52'; 'lt' => '$<$', # ' left chevron, less-than 'gt' => '$>$', # ' right chevron, greater-than 'quot' => '"', # double quote + 'sol' => '/', + 'verbar' => '$|$', "Aacute" => "\\'{A}", # capital A, acute accent "aacute" => "\\'{a}", # small a, acute accent @@ -675,7 +677,7 @@ pushed onto this stack. When the list context ends, it is popped from the stack. The array will be empty if no lists are active. 
-Returns array of list information in array context +Returns array of list information in list context Returns array ref in scalar context =cut @@ -952,6 +954,9 @@ sub verbatim { # Clean trailing space $paragraph =~ s/\s+$//; + # Clean tabs + $paragraph =~ s/\t/ /g; + $self->_output('\begin{verbatim}' . "\n$paragraph\n". '\end{verbatim}'."\n"); } } @@ -1248,7 +1253,7 @@ sub add_item { $paragraph =~ s/^\s+//; my $type; - if ($paragraph eq '*') { + if (substr($paragraph, 0,1) eq '*') { $type = 'itemize'; } elsif ($paragraph =~ /^\d/) { $type = 'enumerate'; @@ -1264,10 +1269,21 @@ sub add_item { my $type = $self->lists->[-1]->type; if ($type eq 'description') { + # Handle long items - long items do not wrap + if (length($paragraph) < 40) { + # A real description list item + $self->_output("\\item[$paragraph] \\mbox{}"); + } else { + # The item is now simply bold text + $self->_output(qq{\\item \\textbf{$paragraph}}); + } - $self->_output("\\item[$paragraph] \\mbox{}"); } else { - $self->_output('\item '); + # If the item was '* Something' we still need to write + # out the something + my $extra_info = $paragraph; + $extra_info =~ s/^\*\s*//; + $self->_output("\\item $extra_info"); } # Store the item name in the object. Required so that @@ -1392,6 +1408,8 @@ Special characters and the C<latex> equivalents are: & \& \ $\backslash$ ^ \^{} + ~ \~{} + | $|$ =cut @@ -1412,6 +1430,12 @@ sub _replace_special_chars { # Replace ^ characters with \^{} so that $^F works okay $paragraph =~ s/(\^)/\\$1\{\}/g; + # Replace tilde (~) with \texttt{\~{}} + $paragraph =~ s/~/\\texttt\{\\~\{\}\}/g; + + # Replace | with $|$ + $paragraph =~ s'\|'$|$'g; + # Now add the dollars around each \backslash $paragraph =~ s/(\\backslash)/\$$1\$/g; @@ -1560,7 +1584,7 @@ under the same terms as Perl itself. 
=head1 REVISION -$Id: LaTeX.pm,v 1.4 2000/05/16 01:26:55 timj Exp $ +$Id: LaTeX.pm,v 1.6 2000/08/21 09:05:03 timj Exp $ =end __PRIVATE__ diff --git a/lib/Pod/Man.pm b/lib/Pod/Man.pm index 439b22c35b..0fdb6d0360 100644 --- a/lib/Pod/Man.pm +++ b/lib/Pod/Man.pm @@ -1,5 +1,5 @@ # Pod::Man -- Convert POD data to formatted *roff input. -# $Id: Man.pm,v 1.4 2000/04/26 04:03:41 eagle Exp $ +# $Id: Man.pm,v 1.8 2000/10/10 02:14:31 eagle Exp $ # # Copyright 1999, 2000 by Russ Allbery <rra@stanford.edu> # @@ -38,7 +38,7 @@ use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION); # Perl core and too many things could munge CVS magic revision strings. # This number should ideally be the same as the CVS revision in podlators, # however. -$VERSION = 1.04; +$VERSION = 1.08; ############################################################################ @@ -47,8 +47,10 @@ $VERSION = 1.04; # The following is the static preamble which starts all *roff output we # generate. It's completely static except for the font to use as a -# fixed-width font, which is designed by @CFONT@. $PREAMBLE should -# therefore be run through s/\@CFONT\@/<font>/g before output. +# fixed-width font, which is designed by @CFONT@, and the left and right +# quotes to use for C<> text, designated by @LQOUTE@ and @RQUOTE@. +# $PREAMBLE should therefore be run through s/\@CFONT\@/<font>/g before +# output. $PREAMBLE = <<'----END OF PREAMBLE----'; .de Sh \" Subsection heading .br @@ -93,8 +95,8 @@ $PREAMBLE = <<'----END OF PREAMBLE----'; . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" -. ds C` ` -. ds C' ' +. ds C` @LQUOTE@ +. ds C' @RQUOTE@ 'br\} .el\{\ . ds -- \|\(em\| @@ -183,7 +185,8 @@ $PREAMBLE = <<'----END OF PREAMBLE----'; .\} .rm #[ #] #H #V #F C ----END OF PREAMBLE---- - +#`# for cperl-mode + # This table is taken nearly verbatim from Tom Christiansen's pod2man. 
It # assumes that the standard preamble has already been printed, since that's # what defines all of the accent marks. Note that some of these are quoted @@ -194,7 +197,7 @@ $PREAMBLE = <<'----END OF PREAMBLE----'; 'lt' => '<', # left chevron, less-than 'gt' => '>', # right chevron, greater-than 'quot' => '"', # double quote - 'sol' => '/', # solidus + 'sol' => '/', # solidus (forward slash) 'verbar' => '|', # vertical bar 'Aacute' => "A\\*'", # capital A, acute accent @@ -275,7 +278,7 @@ sub protect { s/^([.\'\\])/\\&$1/mg; $_; } - + # Given a command and a single argument that may or may not contain double # quotes, handle double-quote formatting for it. If there are no double # quotes, just return the command followed by the argument in double quotes. @@ -306,7 +309,7 @@ sub switchquotes { # Translate a font string into an escape. sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] } - + ############################################################################ # Initialization ############################################################################ @@ -325,7 +328,8 @@ sub initialize { for (qw/fixed fixedbold fixeditalic fixedbolditalic/) { if (defined $$self{$_}) { if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) { - croak "roff font should be 1 or 2 chars, not `$$self{$_}'"; + croak qq(roff font should be 1 or 2 chars,) + . qq( not "$$self{$_}"); } } else { $$self{$_} = ''; @@ -370,16 +374,35 @@ sub initialize { $$self{$_} =~ s/\"/\"\"/g if $$self{$_}; } + # Figure out what quotes we'll be using for C<> text. 
+ $$self{quotes} ||= '"'; + if ($$self{quotes} eq 'none') { + $$self{LQUOTE} = $$self{RQUOTE} = ''; + } elsif (length ($$self{quotes}) == 1) { + $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes}; + } elsif ($$self{quotes} =~ /^(.)(.)$/ + || $$self{quotes} =~ /^(..)(..)$/) { + $$self{LQUOTE} = $1; + $$self{RQUOTE} = $2; + } else { + croak qq(Invalid quote specification "$$self{quotes}"); + } + + # Double the first quote; note that this should not be s///g as two + # double quotes is represented in *roff as three double quotes, not + # four. Weird, I know. + $$self{LQUOTE} =~ s/\"/\"\"/; + $$self{RQUOTE} =~ s/\"/\"\"/; + $$self{INDENT} = 0; # Current indentation level. $$self{INDENTS} = []; # Stack of indentations. $$self{INDEX} = []; # Index keys waiting to be printed. + $$self{ITEMS} = 0; # The number of consecutive =items. $self->SUPER::initialize; } -# For each document we process, output the preamble first. Note that the -# fixed width font is a global default; once we interpolate it into the -# PREAMBLE, it ain't ever changing. Maybe fix this later. +# For each document we process, output the preamble first. sub begin_pod { my $self = shift; @@ -425,15 +448,18 @@ sub begin_pod { } # Now, print out the preamble and the title. - $PREAMBLE =~ s/\@CFONT\@/$$self{fixed}/; - chomp $PREAMBLE; + local $_ = $PREAMBLE; + s/\@CFONT\@/$$self{fixed}/; + s/\@LQUOTE\@/$$self{LQUOTE}/; + s/\@RQUOTE\@/$$self{RQUOTE}/; + chomp $_; print { $self->output_handle } <<"----END OF HEADER----"; .\\" Automatically generated by Pod::Man version $VERSION .\\" @{[ scalar localtime ]} .\\" .\\" Standard preamble: .\\" ====================================================================== -$PREAMBLE +$_ .\\" ====================================================================== .\\" .IX Title "$name $section" @@ -460,9 +486,19 @@ sub command { my $self = shift; my $command = shift; return if $command eq 'pod'; - return if ($$self{EXCLUDE} && $command ne 'end'); - $command = 'cmd_' . 
$command; - $self->$command (@_); + return if ($$self{EXCLUDE} && $command ne 'end'); + if ($self->can ('cmd_' . $command)) { + $command = 'cmd_' . $command; + $self->$command (@_); + } else { + my ($text, $line, $paragraph) = @_; + my $file; + ($file, $line) = $paragraph->file_line; + $text =~ s/\n+\z//; + $text = " $text" if ($text =~ /^\S/); + warn qq($file:$line: Unknown command paragraph "=$command$text"\n); + return; + } } # Called for a verbatim paragraph. Gets the paragraph, the line number, and @@ -479,7 +515,7 @@ sub verbatim { 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me; s/\\/\\e/g; s/^(\s*\S)/'\&' . $1/gme; - $self->makespace if $$self{NEEDSPACE}; + $self->makespace; $self->output (".Vb $lines\n$_.Ve\n"); $$self{NEEDSPACE} = 0; } @@ -505,7 +541,7 @@ sub textblock { > ( ,?\s+(and\s+)? # Allow lots of them, conjuncted. - L< + L< / ( [:\w]+ ( \(\) )? ) > @@ -531,7 +567,7 @@ sub textblock { # scalars as well as scalars and does the right thing with them. $text = $self->parse ($text, @_); $text =~ s/\n\s*$/\n/; - $self->makespace if $$self{NEEDSPACE}; + $self->makespace; $self->output (protect $self->mapfonts ($text)); $self->outindex; $$self{NEEDSPACE} = 1; @@ -591,7 +627,7 @@ sub sequence { my $tmp = $self->buildlink ($_); return bless \ "$tmp", 'Pod::Man::String'; } - + # Whitespace protection replaces whitespace with "\ ". if ($command eq 'S') { s/\s+/\\ /g; @@ -621,6 +657,10 @@ sub cmd_head1 { local $_ = $self->parse (@_); s/\s+$//; s/\\s-?\d//g; + if ($$self{ITEMS} > 1) { + $$self{ITEMS} = 0; + $self->output (".PD\n"); + } $self->output (switchquotes ('.SH', $self->mapfonts ($_))); $self->outindex (($_ eq 'NAME') ? 
() : ('Header', $_)); $$self{NEEDSPACE} = 0; @@ -631,6 +671,10 @@ sub cmd_head2 { my $self = shift; local $_ = $self->parse (@_); s/\s+$//; + if ($$self{ITEMS} > 1) { + $$self{ITEMS} = 0; + $self->output (".PD\n"); + } $self->output (switchquotes ('.Sh', $self->mapfonts ($_))); $self->outindex ('Subsection', $_); $$self{NEEDSPACE} = 0; @@ -685,7 +729,7 @@ sub cmd_item { my $index; if (/\w/ && !/^\w[.\)]\s*$/) { $index = $_; - $index =~ s/^\s*[-*+o.]?\s*//; + $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//; } s/^\*(\s|\Z)/\\\(bu$1/; if ($$self{WEIRDINDENT}) { @@ -693,9 +737,11 @@ sub cmd_item { $$self{WEIRDINDENT} = 0; } $_ = $self->mapfonts ($_); + $self->output (".PD 0\n") if ($$self{ITEMS} == 1); $self->output (switchquotes ('.Ip', $_, $$self{INDENT})); $self->outindex ($index ? ('Item', $index) : ()); $$self{NEEDSPACE} = 0; + $$self{ITEMS}++; } # Begin a block for a particular translator. Setting VERBATIM triggers @@ -828,7 +874,7 @@ sub parse { $self->parse_text ({ -expand_seq => 'sequence', -expand_ptree => 'collapse' }, @_); } - + # Takes a parse tree and a flag saying whether or not to treat it as literal # text (not call guesswork on it), and returns the concatenation of all of # the text strings in that parse tree. If the literal flag isn't true, @@ -942,7 +988,10 @@ sub guesswork { # Make vertical whitespace. sub makespace { my $self = shift; - $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n"); + $self->output (".PD\n") if ($$self{ITEMS} > 1); + $$self{ITEMS} = 0; + $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n") + if $$self{NEEDSPACE}; } # Output any pending index entries, and optionally an index entry given as @@ -1103,6 +1152,18 @@ Pod::Man doesn't assume you have this, and defaults to CB. Some systems (such as Solaris) have this font available as CX. Only matters for troff(1) output. +=item quotes + +Sets the quote marks used to surround CE<lt>> text. 
If the value is a +single character, it is used as both the left and right quote; if it is two +characters, the first character is used as the left quote and the second as +the right quoted; and if it is four characters, the first two are used as +the left quote and the second two as the right quote. + +This may also be set to the special value C<none>, in which case no quote +marks are added around CE<lt>> text (but the font is still changed for troff +output). + =item release Set the centered footer. By default, this is the version of Perl you run @@ -1139,7 +1200,7 @@ details. =over 4 -=item roff font should be 1 or 2 chars, not `%s' +=item roff font should be 1 or 2 chars, not "%s" (F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that wasn't either one or two characters. Pod::Man doesn't support *roff fonts @@ -1152,6 +1213,16 @@ versions of nroff(1) and troff(1) don't either). unable to parse. You should never see this error message; it probably indicates a bug in Pod::Man. +=item Invalid quote specification "%s" + +(F) The quote specification given (the quotes option to the constructor) was +invalid. A quote specification must be one, two, or four characters long. + +=item %s:%d: Unknown command paragraph "%s". + +(W) The POD source contained a non-standard command paragraph (something of +the form C<=command args>) that Pod::Man didn't know about. It was ignored. + =item Unknown escape EE<lt>%sE<gt> (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't @@ -1162,6 +1233,11 @@ know about. C<EE<lt>%sE<gt>> was printed verbatim in the output. (W) The POD source contained a non-standard interior sequence (something of the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored. +=item %s: Unknown command paragraph "%s" on line %d. + +(W) The POD source contained a non-standard command paragraph (something of +the form C<=command args>) that Pod::Man didn't know about. It was ignored. 
+ =item Unmatched =back (W) Pod::Man encountered a C<=back> command that didn't correspond to an diff --git a/lib/Pod/ParseUtils.pm b/lib/Pod/ParseUtils.pm index 2cb8cdcd3b..6703a7fbc8 100644 --- a/lib/Pod/ParseUtils.pm +++ b/lib/Pod/ParseUtils.pm @@ -10,7 +10,7 @@ package Pod::ParseUtils; use vars qw($VERSION); -$VERSION = 0.2; ## Current version of this package +$VERSION = 0.22; ## Current version of this package require 5.005; ## requires this Perl version or later =head1 NAME @@ -49,7 +49,7 @@ The following methods are available: =over 4 -=item new() +=item Pod::List-E<gt>new() Create a new list object. Properties may be specified through a hash reference like this: @@ -79,7 +79,7 @@ sub initialize { $self->{-type} ||= ''; } -=item file() +=item $list-E<gt>file() Without argument, retrieves the file name the list is in. This must have been set before by either specifying B<-file> in the B<new()> @@ -92,7 +92,7 @@ sub file { return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; } -=item start() +=item $list-E<gt>start() Without argument, retrieves the line number where the list started. This must have been set before by either specifying B<-start> in the @@ -106,7 +106,7 @@ sub start { return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start}; } -=item indent() +=item $list-E<gt>indent() Without argument, retrieves the indent level of the list as specified in C<=over n>. This must have been set before by either specifying @@ -120,7 +120,7 @@ sub indent { return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent}; } -=item type() +=item $list-E<gt>type() Without argument, retrieves the list type, which can be an arbitrary value, e.g. C<OL>, C<UL>, ... when thinking the HTML way. @@ -135,7 +135,7 @@ sub type { return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; } -=item rx() +=item $list-E<gt>rx() Without argument, retrieves a regular expression for simplifying the individual item strings once the list type has been determined. 
Usage: @@ -152,7 +152,7 @@ sub rx { return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx}; } -=item item() +=item $list-E<gt>item() Without argument, retrieves the array of the items in this list. The items may be represented by any scalar. @@ -172,7 +172,7 @@ sub item { } } -=item parent() +=item $list-E<gt>parent() Without argument, retrieves information about the parent holding this list, which is represented as an arbitrary scalar. @@ -188,7 +188,7 @@ sub parent { return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent}; } -=item tag() +=item $list-E<gt>tag() Without argument, retrieves information about the list tag, which can be any scalar. @@ -227,7 +227,7 @@ used to construct hyperlinks. =over 4 -=item new() +=item Pod::Hyperlink-E<gt>new() The B<new()> method can either be passed a set of key/value pairs or a single scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object @@ -269,10 +269,14 @@ sub initialize { $self->{_warnings} = []; } -=item parse($string) +=item $link-E<gt>parse($string) This method can be used to (re)parse a (new) hyperlink, i.e. the contents of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object. +Warnings are stored in the B<warnings> property. +E.g. sections like C<LE<lt>open(2)E<gt>> are deprected, as they do not point +to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage +section can simply be dropped. =cut @@ -280,7 +284,7 @@ sub parse { my $self = shift; local($_) = $_[0]; # syntax check the link and extract destination - my ($alttext,$page,$node,$type) = ('','','',''); + my ($alttext,$page,$node,$type) = (undef,'','',''); $self->{_warnings} = []; @@ -308,25 +312,24 @@ sub parse { # problem: a lot of people use (), or (1) or the like to indicate # man page sections. But this collides with L<func()> that is supposed # to point to an internal funtion... - # I would like the following better, here and below: - #if(m!^(\w+(?:::\w+)*)$!) 
{ - my $page_rx = '[\w.]+(?:::[\w.]+)*'; + my $page_rx = '[\w.]+(?:::[\w.]+)*(?:[(](?:\d\w*|)[)]|)'; + # page name only if(m!^($page_rx)$!o) { $page = $1; $type = 'page'; } # alttext, page and "section" - elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) { + elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) { ($alttext, $page, $node) = ($1, $2, $3); $type = 'section'; } # alttext and page - elsif(m!^(.+?)\s*[|]\s*($page_rx)$!o) { + elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) { ($alttext, $page) = ($1, $2); $type = 'page'; } # alttext and "section" - elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { + elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { ($alttext, $node) = ($1,$2); $type = 'section'; } @@ -356,16 +359,16 @@ sub parse { $type = 'hyperlink'; } # alttext, page and item - elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) { + elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) { ($alttext, $page, $node) = ($1, $2, $3); $type = 'item'; } # alttext and item - elsif(m!^(.+?)\s*[|]\s*/(.+)$!) { + elsif(m!^(.*?)\s*[|]\s*/(.+)$!) { ($alttext, $node) = ($1,$2); } # nonstandard: alttext and hyperlink - elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) { + elsif(m!^(.*?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) { ($alttext, $node) = ($1,$2); $type = 'hyperlink'; } @@ -377,9 +380,19 @@ sub parse { # collapse whitespace in nodes $node =~ s/\s+/ /gs; - #if($page =~ /[(]\w*[)]$/) { - # $self->warning("section in '$page' deprecated"); - #} + # empty alternative text expands to node name + if(defined $alttext) { + if(!length($alttext)) { + $alttext = $node | $page; + } + } + else { + $alttext = ''; + } + + if($page =~ /[(]\w*[)]$/) { + $self->warning("(section) in '$page' deprecated"); + } if($node =~ m:[|/]:) { $self->warning("node '$node' contains non-escaped | or /"); } @@ -435,7 +448,7 @@ sub _construct_text { } } -=item markup($string) +=item $link-E<gt>markup($string) Set/retrieve the textual value of the link. 
This string contains special markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the @@ -450,7 +463,7 @@ sub markup { return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup}; } -=item text() +=item $link-E<gt>text() This method returns the textual representation of the hyperlink as above, but without markers (read only). Depending on the link type this is one of @@ -469,7 +482,7 @@ sub text { $_[0]->{_text}; } -=item warning() +=item $link-E<gt>warning() After parsing, this method returns any warnings encountered during the parsing process. @@ -486,7 +499,9 @@ sub warning { return @{$self->{_warnings}}; } -=item line(), file() +=item $link-E<gt>file() + +=item $link-E<gt>line() Just simple slots for storing information about the line and the file the link was encountered in. Has to be filled in manually. @@ -503,7 +518,7 @@ sub file { return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; } -=item page() +=item $link-E<gt>page() This method sets or returns the POD page this link points to. @@ -518,7 +533,7 @@ sub page { $_[0]->{-page}; } -=item node() +=item $link-E<gt>node() As above, but the destination node text of the link. @@ -533,7 +548,7 @@ sub node { $_[0]->{-node}; } -=item alttext() +=item $link-E<gt>alttext() Sets or returns an alternative text specified in the link. @@ -548,7 +563,7 @@ sub alttext { $_[0]->{-alttext}; } -=item type() +=item $link-E<gt>type() The node type, either C<section> or C<item>. As an unofficial type, there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>> @@ -560,7 +575,7 @@ sub type { return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; } -=item link() +=item $link-E<gt>link() Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>. @@ -620,7 +635,7 @@ The following methods are available: =over 4 -=item new() +=item Pod::Cache-E<gt>new() Create a new cache object. This object can hold an arbitrary number of POD documents of class Pod::Cache::Item. 
@@ -635,7 +650,7 @@ sub new { return $self; } -=item item() +=item $cache-E<gt>item() Add a new item to the cache. Without arguments, this method returns a list of all cache elements. @@ -654,7 +669,7 @@ sub item { } } -=item find_page($name) +=item $cache-E<gt>find_page($name) Look for a POD document named C<$name> in the cache. Returns the reference to the corresponding Pod::Cache::Item object or undef if @@ -686,7 +701,7 @@ The following methods are available: =over 4 -=item new() +=item Pod::Cache::Item-E<gt>new() Create a new object. @@ -707,7 +722,7 @@ sub initialize { $self->{-nodes} = [] unless(defined $self->{-nodes}); } -=item page() +=item $cacheitem-E<gt>page() Set/retrieve the POD document name (e.g. "Pod::Parser"). @@ -718,7 +733,7 @@ sub page { return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page}; } -=item description() +=item $cacheitem-E<gt>description() Set/retrieve the POD short description as found in the C<=head1 NAME> section. @@ -730,7 +745,7 @@ sub description { return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description}; } -=item path() +=item $cacheitem-E<gt>path() Set/retrieve the POD file storage path. @@ -741,7 +756,7 @@ sub path { return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path}; } -=item file() +=item $cacheitem-E<gt>file() Set/retrieve the POD file name. @@ -752,7 +767,7 @@ sub file { return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; } -=item nodes() +=item $cacheitem-E<gt>nodes() Add a node (or a list of nodes) to the document's node list. Note that the order is kept, i.e. start with the first node and end with the last. @@ -775,14 +790,12 @@ sub nodes { } } -=item find_node($name) +=item $cacheitem-E<gt>find_node($name) Look for a node or index entry named C<$name> in the object. Returns the unique id of the node (i.e. the second element of the array stored in the node arry) or undef if not found. 
-=back - =cut sub find_node { @@ -798,7 +811,7 @@ sub find_node { undef; } -=item idx() +=item $cacheitem-E<gt>idx() Add an index entry (or a list of them) to the document's index list. Note that the order is kept, i.e. start with the first node and end with the last. @@ -807,6 +820,8 @@ same order the entries have been added. An index entry can be any scalar, but usually is a pair of string and unique id. +=back + =cut # The POD index entries diff --git a/lib/Pod/Parser.pm b/lib/Pod/Parser.pm index 48fc198ded..6782519d96 100644 --- a/lib/Pod/Parser.pm +++ b/lib/Pod/Parser.pm @@ -10,7 +10,7 @@ package Pod::Parser; use vars qw($VERSION); -$VERSION = 1.12; ## Current version of this package +$VERSION = 1.13; ## Current version of this package require 5.005; ## requires this Perl version or later ############################################################################# @@ -205,7 +205,6 @@ use strict; use Pod::InputObjects; use Carp; use Exporter; -require VMS::Filespec if $^O eq 'VMS'; BEGIN { if ($] < 5.6) { require Symbol; @@ -783,11 +782,11 @@ sub parse_text { ## Iterate over all sequence starts text (NOTE: split with ## capturing parens keeps the delimiters) $_ = $text; - my @tokens = split /([A-Z]<(?:<+\s+)?)/; + my @tokens = split /([A-Z]<(?:<+\s)?)/; while ( @tokens ) { $_ = shift @tokens; ## Look for the beginning of a sequence - if ( /^([A-Z])(<(?:<+\s+)?)$/ ) { + if ( /^([A-Z])(<(?:<+\s)?)$/ ) { ## Push a new sequence onto the stack of those "in-progress" ($cmd, $ldelim) = ($1, $2); $seq = Pod::InteriorSequence->new( @@ -848,7 +847,6 @@ sub parse_text { my $errorsub = (@seq_stack > 1) ? 
$self->errorsub() : undef; while (@seq_stack > 1) { ($cmd, $file, $line) = ($seq->name, $seq->file_line); - $file = VMS::Filespec::unixify($file) if $^O eq 'VMS'; $ldelim = $seq->ldelim; ($rdelim = $ldelim) =~ tr/</>/; $rdelim =~ s/^(\S+)(\s*)$/$2$1/; @@ -1081,10 +1079,9 @@ sub parse_from_filehandle { && (length $paragraph)); ## Issue a warning about any non-empty blank lines - if (length($1) > 1 and $myOpts{'-warnings'} and ! $myData{_CUTTING}) { + if (length($1) > 0 and $myOpts{'-warnings'} and ! $myData{_CUTTING}) { my $errorsub = $self->errorsub(); my $file = $self->input_file(); - $file = VMS::Filespec::unixify($file) if $^O eq 'VMS'; my $errmsg = "*** WARNING: line containing nothing but whitespace". " in paragraph at line $nlines in file $file\n"; (ref $errorsub) and &{$errorsub}($errmsg) diff --git a/lib/Pod/Select.pm b/lib/Pod/Select.pm index 5dd1595107..d86d823350 100644 --- a/lib/Pod/Select.pm +++ b/lib/Pod/Select.pm @@ -10,7 +10,7 @@ package Pod::Select; use vars qw($VERSION); -$VERSION = 1.12; ## Current version of this package +$VERSION = 1.13; ## Current version of this package require 5.005; ## requires this Perl version or later ############################################################################# @@ -92,7 +92,7 @@ The formal syntax of a section specification is: =over 4 -=item +=item * I<head1-title-regex>/I<head2-title-regex>/... @@ -160,7 +160,7 @@ The formal syntax of a range specification is: =over 4 -=item +=item * /I<start-range-regex>/[../I<end-range-regex>/] diff --git a/lib/Pod/Text.pm b/lib/Pod/Text.pm index 47dcee584f..5a7bab8f2a 100644 --- a/lib/Pod/Text.pm +++ b/lib/Pod/Text.pm @@ -1,5 +1,5 @@ # Pod::Text -- Convert POD data to formatted ASCII text. 
-# $Id: Text.pm,v 2.4 2000/03/17 00:17:08 eagle Exp $ +# $Id: Text.pm,v 2.6 2000/10/10 02:13:17 eagle Exp $ # # Copyright 1999, 2000 by Russ Allbery <rra@stanford.edu> # @@ -37,7 +37,7 @@ use vars qw(@ISA @EXPORT %ESCAPES $VERSION); # Perl core and too many things could munge CVS magic revision strings. # This number should ideally be the same as the CVS revision in podlators, # however. -$VERSION = 2.04; +$VERSION = 2.06; ############################################################################ @@ -53,9 +53,9 @@ $VERSION = 2.04; 'lt' => '<', # left chevron, less-than 'gt' => '>', # right chevron, greater-than 'quot' => '"', # double quote - 'sol' => '/', # solidus + 'sol' => '/', # solidus (forward slash) 'verbar' => '|', # vertical bar - + "Aacute" => "\xC1", # capital A, acute accent "aacute" => "\xE1", # small a, acute accent "Acirc" => "\xC2", # capital A, circumflex accent @@ -82,8 +82,8 @@ $VERSION = 2.04; "eth" => "\xF0", # small eth, Icelandic "Euml" => "\xCB", # capital E, dieresis or umlaut mark "euml" => "\xEB", # small e, dieresis or umlaut mark - "Iacute" => "\xCD", # capital I, acute accent - "iacute" => "\xED", # small i, acute accent + "Iacute" => "\xCC", # capital I, acute accent + "iacute" => "\xEC", # small i, acute accent "Icirc" => "\xCE", # capital I, circumflex accent "icirc" => "\xEE", # small i, circumflex accent "Igrave" => "\xCD", # capital I, grave accent @@ -118,7 +118,7 @@ $VERSION = 2.04; "Yacute" => "\xDD", # capital Y, acute accent "yacute" => "\xFD", # small y, acute accent "yuml" => "\xFF", # small y, dieresis or umlaut mark - + "laquo" => "\xAB", # left pointing double angle quotation mark "lchevron" => "\xAB", # synonym (backwards compatibility) "raquo" => "\xBB", # right pointing double angle quotation mark @@ -172,6 +172,20 @@ sub initialize { $$self{sentence} = 0 unless defined $$self{sentence}; $$self{width} = 76 unless defined $$self{width}; + # Figure out what quotes we'll be using for C<> text. 
+ $$self{quotes} ||= "'"; + if ($$self{quotes} eq 'none') { + $$self{LQUOTE} = $$self{RQUOTE} = ''; + } elsif (length ($$self{quotes}) == 1) { + $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes}; + } elsif ($$self{quotes} =~ /^(.)(.)$/ + || $$self{quotes} =~ /^(..)(..)$/) { + $$self{LQUOTE} = $1; + $$self{RQUOTE} = $2; + } else { + croak qq(Invalid quote specification "$$self{quotes}"); + } + $$self{INDENTS} = []; # Stack of indentations. $$self{MARGIN} = $$self{indent}; # Current left margin in spaces. @@ -193,8 +207,18 @@ sub command { return if $command eq 'pod'; return if ($$self{EXCLUDE} && $command ne 'end'); $self->item ("\n") if defined $$self{ITEM}; - $command = 'cmd_' . $command; - $self->$command (@_); + if ($self->can ('cmd_' . $command)) { + $command = 'cmd_' . $command; + $self->$command (@_); + } else { + my ($text, $line, $paragraph) = @_; + my $file; + ($file, $line) = $paragraph->file_line; + $text =~ s/\n+\z//; + $text = " $text" if ($text =~ /^\S/); + warn qq($file:$line: Unknown command paragraph "=$command$text"\n); + return; + } } # Called for a verbatim paragraph. Gets the paragraph, the line number, and @@ -234,7 +258,7 @@ sub textblock { > ( ,?\s+(and\s+)? # Allow lots of them, conjuncted. - L< + L< / ( [:\w]+ @@ -399,7 +423,7 @@ sub cmd_end { my $self = shift; $$self{EXCLUDE} = 0; $$self{VERBATIM} = 0; -} +} # One paragraph for a particular translator. Ignore it unless it's intended # for text, in which case we treat it as a verbatim text block. @@ -419,9 +443,11 @@ sub cmd_for { # The simple formatting ones. These are here mostly so that subclasses can # override them and do more complicated things. sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] } -sub seq_c { return $_[0]{alt} ? "``$_[1]''" : "`$_[1]'" } sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] } sub seq_i { return '*' . $_[1] . '*' } +sub seq_c { + return $_[0]{alt} ? "``$_[1]''" : "$_[0]{LQUOTE}$_[1]$_[0]{RQUOTE}" +} # The complicated one. Handle links. 
Since this is plain text, we can't # actually make any real links, so this is all to figure out what text we @@ -592,13 +618,14 @@ sub pod2text { # means we need to turn the first argument into a file handle. Magic # open will handle the <&STDIN case automagically. if (defined $_[1]) { + my @fhs = @_; local *IN; - unless (open (IN, $_[0])) { - croak ("Can't open $_[0] for reading: $!\n"); + unless (open (IN, $fhs[0])) { + croak ("Can't open $fhs[0] for reading: $!\n"); return; } - $_[0] = \*IN; - return $parser->parse_from_filehandle (@_); + $fhs[0] = \*IN; + return $parser->parse_from_filehandle (@fhs); } else { return $parser->parse_from_file (@_); } @@ -664,6 +691,17 @@ it's the expected formatting for manual pages; if you're formatting arbitrary text documents, setting this to true may result in more pleasing output. +=item quotes + +Sets the quote marks used to surround CE<lt>> text. If the value is a +single character, it is used as both the left and right quote; if it is two +characters, the first character is used as the left quote and the second as +the right quoted; and if it is four characters, the first two are used as +the left quote and the second two as the right quote. + +This may also be set to the special value C<none>, in which case no quote +marks are added around CE<lt>> text. + =item sentence If set to a true value, Pod::Text will assume that each sentence ends in two @@ -699,6 +737,16 @@ indicates a bug in Pod::Text; you should never see it. (F) Pod::Text was invoked via the compatibility mode pod2text() interface and the input file it was given could not be opened. +=item Invalid quote specification "%s" + +(F) The quote specification given (the quotes option to the constructor) was +invalid. A quote specification must be one, two, or four characters long. + +=item %s:%d: Unknown command paragraph "%s". + +(W) The POD source contained a non-standard command paragraph (something of +the form C<=command args>) that Pod::Man didn't know about. 
It was ignored. + =item Unknown escape: %s (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't diff --git a/lib/Pod/Usage.pm b/lib/Pod/Usage.pm index 571588ebd2..388607617d 100644 --- a/lib/Pod/Usage.pm +++ b/lib/Pod/Usage.pm @@ -10,7 +10,7 @@ package Pod::Usage; use vars qw($VERSION); -$VERSION = 1.12; ## Current version of this package +$VERSION = 1.14; ## Current version of this package require 5.005; ## requires this Perl version or later =head1 NAME @@ -46,7 +46,7 @@ B<pod2usage> should be given either a single argument, or a list of arguments corresponding to an associative array (a "hash"). When a single argument is given, it should correspond to exactly one of the following: -=over +=over 4 =item * @@ -68,7 +68,7 @@ assumed to be a hash. If a hash is supplied (either as a reference or as a list) it should contain one or more elements with the following keys: -=over +=over 4 =item C<-message> @@ -80,6 +80,9 @@ program's usage message. =item C<-exitval> The desired exit status to pass to the B<exit()> function. +This should be an integer, or else the string "NOEXIT" to +indicate that control should simply be returned without +terminating the invoking process. =item C<-verbose> @@ -129,7 +132,7 @@ Unless they are explicitly specified, the default values for the exit status, verbose level, and output stream to use are determined as follows: -=over +=over 4 =item * @@ -159,7 +162,7 @@ Although the above may seem a bit confusing at first, it generally does "the right thing" in most situations. 
This determination of the default values to use is based upon the following typical Unix conventions: -=over +=over 4 =item * @@ -211,7 +214,7 @@ convenient to use as an innocent looking error message handling function: ## Check for too many filenames pod2usage("$0: Too many files given.\n") if (@ARGV > 1); -Some users however may feel that the above "economy of expression" is +Some user's however may feel that the above "economy of expression" is not particularly readable nor consistent and may instead choose to do something more like the following: @@ -395,6 +398,7 @@ with re-writing this manpage. use strict; #use diagnostics; use Carp; +use Config; use Exporter; use File::Spec; @@ -497,8 +501,19 @@ sub pod2usage { } ## Now translate the pod document and then exit with the desired status - $parser->parse_from_file($opts{"-input"}, $opts{"-output"}); - exit($opts{"-exitval"}); + if ( $opts{"-verbose"} >= 2 + and !ref($opts{"-input"}) + and $opts{"-output"} == \*STDOUT ) + { + ## spit out the entire PODs. 
Might as well invoke perldoc + my $progpath = File::Spec->catfile($Config{bin}, "perldoc"); + system($progpath, $opts{"-input"}); + } + else { + $parser->parse_from_file($opts{"-input"}, $opts{"-output"}); + } + + exit($opts{"-exitval"}) unless (lc($opts{"-exitval"}) eq 'noexit'); } ##--------------------------------------------------------------------------- diff --git a/lib/Shell.pm b/lib/Shell.pm index 62aa82964c..c2f522cae3 100644 --- a/lib/Shell.pm +++ b/lib/Shell.pm @@ -1,8 +1,13 @@ package Shell; use 5.005_64; -our($capture_stderr, $VERSION); +use strict; +use warnings; +our($capture_stderr, $VERSION, $AUTOLOAD); -$VERSION = '0.2'; +$VERSION = '0.3'; + +sub new { bless \$VERSION, shift } # Nothing better to bless +sub DESTROY { } sub import { my $self = shift; @@ -10,24 +15,24 @@ sub import { my @EXPORT; if (@_) { @EXPORT = @_; - } - else { + } else { @EXPORT = 'AUTOLOAD'; } - foreach $sym (@EXPORT) { + foreach my $sym (@EXPORT) { + no strict 'refs'; *{"${callpack}::$sym"} = \&{"Shell::$sym"}; } -}; +} -AUTOLOAD { +sub AUTOLOAD { + shift if ref $_[0] && $_[0]->isa( 'Shell' ); my $cmd = $AUTOLOAD; $cmd =~ s/^.*:://; eval <<"*END*"; sub $AUTOLOAD { if (\@_ < 1) { \$Shell::capture_stderr ? `$cmd 2>&1` : `$cmd`; - } - elsif ('$^O' eq 'os2') { + } elsif ('$^O' eq 'os2') { local(\*SAVEOUT, \*READ, \*WRITE); open SAVEOUT, '>&STDOUT' or die; @@ -46,16 +51,14 @@ AUTOLOAD { close READ; waitpid \$pid, 0; \@ret; - } - else { + } else { local(\$/) = undef; my \$ret = <READ>; close READ; waitpid \$pid, 0; \$ret; } - } - else { + } else { my \$a; my \@arr = \@_; if ('$^O' eq 'MSWin32') { @@ -74,11 +77,10 @@ AUTOLOAD { s/\\\\\\\\"/\\\\\\\\"""/g; \$_ = qq["\$_"] if /\\s/; } - } - else { + } else { for (\@arr) { s/(['\\\\])/\\\\\$1/g; - \$_ = "'\$_'"; + \$_ = \$_; } } push \@arr, '2>&1' if \$Shell::capture_stderr; @@ -88,8 +90,7 @@ AUTOLOAD { my \@ret = <SUBPROC>; close SUBPROC; # XXX Oughta use a destructor. 
\@ret; - } - else { + } else { local(\$/) = undef; my \$ret = <SUBPROC>; close SUBPROC; @@ -104,6 +105,7 @@ AUTOLOAD { } 1; + __END__ =head1 NAME @@ -155,10 +157,45 @@ The module now should work on Win32. Jenda +There seemed to be a problem where all arguments to a shell command were +quoted before being executed. As in the following example: + + cat('</etc/passwd'); + ls('*.pl'); + +really turned into: + + cat '</etc/passwd' + ls '*.pl' + +instead of: + + cat </etc/passwd + ls *.pl + +and of course, this is wrong. + +I have fixed this bug, it was brought up by Wolfgang Laun [ID 20000326.008] + +Casey + +=head2 OBJECT ORIENTED SYNTAX + +Shell now has an OO interface. Good for namespace conservation +and shell representation. + + use Shell; + my $sh = Shell->new; + print $sh->ls; + +Casey + =head1 AUTHOR Larry Wall Changes by Jenda@Krynicky.cz and Dave Cottle <d.cottle@csc.canterbury.ac.nz> +Changes and bug fixes by Casey Tweten <crt@kiski.net> + =cut diff --git a/lib/Term/ANSIColor.pm b/lib/Term/ANSIColor.pm index e7a2157207..b61efcb045 100644 --- a/lib/Term/ANSIColor.pm +++ b/lib/Term/ANSIColor.pm @@ -1,11 +1,14 @@ # Term::ANSIColor -- Color screen output using ANSI escape sequences. -# $Id: ANSIColor.pm,v 1.1 1997/12/10 20:05:29 eagle Exp $ +# $Id: ANSIColor.pm,v 1.3 2000/08/06 18:28:10 eagle Exp $ # -# Copyright 1996, 1997 by Russ Allbery <rra@stanford.edu> -# and Zenin <zenin@best.com> +# Copyright 1996, 1997, 1998, 2000 +# by Russ Allbery <rra@stanford.edu> and Zenin <zenin@best.com> # # This program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. +# +# Ah, September, when the sysadmins turn colors and fall off the trees.... 
+# -- Dave Van Domelen ############################################################################ # Modules and declarations @@ -27,8 +30,10 @@ use Exporter (); ON_GREEN ON_YELLOW ON_BLUE ON_MAGENTA ON_CYAN ON_WHITE)]); Exporter::export_ok_tags ('constants'); - -($VERSION = (split (' ', q$Revision: 1.1 $ ))[1]) =~ s/\.(\d)$/.0$1/; + +# Don't use the CVS revision as the version, since this module is also in +# Perl core and too many things could munge CVS magic revision strings. +$VERSION = 1.03; ############################################################################ @@ -38,6 +43,7 @@ Exporter::export_ok_tags ('constants'); %attributes = ('clear' => 0, 'reset' => 0, 'bold' => 1, + 'dark' => 2, 'underline' => 4, 'underscore' => 4, 'blink' => 5, @@ -92,7 +98,8 @@ sub AUTOLOAD { }; goto &$AUTOLOAD; } else { - die "undefined subroutine &$AUTOLOAD called"; + require Carp; + Carp::croak ("undefined subroutine &$AUTOLOAD called"); } } @@ -119,19 +126,28 @@ sub color { # Given a string and a set of attributes, returns the string surrounded by # escape codes to set those attributes and then clear them at the end of the -# string. If $EACHLINE is set, insert a reset before each occurrence of the -# string $EACHLINE and the starting attribute code after the string -# $EACHLINE, so that no attribute crosses line delimiters (this is often -# desirable if the output is to be piped to a pager or some other program). +# string. The attributes can be given either as an array ref as the first +# argument or as a list as the second and subsequent arguments. If +# $EACHLINE is set, insert a reset before each occurrence of the string +# $EACHLINE and the starting attribute code after the string $EACHLINE, so +# that no attribute crosses line delimiters (this is often desirable if the +# output is to be piped to a pager or some other program). 
sub colored { - my $string = shift; + my ($string, @codes); + if (ref $_[0]) { + @codes = @{+shift}; + $string = join ('', @_); + } else { + $string = shift; + @codes = @_; + } if (defined $EACHLINE) { - my $attr = color (@_); + my $attr = color (@codes); join '', map { $_ && $_ ne $EACHLINE ? $attr . $_ . "\e[0m" : $_ } split (/(\Q$EACHLINE\E)/, $string); } else { - color (@_) . $string . "\e[0m"; + color (@codes) . $string . "\e[0m"; } } @@ -157,6 +173,7 @@ Term::ANSIColor - Color screen output using ANSI escape sequences print "This text is normal.\n"; print colored ("Yellow on magenta.\n", 'yellow on_magenta'); print "This text is normal.\n"; + print colored ['yellow on_magenta'], "Yellow on magenta.\n"; use Term::ANSIColor qw(:constants); print BOLD, BLUE, "This text is in bold blue.\n", RESET; @@ -179,22 +196,30 @@ you can save it as a string, pass it to something else, send it to a file handle, or do anything else with it that you might care to). The recognized attributes (all of which should be fairly intuitive) are -clear, reset, bold, underline, underscore, blink, reverse, concealed, -black, red, green, yellow, blue, magenta, on_black, on_red, on_green, -on_yellow, on_blue, on_magenta, on_cyan, and on_white. Case is not -significant. Underline and underscore are equivalent, as are clear and -reset, so use whichever is the most intuitive to you. The color alone +clear, reset, dark, bold, underline, underscore, blink, reverse, +concealed, black, red, green, yellow, blue, magenta, on_black, on_red, +on_green, on_yellow, on_blue, on_magenta, on_cyan, and on_white. Case is +not significant. Underline and underscore are equivalent, as are clear +and reset, so use whichever is the most intuitive to you. The color alone sets the foreground color, and on_color sets the background color. -Note that attributes, once set, last until they are unset (by sending the -attribute "reset"). 
Be careful to do this, or otherwise your attribute will -last after your script is done running, and people get very annoyed at -having their prompt and typing changed to weird colors. +Note that not all attributes are supported by all terminal types, and some +terminals may not support any of these sequences. Dark, blink, and +concealed in particular are frequently not implemented. + +Attributes, once set, last until they are unset (by sending the attribute +"reset"). Be careful to do this, or otherwise your attribute will last +after your script is done running, and people get very annoyed at having +their prompt and typing changed to weird colors. As an aid to help with this, colored() takes a scalar as the first argument and any number of attribute strings as the second argument and returns the scalar wrapped in escape codes so that the attributes will be set as requested before the string and reset to normal after the string. +Alternately, you can pass a reference to an array as the first argument, +and then the contents of that array will be taken as attributes and color +codes and the remainder of the arguments as text to colorize. + Normally, colored() just puts attribute codes at the beginning and end of the string, but if you set $Term::ANSIColor::EACHLINE to some string, that string will be considered the line delimiter and the attribute will @@ -205,10 +230,10 @@ Normally you'll want to set $Term::ANSIColor::EACHLINE to C<"\n"> to use this feature. Alternately, if you import C<:constants>, you can use the constants CLEAR, -RESET, BOLD, UNDERLINE, UNDERSCORE, BLINK, REVERSE, CONCEALED, BLACK, RED, -GREEN, YELLOW, BLUE, MAGENTA, ON_BLACK, ON_RED, ON_GREEN, ON_YELLOW, -ON_BLUE, ON_MAGENTA, ON_CYAN, and ON_WHITE directly. 
These are the same -as color('attribute') and can be used if you prefer typing: +RESET, BOLD, DARK, UNDERLINE, UNDERSCORE, BLINK, REVERSE, CONCEALED, +BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, ON_BLACK, ON_RED, ON_GREEN, +ON_YELLOW, ON_BLUE, ON_MAGENTA, ON_CYAN, and ON_WHITE directly. These are +the same as color('attribute') and can be used if you prefer typing: print BOLD BLUE ON_WHITE "Text\n", RESET; @@ -231,14 +256,14 @@ will reset the display mode afterwards, whereas: will not. The subroutine interface has the advantage over the constants interface in -that only 2 soubrutines are exported into your namespace, verses 22 in the -constants interface. On the flip side, the constants interface has the -advantage of better compile time error checking, since misspelled names of -colors or attributes in calls to color() and colored() won't be caught -until runtime whereas misspelled names of constants will be caught at -compile time. So, polute your namespace with almost two dozen subrutines -that you may not even use that oftin, or risk a silly bug by mistyping an -attribute. Your choice, TMTOWTDI after all. +that only two subroutines are exported into your namespace, versus +twenty-two in the constants interface. On the flip side, the constants +interface has the advantage of better compile time error checking, since +misspelled names of colors or attributes in calls to color() and colored() +won't be caught until runtime whereas misspelled names of constants will +be caught at compile time. So, polute your namespace with almost two +dozen subroutines that you may not even use that often, or risk a silly +bug by mistyping an attribute. Your choice, TMTOWTDI after all. =head1 DIAGNOSTICS @@ -246,11 +271,11 @@ attribute. Your choice, TMTOWTDI after all. =item Invalid attribute name %s -You passed an invalid attribute name to either color() or colored(). +(F) You passed an invalid attribute name to either color() or colored(). 
-=item Identifier %s used only once: possible typo +=item Name "%s" used only once: possible typo -You probably mistyped a constant color name such as: +(W) You probably mistyped a constant color name such as: print FOOBAR "This text is color FOOBAR\n"; @@ -259,7 +284,7 @@ force the next error. =item No comma allowed after filehandle -You probably mistyped a constant color name such as: +(F) You probably mistyped a constant color name such as: print FOOBAR, "This text is color FOOBAR\n"; @@ -267,9 +292,9 @@ Generating this fatal compile error is one of the main advantages of using the constants interface, since you'll immediately know if you mistype a color name. -=item Bareword %s not allowed while "strict subs" in use +=item Bareword "%s" not allowed while "strict subs" in use -You probably mistyped a constant color name such as: +(F) You probably mistyped a constant color name such as: $Foobar = FOOBAR . "This line should be blue\n"; @@ -298,6 +323,25 @@ For easier debuging, you may prefer to always use the commas when not setting $Term::ANSIColor::AUTORESET so that you'll get a fatal compile error rather than a warning. +=head1 NOTES + +Jean Delvare provided the following table of different common terminal +emulators and their support for the various attributes: + + clear bold dark under blink reverse conceal + ------------------------------------------------------------------------ + xterm yes yes no yes bold yes yes + linux yes yes yes bold yes yes no + rxvt yes yes no yes bold/black yes no + dtterm yes yes yes yes reverse yes yes + teraterm yes reverse no yes rev/red yes no + aixterm kinda normal no yes no yes yes + +Where the entry is other than yes or no, that emulator interpret the given +attribute as something else instead. Note that on an aixterm, clear +doesn't reset colors; you have to explicitly set the colors back to what +you want. More entries in this table are welcome. 
+ =head1 AUTHORS Original idea (using constants) by Zenin (zenin@best.com), reimplemented diff --git a/lib/Test.pm b/lib/Test.pm index c708f57a05..c18d3812e5 100644 --- a/lib/Test.pm +++ b/lib/Test.pm @@ -1,11 +1,10 @@ use strict; package Test; -use 5.005_64; use Test::Harness 1.1601 (); use Carp; our($VERSION, @ISA, @EXPORT, @EXPORT_OK, $ntest, $TestLevel); #public-ish our($TESTOUT, $ONFAIL, %todo, %history, $planned, @FAILDETAIL); #private-ish -$VERSION = '1.13'; +$VERSION = '1.14'; require Exporter; @ISA=('Exporter'); @EXPORT=qw(&plan &ok &skip); diff --git a/lib/Text/Soundex.pm b/lib/Text/Soundex.pm index 3079b90612..d5887640b8 100644 --- a/lib/Text/Soundex.pm +++ b/lib/Text/Soundex.pm @@ -108,7 +108,7 @@ many people seem to prefer an I<unlikely> value like C<Z000> can be assigned to C<$soundex_nocode>. In scalar context C<soundex> returns the soundex code of its first -argument, and in array context a list is returned in which each element is the +argument, and in list context a list is returned in which each element is the soundex code for the corresponding argument passed to C<soundex> e.g. @codes = soundex qw(Mike Stok); diff --git a/lib/Tie/Array.pm b/lib/Tie/Array.pm index eb83aaee17..e3b85d412e 100644 --- a/lib/Tie/Array.pm +++ b/lib/Tie/Array.pm @@ -8,30 +8,31 @@ our $VERSION = '1.01'; # Pod documentation after __END__ below. 
sub DESTROY { } -sub EXTEND { } -sub UNSHIFT { shift->SPLICE(0,0,@_) } -sub SHIFT { shift->SPLICE(0,1) } +sub EXTEND { } +sub UNSHIFT { scalar shift->SPLICE(0,0,@_) } +sub SHIFT { shift->SPLICE(0,1) } +#sub SHIFT { (shift->SPLICE(0,1))[0] } sub CLEAR { shift->STORESIZE(0) } -sub PUSH -{ +sub PUSH +{ my $obj = shift; my $i = $obj->FETCHSIZE; $obj->STORE($i++, shift) while (@_); } -sub POP +sub POP { my $obj = shift; my $newsize = $obj->FETCHSIZE - 1; my $val; - if ($newsize >= 0) + if ($newsize >= 0) { $val = $obj->FETCH($newsize); $obj->STORESIZE($newsize); } $val; -} +} sub SPLICE { @@ -46,7 +47,7 @@ sub SPLICE push(@result,$obj->FETCH($off+$i)); } if (@_ > $len) - { + { # Move items up to make room my $d = @_ - $len; my $e = $off+$len; @@ -59,7 +60,7 @@ sub SPLICE } elsif (@_ < $len) { - # Move items down to close the gap + # Move items down to close the gap my $d = $len - @_; my $e = $off+$len; for (my $i=$off+$len; $i < $sz; $i++) @@ -74,7 +75,7 @@ sub SPLICE $obj->STORE($off+$i,$_[$i]); } return @result; -} +} sub EXISTS { my $pkg = ref $_[0]; @@ -91,21 +92,21 @@ use vars qw(@ISA); @ISA = 'Tie::Array'; sub TIEARRAY { bless [], $_[0] } -sub FETCHSIZE { scalar @{$_[0]} } -sub STORESIZE { $#{$_[0]} = $_[1]-1 } +sub FETCHSIZE { scalar @{$_[0]} } +sub STORESIZE { $#{$_[0]} = $_[1]-1 } sub STORE { $_[0]->[$_[1]] = $_[2] } sub FETCH { $_[0]->[$_[1]] } sub CLEAR { @{$_[0]} = () } -sub POP { pop(@{$_[0]}) } +sub POP { pop(@{$_[0]}) } sub PUSH { my $o = shift; push(@$o,@_) } -sub SHIFT { shift(@{$_[0]}) } -sub UNSHIFT { my $o = shift; unshift(@$o,@_) } +sub SHIFT { shift(@{$_[0]}) } +sub UNSHIFT { my $o = shift; unshift(@$o,@_) } sub EXISTS { exists $_[0]->[$_[1]] } sub DELETE { delete $_[0]->[$_[1]] } sub SPLICE { - my $ob = shift; + my $ob = shift; my $sz = $ob->FETCHSIZE; my $off = @_ ? 
shift : 0; $off += $sz if $off < 0; @@ -121,16 +122,16 @@ __END__ Tie::Array - base class for tied arrays -=head1 SYNOPSIS +=head1 SYNOPSIS package NewArray; use Tie::Array; @ISA = ('Tie::Array'); # mandatory methods - sub TIEARRAY { ... } - sub FETCH { ... } - sub FETCHSIZE { ... } + sub TIEARRAY { ... } + sub FETCH { ... } + sub FETCHSIZE { ... } sub STORE { ... } # mandatory if elements writeable sub STORESIZE { ... } # mandatory if elements can be added/deleted @@ -138,13 +139,13 @@ Tie::Array - base class for tied arrays sub DELETE { ... } # mandatory if delete() expected to work # optional methods - for efficiency - sub CLEAR { ... } - sub PUSH { ... } - sub POP { ... } - sub SHIFT { ... } - sub UNSHIFT { ... } - sub SPLICE { ... } - sub EXTEND { ... } + sub CLEAR { ... } + sub PUSH { ... } + sub POP { ... } + sub SHIFT { ... } + sub UNSHIFT { ... } + sub SPLICE { ... } + sub EXTEND { ... } sub DESTROY { ... } package NewStdArray; @@ -162,7 +163,7 @@ Tie::Array - base class for tied arrays -=head1 DESCRIPTION +=head1 DESCRIPTION This module provides methods for array-tying classes. See L<perltie> for a list of the functions required in order to tie an array @@ -173,16 +174,16 @@ on the tied array, and implementations of C<PUSH>, C<POP>, C<SHIFT>, C<UNSHIFT>, C<SPLICE> and C<CLEAR> in terms of basic C<FETCH>, C<STORE>, C<FETCHSIZE>, C<STORESIZE>. -The B<Tie::StdArray> package provides efficient methods required for tied arrays +The B<Tie::StdArray> package provides efficient methods required for tied arrays which are implemented as blessed references to an "inner" perl array. -It inherits from B<Tie::Array>, and should cause tied arrays to behave exactly -like standard arrays, allowing for selective overloading of methods. +It inherits from B<Tie::Array>, and should cause tied arrays to behave exactly +like standard arrays, allowing for selective overloading of methods. 
For developers wishing to write their own tied arrays, the required methods are briefly defined below. See the L<perltie> section for more detailed descriptive, as well as example code: -=over +=over =item TIEARRAY classname, LIST @@ -190,7 +191,7 @@ The class method is invoked by the command C<tie @array, classname>. Associates an array instance with the specified class. C<LIST> would represent additional arguments (along the lines of L<AnyDBM_File> and compatriots) needed to complete the association. The method should return an object of a class which -provides the methods below. +provides the methods below. =item STORE this, index, value @@ -214,7 +215,7 @@ Sets the total number of items in the tied array associated with object I<this> to be I<count>. If this makes the array larger then class's mapping of C<undef> should be returned for new positions. If the array becomes smaller then entries beyond count should be -deleted. +deleted. =item EXTEND this, count @@ -242,7 +243,7 @@ object I<this>. Normal object destructor method. -=item PUSH this, LIST +=item PUSH this, LIST Append elements of LIST to the array. @@ -255,17 +256,17 @@ Remove last element of the array and return it. Remove the first element of the array (shifting other elements down) and return it. -=item UNSHIFT this, LIST +=item UNSHIFT this, LIST Insert LIST elements at the beginning of the array, moving existing elements up to make room. =item SPLICE this, offset, length, LIST -Perform the equivalent of C<splice> on the array. +Perform the equivalent of C<splice> on the array. -I<offset> is optional and defaults to zero, negative values count back -from the end of the array. +I<offset> is optional and defaults to zero, negative values count back +from the end of the array. I<length> is optional and defaults to rest of the array. @@ -277,16 +278,15 @@ Returns a list of the original I<length> elements at I<offset>. =head1 CAVEATS -There is no support at present for tied @ISA. 
There is a potential conflict +There is no support at present for tied @ISA. There is a potential conflict between magic entries needed to notice setting of @ISA, and those needed to -implement 'tie'. +implement 'tie'. Very little consideration has been given to the behaviour of tied arrays when C<$[> is not default value of zero. -=head1 AUTHOR +=head1 AUTHOR Nick Ing-Simmons E<lt>nik@tiuk.ti.comE<gt> -=cut - +=cut diff --git a/lib/Tie/Handle.pm b/lib/Tie/Handle.pm index 588ecead89..81b0792249 100644 --- a/lib/Tie/Handle.pm +++ b/lib/Tie/Handle.pm @@ -1,7 +1,7 @@ package Tie::Handle; use 5.005_64; -our $VERSION = '1.0'; +our $VERSION = '4.0'; =head1 NAME @@ -105,6 +105,15 @@ destruction of an instance. The L<perltie> section contains an example of tying handles. +=head1 COMPATIBILITY + +This version of Tie::Handle is neither related to nor compatible with +the Tie::Handle (3.0) module available on CPAN. It was due to an +accident that two modules with the same name appeared. The namespace +clash has been cleared in favor of this module that comes with the +perl core in September 2000 and accordingly the version number has +been bumped up to 4.0. 
+ =cut use Carp; @@ -120,8 +129,7 @@ sub new { sub TIEHANDLE { my $pkg = shift; if (defined &{"{$pkg}::new"}) { - warnings::warn "WARNING: calling ${pkg}->new since ${pkg}->TIEHANDLE is missing" - if warnings::enabled(); + warnings::warnif("WARNING: calling ${pkg}->new since ${pkg}->TIEHANDLE is missing"); $pkg->new(@_); } else { @@ -184,10 +192,10 @@ sub WRITE { sub CLOSE { my $pkg = ref $_[0]; croak "$pkg doesn't define a CLOSE method"; -} +} package Tie::StdHandle; -our @ISA = 'Tie::Handle'; +our @ISA = 'Tie::Handle'; use Carp; sub TIEHANDLE @@ -197,7 +205,7 @@ sub TIEHANDLE bless $fh,$class; $fh->OPEN(@_) if (@_); return $fh; -} +} sub EOF { eof($_[0]) } sub TELL { tell($_[0]) } @@ -207,9 +215,9 @@ sub CLOSE { close($_[0]) } sub BINMODE { binmode($_[0]) } sub OPEN -{ +{ $_[0]->CLOSE if defined($_[0]->FILENO); - open($_[0],$_[1]); + @_ == 2 ? open($_[0], $_[1]) : open($_[0], $_[1], $_[2]); } sub READ { read($_[0],$_[1],$_[2]) } @@ -217,7 +225,7 @@ sub READLINE { my $fh = $_[0]; <$fh> } sub GETC { getc($_[0]) } sub WRITE -{ +{ my $fh = $_[0]; print $fh substr($_[1],0,$_[2]) } diff --git a/lib/Tie/Hash.pm b/lib/Tie/Hash.pm index c6ec3d4f5c..2244711669 100644 --- a/lib/Tie/Hash.pm +++ b/lib/Tie/Hash.pm @@ -114,8 +114,7 @@ sub new { sub TIEHASH { my $pkg = shift; if (defined &{"${pkg}::new"}) { - warnings::warn "WARNING: calling ${pkg}->new since ${pkg}->TIEHASH is missing" - if warnings::enabled(); + warnings::warnif("WARNING: calling ${pkg}->new since ${pkg}->TIEHASH is missing"); $pkg->new(@_); } else { diff --git a/lib/Tie/Scalar.pm b/lib/Tie/Scalar.pm index 0c6759006f..89ad03eddc 100644 --- a/lib/Tie/Scalar.pm +++ b/lib/Tie/Scalar.pm @@ -91,8 +91,7 @@ sub new { sub TIESCALAR { my $pkg = shift; if (defined &{"{$pkg}::new"}) { - warnings::warn "WARNING: calling ${pkg}->new since ${pkg}->TIESCALAR is missing" - if warnings::enabled(); + warnings::warnif("WARNING: calling ${pkg}->new since ${pkg}->TIESCALAR is missing"); $pkg->new(@_); } else { diff --git 
a/lib/Win32.pod b/lib/Win32.pod index bd1d06581e..64361f8286 100644 --- a/lib/Win32.pod +++ b/lib/Win32.pod @@ -43,7 +43,8 @@ yourself. =item Win32::DomainName() [CORE] Returns the name of the Microsoft Network domain that the -owner of the current perl process is logged into. +owner of the current perl process is logged into. This function does +B<not> work on Windows 9x. =item Win32::ExpandEnvironmentStrings(STRING) diff --git a/lib/bytes.pm b/lib/bytes.pm index f93d6158d9..f2f7e0157c 100644 --- a/lib/bytes.pm +++ b/lib/bytes.pm @@ -38,11 +38,28 @@ The C<use bytes> pragma disables character semantics for the rest of the lexical scope in which it appears. C<no bytes> can be used to reverse the effect of C<use bytes> within the current lexical scope. -Perl normally assumes character semantics in the presence of -character data (i.e. data that has come from a source that has -been marked as being of a particular character encoding). - -To understand the implications and differences between character +Perl normally assumes character semantics in the presence of character +data (i.e. data that has come from a source that has been marked as +being of a particular character encoding). When C<use bytes> is in +effect, the encoding is temporarily ignored, and each string is treated +as a series of bytes. + +As an example, when Perl sees C<$x = chr(400)>, it encodes the character +in UTF8 and stores it in $x. Then it is marked as character data, so, +for instance, C<length $x> returns C<1>. 
However, in the scope of the +C<bytes> pragma, $x is treated as a series of bytes - the bytes that make +up the UTF8 encoding - and C<length $x> returns C<2>: + + $x = chr(400); + print "Length is ", length $x, "\n"; # "Length is 1" + printf "Contents are %vd\n", $x; # "Contents are 400" + { + use bytes; + print "Length is ", length $x, "\n"; # "Length is 2" + printf "Contents are %vd\n", $x; # "Contents are 198.144" + } + +For more on the implications and differences between character semantics and byte semantics, see L<perlunicode>. =head1 SEE ALSO diff --git a/lib/charnames.pm b/lib/charnames.pm index 7c2209b9f0..0ec7ec2d21 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -1,5 +1,6 @@ package charnames; use bytes (); # for $bytes::hint_bits +use warnings(); $charnames::hint_bits = 0x20000; my $txt; @@ -51,6 +52,13 @@ sub import { $^H{charnames_full} = delete $h{':full'}; $^H{charnames_short} = delete $h{':short'}; $^H{charnames_scripts} = [map uc, keys %h]; + if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) { + $txt = do "unicode/Name.pl" unless $txt; + for (@{$^H{charnames_scripts}}) { + warnings::warn('utf8', "No such script: '$_'") unless + $txt =~ m/\t\t$_ (?:CAPITAL |SMALL )?LETTER /; + } + } } diff --git a/lib/chat2.pl b/lib/chat2.pl deleted file mode 100644 index 504fa7efd7..0000000000 --- a/lib/chat2.pl +++ /dev/null @@ -1,379 +0,0 @@ -# chat.pl: chat with a server -# -# This library is no longer being maintained, and is included for backward -# compatibility with Perl 4 programs which may require it. -# -# In particular, this should not be used as an example of modern Perl -# programming techniques. -# -# Suggested alternative: Socket -# -# Based on: V2.01.alpha.7 91/06/16 -# Randal L. 
Schwartz (was <merlyn@stonehenge.com>) -# multihome additions by A.Macpherson@bnr.co.uk -# allow for /dev/pts based systems by Joe Doupnik <JRD@CC.USU.EDU> - -package chat; - -require 'sys/socket.ph'; - -if( defined( &main'PF_INET ) ){ - $pf_inet = &main'PF_INET; - $sock_stream = &main'SOCK_STREAM; - local($name, $aliases, $proto) = getprotobyname( 'tcp' ); - $tcp_proto = $proto; -} -else { - # XXX hardwired $PF_INET, $SOCK_STREAM, 'tcp' - # but who the heck would change these anyway? (:-) - $pf_inet = 2; - $sock_stream = 1; - $tcp_proto = 6; -} - - -$sockaddr = 'S n a4 x8'; -chop($thishost = `hostname`); - -# *S = symbol for current I/O, gets assigned *chatsymbol.... -$next = "chatsymbol000000"; # next one -$nextpat = "^chatsymbol"; # patterns that match next++, ++, ++, ++ - - -## $handle = &chat'open_port("server.address",$port_number); -## opens a named or numbered TCP server - -sub open_port { ## public - local($server, $port) = @_; - - local($serveraddr,$serverproc); - - # We may be multi-homed, start with 0, fixup once connexion is made - $thisaddr = "\0\0\0\0" ; - $thisproc = pack($sockaddr, 2, 0, $thisaddr); - - *S = ++$next; - if ($server =~ /^(\d+)+\.(\d+)\.(\d+)\.(\d+)$/) { - $serveraddr = pack('C4', $1, $2, $3, $4); - } else { - local(@x) = gethostbyname($server); - return undef unless @x; - $serveraddr = $x[4]; - } - $serverproc = pack($sockaddr, 2, $port, $serveraddr); - unless (socket(S, $pf_inet, $sock_stream, $tcp_proto)) { - ($!) = ($!, close(S)); # close S while saving $! - return undef; - } - unless (bind(S, $thisproc)) { - ($!) = ($!, close(S)); # close S while saving $! - return undef; - } - unless (connect(S, $serverproc)) { - ($!) = ($!, close(S)); # close S while saving $! - return undef; - } -# We opened with the local address set to ANY, at this stage we know -# which interface we are using. This is critical if our machine is -# multi-homed, with IP forwarding off, so fix-up. 
- local($fam,$lport); - ($fam,$lport,$thisaddr) = unpack($sockaddr, getsockname(S)); - $thisproc = pack($sockaddr, 2, 0, $thisaddr); -# end of post-connect fixup - select((select(S), $| = 1)[0]); - $next; # return symbol for switcharound -} - -## ($host, $port, $handle) = &chat'open_listen([$port_number]); -## opens a TCP port on the current machine, ready to be listened to -## if $port_number is absent or zero, pick a default port number -## process must be uid 0 to listen to a low port number - -sub open_listen { ## public - - *S = ++$next; - local($thisport) = shift || 0; - local($thisproc_local) = pack($sockaddr, 2, $thisport, $thisaddr); - local(*NS) = "__" . time; - unless (socket(NS, $pf_inet, $sock_stream, $tcp_proto)) { - ($!) = ($!, close(NS)); - return undef; - } - unless (bind(NS, $thisproc_local)) { - ($!) = ($!, close(NS)); - return undef; - } - unless (listen(NS, 1)) { - ($!) = ($!, close(NS)); - return undef; - } - select((select(NS), $| = 1)[0]); - local($family, $port, @myaddr) = - unpack("S n C C C C x8", getsockname(NS)); - $S{"needs_accept"} = *NS; # so expect will open it - (@myaddr, $port, $next); # returning this -} - -## $handle = &chat'open_proc("command","arg1","arg2",...); -## opens a /bin/sh on a pseudo-tty - -sub open_proc { ## public - local(@cmd) = @_; - - *S = ++$next; - local(*TTY) = "__TTY" . time; - local($pty,$tty) = &_getpty(S,TTY); - die "Cannot find a new pty" unless defined $pty; - $pid = fork; - die "Cannot fork: $!" 
unless defined $pid; - unless ($pid) { - close STDIN; close STDOUT; close STDERR; - setpgrp(0,$$); - if (open(DEVTTY, "/dev/tty")) { - ioctl(DEVTTY,0x20007471,0); # XXX s/b &TIOCNOTTY - close DEVTTY; - } - open(STDIN,"<&TTY"); - open(STDOUT,">&TTY"); - open(STDERR,">&STDOUT"); - die "Oops" unless fileno(STDERR) == 2; # sanity - close(S); - exec @cmd; - die "Cannot exec @cmd: $!"; - } - close(TTY); - $next; # return symbol for switcharound -} - -# $S is the read-ahead buffer - -## $return = &chat'expect([$handle,] $timeout_time, -## $pat1, $body1, $pat2, $body2, ... ) -## $handle is from previous &chat'open_*(). -## $timeout_time is the time (either relative to the current time, or -## absolute, ala time(2)) at which a timeout event occurs. -## $pat1, $pat2, and so on are regexs which are matched against the input -## stream. If a match is found, the entire matched string is consumed, -## and the corresponding body eval string is evaled. -## -## Each pat is a regular-expression (probably enclosed in single-quotes -## in the invocation). ^ and $ will work, respecting the current value of $*. -## If pat is 'TIMEOUT', the body is executed if the timeout is exceeded. -## If pat is 'EOF', the body is executed if the process exits before -## the other patterns are seen. -## -## Pats are scanned in the order given, so later pats can contain -## general defaults that won't be examined unless the earlier pats -## have failed. -## -## The result of eval'ing body is returned as the result of -## the invocation. Recursive invocations are not thought -## through, and may work only accidentally. :-) -## -## undef is returned if either a timeout or an eof occurs and no -## corresponding body has been defined. -## I/O errors of any sort are treated as eof. 
- -$nextsubname = "expectloop000000"; # used for subroutines - -sub expect { ## public - if ($_[0] =~ /$nextpat/) { - *S = shift; - } - local($endtime) = shift; - - local($timeout,$eof) = (1,1); - local($caller) = caller; - local($rmask, $nfound, $timeleft, $thisbuf); - local($cases, $pattern, $action, $subname); - $endtime += time if $endtime < 600_000_000; - - if (defined $S{"needs_accept"}) { # is it a listen socket? - local(*NS) = $S{"needs_accept"}; - delete $S{"needs_accept"}; - $S{"needs_close"} = *NS; - unless(accept(S,NS)) { - ($!) = ($!, close(S), close(NS)); - return undef; - } - select((select(S), $| = 1)[0]); - } - - # now see whether we need to create a new sub: - - unless ($subname = $expect_subname{$caller,@_}) { - # nope. make a new one: - $expect_subname{$caller,@_} = $subname = $nextsubname++; - - $cases .= <<"EDQ"; # header is funny to make everything elsif's -sub $subname { - LOOP: { - if (0) { ; } -EDQ - while (@_) { - ($pattern,$action) = splice(@_,0,2); - if ($pattern =~ /^eof$/i) { - $cases .= <<"EDQ"; - elsif (\$eof) { - package $caller; - $action; - } -EDQ - $eof = 0; - } elsif ($pattern =~ /^timeout$/i) { - $cases .= <<"EDQ"; - elsif (\$timeout) { - package $caller; - $action; - } -EDQ - $timeout = 0; - } else { - $pattern =~ s#/#\\/#g; - $cases .= <<"EDQ"; - elsif (\$S =~ /$pattern/) { - \$S = \$'; - package $caller; - $action; - } -EDQ - } - } - $cases .= <<"EDQ" if $eof; - elsif (\$eof) { - undef; - } -EDQ - $cases .= <<"EDQ" if $timeout; - elsif (\$timeout) { - undef; - } -EDQ - $cases .= <<'ESQ'; - else { - $rmask = ""; - vec($rmask,fileno(S),1) = 1; - ($nfound, $rmask) = - select($rmask, undef, undef, $endtime - time); - if ($nfound) { - $nread = sysread(S, $thisbuf, 1024); - if ($nread > 0) { - $S .= $thisbuf; - } else { - $eof++, redo LOOP; # any error is also eof - } - } else { - $timeout++, redo LOOP; # timeout - } - redo LOOP; - } - } -} -ESQ - eval $cases; die "$cases:\n$@" if $@; - } - $eof = $timeout = 0; - do $subname(); 
-} - -## &chat'print([$handle,] @data) -## $handle is from previous &chat'open(). -## like print $handle @data - -sub print { ## public - if ($_[0] =~ /$nextpat/) { - *S = shift; - } - - local $out = join $, , @_; - syswrite(S, $out, length $out); - if( $chat'debug ){ - print STDERR "printed:"; - print STDERR @_; - } -} - -## &chat'close([$handle,]) -## $handle is from previous &chat'open(). -## like close $handle - -sub close { ## public - if ($_[0] =~ /$nextpat/) { - *S = shift; - } - close(S); - if (defined $S{"needs_close"}) { # is it a listen socket? - local(*NS) = $S{"needs_close"}; - delete $S{"needs_close"}; - close(NS); - } -} - -## @ready_handles = &chat'select($timeout, @handles) -## select()'s the handles with a timeout value of $timeout seconds. -## Returns an array of handles that are ready for I/O. -## Both user handles and chat handles are supported (but beware of -## stdio's buffering for user handles). - -sub select { ## public - local($timeout) = shift; - local(@handles) = @_; - local(%handlename) = (); - local(%ready) = (); - local($caller) = caller; - local($rmask) = ""; - for (@handles) { - if (/$nextpat/o) { # one of ours... see if ready - local(*SYM) = $_; - if (length($SYM)) { - $timeout = 0; # we have a winner - $ready{$_}++; - } - $handlename{fileno($_)} = $_; - } else { - $handlename{fileno(/'/ ? $_ : "$caller\'$_")} = $_; - } - } - for (sort keys %handlename) { - vec($rmask, $_, 1) = 1; - } - select($rmask, undef, undef, $timeout); - for (sort keys %handlename) { - $ready{$handlename{$_}}++ if vec($rmask,$_,1); - } - sort keys %ready; -} - -# ($pty,$tty) = $chat'_getpty(PTY,TTY): -# internal procedure to get the next available pty. -# opens pty on handle PTY, and matching tty on handle TTY. -# returns undef if can't find a pty. -# Modify "/dev/pty" to "/dev/pts" for Dell Unix v2.2 (aka SVR4.04). Joe Doupnik. 
- -sub _getpty { ## private - local($_PTY,$_TTY) = @_; - $_PTY =~ s/^([^']+)$/(caller)[$[]."'".$1/e; - $_TTY =~ s/^([^']+)$/(caller)[$[]."'".$1/e; - local($pty, $tty, $kind); - if( -e "/dev/pts000" ){ ## mods by Joe Doupnik Dec 1992 - $kind = "pts"; ## SVR4 Streams - } else { - $kind = "pty"; ## BSD Clist stuff - } - for $bank (112..127) { - next unless -e sprintf("/dev/$kind%c0", $bank); - for $unit (48..57) { - $pty = sprintf("/dev/$kind%c%c", $bank, $unit); - open($_PTY,"+>$pty") || next; - select((select($_PTY), $| = 1)[0]); - ($tty = $pty) =~ s/pty/tty/; - open($_TTY,"+>$tty") || next; - select((select($_TTY), $| = 1)[0]); - system "stty nl>$tty"; - return ($pty,$tty); - } - } - undef; -} - -1; diff --git a/lib/fields.pm b/lib/fields.pm index ac4581036f..37ff99d78a 100644 --- a/lib/fields.pm +++ b/lib/fields.pm @@ -172,8 +172,7 @@ sub import { if ($fno and $fno != $next) { require Carp; if ($fno < $fattr->[0]) { - warnings::warn("Hides field '$f' in base class") - if warnings::enabled(); + warnings::warnif("Hides field '$f' in base class") ; } else { Carp::croak("Field name '$f' already in use"); } diff --git a/lib/getopts.pl b/lib/getopts.pl index 25958199a6..4a50b8f6c2 100644 --- a/lib/getopts.pl +++ b/lib/getopts.pl @@ -16,41 +16,50 @@ sub Getopts { local($argumentative) = @_; local(@args,$_,$first,$rest); local($errs) = 0; + local($[) = 0; @args = split( / */, $argumentative ); while(@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) { - ($first,$rest) = ($1,$2); - $pos = index($argumentative,$first); - if($pos >= 0) { - if($pos < $#args && $args[$pos+1] eq ':') { - shift(@ARGV); - if($rest eq '') { - ++$errs unless @ARGV; - $rest = shift(@ARGV); - } - ${"opt_$first"} = $rest; - } - else { - ${"opt_$first"} = 1; - if($rest eq '') { - shift(@ARGV); + ($first,$rest) = ($1,$2); + $pos = index($argumentative,$first); + if($pos >= $[) { + if($args[$pos+1] eq ':') { + shift(@ARGV); + if($rest eq '') { + ++$errs unless(@ARGV); + $rest = shift(@ARGV); + } + eval " + 
push(\@opt_$first, \$rest); + if(\$opt_$first eq '') { + \$opt_$first = \$rest; + } + else { + \$opt_$first .= ' ' . \$rest; + } + "; + } + else { + eval "\$opt_$first = 1"; + if($rest eq '') { + shift(@ARGV); + } + else { + $ARGV[0] = "-$rest"; + } + } } else { - $ARGV[0] = "-$rest"; + print STDERR "Unknown option: $first\n"; + ++$errs; + if($rest ne '') { + $ARGV[0] = "-$rest"; + } + else { + shift(@ARGV); + } } - } - } - else { - print STDERR "Unknown option: $first\n"; - ++$errs; - if($rest ne '') { - $ARGV[0] = "-$rest"; - } - else { - shift(@ARGV); - } } - } $errs == 0; } diff --git a/lib/lib_pm.PL b/lib/lib_pm.PL index 0d2a73b842..bb02106058 100644 --- a/lib/lib_pm.PL +++ b/lib/lib_pm.PL @@ -23,7 +23,8 @@ print "Extracting $file (with variable substitutions)\n"; print OUT <<"!GROK!THIS!"; package lib; -use 5.005_64; +# THIS FILE IS AUTOMATICALLY GENERATED FROM lib_pm.PL. +# ANY CHANGES TO THIS FILE WILL BE OVERWRITTEN BY THE NEXT PERL BUILD. my \$archname = "$Config_archname"; my \$ver = "$Config_ver"; @@ -56,6 +57,7 @@ sub import { } # Put a corresponding archlib directory infront of $_ if it # looks like $_ has an archlib directory below it. + unshift(@INC, "$_/$archname") if -d "$_/$archname/auto"; unshift(@INC, "$_/$ver") if -d "$_/$ver"; unshift(@INC, "$_/$ver/$archname") if -d "$_/$ver/$archname"; } @@ -73,6 +75,8 @@ sub unimport { foreach (@_) { ++$names{$_}; ++$names{"$_/$archname"} if -d "$_/$archname/auto"; + ++$names{"$_/$ver"} if -d "$_/$ver"; + ++$names{"$_/$ver/$archname"} if -d "$_/$ver/$archname"; } # Remove ALL instances of each named directory. diff --git a/lib/overload.pm b/lib/overload.pm index ba96bc9ab6..2b0b99d3cd 100644 --- a/lib/overload.pm +++ b/lib/overload.pm @@ -106,7 +106,7 @@ sub mycan { # Real can would leave stubs. } %constants = ( - 'integer' => 0x1000, + 'integer' => 0x1000, 'float' => 0x2000, 'binary' => 0x4000, 'q' => 0x8000, @@ -127,11 +127,29 @@ sub mycan { # Real can would leave stubs. 
dereferencing => '${} @{} %{} &{} *{}', special => 'nomethod fallback ='); +use warnings::register; sub constant { # Arguments: what, sub while (@_) { - $^H{$_[0]} = $_[1]; - $^H |= $constants{$_[0]} | $overload::hint_bits; + if (@_ == 1) { + warnings::warnif ("Odd number of arguments for overload::constant"); + last; + } + elsif (!exists $constants {$_ [0]}) { + warnings::warnif ("`$_[0]' is not an overloadable type"); + } + elsif (!ref $_ [1] || "$_[1]" !~ /CODE\(0x[\da-f]+\)$/) { + # Can't use C<ref $_[1] eq "CODE"> above as code references can be + # blessed, and C<ref> would return the package the ref is blessed into. + if (warnings::enabled) { + $_ [1] = "undef" unless defined $_ [1]; + warnings::warn ("`$_[1]' is not a code reference"); + } + } + else { + $^H{$_[0]} = $_[1]; + $^H |= $constants{$_[0]} | $overload::hint_bits; + } shift, shift; } } @@ -149,7 +167,7 @@ sub remove_constant { __END__ -=head1 NAME +=head1 NAME overload - Package for overloading perl operations @@ -157,7 +175,7 @@ overload - Package for overloading perl operations package SomeThing; - use overload + use overload '+' => \&myadd, '-' => \&mysub; # etc @@ -179,12 +197,12 @@ The compilation directive package Number; use overload - "+" => \&add, + "+" => \&add, "*=" => "muas"; declares function Number::add() for addition, and method muas() in the "class" C<Number> (or one of its base classes) -for the assignment form C<*=> of multiplication. +for the assignment form C<*=> of multiplication. Arguments of this directive come in (key, value) pairs. Legal values are values legal inside a C<&{ ... }> call, so the name of a @@ -279,20 +297,20 @@ if C<+=> is not overloaded. =back B<Warning.> Due to the presense of assignment versions of operations, -routines which may be called in assignment context may create -self-referential structures. Currently Perl will not free self-referential +routines which may be called in assignment context may create +self-referential structures. 
Currently Perl will not free self-referential structures until cycles are C<explicitly> broken. You may get problems when traversing your structures too. -Say, +Say, use overload '+' => sub { bless [ \$_[0], \$_[1] ] }; is asking for trouble, since for code C<$obj += $foo> the subroutine -is called as C<$obj = add($obj, $foo, undef)>, or C<$obj = [\$obj, +is called as C<$obj = add($obj, $foo, undef)>, or C<$obj = [\$obj, \$foo]>. If using such a subroutine is an important optimization, one can overload C<+=> explicitly by a non-"optimized" version, or switch -to non-optimized version if C<not defined $_[2]> (see +to non-optimized version if C<not defined $_[2]> (see L<Calling Conventions for Binary Operations>). Even if no I<explicit> assignment-variants of operators are present in @@ -365,6 +383,11 @@ be used instead. C<bool> is used in the flow control operators return any arbitrary Perl value. If the corresponding operation for this value is overloaded too, that operation will be called again with this value. +As a special case if the overload returns the object itself then it will +be used directly. An overloaded conversion returning the object is +probably a bug, because you're likely to get something that looks like +C<YourPackage=HASH(0x8172b34)>. + =item * I<Iteration> "<>" @@ -382,6 +405,12 @@ If not overloaded, the argument will be dereferenced I<as is>, thus should be of correct type. These functions should return a reference of correct type, or another object with overloaded dereferencing. +As a special case if the overload returns the object itself then it +will be used directly (provided it is the correct type). + +The dereference operators must be specified explicitly they will not be passed to +"nomethod". + =item * I<Special> "nomethod", "fallback", "=", @@ -464,11 +493,16 @@ the last one is used. Say, C<1-$a> can be equivalent to if the pair C<"nomethod" =E<gt> "nomethodMethod"> was specified in the C<use overload> directive. 
+The C<"nomethod"> mechanism is I<not> used for the dereference operators +( ${} @{} %{} &{} *{} ). + + If some operation cannot be resolved, and there is no function assigned to C<"nomethod">, then an exception will be raised via die()-- unless C<"fallback"> was specified as a key in C<use overload> directive. -=head2 Fallback + +=head2 Fallback The key C<"fallback"> governs what to do if a method for a particular operation is not found. Three different cases are possible depending on @@ -492,7 +526,7 @@ present. =item * defined, but FALSE No autogeneration is tried. Perl tries to call -C<"nomethod"> value, and if this is missing, raises an exception. +C<"nomethod"> value, and if this is missing, raises an exception. =back @@ -510,7 +544,7 @@ This operation is called in the situations when a mutator is applied to a reference that shares its object with some other reference, such as - $a=$b; + $a=$b; ++$a; To make this change $a and not change $b, a copy of C<$$a> is made, @@ -521,7 +555,7 @@ done if C<++> is expressed via a method for C<'++'> or C<'+='> (or C<nomethod>). Note that if this operation is expressed via C<'+'> a nonmutator, i.e., as in - $a=$b; + $a=$b; $a=$a+1; then C<$a> does not reference a new copy of C<$$a>, since $$a does not @@ -535,15 +569,15 @@ string copy if the object is a plain scalar. =item B<Example> -The actually executed code for +The actually executed code for - $a=$b; + $a=$b; Something else which does not modify $a or $b.... ++$a; may be - $a=$b; + $a=$b; Something else which does not modify $a or $b.... $a = $a->clone(undef,""); $a->incr(undef,""); @@ -570,7 +604,7 @@ substitutions are possible for the following operations: C<$a+=$b> can use the method for C<"+"> if the method for C<"+="> is not defined. -=item I<Conversion operations> +=item I<Conversion operations> String, numeric, and boolean conversion are calculated in terms of one another if not all of them are defined. @@ -597,7 +631,7 @@ string or numerical conversion. 
can be expressed in terms of string conversion. -=item I<Comparison operations> +=item I<Comparison operations> can be expressed in terms of its "spaceship" counterpart: either C<E<lt>=E<gt>> or C<cmp>: @@ -705,20 +739,20 @@ to overload constant pieces of regular expressions. The corresponding values are references to functions which take three arguments: the first one is the I<initial> string form of the constant, the second one -is how Perl interprets this constant, the third one is how the constant is used. +is how Perl interprets this constant, the third one is how the constant is used. Note that the initial string form does not -contain string delimiters, and has backslashes in backslash-delimiter +contain string delimiters, and has backslashes in backslash-delimiter combinations stripped (thus the value of delimiter is not relevant for -processing of this string). The return value of this function is how this +processing of this string). The return value of this function is how this constant is going to be interpreted by Perl. The third argument is undefined unless for overloaded C<q>- and C<qr>- constants, it is C<q> in single-quote context (comes from strings, regular expressions, and single-quote HERE -documents), it is C<tr> for arguments of C<tr>/C<y> operators, +documents), it is C<tr> for arguments of C<tr>/C<y> operators, it is C<s> for right-hand side of C<s>-operator, and it is C<qq> otherwise. Since an expression C<"ab$cd,,"> is just a shortcut for C<'ab' . $cd . ',,'>, it is expected that overloaded constant strings are equipped with reasonable -overloaded catenation operator, otherwise absurd results will result. +overloaded catenation operator, otherwise absurd results will result. Similarly, negative numbers are considered as negations of positive constants. 
Note that it is probably meaningless to call the functions overload::constant() @@ -732,7 +766,7 @@ From these methods they may be called as overload::constant integer => sub {Math::BigInt->new(shift)}; } -B<BUGS> Currently overloaded-ness of constants does not propagate +B<BUGS> Currently overloaded-ness of constants does not propagate into C<eval '...'>. =head1 IMPLEMENTATION @@ -774,7 +808,7 @@ packages acquire a magic during the next C<bless>ing into the package. This magic is three-words-long for packages without overloading, and carries the cache table if the package is overloaded. -Copying (C<$a=$b>) is shallow; however, a one-level-deep copying is +Copying (C<$a=$b>) is shallow; however, a one-level-deep copying is carried out before any operation that can imply an assignment to the object $a (or $b) refers to, like C<$a++>. You can override this behavior by defining your own copy constructor (see L<"Copy Constructor">). @@ -785,8 +819,8 @@ to be changed are constant (but this is not enforced). =head1 Metaphor clash One may wonder why the semantic of overloaded C<=> is so counter intuitive. -If it I<looks> counter intuitive to you, you are subject to a metaphor -clash. +If it I<looks> counter intuitive to you, you are subject to a metaphor +clash. Here is a Perl object metaphor: @@ -805,10 +839,10 @@ that $a and $b are separate entities. The difference is not relevant in the absence of mutators. After a Perl-way assignment an operation which mutates the data referenced by $a -would change the data referenced by $b too. Effectively, after +would change the data referenced by $b too. Effectively, after C<$a = $b> values of $a and $b become I<indistinguishable>. -On the other hand, anyone who has used algebraic notation knows the +On the other hand, anyone who has used algebraic notation knows the expressive power of the arithmetic metaphor. Overloading works hard to enable this metaphor while preserving the Perlian way as far as possible. 
Since it is not possible to freely mix two contradicting @@ -817,7 +851,7 @@ far as all the mutators are called via overloaded access only>. The way it is done is described in L<Copy Constructor>. If some mutator methods are directly applied to the overloaded values, -one may need to I<explicitly unlink> other values which references the +one may need to I<explicitly unlink> other values which references the same value: $a = new Data 23; @@ -841,7 +875,7 @@ However, it would not make preserve "objectness" of $a. But Perl I<has> a way to make assignments to an object do whatever you want. It is just not the overload, but tie()ing interface (see L<perlfunc/tie>). Adding a FETCH() method -which returns the object itself, and STORE() method which changes the +which returns the object itself, and STORE() method which changes the value of the object, one can reproduce the arithmetic metaphor in its completeness, at least for variables which were tie()d from the start. @@ -878,16 +912,15 @@ numeric value.) This prints: =head2 Two-face references Suppose you want to create an object which is accessible as both an -array reference, and a hash reference, similar to the builtin -L<array-accessible-as-a-hash|perlref/"Pseudo-hashes: Using an array as -a hash"> builtin Perl type. Let us make it better than the builtin -type, there will be no restriction that you cannot use the index 0 of -your array. +array reference and a hash reference, similar to the +L<pseudo-hash|perlref/"Pseudo-hashes: Using an array as a hash"> +builtin Perl type. Let's make it better than a pseudo-hash by +allowing index 0 to be treated as a normal element. package two_refs; use overload '%{}' => \&gethash, '@{}' => sub { $ {shift()} }; - sub new { - my $p = shift; + sub new { + my $p = shift; bless \ [@_], $p; } sub gethash { @@ -901,13 +934,13 @@ your array. 
my %fields; my $i = 0; $fields{$_} = $i++ foreach qw{zero one two three}; - sub STORE { + sub STORE { my $self = ${shift()}; my $key = $fields{shift()}; defined $key or die "Out of band access"; $$self->[$key] = shift; } - sub FETCH { + sub FETCH { my $self = ${shift()}; my $key = $fields{shift()}; defined $key or die "Out of band access"; @@ -940,7 +973,7 @@ overloaded dereference operator). Here is one possible fetching routine: sub access_hash { my ($self, $key) = (shift, shift); my $class = ref $self; - bless $self, 'overload::dummy'; # Disable overloading of %{} + bless $self, 'overload::dummy'; # Disable overloading of %{} my $out = $self->{$key}; bless $self, $class; # Restore overloading $out; @@ -952,8 +985,8 @@ level of indirection which allows a non-circular structure of references: package two_refs1; use overload '%{}' => sub { ${shift()}->[1] }, '@{}' => sub { ${shift()}->[0] }; - sub new { - my $p = shift; + sub new { + my $p = shift; my $a = [@_]; my %h; tie %h, $p, $a; @@ -970,13 +1003,13 @@ level of indirection which allows a non-circular structure of references: my %fields; my $i = 0; $fields{$_} = $i++ foreach qw{zero one two three}; - sub STORE { + sub STORE { my $a = ${shift()}; my $key = $fields{shift()}; defined $key or die "Out of band access"; $a->[$key] = shift; } - sub FETCH { + sub FETCH { my $a = ${shift()}; my $key = $fields{shift()}; defined $key or die "Out of band access"; @@ -986,7 +1019,7 @@ level of indirection which allows a non-circular structure of references: Now if $baz is overloaded like this, then C<$bar> is a reference to a reference to the intermediate array, which keeps a reference to an actual array, and the access hash. 
The tie()ing object for the access -hash is also a reference to a reference to the actual array, so +hash is also a reference to a reference to the actual array, so =over @@ -1061,7 +1094,7 @@ Add a pretty-printer method to the module F<symbolic.pm>: $a = $a->pretty if ref $a; $b = $b->pretty if ref $b; "[$meth $a $b]"; - } + } Now one can finish the script by @@ -1082,7 +1115,7 @@ look for an overloaded operator C<"">. Thus it is enough to use $a = 'u' unless defined $a; $b = 'u' unless defined $b; "[$meth $a $b]"; - } + } Now one can change the last line of the script to @@ -1093,7 +1126,7 @@ which outputs side = [/ [- [sqrt [+ 1 [** [n 1 u] 2]] u] 1] [n 1 u]] and one can inspect the value in debugger using all the possible -methods. +methods. Something is still amiss: consider the loop variable $cnt of the script. It was a number, not an object. We cannot make this value of @@ -1127,9 +1160,9 @@ slightly modified str()): } else { "[$meth $a]"; } - } - my %subr = ( n => sub {$_[0]}, - sqrt => sub {sqrt $_[0]}, + } + my %subr = ( n => sub {$_[0]}, + sqrt => sub {sqrt $_[0]}, '-' => sub {shift() - shift()}, '+' => sub {shift() + shift()}, '/' => sub {shift() / shift()}, @@ -1138,7 +1171,7 @@ slightly modified str()): ); sub num { my ($meth, $a, $b) = @{+shift}; - my $subr = $subr{$meth} + my $subr = $subr{$meth} or die "Do not know how to ($meth) in symbolic"; $a = $a->num if ref $a eq __PACKAGE__; $b = $b->num if ref $b eq __PACKAGE__; @@ -1207,7 +1240,7 @@ deep only, so recursive copying is not needed): bless [@$self], ref $self; } -To make C<++> and C<--> work, we need to implement actual mutators, +To make C<++> and C<--> work, we need to implement actual mutators, either directly, or in C<nomethod>. We continue to do things inside C<nomethod>, thus add @@ -1216,7 +1249,7 @@ C<nomethod>, thus add return $obj; } -after the first line of wrap(). This is not a most effective +after the first line of wrap(). 
This is not a most effective implementation, one may consider sub inc { $_[0] = bless ['++', shift, 1]; } @@ -1239,8 +1272,8 @@ As a final remark, note that one can fill %subr by $subr{'++'} = $subr{'+'}; $subr{'--'} = $subr{'-'}; -This finishes implementation of a primitive symbolic calculator in -50 lines of Perl code. Since the numeric values of subexpressions +This finishes implementation of a primitive symbolic calculator in +50 lines of Perl code. Since the numeric values of subexpressions are not cached, the calculator is very slow. Here is the answer for the exercise: In the case of str(), we need no @@ -1266,9 +1299,9 @@ until the value is I<used>. To see it in action, add a method - sub STORE { - my $obj = shift; - $#$obj = 1; + sub STORE { + my $obj = shift; + $#$obj = 1; @$obj->[0,1] = ('=', shift); } @@ -1337,6 +1370,27 @@ key (in fact a presence of this method shows that this package has overloading enabled, and it is what is used by the C<Overloaded> function of module C<overload>). +The module might issue the following warnings: + +=over 4 + +=item Odd number of arguments for overload::constant + +(W) The call to overload::constant contained an odd number of arguments. +The arguments should come in pairs. + +=item `%s' is not an overloadable type + +(W) You tried to overload a constant type the overload package is unaware of. + +=item `%s' is not a code reference + +(W) The second (fourth, sixth, ...) argument of overload::constant needs +to be a code reference. Either an anonymous subroutine, or a reference +to a subroutine. + +=back + =head1 BUGS Because it is used for overloading, the per-package hash %OVERLOAD now @@ -1348,12 +1402,12 @@ C<fallback> is present (possibly undefined). This may create interesting effects if some package is not overloaded, but inherits from two overloaded packages. -Relation between overloading and tie()ing is broken. Overloading is +Relation between overloading and tie()ing is broken. 
Overloading is triggered or not basing on the I<previous> class of tie()d value. -This happens because the presence of overloading is checked too early, +This happens because the presence of overloading is checked too early, before any tie()d access is attempted. If the FETCH()ed class of the -tie()d value does not change, a simple workaround is to access the value +tie()d value does not change, a simple workaround is to access the value immediately after tie()ing, so that after this call the I<previous> class coincides with the current one. diff --git a/lib/perl5db.pl b/lib/perl5db.pl index cc6a405823..fb6d683f7d 100644 --- a/lib/perl5db.pl +++ b/lib/perl5db.pl @@ -426,7 +426,7 @@ if ($notty) { PeerAddr => $remoteport, Proto => 'tcp', ); - if (!$OUT) { die "Could not create socket to connect to remote host."; } + if (!$OUT) { die "Unable to connect to remote host: $remoteport\n"; } $IN = $OUT; } else { @@ -617,7 +617,7 @@ EOP next CMD; } } - $cmd =~ /^q$/ && ($exiting = 1) && exit 0; + $cmd =~ /^q$/ && ($fall_off_end = 1) && exit $?; $cmd =~ /^h$/ && do { print_help($help); next CMD; }; @@ -899,9 +899,9 @@ EOP print $OUT "Will stop on load of `@{[join '\', `', sort keys %break_on_load]}'.\n"; next CMD; }; $cmd =~ /^b\b\s*(postpone|compile)\b\s*([':A-Za-z_][':\w]*)\s*(.*)/ && do { - my $cond = $3 || '1'; + my $cond = length $3 ? $3 : '1'; my ($subname, $break) = ($2, $1 eq 'postpone'); - $subname =~ s/\'/::/; + $subname =~ s/\'/::/g; $subname = "${'package'}::" . $subname unless $subname =~ /::/; $subname = "main".$subname if substr($subname,0,2) eq "::"; @@ -910,8 +910,8 @@ EOP next CMD; }; $cmd =~ /^b\b\s*([':A-Za-z_][':\w]*(?:\[.*\])?)\s*(.*)/ && do { $subname = $1; - $cond = $2 || '1'; - $subname =~ s/\'/::/; + $cond = length $2 ? $2 : '1'; + $subname =~ s/\'/::/g; $subname = "${'package'}::" . 
$subname unless $subname =~ /::/; $subname = "main".$subname if substr($subname,0,2) eq "::"; @@ -931,7 +931,7 @@ EOP next CMD; }; $cmd =~ /^b\b\s*(\d*)\s*(.*)/ && do { $i = $1 || $line; - $cond = $2 || '1'; + $cond = length $2 ? $2 : '1'; if ($dbline[$i] == 0) { print $OUT "Line $i not breakable.\n"; } else { @@ -941,8 +941,12 @@ EOP next CMD; }; $cmd =~ /^d\b\s*(\d*)/ && do { $i = $1 || $line; - $dbline{$i} =~ s/^[^\0]*//; - delete $dbline{$i} if $dbline{$i} eq ''; + if ($dbline[$i] == 0) { + print $OUT "Line $i not breakable.\n"; + } else { + $dbline{$i} =~ s/^[^\0]*//; + delete $dbline{$i} if $dbline{$i} eq ''; + } next CMD; }; $cmd =~ /^A$/ && do { print $OUT "Deleting all actions...\n"; @@ -1426,7 +1430,7 @@ EOP $piped= ""; } } # CMD: - $exiting = 1 unless defined $cmd; + $fall_off_end = 1 unless defined $cmd; # Emulate `q' on EOF foreach $evalarg (@$post) { &eval; } @@ -1507,6 +1511,7 @@ sub eval { local $otrace = $trace; local $osingle = $single; local $od = $^D; + { ($evalarg) = $evalarg =~ /(.*)/s; } @res = eval "$usercontext $evalarg;\n"; # '\n' for nice recursive debug $trace = $otrace; $single = $osingle; @@ -2160,8 +2165,8 @@ B<W> Delete all watch-expressions. B<V> [I<pkg> [I<vars>]] List some (default all) variables in package (default current). Use B<~>I<pattern> and B<!>I<pattern> for positive and negative regexps. B<X> [I<vars>] Same as \"B<V> I<currentpackage> [I<vars>]\". -B<x> I<expr> Evals expression in array context, dumps the result. -B<m> I<expr> Evals expression in array context, prints methods callable +B<x> I<expr> Evals expression in list context, dumps the result. +B<m> I<expr> Evals expression in list context, prints methods callable on the first element of the result. B<m> I<class> Prints methods callable via the given class. 
@@ -2256,7 +2261,7 @@ I<Debugger controls:> B<L> List break/watch/act B<|>[B<|>]I<db_cmd> Send output to pager B<$psh>\[B<$psh>\] I<syscmd> Run cmd in a subprocess B<q> or B<^D> Quit B<R> Attempt a restart I<Data Examination:> B<expr> Execute perl code, also see: B<s>,B<n>,B<t> I<expr> - B<x>|B<m> I<expr> Evals expr in array context, dumps the result or lists methods. + B<x>|B<m> I<expr> Evals expr in list context, dumps the result or lists methods. B<p> I<expr> Print expression (uses script's current package). B<S> [[B<!>]I<pat>] List subroutine names [not] matching pattern B<V> [I<Pk> [I<Vars>]] List Variables in Package. Vars can be ~pattern or !pattern. @@ -2679,10 +2684,11 @@ sub end_report { } END { - $finished = $inhibit_exit; # So that some keys may be disabled. + $finished = 1 if $inhibit_exit; # So that some keys may be disabled. + $fall_off_end = 1 unless $inhibit_exit; # Do not stop in at_exit() and destructors on exit: - $DB::single = !$exiting && !$runnonstop; - DB::fake::at_exit() unless $exiting or $runnonstop; + $DB::single = !$fall_off_end && !$runnonstop; + DB::fake::at_exit() unless $fall_off_end or $runnonstop; } package DB::fake; diff --git a/lib/syslog.pl b/lib/syslog.pl index 70c439b9ae..f0dbb1c96a 100644 --- a/lib/syslog.pl +++ b/lib/syslog.pl @@ -34,7 +34,7 @@ use warnings::register; $host = 'localhost' unless $host; # set $syslog'host to change if ($] >= 5 && warnings::enabled()) { - warnings::warn "You should 'use Sys::Syslog' instead; continuing"; + warnings::warn("You should 'use Sys::Syslog' instead; continuing"); } require 'syslog.ph'; diff --git a/lib/termcap.pl b/lib/termcap.pl index 06da956666..f295a2d476 100644 --- a/lib/termcap.pl +++ b/lib/termcap.pl @@ -22,7 +22,7 @@ sub Tgetent { local($TERM) = @_; local($TERMCAP,$_,$entry,$loop,$field); - warn "Tgetent: no ospeed set" unless $ospeed; + # warn "Tgetent: no ospeed set" unless $ospeed; foreach $key (keys %TC) { delete $TC{$key}; } diff --git a/lib/unicode/ArabLink.pl 
b/lib/unicode/ArabLink.pl index fd5ed8a6b1..2ad1871bac 100644 --- a/lib/unicode/ArabLink.pl +++ b/lib/unicode/ArabLink.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0622 0625 R @@ -12,10 +12,9 @@ return <<'END'; 0633 063a D 0640 C 0641 0647 D -0648 0649 R -064a D -0671 U -0672 0673 R +0648 R +0649 064a D +0671 0673 R 0674 U 0675 0677 R 0678 0687 D diff --git a/lib/unicode/ArabLnkGrp.pl b/lib/unicode/ArabLnkGrp.pl index 61f30d4348..1581a04897 100644 --- a/lib/unicode/ArabLnkGrp.pl +++ b/lib/unicode/ArabLnkGrp.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0622 0623 ALEF @@ -27,8 +27,7 @@ return <<'END'; 0647 HEH 0648 WAW 0649 064a YEH -0671 <no shaping> -0672 0673 ALEF +0671 0673 ALEF 0674 <no shaping> 0675 ALEF 0676 0677 WAW diff --git a/lib/unicode/ArabShap.txt b/lib/unicode/ArabShap.txt index 6092d6223c..9b60290e62 100644 --- a/lib/unicode/ArabShap.txt +++ b/lib/unicode/ArabShap.txt @@ -1,5 +1,32 @@ -# Unicode; Schematic Name; Link; Link Group +# ArabicShaping-3.txt +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# This file defines the shaping classes for Arabic and Syriac +# positional shaping, repeating in machine readable form the +# information printed in Tables 8-6, 8-7, 8-8, 8-10, 8-11, and +# 8-13 of The Unicode Standard, Version 3.0. +# +# See sections 8.2 and 8.3 of The Unicode Standard, Version 3.0 +# for more information. +# +# Each line contains four fields, separated by a semicolon. +# +# The first field gives the code point, in 4-digit hexadecimal +# form, of an Arabic or Syriac character. 
+# The second field gives a short schematic name for that character, +# abbreviated from the normative Unicode character name. +# The third field defines the joining type: R right-joining, +# D dual-joining, U non-joining +# The fourth field defines the joining group. +# +# ############################################################# + +# Unicode; Schematic Name; Joining Type; Joining Group + # Arabic characters + 0622; MADDA ON ALEF; R; ALEF 0623; HAMZA ON ALEF; R; ALEF 0624; HAMZA ON WAW; R; WAW @@ -34,9 +61,9 @@ 0646; NOON; D; NOON 0647; HEH; D; HEH 0648; WAW; R; WAW -0649; ALEF MAKSURA; R; YEH +0649; ALEF MAKSURA; D; YEH 064A; YEH; D; YEH -0671; HAMZAT WASL ON ALEF; U; <no shaping> +0671; HAMZAT WASL ON ALEF; R; ALEF 0672; WAVY HAMZA ON ALEF; R; ALEF 0673; WAVY HAMZA UNDER ALEF; R; ALEF 0674; HIGH HAMZA; U; <no shaping> @@ -139,7 +166,9 @@ 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN 06FB; DAD WITH DOT BELOW; D; SAD 06FC; GHAIN WITH DOT BELOW; D; AIN + # Syriac characters + 0710; ALAPH; R; ALAPH 0712; BETH; D; BETH 0713; GAMAL; D; GAMAL diff --git a/lib/unicode/BidiMirr.txt b/lib/unicode/BidiMirr.txt new file mode 100644 index 0000000000..8ac5be98a1 --- /dev/null +++ b/lib/unicode/BidiMirr.txt @@ -0,0 +1,238 @@ +# BidiMirroring-1.txt +# +# This file is an informative supplement to the UnicodeData file. It +# lists characters that have the mirrored property +# where there is another Unicode character that typically has a glyph +# that is the mirror image of the original character's glyph. +# The repertoire covered by the file is Unicode 3.0.1. +# +# The file contains a list of lines with mappings from one code point +# to another one for character-based mirroring. +# Note that for "real" mirroring, a rendering engine needs to select +# appropriate alternative glyphs, and that many Unicode characters do not +# have a mirror-image Unicode character. +# +# Each mapping line contains two fields, separated by a semicolon (';'). 
+# Each of the two fields contains a code point represented as a +# variable-length hexadecimal value with 4 to 6 digits. +# A comment indicates where the characters are "BEST FIT" mirroring. +# +# Code points with the "mirrored" property but no appropriate mirrors are +# listed as comments at the end of the file. +# +# For information on bidi mirroring, see UTR #9: Bidirectional Algorithm, +# at http://www.unicode.org/unicode/reports/tr9/ +# +# Please address any comments to <errata@unicode.org>. +# Note that this is an archival address: messages will be checked, +# but do not expect an immediate response. +# +# This file was originally created by Markus Scherer +# +# ############################################################ + +0028; 0029 # LEFT PARENTHESIS +0029; 0028 # RIGHT PARENTHESIS +003C; 003E # LESS-THAN SIGN +003E; 003C # GREATER-THAN SIGN +005B; 005D # LEFT SQUARE BRACKET +005D; 005B # RIGHT SQUARE BRACKET +007B; 007D # LEFT CURLY BRACKET +007D; 007B # RIGHT CURLY BRACKET +00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2045; 2046 # LEFT SQUARE BRACKET WITH QUILL +2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL +207D; 207E # SUPERSCRIPT LEFT PARENTHESIS +207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS +208D; 208E # SUBSCRIPT LEFT PARENTHESIS +208E; 208D # SUBSCRIPT RIGHT PARENTHESIS +2208; 220B # ELEMENT OF +2209; 220C # NOT AN ELEMENT OF +220A; 220D # SMALL ELEMENT OF +220B; 2208 # CONTAINS AS MEMBER +220C; 2209 # DOES NOT CONTAIN AS MEMBER +220D; 220A # SMALL CONTAINS AS MEMBER +223C; 223D # TILDE OPERATOR +223D; 223C # REVERSED TILDE +2243; 22CD # ASYMPTOTICALLY EQUAL TO +2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO +2254; 2255 # COLON EQUALS +2255; 2254 # EQUALS COLON +2264; 2265 # LESS-THAN OR EQUAL TO +2265; 2264 # 
GREATER-THAN OR EQUAL TO +2266; 2267 # LESS-THAN OVER EQUAL TO +2267; 2266 # GREATER-THAN OVER EQUAL TO +2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO +2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO +226A; 226B # MUCH LESS-THAN +226B; 226A # MUCH GREATER-THAN +226E; 226F # [BEST FIT] NOT LESS-THAN +226F; 226E # [BEST FIT] NOT GREATER-THAN +2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO +2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO +2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO +2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO +2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO +2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO +2276; 2277 # LESS-THAN OR GREATER-THAN +2277; 2276 # GREATER-THAN OR LESS-THAN +2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN +2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN +227A; 227B # PRECEDES +227B; 227A # SUCCEEDS +227C; 227D # PRECEDES OR EQUAL TO +227D; 227C # SUCCEEDS OR EQUAL TO +227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO +227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO +2280; 2281 # [BEST FIT] DOES NOT PRECEDE +2281; 2280 # [BEST FIT] DOES NOT SUCCEED +2282; 2283 # SUBSET OF +2283; 2282 # SUPERSET OF +2284; 2285 # [BEST FIT] NOT A SUBSET OF +2285; 2284 # [BEST FIT] NOT A SUPERSET OF +2286; 2287 # SUBSET OF OR EQUAL TO +2287; 2286 # SUPERSET OF OR EQUAL TO +2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO +2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO +228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO +228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO +228F; 2290 # SQUARE IMAGE OF +2290; 228F # SQUARE ORIGINAL OF +2291; 2292 # SQUARE IMAGE OF OR EQUAL TO +2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO +22A2; 22A3 # RIGHT TACK +22A3; 22A2 # LEFT TACK +22B0; 22B1 # PRECEDES UNDER RELATION +22B1; 22B0 # SUCCEEDS UNDER RELATION +22B2; 22B3 # NORMAL SUBGROUP OF +22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP +22B4; 22B5 # NORMAL SUBGROUP OF OR 
EQUAL TO +22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6; 22B7 # ORIGINAL OF +22B7; 22B6 # IMAGE OF +22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB; 22CC # LEFT SEMIDIRECT PRODUCT +22CC; 22CB # RIGHT SEMIDIRECT PRODUCT +22CD; 2243 # REVERSED TILDE EQUALS +22D0; 22D1 # DOUBLE SUBSET +22D1; 22D0 # DOUBLE SUPERSET +22D6; 22D7 # LESS-THAN WITH DOT +22D7; 22D6 # GREATER-THAN WITH DOT +22D8; 22D9 # VERY MUCH LESS-THAN +22D9; 22D8 # VERY MUCH GREATER-THAN +22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN +22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN +22DC; 22DD # EQUAL TO OR LESS-THAN +22DD; 22DC # EQUAL TO OR GREATER-THAN +22DE; 22DF # EQUAL TO OR PRECEDES +22DF; 22DE # EQUAL TO OR SUCCEEDS +22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL +22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL +22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO +22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO +22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO +22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO +22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO +22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO +22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO +22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO +22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF +22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP +22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO +22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS +22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS +2308; 2309 # LEFT CEILING +2309; 2308 # RIGHT CEILING +230A; 230B # LEFT FLOOR +230B; 230A # RIGHT FLOOR +2329; 232A # LEFT-POINTING ANGLE BRACKET +232A; 2329 # RIGHT-POINTING ANGLE BRACKET +3008; 3009 # LEFT ANGLE BRACKET +3009; 3008 # RIGHT ANGLE BRACKET +300A; 300B # LEFT DOUBLE ANGLE BRACKET +300B; 300A # RIGHT DOUBLE ANGLE BRACKET 
+300C; 300D # [BEST FIT] LEFT CORNER BRACKET +300D; 300C # [BEST FIT] RIGHT CORNER BRACKET +300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET +300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET +3010; 3011 # LEFT BLACK LENTICULAR BRACKET +3011; 3010 # RIGHT BLACK LENTICULAR BRACKET +3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET +3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET +3016; 3017 # LEFT WHITE LENTICULAR BRACKET +3017; 3016 # RIGHT WHITE LENTICULAR BRACKET +3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET +3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET +301A; 301B # LEFT WHITE SQUARE BRACKET +301B; 301A # RIGHT WHITE SQUARE BRACKET + +# The following characters have no appropriate mirroring character + +# 2201; COMPLEMENT +# 2202; PARTIAL DIFFERENTIAL +# 2203; THERE EXISTS +# 2204; THERE DOES NOT EXIST +# 2211; N-ARY SUMMATION +# 2215; DIVISION SLASH +# 2216; SET MINUS +# 221A; SQUARE ROOT +# 221B; CUBE ROOT +# 221C; FOURTH ROOT +# 221D; PROPORTIONAL TO +# 221F; RIGHT ANGLE +# 2220; ANGLE +# 2221; MEASURED ANGLE +# 2222; SPHERICAL ANGLE +# 2224; DOES NOT DIVIDE +# 2226; NOT PARALLEL TO +# 222B; INTEGRAL +# 222C; DOUBLE INTEGRAL +# 222D; TRIPLE INTEGRAL +# 222E; CONTOUR INTEGRAL +# 222F; SURFACE INTEGRAL +# 2230; VOLUME INTEGRAL +# 2231; CLOCKWISE INTEGRAL +# 2232; CLOCKWISE CONTOUR INTEGRAL +# 2233; ANTICLOCKWISE CONTOUR INTEGRAL +# 2239; EXCESS +# 223B; HOMOTHETIC +# 223E; INVERTED LAZY S +# 223F; SINE WAVE +# 2240; WREATH PRODUCT +# 2241; NOT TILDE +# 2242; MINUS TILDE +# 2244; NOT ASYMPTOTICALLY EQUAL TO +# 2245; APPROXIMATELY EQUAL TO +# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +# 2248; ALMOST EQUAL TO +# 2249; NOT ALMOST EQUAL TO +# 224A; ALMOST EQUAL OR EQUAL TO +# 224B; TRIPLE TILDE +# 224C; ALL EQUAL TO +# 225F; QUESTIONED EQUAL TO +# 2260; NOT EQUAL TO +# 2262; NOT IDENTICAL TO +# 228C; MULTISET +# 2298; CIRCLED DIVISION SLASH +# 22A6; ASSERTION +# 22A7; MODELS +# 22A8; TRUE +# 
22A9; FORCES +# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE +# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22AC; DOES NOT PROVE +# 22AD; NOT TRUE +# 22AE; DOES NOT FORCE +# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22B8; MULTIMAP +# 22BE; RIGHT ANGLE WITH ARC +# 22BF; RIGHT TRIANGLE +# 2320; TOP HALF INTEGRAL +# 2321; BOTTOM HALF INTEGRAL diff --git a/lib/unicode/Bidirectional.pl b/lib/unicode/Bidirectional.pl index 73898b8399..3cc2d0aafd 100644 --- a/lib/unicode/Bidirectional.pl +++ b/lib/unicode/Bidirectional.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 0008 BN @@ -635,4 +635,6 @@ ffe5 ffe6 ET ffe8 ffee ON fff9 fffb BN fffc fffd ON +f0000 ffffd L +100000 10fffd L END diff --git a/lib/unicode/Block.pl b/lib/unicode/Block.pl index ee680b724d..2b5bfce3e1 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/Block.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 007F Basic Latin diff --git a/lib/unicode/CaseFold.txt b/lib/unicode/CaseFold.txt new file mode 100644 index 0000000000..6529c412f2 --- /dev/null +++ b/lib/unicode/CaseFold.txt @@ -0,0 +1,821 @@ +# CaseFolding-2.txt +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to this mapping, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# For information on case folding, see +# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/ +# +# These are informative character properties. 
+# +# Send comments to mark@unicode.org +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status is: +# L (for Lowercase) if the case mapping matches the standard 1-1 lowercase mapping +# E (for exception) if it does not. +# +# The mapping may consist of multiple characters. +# If so, they are separated by spaces. +# +# ================================================================= + +0041; L; 0061; #LATIN CAPITAL LETTER A +0042; L; 0062; #LATIN CAPITAL LETTER B +0043; L; 0063; #LATIN CAPITAL LETTER C +0044; L; 0064; #LATIN CAPITAL LETTER D +0045; L; 0065; #LATIN CAPITAL LETTER E +0046; L; 0066; #LATIN CAPITAL LETTER F +0047; L; 0067; #LATIN CAPITAL LETTER G +0048; L; 0068; #LATIN CAPITAL LETTER H +0049; L; 0069; #LATIN CAPITAL LETTER I +004A; L; 006A; #LATIN CAPITAL LETTER J +004B; L; 006B; #LATIN CAPITAL LETTER K +004C; L; 006C; #LATIN CAPITAL LETTER L +004D; L; 006D; #LATIN CAPITAL LETTER M +004E; L; 006E; #LATIN CAPITAL LETTER N +004F; L; 006F; #LATIN CAPITAL LETTER O +0050; L; 0070; #LATIN CAPITAL LETTER P +0051; L; 0071; #LATIN CAPITAL LETTER Q +0052; L; 0072; #LATIN CAPITAL LETTER R +0053; L; 0073; #LATIN CAPITAL LETTER S +0054; L; 0074; #LATIN CAPITAL LETTER T +0055; L; 0075; #LATIN CAPITAL LETTER U +0056; L; 0076; #LATIN CAPITAL LETTER V +0057; L; 0077; #LATIN CAPITAL LETTER W +0058; L; 0078; #LATIN CAPITAL LETTER X +0059; L; 0079; #LATIN CAPITAL LETTER Y +005A; L; 007A; #LATIN CAPITAL LETTER Z +00B5; E; 03BC; #MICRO SIGN +00C0; L; 00E0; #LATIN CAPITAL LETTER A WITH GRAVE +00C1; L; 00E1; #LATIN CAPITAL LETTER A WITH ACUTE +00C2; L; 00E2; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; L; 00E3; #LATIN CAPITAL LETTER A WITH TILDE +00C4; L; 00E4; #LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; L; 00E5; 
#LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; L; 00E6; #LATIN CAPITAL LETTER AE +00C7; L; 00E7; #LATIN CAPITAL LETTER C WITH CEDILLA +00C8; L; 00E8; #LATIN CAPITAL LETTER E WITH GRAVE +00C9; L; 00E9; #LATIN CAPITAL LETTER E WITH ACUTE +00CA; L; 00EA; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; L; 00EB; #LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; L; 00EC; #LATIN CAPITAL LETTER I WITH GRAVE +00CD; L; 00ED; #LATIN CAPITAL LETTER I WITH ACUTE +00CE; L; 00EE; #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; L; 00EF; #LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; L; 00F0; #LATIN CAPITAL LETTER ETH +00D1; L; 00F1; #LATIN CAPITAL LETTER N WITH TILDE +00D2; L; 00F2; #LATIN CAPITAL LETTER O WITH GRAVE +00D3; L; 00F3; #LATIN CAPITAL LETTER O WITH ACUTE +00D4; L; 00F4; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; L; 00F5; #LATIN CAPITAL LETTER O WITH TILDE +00D6; L; 00F6; #LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; L; 00F8; #LATIN CAPITAL LETTER O WITH STROKE +00D9; L; 00F9; #LATIN CAPITAL LETTER U WITH GRAVE +00DA; L; 00FA; #LATIN CAPITAL LETTER U WITH ACUTE +00DB; L; 00FB; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; L; 00FC; #LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; L; 00FD; #LATIN CAPITAL LETTER Y WITH ACUTE +00DE; L; 00FE; #LATIN CAPITAL LETTER THORN +00DF; E; 0073 0073; #LATIN SMALL LETTER SHARP S +0100; L; 0101; #LATIN CAPITAL LETTER A WITH MACRON +0102; L; 0103; #LATIN CAPITAL LETTER A WITH BREVE +0104; L; 0105; #LATIN CAPITAL LETTER A WITH OGONEK +0106; L; 0107; #LATIN CAPITAL LETTER C WITH ACUTE +0108; L; 0109; #LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; L; 010B; #LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; L; 010D; #LATIN CAPITAL LETTER C WITH CARON +010E; L; 010F; #LATIN CAPITAL LETTER D WITH CARON +0110; L; 0111; #LATIN CAPITAL LETTER D WITH STROKE +0112; L; 0113; #LATIN CAPITAL LETTER E WITH MACRON +0114; L; 0115; #LATIN CAPITAL LETTER E WITH BREVE +0116; L; 0117; #LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; L; 0119; #LATIN CAPITAL LETTER E WITH 
OGONEK +011A; L; 011B; #LATIN CAPITAL LETTER E WITH CARON +011C; L; 011D; #LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; L; 011F; #LATIN CAPITAL LETTER G WITH BREVE +0120; L; 0121; #LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; L; 0123; #LATIN CAPITAL LETTER G WITH CEDILLA +0124; L; 0125; #LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; L; 0127; #LATIN CAPITAL LETTER H WITH STROKE +0128; L; 0129; #LATIN CAPITAL LETTER I WITH TILDE +012A; L; 012B; #LATIN CAPITAL LETTER I WITH MACRON +012C; L; 012D; #LATIN CAPITAL LETTER I WITH BREVE +012E; L; 012F; #LATIN CAPITAL LETTER I WITH OGONEK +0130; L; 0069; #LATIN CAPITAL LETTER I WITH DOT ABOVE +0131; E; 0069; #LATIN SMALL LETTER DOTLESS I +0132; L; 0133; #LATIN CAPITAL LIGATURE IJ +0134; L; 0135; #LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; L; 0137; #LATIN CAPITAL LETTER K WITH CEDILLA +0139; L; 013A; #LATIN CAPITAL LETTER L WITH ACUTE +013B; L; 013C; #LATIN CAPITAL LETTER L WITH CEDILLA +013D; L; 013E; #LATIN CAPITAL LETTER L WITH CARON +013F; L; 0140; #LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; L; 0142; #LATIN CAPITAL LETTER L WITH STROKE +0143; L; 0144; #LATIN CAPITAL LETTER N WITH ACUTE +0145; L; 0146; #LATIN CAPITAL LETTER N WITH CEDILLA +0147; L; 0148; #LATIN CAPITAL LETTER N WITH CARON +0149; E; 02BC 006E; #LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; L; 014B; #LATIN CAPITAL LETTER ENG +014C; L; 014D; #LATIN CAPITAL LETTER O WITH MACRON +014E; L; 014F; #LATIN CAPITAL LETTER O WITH BREVE +0150; L; 0151; #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; L; 0153; #LATIN CAPITAL LIGATURE OE +0154; L; 0155; #LATIN CAPITAL LETTER R WITH ACUTE +0156; L; 0157; #LATIN CAPITAL LETTER R WITH CEDILLA +0158; L; 0159; #LATIN CAPITAL LETTER R WITH CARON +015A; L; 015B; #LATIN CAPITAL LETTER S WITH ACUTE +015C; L; 015D; #LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; L; 015F; #LATIN CAPITAL LETTER S WITH CEDILLA +0160; L; 0161; #LATIN CAPITAL LETTER S WITH CARON +0162; L; 0163; #LATIN CAPITAL LETTER T WITH CEDILLA +0164; L; 
0165; #LATIN CAPITAL LETTER T WITH CARON +0166; L; 0167; #LATIN CAPITAL LETTER T WITH STROKE +0168; L; 0169; #LATIN CAPITAL LETTER U WITH TILDE +016A; L; 016B; #LATIN CAPITAL LETTER U WITH MACRON +016C; L; 016D; #LATIN CAPITAL LETTER U WITH BREVE +016E; L; 016F; #LATIN CAPITAL LETTER U WITH RING ABOVE +0170; L; 0171; #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; L; 0173; #LATIN CAPITAL LETTER U WITH OGONEK +0174; L; 0175; #LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; L; 0177; #LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; L; 00FF; #LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; L; 017A; #LATIN CAPITAL LETTER Z WITH ACUTE +017B; L; 017C; #LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; L; 017E; #LATIN CAPITAL LETTER Z WITH CARON +017F; E; 0073; #LATIN SMALL LETTER LONG S +0181; L; 0253; #LATIN CAPITAL LETTER B WITH HOOK +0182; L; 0183; #LATIN CAPITAL LETTER B WITH TOPBAR +0184; L; 0185; #LATIN CAPITAL LETTER TONE SIX +0186; L; 0254; #LATIN CAPITAL LETTER OPEN O +0187; L; 0188; #LATIN CAPITAL LETTER C WITH HOOK +0189; L; 0256; #LATIN CAPITAL LETTER AFRICAN D +018A; L; 0257; #LATIN CAPITAL LETTER D WITH HOOK +018B; L; 018C; #LATIN CAPITAL LETTER D WITH TOPBAR +018E; L; 01DD; #LATIN CAPITAL LETTER REVERSED E +018F; L; 0259; #LATIN CAPITAL LETTER SCHWA +0190; L; 025B; #LATIN CAPITAL LETTER OPEN E +0191; L; 0192; #LATIN CAPITAL LETTER F WITH HOOK +0193; L; 0260; #LATIN CAPITAL LETTER G WITH HOOK +0194; L; 0263; #LATIN CAPITAL LETTER GAMMA +0196; L; 0269; #LATIN CAPITAL LETTER IOTA +0197; L; 0268; #LATIN CAPITAL LETTER I WITH STROKE +0198; L; 0199; #LATIN CAPITAL LETTER K WITH HOOK +019C; L; 026F; #LATIN CAPITAL LETTER TURNED M +019D; L; 0272; #LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; L; 0275; #LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; L; 01A1; #LATIN CAPITAL LETTER O WITH HORN +01A2; L; 01A3; #LATIN CAPITAL LETTER OI +01A4; L; 01A5; #LATIN CAPITAL LETTER P WITH HOOK +01A6; L; 0280; #LATIN LETTER YR +01A7; L; 01A8; #LATIN CAPITAL LETTER TONE TWO +01A9; L; 0283; 
#LATIN CAPITAL LETTER ESH +01AC; L; 01AD; #LATIN CAPITAL LETTER T WITH HOOK +01AE; L; 0288; #LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; L; 01B0; #LATIN CAPITAL LETTER U WITH HORN +01B1; L; 028A; #LATIN CAPITAL LETTER UPSILON +01B2; L; 028B; #LATIN CAPITAL LETTER V WITH HOOK +01B3; L; 01B4; #LATIN CAPITAL LETTER Y WITH HOOK +01B5; L; 01B6; #LATIN CAPITAL LETTER Z WITH STROKE +01B7; L; 0292; #LATIN CAPITAL LETTER EZH +01B8; L; 01B9; #LATIN CAPITAL LETTER EZH REVERSED +01BC; L; 01BD; #LATIN CAPITAL LETTER TONE FIVE +01C4; L; 01C6; #LATIN CAPITAL LETTER DZ WITH CARON +01C5; L; 01C6; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; L; 01C9; #LATIN CAPITAL LETTER LJ +01C8; L; 01C9; #LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; L; 01CC; #LATIN CAPITAL LETTER NJ +01CB; L; 01CC; #LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; L; 01CE; #LATIN CAPITAL LETTER A WITH CARON +01CF; L; 01D0; #LATIN CAPITAL LETTER I WITH CARON +01D1; L; 01D2; #LATIN CAPITAL LETTER O WITH CARON +01D3; L; 01D4; #LATIN CAPITAL LETTER U WITH CARON +01D5; L; 01D6; #LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; L; 01D8; #LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; L; 01DA; #LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; L; 01DC; #LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; L; 01DF; #LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; L; 01E1; #LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; L; 01E3; #LATIN CAPITAL LETTER AE WITH MACRON +01E4; L; 01E5; #LATIN CAPITAL LETTER G WITH STROKE +01E6; L; 01E7; #LATIN CAPITAL LETTER G WITH CARON +01E8; L; 01E9; #LATIN CAPITAL LETTER K WITH CARON +01EA; L; 01EB; #LATIN CAPITAL LETTER O WITH OGONEK +01EC; L; 01ED; #LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; L; 01EF; #LATIN CAPITAL LETTER EZH WITH CARON +01F0; E; 006A 030C; #LATIN SMALL LETTER J WITH CARON +01F1; L; 01F3; #LATIN CAPITAL LETTER DZ +01F2; L; 01F3; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; L; 01F5; #LATIN 
CAPITAL LETTER G WITH ACUTE +01F6; L; 0195; #LATIN CAPITAL LETTER HWAIR +01F7; L; 01BF; #LATIN CAPITAL LETTER WYNN +01F8; L; 01F9; #LATIN CAPITAL LETTER N WITH GRAVE +01FA; L; 01FB; #LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; L; 01FD; #LATIN CAPITAL LETTER AE WITH ACUTE +01FE; L; 01FF; #LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200; L; 0201; #LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; L; 0203; #LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; L; 0205; #LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; L; 0207; #LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; L; 0209; #LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; L; 020B; #LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; L; 020D; #LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; L; 020F; #LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; L; 0211; #LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; L; 0213; #LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; L; 0215; #LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; L; 0217; #LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; L; 0219; #LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; L; 021B; #LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; L; 021D; #LATIN CAPITAL LETTER YOGH +021E; L; 021F; #LATIN CAPITAL LETTER H WITH CARON +0222; L; 0223; #LATIN CAPITAL LETTER OU +0224; L; 0225; #LATIN CAPITAL LETTER Z WITH HOOK +0226; L; 0227; #LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; L; 0229; #LATIN CAPITAL LETTER E WITH CEDILLA +022A; L; 022B; #LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; L; 022D; #LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; L; 022F; #LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; L; 0231; #LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; L; 0233; #LATIN CAPITAL LETTER Y WITH MACRON +0345; E; 03B9; #COMBINING GREEK YPOGEGRAMMENI +0386; L; 03AC; #GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; L; 03AD; #GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; L; 03AE; #GREEK CAPITAL LETTER ETA WITH TONOS +038A; L; 03AF; #GREEK 
CAPITAL LETTER IOTA WITH TONOS +038C; L; 03CC; #GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; L; 03CD; #GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; L; 03CE; #GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; L; 03B1; #GREEK CAPITAL LETTER ALPHA +0392; L; 03B2; #GREEK CAPITAL LETTER BETA +0393; L; 03B3; #GREEK CAPITAL LETTER GAMMA +0394; L; 03B4; #GREEK CAPITAL LETTER DELTA +0395; L; 03B5; #GREEK CAPITAL LETTER EPSILON +0396; L; 03B6; #GREEK CAPITAL LETTER ZETA +0397; L; 03B7; #GREEK CAPITAL LETTER ETA +0398; L; 03B8; #GREEK CAPITAL LETTER THETA +0399; L; 03B9; #GREEK CAPITAL LETTER IOTA +039A; L; 03BA; #GREEK CAPITAL LETTER KAPPA +039B; L; 03BB; #GREEK CAPITAL LETTER LAMDA +039C; L; 03BC; #GREEK CAPITAL LETTER MU +039D; L; 03BD; #GREEK CAPITAL LETTER NU +039E; L; 03BE; #GREEK CAPITAL LETTER XI +039F; L; 03BF; #GREEK CAPITAL LETTER OMICRON +03A0; L; 03C0; #GREEK CAPITAL LETTER PI +03A1; L; 03C1; #GREEK CAPITAL LETTER RHO +03A3; E; 03C2; #GREEK CAPITAL LETTER SIGMA +03A4; L; 03C4; #GREEK CAPITAL LETTER TAU +03A5; L; 03C5; #GREEK CAPITAL LETTER UPSILON +03A6; L; 03C6; #GREEK CAPITAL LETTER PHI +03A7; L; 03C7; #GREEK CAPITAL LETTER CHI +03A8; L; 03C8; #GREEK CAPITAL LETTER PSI +03A9; L; 03C9; #GREEK CAPITAL LETTER OMEGA +03AA; L; 03CA; #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; L; 03CB; #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; L; 03C2; #GREEK SMALL LETTER FINAL SIGMA +03C3; E; 03C2; #GREEK SMALL LETTER SIGMA +03D0; E; 03B2; #GREEK BETA SYMBOL +03D1; E; 03B8; #GREEK THETA SYMBOL +03D5; E; 03C6; #GREEK PHI SYMBOL +03D6; E; 03C0; #GREEK PI SYMBOL +03DA; L; 03DB; #GREEK LETTER STIGMA +03DC; L; 03DD; #GREEK LETTER DIGAMMA +03DE; L; 03DF; #GREEK LETTER KOPPA +03E0; L; 03E1; #GREEK LETTER SAMPI +03E2; L; 03E3; #COPTIC CAPITAL LETTER SHEI +03E4; L; 03E5; #COPTIC CAPITAL LETTER FEI +03E6; L; 03E7; #COPTIC 
CAPITAL LETTER KHEI +03E8; L; 03E9; #COPTIC CAPITAL LETTER HORI +03EA; L; 03EB; #COPTIC CAPITAL LETTER GANGIA +03EC; L; 03ED; #COPTIC CAPITAL LETTER SHIMA +03EE; L; 03EF; #COPTIC CAPITAL LETTER DEI +03F0; E; 03BA; #GREEK KAPPA SYMBOL +03F1; E; 03C1; #GREEK RHO SYMBOL +03F2; E; 03C2; #GREEK LUNATE SIGMA SYMBOL +0400; L; 0450; #CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; L; 0451; #CYRILLIC CAPITAL LETTER IO +0402; L; 0452; #CYRILLIC CAPITAL LETTER DJE +0403; L; 0453; #CYRILLIC CAPITAL LETTER GJE +0404; L; 0454; #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; L; 0455; #CYRILLIC CAPITAL LETTER DZE +0406; L; 0456; #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; L; 0457; #CYRILLIC CAPITAL LETTER YI +0408; L; 0458; #CYRILLIC CAPITAL LETTER JE +0409; L; 0459; #CYRILLIC CAPITAL LETTER LJE +040A; L; 045A; #CYRILLIC CAPITAL LETTER NJE +040B; L; 045B; #CYRILLIC CAPITAL LETTER TSHE +040C; L; 045C; #CYRILLIC CAPITAL LETTER KJE +040D; L; 045D; #CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; L; 045E; #CYRILLIC CAPITAL LETTER SHORT U +040F; L; 045F; #CYRILLIC CAPITAL LETTER DZHE +0410; L; 0430; #CYRILLIC CAPITAL LETTER A +0411; L; 0431; #CYRILLIC CAPITAL LETTER BE +0412; L; 0432; #CYRILLIC CAPITAL LETTER VE +0413; L; 0433; #CYRILLIC CAPITAL LETTER GHE +0414; L; 0434; #CYRILLIC CAPITAL LETTER DE +0415; L; 0435; #CYRILLIC CAPITAL LETTER IE +0416; L; 0436; #CYRILLIC CAPITAL LETTER ZHE +0417; L; 0437; #CYRILLIC CAPITAL LETTER ZE +0418; L; 0438; #CYRILLIC CAPITAL LETTER I +0419; L; 0439; #CYRILLIC CAPITAL LETTER SHORT I +041A; L; 043A; #CYRILLIC CAPITAL LETTER KA +041B; L; 043B; #CYRILLIC CAPITAL LETTER EL +041C; L; 043C; #CYRILLIC CAPITAL LETTER EM +041D; L; 043D; #CYRILLIC CAPITAL LETTER EN +041E; L; 043E; #CYRILLIC CAPITAL LETTER O +041F; L; 043F; #CYRILLIC CAPITAL LETTER PE +0420; L; 0440; #CYRILLIC CAPITAL LETTER ER +0421; L; 0441; #CYRILLIC CAPITAL LETTER ES +0422; L; 0442; #CYRILLIC CAPITAL LETTER TE +0423; L; 0443; #CYRILLIC CAPITAL LETTER U +0424; L; 0444; #CYRILLIC CAPITAL 
LETTER EF +0425; L; 0445; #CYRILLIC CAPITAL LETTER HA +0426; L; 0446; #CYRILLIC CAPITAL LETTER TSE +0427; L; 0447; #CYRILLIC CAPITAL LETTER CHE +0428; L; 0448; #CYRILLIC CAPITAL LETTER SHA +0429; L; 0449; #CYRILLIC CAPITAL LETTER SHCHA +042A; L; 044A; #CYRILLIC CAPITAL LETTER HARD SIGN +042B; L; 044B; #CYRILLIC CAPITAL LETTER YERU +042C; L; 044C; #CYRILLIC CAPITAL LETTER SOFT SIGN +042D; L; 044D; #CYRILLIC CAPITAL LETTER E +042E; L; 044E; #CYRILLIC CAPITAL LETTER YU +042F; L; 044F; #CYRILLIC CAPITAL LETTER YA +0460; L; 0461; #CYRILLIC CAPITAL LETTER OMEGA +0462; L; 0463; #CYRILLIC CAPITAL LETTER YAT +0464; L; 0465; #CYRILLIC CAPITAL LETTER IOTIFIED E +0466; L; 0467; #CYRILLIC CAPITAL LETTER LITTLE YUS +0468; L; 0469; #CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; L; 046B; #CYRILLIC CAPITAL LETTER BIG YUS +046C; L; 046D; #CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; L; 046F; #CYRILLIC CAPITAL LETTER KSI +0470; L; 0471; #CYRILLIC CAPITAL LETTER PSI +0472; L; 0473; #CYRILLIC CAPITAL LETTER FITA +0474; L; 0475; #CYRILLIC CAPITAL LETTER IZHITSA +0476; L; 0477; #CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; L; 0479; #CYRILLIC CAPITAL LETTER UK +047A; L; 047B; #CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; L; 047D; #CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; L; 047F; #CYRILLIC CAPITAL LETTER OT +0480; L; 0481; #CYRILLIC CAPITAL LETTER KOPPA +048C; L; 048D; #CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; L; 048F; #CYRILLIC CAPITAL LETTER ER WITH TICK +0490; L; 0491; #CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; L; 0493; #CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; L; 0495; #CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; L; 0497; #CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; L; 0499; #CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; L; 049B; #CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; L; 049D; #CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; L; 049F; #CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; L; 04A1; #CYRILLIC 
CAPITAL LETTER BASHKIR KA +04A2; L; 04A3; #CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; L; 04A5; #CYRILLIC CAPITAL LIGATURE EN GHE +04A6; L; 04A7; #CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; L; 04A9; #CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; L; 04AB; #CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; L; 04AD; #CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; L; 04AF; #CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; L; 04B1; #CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; L; 04B3; #CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; L; 04B5; #CYRILLIC CAPITAL LIGATURE TE TSE +04B6; L; 04B7; #CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; L; 04B9; #CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; L; 04BB; #CYRILLIC CAPITAL LETTER SHHA +04BC; L; 04BD; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; L; 04BF; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C1; L; 04C2; #CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; L; 04C4; #CYRILLIC CAPITAL LETTER KA WITH HOOK +04C7; L; 04C8; #CYRILLIC CAPITAL LETTER EN WITH HOOK +04CB; L; 04CC; #CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04D0; L; 04D1; #CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; L; 04D3; #CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; L; 04D5; #CYRILLIC CAPITAL LIGATURE A IE +04D6; L; 04D7; #CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; L; 04D9; #CYRILLIC CAPITAL LETTER SCHWA +04DA; L; 04DB; #CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; L; 04DD; #CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; L; 04DF; #CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; L; 04E1; #CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; L; 04E3; #CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; L; 04E5; #CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; L; 04E7; #CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; L; 04E9; #CYRILLIC CAPITAL LETTER BARRED O +04EA; L; 04EB; #CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; L; 04ED; #CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; L; 04EF; #CYRILLIC CAPITAL LETTER U WITH 
MACRON +04F0; L; 04F1; #CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; L; 04F3; #CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; L; 04F5; #CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F8; L; 04F9; #CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +0531; L; 0561; #ARMENIAN CAPITAL LETTER AYB +0532; L; 0562; #ARMENIAN CAPITAL LETTER BEN +0533; L; 0563; #ARMENIAN CAPITAL LETTER GIM +0534; L; 0564; #ARMENIAN CAPITAL LETTER DA +0535; L; 0565; #ARMENIAN CAPITAL LETTER ECH +0536; L; 0566; #ARMENIAN CAPITAL LETTER ZA +0537; L; 0567; #ARMENIAN CAPITAL LETTER EH +0538; L; 0568; #ARMENIAN CAPITAL LETTER ET +0539; L; 0569; #ARMENIAN CAPITAL LETTER TO +053A; L; 056A; #ARMENIAN CAPITAL LETTER ZHE +053B; L; 056B; #ARMENIAN CAPITAL LETTER INI +053C; L; 056C; #ARMENIAN CAPITAL LETTER LIWN +053D; L; 056D; #ARMENIAN CAPITAL LETTER XEH +053E; L; 056E; #ARMENIAN CAPITAL LETTER CA +053F; L; 056F; #ARMENIAN CAPITAL LETTER KEN +0540; L; 0570; #ARMENIAN CAPITAL LETTER HO +0541; L; 0571; #ARMENIAN CAPITAL LETTER JA +0542; L; 0572; #ARMENIAN CAPITAL LETTER GHAD +0543; L; 0573; #ARMENIAN CAPITAL LETTER CHEH +0544; L; 0574; #ARMENIAN CAPITAL LETTER MEN +0545; L; 0575; #ARMENIAN CAPITAL LETTER YI +0546; L; 0576; #ARMENIAN CAPITAL LETTER NOW +0547; L; 0577; #ARMENIAN CAPITAL LETTER SHA +0548; L; 0578; #ARMENIAN CAPITAL LETTER VO +0549; L; 0579; #ARMENIAN CAPITAL LETTER CHA +054A; L; 057A; #ARMENIAN CAPITAL LETTER PEH +054B; L; 057B; #ARMENIAN CAPITAL LETTER JHEH +054C; L; 057C; #ARMENIAN CAPITAL LETTER RA +054D; L; 057D; #ARMENIAN CAPITAL LETTER SEH +054E; L; 057E; #ARMENIAN CAPITAL LETTER VEW +054F; L; 057F; #ARMENIAN CAPITAL LETTER TIWN +0550; L; 0580; #ARMENIAN CAPITAL LETTER REH +0551; L; 0581; #ARMENIAN CAPITAL LETTER CO +0552; L; 0582; #ARMENIAN CAPITAL LETTER YIWN +0553; L; 0583; #ARMENIAN CAPITAL LETTER PIWR +0554; L; 0584; #ARMENIAN CAPITAL LETTER KEH +0555; L; 0585; #ARMENIAN CAPITAL LETTER OH +0556; L; 0586; #ARMENIAN CAPITAL LETTER FEH +0587; E; 0565 0582; #ARMENIAN SMALL LIGATURE 
ECH YIWN +1E00; L; 1E01; #LATIN CAPITAL LETTER A WITH RING BELOW +1E02; L; 1E03; #LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; L; 1E05; #LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; L; 1E07; #LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; L; 1E09; #LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; L; 1E0B; #LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; L; 1E0D; #LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; L; 1E0F; #LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; L; 1E11; #LATIN CAPITAL LETTER D WITH CEDILLA +1E12; L; 1E13; #LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; L; 1E15; #LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; L; 1E17; #LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; L; 1E19; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; L; 1E1B; #LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; L; 1E1D; #LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; L; 1E1F; #LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; L; 1E21; #LATIN CAPITAL LETTER G WITH MACRON +1E22; L; 1E23; #LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; L; 1E25; #LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; L; 1E27; #LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; L; 1E29; #LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; L; 1E2B; #LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; L; 1E2D; #LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; L; 1E2F; #LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; L; 1E31; #LATIN CAPITAL LETTER K WITH ACUTE +1E32; L; 1E33; #LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; L; 1E35; #LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; L; 1E37; #LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; L; 1E39; #LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; L; 1E3B; #LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; L; 1E3D; #LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; L; 1E3F; #LATIN CAPITAL LETTER M WITH ACUTE +1E40; L; 1E41; #LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; L; 1E43; #LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; L; 1E45; #LATIN CAPITAL LETTER N WITH DOT 
ABOVE +1E46; L; 1E47; #LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; L; 1E49; #LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; L; 1E4B; #LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; L; 1E4D; #LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; L; 1E4F; #LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; L; 1E51; #LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; L; 1E53; #LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; L; 1E55; #LATIN CAPITAL LETTER P WITH ACUTE +1E56; L; 1E57; #LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; L; 1E59; #LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; L; 1E5B; #LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; L; 1E5D; #LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; L; 1E5F; #LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; L; 1E61; #LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; L; 1E63; #LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; L; 1E65; #LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; L; 1E67; #LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; L; 1E69; #LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; L; 1E6B; #LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; L; 1E6D; #LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; L; 1E6F; #LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; L; 1E71; #LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; L; 1E73; #LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; L; 1E75; #LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; L; 1E77; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; L; 1E79; #LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; L; 1E7B; #LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; L; 1E7D; #LATIN CAPITAL LETTER V WITH TILDE +1E7E; L; 1E7F; #LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; L; 1E81; #LATIN CAPITAL LETTER W WITH GRAVE +1E82; L; 1E83; #LATIN CAPITAL LETTER W WITH ACUTE +1E84; L; 1E85; #LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; L; 1E87; #LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; L; 1E89; #LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; L; 1E8B; 
#LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; L; 1E8D; #LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; L; 1E8F; #LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; L; 1E91; #LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; L; 1E93; #LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; L; 1E95; #LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; E; 0068 0331; #LATIN SMALL LETTER H WITH LINE BELOW +1E97; E; 0074 0308; #LATIN SMALL LETTER T WITH DIAERESIS +1E98; E; 0077 030A; #LATIN SMALL LETTER W WITH RING ABOVE +1E99; E; 0079 030A; #LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; E; 0061 02BE; #LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; E; 1E61; #LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA0; L; 1EA1; #LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; L; 1EA3; #LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; L; 1EA5; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; L; 1EA7; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; L; 1EA9; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; L; 1EAB; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; L; 1EAD; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; L; 1EAF; #LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; L; 1EB1; #LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; L; 1EB3; #LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; L; 1EB5; #LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; L; 1EB7; #LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; L; 1EB9; #LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; L; 1EBB; #LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; L; 1EBD; #LATIN CAPITAL LETTER E WITH TILDE +1EBE; L; 1EBF; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; L; 1EC1; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; L; 1EC3; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; L; 1EC5; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; L; 1EC7; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; L; 1EC9; #LATIN CAPITAL LETTER I WITH HOOK ABOVE 
+1ECA; L; 1ECB; #LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; L; 1ECD; #LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; L; 1ECF; #LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; L; 1ED1; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; L; 1ED3; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; L; 1ED5; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; L; 1ED7; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; L; 1ED9; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; L; 1EDB; #LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; L; 1EDD; #LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; L; 1EDF; #LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; L; 1EE1; #LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; L; 1EE3; #LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; L; 1EE5; #LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; L; 1EE7; #LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; L; 1EE9; #LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; L; 1EEB; #LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; L; 1EED; #LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; L; 1EEF; #LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; L; 1EF1; #LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; L; 1EF3; #LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; L; 1EF5; #LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; L; 1EF7; #LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; L; 1EF9; #LATIN CAPITAL LETTER Y WITH TILDE +1F08; L; 1F00; #GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; L; 1F01; #GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; L; 1F02; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; L; 1F03; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; L; 1F04; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; L; 1F05; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; L; 1F06; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; L; 1F07; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; L; 1F10; #GREEK CAPITAL LETTER 
EPSILON WITH PSILI +1F19; L; 1F11; #GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; L; 1F12; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; L; 1F13; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; L; 1F14; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; L; 1F15; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; L; 1F20; #GREEK CAPITAL LETTER ETA WITH PSILI +1F29; L; 1F21; #GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; L; 1F22; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; L; 1F23; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; L; 1F24; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; L; 1F25; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; L; 1F26; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; L; 1F27; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; L; 1F30; #GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; L; 1F31; #GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; L; 1F32; #GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; L; 1F33; #GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; L; 1F34; #GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; L; 1F35; #GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; L; 1F36; #GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; L; 1F37; #GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; L; 1F40; #GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; L; 1F41; #GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; L; 1F42; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; L; 1F43; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; L; 1F44; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; L; 1F45; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; E; 03C5 0313; #GREEK SMALL LETTER UPSILON WITH PSILI +1F52; E; 03C5 0313 0300; #GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; E; 03C5 0313 0301; #GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; E; 03C5 0313 0342; #GREEK SMALL LETTER UPSILON WITH PSILI AND 
PERISPOMENI +1F59; L; 1F51; #GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; L; 1F53; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; L; 1F55; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; L; 1F57; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; L; 1F60; #GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; L; 1F61; #GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; L; 1F62; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; L; 1F63; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; L; 1F64; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; L; 1F65; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; L; 1F66; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; L; 1F67; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; E; 1F00 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; E; 1F01 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; E; 1F02 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; E; 1F03 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; E; 1F04 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; E; 1F05 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; E; 1F06 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; E; 1F07 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; E; 1F00 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; E; 1F01 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; E; 1F02 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; E; 1F03 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; E; 1F04 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; E; 1F05 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; E; 1F06 03B9; 
#GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; E; 1F07 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; E; 1F20 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; E; 1F21 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; E; 1F22 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; E; 1F23 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; E; 1F24 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; E; 1F25 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; E; 1F26 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; E; 1F27 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; E; 1F20 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; E; 1F21 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; E; 1F22 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; E; 1F23 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; E; 1F24 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; E; 1F25 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; E; 1F26 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; E; 1F27 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; E; 1F60 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; E; 1F61 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; E; 1F62 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; E; 1F63 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; E; 1F64 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; E; 1F65 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND 
YPOGEGRAMMENI +1FA6; E; 1F66 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; E; 1F67 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; E; 1F60 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; E; 1F61 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; E; 1F62 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; E; 1F63 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; E; 1F64 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; E; 1F65 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; E; 1F66 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; E; 1F67 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; E; 1F70 03B9; #GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; E; 03B1 03B9; #GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; E; 03AC 03B9; #GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; E; 03B1 0342; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; E; 03B1 0342 03B9; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; L; 1FB0; #GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; L; 1FB1; #GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; L; 1F70; #GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; L; 1F71; #GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; E; 03B1 03B9; #GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; E; 03B9; #GREEK PROSGEGRAMMENI +1FC2; E; 1F74 03B9; #GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; E; 03B7 03B9; #GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; E; 03AE 03B9; #GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; E; 03B7 0342; #GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; E; 03B7 0342 03B9; #GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; L; 1F72; #GREEK CAPITAL LETTER EPSILON 
WITH VARIA +1FC9; L; 1F73; #GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; L; 1F74; #GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; L; 1F75; #GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; E; 03B7 03B9; #GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; E; 03B9 0308 0300; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; E; 03B9 0342; #GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; E; 03B9 0308 0342; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; L; 1FD0; #GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; L; 1FD1; #GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; L; 1F76; #GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; L; 1F77; #GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; E; 03C5 0308 0300; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; E; 03C1 0313; #GREEK SMALL LETTER RHO WITH PSILI +1FE6; E; 03C5 0342; #GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; E; 03C5 0308 0342; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; L; 1FE0; #GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; L; 1FE1; #GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; L; 1F7A; #GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; L; 1F7B; #GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; L; 1FE5; #GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; E; 1F7C 03B9; #GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; E; 03C9 03B9; #GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; E; 03CE 03B9; #GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; E; 03C9 0342; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; E; 03C9 0342 03B9; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; L; 1F78; #GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; L; 1F79; #GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; L; 1F7C; #GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; L; 1F7D; #GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; E; 
03C9 03B9; #GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; L; 03C9; #OHM SIGN +212A; L; 006B; #KELVIN SIGN +212B; L; 00E5; #ANGSTROM SIGN +2160; L; 2170; #ROMAN NUMERAL ONE +2161; L; 2171; #ROMAN NUMERAL TWO +2162; L; 2172; #ROMAN NUMERAL THREE +2163; L; 2173; #ROMAN NUMERAL FOUR +2164; L; 2174; #ROMAN NUMERAL FIVE +2165; L; 2175; #ROMAN NUMERAL SIX +2166; L; 2176; #ROMAN NUMERAL SEVEN +2167; L; 2177; #ROMAN NUMERAL EIGHT +2168; L; 2178; #ROMAN NUMERAL NINE +2169; L; 2179; #ROMAN NUMERAL TEN +216A; L; 217A; #ROMAN NUMERAL ELEVEN +216B; L; 217B; #ROMAN NUMERAL TWELVE +216C; L; 217C; #ROMAN NUMERAL FIFTY +216D; L; 217D; #ROMAN NUMERAL ONE HUNDRED +216E; L; 217E; #ROMAN NUMERAL FIVE HUNDRED +216F; L; 217F; #ROMAN NUMERAL ONE THOUSAND +24B6; L; 24D0; #CIRCLED LATIN CAPITAL LETTER A +24B7; L; 24D1; #CIRCLED LATIN CAPITAL LETTER B +24B8; L; 24D2; #CIRCLED LATIN CAPITAL LETTER C +24B9; L; 24D3; #CIRCLED LATIN CAPITAL LETTER D +24BA; L; 24D4; #CIRCLED LATIN CAPITAL LETTER E +24BB; L; 24D5; #CIRCLED LATIN CAPITAL LETTER F +24BC; L; 24D6; #CIRCLED LATIN CAPITAL LETTER G +24BD; L; 24D7; #CIRCLED LATIN CAPITAL LETTER H +24BE; L; 24D8; #CIRCLED LATIN CAPITAL LETTER I +24BF; L; 24D9; #CIRCLED LATIN CAPITAL LETTER J +24C0; L; 24DA; #CIRCLED LATIN CAPITAL LETTER K +24C1; L; 24DB; #CIRCLED LATIN CAPITAL LETTER L +24C2; L; 24DC; #CIRCLED LATIN CAPITAL LETTER M +24C3; L; 24DD; #CIRCLED LATIN CAPITAL LETTER N +24C4; L; 24DE; #CIRCLED LATIN CAPITAL LETTER O +24C5; L; 24DF; #CIRCLED LATIN CAPITAL LETTER P +24C6; L; 24E0; #CIRCLED LATIN CAPITAL LETTER Q +24C7; L; 24E1; #CIRCLED LATIN CAPITAL LETTER R +24C8; L; 24E2; #CIRCLED LATIN CAPITAL LETTER S +24C9; L; 24E3; #CIRCLED LATIN CAPITAL LETTER T +24CA; L; 24E4; #CIRCLED LATIN CAPITAL LETTER U +24CB; L; 24E5; #CIRCLED LATIN CAPITAL LETTER V +24CC; L; 24E6; #CIRCLED LATIN CAPITAL LETTER W +24CD; L; 24E7; #CIRCLED LATIN CAPITAL LETTER X +24CE; L; 24E8; #CIRCLED LATIN CAPITAL LETTER Y +24CF; L; 24E9; #CIRCLED LATIN CAPITAL LETTER Z 
+FB00; E; 0066 0066; #LATIN SMALL LIGATURE FF +FB01; E; 0066 0069; #LATIN SMALL LIGATURE FI +FB02; E; 0066 006C; #LATIN SMALL LIGATURE FL +FB03; E; 0066 0066 0069; #LATIN SMALL LIGATURE FFI +FB04; E; 0066 0066 006C; #LATIN SMALL LIGATURE FFL +FB05; E; 0073 0074; #LATIN SMALL LIGATURE LONG S T +FB06; E; 0073 0074; #LATIN SMALL LIGATURE ST +FB13; E; 0574 0576; #ARMENIAN SMALL LIGATURE MEN NOW +FB14; E; 0574 0565; #ARMENIAN SMALL LIGATURE MEN ECH +FB15; E; 0574 056B; #ARMENIAN SMALL LIGATURE MEN INI +FB16; E; 057E 0576; #ARMENIAN SMALL LIGATURE VEW NOW +FB17; E; 0574 056D; #ARMENIAN SMALL LIGATURE MEN XEH +FF21; L; FF41; #FULLWIDTH LATIN CAPITAL LETTER A +FF22; L; FF42; #FULLWIDTH LATIN CAPITAL LETTER B +FF23; L; FF43; #FULLWIDTH LATIN CAPITAL LETTER C +FF24; L; FF44; #FULLWIDTH LATIN CAPITAL LETTER D +FF25; L; FF45; #FULLWIDTH LATIN CAPITAL LETTER E +FF26; L; FF46; #FULLWIDTH LATIN CAPITAL LETTER F +FF27; L; FF47; #FULLWIDTH LATIN CAPITAL LETTER G +FF28; L; FF48; #FULLWIDTH LATIN CAPITAL LETTER H +FF29; L; FF49; #FULLWIDTH LATIN CAPITAL LETTER I +FF2A; L; FF4A; #FULLWIDTH LATIN CAPITAL LETTER J +FF2B; L; FF4B; #FULLWIDTH LATIN CAPITAL LETTER K +FF2C; L; FF4C; #FULLWIDTH LATIN CAPITAL LETTER L +FF2D; L; FF4D; #FULLWIDTH LATIN CAPITAL LETTER M +FF2E; L; FF4E; #FULLWIDTH LATIN CAPITAL LETTER N +FF2F; L; FF4F; #FULLWIDTH LATIN CAPITAL LETTER O +FF30; L; FF50; #FULLWIDTH LATIN CAPITAL LETTER P +FF31; L; FF51; #FULLWIDTH LATIN CAPITAL LETTER Q +FF32; L; FF52; #FULLWIDTH LATIN CAPITAL LETTER R +FF33; L; FF53; #FULLWIDTH LATIN CAPITAL LETTER S +FF34; L; FF54; #FULLWIDTH LATIN CAPITAL LETTER T +FF35; L; FF55; #FULLWIDTH LATIN CAPITAL LETTER U +FF36; L; FF56; #FULLWIDTH LATIN CAPITAL LETTER V +FF37; L; FF57; #FULLWIDTH LATIN CAPITAL LETTER W +FF38; L; FF58; #FULLWIDTH LATIN CAPITAL LETTER X +FF39; L; FF59; #FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; L; FF5A; #FULLWIDTH LATIN CAPITAL LETTER Z + + diff --git a/lib/unicode/Category.pl b/lib/unicode/Category.pl index 
bffd1169be..9c81514c58 100644 --- a/lib/unicode/Category.pl +++ b/lib/unicode/Category.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f Cc @@ -1503,4 +1503,6 @@ ffe9 ffec Sm ffed ffee So fff9 fffb Cf fffc fffd So +f0000 ffffd Co +100000 10fffd Co END diff --git a/lib/unicode/CombiningClass.pl b/lib/unicode/CombiningClass.pl index a40949830c..628b9c63db 100644 --- a/lib/unicode/CombiningClass.pl +++ b/lib/unicode/CombiningClass.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 0314 230 diff --git a/lib/unicode/CompExcl.txt b/lib/unicode/CompExcl.txt index 5ea46afc63..53f846729d 100644 --- a/lib/unicode/CompExcl.txt +++ b/lib/unicode/CompExcl.txt @@ -1,3 +1,5 @@ +# CompositionExclusions-2.txt +# # Composition Exclusions # This file lists the characters from the UTR #15 Composition Exclusion Table. # @@ -133,8 +135,8 @@ FB4E # HEBREW LETTER PE WITH RAFE # (4) Non-Starter Decompositions # These characters can be derived from the UnicodeData file # by including all characters whose canonical decomposition consists -# of a sequence of characters, the first of which has a canonical -# class of zero. +# of a sequence of characters, the first of which has a non-zero +# combining class. # These characters are simply quoted here for reference. # 0344 COMBINING GREEK DIALYTIKA TONOS diff --git a/lib/unicode/Decomposition.pl b/lib/unicode/Decomposition.pl index ecc30b205e..1fe29cd157 100644 --- a/lib/unicode/Decomposition.pl +++ b/lib/unicode/Decomposition.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a0 <noBreak> 0020 diff --git a/lib/unicode/In/AlphabeticPresentationForms.pl b/lib/unicode/In/AlphabeticPresentationForms.pl index c42e944a3c..a85b9cabec 100644 --- a/lib/unicode/In/AlphabeticPresentationForms.pl +++ b/lib/unicode/In/AlphabeticPresentationForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FB00 FB4F diff --git a/lib/unicode/In/Arabic.pl b/lib/unicode/In/Arabic.pl index 5010ab73de..5fbbbfa028 100644 --- a/lib/unicode/In/Arabic.pl +++ b/lib/unicode/In/Arabic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0600 06FF diff --git a/lib/unicode/In/ArabicPresentationForms-A.pl b/lib/unicode/In/ArabicPresentationForms-A.pl index 6edd74d755..62521bb1b1 100644 --- a/lib/unicode/In/ArabicPresentationForms-A.pl +++ b/lib/unicode/In/ArabicPresentationForms-A.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FB50 FDFF diff --git a/lib/unicode/In/ArabicPresentationForms-B.pl b/lib/unicode/In/ArabicPresentationForms-B.pl index 964073931e..6b2d44742b 100644 --- a/lib/unicode/In/ArabicPresentationForms-B.pl +++ b/lib/unicode/In/ArabicPresentationForms-B.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; FE70 FEFE diff --git a/lib/unicode/In/Armenian.pl b/lib/unicode/In/Armenian.pl index 19b74acd71..d4736a7506 100644 --- a/lib/unicode/In/Armenian.pl +++ b/lib/unicode/In/Armenian.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0530 058F diff --git a/lib/unicode/In/Arrows.pl b/lib/unicode/In/Arrows.pl index 7ce44183a1..a7ef468593 100644 --- a/lib/unicode/In/Arrows.pl +++ b/lib/unicode/In/Arrows.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2190 21FF diff --git a/lib/unicode/In/BasicLatin.pl b/lib/unicode/In/BasicLatin.pl index 39987f16ec..36d6456fa6 100644 --- a/lib/unicode/In/BasicLatin.pl +++ b/lib/unicode/In/BasicLatin.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 007F diff --git a/lib/unicode/In/Bengali.pl b/lib/unicode/In/Bengali.pl index c0a47d30d1..07dc6ac102 100644 --- a/lib/unicode/In/Bengali.pl +++ b/lib/unicode/In/Bengali.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0980 09FF diff --git a/lib/unicode/In/BlockElements.pl b/lib/unicode/In/BlockElements.pl index e96e64faa0..495629b938 100644 --- a/lib/unicode/In/BlockElements.pl +++ b/lib/unicode/In/BlockElements.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2580 259F diff --git a/lib/unicode/In/Bopomofo.pl b/lib/unicode/In/Bopomofo.pl index 553560670c..3dbf73a236 100644 --- a/lib/unicode/In/Bopomofo.pl +++ b/lib/unicode/In/Bopomofo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3100 312F diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/BopomofoExtended.pl index d0ee43a437..f2ca6de96d 100644 --- a/lib/unicode/In/BopomofoExtended.pl +++ b/lib/unicode/In/BopomofoExtended.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 31A0 31BF diff --git a/lib/unicode/In/BoxDrawing.pl b/lib/unicode/In/BoxDrawing.pl index d580199b7f..a3cd897498 100644 --- a/lib/unicode/In/BoxDrawing.pl +++ b/lib/unicode/In/BoxDrawing.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2500 257F diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/BraillePatterns.pl index e5c9e4ca70..58afc05a20 100644 --- a/lib/unicode/In/BraillePatterns.pl +++ b/lib/unicode/In/BraillePatterns.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2800 28FF diff --git a/lib/unicode/In/CJKCompatibility.pl b/lib/unicode/In/CJKCompatibility.pl index 07ab8edfd4..793520f4eb 100644 --- a/lib/unicode/In/CJKCompatibility.pl +++ b/lib/unicode/In/CJKCompatibility.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3300 33FF diff --git a/lib/unicode/In/CJKCompatibilityForms.pl b/lib/unicode/In/CJKCompatibilityForms.pl index 122ccd7ad6..a9ba270122 100644 --- a/lib/unicode/In/CJKCompatibilityForms.pl +++ b/lib/unicode/In/CJKCompatibilityForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FE30 FE4F diff --git a/lib/unicode/In/CJKCompatibilityIdeographs.pl b/lib/unicode/In/CJKCompatibilityIdeographs.pl index 59c8e5dd5b..d841bc5482 100644 --- a/lib/unicode/In/CJKCompatibilityIdeographs.pl +++ b/lib/unicode/In/CJKCompatibilityIdeographs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; F900 FAFF diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/CJKRadicalsSupplement.pl index d4c0c82bb6..2d1370799a 100644 --- a/lib/unicode/In/CJKRadicalsSupplement.pl +++ b/lib/unicode/In/CJKRadicalsSupplement.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2E80 2EFF diff --git a/lib/unicode/In/CJKSymbolsandPunctuation.pl b/lib/unicode/In/CJKSymbolsandPunctuation.pl index 24ecc37b67..ca525ae383 100644 --- a/lib/unicode/In/CJKSymbolsandPunctuation.pl +++ b/lib/unicode/In/CJKSymbolsandPunctuation.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3000 303F diff --git a/lib/unicode/In/CJKUnifiedIdeographs.pl b/lib/unicode/In/CJKUnifiedIdeographs.pl index 351cf74a82..729f4c6315 100644 --- a/lib/unicode/In/CJKUnifiedIdeographs.pl +++ b/lib/unicode/In/CJKUnifiedIdeographs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 4E00 9FFF diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl index 012f54c824..e92f091938 100644 --- a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl +++ b/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3400 4DB5 diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/Cherokee.pl index 10cae1a652..1e9ad746d3 100644 --- a/lib/unicode/In/Cherokee.pl +++ b/lib/unicode/In/Cherokee.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 13A0 13FF diff --git a/lib/unicode/In/CombiningDiacriticalMarks.pl b/lib/unicode/In/CombiningDiacriticalMarks.pl index a32f974bfb..d3a45d4cd0 100644 --- a/lib/unicode/In/CombiningDiacriticalMarks.pl +++ b/lib/unicode/In/CombiningDiacriticalMarks.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 036F diff --git a/lib/unicode/In/CombiningHalfMarks.pl b/lib/unicode/In/CombiningHalfMarks.pl index 100471bdbb..4f0a5731a7 100644 --- a/lib/unicode/In/CombiningHalfMarks.pl +++ b/lib/unicode/In/CombiningHalfMarks.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FE20 FE2F diff --git a/lib/unicode/In/CombiningMarksforSymbols.pl b/lib/unicode/In/CombiningMarksforSymbols.pl index f45e7e0490..9dde706cc3 100644 --- a/lib/unicode/In/CombiningMarksforSymbols.pl +++ b/lib/unicode/In/CombiningMarksforSymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 20D0 20FF diff --git a/lib/unicode/In/ControlPictures.pl b/lib/unicode/In/ControlPictures.pl index 77a759f1a0..78113e8c55 100644 --- a/lib/unicode/In/ControlPictures.pl +++ b/lib/unicode/In/ControlPictures.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2400 243F diff --git a/lib/unicode/In/CurrencySymbols.pl b/lib/unicode/In/CurrencySymbols.pl index 567ae97da3..8cbc1600e9 100644 --- a/lib/unicode/In/CurrencySymbols.pl +++ b/lib/unicode/In/CurrencySymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 20A0 20CF diff --git a/lib/unicode/In/Cyrillic.pl b/lib/unicode/In/Cyrillic.pl index 9ca104c7db..f057731818 100644 --- a/lib/unicode/In/Cyrillic.pl +++ b/lib/unicode/In/Cyrillic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0400 04FF diff --git a/lib/unicode/In/Devanagari.pl b/lib/unicode/In/Devanagari.pl index 61372b58ab..c99eff18ec 100644 --- a/lib/unicode/In/Devanagari.pl +++ b/lib/unicode/In/Devanagari.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0900 097F diff --git a/lib/unicode/In/Dingbats.pl b/lib/unicode/In/Dingbats.pl index 0f820ca711..1bbb102999 100644 --- a/lib/unicode/In/Dingbats.pl +++ b/lib/unicode/In/Dingbats.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2700 27BF diff --git a/lib/unicode/In/EnclosedAlphanumerics.pl b/lib/unicode/In/EnclosedAlphanumerics.pl index de52aa8d99..46b4cf5589 100644 --- a/lib/unicode/In/EnclosedAlphanumerics.pl +++ b/lib/unicode/In/EnclosedAlphanumerics.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2460 24FF diff --git a/lib/unicode/In/EnclosedCJKLettersandMonths.pl b/lib/unicode/In/EnclosedCJKLettersandMonths.pl index e4de0e0261..da5a7a1ecb 100644 --- a/lib/unicode/In/EnclosedCJKLettersandMonths.pl +++ b/lib/unicode/In/EnclosedCJKLettersandMonths.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3200 32FF diff --git a/lib/unicode/In/Ethiopic.pl b/lib/unicode/In/Ethiopic.pl index 13c309050a..5b472c47c5 100644 --- a/lib/unicode/In/Ethiopic.pl +++ b/lib/unicode/In/Ethiopic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1200 137F diff --git a/lib/unicode/In/GeneralPunctuation.pl b/lib/unicode/In/GeneralPunctuation.pl index 81c76992dc..aa82c30bd7 100644 --- a/lib/unicode/In/GeneralPunctuation.pl +++ b/lib/unicode/In/GeneralPunctuation.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2000 206F diff --git a/lib/unicode/In/GeometricShapes.pl b/lib/unicode/In/GeometricShapes.pl index 170422d2d0..6cf8ea72f7 100644 --- a/lib/unicode/In/GeometricShapes.pl +++ b/lib/unicode/In/GeometricShapes.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 25A0 25FF diff --git a/lib/unicode/In/Georgian.pl b/lib/unicode/In/Georgian.pl index 773ed1562a..493f57053e 100644 --- a/lib/unicode/In/Georgian.pl +++ b/lib/unicode/In/Georgian.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 10A0 10FF diff --git a/lib/unicode/In/Greek.pl b/lib/unicode/In/Greek.pl index ff753d19b4..ac4bbee588 100644 --- a/lib/unicode/In/Greek.pl +++ b/lib/unicode/In/Greek.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0370 03FF diff --git a/lib/unicode/In/GreekExtended.pl b/lib/unicode/In/GreekExtended.pl index b8f02e7f0a..acd43be814 100644 --- a/lib/unicode/In/GreekExtended.pl +++ b/lib/unicode/In/GreekExtended.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1F00 1FFF diff --git a/lib/unicode/In/Gujarati.pl b/lib/unicode/In/Gujarati.pl index ff6c6503bb..0e3c8e98ce 100644 --- a/lib/unicode/In/Gujarati.pl +++ b/lib/unicode/In/Gujarati.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0A80 0AFF diff --git a/lib/unicode/In/Gurmukhi.pl b/lib/unicode/In/Gurmukhi.pl index b888df6941..32ff23943b 100644 --- a/lib/unicode/In/Gurmukhi.pl +++ b/lib/unicode/In/Gurmukhi.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0A00 0A7F diff --git a/lib/unicode/In/HalfwidthandFullwidthForms.pl b/lib/unicode/In/HalfwidthandFullwidthForms.pl index e45265393f..fd3ba327f6 100644 --- a/lib/unicode/In/HalfwidthandFullwidthForms.pl +++ b/lib/unicode/In/HalfwidthandFullwidthForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FF00 FFEF diff --git a/lib/unicode/In/HangulCompatibilityJamo.pl b/lib/unicode/In/HangulCompatibilityJamo.pl index c15379fafc..744e57270f 100644 --- a/lib/unicode/In/HangulCompatibilityJamo.pl +++ b/lib/unicode/In/HangulCompatibilityJamo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3130 318F diff --git a/lib/unicode/In/HangulJamo.pl b/lib/unicode/In/HangulJamo.pl index c329b54c34..a1d1c67708 100644 --- a/lib/unicode/In/HangulJamo.pl +++ b/lib/unicode/In/HangulJamo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1100 11FF diff --git a/lib/unicode/In/HangulSyllables.pl b/lib/unicode/In/HangulSyllables.pl index 7d91a363f5..80cd4a4420 100644 --- a/lib/unicode/In/HangulSyllables.pl +++ b/lib/unicode/In/HangulSyllables.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; AC00 D7A3 diff --git a/lib/unicode/In/Hebrew.pl b/lib/unicode/In/Hebrew.pl index abe7b9ede4..2e29a28236 100644 --- a/lib/unicode/In/Hebrew.pl +++ b/lib/unicode/In/Hebrew.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0590 05FF diff --git a/lib/unicode/In/HighPrivateUseSurrogates.pl b/lib/unicode/In/HighPrivateUseSurrogates.pl index 6ed7ac96fd..0e1320d7ee 100644 --- a/lib/unicode/In/HighPrivateUseSurrogates.pl +++ b/lib/unicode/In/HighPrivateUseSurrogates.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; DB80 DBFF diff --git a/lib/unicode/In/HighSurrogates.pl b/lib/unicode/In/HighSurrogates.pl index 924a0c9bdb..6acc6c4503 100644 --- a/lib/unicode/In/HighSurrogates.pl +++ b/lib/unicode/In/HighSurrogates.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; D800 DB7F diff --git a/lib/unicode/In/Hiragana.pl b/lib/unicode/In/Hiragana.pl index 7a65302188..5905fe9b28 100644 --- a/lib/unicode/In/Hiragana.pl +++ b/lib/unicode/In/Hiragana.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3040 309F diff --git a/lib/unicode/In/IPAExtensions.pl b/lib/unicode/In/IPAExtensions.pl index 20906d6300..5365373c1b 100644 --- a/lib/unicode/In/IPAExtensions.pl +++ b/lib/unicode/In/IPAExtensions.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0250 02AF diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/IdeographicDescriptionCharacters.pl index 4baae881a1..dafb5b4fe3 100644 --- a/lib/unicode/In/IdeographicDescriptionCharacters.pl +++ b/lib/unicode/In/IdeographicDescriptionCharacters.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2FF0 2FFF diff --git a/lib/unicode/In/Kanbun.pl b/lib/unicode/In/Kanbun.pl index 57d6bd21f4..9ad03a661b 100644 --- a/lib/unicode/In/Kanbun.pl +++ b/lib/unicode/In/Kanbun.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3190 319F diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/KangxiRadicals.pl index d26fd6c774..165398c9e9 100644 --- a/lib/unicode/In/KangxiRadicals.pl +++ b/lib/unicode/In/KangxiRadicals.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2F00 2FDF diff --git a/lib/unicode/In/Kannada.pl b/lib/unicode/In/Kannada.pl index 109197a6f7..a679445f3f 100644 --- a/lib/unicode/In/Kannada.pl +++ b/lib/unicode/In/Kannada.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0C80 0CFF diff --git a/lib/unicode/In/Katakana.pl b/lib/unicode/In/Katakana.pl index 93bd5a03fa..2976d25822 100644 --- a/lib/unicode/In/Katakana.pl +++ b/lib/unicode/In/Katakana.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 30A0 30FF diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/Khmer.pl index f3e86851b3..6a85224223 100644 --- a/lib/unicode/In/Khmer.pl +++ b/lib/unicode/In/Khmer.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1780 17FF diff --git a/lib/unicode/In/Lao.pl b/lib/unicode/In/Lao.pl index 41ff11f805..fdddd86766 100644 --- a/lib/unicode/In/Lao.pl +++ b/lib/unicode/In/Lao.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0E80 0EFF diff --git a/lib/unicode/In/Latin-1Supplement.pl b/lib/unicode/In/Latin-1Supplement.pl index 1b252eb23e..6a901fba1c 100644 --- a/lib/unicode/In/Latin-1Supplement.pl +++ b/lib/unicode/In/Latin-1Supplement.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0080 00FF diff --git a/lib/unicode/In/LatinExtended-A.pl b/lib/unicode/In/LatinExtended-A.pl index b8be987db0..a042350176 100644 --- a/lib/unicode/In/LatinExtended-A.pl +++ b/lib/unicode/In/LatinExtended-A.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0100 017F diff --git a/lib/unicode/In/LatinExtended-B.pl b/lib/unicode/In/LatinExtended-B.pl index b9aff43f3d..b7106c6d37 100644 --- a/lib/unicode/In/LatinExtended-B.pl +++ b/lib/unicode/In/LatinExtended-B.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0180 024F diff --git a/lib/unicode/In/LatinExtendedAdditional.pl b/lib/unicode/In/LatinExtendedAdditional.pl index d309e90814..e17cc3de95 100644 --- a/lib/unicode/In/LatinExtendedAdditional.pl +++ b/lib/unicode/In/LatinExtendedAdditional.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1E00 1EFF diff --git a/lib/unicode/In/LetterlikeSymbols.pl b/lib/unicode/In/LetterlikeSymbols.pl index 1768740d42..c2249a7b94 100644 --- a/lib/unicode/In/LetterlikeSymbols.pl +++ b/lib/unicode/In/LetterlikeSymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2100 214F diff --git a/lib/unicode/In/LowSurrogates.pl b/lib/unicode/In/LowSurrogates.pl index 752b264e81..025bd13950 100644 --- a/lib/unicode/In/LowSurrogates.pl +++ b/lib/unicode/In/LowSurrogates.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; DC00 DFFF diff --git a/lib/unicode/In/Malayalam.pl b/lib/unicode/In/Malayalam.pl index 8fb57cdb10..5a01d40927 100644 --- a/lib/unicode/In/Malayalam.pl +++ b/lib/unicode/In/Malayalam.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0D00 0D7F diff --git a/lib/unicode/In/MathematicalOperators.pl b/lib/unicode/In/MathematicalOperators.pl index 055f19e590..8b45e1881c 100644 --- a/lib/unicode/In/MathematicalOperators.pl +++ b/lib/unicode/In/MathematicalOperators.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2200 22FF diff --git a/lib/unicode/In/MiscellaneousSymbols.pl b/lib/unicode/In/MiscellaneousSymbols.pl index 9dcdd26954..cc5b02fdc2 100644 --- a/lib/unicode/In/MiscellaneousSymbols.pl +++ b/lib/unicode/In/MiscellaneousSymbols.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2600 26FF diff --git a/lib/unicode/In/MiscellaneousTechnical.pl b/lib/unicode/In/MiscellaneousTechnical.pl index 370c00f320..a1058a0c6d 100644 --- a/lib/unicode/In/MiscellaneousTechnical.pl +++ b/lib/unicode/In/MiscellaneousTechnical.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2300 23FF diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/Mongolian.pl index 394014d496..98a4914ce6 100644 --- a/lib/unicode/In/Mongolian.pl +++ b/lib/unicode/In/Mongolian.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1800 18AF diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/Myanmar.pl index 4b3f3181b0..3aa2f41410 100644 --- a/lib/unicode/In/Myanmar.pl +++ b/lib/unicode/In/Myanmar.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1000 109F diff --git a/lib/unicode/In/NumberForms.pl b/lib/unicode/In/NumberForms.pl index d33ece0bbc..2a606a6bf7 100644 --- a/lib/unicode/In/NumberForms.pl +++ b/lib/unicode/In/NumberForms.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2150 218F diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/Ogham.pl index e097d90c77..de320a9172 100644 --- a/lib/unicode/In/Ogham.pl +++ b/lib/unicode/In/Ogham.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1680 169F diff --git a/lib/unicode/In/OpticalCharacterRecognition.pl b/lib/unicode/In/OpticalCharacterRecognition.pl index be1d981c7c..7f0aff830e 100644 --- a/lib/unicode/In/OpticalCharacterRecognition.pl +++ b/lib/unicode/In/OpticalCharacterRecognition.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2440 245F diff --git a/lib/unicode/In/Oriya.pl b/lib/unicode/In/Oriya.pl index 5a680f6743..771a245757 100644 --- a/lib/unicode/In/Oriya.pl +++ b/lib/unicode/In/Oriya.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0B00 0B7F diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl index 0c118f4fe4..0b0c00407d 100644 --- a/lib/unicode/In/PrivateUse.pl +++ b/lib/unicode/In/PrivateUse.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; E000 F8FF diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/Runic.pl index 0bd42df80c..52ca7aa4fb 100644 --- a/lib/unicode/In/Runic.pl +++ b/lib/unicode/In/Runic.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 16A0 16FF diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/Sinhala.pl index 37e007c057..5a892fd3c1 100644 --- a/lib/unicode/In/Sinhala.pl +++ b/lib/unicode/In/Sinhala.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0D80 0DFF diff --git a/lib/unicode/In/SmallFormVariants.pl b/lib/unicode/In/SmallFormVariants.pl index 736415e67e..148e6e85b8 100644 --- a/lib/unicode/In/SmallFormVariants.pl +++ b/lib/unicode/In/SmallFormVariants.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FE50 FE6F diff --git a/lib/unicode/In/SpacingModifierLetters.pl b/lib/unicode/In/SpacingModifierLetters.pl index 6e9cdf0b53..0e31fea4b4 100644 --- a/lib/unicode/In/SpacingModifierLetters.pl +++ b/lib/unicode/In/SpacingModifierLetters.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 02B0 02FF diff --git a/lib/unicode/In/Specials.pl b/lib/unicode/In/Specials.pl index f9f730f840..03f69a3b8e 100644 --- a/lib/unicode/In/Specials.pl +++ b/lib/unicode/In/Specials.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; FFF0 FFFD diff --git a/lib/unicode/In/SuperscriptsandSubscripts.pl b/lib/unicode/In/SuperscriptsandSubscripts.pl index efcec0b841..b0f90cd67b 100644 --- a/lib/unicode/In/SuperscriptsandSubscripts.pl +++ b/lib/unicode/In/SuperscriptsandSubscripts.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 2070 209F diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/Syriac.pl index 7c81fb6f32..f85f1a9fd7 100644 --- a/lib/unicode/In/Syriac.pl +++ b/lib/unicode/In/Syriac.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0700 074F diff --git a/lib/unicode/In/Tamil.pl b/lib/unicode/In/Tamil.pl index e65ed2fa19..71fa923d6f 100644 --- a/lib/unicode/In/Tamil.pl +++ b/lib/unicode/In/Tamil.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0B80 0BFF diff --git a/lib/unicode/In/Telugu.pl b/lib/unicode/In/Telugu.pl index d5ed2368c2..ff09b1ed87 100644 --- a/lib/unicode/In/Telugu.pl +++ b/lib/unicode/In/Telugu.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0C00 0C7F diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/Thaana.pl index 361bd4d4b4..f88768c924 100644 --- a/lib/unicode/In/Thaana.pl +++ b/lib/unicode/In/Thaana.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0780 07BF diff --git a/lib/unicode/In/Thai.pl b/lib/unicode/In/Thai.pl index 3376de4e18..e77c0c512f 100644 --- a/lib/unicode/In/Thai.pl +++ b/lib/unicode/In/Thai.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. 
# Any changes made here will be lost! return <<'END'; 0E00 0E7F diff --git a/lib/unicode/In/Tibetan.pl b/lib/unicode/In/Tibetan.pl index 50837ad8bc..35436b3b14 100644 --- a/lib/unicode/In/Tibetan.pl +++ b/lib/unicode/In/Tibetan.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0F00 0FFF diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl index ad4eb27866..83c6a78cca 100644 --- a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl +++ b/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1400 167F diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/YiRadicals.pl index f25c6954ff..7350871cb2 100644 --- a/lib/unicode/In/YiRadicals.pl +++ b/lib/unicode/In/YiRadicals.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; A490 A4CF diff --git a/lib/unicode/In/YiSyllables.pl b/lib/unicode/In/YiSyllables.pl index f4e3a8bcbc..baa038eb30 100644 --- a/lib/unicode/In/YiSyllables.pl +++ b/lib/unicode/In/YiSyllables.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; A000 A48F diff --git a/lib/unicode/Is/ASCII.pl b/lib/unicode/Is/ASCII.pl index 63f95ae7dd..1434a55d96 100644 --- a/lib/unicode/Is/ASCII.pl +++ b/lib/unicode/Is/ASCII.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 007f diff --git a/lib/unicode/Is/Alnum.pl b/lib/unicode/Is/Alnum.pl index 203860bac1..94f9a5c621 100644 --- a/lib/unicode/Is/Alnum.pl +++ b/lib/unicode/Is/Alnum.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Alpha.pl b/lib/unicode/Is/Alpha.pl index 90020b8fb6..de5046f9d4 100644 --- a/lib/unicode/Is/Alpha.pl +++ b/lib/unicode/Is/Alpha.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/BidiAL.pl b/lib/unicode/Is/BidiAL.pl index e04f2f562d..a8d43b816c 100644 --- a/lib/unicode/Is/BidiAL.pl +++ b/lib/unicode/Is/BidiAL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 061b diff --git a/lib/unicode/Is/BidiAN.pl b/lib/unicode/Is/BidiAN.pl index 4a71ae532d..4519c6d51d 100644 --- a/lib/unicode/Is/BidiAN.pl +++ b/lib/unicode/Is/BidiAN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0660 0669 diff --git a/lib/unicode/Is/BidiB.pl b/lib/unicode/Is/BidiB.pl index e4ba16567a..33bdb45944 100644 --- a/lib/unicode/Is/BidiB.pl +++ b/lib/unicode/Is/BidiB.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000a diff --git a/lib/unicode/Is/BidiBN.pl b/lib/unicode/Is/BidiBN.pl index 795a4a9f40..75b8871a59 100644 --- a/lib/unicode/Is/BidiBN.pl +++ b/lib/unicode/Is/BidiBN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 0008 diff --git a/lib/unicode/Is/BidiCS.pl b/lib/unicode/Is/BidiCS.pl index f8d037d118..e217653843 100644 --- a/lib/unicode/Is/BidiCS.pl +++ b/lib/unicode/Is/BidiCS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002c diff --git a/lib/unicode/Is/BidiEN.pl b/lib/unicode/Is/BidiEN.pl index d63270aecf..113de87454 100644 --- a/lib/unicode/Is/BidiEN.pl +++ b/lib/unicode/Is/BidiEN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/BidiES.pl b/lib/unicode/Is/BidiES.pl index 5a1a36a6d8..d1cd305f54 100644 --- a/lib/unicode/Is/BidiES.pl +++ b/lib/unicode/Is/BidiES.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. 
# Any changes made here will be lost! return <<'END'; 002f diff --git a/lib/unicode/Is/BidiET.pl b/lib/unicode/Is/BidiET.pl index 5e7af2bbf4..0a66fa8346 100644 --- a/lib/unicode/Is/BidiET.pl +++ b/lib/unicode/Is/BidiET.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0023 0025 diff --git a/lib/unicode/Is/BidiL.pl b/lib/unicode/Is/BidiL.pl index 8dc4ca87c0..a08d8b8900 100644 --- a/lib/unicode/Is/BidiL.pl +++ b/lib/unicode/Is/BidiL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a @@ -320,4 +320,6 @@ ffc2 ffc7 ffca ffcf ffd2 ffd7 ffda ffdc +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/BidiLRE.pl b/lib/unicode/Is/BidiLRE.pl index ef2a6e462f..25704212c2 100644 --- a/lib/unicode/Is/BidiLRE.pl +++ b/lib/unicode/Is/BidiLRE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202a diff --git a/lib/unicode/Is/BidiLRO.pl b/lib/unicode/Is/BidiLRO.pl index e9958c4b81..721a22311e 100644 --- a/lib/unicode/Is/BidiLRO.pl +++ b/lib/unicode/Is/BidiLRO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202d diff --git a/lib/unicode/Is/BidiNSM.pl b/lib/unicode/Is/BidiNSM.pl index 191bc052a9..a5c3af4b34 100644 --- a/lib/unicode/Is/BidiNSM.pl +++ b/lib/unicode/Is/BidiNSM.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 034e diff --git a/lib/unicode/Is/BidiON.pl b/lib/unicode/Is/BidiON.pl index bde00ff123..ec0f18ff1d 100644 --- a/lib/unicode/Is/BidiON.pl +++ b/lib/unicode/Is/BidiON.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0022 diff --git a/lib/unicode/Is/BidiPDF.pl b/lib/unicode/Is/BidiPDF.pl index 4a3eedd564..20e3d9d31b 100644 --- a/lib/unicode/Is/BidiPDF.pl +++ b/lib/unicode/Is/BidiPDF.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202c diff --git a/lib/unicode/Is/BidiR.pl b/lib/unicode/Is/BidiR.pl index fccc1f6d6e..9f776ae53f 100644 --- a/lib/unicode/Is/BidiR.pl +++ b/lib/unicode/Is/BidiR.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 05be diff --git a/lib/unicode/Is/BidiRLE.pl b/lib/unicode/Is/BidiRLE.pl index d789246ddb..eaf7ab7a23 100644 --- a/lib/unicode/Is/BidiRLE.pl +++ b/lib/unicode/Is/BidiRLE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202b diff --git a/lib/unicode/Is/BidiRLO.pl b/lib/unicode/Is/BidiRLO.pl index fcb81acc93..8974dd955a 100644 --- a/lib/unicode/Is/BidiRLO.pl +++ b/lib/unicode/Is/BidiRLO.pl @@ -1,5 +1,5 @@ # !!!!!!! 
DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 202e diff --git a/lib/unicode/Is/BidiS.pl b/lib/unicode/Is/BidiS.pl index b28b3310ea..ac2655d6ed 100644 --- a/lib/unicode/Is/BidiS.pl +++ b/lib/unicode/Is/BidiS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0009 diff --git a/lib/unicode/Is/BidiWS.pl b/lib/unicode/Is/BidiWS.pl index 25d8b8f6aa..ebd24e546e 100644 --- a/lib/unicode/Is/BidiWS.pl +++ b/lib/unicode/Is/BidiWS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000c diff --git a/lib/unicode/Is/C.pl b/lib/unicode/Is/C.pl index 0db83c4bf3..51e4ede067 100644 --- a/lib/unicode/Is/C.pl +++ b/lib/unicode/Is/C.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f @@ -15,4 +15,6 @@ dc00 dfff e000 f8ff feff fff9 fffb +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/Cc.pl b/lib/unicode/Is/Cc.pl index d7184e3151..6b97adc9eb 100644 --- a/lib/unicode/Is/Cc.pl +++ b/lib/unicode/Is/Cc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0000 001f diff --git a/lib/unicode/Is/Cf.pl b/lib/unicode/Is/Cf.pl index 896c3e6cd6..cef5ac448b 100644 --- a/lib/unicode/Is/Cf.pl +++ b/lib/unicode/Is/Cf.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 070f diff --git a/lib/unicode/Is/Cn.pl b/lib/unicode/Is/Cn.pl index 3c686154c1..fb75e8769c 100644 --- a/lib/unicode/Is/Cn.pl +++ b/lib/unicode/Is/Cn.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0220 0221 @@ -356,4 +356,18 @@ ffd8 ffd9 ffdd ffdf ffe7 ffef fff8 +10000 1fffd +20000 2fffd +30000 3fffd +40000 4fffd +50000 5fffd +60000 6fffd +70000 7fffd +80000 8fffd +90000 9fffd +a0000 afffd +b0000 bfffd +c0000 cfffd +d0000 dfffd +e0000 efffd END diff --git a/lib/unicode/Is/Cntrl.pl b/lib/unicode/Is/Cntrl.pl index 0db83c4bf3..51e4ede067 100644 --- a/lib/unicode/Is/Cntrl.pl +++ b/lib/unicode/Is/Cntrl.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f @@ -15,4 +15,6 @@ dc00 dfff e000 f8ff feff fff9 fffb +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/Co.pl b/lib/unicode/Is/Co.pl index c456d33aea..d077fd2bd3 100644 --- a/lib/unicode/Is/Co.pl +++ b/lib/unicode/Is/Co.pl @@ -1,6 +1,8 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; e000 f8ff +f0000 ffffd +100000 10fffd END diff --git a/lib/unicode/Is/Cs.pl b/lib/unicode/Is/Cs.pl index 8888fb5f3c..33e1daca88 100644 --- a/lib/unicode/Is/Cs.pl +++ b/lib/unicode/Is/Cs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; d800 db7f diff --git a/lib/unicode/Is/DCcircle.pl b/lib/unicode/Is/DCcircle.pl index 4c47b28b26..82c9edcd2b 100644 --- a/lib/unicode/Is/DCcircle.pl +++ b/lib/unicode/Is/DCcircle.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2460 2473 diff --git a/lib/unicode/Is/DCcompat.pl b/lib/unicode/Is/DCcompat.pl index 75d25695f3..5ae2b6a9f4 100644 --- a/lib/unicode/Is/DCcompat.pl +++ b/lib/unicode/Is/DCcompat.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a8 diff --git a/lib/unicode/Is/DCfinal.pl b/lib/unicode/Is/DCfinal.pl index 33fbf6aff8..3c81bcc6c4 100644 --- a/lib/unicode/Is/DCfinal.pl +++ b/lib/unicode/Is/DCfinal.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb51 diff --git a/lib/unicode/Is/DCfont.pl b/lib/unicode/Is/DCfont.pl index c72234b3bf..7feff18b22 100644 --- a/lib/unicode/Is/DCfont.pl +++ b/lib/unicode/Is/DCfont.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. 
Unicode.301. # Any changes made here will be lost! return <<'END'; 2102 diff --git a/lib/unicode/Is/DCfraction.pl b/lib/unicode/Is/DCfraction.pl index fc2dd6755d..abac9d9cfa 100644 --- a/lib/unicode/Is/DCfraction.pl +++ b/lib/unicode/Is/DCfraction.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00bc 00be diff --git a/lib/unicode/Is/DCinital.pl b/lib/unicode/Is/DCinital.pl index 2c9cf47e7d..8778a75ed5 100644 --- a/lib/unicode/Is/DCinital.pl +++ b/lib/unicode/Is/DCinital.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb55 diff --git a/lib/unicode/Is/DCinitial.pl b/lib/unicode/Is/DCinitial.pl index 0145b7dd71..c6d7802eaf 100644 --- a/lib/unicode/Is/DCinitial.pl +++ b/lib/unicode/Is/DCinitial.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb54 diff --git a/lib/unicode/Is/DCisolated.pl b/lib/unicode/Is/DCisolated.pl index cc8541eb7b..e4e24f786a 100644 --- a/lib/unicode/Is/DCisolated.pl +++ b/lib/unicode/Is/DCisolated.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fb50 diff --git a/lib/unicode/Is/DCnarrow.pl b/lib/unicode/Is/DCnarrow.pl index 9417de1bbd..7887452105 100644 --- a/lib/unicode/Is/DCnarrow.pl +++ b/lib/unicode/Is/DCnarrow.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. 
Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; ff61 ffbe diff --git a/lib/unicode/Is/DCnoBreak.pl b/lib/unicode/Is/DCnoBreak.pl index 1fd9e8735b..18c01059ed 100644 --- a/lib/unicode/Is/DCnoBreak.pl +++ b/lib/unicode/Is/DCnoBreak.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a0 diff --git a/lib/unicode/Is/DCsmall.pl b/lib/unicode/Is/DCsmall.pl index f6c8069163..3a37931b56 100644 --- a/lib/unicode/Is/DCsmall.pl +++ b/lib/unicode/Is/DCsmall.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fe50 fe52 diff --git a/lib/unicode/Is/DCsquare.pl b/lib/unicode/Is/DCsquare.pl index b55fdd9c6a..f27993d6b8 100644 --- a/lib/unicode/Is/DCsquare.pl +++ b/lib/unicode/Is/DCsquare.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3300 3357 diff --git a/lib/unicode/Is/DCsub.pl b/lib/unicode/Is/DCsub.pl index 98c4dfa87e..f709a228c2 100644 --- a/lib/unicode/Is/DCsub.pl +++ b/lib/unicode/Is/DCsub.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2080 208e diff --git a/lib/unicode/Is/DCsuper.pl b/lib/unicode/Is/DCsuper.pl index 865a26dd92..1e6a0c5feb 100644 --- a/lib/unicode/Is/DCsuper.pl +++ b/lib/unicode/Is/DCsuper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00aa diff --git a/lib/unicode/Is/DCvertical.pl b/lib/unicode/Is/DCvertical.pl index 5d55483606..33b9feb724 100644 --- a/lib/unicode/Is/DCvertical.pl +++ b/lib/unicode/Is/DCvertical.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; fe30 fe44 diff --git a/lib/unicode/Is/DCwide.pl b/lib/unicode/Is/DCwide.pl index 09dae19629..afe1e06b7d 100644 --- a/lib/unicode/Is/DCwide.pl +++ b/lib/unicode/Is/DCwide.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3000 diff --git a/lib/unicode/Is/DecoCanon.pl b/lib/unicode/Is/DecoCanon.pl index c5a59f6596..57c167b5f8 100644 --- a/lib/unicode/Is/DecoCanon.pl +++ b/lib/unicode/Is/DecoCanon.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00c0 00c5 diff --git a/lib/unicode/Is/DecoCompat.pl b/lib/unicode/Is/DecoCompat.pl index 43d34fc110..940d956f9f 100644 --- a/lib/unicode/Is/DecoCompat.pl +++ b/lib/unicode/Is/DecoCompat.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 00a0 diff --git a/lib/unicode/Is/Digit.pl b/lib/unicode/Is/Digit.pl index 2ab8156d77..259bb891f6 100644 --- a/lib/unicode/Is/Digit.pl +++ b/lib/unicode/Is/Digit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Graph.pl b/lib/unicode/Is/Graph.pl index 156f1711af..40d35067f6 100644 --- a/lib/unicode/Is/Graph.pl +++ b/lib/unicode/Is/Graph.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 007e diff --git a/lib/unicode/Is/L.pl b/lib/unicode/Is/L.pl index c32f83049c..bfe2c27412 100644 --- a/lib/unicode/Is/L.pl +++ b/lib/unicode/Is/L.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/LbrkAI.pl b/lib/unicode/Is/LbrkAI.pl index a15f76014e..297f5cf1fa 100644 --- a/lib/unicode/Is/LbrkAI.pl +++ b/lib/unicode/Is/LbrkAI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a1 diff --git a/lib/unicode/Is/LbrkAL.pl b/lib/unicode/Is/LbrkAL.pl index c705dc8a3d..c1fc416662 100644 --- a/lib/unicode/Is/LbrkAL.pl +++ b/lib/unicode/Is/LbrkAL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0023 diff --git a/lib/unicode/Is/LbrkB2.pl b/lib/unicode/Is/LbrkB2.pl index 527e4c8977..a1410da25f 100644 --- a/lib/unicode/Is/LbrkB2.pl +++ b/lib/unicode/Is/LbrkB2.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2014 diff --git a/lib/unicode/Is/LbrkBA.pl b/lib/unicode/Is/LbrkBA.pl index 053369bccc..93ec04efb4 100644 --- a/lib/unicode/Is/LbrkBA.pl +++ b/lib/unicode/Is/LbrkBA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0009 diff --git a/lib/unicode/Is/LbrkBB.pl b/lib/unicode/Is/LbrkBB.pl index 5d3952a25d..b9bc7e8e35 100644 --- a/lib/unicode/Is/LbrkBB.pl +++ b/lib/unicode/Is/LbrkBB.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 02c8 diff --git a/lib/unicode/Is/LbrkBK.pl b/lib/unicode/Is/LbrkBK.pl index 2c314fb0ba..17bb5514c5 100644 --- a/lib/unicode/Is/LbrkBK.pl +++ b/lib/unicode/Is/LbrkBK.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000c diff --git a/lib/unicode/Is/LbrkCB.pl b/lib/unicode/Is/LbrkCB.pl index 95f8b803dc..8da9eba019 100644 --- a/lib/unicode/Is/LbrkCB.pl +++ b/lib/unicode/Is/LbrkCB.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; fffc diff --git a/lib/unicode/Is/LbrkCL.pl b/lib/unicode/Is/LbrkCL.pl index a73130a6bd..353c59490b 100644 --- a/lib/unicode/Is/LbrkCL.pl +++ b/lib/unicode/Is/LbrkCL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0029 diff --git a/lib/unicode/Is/LbrkCM.pl b/lib/unicode/Is/LbrkCM.pl index 3d0f3474f8..87c7833f51 100644 --- a/lib/unicode/Is/LbrkCM.pl +++ b/lib/unicode/Is/LbrkCM.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 0008 diff --git a/lib/unicode/Is/LbrkCR.pl b/lib/unicode/Is/LbrkCR.pl index c61a527902..2548269791 100644 --- a/lib/unicode/Is/LbrkCR.pl +++ b/lib/unicode/Is/LbrkCR.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 000d diff --git a/lib/unicode/Is/LbrkEX.pl b/lib/unicode/Is/LbrkEX.pl index d847092dce..173bf58d55 100644 --- a/lib/unicode/Is/LbrkEX.pl +++ b/lib/unicode/Is/LbrkEX.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 diff --git a/lib/unicode/Is/LbrkGL.pl b/lib/unicode/Is/LbrkGL.pl index b03a627c2b..49bb985c12 100644 --- a/lib/unicode/Is/LbrkGL.pl +++ b/lib/unicode/Is/LbrkGL.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 00a0 diff --git a/lib/unicode/Is/LbrkHY.pl b/lib/unicode/Is/LbrkHY.pl index 6989bc8631..71e5886869 100644 --- a/lib/unicode/Is/LbrkHY.pl +++ b/lib/unicode/Is/LbrkHY.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002d diff --git a/lib/unicode/Is/LbrkID.pl b/lib/unicode/Is/LbrkID.pl index 0b1cc8453f..6e6100c191 100644 --- a/lib/unicode/Is/LbrkID.pl +++ b/lib/unicode/Is/LbrkID.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1100 1159 diff --git a/lib/unicode/Is/LbrkIN.pl b/lib/unicode/Is/LbrkIN.pl index 825198d12c..1758673e4b 100644 --- a/lib/unicode/Is/LbrkIN.pl +++ b/lib/unicode/Is/LbrkIN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2024 2026 diff --git a/lib/unicode/Is/LbrkIS.pl b/lib/unicode/Is/LbrkIS.pl index afa01a8004..32dcfb0f5b 100644 --- a/lib/unicode/Is/LbrkIS.pl +++ b/lib/unicode/Is/LbrkIS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002c diff --git a/lib/unicode/Is/LbrkLF.pl b/lib/unicode/Is/LbrkLF.pl index dcb5490eb2..9b845aed6f 100644 --- a/lib/unicode/Is/LbrkLF.pl +++ b/lib/unicode/Is/LbrkLF.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 000a diff --git a/lib/unicode/Is/LbrkNS.pl b/lib/unicode/Is/LbrkNS.pl index af9f3371c9..b7ff279c1d 100644 --- a/lib/unicode/Is/LbrkNS.pl +++ b/lib/unicode/Is/LbrkNS.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0e5a 0e5b diff --git a/lib/unicode/Is/LbrkNU.pl b/lib/unicode/Is/LbrkNU.pl index 5c55d221b4..eb51418f68 100644 --- a/lib/unicode/Is/LbrkNU.pl +++ b/lib/unicode/Is/LbrkNU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/LbrkOP.pl b/lib/unicode/Is/LbrkOP.pl index a7dee379eb..90f18098b7 100644 --- a/lib/unicode/Is/LbrkOP.pl +++ b/lib/unicode/Is/LbrkOP.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0028 diff --git a/lib/unicode/Is/LbrkPO.pl b/lib/unicode/Is/LbrkPO.pl index cdfb56ea17..07f7bf74d2 100644 --- a/lib/unicode/Is/LbrkPO.pl +++ b/lib/unicode/Is/LbrkPO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0025 diff --git a/lib/unicode/Is/LbrkPR.pl b/lib/unicode/Is/LbrkPR.pl index c2d20da481..03466c912a 100644 --- a/lib/unicode/Is/LbrkPR.pl +++ b/lib/unicode/Is/LbrkPR.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0024 diff --git a/lib/unicode/Is/LbrkQU.pl b/lib/unicode/Is/LbrkQU.pl index 46a6ee3a07..35dac981c5 100644 --- a/lib/unicode/Is/LbrkQU.pl +++ b/lib/unicode/Is/LbrkQU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0022 diff --git a/lib/unicode/Is/LbrkSA.pl b/lib/unicode/Is/LbrkSA.pl index bae4ced946..4539e093d0 100644 --- a/lib/unicode/Is/LbrkSA.pl +++ b/lib/unicode/Is/LbrkSA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0e01 0e30 diff --git a/lib/unicode/Is/LbrkSG.pl b/lib/unicode/Is/LbrkSG.pl index 8888fb5f3c..33e1daca88 100644 --- a/lib/unicode/Is/LbrkSG.pl +++ b/lib/unicode/Is/LbrkSG.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; d800 db7f diff --git a/lib/unicode/Is/LbrkSP.pl b/lib/unicode/Is/LbrkSP.pl index e786a0c935..2153e128dc 100644 --- a/lib/unicode/Is/LbrkSP.pl +++ b/lib/unicode/Is/LbrkSP.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0020 diff --git a/lib/unicode/Is/LbrkSY.pl b/lib/unicode/Is/LbrkSY.pl index d2a33aeacc..ce65fe1d85 100644 --- a/lib/unicode/Is/LbrkSY.pl +++ b/lib/unicode/Is/LbrkSY.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 002f diff --git a/lib/unicode/Is/LbrkXX.pl b/lib/unicode/Is/LbrkXX.pl index ec287c456a..c3b32ac61d 100644 --- a/lib/unicode/Is/LbrkXX.pl +++ b/lib/unicode/Is/LbrkXX.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; END diff --git a/lib/unicode/Is/LbrkZW.pl b/lib/unicode/Is/LbrkZW.pl index 96d8e99efc..63c9dcf489 100644 --- a/lib/unicode/Is/LbrkZW.pl +++ b/lib/unicode/Is/LbrkZW.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 200b diff --git a/lib/unicode/Is/Ll.pl b/lib/unicode/Is/Ll.pl index 28147943e8..03dafcc742 100644 --- a/lib/unicode/Is/Ll.pl +++ b/lib/unicode/Is/Ll.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a diff --git a/lib/unicode/Is/Lm.pl b/lib/unicode/Is/Lm.pl index 4380afe18e..23a3c55d4a 100644 --- a/lib/unicode/Is/Lm.pl +++ b/lib/unicode/Is/Lm.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 02b0 02b8 diff --git a/lib/unicode/Is/Lo.pl b/lib/unicode/Is/Lo.pl index 78fab4cd0e..d82c6bbdaf 100644 --- a/lib/unicode/Is/Lo.pl +++ b/lib/unicode/Is/Lo.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 01bb diff --git a/lib/unicode/Is/Lower.pl b/lib/unicode/Is/Lower.pl index 28147943e8..03dafcc742 100644 --- a/lib/unicode/Is/Lower.pl +++ b/lib/unicode/Is/Lower.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a diff --git a/lib/unicode/Is/Lt.pl b/lib/unicode/Is/Lt.pl index 809c37a1f2..b19755ca8e 100644 --- a/lib/unicode/Is/Lt.pl +++ b/lib/unicode/Is/Lt.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 01c5 diff --git a/lib/unicode/Is/Lu.pl b/lib/unicode/Is/Lu.pl index 8dde2742d0..07dee4834c 100644 --- a/lib/unicode/Is/Lu.pl +++ b/lib/unicode/Is/Lu.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/M.pl b/lib/unicode/Is/M.pl index 9367775a82..e3ef7f3dfa 100644 --- a/lib/unicode/Is/M.pl +++ b/lib/unicode/Is/M.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 034e diff --git a/lib/unicode/Is/Mc.pl b/lib/unicode/Is/Mc.pl index 937d8d4005..a76d66c9b3 100644 --- a/lib/unicode/Is/Mc.pl +++ b/lib/unicode/Is/Mc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0903 diff --git a/lib/unicode/Is/Me.pl b/lib/unicode/Is/Me.pl index 00f446d87d..23ef860d8e 100644 --- a/lib/unicode/Is/Me.pl +++ b/lib/unicode/Is/Me.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0488 0489 diff --git a/lib/unicode/Is/Mirrored.pl b/lib/unicode/Is/Mirrored.pl index e2c55a6443..d324f506a7 100644 --- a/lib/unicode/Is/Mirrored.pl +++ b/lib/unicode/Is/Mirrored.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0028 0029 diff --git a/lib/unicode/Is/Mn.pl b/lib/unicode/Is/Mn.pl index aba40afa57..803e038d97 100644 --- a/lib/unicode/Is/Mn.pl +++ b/lib/unicode/Is/Mn.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0300 034e diff --git a/lib/unicode/Is/N.pl b/lib/unicode/Is/N.pl index 1291f2713f..8667e774b4 100644 --- a/lib/unicode/Is/N.pl +++ b/lib/unicode/Is/N.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Nd.pl b/lib/unicode/Is/Nd.pl index 2ab8156d77..259bb891f6 100644 --- a/lib/unicode/Is/Nd.pl +++ b/lib/unicode/Is/Nd.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Nl.pl b/lib/unicode/Is/Nl.pl index 8f1af469bb..bdeefd5761 100644 --- a/lib/unicode/Is/Nl.pl +++ b/lib/unicode/Is/Nl.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2160 2183 diff --git a/lib/unicode/Is/No.pl b/lib/unicode/Is/No.pl index 6a57dc5f89..13cac3b0e8 100644 --- a/lib/unicode/Is/No.pl +++ b/lib/unicode/Is/No.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00b2 00b3 diff --git a/lib/unicode/Is/P.pl b/lib/unicode/Is/P.pl index 8fd1e8e183..97330ecd48 100644 --- a/lib/unicode/Is/P.pl +++ b/lib/unicode/Is/P.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0023 diff --git a/lib/unicode/Is/Pc.pl b/lib/unicode/Is/Pc.pl index 342efac344..e14874d011 100644 --- a/lib/unicode/Is/Pc.pl +++ b/lib/unicode/Is/Pc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 005f diff --git a/lib/unicode/Is/Pd.pl b/lib/unicode/Is/Pd.pl index 58997ca7e9..b4a2ffbe8f 100644 --- a/lib/unicode/Is/Pd.pl +++ b/lib/unicode/Is/Pd.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 002d diff --git a/lib/unicode/Is/Pe.pl b/lib/unicode/Is/Pe.pl index 8879191c34..2b5bd3eeb9 100644 --- a/lib/unicode/Is/Pe.pl +++ b/lib/unicode/Is/Pe.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0029 diff --git a/lib/unicode/Is/Pf.pl b/lib/unicode/Is/Pf.pl index 166c64bbb6..b27a4f6851 100644 --- a/lib/unicode/Is/Pf.pl +++ b/lib/unicode/Is/Pf.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00bb diff --git a/lib/unicode/Is/Pi.pl b/lib/unicode/Is/Pi.pl index 7f2243d5d8..dbbae44957 100644 --- a/lib/unicode/Is/Pi.pl +++ b/lib/unicode/Is/Pi.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00ab diff --git a/lib/unicode/Is/Po.pl b/lib/unicode/Is/Po.pl index e6b8b02520..849ee17867 100644 --- a/lib/unicode/Is/Po.pl +++ b/lib/unicode/Is/Po.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0023 diff --git a/lib/unicode/Is/Print.pl b/lib/unicode/Is/Print.pl index 9560586065..c3adba6c5c 100644 --- a/lib/unicode/Is/Print.pl +++ b/lib/unicode/Is/Print.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0020 007e diff --git a/lib/unicode/Is/Ps.pl b/lib/unicode/Is/Ps.pl index a7dee379eb..90f18098b7 100644 --- a/lib/unicode/Is/Ps.pl +++ b/lib/unicode/Is/Ps.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0028 diff --git a/lib/unicode/Is/Punct.pl b/lib/unicode/Is/Punct.pl index 9e8684d6fc..9e088bab85 100644 --- a/lib/unicode/Is/Punct.pl +++ b/lib/unicode/Is/Punct.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0021 0023 diff --git a/lib/unicode/Is/S.pl b/lib/unicode/Is/S.pl index 8851766e9f..a304e17ff5 100644 --- a/lib/unicode/Is/S.pl +++ b/lib/unicode/Is/S.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0024 diff --git a/lib/unicode/Is/Sc.pl b/lib/unicode/Is/Sc.pl index 5776bd6a57..adeb3e4336 100644 --- a/lib/unicode/Is/Sc.pl +++ b/lib/unicode/Is/Sc.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0024 diff --git a/lib/unicode/Is/Sk.pl b/lib/unicode/Is/Sk.pl index b5f6e591a7..52f88ae004 100644 --- a/lib/unicode/Is/Sk.pl +++ b/lib/unicode/Is/Sk.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 005e diff --git a/lib/unicode/Is/Sm.pl b/lib/unicode/Is/Sm.pl index ae9424cc62..540da63e64 100644 --- a/lib/unicode/Is/Sm.pl +++ b/lib/unicode/Is/Sm.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 002b diff --git a/lib/unicode/Is/So.pl b/lib/unicode/Is/So.pl index 4e9dfc2b5e..3caf617b66 100644 --- a/lib/unicode/Is/So.pl +++ b/lib/unicode/Is/So.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 00a6 00a7 diff --git a/lib/unicode/Is/Space.pl b/lib/unicode/Is/Space.pl index 701329ff82..1625dce03b 100644 --- a/lib/unicode/Is/Space.pl +++ b/lib/unicode/Is/Space.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0009 000d diff --git a/lib/unicode/Is/SylA.pl b/lib/unicode/Is/SylA.pl index be1107822d..6a3fc47eb9 100644 --- a/lib/unicode/Is/SylA.pl +++ b/lib/unicode/Is/SylA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1203 diff --git a/lib/unicode/Is/SylAA.pl b/lib/unicode/Is/SylAA.pl index 45d6692de7..6d1bd6dadd 100644 --- a/lib/unicode/Is/SylAA.pl +++ b/lib/unicode/Is/SylAA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 140b diff --git a/lib/unicode/Is/SylAAI.pl b/lib/unicode/Is/SylAAI.pl index a8b03d4c6c..83134b6a16 100644 --- a/lib/unicode/Is/SylAAI.pl +++ b/lib/unicode/Is/SylAAI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1402 diff --git a/lib/unicode/Is/SylAI.pl b/lib/unicode/Is/SylAI.pl index b70d793bc6..e639bd0438 100644 --- a/lib/unicode/Is/SylAI.pl +++ b/lib/unicode/Is/SylAI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 141c diff --git a/lib/unicode/Is/SylC.pl b/lib/unicode/Is/SylC.pl index e2a1601dd3..fb8b08e300 100644 --- a/lib/unicode/Is/SylC.pl +++ b/lib/unicode/Is/SylC.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1205 diff --git a/lib/unicode/Is/SylE.pl b/lib/unicode/Is/SylE.pl index b3c3e60437..d762748c69 100644 --- a/lib/unicode/Is/SylE.pl +++ b/lib/unicode/Is/SylE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1204 diff --git a/lib/unicode/Is/SylEE.pl b/lib/unicode/Is/SylEE.pl index 0a22f78f65..9f8ff07dc1 100644 --- a/lib/unicode/Is/SylEE.pl +++ b/lib/unicode/Is/SylEE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 1408 diff --git a/lib/unicode/Is/SylI.pl b/lib/unicode/Is/SylI.pl index f80790ce44..29bc70f4fa 100644 --- a/lib/unicode/Is/SylI.pl +++ b/lib/unicode/Is/SylI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1202 diff --git a/lib/unicode/Is/SylII.pl b/lib/unicode/Is/SylII.pl index 4516d7a32a..2dcd49f24e 100644 --- a/lib/unicode/Is/SylII.pl +++ b/lib/unicode/Is/SylII.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1404 diff --git a/lib/unicode/Is/SylN.pl b/lib/unicode/Is/SylN.pl index 215463fb7f..d7d90c7a0d 100644 --- a/lib/unicode/Is/SylN.pl +++ b/lib/unicode/Is/SylN.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 3093 diff --git a/lib/unicode/Is/SylO.pl b/lib/unicode/Is/SylO.pl index a0a6f7dd01..2c795f0291 100644 --- a/lib/unicode/Is/SylO.pl +++ b/lib/unicode/Is/SylO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1206 diff --git a/lib/unicode/Is/SylOO.pl b/lib/unicode/Is/SylOO.pl index 12280534b1..27c8032275 100644 --- a/lib/unicode/Is/SylOO.pl +++ b/lib/unicode/Is/SylOO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 1406 1407 diff --git a/lib/unicode/Is/SylU.pl b/lib/unicode/Is/SylU.pl index c458382f25..117d981ee6 100644 --- a/lib/unicode/Is/SylU.pl +++ b/lib/unicode/Is/SylU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1201 diff --git a/lib/unicode/Is/SylV.pl b/lib/unicode/Is/SylV.pl index b6e76f81b9..e5a39ed654 100644 --- a/lib/unicode/Is/SylV.pl +++ b/lib/unicode/Is/SylV.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1200 diff --git a/lib/unicode/Is/SylWA.pl b/lib/unicode/Is/SylWA.pl index 9bb529ed01..39e94caabe 100644 --- a/lib/unicode/Is/SylWA.pl +++ b/lib/unicode/Is/SylWA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 120f diff --git a/lib/unicode/Is/SylWAA.pl b/lib/unicode/Is/SylWAA.pl index 5f3b784d0c..cd560eb8a9 100644 --- a/lib/unicode/Is/SylWAA.pl +++ b/lib/unicode/Is/SylWAA.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1419 141b diff --git a/lib/unicode/Is/SylWC.pl b/lib/unicode/Is/SylWC.pl index 3ad968c505..4272b8934f 100644 --- a/lib/unicode/Is/SylWC.pl +++ b/lib/unicode/Is/SylWC.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 124d diff --git a/lib/unicode/Is/SylWE.pl b/lib/unicode/Is/SylWE.pl index 9e32c0e602..c4c5ba99ae 100644 --- a/lib/unicode/Is/SylWE.pl +++ b/lib/unicode/Is/SylWE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 124c diff --git a/lib/unicode/Is/SylWEE.pl b/lib/unicode/Is/SylWEE.pl index c4bccb5240..d3160290fe 100644 --- a/lib/unicode/Is/SylWEE.pl +++ b/lib/unicode/Is/SylWEE.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 15d9 diff --git a/lib/unicode/Is/SylWI.pl b/lib/unicode/Is/SylWI.pl index 4cd6c6789c..c914b07a56 100644 --- a/lib/unicode/Is/SylWI.pl +++ b/lib/unicode/Is/SylWI.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 124a diff --git a/lib/unicode/Is/SylWII.pl b/lib/unicode/Is/SylWII.pl index bd68aeadf5..c990e437e6 100644 --- a/lib/unicode/Is/SylWII.pl +++ b/lib/unicode/Is/SylWII.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1410 1411 diff --git a/lib/unicode/Is/SylWO.pl b/lib/unicode/Is/SylWO.pl index 7676564130..a73cbdd0ec 100644 --- a/lib/unicode/Is/SylWO.pl +++ b/lib/unicode/Is/SylWO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 1412 1413 diff --git a/lib/unicode/Is/SylWOO.pl b/lib/unicode/Is/SylWOO.pl index 0ab766a553..6e92f850a7 100644 --- a/lib/unicode/Is/SylWOO.pl +++ b/lib/unicode/Is/SylWOO.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1414 1416 diff --git a/lib/unicode/Is/SylWU.pl b/lib/unicode/Is/SylWU.pl index 76af7aefad..d165f41d72 100644 --- a/lib/unicode/Is/SylWU.pl +++ b/lib/unicode/Is/SylWU.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 15d6 diff --git a/lib/unicode/Is/SylWV.pl b/lib/unicode/Is/SylWV.pl index 8bd8849042..6a06ae9087 100644 --- a/lib/unicode/Is/SylWV.pl +++ b/lib/unicode/Is/SylWV.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 1248 diff --git a/lib/unicode/Is/Upper.pl b/lib/unicode/Is/Upper.pl index 4fda655dc4..16f875241d 100644 --- a/lib/unicode/Is/Upper.pl +++ b/lib/unicode/Is/Upper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a diff --git a/lib/unicode/Is/Word.pl b/lib/unicode/Is/Word.pl index 2f13b382af..1c76c60b78 100644 --- a/lib/unicode/Is/Word.pl +++ b/lib/unicode/Is/Word.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/XDigit.pl b/lib/unicode/Is/XDigit.pl index e55682500b..b26a3b4074 100644 --- a/lib/unicode/Is/XDigit.pl +++ b/lib/unicode/Is/XDigit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 diff --git a/lib/unicode/Is/Z.pl b/lib/unicode/Is/Z.pl index 22a9792d4f..03416c0265 100644 --- a/lib/unicode/Is/Z.pl +++ b/lib/unicode/Is/Z.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0020 diff --git a/lib/unicode/Is/Zl.pl b/lib/unicode/Is/Zl.pl index 0989e1d920..5f127ce33a 100644 --- a/lib/unicode/Is/Zl.pl +++ b/lib/unicode/Is/Zl.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2028 diff --git a/lib/unicode/Is/Zp.pl b/lib/unicode/Is/Zp.pl index 3b23446fe9..4e38303e72 100644 --- a/lib/unicode/Is/Zp.pl +++ b/lib/unicode/Is/Zp.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 2029 diff --git a/lib/unicode/Is/Zs.pl b/lib/unicode/Is/Zs.pl index db18055ea4..56cf9e4662 100644 --- a/lib/unicode/Is/Zs.pl +++ b/lib/unicode/Is/Zs.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! 
return <<'END'; 0020 diff --git a/lib/unicode/Jamo.txt b/lib/unicode/Jamo.txt index 6910ab924e..ea288f0397 100644 --- a/lib/unicode/Jamo.txt +++ b/lib/unicode/Jamo.txt @@ -1,69 +1,91 @@ -#Value; Short Name; Unicode Name -U+1100; G; HANGUL CHOSEONG KIYEOK -U+1101; GG; HANGUL CHOSEONG SSANGKIYEOK -U+1102; N; HANGUL CHOSEONG NIEUN -U+1103; D; HANGUL CHOSEONG TIKEUT -U+1104; DD; HANGUL CHOSEONG SSANGTIKEUT -U+1105; R; HANGUL CHOSEONG RIEUL -U+1106; M; HANGUL CHOSEONG MIEUM -U+1107; B; HANGUL CHOSEONG PIEUP -U+1108; BB; HANGUL CHOSEONG SSANGPIEUP -U+1109; S; HANGUL CHOSEONG SIOS -U+110A; SS; HANGUL CHOSEONG SSANGSIOS -U+110B; ; HANGUL CHOSEONG IEUNG -U+110C; J; HANGUL CHOSEONG CIEUC -U+110D; JJ; HANGUL CHOSEONG SSANGCIEUC -U+110E; C; HANGUL CHOSEONG CHIEUCH -U+110F; K; HANGUL CHOSEONG KHIEUKH -U+1110; T; HANGUL CHOSEONG THIEUTH -U+1111; P; HANGUL CHOSEONG PHIEUPH -U+1112; H; HANGUL CHOSEONG HIEUH -U+1161; A; HANGUL JUNGSEONG A -U+1162; AE; HANGUL JUNGSEONG AE -U+1163; YA; HANGUL JUNGSEONG YA -U+1164; YAE; HANGUL JUNGSEONG YAE -U+1165; EO; HANGUL JUNGSEONG EO -U+1166; E; HANGUL JUNGSEONG E -U+1167; YEO; HANGUL JUNGSEONG YEO -U+1168; YE; HANGUL JUNGSEONG YE -U+1169; O; HANGUL JUNGSEONG O -U+116A; WA; HANGUL JUNGSEONG WA -U+116B; WAE; HANGUL JUNGSEONG WAE -U+116C; OE; HANGUL JUNGSEONG OE -U+116D; YO; HANGUL JUNGSEONG YO -U+116E; U; HANGUL JUNGSEONG U -U+116F; WEO; HANGUL JUNGSEONG WEO -U+1170; WE; HANGUL JUNGSEONG WE -U+1171; WI; HANGUL JUNGSEONG WI -U+1172; YU; HANGUL JUNGSEONG YU -U+1173; EU; HANGUL JUNGSEONG EU -U+1174; YI; HANGUL JUNGSEONG YI -U+1175; I; HANGUL JUNGSEONG I -U+11A8; G; HANGUL JONGSEONG KIYEOK -U+11A9; GG; HANGUL JONGSEONG SSANGKIYEOK -U+11AA; GS; HANGUL JONGSEONG KIYEOK-SIOS -U+11AB; N; HANGUL JONGSEONG NIEUN -U+11AC; NJ; HANGUL JONGSEONG NIEUN-CIEUC -U+11AD; NH; HANGUL JONGSEONG NIEUN-HIEUH -U+11AE; D; HANGUL JONGSEONG TIKEUT -U+11AF; L; HANGUL JONGSEONG RIEUL -U+11B0; LG; HANGUL JONGSEONG RIEUL-KIYEOK -U+11B1; LM; HANGUL JONGSEONG RIEUL-MIEUM -U+11B2; 
LB; HANGUL JONGSEONG RIEUL-PIEUP -U+11B3; LS; HANGUL JONGSEONG RIEUL-SIOS -U+11B4; LT; HANGUL JONGSEONG RIEUL-THIEUTH -U+11B5; LP; HANGUL JONGSEONG RIEUL-PHIEUPH -U+11B6; LH; HANGUL JONGSEONG RIEUL-HIEUH -U+11B7; M; HANGUL JONGSEONG MIEUM -U+11B8; B; HANGUL JONGSEONG PIEUP -U+11B9; BS; HANGUL JONGSEONG PIEUP-SIOS -U+11BA; S; HANGUL JONGSEONG SIOS -U+11BB; SS; HANGUL JONGSEONG SSANGSIOS -U+11BC; NG; HANGUL JONGSEONG IEUNG -U+11BD; J; HANGUL JONGSEONG CIEUC -U+11BE; C; HANGUL JONGSEONG CHIEUCH -U+11BF; K; HANGUL JONGSEONG KHIEUKH -U+11C0; T; HANGUL JONGSEONG THIEUTH -U+11C1; P; HANGUL JONGSEONG PHIEUPH -U+11C2; H; HANGUL JONGSEONG HIEUH +# Jamo-3.txt +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# This file defines the Jamo Short Name property, repeating +# in machine readable form the information printed in Table 4-4 +# of The Unicode Standard, Version 3.0. +# +# See sections 3.11 and 4.4 of The Unicode Standard, Version 3.0 +# for more information. +# +# Each line contains two fields, separated by a semicolon. +# +# The first field gives the code point, in 4-digit hexadecimal +# form, of a combining jamo character that participates in +# the algorithmic determination Hangul syllable character names. +# The second field gives the Jamo Short Name as a one-, two-, +# or three-character ASCII string (or in one case, for U+110B, +# the null string). 
+# +# ############################################################# + +1100; G # HANGUL CHOSEONG KIYEOK +1101; GG # HANGUL CHOSEONG SSANGKIYEOK +1102; N # HANGUL CHOSEONG NIEUN +1103; D # HANGUL CHOSEONG TIKEUT +1104; DD # HANGUL CHOSEONG SSANGTIKEUT +1105; R # HANGUL CHOSEONG RIEUL +1106; M # HANGUL CHOSEONG MIEUM +1107; B # HANGUL CHOSEONG PIEUP +1108; BB # HANGUL CHOSEONG SSANGPIEUP +1109; S # HANGUL CHOSEONG SIOS +110A; SS # HANGUL CHOSEONG SSANGSIOS +110B; # HANGUL CHOSEONG IEUNG +110C; J # HANGUL CHOSEONG CIEUC +110D; JJ # HANGUL CHOSEONG SSANGCIEUC +110E; C # HANGUL CHOSEONG CHIEUCH +110F; K # HANGUL CHOSEONG KHIEUKH +1110; T # HANGUL CHOSEONG THIEUTH +1111; P # HANGUL CHOSEONG PHIEUPH +1112; H # HANGUL CHOSEONG HIEUH +1161; A # HANGUL JUNGSEONG A +1162; AE # HANGUL JUNGSEONG AE +1163; YA # HANGUL JUNGSEONG YA +1164; YAE # HANGUL JUNGSEONG YAE +1165; EO # HANGUL JUNGSEONG EO +1166; E # HANGUL JUNGSEONG E +1167; YEO # HANGUL JUNGSEONG YEO +1168; YE # HANGUL JUNGSEONG YE +1169; O # HANGUL JUNGSEONG O +116A; WA # HANGUL JUNGSEONG WA +116B; WAE # HANGUL JUNGSEONG WAE +116C; OE # HANGUL JUNGSEONG OE +116D; YO # HANGUL JUNGSEONG YO +116E; U # HANGUL JUNGSEONG U +116F; WEO # HANGUL JUNGSEONG WEO +1170; WE # HANGUL JUNGSEONG WE +1171; WI # HANGUL JUNGSEONG WI +1172; YU # HANGUL JUNGSEONG YU +1173; EU # HANGUL JUNGSEONG EU +1174; YI # HANGUL JUNGSEONG YI +1175; I # HANGUL JUNGSEONG I +11A8; G # HANGUL JONGSEONG KIYEOK +11A9; GG # HANGUL JONGSEONG SSANGKIYEOK +11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS +11AB; N # HANGUL JONGSEONG NIEUN +11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC +11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH +11AE; D # HANGUL JONGSEONG TIKEUT +11AF; L # HANGUL JONGSEONG RIEUL +11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK +11B1; LM # HANGUL JONGSEONG RIEUL-MIEUM +11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP +11B3; LS # HANGUL JONGSEONG RIEUL-SIOS +11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH +11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH +11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH 
+11B7; M # HANGUL JONGSEONG MIEUM +11B8; B # HANGUL JONGSEONG PIEUP +11B9; BS # HANGUL JONGSEONG PIEUP-SIOS +11BA; S # HANGUL JONGSEONG SIOS +11BB; SS # HANGUL JONGSEONG SSANGSIOS +11BC; NG # HANGUL JONGSEONG IEUNG +11BD; J # HANGUL JONGSEONG CIEUC +11BE; C # HANGUL JONGSEONG CHIEUCH +11BF; K # HANGUL JONGSEONG KHIEUKH +11C0; T # HANGUL JONGSEONG THIEUTH +11C1; P # HANGUL JONGSEONG PHIEUPH +11C2; H # HANGUL JONGSEONG HIEUH diff --git a/lib/unicode/JamoShort.pl b/lib/unicode/JamoShort.pl index 760bcba03e..19cd4290c6 100644 --- a/lib/unicode/JamoShort.pl +++ b/lib/unicode/JamoShort.pl @@ -1,72 +1,72 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; -1100 G -1101 GG -1102 N -1103 D -1104 DD -1105 R -1106 M -1107 B -1108 BB -1109 S -110a SS -110b -110c J -110d JJ -110e C -110f K -1110 T -1111 P -1112 H -1161 A -1162 AE -1163 YA -1164 YAE -1165 EO -1166 E -1167 YEO -1168 YE -1169 O -116a WA -116b WAE -116c OE -116d YO -116e U -116f WEO -1170 WE -1171 WI -1172 YU -1173 EU -1174 YI -1175 I -11a8 G -11a9 GG -11aa GS -11ab N -11ac NJ -11ad NH -11ae D -11af L -11b0 LG -11b1 LM -11b2 LB -11b3 LS -11b4 LT -11b5 LP -11b6 LH -11b7 M -11b8 B -11b9 BS -11ba S -11bb SS -11bc NG -11bd J -11be C -11bf K -11c0 T -11c1 P -11c2 H +1100 G # HANGUL CHOSEONG KIYEOK +1101 GG # HANGUL CHOSEONG SSANGKIYEOK +1102 N # HANGUL CHOSEONG NIEUN +1103 D # HANGUL CHOSEONG TIKEUT +1104 DD # HANGUL CHOSEONG SSANGTIKEUT +1105 R # HANGUL CHOSEONG RIEUL +1106 M # HANGUL CHOSEONG MIEUM +1107 B # HANGUL CHOSEONG PIEUP +1108 BB # HANGUL CHOSEONG SSANGPIEUP +1109 S # HANGUL CHOSEONG SIOS +110a SS # HANGUL CHOSEONG SSANGSIOS +110b # HANGUL CHOSEONG IEUNG +110c J # HANGUL CHOSEONG CIEUC +110d JJ # HANGUL CHOSEONG SSANGCIEUC +110e C # HANGUL CHOSEONG CHIEUCH +110f K # HANGUL CHOSEONG KHIEUKH +1110 T # HANGUL CHOSEONG THIEUTH +1111 P # HANGUL 
CHOSEONG PHIEUPH +1112 H # HANGUL CHOSEONG HIEUH +1161 A # HANGUL JUNGSEONG A +1162 AE # HANGUL JUNGSEONG AE +1163 YA # HANGUL JUNGSEONG YA +1164 YAE # HANGUL JUNGSEONG YAE +1165 EO # HANGUL JUNGSEONG EO +1166 E # HANGUL JUNGSEONG E +1167 YEO # HANGUL JUNGSEONG YEO +1168 YE # HANGUL JUNGSEONG YE +1169 O # HANGUL JUNGSEONG O +116a WA # HANGUL JUNGSEONG WA +116b WAE # HANGUL JUNGSEONG WAE +116c OE # HANGUL JUNGSEONG OE +116d YO # HANGUL JUNGSEONG YO +116e U # HANGUL JUNGSEONG U +116f WEO # HANGUL JUNGSEONG WEO +1170 WE # HANGUL JUNGSEONG WE +1171 WI # HANGUL JUNGSEONG WI +1172 YU # HANGUL JUNGSEONG YU +1173 EU # HANGUL JUNGSEONG EU +1174 YI # HANGUL JUNGSEONG YI +1175 I # HANGUL JUNGSEONG I +11a8 G # HANGUL JONGSEONG KIYEOK +11a9 GG # HANGUL JONGSEONG SSANGKIYEOK +11aa GS # HANGUL JONGSEONG KIYEOK-SIOS +11ab N # HANGUL JONGSEONG NIEUN +11ac NJ # HANGUL JONGSEONG NIEUN-CIEUC +11ad NH # HANGUL JONGSEONG NIEUN-HIEUH +11ae D # HANGUL JONGSEONG TIKEUT +11af L # HANGUL JONGSEONG RIEUL +11b0 LG # HANGUL JONGSEONG RIEUL-KIYEOK +11b1 LM # HANGUL JONGSEONG RIEUL-MIEUM +11b2 LB # HANGUL JONGSEONG RIEUL-PIEUP +11b3 LS # HANGUL JONGSEONG RIEUL-SIOS +11b4 LT # HANGUL JONGSEONG RIEUL-THIEUTH +11b5 LP # HANGUL JONGSEONG RIEUL-PHIEUPH +11b6 LH # HANGUL JONGSEONG RIEUL-HIEUH +11b7 M # HANGUL JONGSEONG MIEUM +11b8 B # HANGUL JONGSEONG PIEUP +11b9 BS # HANGUL JONGSEONG PIEUP-SIOS +11ba S # HANGUL JONGSEONG SIOS +11bb SS # HANGUL JONGSEONG SSANGSIOS +11bc NG # HANGUL JONGSEONG IEUNG +11bd J # HANGUL JONGSEONG CIEUC +11be C # HANGUL JONGSEONG CHIEUCH +11bf K # HANGUL JONGSEONG KHIEUKH +11c0 T # HANGUL JONGSEONG THIEUTH +11c1 P # HANGUL JONGSEONG PHIEUPH +11c2 H # HANGUL JONGSEONG HIEUH END diff --git a/lib/unicode/Name.pl b/lib/unicode/Name.pl index ef8979f0d1..f5c4c56f21 100644 --- a/lib/unicode/Name.pl +++ b/lib/unicode/Name.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0000 001f <control> @@ -10549,4 +10549,6 @@ fffa INTERLINEAR ANNOTATION SEPARATOR fffb INTERLINEAR ANNOTATION TERMINATOR fffc OBJECT REPLACEMENT CHARACTER fffd REPLACEMENT CHARACTER +f0000 ffffd <Plane 15 Private Use, First> +100000 10fffd <Plane 16 Private Use, First> END diff --git a/lib/unicode/Number.pl b/lib/unicode/Number.pl index b0e054a0d0..1f5c2c84c7 100644 --- a/lib/unicode/Number.pl +++ b/lib/unicode/Number.pl @@ -1,7 +1,8 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; +0030 0 0031 1 0032 2 0033 3 @@ -17,6 +18,7 @@ return <<'END'; 00bc 1/4 00bd 1/2 00be 3/4 +0660 0 0661 1 0662 2 0663 3 @@ -26,6 +28,7 @@ return <<'END'; 0667 7 0668 8 0669 9 +06f0 0 06f1 1 06f2 2 06f3 3 @@ -35,6 +38,7 @@ return <<'END'; 06f7 7 06f8 8 06f9 9 +0966 0 0967 1 0968 2 0969 3 @@ -44,6 +48,7 @@ return <<'END'; 096d 7 096e 8 096f 9 +09e6 0 09e7 1 09e8 2 09e9 3 @@ -58,6 +63,7 @@ return <<'END'; 09f6 3 09f7 4 09f9 16 +0a66 0 0a67 1 0a68 2 0a69 3 @@ -67,6 +73,7 @@ return <<'END'; 0a6d 7 0a6e 8 0a6f 9 +0ae6 0 0ae7 1 0ae8 2 0ae9 3 @@ -76,6 +83,7 @@ return <<'END'; 0aed 7 0aee 8 0aef 9 +0b66 0 0b67 1 0b68 2 0b69 3 @@ -97,6 +105,7 @@ return <<'END'; 0bf0 10 0bf1 100 0bf2 1000 +0c66 0 0c67 1 0c68 2 0c69 3 @@ -106,6 +115,7 @@ return <<'END'; 0c6d 7 0c6e 8 0c6f 9 +0ce6 0 0ce7 1 0ce8 2 0ce9 3 @@ -115,6 +125,7 @@ return <<'END'; 0ced 7 0cee 8 0cef 9 +0d66 0 0d67 1 0d68 2 0d69 3 @@ -124,6 +135,7 @@ return <<'END'; 0d6d 7 0d6e 8 0d6f 9 +0e50 0 0e51 1 0e52 2 0e53 3 @@ -133,6 +145,7 @@ return <<'END'; 0e57 7 0e58 8 0e59 9 +0ed0 0 0ed1 1 0ed2 2 0ed3 3 @@ -142,6 +155,7 @@ return <<'END'; 0ed7 7 0ed8 8 0ed9 9 +0f20 0 0f21 1 0f22 2 0f23 3 @@ -151,6 +165,17 @@ return <<'END'; 0f27 7 0f28 8 0f29 9 +0f2a 1/2 +0f2b 3/2 +0f2c 
5/2 +0f2d 7/2 +0f2e 9/2 +0f2f 11/2 +0f30 13/2 +0f31 15/2 +0f32 17/2 +0f33 -1/2 +1040 0 1041 1 1042 2 1043 3 @@ -183,6 +208,7 @@ return <<'END'; 16ee 17 16ef 18 16f0 19 +17e0 0 17e1 1 17e2 2 17e3 3 @@ -192,6 +218,7 @@ return <<'END'; 17e7 7 17e8 8 17e9 9 +1810 0 1811 1 1812 2 1813 3 @@ -201,12 +228,14 @@ return <<'END'; 1817 7 1818 8 1819 9 +2070 0 2074 4 2075 5 2076 6 2077 7 2078 8 2079 9 +2080 0 2081 1 2082 2 2083 3 @@ -322,6 +351,7 @@ return <<'END'; 2499 18 249a 19 249b 20 +24ea 0 2776 1 2777 2 2778 3 @@ -352,6 +382,7 @@ return <<'END'; 2791 8 2792 9 2793 10 +3007 0 3021 1 3022 2 3023 3 @@ -364,6 +395,20 @@ return <<'END'; 3038 10 3039 20 303a 30 +3192 1 +3193 2 +3194 3 +3195 4 +3220 1 +3221 2 +3222 3 +3223 4 +3224 5 +3225 6 +3226 7 +3227 8 +3228 9 +3229 10 3280 1 3281 2 3282 3 @@ -374,6 +419,7 @@ return <<'END'; 3287 8 3288 9 3289 10 +ff10 0 ff11 1 ff12 2 ff13 3 diff --git a/lib/unicode/Props.txt b/lib/unicode/PropList.txt index b3548e164d..e47f127987 100644 --- a/lib/unicode/Props.txt +++ b/lib/unicode/PropList.txt @@ -1,4 +1,4 @@ -Property dump: UnicodeData-3.0.0.txt +Property dump: UnicodeData-3.0.1.txt ******************************************* @@ -192,89 +192,6 @@ FF61..FF65 (5 chars) ******************************************* -Property dump for: 0x00800000 (Delimiter) - -0000 -0009..000D (5 chars) -001C..0023 (8 chars) -0028..0029 (2 chars) -002C -002E..002F (2 chars) -003A..003B (2 chars) -003F -005B..005D (3 chars) -007B..007D (3 chars) -0085 -00A0..00A1 (2 chars) -00A6..00A7 (2 chars) -00AB -00B6 -00BB -00BF -037E -0387 -055C..055E (3 chars) -0589 -060C -061B -061F -066B..066C (2 chars) -06D4 -0700..0709 (10 chars) -070B..070D (3 chars) -0964..0965 (2 chars) -0DF4 -0E5A..0E5B (2 chars) -0F0B -0F0D..0F12 (6 chars) -0F3A..0F3D (4 chars) -104A..104B (2 chars) -10FB -1361..1368 (8 chars) -166D..166E (2 chars) -1680 -169B..169C (2 chars) -16EB..16ED (3 chars) -17D4..17D6 (3 chars) -17DA -1802..1805 (4 chars) -1808..1809 (2 chars) -2000..200B (12 chars) 
-2016 -2018..201F (8 chars) -2028..2029 (2 chars) -202F -2039..203A (2 chars) -203C..203D (2 chars) -2045..2046 (2 chars) -2048..2049 (2 chars) -207D..207E (2 chars) -208D..208E (2 chars) -2329..232A (2 chars) -3000..3002 (3 chars) -3008..3011 (10 chars) -3014..301B (8 chars) -301D..301F (3 chars) -30FB -FD3E..FD3F (2 chars) -FE35..FE44 (16 chars) -FE50..FE52 (3 chars) -FE54..FE57 (4 chars) -FE59..FE5F (7 chars) -FE68 -FF01..FF03 (3 chars) -FF08..FF09 (2 chars) -FF0C -FF0E..FF0F (2 chars) -FF1A..FF1B (2 chars) -FF1F -FF3B..FF3D (3 chars) -FF5B..FF5D (3 chars) -FF61..FF65 (5 chars) -FFE4 - -******************************************* - Property dump for: 0x80000003 (Line Separator) 2028 @@ -591,7 +508,6 @@ Property dump for: 0x20000004 (Combining) 20D0..20E3 (20 chars) 302A..302F (6 chars) 3099..309A (2 chars) -F8F0..F8FF (16 chars) FB1E FE20..FE23 (4 chars) @@ -689,7 +605,6 @@ Property dump for: 0x20040000 (Non-spacing) 20D0..20E3 (20 chars) 302A..302F (6 chars) 3099..309A (2 chars) -F8F0..F8FF (16 chars) FB1E FE20..FE23 (4 chars) @@ -1161,7 +1076,6 @@ Property dump for: 0x20000001 (Alphabetic) 0DF2..0DF3 (2 chars) 0E01..0E3A (58 chars) 0E40..0E45 (6 chars) -0E47 0E4D 0E81..0E82 (2 chars) 0E84 @@ -1355,7 +1269,7 @@ Property dump for: 0x20010000 (Diacritic) 0CCD 0D4D 0DCA -0E48..0E4C (5 chars) +0E47..0E4C (6 chars) 0E4E 0EC8..0ECC (5 chars) 0F18..0F19 (2 chars) @@ -2077,8 +1991,7 @@ Property dump for: 0x01000000 (Bidi: Left-to-Right) 4E00..9FA5 (20902 chars) A000..A48C (1165 chars) AC00..D7A3 (11172 chars) -D800..F7FF (8192 chars) -F900..FA2D (302 chars) +D800..FA2D (8750 chars) FB00..FB06 (7 chars) FB13..FB17 (5 chars) FF21..FF3A (26 chars) @@ -2088,6 +2001,8 @@ FFC2..FFC7 (6 chars) FFCA..FFCF (6 chars) FFD2..FFD7 (6 chars) FFDA..FFDC (3 chars) +F0000..FFFFD (65534 chars) +100000..10FFFD (65534 chars) ******************************************* @@ -2331,7 +2246,6 @@ Property dump for: 0x0C000000 (Bidi: Non-spacing Mark) 20D0..20E3 (20 chars) 302A..302F (6 chars) 
3099..309A (2 chars) -F8F0..F8FF (16 chars) FB1E FE20..FE23 (4 chars) @@ -2483,6 +2397,8 @@ FFFC..FFFD (2 chars) Property dump for: 0x80000005 (Private Use) E000..F8FF (6400 chars) +F0000..FFFFD (65534 chars) +100000..10FFFD (65534 chars) ******************************************* @@ -3281,6 +3197,28 @@ DB80..DBFF (128 chars) ******************************************* +Property dump for: 0x8000000A (Not a Character) + +FFFE..FFFF (2 chars) +1FFFE..1FFFF (2 chars) +2FFFE..2FFFF (2 chars) +3FFFE..3FFFF (2 chars) +4FFFE..4FFFF (2 chars) +5FFFE..5FFFF (2 chars) +6FFFE..6FFFF (2 chars) +7FFFE..7FFFF (2 chars) +8FFFE..8FFFF (2 chars) +9FFFE..9FFFF (2 chars) +AFFFE..AFFFF (2 chars) +BFFFE..BFFFF (2 chars) +CFFFE..CFFFF (2 chars) +DFFFE..DFFFF (2 chars) +EFFFE..EFFFF (2 chars) +FFFFE..FFFFF (2 chars) +10FFFE..10FFFF (2 chars) + +******************************************* + Property dump for: 0x00000000 (Unassigned Code Value) 0220..0221 (2 chars) @@ -3637,4 +3575,18 @@ FFD8..FFD9 (2 chars) FFDD..FFDF (3 chars) FFE7 FFEF..FFF8 (10 chars) +10000..1FFFD (65534 chars) +20000..2FFFD (65534 chars) +30000..3FFFD (65534 chars) +40000..4FFFD (65534 chars) +50000..5FFFD (65534 chars) +60000..6FFFD (65534 chars) +70000..7FFFD (65534 chars) +80000..8FFFD (65534 chars) +90000..9FFFD (65534 chars) +A0000..AFFFD (65534 chars) +B0000..BFFFD (65534 chars) +C0000..CFFFD (65534 chars) +D0000..DFFFD (65534 chars) +E0000..EFFFD (65534 chars) diff --git a/lib/unicode/README.perl b/lib/unicode/README.perl new file mode 100644 index 0000000000..2294bd3cc4 --- /dev/null +++ b/lib/unicode/README.perl @@ -0,0 +1,37 @@ +The *.txt files were copied 30 Aug 2000 from + + http://www.unicode.org/Public/UNIDATA/ + +and most of them were renamed to better fit 8.3 filename limitations, +by which the Perl distribution tries to live. 
+ + www.unicode.org Perl distribution + + ArabicShaping.txt ArabShap.txt + BidiMirroring.txt BidiMirr.txt + Blocks.txt Blocks.txt + CaseFolding.txt CaseFold.txt + CompositionExclusions.txt CompExcl.txt + EastAsianWidth.txt EAWidth.txt (0) + Index.txt Index.txt + Jamo.txt Jamo.txt + LineBreak.txt LineBrk.txt (0) + NamesList.html NamesList.html (0) + NamesList.txt Names.txt + PropList.txt PropList.txt + ReadMe.txt ReadMe.txt + SpecialCasing.txt SpecCase.txt + UnicodeCharacterDatabase.html UCD301.html + UnicodeData.html UCDFF301.html + UnicodeData.txt Unicode.301 + +The two big files, NormalizationTest.txt (1.7MB) and Unihan.txt (15.8MB) +were not copied for space considerations. The files marked with (0) had +not been updated since Unicode 3.0.0 (10 Sep 1999) + +The *.pl files are generated from these files by the 'mktables.PL' script. + +While the files have been renamed the links in the html files haven't. + +-- +jhi@iki.fi diff --git a/lib/unicode/ReadMe.txt b/lib/unicode/ReadMe.txt index c2c4aee6a5..b8a643ca27 100644 --- a/lib/unicode/ReadMe.txt +++ b/lib/unicode/ReadMe.txt @@ -1,45 +1,13 @@ -June 23, 1999 +August 30, 2000 -This directory contains the initial release for Unicode 3.0. +This directory contains the first update release for Unicode 3.0. This release consists of corrections and additions to the -Unicode Character Database, to match the publication of -The Unicode Standard, Version 3.0. +Unicode Character Database for the Unicode Standard, +Version 3.0.1. Detailed documentation of the files constituting the Unicode Character Database (contributory data files for the standard itself) can now be found in UnicodeCharacterDatabase.html. --------------------------------------------------------------------------- -NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE - -The files have been copied from - - ftp://ftp.unicode.org/Public/3.0-Update/ - -and most of them have been renamed to better fit 8.3 filename limitations. 
- -long name at unicode.org short name latest '#' ------------------------- ---------- ---------- -ArabicShaping-#.txt ArabShap.txt 2 -Blocks-#.txt Blocks.txt 3 -CompositionExclusions-#.txt CompExcl.txt 1 -EastAsianWidth-#.txt EAWidth.txt 3 -Index-#.txt Index.txt 3.0.0 -Jamo-#.txt Jamo.txt 2 -LineBreak-#.txt LineBrk.txt 5 -NamesList-#.txt Names.txt 3.0.0 -NamesList-#.html NamesList.html 1 -PropList-#.txt Props.txt 3.0.0 -SpecialCasing-#.txt SpecCase.txt 2 -UnicodeData-#.txt Unicode.300 3.0.0 -UnicodeData-#.html Unicode3.html 3.0.0 -UnicodeCharacterDatabase-#.html UCD300.html 3.0.0 - -The *.pl files are generated from these files by the 'mktables.PL' script. - -While the files have been renamed the links in the html files haven't. - --- -jhi@iki.fi diff --git a/lib/unicode/SpecCase.txt b/lib/unicode/SpecCase.txt index af002ef4cf..94662d384f 100644 --- a/lib/unicode/SpecCase.txt +++ b/lib/unicode/SpecCase.txt @@ -1,4 +1,4 @@ -# SpecialCasing-2.txt +# SpecialCasing-3.txt # # Special Casing Properties # @@ -26,26 +26,33 @@ # <upper> := <code_point_list> # <code_point_list> := <code_point> (<s>+ <code_point>)* # <code_point> := <hex><hex><hex><hex> -# <hex> := [0-1A-Fa-f] +# <hex> := [0-9A-Fa-f] # <s> := <space> # -# <condition_list> := <locale>? (<s>+ <context>)* -# <locale> := <ISO_3166_code> ( "_" <ISO_639_code> )? ( "_" <variant> )? +# <condition_list> := <locale>? (<s>+ <context>)* <sep> +# <locale> := <ISO_639_code> ( "_" <ISO_3166_code> )? ( "_" <variant> )? # <ISO_3166_code> := 2-letter country code, # as in http://www.unicode.org/unicode/onlinedat/countries.html # <ISO_639_code> := 2-letter code, # as in http://www.unicode.org/unicode/onlinedat/languages.html -# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN" +# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN" | "AFTER_i" +# +# A condition list overrides the normal behavior if all of the listed conditions are true. +# Case distinctions in the condition list are not significant. 
# -# A condition list overrides the normal behavior if any of the listed conditions is true. # FINAL: The letter is not followed by a letter of category L* (e.g. Ll, Lt, Lu, Lm, or Lo). # MODERN: The mapping is only used for modern text. +# AFTER_i: The last base character was "i" 0069 +# # Conditions preceded by "NON_" represent the negation of the condition # # New contexts may be added in the future. -# Parsers of this file must be prepared to deal with that situation. # Additional whitespace around elements is optional. Blank lines are ignored in parsing. # On any line, all text following "#" is a comment, and are ignored in parsing. +# +# Parsers of this file must be prepared to deal future additions to this format: +# * Additional contexts +# * Additional fields # ================================================================================ # ================================================================================ @@ -76,7 +83,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # No corresponding uppercase precomposed character -0149; 0149; 02BC 006E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON @@ -199,7 +206,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA -# Note: the following cases are not included, since they would normalize in lowercasing +# Note: the following cases are not included, since they would case-fold in lowercasing # 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA # 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL 
LETTER FINAL SIGMA @@ -208,12 +215,16 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # Locale-sensitive mappings # ================================================================================ +# Lithuanian + +0307; 0307; ; ; lt AFTER_i; # Remove DOT ABOVE after "i" with upper or titlecase + # Turkish -0049; 0131; 0049; 0049; TR; # LATIN CAPITAL LETTER I -0069; 0069; 0130; 0130; TR; # LATIN SMALL LETTER I +0049; 0131; 0049; 0049; tr; # LATIN CAPITAL LETTER I +0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I # Note: the following cases are already in the UnicodeData file. -# 0131; 0131; 0049; 0049; TR; # LATIN SMALL LETTER DOTLESS I -# 0130; 0069; 0130; 0130; TR; # LATIN CAPITAL LETTER I WITH DOT ABOVE +# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I +# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE diff --git a/lib/unicode/To/Digit.pl b/lib/unicode/To/Digit.pl index a96bc1c1a6..4bace1e662 100644 --- a/lib/unicode/To/Digit.pl +++ b/lib/unicode/To/Digit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0030 0039 0000 diff --git a/lib/unicode/To/Lower.pl b/lib/unicode/To/Lower.pl index a78a7e4492..89755b7c3a 100644 --- a/lib/unicode/To/Lower.pl +++ b/lib/unicode/To/Lower.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0041 005a 0061 diff --git a/lib/unicode/To/Title.pl b/lib/unicode/To/Title.pl index d8f5c048d4..cadeaf909b 100644 --- a/lib/unicode/To/Title.pl +++ b/lib/unicode/To/Title.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. 
+# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a 0041 diff --git a/lib/unicode/To/Upper.pl b/lib/unicode/To/Upper.pl index 1fc7637753..d6c03d34bd 100644 --- a/lib/unicode/To/Upper.pl +++ b/lib/unicode/To/Upper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.300. +# This file is built by mktables.PL from e.g. Unicode.301. # Any changes made here will be lost! return <<'END'; 0061 007a 0041 diff --git a/lib/unicode/UCD300.html b/lib/unicode/UCD301.html index 113d311f01..284349e264 100644 --- a/lib/unicode/UCD300.html +++ b/lib/unicode/UCD301.html @@ -4,342 +4,198 @@ <html> - - <head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <meta http-equiv="Content-Language" content="en-us"> - <meta name="GENERATOR" content="Microsoft FrontPage 4.0"> - <meta name="ProgId" content="FrontPage.Editor.Document"> - <link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css"> - <title>Unicode Character Database</title> - </head> - - <body> - - -<h1>UNICODE CHARACTER DATABASE<br> -Version 3.0.0</h1> - +<h1>UNICODE CHARACTER DATABASE<br> +Version 3.0.1</h1> <table border="1" cellspacing="2" cellpadding="0" height="87" width="100%"> - <tr> - <td valign="TOP" width="144">Revision</td> - - <td valign="TOP">3.0.0</td> - + <td valign="TOP">3.0.1</td> </tr> - <tr> - <td valign="TOP" width="144">Authors</td> - <td valign="TOP">Mark Davis and Ken Whistler</td> - </tr> - <tr> - <td valign="TOP" width="144">Date</td> - - <td valign="TOP">1999-09-11</td> - + <td valign="TOP">2000-08-17</td> </tr> - <tr> - <td valign="TOP" width="144">This Version</td> - - <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td> - + <td valign="TOP"><a + 
href="http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html">http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html</a></td> </tr> - <tr> - <td valign="TOP" width="144">Previous Version</td> - - <td valign="TOP">n/a</td> - + <td valign="TOP"><a + href="http://www.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">http://www.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td> </tr> - <tr> - <td valign="TOP" width="144">Latest Version</td> - - <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td> - + <td valign="TOP"><a + href="http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html">http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html</a></td> </tr> - </table> - -<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.</p> - -<h2>Disclaimer</h2> - -<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims - -are made as to fitness for any particular purpose. No warranties of any kind are - -expressed or implied. The recipient agrees to determine applicability of - -information provided. If this file has been purchased on magnetic or optical - -media from Unicode, Inc., the sole remedy for any claim will be exchange of - -defective media within 90 days of receipt.</p> - -<p>This disclaimer is applicable for all other data files accompanying the - -Unicode Character Database, some of which have been compiled by the Unicode - -Consortium, and some of which have been supplied by other sources.</p> - -<h2>Limitations on Rights to Redistribute This Data</h2> - -<p>Recipient is granted the right to make copies in any form for internal - -distribution and to freely use the information supplied in the creation of - -products supporting the Unicode<sup>TM</sup> Standard. 
The files in the Unicode - -Character Database can be redistributed to third parties or other organizations - -(whether for profit or not) as long as this notice and the disclaimer notice are - -retained. Information can be extracted from these files and used in - -documentation or programs, as long as there is an accompanying notice indicating - -the source.</p> - -<h2>Introduction</h2> - -<p>The Unicode Character Database is a set of files that define the Unicode - -character properties and internal mappings. For more information about character - -properties and mappings, see <i><a href="http://www.unicode.org/unicode/uni2book/u2.html">The - -Unicode Standard</a></i>.</p> - -<p>The Unicode Character Database has been updated to reflect Version 3.0 of the - -Unicode Standard, with many characters added to those published in Version 2.0. - -A number of corrections have also been made to case mappings or other errors in - -the database noted since the publication of Version 2.0. Normative bidirectional - -properties have also been modified to reflect decisions of the Unicode Technical - -Committee.</p> - -<p>For more information on versions of the Unicode Standard and how to reference - -them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p> - -<h2>Conformance</h2> - -<p>Character properties may be either normative or informative. <i>Normative</i> - -means that implementations that claim conformance to the Unicode Standard (at a - -particular version) and which make use of a particular property or field must - -follow the specifications of the standard for that property or field in order to - -be conformant. The term <i>normative</i> when applied to a property or field of - -the Unicode Character Database, does <i>not</i> mean that the value of that - -field will never change. 
Corrections and extensions to the standard in the - -future may require minor changes to normative values, even though the Unicode - -Technical Committee strives to minimize such changes. An<i> informative </i>property - -or field is strongly recommended, but a conformant implementation is free to use - -or change such values as it may require while still being conformant to the - -standard. Particular implementations may choose to override the properties and - -mappings that are not normative. In that case, it is up to the implementer to - -establish a protocol to convey that information.</p> - -<h2>Files</h2> - -<p>The following summarizes the files in the Unicode Character Database. For - -more information about these files, see the referenced technical report or - -section of Unicode Standard, Version 3.0.</p> - -<p><b>UnicodeData.txt (Chapter 4)</b> - -<ul> - - <li>The main file in the Unicode Character Database.</li> - - <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>. 
- - This file also characterizes which properties are normative and which are - - informative.</li> - -</ul> - -<p><b>PropList.txt (Chapter 4)</b> - -<ul> - - <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i> - - and <i>Mathematical</i>, among others.</li> - -</ul> - -<p><b>SpecialCasing.txt (Chapter 4)</b> - -<ul> - - <li>List of informative special casing properties, including one-to-many - - mappings such as SHARP S => "SS", and locale-specific mappings, - - such as for Turkish <i>dotless i</i>.</li> - -</ul> - -<p><b>Blocks.txt (Chapter 14)</b> - -<ul> - - <li>List of normative block names.</li> - -</ul> - -<p><b>Jamo.txt (Chapter 4)</b> - -<ul> - - <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names - - algorithmically.</li> - -</ul> - -<p><b>ArabicShaping.txt (Section 8.2)</b> - -<ul> - - <li>Basic Arabic and Syriac character shaping properties, such as initial, - - medial and final shapes. These properties are normative for minimal shaping - - of Arabic and Syriac. </li> - -</ul> - -<p><b>NamesList.txt (Chapter 14)</b> - -<ul> - - <li>This file duplicates some of the material in the UnicodeData file, and - - adds informative annotations uses in the character charts, as printed in the - - Unicode Standard. </li> - - <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches - - the appropriate version of the book. Changes in the Unicode Character - - Database since then may not be reflected in these files, since they are - - primarily of archival interest.</li> - -</ul> - -<p><b>Index.txt (Chapter 14)</b> - -<ul> - - <li>Informative index to Unicode characters, as printed in the Unicode - - Standard</li> - - <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches - - the appropriate version of the book. 
Changes in the Unicode Character - - Database since then may not be reflected in these files, since they are - - primarily of archival interest.</li> - -</ul> - -<p><b>CompositionExclusions.txt (<a href="http://www.unicode.org/unicode/reports/tr15/">UTR#15 - -Unicode Normalization Forms</a>)</b> - -<ul> - - <li>Normative properties for normalization.</li> - -</ul> - -<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UTR - -#14: Line Breaking Properties</a>)</b> - -<ul> - - <li>Normative and informative properties for line breaking. To see which - - properties are informative and which are normative, consult UTR#14.</li> - -</ul> - -<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UTR - -#11: East Asian Character Width</a>)</b> - -<ul> - - <li>Informative properties for determining the choice of wide vs. narrow - - glyphs in East Asian contexts.</li> - -</ul> - -<p><b>diffXvY.txt</b> - -<ul> - - <li>Mechanically-generated informative files containing accumulated - - differences between successive versions of UnicodeData.txt</li> - -</ul> - - - -</body> - - - -</html> - +<p align="center">Copyright © 1995-2000 Unicode, Inc. All Rights reserved.</p> +<h2>Disclaimer</h2> +<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims +are made as to fitness for any particular purpose. No warranties of any kind are +expressed or implied. The recipient agrees to determine applicability of +information provided. 
If this file has been purchased on magnetic or optical +media from Unicode, Inc., the sole remedy for any claim will be exchange of +defective media within 90 days of receipt.</p> +<p>This disclaimer is applicable for all other data files accompanying the +Unicode Character Database, some of which have been compiled by the Unicode +Consortium, and some of which have been supplied by other sources.</p> +<h2>Limitations on Rights to Redistribute This Data</h2> +<p>Recipient is granted the right to make copies in any form for internal +distribution and to freely use the information supplied in the creation of +products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode +Character Database can be redistributed to third parties or other organizations +(whether for profit or not) as long as this notice and the disclaimer notice are +retained. Information can be extracted from these files and used in +documentation or programs, as long as there is an accompanying notice indicating +the source.</p> +<h2>Introduction</h2> +<p>The Unicode Character Database is a set of files that define the Unicode +character properties and internal mappings. For more information about character +properties and mappings, see <i><a +href="http://www.unicode.org/unicode/uni2book/u2.html">The Unicode Standard</a></i>.</p> +<p>The Unicode Character Database has been updated to reflect Version 3.0 of the +Unicode Standard, with many characters added to those published in Version 2.0. +A number of corrections have also been made to case mappings or other errors in +the database noted since the publication of Version 2.0. 
Normative bidirectional +properties have also been modified to reflect decisions of the Unicode Technical +Committee.</p> +<p>For more information on versions of the Unicode Standard and how to reference +them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p> +<h2>Conformance</h2> +<p>Character properties may be either normative or informative. <i>Normative</i> +means that implementations that claim conformance to the Unicode Standard (at a +particular version) and which make use of a particular property or field must +follow the specifications of the standard for that property or field in order to +be conformant. The term <i>normative</i> when applied to a property or field of +the Unicode Character Database, does <i>not</i> mean that the value of that +field will never change. Corrections and extensions to the standard in the +future may require minor changes to normative values, even though the Unicode +Technical Committee strives to minimize such changes. An<i> informative </i>property +or field is strongly recommended, but a conformant implementation is free to use +or change such values as it may require while still being conformant to the +standard. Particular implementations may choose to override the properties and +mappings that are not normative. In that case, it is up to the implementer to +establish a protocol to convey that information.</p> +<h2>Files</h2> +<p>The following summarizes the files in the Unicode Character Database. 
For +more information about these files, see the referenced technical report(s) or +section of Unicode Standard, Version 3.0.</p> +<p><b>UnicodeData.txt (Chapter 4, <a +href="http://www.unicode.org/unicode/reports/tr21/">UTR #21: Case Mappings</a>, <a +href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization +Forms</a>)</b> +<ul> + <li>The main file in the Unicode Character Database.</li> + <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>. + This file also characterizes which properties are normative and which are + informative.</li> +</ul> +<p><b>PropList.txt (Chapter 4)</b> +<ul> + <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i> + and <i>Mathematical</i>, among others.</li> +</ul> +<p><b>SpecialCasing.txt (Chapter 4, <a +href="http://www.unicode.org/unicode/reports/tr21/">UTR #21: Case Mappings</a>)</b> +<ul> + <li>List of informative special casing properties, including one-to-many + mappings such as SHARP S => "SS", and locale-specific mappings, + such as for Turkish <i>dotless i</i>.</li> +</ul> +<p><b>Blocks.txt (Chapter 14)</b> +<ul> + <li>List of normative block names.</li> +</ul> +<p><b>Jamo.txt (Chapter 4)</b> +<ul> + <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names + algorithmically.</li> +</ul> +<p><b>ArabicShaping.txt (Section 8.2)</b> +<ul> + <li>Basic Arabic and Syriac character shaping properties, such as initial, + medial and final shapes. These properties are normative for minimal shaping + of Arabic and Syriac.</li> +</ul> +<p><b>NamesList.txt (Chapter 14)</b> +<ul> + <li>This file duplicates some of the material in the UnicodeData file, and + adds informative annotations used in the character charts, as printed in the + Unicode Standard.</li> + <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches + the appropriate version of the book.
Changes in the Unicode Character + Database since then may not be reflected in these files, since they are + primarily of archival interest.</li> +</ul> +<p><b>Index.txt (Chapter 14)</b> +<ul> + <li>Informative index to Unicode characters, as printed in the Unicode + Standard</li> + <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches + the appropriate version of the book. Changes in the Unicode Character + Database since then may not be reflected in these files, since they are + primarily of archival interest.</li> +</ul> +<p><b>CompositionExclusions.txt (<a +href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization +Forms</a>)</b> +<ul> + <li>Normative properties for normalization.</li> +</ul> +<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UAX +#14: Line Breaking Properties</a>)</b> +<ul> + <li>Normative and informative properties for line breaking. To see which + properties are informative and which are normative, consult UAX #14.</li> +</ul> +<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UAX +#11: East Asian Character Width</a>)</b> +<ul> + <li>Informative properties for determining the choice of wide vs. 
narrow + glyphs in East Asian contexts.</li> +</ul> +<p><b>BidiMirroring.txt</b><b> (<a +href="http://www.unicode.org/unicode/reports/tr9/">UAX #9: The +Bidirectional Algorithm</a>)</b></p> +<ul> + <li>Informative properties for substituting characters in an implementation of + bidirectional mirroring.</li> +</ul> +<p><b>CaseFolding.txt (<a href="http://www.unicode.org/unicode/reports/tr21/">UTR +#21: Case Mappings</a>)</b></p> +<ul> + <li>Informative file mapping characters to their case-folded form.</li> +</ul> +<p><b>NormalizationTest.txt (<a +href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization +Forms</a>)</b></p> +<ul> + <li>Normative test file for conformance to Unicode Normalization Forms.</li> +</ul> +<p><b>diffXvY.txt</b> +<ul> + <li>Mechanically-generated informative files containing accumulated + differences between successive versions of UnicodeData.txt</li> +</ul> + +</body> + +</html> diff --git a/lib/unicode/UCDFF301.html b/lib/unicode/UCDFF301.html new file mode 100644 index 0000000000..a8fbe329de --- /dev/null +++ b/lib/unicode/UCDFF301.html @@ -0,0 +1,1025 @@ +<html> + +<head> +<meta name="GENERATOR" content="Microsoft FrontPage 4.0"> +<meta name="ProgId" content="FrontPage.Editor.Document"> +<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css"> +<title>UnicodeData File Format</title> +</head> + +<body> + +<h1>UnicodeData File Format<br> +Version 3.0.1</h1> +<table border="1" cellspacing="2" cellpadding="0" height="87" width="100%"> + <tr> + <td valign="TOP" width="144">Revision</td> + <td valign="TOP">3.0.1</td> + </tr> + <tr> + <td valign="TOP" width="144">Authors</td> + <td valign="TOP">Mark Davis and Ken Whistler</td> + </tr> + <tr> + <td valign="TOP" width="144">Date</td> + <td valign="TOP">2000-08-17</td> + </tr> + <tr> + <td valign="TOP" width="144">This Version</td> + <td valign="TOP"><a + 
href="http://www.unicode.org/Public/3.0-Update1/UnicodeData-3.0.1.html">http://www.unicode.org/Public/3.0-Update1/UnicodeData-3.0.1.html</a></td> + </tr> + <tr> + <td valign="TOP" width="144">Previous Version</td> + <td valign="TOP"><a + href="http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td> + </tr> + <tr> + <td valign="TOP" width="144">Latest Version</td> + <td valign="TOP"><a + href="http://www.unicode.org/Public/UNIDATA/UnicodeData.html">http://www.unicode.org/Public/UNIDATA/UnicodeData.html</a></td> + </tr> +</table> +<p align="center">Copyright © 1995-2000 Unicode, Inc. All Rights reserved.<br> +<i>For more information, including Disclaimer and Limitations, see <a +href="UnicodeCharacterDatabase-3.0.1.html">UnicodeCharacterDatabase-3.0.1.html</a></i></p> +<p>This document describes the format of the UnicodeData.txt file, which is one +of the files in the Unicode Character Database. The document is divided into the +following sections: +<ul> + <li><a href="#Field Formats">Field Formats</a> + <ul> + <li><a href="#General Category">General Category</a></li> + <li><a href="#Bidirectional Category">Bidirectional Category</a></li> + <li><a href="#Character Decomposition">Character Decomposition Mapping</a></li> + <li><a href="#Canonical Combining Classes">Canonical Combining Classes</a></li> + <li><a href="#Decompositions and Normalization">Decompositions and + Normalization</a></li> + <li><a href="#Case Mappings">Case Mappings</a></li> + </ul> + </li> + <li><a href="#Property Invariants">Property Invariants</a></li> + <li><a href="#Modification History">Modification History</a></li> +</ul> +<p><b>Warning: </b>the information in this file does not completely describe the +use and interpretation of Unicode character properties and behavior.
It must be +used in conjunction with the data in the other files in the <a +href="UnicodeCharacterDatabase-3.0.1.html">Unicode Character Database</a>, and +relies on the notation and definitions supplied in <i><a +href="http://www.unicode.org/unicode/standard/versions/Unicode3.0.html">The +Unicode Standard</a></i>. All chapter references are to Version 3.0 of the +standard.</p> +<h2><a name="Field Formats"></a>Field Formats</h2> +<p>The file consists of lines containing fields separated by semicolons. Each +line represents the data for one encoded character in the Unicode Standard. +Every encoded character has a data entry, with the exception of certain special +ranges, as detailed below. +<ul> + <li>There are nine special ranges of characters that are represented only by + their start and end characters, since the properties in the file are + uniform, except for code values (which are all sequential and assigned).</li> + <li>The names of CJK ideograph characters and the names and decompositions of + Hangul syllable characters are algorithmically derivable. (See the Unicode + Standard and <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode + Standard Annex #15</a> for more information).</li> + <li>Surrogate code values and private use characters have no names.</li> + <li>The Private Use characters outside of the BMP (U+F0000..U+FFFFD,
These correspond to surrogate pairs + where the first surrogate is in the High Surrogate Private Use section.</li> +</ul> +<p>The exact ranges represented by start and end characters are: +<ul> + <li>CJK Ideographs Extension A (U+3400 - U+4DB5)</li> + <li>CJK Ideographs (U+4E00 - U+9FA5)</li> + <li>Hangul Syllables (U+AC00 - U+D7A3)</li> + <li>Non-Private Use High Surrogates (U+D800 - U+DB7F)</li> + <li>Private Use High Surrogates (U+DB80 - U+DBFF)</li> + <li>Low Surrogates (U+DC00 - U+DFFF)</li> + <li>The Private Use Area (U+E000 - U+F8FF)</li> + <li>Plane 15 Private Use Area (U+F0000 - U+FFFFD)</li> + <li>Plane 16 Private Use Area (U+100000 - U+10FFFD)</li> +</ul> +<p>The following table describes the format and meaning of each field in a data +entry in the UnicodeData file. Fields which contain normative information are so +indicated.</p> +<table border="1" cellspacing="2" cellpadding="2"> + <tr> + <th valign="top" align="LEFT"> + <p align="LEFT">Field</th> + <th valign="top" align="LEFT"> + <p align="LEFT">Name</th> + <th valign="top" align="LEFT"> + <p align="LEFT">Status</th> + <th valign="top" align="LEFT"> + <p align="LEFT">Explanation</th> + </tr> + <tr> + <th valign="top">0</th> + <td valign="top">Code value</td> + <td valign="top">normative</td> + <td valign="top">Code value. 
For characters in the range U+0000..U+FFFD + the code value uses a 4-digit hexadecimal format; for characters in the + range U+10000..U+FFFFD the code value uses a 5-digit hexadecimal format; + and for characters in the range U+100000..U+10FFFD the code value uses a + 6-digit hexadecimal format.</td> + </tr> + <tr> + <th valign="top">1</th> + <td valign="top">Character name</td> + <td valign="top">normative</td> + <td valign="top">These names match exactly the names published in Chapter 14 + of the Unicode Standard, Version 3.0.</td> + </tr> + <tr> + <th valign="top">2</th> + <td valign="top"><a href="#General Category">General Category</a></td> + <td valign="top">normative / informative<br> + (see below)</td> + <td valign="top">This is a useful breakdown into various "character + types" which can be used as a default categorization in + implementations. See below for a brief explanation.</td> + </tr> + <tr> + <th valign="top">3</th> + <td valign="top"><a href="#Canonical Combining Classes">Canonical Combining + Classes</a></td> + <td valign="top">normative</td> + <td valign="top">The classes used for the Canonical Ordering Algorithm in + the Unicode Standard. These classes are also printed in Chapter 4 of the + Unicode Standard.</td> + </tr> + <tr> + <th valign="top">4</th> + <td valign="top"><a href="#Bidirectional Category">Bidirectional Category</a></td> + <td valign="top">normative</td> + <td valign="top">See the list below for an explanation of the abbreviations + used in this field. These are the categories required by the Bidirectional + Behavior Algorithm in the Unicode Standard. These categories are + summarized in Chapter 3 of the Unicode Standard.</td> + </tr> + <tr> + <th valign="top">5</th> + <td valign="top"><a href="#Character Decomposition">Character Decomposition + Mapping</a></td> + <td valign="top">normative</td> + <td valign="top">In the Unicode Standard, not all of the mappings are full + (maximal) decompositions. 
Recursive application of look-up for + decompositions will, in all cases, lead to a maximal decomposition. The + decomposition mappings match exactly the decomposition mappings published + with the character names in the Unicode Standard.</td> + </tr> + <tr> + <th valign="top">6</th> + <td valign="top">Decimal digit value</td> + <td valign="top">normative</td> + <td valign="top">This is a numeric field. If the character has the decimal + digit property, as specified in Chapter 4 of the Unicode Standard, the + value of that digit is represented with an integer value in this field</td> + </tr> + <tr> + <th valign="top">7</th> + <td valign="top">Digit value</td> + <td valign="top">normative</td> + <td valign="top">This is a numeric field. If the character represents a + digit, not necessarily a decimal digit, the value is here. This covers + digits which do not form decimal radix forms, such as the compatibility + superscript digits</td> + </tr> + <tr> + <th valign="top">8</th> + <td valign="top">Numeric value</td> + <td valign="top">normative</td> + <td valign="top">This is a numeric field. If the character has the numeric + property, as specified in Chapter 4 of the Unicode Standard, the value of + that character is represented with an integer or rational number in this + field. This includes fractions as, e.g., "1/5" for U+2155 VULGAR + FRACTION ONE FIFTH Also included are numerical values for compatibility + characters such as circled numbers.</td> + </tr> + <tr> + <th valign="top">9</th> + <td valign="top">Mirrored</td> + <td valign="top">normative</td> + <td valign="top">If the character has been identified as a + "mirrored" character in bidirectional text, this field has the + value "Y"; otherwise "N". 
The list of mirrored + characters is also printed in Chapter 4 of the Unicode Standard.</td> + </tr> + <tr> + <th valign="top">10</th> + <td valign="top">Unicode 1.0 Name</td> + <td valign="top">informative</td> + <td valign="top">This is the old name as published in Unicode 1.0. This name + is only provided when it is significantly different from the Unicode 3.0 + name for the character.</td> + </tr> + <tr> + <th valign="top">11</th> + <td valign="top">10646 comment field</td> + <td valign="top">informative</td> + <td valign="top">This is the ISO 10646 comment field. It appears in parentheses + in the 10646 names list, or contains an asterisk to mark an Annex P note.</td> + </tr> + <tr> + <th valign="top">12</th> + <td valign="top"><a href="#Case Mappings">Uppercase Mapping</a></td> + <td valign="top">informative</td> + <td valign="top">Upper case equivalent mapping. If a character is part of an + alphabet with case distinctions, and has an upper case equivalent, then + the upper case equivalent is in this field. See the explanation below on + case distinctions. These mappings are always one-to-one, not one-to-many + or many-to-one. This field is informative.</td> + </tr> + <tr> + <th valign="top">13</th> + <td valign="top"><a href="#Case Mappings">Lowercase Mapping</a></td> + <td valign="top">informative</td> + <td valign="top">Similar to Uppercase mapping</td> + </tr> + <tr> + <th valign="top">14</th> + <td valign="top"><a href="#Case Mappings">Titlecase Mapping</a></td> + <td valign="top">informative</td> + <td valign="top">Similar to Uppercase mapping</td> + </tr> +</table> +<h3><a name="General Category"></a>General Category</h3> +<p>The values in this field are abbreviations for the following. Some of the +values are normative, and some are informative. For more information, see the +Unicode Standard.</p> +<p><b>Note:</b> the standard does not assign information to control characters +(except for certain cases in the Bidirectional Algorithm). 
Implementations will +generally also assign categories to certain control characters, notably CR and +LF, according to platform conventions.</p> +<h4>Normative Categories</h4> +<table border="0" cellspacing="2" cellpadding="0"> + <tr> + <th> + <p align="LEFT">Abbr.</th> + <th> + <p align="LEFT">Description</th> + </tr> + <tr> + <td align="CENTER">Lu</td> + <td>Letter, Uppercase</td> + </tr> + <tr> + <td align="CENTER">Ll</td> + <td>Letter, Lowercase</td> + </tr> + <tr> + <td align="CENTER">Lt</td> + <td>Letter, Titlecase</td> + </tr> + <tr> + <td align="CENTER">Mn</td> + <td>Mark, Non-Spacing</td> + </tr> + <tr> + <td align="CENTER">Mc</td> + <td>Mark, Spacing Combining</td> + </tr> + <tr> + <td align="CENTER">Me</td> + <td>Mark, Enclosing</td> + </tr> + <tr> + <td align="CENTER">Nd</td> + <td>Number, Decimal Digit</td> + </tr> + <tr> + <td align="CENTER">Nl</td> + <td>Number, Letter</td> + </tr> + <tr> + <td align="CENTER">No</td> + <td>Number, Other</td> + </tr> + <tr> + <td align="CENTER">Zs</td> + <td>Separator, Space</td> + </tr> + <tr> + <td align="CENTER">Zl</td> + <td>Separator, Line</td> + </tr> + <tr> + <td align="CENTER">Zp</td> + <td>Separator, Paragraph</td> + </tr> + <tr> + <td align="CENTER">Cc</td> + <td>Other, Control</td> + </tr> + <tr> + <td align="CENTER">Cf</td> + <td>Other, Format</td> + </tr> + <tr> + <td align="CENTER">Cs</td> + <td>Other, Surrogate</td> + </tr> + <tr> + <td align="CENTER">Co</td> + <td>Other, Private Use</td> + </tr> + <tr> + <td align="CENTER">Cn</td> + <td>Other, Not Assigned (no characters in the file have this property)</td> + </tr> +</table> +<h4>Informative Categories</h4> +<table border="0" cellspacing="2" cellpadding="0"> + <tr> + <th> + <p align="LEFT">Abbr.</th> + <th> + <p align="LEFT">Description</th> + </tr> + <tr> + <td align="CENTER">Lm</td> + <td>Letter, Modifier</td> + </tr> + <tr> + <td align="CENTER">Lo</td> + <td>Letter, Other</td> + </tr> + <tr> + <td align="CENTER">Pc</td> + <td>Punctuation, 
Connector</td> + </tr> + <tr> + <td align="CENTER">Pd</td> + <td>Punctuation, Dash</td> + </tr> + <tr> + <td align="CENTER">Ps</td> + <td>Punctuation, Open</td> + </tr> + <tr> + <td align="CENTER">Pe</td> + <td>Punctuation, Close</td> + </tr> + <tr> + <td align="CENTER">Pi</td> + <td>Punctuation, Initial quote (may behave like Ps or Pe depending on usage)</td> + </tr> + <tr> + <td align="CENTER">Pf</td> + <td>Punctuation, Final quote (may behave like Ps or Pe depending on usage)</td> + </tr> + <tr> + <td align="CENTER">Po</td> + <td>Punctuation, Other</td> + </tr> + <tr> + <td align="CENTER">Sm</td> + <td>Symbol, Math</td> + </tr> + <tr> + <td align="CENTER">Sc</td> + <td>Symbol, Currency</td> + </tr> + <tr> + <td align="CENTER">Sk</td> + <td>Symbol, Modifier</td> + </tr> + <tr> + <td align="CENTER">So</td> + <td>Symbol, Other</td> + </tr> +</table> +<h3><a name="Bidirectional Category"></a>Bidirectional Category</h3> +<p>Please refer to Chapter 3 for an explanation of the algorithm for +Bidirectional Behavior and an explanation of the significance of these +categories. An up-to-date version can be found on <a +href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9: +The Bidirectional Algorithm</a>. 
These values are normative.</p> +<table border="0" cellpadding="2"> + <tr> + <th valign="TOP" align="LEFT"> + <p align="LEFT">Type</th> + <th valign="TOP" align="LEFT"> + <p align="LEFT">Description</th> + </tr> + <tr> + <td valign="TOP"><b>L</b></td> + <td valign="TOP">Left-to-Right</td> + </tr> + <tr> + <td valign="TOP"><b>LRE</b></td> + <td valign="TOP">Left-to-Right Embedding</td> + </tr> + <tr> + <td valign="TOP"><b>LRO</b></td> + <td valign="TOP">Left-to-Right Override</td> + </tr> + <tr> + <td valign="TOP"><b>R</b></td> + <td valign="TOP">Right-to-Left</td> + </tr> + <tr> + <td valign="TOP"><b>AL</b></td> + <td valign="TOP">Right-to-Left Arabic</td> + </tr> + <tr> + <td valign="TOP"><b>RLE</b></td> + <td valign="TOP">Right-to-Left Embedding</td> + </tr> + <tr> + <td valign="TOP"><b>RLO</b></td> + <td valign="TOP">Right-to-Left Override</td> + </tr> + <tr> + <td valign="TOP"><b>PDF</b></td> + <td valign="TOP">Pop Directional Format</td> + </tr> + <tr> + <td valign="TOP"><b>EN</b></td> + <td valign="TOP">European Number</td> + </tr> + <tr> + <td valign="TOP"><b>ES</b></td> + <td valign="TOP">European Number Separator</td> + </tr> + <tr> + <td valign="TOP"><b>ET</b></td> + <td valign="TOP">European Number Terminator</td> + </tr> + <tr> + <td valign="TOP"><b>AN</b></td> + <td valign="TOP">Arabic Number</td> + </tr> + <tr> + <td valign="TOP"><b>CS</b></td> + <td valign="TOP">Common Number Separator</td> + </tr> + <tr> + <td valign="TOP"><b>NSM</b></td> + <td valign="TOP">Non-Spacing Mark</td> + </tr> + <tr> + <td valign="TOP"><b>BN</b></td> + <td valign="TOP">Boundary Neutral</td> + </tr> + <tr> + <td valign="TOP"><b>B</b></td> + <td valign="TOP">Paragraph Separator</td> + </tr> + <tr> + <td valign="TOP"><b>S</b></td> + <td valign="TOP">Segment Separator</td> + </tr> + <tr> + <td valign="TOP"><b>WS</b></td> + <td valign="TOP">Whitespace</td> + </tr> + <tr> + <td valign="TOP"><b>ON</b></td> + <td valign="TOP">Other Neutrals</td> + </tr> +</table> +<h3><a 
name="Character Decomposition"></a>Character Decomposition Mapping</h3> +<p>The decomposition is a normative property of a character. The tags supplied +with certain decomposition mappings generally indicate formatting information. +Where no such tag is given, the mapping is designated as canonical. Conversely, +the presence of a formatting tag also indicates that the mapping is a +compatibility mapping and not a canonical mapping. In the absence of other +formatting information in a compatibility mapping, the tag is used to +distinguish it from canonical mappings.</p> +<p>In some instances a canonical mapping or a compatibility mapping may consist +of a single character. For a canonical mapping, this indicates that the +character is a canonical equivalent of another single character. For a +compatibility mapping, this indicates that the character is a compatibility +equivalent of another single character. The compatibility formatting tags used +are:</p> +<table border="0" cellspacing="2" cellpadding="0"> + <tr> + <th>Tag</th> + <th> + <p align="LEFT">Description</th> + </tr> + <tr> + <td align="CENTER"><font> </td> + <td>A font variant (e.g. 
a blackletter form).</td> + </tr> + <tr> + <td align="CENTER"><noBreak> </td> + <td>A no-break version of a space or hyphen.</td> + </tr> + <tr> + <td align="CENTER"><initial> </td> + <td>An initial presentation form (Arabic).</td> + </tr> + <tr> + <td align="CENTER"><medial> </td> + <td>A medial presentation form (Arabic).</td> + </tr> + <tr> + <td align="CENTER"><final> </td> + <td>A final presentation form (Arabic).</td> + </tr> + <tr> + <td align="CENTER"><isolated> </td> + <td>An isolated presentation form (Arabic).</td> + </tr> + <tr> + <td align="CENTER"><circle> </td> + <td>An encircled form.</td> + </tr> + <tr> + <td align="CENTER"><super> </td> + <td>A superscript form.</td> + </tr> + <tr> + <td align="CENTER"><sub> </td> + <td>A subscript form.</td> + </tr> + <tr> + <td align="CENTER"><vertical> </td> + <td>A vertical layout presentation form.</td> + </tr> + <tr> + <td align="CENTER"><wide> </td> + <td>A wide (or zenkaku) compatibility character.</td> + </tr> + <tr> + <td align="CENTER"><narrow> </td> + <td>A narrow (or hankaku) compatibility character.</td> + </tr> + <tr> + <td align="CENTER"><small> </td> + <td>A small variant form (CNS compatibility).</td> + </tr> + <tr> + <td align="CENTER"><square> </td> + <td>A CJK squared font variant.</td> + </tr> + <tr> + <td align="CENTER"><fraction> </td> + <td>A vulgar fraction form.</td> + </tr> + <tr> + <td align="CENTER"><compat> </td> + <td>Otherwise unspecified compatibility character.</td> + </tr> +</table> +<p><b>Reminder: </b>There is a difference between decomposition and +decomposition mapping. 
The decomposition mappings are defined in the UnicodeData, +while the decomposition (also termed "full decomposition") is defined +in Chapter 3 to use those mappings <i>recursively.</i> +<ul> + <li>The canonical decomposition is formed by recursively applying the + canonical mappings, then applying the canonical reordering algorithm.</li> + <li>The compatibility decomposition is formed by recursively applying the + canonical <em>and</em> compatibility mappings, then applying the canonical + reordering algorithm.</li> +</ul> +<h3><a name="Canonical Combining Classes"></a>Canonical Combining Classes</h3> +<table border="0" cellspacing="2" cellpadding="0"> + <tr> + <th> + <p align="LEFT">Value</th> + <th> + <p align="LEFT">Description</th> + </tr> + <tr> + <td align="RIGHT">0:</td> + <td>Spacing, split, enclosing, reordrant, and Tibetan subjoined</td> + </tr> + <tr> + <td align="RIGHT">1:</td> + <td>Overlays and interior</td> + </tr> + <tr> + <td align="RIGHT">7:</td> + <td>Nuktas</td> + </tr> + <tr> + <td align="RIGHT">8:</td> + <td>Hiragana/Katakana voicing marks</td> + </tr> + <tr> + <td align="RIGHT">9:</td> + <td>Viramas</td> + </tr> + <tr> + <td align="RIGHT">10:</td> + <td>Start of fixed position classes</td> + </tr> + <tr> + <td align="RIGHT">199:</td> + <td>End of fixed position classes</td> + </tr> + <tr> + <td align="RIGHT">200:</td> + <td>Below left attached</td> + </tr> + <tr> + <td align="RIGHT">202:</td> + <td>Below attached</td> + </tr> + <tr> + <td align="RIGHT">204:</td> + <td>Below right attached</td> + </tr> + <tr> + <td align="RIGHT">208:</td> + <td>Left attached (reordrant around single base character)</td> + </tr> + <tr> + <td align="RIGHT">210:</td> + <td>Right attached</td> + </tr> + <tr> + <td align="RIGHT">212:</td> + <td>Above left attached</td> + </tr> + <tr> + <td align="RIGHT">214:</td> + <td>Above attached</td> + </tr> + <tr> + <td align="RIGHT">216:</td> + <td>Above right attached</td> + </tr> + <tr> + <td align="RIGHT">218:</td> + 
<td>Below left</td> + </tr> + <tr> + <td align="RIGHT">220:</td> + <td>Below</td> + </tr> + <tr> + <td align="RIGHT">222:</td> + <td>Below right</td> + </tr> + <tr> + <td align="RIGHT">224:</td> + <td>Left (reordrant around single base character)</td> + </tr> + <tr> + <td align="RIGHT">226:</td> + <td>Right</td> + </tr> + <tr> + <td align="RIGHT">228:</td> + <td>Above left</td> + </tr> + <tr> + <td align="RIGHT">230:</td> + <td>Above</td> + </tr> + <tr> + <td align="RIGHT">232:</td> + <td>Above right</td> + </tr> + <tr> + <td align="RIGHT">233:</td> + <td>Double below</td> + </tr> + <tr> + <td align="RIGHT">234:</td> + <td>Double above</td> + </tr> + <tr> + <td align="RIGHT">240:</td> + <td>Below (iota subscript)</td> + </tr> +</table> +<p><strong>Note: </strong>some of the combining classes in this list do not +currently have members but are specified here for completeness.</p> +<h3><a name="Decompositions and Normalization"></a>Decompositions and +Normalization</h3> +<p>Decomposition is specified in Chapter 3. <a +href="http://www.unicode.org/unicode/reports/tr15/"><i>Unicode Standard Annex +#15: Unicode Normalization Forms</i></a> specifies the interaction between decomposition +and normalization. The most up-to-date version is found on <a +href="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>. +That report specifies how the decompositions defined in UnicodeData.txt are used +to derive normalized forms of Unicode text.</p> +<p>Note that as of the 2.1.9 update of the Unicode Character Database, the +decompositions in the UnicodeData.txt file can be used to recursively derive the +full decomposition in canonical order, without the need to separately apply +canonical reordering. 
However, canonical reordering of combining character +sequences must still be applied in decomposition when normalizing source text +which contains any combining marks.</p> +<h3><a name="Case Mappings"></a>Case Mappings</h3> +<p>The case mapping is an informative, default mapping. Case itself, on the +other hand, has normative status. Thus, for example, 0041 LATIN CAPITAL LETTER A +is normatively uppercase, but its lowercase mapping to 0061 LATIN SMALL LETTER +A is informative. The reason for this is that case can be considered to be an +inherent property of a particular character (and is usually, but not always, +derivable from the presence of the terms "CAPITAL" or +"SMALL" in the character name), but case mappings between characters +are occasionally influenced by local conventions. For example, certain +languages, such as Turkish, German, French, or Greek may have small deviations +from the default mappings listed in UnicodeData.</p> +<p>In addition to uppercase and lowercase, because of the inclusion of certain +composite characters for compatibility, such as 01F1 LATIN CAPITAL LETTER DZ, +there is a third case, called <i>titlecase</i>, which is used where the first +letter of a word is to be capitalized (e.g. UPPERCASE, Titlecase, lowercase). An +example of such a titlecase letter is 01F2 LATIN CAPITAL LETTER D WITH SMALL +LETTER Z.</p> +<p>The uppercase, titlecase and lowercase fields are only included for +characters that have a single corresponding character of that type. Composite +characters (such as "339D SQUARE CM") that do not have a single +corresponding character of that type can be cased by decomposition.</p> +<p>For compatibility with existing parsers, UnicodeData only contains case +mappings for characters where they are one-to-one mappings; it also omits +information about context-sensitive case mappings.
Information about these +special cases can be found in a separate data file, <a +href="http://www.unicode.org/Public/3.0-Update1/">SpecialCasing.txt</a>, which +has been added starting with the 2.1.8 update to the Unicode data files. +SpecialCasing.txt contains additional informative case mappings that are either +not one-to-one or which are context-sensitive.</p> +<h2><a name="Property Invariants"></a>Property Invariants</h2> +<p>Values in UnicodeData.txt are subject to correction as errors are found; +however, some characteristics of the categories themselves can be considered +invariants. Applications may wish to take these invariants into account when +choosing how to implement character properties. The following is a partial list +of known invariants for the Unicode Character Database.</p> +<h4>Database Fields</h4> +<ul> + <li>The number of fields in UnicodeData.txt is fixed.</li> + <li>The order of the fields is also fixed. + <ul> + <li>Any additional information about character properties to be added in + the future will appear in separate data tables, rather than being added + on to the existing table or by subdivision or reinterpretation of + existing fields.</li> + </ul> + </li> +</ul> +<h4>General Category</h4> +<ul> + <li>There will never be more than 32 General Category values. + <ul> + <li>It is very unlikely that the Unicode Technical Committee will + subdivide the General Category partition any further, since that can + cause implementations to misbehave. Because the General Category is + limited to 32 values, 5 bits can be used to represent the information, + and a 32-bit integer can be used as a bitmask to represent arbitrary + sets of categories.</li> + </ul> + </li> +</ul> +<h4>Combining Classes</h4> +<ul> + <li>Combining classes are limited to the values 0 to 255. + <ul> + <li>In practice, there are far fewer than 256 values used. 
Implementations + may take advantage of this fact for compression, since only the ordering + of the non-zero values matters for the Canonical Reordering Algorithm. + It is possible for up to 256 values to be used in the future; however, + UTC decisions in the future may restrict the number of values to 128, + since this has implementation advantages. [Signed bytes can be used + without widening to ints in Java, for example.]</li> + </ul> + </li> + <li>All characters other than those of General Category M* have the combining + class 0. + <ul> + <li>Currently, all characters other than those of General Category Mn have + the value 0. However, some characters of General Category Me or Mc may + be given non-zero values in the future.</li> + <li>The precise values above the value 0 are not invariant--only the + relative ordering is considered normative. For example, it is not + guaranteed in future versions that the class of U+05B4 will be precisely + 14.</li> + </ul> + </li> +</ul> +<h4>Case</h4> +<ul> + <li>Characters of type Lu, Lt, or Ll are called <i>cased</i>. All characters + with an Upper, Lower, or Titlecase mapping are cased characters. + <ul> + <li>However, characters with the General Categories of Lu, Ll, or Lt may + not always have case mappings, and case mappings may vary by locale. 
+ (See http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt).</li> + </ul> + </li> +</ul> +<h4>Canonical Decomposition</h4> +<ul> + <li>Canonical mappings are always in canonical order.</li> + <li>Canonical mappings have only the first of a pair possibly further + decomposing.</li> + <li>Canonical decompositions are "transparent" to other character + data: + <ul> + <li><tt>BIDI(a) = BIDI(principal(canonicalDecomposition(a)))</tt></li> + <li><tt>Category(a) = Category(principal(canonicalDecomposition(a)))</tt></li> + <li><tt>CombiningClass(a) = + CombiningClass(principal(canonicalDecomposition(a)))</tt><br> + where principal(a) is the first character not of type Mn, or the first + character if all characters are of type Mn.</li> + </ul> + </li> + <li>However, because there are sometimes missing case pairs, and because of + some legacy characters, it is only generally true that: + <ul> + <li><tt>upper(canonicalDecomposition(a)) = canonicalDecomposition(upper(a))</tt></li> + <li><tt>lower(canonicalDecomposition(a)) = canonicalDecomposition(lower(a))</tt></li> + <li><tt>title(canonicalDecomposition(a)) = canonicalDecomposition(title(a))</tt></li> + </ul> + </li> +</ul> +<h2><a name="Modification History"></a>Modification History</h2> +<p>This section provides a summary of the changes between update versions of the +Unicode Standard.</p> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.1">Unicode +3.0.1</a></h3> +<p>Modifications made for Version 3.0.1 of UnicodeData.txt include: +<ul> + <li>Added 5- and 6-digit representation of code points past U+FFFF.</li> + <li>Added Private Use range definitions for Planes 15 and 16.</li> + <li>Minor additions for the 10646 comment field.</li> +</ul> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.0">Unicode +3.0.0</a></h3> +<p>Modifications made for Version 3.0.0 of UnicodeData.txt include many new +characters and a number of
property changes. These are summarized in Appendix D +of <em>The Unicode Standard, Version 3.0.</em></p> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.9">Unicode +2.1.9</a></h3> +<p>Modifications made for Version 2.1.9 of UnicodeData.txt include: +<ul> + <li>Corrected combining class for U+05AE HEBREW ACCENT ZINOR.</li> + <li>Corrected combining class for U+20E1 COMBINING LEFT RIGHT ARROW ABOVE</li> + <li>Corrected combining class for U+0F35 and U+0F37 to 220.</li> + <li>Corrected combining class for U+0F71 to 129.</li> + <li>Added a decomposition for U+0F0C TIBETAN MARK DELIMITER TSHEG BSTAR.</li> + <li>Added decompositions for several Greek symbol letters: + U+03D0..U+03D2, U+03D5, U+03D6, U+03F0..U+03F2.</li> + <li>Removed decompositions from the conjoining jamo block: + U+1100..U+11F8.</li> + <li>Changes to decomposition mappings for some Tibetan vowels for consistency + in normalization. (U+0F71, U+0F73, U+0F77, U+0F79, U+0F81)</li> + <li>Updated the decomposition mappings for several Vietnamese characters with + two diacritics (U+1EAC, U+1EAD, U+1EB6, U+1EB7, U+1EC6, U+1EC7, U+1ED8, + U+1ED9), so that the recursive decomposition can be generated directly in + canonically reordered form (not a normative change).</li> + <li>Updated the decomposition mappings for several Arabic compatibility + characters involving shadda (U+FC5E..U+FC62, U+FCF2..U+FCF4), and two Latin + characters (U+1E1C, U+1E1D), so that the decompositions are generated + directly in canonically reordered form (not a normative change).</li> + <li>Changed BIDI category for: U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE, + U+2028 LINE SEPARATOR.</li> + <li>Changed BIDI category for extenders of General Category Lm: U+3005, + U+3021..U+3035, U+FF9E, U+FF9F.</li> + <li>Changed General Category and BIDI category for the Greek numeral signs: + U+0374, U+0375.</li> + <li>Corrected General Category for U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL.</li> +
<li>Added Unicode 1.0 names for many Tibetan characters (informative).</li> +</ul> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.8">Unicode +2.1.8</a></h3> +<p>Modifications made for Version 2.1.8 of UnicodeData.txt include: +<ul> + <li>Added combining class 240 for U+0345 COMBINING GREEK YPOGEGRAMMENI so that + decompositions involving iota subscript are derivable directly in + canonically reordered form; this also has a bearing on simplification of + casing of polytonic Greek.</li> + <li>Changes in decompositions related to Greek tonos. These result from the + clarification that monotonic Greek "tonos" should be equated with + U+0301 COMBINING ACUTE, rather than with U+030D COMBINING VERTICAL LINE + ABOVE. (All Greek characters in the Greek block involving "tonos"; + some Greek characters in the polytonic Greek in the 1FXX block.)</li> + <li>Changed decompositions involving dialytika tonos. (U+0390, U+03B0)</li> + <li>Changed ternary decompositions to binary. (U+0CCB, U+FB2C, U+FB2D) These + changes simplify normalization.</li> + <li>Removed canonical decomposition for Latin Candrabindu. (U+0310)</li> + <li>Corrected error in canonical decomposition for U+1FF4.</li> + <li>Added compatibility decompositions to clarify collation tables. (U+2100, + U+2101, U+2105, U+2106, U+1E9A)</li> + <li>A series of general category changes to assist the convergence of the + Unicode definition of identifier with ISO TR 10176: + <ul> + <li>So > Lo: U+0950, U+0AD0, U+0F00, U+0F88..U+0F8B</li> + <li>Po > Lo: U+0E2F, U+0EAF, U+3006</li> + <li>Lm > Sk: U+309B, U+309C</li> + <li>Po > Pc: U+30FB, U+FF65</li> + <li>Ps/Pe > Mn: U+0F3E, U+0F3F</li> + </ul> + </li> + <li>A series of bidi property changes for consistency.
+ <ul> + <li>L > ET: U+09F2, U+09F3</li> + <li>ON > L: U+3007</li> + <li>L > ON: U+0F3A..U+0F3D, U+037E, U+0387</li> + </ul> + </li> + <li>Add case mapping: U+01A6 <-> U+0280</li> + <li>Updated symmetric swapping value for guillemets: U+00AB, U+00BB, U+2039, + U+203A.</li> + <li>Changes to combining class values. Most Indic fixed position class + non-spacing marks were changed to combining class 0. This fixes some + inconsistencies in how canonical reordering would apply to Indic scripts, + including Tibetan. Indic interacting top/bottom fixed position classes were + merged into single (non-zero) classes as part of this change. Tibetan + subjoined consonants are changed from combining class 6 to combining class + 0. Thai pinthu (U+0E3A) moved to combining class 9. Moved two Devanagari + stress marks into generic above and below combining classes (U+0951, + U+0952).</li> + <li>Corrected placement of semicolon near symmetric swapping field. (U+FA0E, + etc., scattered positions to U+FA29)</li> +</ul> +<h3>Version 2.1.7</h3> +<p><i>This version was for internal change tracking only, and never publicly +released.</i></p> +<h3>Version 2.1.6</h3> +<p><i>This version was for internal change tracking only, and never publicly +released.</i></p> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.5">Unicode +2.1.5</a></h3> +<p>Modifications made for Version 2.1.5 of UnicodeData.txt include: +<ul> + <li>Changed decomposition for U+FF9E and U+FF9F so that correct collation + weighting will automatically result from the canonical equivalences.</li> + <li>Removed canonical decompositions for U+04D4, U+04D5, U+04D8, U+04D9, + U+04E0, U+04E1, U+04E8, U+04E9 (the implication being that no canonical + equivalence is claimed between these 8 characters and similar Latin + letters), and updated 4 canonical decompositions for U+04DB, U+04DC, U+04EA, + U+04EB to reflect the implied difference in the base character.</li> + <li>Added Pi, and 
Pf categories and assigned the relevant quotation marks to + those categories, based on the Unicode Technical Corrigendum on Quotation + Characters.</li> + <li>Updating of many bidi properties, following the advice of the ad hoc + committee on bidi, and to make the bidi properties of compatibility + characters more consistent.</li> + <li>Changed category of several Tibetan characters: U+0F3E, U+0F3F, + U+0F88..U+0F8B to make them non-combining, reflecting the combined opinion + of Tibetan experts.</li> + <li>Added case mapping for U+03F2.</li> + <li>Corrected case mapping for U+0275.</li> + <li>Added titlecase mappings for U+03D0, U+03D1, U+03D5, U+03D6, U+03F0.. + U+03F2.</li> + <li>Corrected compatibility label for U+2121.</li> + <li>Add specific entries for all the CJK compatibility ideographs, + U+F900..U+FA2D, so the canonical decomposition for each (the URO character + it is equivalent to) can be carried in the database.</li> +</ul> +<h3>Version 2.1.4</h3> +<p><i>This version was for internal change tracking only, and never publicly +released.</i></p> +<h3>Version 2.1.3</h3> +<p><i>This version was for internal change tracking only, and never publicly +released.</i></p> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.2">Unicode +2.1.2</a></h3> +<p>Modifications made in updating UnicodeData.txt to Version 2.1.2 for the +Unicode Standard, Version 2.1 (from Version 2.0) include: +<ul> + <li>Added two characters (U+20AC and U+FFFC).</li> + <li>Amended bidi properties for U+0026, U+002E, U+0040, U+2007.</li> + <li>Corrected case mappings for U+018E, U+019F, U+01DD, U+0258, U+0275, + U+03C2, U+1E9B.</li> + <li>Changed combining order class for U+0F71.</li> + <li>Corrected canonical decompositions for U+0F73, U+1FBE.</li> + <li>Changed decomposition for U+FB1F from compatibility to canonical.</li> + <li>Added compatibility decompositions for U+FBE8, U+FBE9, U+FBF9..U+FBFB.</li> + <li>Corrected compatibility 
decompositions for U+2469, U+246A, U+3358.</li> +</ul> +<h3>Version 2.1.1</h3> +<p><i>This version was for internal change tracking only, and never publicly +released.</i></p> +<h3><a +href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.0.0">Unicode +2.0.0</a></h3> +<p>The modifications made in updating UnicodeData.txt for the Unicode Standard, +Version 2.0 include: +<ul> + <li>Fixed decompositions with TONOS to use correct NSM: 030D.</li> + <li>Removed old Hangul Syllables; mapping to new characters are in a separate + table.</li> + <li>Marked compatibility decompositions with additional tags.</li> + <li>Changed old tag names for clarity.</li> + <li>Revision of decompositions to use first-level decomposition, instead of + maximal decomposition.</li> + <li>Correction of all known errors in decompositions from earlier versions.</li> + <li>Added control code names (as old Unicode names).</li> + <li>Added Hangul Jamo decompositions.</li> + <li>Added Number category to match properties list in book.</li> + <li>Fixed categories of Koranic Arabic marks.</li> + <li>Fixed categories of precomposed characters to match decomposition where + possible.</li> + <li>Added Hebrew cantillation marks and the Tibetan script.</li> + <li>Added place holders for ranges such as CJK Ideographic Area and the + Private Use Area.</li> + <li>Added categories Me, Sk, Pc, Nl, Cs, Cf, and rectified a number of + mistakes in the database.</li> +</ul> + +</body> + +</html> diff --git a/lib/unicode/Unicode.300 b/lib/unicode/Unicode.301 index 6a54d3d74e..6804a75065 100644 --- a/lib/unicode/Unicode.300 +++ b/lib/unicode/Unicode.301 @@ -130,7 +130,7 @@ 0081;<control>;Cc;0;BN;;;;;N;;;;; 0082;<control>;Cc;0;BN;;;;;N;BREAK PERMITTED HERE;;;; 0083;<control>;Cc;0;BN;;;;;N;NO BREAK HERE;;;; -0084;<control>;Cc;0;BN;;;;;N;INDEX;;;; +0084;<control>;Cc;0;BN;;;;;N;;;;; 0085;<control>;Cc;0;B;;;;;N;NEXT LINE;;;; 0086;<control>;Cc;0;BN;;;;;N;START OF SELECTED AREA;;;; 
0087;<control>;Cc;0;BN;;;;;N;END OF SELECTED AREA;;;; @@ -420,7 +420,7 @@ 01A3;LATIN SMALL LETTER OI;Ll;0;L;;;;;N;LATIN SMALL LETTER O I;gha;01A2;;01A2 01A4;LATIN CAPITAL LETTER P WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER P HOOK;;;01A5; 01A5;LATIN SMALL LETTER P WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER P HOOK;;01A4;;01A4 -01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;;;0280; +01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;*;;0280; 01A7;LATIN CAPITAL LETTER TONE TWO;Lu;0;L;;;;;N;;;;01A8; 01A8;LATIN SMALL LETTER TONE TWO;Ll;0;L;;;;;N;;;01A7;;01A7 01A9;LATIN CAPITAL LETTER ESH;Lu;0;L;;;;;N;;;;0283; @@ -608,7 +608,7 @@ 027D;LATIN SMALL LETTER R WITH TAIL;Ll;0;L;;;;;N;LATIN SMALL LETTER R HOOK;;;; 027E;LATIN SMALL LETTER R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER FISHHOOK R;;;; 027F;LATIN SMALL LETTER REVERSED R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER REVERSED FISHHOOK R;;;; -0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;;01A6;;01A6 +0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;*;01A6;;01A6 0281;LATIN LETTER SMALL CAPITAL INVERTED R;Ll;0;L;;;;;N;;;;; 0282;LATIN SMALL LETTER S WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER S HOOK;;;; 0283;LATIN SMALL LETTER ESH;Ll;0;L;;;;;N;;;01A9;;01A9 @@ -2639,16 +2639,16 @@ 0F27;TIBETAN DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 0F28;TIBETAN DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 0F29;TIBETAN DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; -0F2A;TIBETAN DIGIT HALF ONE;No;0;L;;;;;N;;;;; -0F2B;TIBETAN DIGIT HALF TWO;No;0;L;;;;;N;;;;; -0F2C;TIBETAN DIGIT HALF THREE;No;0;L;;;;;N;;;;; -0F2D;TIBETAN DIGIT HALF FOUR;No;0;L;;;;;N;;;;; -0F2E;TIBETAN DIGIT HALF FIVE;No;0;L;;;;;N;;;;; -0F2F;TIBETAN DIGIT HALF SIX;No;0;L;;;;;N;;;;; -0F30;TIBETAN DIGIT HALF SEVEN;No;0;L;;;;;N;;;;; -0F31;TIBETAN DIGIT HALF EIGHT;No;0;L;;;;;N;;;;; -0F32;TIBETAN DIGIT HALF NINE;No;0;L;;;;;N;;;;; -0F33;TIBETAN DIGIT HALF ZERO;No;0;L;;;;;N;;;;; +0F2A;TIBETAN DIGIT HALF ONE;No;0;L;;;;1/2;N;;;;; +0F2B;TIBETAN DIGIT HALF TWO;No;0;L;;;;3/2;N;;;;; +0F2C;TIBETAN DIGIT HALF 
THREE;No;0;L;;;;5/2;N;;;;; +0F2D;TIBETAN DIGIT HALF FOUR;No;0;L;;;;7/2;N;;;;; +0F2E;TIBETAN DIGIT HALF FIVE;No;0;L;;;;9/2;N;;;;; +0F2F;TIBETAN DIGIT HALF SIX;No;0;L;;;;11/2;N;;;;; +0F30;TIBETAN DIGIT HALF SEVEN;No;0;L;;;;13/2;N;;;;; +0F31;TIBETAN DIGIT HALF EIGHT;No;0;L;;;;15/2;N;;;;; +0F32;TIBETAN DIGIT HALF NINE;No;0;L;;;;17/2;N;;;;; +0F33;TIBETAN DIGIT HALF ZERO;No;0;L;;;;-1/2;N;;;;; 0F34;TIBETAN MARK BSDUS RTAGS;So;0;L;;;;;N;;du ta;;; 0F35;TIBETAN MARK NGAS BZUNG NYI ZLA;Mn;220;NSM;;;;;N;TIBETAN HONORIFIC UNDER RING;nge zung nyi da;;; 0F36;TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN;So;0;L;;;;;N;;dzu ta shi mig chen;;; @@ -2789,7 +2789,7 @@ 0FCA;TIBETAN SYMBOL NOR BU NYIS -KHYIL;So;0;L;;;;;N;;norbu nyi khyi;;; 0FCB;TIBETAN SYMBOL NOR BU GSUM -KHYIL;So;0;L;;;;;N;;norbu sum khyi;;; 0FCC;TIBETAN SYMBOL NOR BU BZHI -KHYIL;So;0;L;;;;;N;;norbu shi khyi;;; -0FCF;TIBETAN SIGN RDEL NAG GSUM;So;0;L;;;;;N;;;;; +0FCF;TIBETAN SIGN RDEL NAG GSUM;So;0;L;;;;;N;;dena sum;;; 1000;MYANMAR LETTER KA;Lo;0;L;;;;;N;;;;; 1001;MYANMAR LETTER KHA;Lo;0;L;;;;;N;;;;; 1002;MYANMAR LETTER GA;Lo;0;L;;;;;N;;;;; @@ -7518,10 +7518,10 @@ 318E;HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;; 3190;IDEOGRAPHIC ANNOTATION LINKING MARK;So;0;L;;;;;N;KANBUN TATETEN;Kanbun Tateten;;; 3191;IDEOGRAPHIC ANNOTATION REVERSE MARK;So;0;L;;;;;N;KAERITEN RE;Kaeriten;;; -3192;IDEOGRAPHIC ANNOTATION ONE MARK;No;0;L;<super> 4E00;;;;N;KAERITEN ITI;Kaeriten;;; -3193;IDEOGRAPHIC ANNOTATION TWO MARK;No;0;L;<super> 4E8C;;;;N;KAERITEN NI;Kaeriten;;; -3194;IDEOGRAPHIC ANNOTATION THREE MARK;No;0;L;<super> 4E09;;;;N;KAERITEN SAN;Kaeriten;;; -3195;IDEOGRAPHIC ANNOTATION FOUR MARK;No;0;L;<super> 56DB;;;;N;KAERITEN SI;Kaeriten;;; +3192;IDEOGRAPHIC ANNOTATION ONE MARK;No;0;L;<super> 4E00;;;1;N;KAERITEN ITI;Kaeriten;;; +3193;IDEOGRAPHIC ANNOTATION TWO MARK;No;0;L;<super> 4E8C;;;2;N;KAERITEN NI;Kaeriten;;; +3194;IDEOGRAPHIC ANNOTATION THREE MARK;No;0;L;<super> 4E09;;;3;N;KAERITEN SAN;Kaeriten;;; 
+3195;IDEOGRAPHIC ANNOTATION FOUR MARK;No;0;L;<super> 56DB;;;4;N;KAERITEN SI;Kaeriten;;; 3196;IDEOGRAPHIC ANNOTATION TOP MARK;So;0;L;<super> 4E0A;;;;N;KAERITEN ZYOU;Kaeriten;;; 3197;IDEOGRAPHIC ANNOTATION MIDDLE MARK;So;0;L;<super> 4E2D;;;;N;KAERITEN TYUU;Kaeriten;;; 3198;IDEOGRAPHIC ANNOTATION BOTTOM MARK;So;0;L;<super> 4E0B;;;;N;KAERITEN GE;Kaeriten;;; @@ -7585,16 +7585,16 @@ 321A;PARENTHESIZED HANGUL PHIEUPH A;So;0;L;<compat> 0028 1111 1161 0029;;;;N;PARENTHESIZED HANGUL PA;;;; 321B;PARENTHESIZED HANGUL HIEUH A;So;0;L;<compat> 0028 1112 1161 0029;;;;N;PARENTHESIZED HANGUL HA;;;; 321C;PARENTHESIZED HANGUL CIEUC U;So;0;L;<compat> 0028 110C 116E 0029;;;;N;PARENTHESIZED HANGUL JU;;;; -3220;PARENTHESIZED IDEOGRAPH ONE;No;0;L;<compat> 0028 4E00 0029;;;;N;;;;; -3221;PARENTHESIZED IDEOGRAPH TWO;No;0;L;<compat> 0028 4E8C 0029;;;;N;;;;; -3222;PARENTHESIZED IDEOGRAPH THREE;No;0;L;<compat> 0028 4E09 0029;;;;N;;;;; -3223;PARENTHESIZED IDEOGRAPH FOUR;No;0;L;<compat> 0028 56DB 0029;;;;N;;;;; -3224;PARENTHESIZED IDEOGRAPH FIVE;No;0;L;<compat> 0028 4E94 0029;;;;N;;;;; -3225;PARENTHESIZED IDEOGRAPH SIX;No;0;L;<compat> 0028 516D 0029;;;;N;;;;; -3226;PARENTHESIZED IDEOGRAPH SEVEN;No;0;L;<compat> 0028 4E03 0029;;;;N;;;;; -3227;PARENTHESIZED IDEOGRAPH EIGHT;No;0;L;<compat> 0028 516B 0029;;;;N;;;;; -3228;PARENTHESIZED IDEOGRAPH NINE;No;0;L;<compat> 0028 4E5D 0029;;;;N;;;;; -3229;PARENTHESIZED IDEOGRAPH TEN;No;0;L;<compat> 0028 5341 0029;;;;N;;;;; +3220;PARENTHESIZED IDEOGRAPH ONE;No;0;L;<compat> 0028 4E00 0029;;;1;N;;;;; +3221;PARENTHESIZED IDEOGRAPH TWO;No;0;L;<compat> 0028 4E8C 0029;;;2;N;;;;; +3222;PARENTHESIZED IDEOGRAPH THREE;No;0;L;<compat> 0028 4E09 0029;;;3;N;;;;; +3223;PARENTHESIZED IDEOGRAPH FOUR;No;0;L;<compat> 0028 56DB 0029;;;4;N;;;;; +3224;PARENTHESIZED IDEOGRAPH FIVE;No;0;L;<compat> 0028 4E94 0029;;;5;N;;;;; +3225;PARENTHESIZED IDEOGRAPH SIX;No;0;L;<compat> 0028 516D 0029;;;6;N;;;;; +3226;PARENTHESIZED IDEOGRAPH SEVEN;No;0;L;<compat> 0028 4E03 0029;;;7;N;;;;; 
+3227;PARENTHESIZED IDEOGRAPH EIGHT;No;0;L;<compat> 0028 516B 0029;;;8;N;;;;; +3228;PARENTHESIZED IDEOGRAPH NINE;No;0;L;<compat> 0028 4E5D 0029;;;9;N;;;;; +3229;PARENTHESIZED IDEOGRAPH TEN;No;0;L;<compat> 0028 5341 0029;;;10;N;;;;; 322A;PARENTHESIZED IDEOGRAPH MOON;So;0;L;<compat> 0028 6708 0029;;;;N;;;;; 322B;PARENTHESIZED IDEOGRAPH FIRE;So;0;L;<compat> 0028 706B 0029;;;;N;;;;; 322C;PARENTHESIZED IDEOGRAPH WATER;So;0;L;<compat> 0028 6C34 0029;;;;N;;;;; @@ -10615,3 +10615,7 @@ FFFA;INTERLINEAR ANNOTATION SEPARATOR;Cf;0;BN;;;;;N;;;;; FFFB;INTERLINEAR ANNOTATION TERMINATOR;Cf;0;BN;;;;;N;;;;; FFFC;OBJECT REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; +F0000;<Plane 15 Private Use, First>;Co;0;L;;;;;N;;;;; +FFFFD;<Plane 15 Private Use, Last>;Co;0;L;;;;;N;;;;; +100000;<Plane 16 Private Use, First>;Co;0;L;;;;;N;;;;; +10FFFD;<Plane 16 Private Use, Last>;Co;0;L;;;;;N;;;;; diff --git a/lib/unicode/Unicode3.html b/lib/unicode/Unicode3.html deleted file mode 100644 index a08a25ec75..0000000000 --- a/lib/unicode/Unicode3.html +++ /dev/null @@ -1,1988 +0,0 @@ -<html> - - - -<head> - -<meta NAME="GENERATOR" CONTENT="Microsoft FrontPage 4.0"> - -<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> - -<link REL="stylesheet" HREF="http://www.unicode.org/unicode.css" TYPE="text/css"> - -<title>UnicodeData File Format</title> - -</head> - - - -<body> - - - -<h1>UnicodeData File Format<br> -Version 3.0.0</h1> - - - -<table BORDER="1" CELLSPACING="2" CELLPADDING="0" HEIGHT="87" WIDTH="100%"> - - <tr> - - <td VALIGN="TOP" width="144">Revision</td> - - <td VALIGN="TOP">3.0.0</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Authors</td> - - <td VALIGN="TOP">Mark Davis and Ken Whistler</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Date</td> - - <td VALIGN="TOP">1999-09-12</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">This Version</td> - - <td VALIGN="TOP"><a 
href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Previous Version</td> - - <td VALIGN="TOP">n/a</td> - - </tr> - - <tr> - - <td VALIGN="TOP" width="144">Latest Version</td> - - <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td> - - </tr> - -</table> - - - -<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.<br> - -<i>For more information, including Disclamer and Limitations, see <a HREF="UnicodeCharacterDatabase-3.0.0.html">UnicodeCharacterDatabase-3.0.0.html</a> </i></p> - - - -<p>This document describes the format of the UnicodeData.txt file, which is one of the - -files in the Unicode Character Database. The document is divided into the following - -sections: - - - -<ul> - - <li><a HREF="#Field Formats">Field Formats</a> <ul> - - <li><a HREF="#General Category">General Category</a> </li> - - <li><a HREF="#Bidirectional Category">Bidirectional Category</a> </li> - - <li><a HREF="#Character Decomposition">Character Decomposition Mapping</a> </li> - - <li><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </li> - - <li><a HREF="#Decompositions and Normalization">Decompositions and Normalization</a> </li> - - <li><a HREF="#Case Mappings">Case Mappings</a> </li> - - </ul> - - </li> - - <li><a HREF="#Property Invariants">Property Invariants</a> </li> - - <li><a HREF="#Modification History">Modification History</a> </li> - -</ul> - - - -<p><b>Warning: </b>the information in this file does not completely describe the use and - -interpretation of Unicode character properties and behavior. 
It must be used in - -conjunction with the data in the other files in the Unicode Character Database, and relies - -on the notation and definitions supplied in <i><a href="http://www.unicode.org/unicode/standard/versions/Unicode3.0.html"> The Unicode -Standard</a></i>. All chapter references - -are to Version 3.0 of the standard.</p> - - - -<h2><a NAME="Field Formats"></a>Field Formats</h2> - - - -<p>The file consists of lines containing fields terminated by semicolons. Each line - -represents the data for one encoded character in the Unicode Standard. Every encoded - -character has a data entry, with the exception of certain special ranges, as detailed - -below. - - - -<ul> - - <li>There are six special ranges of characters that are represented only by their start and - - end characters, since the properties in the file are uniform, except for code values - - (which are all sequential and assigned). </li> - - <li>The names of CJK ideograph characters and the names and decompositions of Hangul - - syllable characters are algorithmically derivable. (See the Unicode Standard and <a - - HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode Technical Report #15</a> for - - more information). </li> - - <li>Surrogate code values and private use characters have no names. </li> - - <li>The Private Use character outside of the BMP (U+F0000..U+FFFFD, U+100000..U+10FFFD) are - - not listed. These correspond to surrogate pairs where the first surrogate is in the High - - Surrogate Private Use section. 
</li> - -</ul> - - - -<p>The exact ranges represented by start and end characters are: - - - -<ul> - - <li>CJK Ideographs Extension A (U+3400 - U+4DB5) </li> - - <li>CJK Ideographs (U+4E00 - U+9FA5) </li> - - <li>Hangul Syllables (U+AC00 - U+D7A3) </li> - - <li>Non-Private Use High Surrogates (U+D800 - U+DB7F) </li> - - <li>Private Use High Surrogates (U+DB80 - U+DBFF) </li> - - <li>Low Surrogates (U+DC00 - U+DFFF) </li> - - <li>The Private Use Area (U+E000 - U+F8FF) </li> - -</ul> - - - -<p>The following table describes the format and meaning of each field in a data entry in - -the UnicodeData file. Fields which contain normative information are so indicated.</p> - - - -<table BORDER="1" CELLSPACING="2" CELLPADDING="2"> - - <tr> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Field</th> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Name</th> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Status</th> - - <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Explanation</th> - - </tr> - - <tr> - - <th VALIGN="top">0</th> - - <td VALIGN="top">Code value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">Code value in 4-digit hexadecimal format.</td> - - </tr> - - <tr> - - <th VALIGN="top">1</th> - - <td VALIGN="top">Character name</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">These names match exactly the names published in Chapter 14 of the - - Unicode Standard, Version 3.0.</td> - - </tr> - - <tr> - - <th VALIGN="top">2</th> - - <td VALIGN="top"><a HREF="#General Category">General Category</a> </td> - - <td VALIGN="top">normative / informative<br> - - (see below)</td> - - <td VALIGN="top">This is a useful breakdown into various "character types" which - - can be used as a default categorization in implementations. 
See below for a brief - - explanation.</td> - - </tr> - - <tr> - - <th VALIGN="top">3</th> - - <td VALIGN="top"><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">The classes used for the Canonical Ordering Algorithm in the Unicode - - Standard. These classes are also printed in Chapter 4 of the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">4</th> - - <td VALIGN="top"><a HREF="#Bidirectional Category">Bidirectional Category</a> </td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">See the list below for an explanation of the abbreviations used in this - - field. These are the categories required by the Bidirectional Behavior Algorithm in the - - Unicode Standard. These categories are summarized in Chapter 3 of the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">5</th> - - <td VALIGN="top"><a HREF="#Character Decomposition">Character Decomposition - Mapping</a></td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">In the Unicode Standard, not all of the mappings are full (maximal) - - decompositions. Recursive application of look-up for decompositions will, in all cases, - - lead to a maximal decomposition. The decomposition mappings match exactly the - - decomposition mappings published with the character names in the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">6</th> - - <td VALIGN="top">Decimal digit value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">This is a numeric field. If the character has the decimal digit property, - - as specified in Chapter 4 of the Unicode Standard, the value of that digit is represented - - with an integer value in this field</td> - - </tr> - - <tr> - - <th VALIGN="top">7</th> - - <td VALIGN="top">Digit value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">This is a numeric field. 
If the character represents a digit, not - - necessarily a decimal digit, the value is here. This covers digits which do not form - - decimal radix forms, such as the compatibility superscript digits</td> - - </tr> - - <tr> - - <th VALIGN="top">8</th> - - <td VALIGN="top">Numeric value</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">This is a numeric field. If the character has the numeric property, as - - specified in Chapter 4 of the Unicode Standard, the value of that character is represented - - with an integer or rational number in this field. This includes fractions as, e.g., - - "1/5" for U+2155 VULGAR FRACTION ONE FIFTH Also included are numerical values - - for compatibility characters such as circled numbers.</td> - - </tr> - - <tr> - - <th VALIGN="top">8</th> - - <td VALIGN="top">Mirrored</td> - - <td VALIGN="top">normative</td> - - <td VALIGN="top">If the character has been identified as a "mirrored" character - - in bidirectional text, this field has the value "Y"; otherwise "N". - - The list of mirrored characters is also printed in Chapter 4 of the Unicode Standard.</td> - - </tr> - - <tr> - - <th VALIGN="top">10</th> - - <td VALIGN="top">Unicode 1.0 Name</td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">This is the old name as published in Unicode 1.0. This name is only - - provided when it is significantly different from the Unicode 3.0 name for the character.</td> - - </tr> - - <tr> - - <th VALIGN="top">11</th> - - <td VALIGN="top">10646 comment field</td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">This is the ISO 10646 comment field. It is in parantheses in the 10646 - - names list.</td> - - </tr> - - <tr> - - <th VALIGN="top">12</th> - - <td VALIGN="top"><a HREF="#Case Mappings">Uppercase Mapping</a></td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">Upper case equivalent mapping. 
If a character is part of an alphabet with - - case distinctions, and has an upper case equivalent, then the upper case equivalent is in - - this field. See the explanation below on case distinctions. These mappings are always - - one-to-one, not one-to-many or many-to-one. This field is informative.</td> - - </tr> - - <tr> - - <th VALIGN="top">13</th> - - <td VALIGN="top"><a HREF="#Case Mappings">Lowercase Mapping</a></td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">Similar to Uppercase mapping</td> - - </tr> - - <tr> - - <th VALIGN="top">14</th> - - <td VALIGN="top"><a HREF="#Case Mappings">Titlecase Mapping</a></td> - - <td VALIGN="top">informative</td> - - <td VALIGN="top">Similar to Uppercase mapping</td> - - </tr> - -</table> - - - -<h3><a NAME="General Category"></a>General Category</h3> - - - -<p>The values in this field are abbreviations for the following. Some of the values are - -normative, and some are informative. For more information, see the Unicode Standard.</p> - - - -<p><b>Note:</b> the standard does not assign information to control characters (except for - -certain cases in the Bidirectional Algorithm). 
Implementations will generally also assign - -categories to certain control characters, notably CR and LF, according to platform - -conventions.</p> - - - -<h4>Normative Categories</h4> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th><p ALIGN="LEFT">Abbr.</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lu</td> - - <td>Letter, Uppercase</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Ll</td> - - <td>Letter, Lowercase</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lt</td> - - <td>Letter, Titlecase</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Mn</td> - - <td>Mark, Non-Spacing</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Mc</td> - - <td>Mark, Spacing Combining</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Me</td> - - <td>Mark, Enclosing</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Nd</td> - - <td>Number, Decimal Digit</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Nl</td> - - <td>Number, Letter</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">No</td> - - <td>Number, Other</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Zs</td> - - <td>Separator, Space</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Zl</td> - - <td>Separator, Line</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Zp</td> - - <td>Separator, Paragraph</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cc</td> - - <td>Other, Control</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cf</td> - - <td>Other, Format</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cs</td> - - <td>Other, Surrogate</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Co</td> - - <td>Other, Private Use</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Cn</td> - - <td>Other, Not Assigned (no characters in the file have this property)</td> - - </tr> - -</table> - - - -<h4>Informative Categories</h4> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th><p ALIGN="LEFT">Abbr.</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td 
ALIGN="CENTER">Lm</td> - - <td>Letter, Modifier</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Lo</td> - - <td>Letter, Other</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pc</td> - - <td>Punctuation, Connector</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pd</td> - - <td>Punctuation, Dash</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Ps</td> - - <td>Punctuation, Open</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pe</td> - - <td>Punctuation, Close</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pi</td> - - <td>Punctuation, Initial quote (may behave like Ps or Pe depending on usage)</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Pf</td> - - <td>Punctuation, Final quote (may behave like Ps or Pe depending on usage)</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Po</td> - - <td>Punctuation, Other</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Sm</td> - - <td>Symbol, Math</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Sc</td> - - <td>Symbol, Currency</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">Sk</td> - - <td>Symbol, Modifier</td> - - </tr> - - <tr> - - <td ALIGN="CENTER">So</td> - - <td>Symbol, Other</td> - - </tr> - -</table> - - - -<h3><a NAME="Bidirectional Category"></a>Bidirectional Category</h3> - - - -<p>Please refer to Chapter 3 for an explanation of the algorithm for Bidirectional - -Behavior and an explanation of the significance of these categories. An up-to-date version - -can be found on <a HREF="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical - -Report #9: The Bidirectional Algorithm</a>. 
These values are normative.</p> - - - -<table BORDER="0" CELLPADDING="2"> - - <tr> - - <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Type</th> - - <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>L</b></td> - - <td VALIGN="TOP">Left-to-Right</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>LRE</b></td> - - <td VALIGN="TOP">Left-to-Right Embedding</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>LRO</b></td> - - <td VALIGN="TOP">Left-to-Right Override</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>R</b></td> - - <td VALIGN="TOP">Right-to-Left</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>AL</b></td> - - <td VALIGN="TOP">Right-to-Left Arabic</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>RLE</b></td> - - <td VALIGN="TOP">Right-to-Left Embedding</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>RLO</b></td> - - <td VALIGN="TOP">Right-to-Left Override</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>PDF</b></td> - - <td VALIGN="TOP">Pop Directional Format</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>EN</b></td> - - <td VALIGN="TOP">European Number</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>ES</b></td> - - <td VALIGN="TOP">European Number Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>ET</b></td> - - <td VALIGN="TOP">European Number Terminator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>AN</b></td> - - <td VALIGN="TOP">Arabic Number</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>CS</b></td> - - <td VALIGN="TOP">Common Number Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>NSM</b></td> - - <td VALIGN="TOP">Non-Spacing Mark</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>BN</b></td> - - <td VALIGN="TOP">Boundary Neutral</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>B</b></td> - - <td VALIGN="TOP">Paragraph Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>S</b></td> - - <td VALIGN="TOP">Segment Separator</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>WS</b></td> - - <td 
VALIGN="TOP">Whitespace</td> - - </tr> - - <tr> - - <td VALIGN="TOP"><b>ON</b></td> - - <td VALIGN="TOP">Other Neutrals</td> - - </tr> - -</table> - - - -<h3><a NAME="Character Decomposition"></a>Character Decomposition Mapping</h3> - - - -<p>The decomposition is a normative property of a character. The tags supplied with - -certain decomposition mappings generally indicate formatting information. Where no such - -tag is given, the mapping is designated as canonical. Conversely, the presence of a - -formatting tag also indicates that the mapping is a compatibility mapping and not a - -canonical mapping. In the absence of other formatting information in a compatibility - -mapping, the tag is used to distinguish it from canonical mappings.</p> - - - -<p>In some instances a canonical mapping or a compatibility mapping may consist of a - -single character. For a canonical mapping, this indicates that the character is a - -canonical equivalent of another single character. For a compatibility mapping, this - -indicates that the character is a compatibility equivalent of another single character. - -The compatibility formatting tags used are:</p> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th>Tag</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="CENTER"><font> </td> - - <td>A font variant (e.g. 
a blackletter form).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><noBreak> </td> - - <td>A no-break version of a space or hyphen.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><initial> </td> - - <td>An initial presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><medial> </td> - - <td>A medial presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><final> </td> - - <td>A final presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><isolated> </td> - - <td>An isolated presentation form (Arabic).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><circle> </td> - - <td>An encircled form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><super> </td> - - <td>A superscript form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><sub> </td> - - <td>A subscript form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><vertical> </td> - - <td>A vertical layout presentation form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><wide> </td> - - <td>A wide (or zenkaku) compatibility character.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><narrow> </td> - - <td>A narrow (or hankaku) compatibility character.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><small> </td> - - <td>A small variant form (CNS compatibility).</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><square> </td> - - <td>A CJK squared font variant.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><fraction> </td> - - <td>A vulgar fraction form.</td> - - </tr> - - <tr> - - <td ALIGN="CENTER"><compat> </td> - - <td>Otherwise unspecified compatibility character.</td> - - </tr> - -</table> - - - -<p><b>Reminder: </b>There is a difference between decomposition and decomposition mapping. 
- -The decomposition mappings are defined in the UnicodeData, while the decomposition (also - -termed "full decomposition") is defined in Chapter 3 to use those mappings -<i> - -recursively.</i> - - - -<ul> - - <li>The canonical decomposition is formed by recursively applying the canonical mappings, - - then applying the canonical reordering algorithm. </li> - - <li>The compatibility decomposition is formed by recursively applying the canonical <em>and</em> - - compatibility mappings, then applying the canonical reordering algorithm. </li> - -</ul> - - - -<h3><a NAME="Canonical Combining Classes"></a>Canonical Combining Classes</h3> - - - -<table BORDER="0" CELLSPACING="2" CELLPADDING="0"> - - <tr> - - <th><p ALIGN="LEFT">Value</th> - - <th><p ALIGN="LEFT">Description</th> - - </tr> - - <tr> - - <td ALIGN="RIGHT">0:</td> - - <td>Spacing, split, enclosing, reordrant, and Tibetan subjoined</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">1:</td> - - <td>Overlays and interior</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">7:</td> - - <td>Nuktas</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">8:</td> - - <td>Hiragana/Katakana voicing marks</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">9:</td> - - <td>Viramas</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">10:</td> - - <td>Start of fixed position classes</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">199:</td> - - <td>End of fixed position classes</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">200:</td> - - <td>Below left attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">202:</td> - - <td>Below attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">204:</td> - - <td>Below right attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">208:</td> - - <td>Left attached (reordrant around single base character)</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">210:</td> - - <td>Right attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">212:</td> - - <td>Above left attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">214:</td> - - 
<td>Above attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">216:</td> - - <td>Above right attached</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">218:</td> - - <td>Below left</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">220:</td> - - <td>Below</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">222:</td> - - <td>Below right</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">224:</td> - - <td>Left (reordrant around single base character)</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">226:</td> - - <td>Right</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">228:</td> - - <td>Above left</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">230:</td> - - <td>Above</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">232:</td> - - <td>Above right</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">233:</td> - - <td>Double below</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">234:</td> - - <td>Double above</td> - - </tr> - - <tr> - - <td ALIGN="RIGHT">240:</td> - - <td>Below (iota subscript)</td> - - </tr> - -</table> - - - -<p><strong>Note: </strong>some of the combining classes in this list do not currently have - -members but are specified here for completeness.</p> - - - -<h3><a NAME="Decompositions and Normalization"></a>Decompositions and Normalization</h3> - - - -<p>Decomposition is specified in Chapter 3. <a href="http://www.unicode.org/unicode/reports/tr15/"><i>Unicode Technical Report #15: - -Normalization Forms</i></a> specifies the interaction between decomposition and normalization. The - -most up-to-date version is found on <a HREF="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>. 
- -That report specifies how the decompositions defined in UnicodeData.txt are used to derive - -normalized forms of Unicode text.</p> - - - -<p>Note that as of the 2.1.9 update of the Unicode Character Database, the decompositions - -in the UnicodeData.txt file can be used to recursively derive the full decomposition in - -canonical order, without the need to separately apply canonical reordering. However, - -canonical reordering of combining character sequences must still be applied in - -decomposition when normalizing source text which contains any combining marks.</p> - - - -<h3><a NAME="Case Mappings"></a>Case Mappings</h3> - - - -<p>The case mapping is an informative, default mapping. Case itself, on the other hand, - -has normative status. Thus, for example, 0041 LATIN CAPITAL LETTER A is normatively - -uppercase, but its lowercase mapping the 0061 LATIN SMALL LETTER A is informative. The - -reason for this is that case can be considered to be an inherent property of a particular - -character (and is usually, but not always, derivable from the presence of the terms - -"CAPITAL" or "SMALL" in the character name), but case mappings between - -characters are occasionally influenced by local conventions. For example, certain - -languages, such as Turkish, German, French, or Greek may have small deviations from the - -default mappings listed in UnicodeData.</p> - - - -<p>In addition to uppercase and lowercase, because of the inclusion of certain composite - -characters for compatibility, such as 01F1 LATIN CAPITAL LETTER DZ, there is a third case, - -called <i>titlecase</i>, which is used where the first letter of a word is to be - -capitalized (e.g. UPPERCASE, Titlecase, lowercase). An example of such a titlecase letter - -is 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</p> - - - -<p>The uppercase, titlecase and lowercase fields are only included for characters that - -have a single corresponding character of that type. 
Composite characters (such as - -"339D SQUARE CM") that do not have a single corresponding character of that type - -can be cased by decomposition.</p> - - - -<p>For compatibility with existing parsers, UnicodeData only contains case mappings for - -characters where they are one-to-one mappings; it also omits information about - -context-sensitive case mappings. Information about these special cases can be found in a - -separate data file, SpecialCasing.txt, - -which has been added starting with the 2.1.8 update to the Unicode data files. - -SpecialCasing.txt contains additional informative case mappings that are either not - -one-to-one or which are context-sensitive.</p> - - - -<h2><a NAME="Property Invariants"></a>Property Invariants</h2> - - - -<p>Values in UnicodeData.txt are subject to correction as errors are found; however, some - -characteristics of the categories themselves can be considered invariants. Applications - -may wish to take these invariants into account when choosing how to implement character - -properties. The following is a partial list of known invariants for the Unicode Character - -Database.</p> - - - -<h4>Database Fields</h4> - - - -<ul> - - <li>The number of fields in UnicodeData.txt is fixed. </li> - - <li>The order of the fields is also fixed. <ul> - - <li>Any additional information about character properties to be added in the future will - - appear in separate data tables, rather than being added on to the existing table or by - - subdivision or reinterpretation of existing fields. </li> - - </ul> - - </li> - -</ul> - - - -<h4>General Category</h4> - - - -<ul> - - <li>There will never be more than 32 General Category values. <ul> - - <li>It is very unlikely that the Unicode Technical Committee will subdivide the General - - Category partition any further, since that can cause implementations to misbehave. 
Because - - the General Category is limited to 32 values, 5 bits can be used to represent the - - information, and a 32-bit integer can be used as a bitmask to represent arbitrary sets of - - categories. </li> - - </ul> - - </li> - -</ul> - - - -<h4>Combining Classes</h4> - - - -<ul> - - <li>Combining classes are limited to the values 0 to 255. <ul> - - <li>In practice, there are far fewer than 256 values used. Implementations may take - - advantage of this fact for compression, since only the ordering of the non-zero values - - matters for the Canonical Reordering Algorithm. It is possible for up to 256 values to be - - used in the future; however, UTC decisions in the future may restrict the number of values - - to 128, since this has implementation advantages. [Signed bytes can be used without - - widening to ints in Java, for example.] </li> - - </ul> - - </li> - - <li>All characters other than those of General Category M* have the combining class 0. <ul> - - <li>Currently, all characters other than those of General Category Mn have the value 0. - - However, some characters of General Category Me or Mc may be given non-zero values in the - - future. </li> - - <li>The precise values above the value 0 are not invariant--only the relative ordering is - - considered normative. For example, it is not guaranteed in future versions that the class - - of U+05B4 will be precisely 14. </li> - - </ul> - - </li> - -</ul> - - - -<h4>Case</h4> - - - -<ul> - - <li>Characters of type Lu, Lt, or Ll are called <i>cased</i>. All characters with an Upper, - - Lower, or Titlecase mapping are cased characters. <ul> - - <li>However, characters with the General Categories of Lu, Ll, or Lt may not always have - - case mappings, and case mappings may vary by locale. (See - - ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt). </li> - - </ul> - - </li> - -</ul> - - - -<h4>Canonical Decomposition</h4> - - - -<ul> - - <li>Canonical mappings are always in canonical order. 
</li> - - <li>Canonical mappings have only the first of a pair possibly further decomposing. </li> - - <li>Canonical decompositions are "transparent" to other character data: <ul> - - <li><tt>BIDI(a) = BIDI(principal(canonicalDecomposition(a))</tt> </li> - - <li><tt>Category(a) = Category(principal(canonicalDecomposition(a))</tt> </li> - - <li><tt>CombiningClass(a) = CombiningClass(principal(canonicalDecomposition(a))</tt><br> - - where principal(a) is the first character not of type Mn, or the first character if all - - characters are of type Mn. </li> - - </ul> - - </li> - - <li>However, because there are sometimes missing case pairs, and because of some legacy - - characters, it is only generally true that: <ul> - - <li><tt>upper(canonicalDecomposition(a)) = canonicalDecomposition(upper(a))</tt> </li> - - <li><tt>lower(canonicalDecomposition(a)) = canonicalDecomposition(lower(a))</tt> </li> - - <li><tt>title(canonicalDecomposition(a)) = canonicalDecomposition(title(a))</tt> </li> - - </ul> - - </li> - -</ul> - - - -<h2><a NAME="Modification History"></a>Modification History</h2> - - - -<p>This section provides a summary of the changes between update versions of the Unicode - -Standard.</p> - - - -<h3><a href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.0"> Unicode 3.0.0</a></h3> - - - -<p>Modifications made for Version 3.0.0 of UnicodeData.txt include many new characters and - -a number of property changes. These are summarized in Appendex D of <em>The Unicode - -Standard, Version 3.0.</em></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.9">Unicode 2.1.9</a> </h3> - - - -<p>Modifications made for Version 2.1.9 of UnicodeData.txt include: - - - -<ul> - - <li>Corrected combining class for U+05AE HEBREW ACCENT ZINOR. 
</li> - - <li>Corrected combining class for U+20E1 COMBINING LEFT RIGHT ARROW ABOVE </li> - - <li>Corrected combining class for U+0F35 and U+0F37 to 220. </li> - - <li>Corrected combining class for U+0F71 to 129. </li> - - <li>Added a decomposition for U+0F0C TIBETAN MARK DELIMITER TSHEG BSTAR. </li> - - <li>Added decompositions for several Greek symbol letters: U+03D0..U+03D2, U+03D5, - - U+03D6, U+03F0..U+03F2. </li> - - <li>Removed decompositions from the conjoining jamo block: U+1100..U+11F8. </li> - - <li>Changes to decomposition mappings for some Tibetan vowels for consistency in - - normalization. (U+0F71, U+0F73, U+0F77, U+0F79, U+0F81) </li> - - <li>Updated the decomposition mappings for several Vietnamese characters with two diacritics - - (U+1EAC, U+1EAD, U+1EB6, U+1EB7, U+1EC6, U+1EC7, U+1ED8, U+1ED9), so that the recursive - - decomposition can be generated directly in canonically reordered form (not a normative - - change). </li> - - <li>Updated the decomposition mappings for several Arabic compatibility characters involving - - shadda (U+FC5E..U+FC62, U+FCF2..U+FCF4), and two Latin characters (U+1E1C, U+1E1D), so - - that the decompositions are generated directly in canonically reordered form (not a - - normative change). </li> - - <li>Changed BIDI category for: U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE, U+2028 LINE - - SEPARATOR. </li> - - <li>Changed BIDI category for extenders of General Category Lm: U+3005, U+3021..U+3035, - - U+FF9E, U+FF9F. </li> - - <li>Changed General Category and BIDI category for the Greek numeral signs: U+0374, U+0375. </li> - - <li>Corrected General Category for U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL. </li> - - <li>Added Unicode 1.0 names for many Tibetan characters (informative). 
</li> - -</ul> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.8">Unicode 2.1.8</a> </h3> - - - -<p>Modifications made for Version 2.1.8 of UnicodeData.txt include: - - - -<ul> - - <li>Added combining class 240 for U+0345 COMBINING GREEK YPOGEGRAMMENI so that - - decompositions involving iota subscript are derivable directly in canonically reordered - - form; this also has a bearing on simplification of casing of polytonic Greek. </li> - - <li>Changes in decompositions related to Greek tonos. These result from the clarification - - that monotonic Greek "tonos" should be equated with U+0301 COMBINING ACUTE, - - rather than with U+030D COMBINING VERTICAL LINE ABOVE. (All Greek characters in the Greek - - block involving "tonos"; some Greek characters in the polytonic Greek in the - - 1FXX block.) </li> - - <li>Changed decompositions involving dialytika tonos. (U+0390, U+03B0) </li> - - <li>Changed ternary decompositions to binary. (U+0CCB, U+FB2C, U+FB2D) These changes - - simplify normalization. </li> - - <li>Removed canonical decomposition for Latin Candrabindu. (U+0310) </li> - - <li>Corrected error in canonical decomposition for U+1FF4. </li> - - <li>Added compatibility decompositions to clarify collation tables. (U+2100, U+2101, U+2105, - - U+2106, U+1E9A) </li> - - <li>A series of general category changes to assist the convergence of of Unicode definition - - of identifier with ISO TR 10176: <ul> - - <li>So > Lo: U+0950, U+0AD0, U+0F00, U+0F88..U+0F8B </li> - - <li>Po > Lo: U+0E2F, U+0EAF, U+3006 </li> - - <li>Lm > Sk: U+309B, U+309C </li> - - <li>Po > Pc: U+30FB, U+FF65 </li> - - <li>Ps/Pe > Mn: U+0F3E, U+0F3F </li> - - </ul> - - </li> - - <li>A series of bidi property changes for consistency. 
<ul> - - <li>L > ET: U+09F2, U+09F3 </li> - - <li>ON > L: U+3007 </li> - - <li>L > ON: U+0F3A..U+0F3D, U+037E, U+0387 </li> - - </ul> - - </li> - - <li>Add case mapping: U+01A6 <-> U+0280 </li> - - <li>Updated symmetric swapping value for guillemets: U+00AB, U+00BB, U+2039, U+203A. </li> - - <li>Changes to combining class values. Most Indic fixed position class non-spacing marks - - were changed to combining class 0. This fixes some inconsistencies in how canonical - - reordering would apply to Indic scripts, including Tibetan. Indic interacting top/bottom - - fixed position classes were merged into single (non-zero) classes as part of this change. - - Tibetan subjoined consonants are changed from combining class 6 to combining class 0. Thai - - pinthu (U+0E3A) moved to combining class 9. Moved two Devanagari stress marks into generic - - above and below combining classes (U+0951, U+0952). </li> - - <li>Corrected placement of semicolon near symmetric swapping field. (U+FA0E, etc., scattered - - positions to U+FA29) </li> - -</ul> - - - -<h3>Version 2.1.7</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3>Version 2.1.6</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.5">Unicode 2.1.5</a> </h3> - - - -<p>Modifications made for Version 2.1.5 of UnicodeData.txt include: - - - -<ul> - - <li>Changed decomposition for U+FF9E and U+FF9F so that correct collation weighting will - - automatically result from the canonical equivalences. 
</li> - - <li>Removed canonical decompositions for U+04D4, U+04D5, U+04D8, U+04D9, U+04E0, U+04E1, - - U+04E8, U+04E9 (the implication being that no canonical equivalence is claimed between - - these 8 characters and similar Latin letters), and updated 4 canonical decompositions for - - U+04DB, U+04DC, U+04EA, U+04EB to reflect the implied difference in the base character. </li> - - <li>Added Pi, and Pf categories and assigned the relevant quotation marks to those - - categories, based on the Unicode Technical Corrigendum on Quotation Characters. </li> - - <li>Updating of many bidi properties, following the advice of the ad hoc committee on bidi, - - and to make the bidi properties of compatibility characters more consistent. </li> - - <li>Changed category of several Tibetan characters: U+0F3E, U+0F3F, U+0F88..U+0F8B to make - - them non-combining, reflecting the combined opinion of Tibetan experts. </li> - - <li>Added case mapping for U+03F2. </li> - - <li>Corrected case mapping for U+0275. </li> - - <li>Added titlecase mappings for U+03D0, U+03D1, U+03D5, U+03D6, U+03F0.. U+03F2. </li> - - <li>Corrected compatibility label for U+2121. </li> - - <li>Add specific entries for all the CJK compatibility ideographs, U+F900..U+FA2D, so the - - canonical decomposition for each (the URO character it is equivalent to) can be carried in - - the database. </li> - -</ul> - - - -<h3>Version 2.1.4</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3>Version 2.1.3</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.2">Unicode 2.1.2</a> </h3> - - - -<p>Modifications made in updating UnicodeData.txt to Version 2.1.2 for the Unicode - -Standard, Version 2.1 (from Version 2.0) include: - - - -<ul> - - <li>Added two characters (U+20AC and U+FFFC). 
</li> - - <li>Amended bidi properties for U+0026, U+002E, U+0040, U+2007. </li> - - <li>Corrected case mappings for U+018E, U+019F, U+01DD, U+0258, U+0275, U+03C2, U+1E9B. </li> - - <li>Changed combining order class for U+0F71. </li> - - <li>Corrected canonical decompositions for U+0F73, U+1FBE. </li> - - <li>Changed decomposition for U+FB1F from compatibility to canonical. </li> - - <li>Added compatibility decompositions for U+FBE8, U+FBE9, U+FBF9..U+FBFB. </li> - - <li>Corrected compatibility decompositions for U+2469, U+246A, U+3358. </li> - -</ul> - - - -<h3>Version 2.1.1</h3> - - - -<p><i>This version was for internal change tracking only, and never publicly released.</i></p> - - - -<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.0.0">Unicode 2.0.0</a> </h3> - - - -<p>The modifications made in updating UnicodeData.txt for the Unicode - -Standard, Version 2.0 include: - - - -<ul> - - <li>Fixed decompositions with TONOS to use correct NSM: 030D. </li> - - <li>Removed old Hangul Syllables; mapping to new characters are in a separate table. </li> - - <li>Marked compatibility decompositions with additional tags. </li> - - <li>Changed old tag names for clarity. </li> - - <li>Revision of decompositions to use first-level decomposition, instead of maximal - - decomposition. </li> - - <li>Correction of all known errors in decompositions from earlier versions. </li> - - <li>Added control code names (as old Unicode names). </li> - - <li>Added Hangul Jamo decompositions. </li> - - <li>Added Number category to match properties list in book. </li> - - <li>Fixed categories of Koranic Arabic marks. </li> - - <li>Fixed categories of precomposed characters to match decomposition where possible. </li> - - <li>Added Hebrew cantillation marks and the Tibetan script. </li> - - <li>Added place holders for ranges such as CJK Ideographic Area and the Private Use Area. 
</li> - - <li>Added categories Me, Sk, Pc, Nl, Cs, Cf, and rectified a number of mistakes in the - - database. </li> - -</ul> - -</body> - -</html> - diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index 241d2e6bb3..37b6e84874 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -2,16 +2,16 @@ use bytes; -$UnicodeData = "Unicode.300"; +$UnicodeData = "Unicode.301"; $SyllableData = "syllables.txt"; -$PropData = "Props.txt"; +$PropData = "PropList.txt"; # Note: we try to keep filenames unique within first 8 chars. Using # subdirectories for the following helps. -mkdir "In", 0777; -mkdir "Is", 0777; -mkdir "To", 0777; +mkdir "In", 0755; +mkdir "Is", 0755; +mkdir "To", 0755; @todo = ( # typical @@ -161,7 +161,7 @@ mkdir "To", 0777; # Number - ['Number', '$num', '$num'], + ['Number', '$num ne ""', '$num'], # Mirrored diff --git a/lib/utf8.pm b/lib/utf8.pm index 17ec37bbe2..6d6c0eb503 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -1,5 +1,7 @@ package utf8; +if (ord('A') != 193) { # make things more pragmatic for EBCDIC folk + $utf8::hint_bits = 0x00800000; sub import { @@ -13,7 +15,10 @@ sub unimport { sub AUTOLOAD { require "utf8_heavy.pl"; - goto &$AUTOLOAD; + goto &$AUTOLOAD if defined &$AUTOLOAD; + Carp::croak("Undefined subroutine $AUTOLOAD called"); +} + } 1; @@ -44,7 +49,9 @@ in future we would like to standardize on the UTF-8 encoding for source text. Until UTF-8 becomes the default format for source text, this pragma should be used to recognize UTF-8 in the source. When UTF-8 becomes the standard source format, this pragma will -effectively become a no-op. +effectively become a no-op. This pragma already is a no-op on +EBCDIC platforms (where it is alright to code perl in EBCDIC +rather than UTF-8). 
Enabling the C<utf8> pragma has the following effects: diff --git a/lib/warnings.pm b/lib/warnings.pm index ac6d919954..2517239365 100644 --- a/lib/warnings.pm +++ b/lib/warnings.pm @@ -26,6 +26,14 @@ warnings - Perl pragma to control optional warnings warnings::warn("void", "some warning"); } + if (warnings::enabled($object)) { + warnings::warn($object, "some warning"); + } + + warnif("some warning"); + warnif("void", "some warning"); + warnif($object, "some warning"); + =head1 DESCRIPTION If no import list is supplied, all possible warnings are either enabled @@ -37,26 +45,78 @@ A number of functions are provided to assist module authors. =item use warnings::register -Creates a new warnings category which has the same name as the module -where the call to the pragma is used. +Creates a new warnings category with the same name as the package where +the call to the pragma is used. + +=item warnings::enabled() + +Use the warnings category with the same name as the current package. + +Return TRUE if that warnings category is enabled in the calling module. +Otherwise returns FALSE. + +=item warnings::enabled($category) + +Return TRUE if the warnings category, C<$category>, is enabled in the +calling module. +Otherwise returns FALSE. + +=item warnings::enabled($object) + +Use the name of the class for the object reference, C<$object>, as the +warnings category. + +Return TRUE if that warnings category is enabled in the first scope +where the object is used. +Otherwise returns FALSE. + +=item warnings::warn($message) + +Print C<$message> to STDERR. + +Use the warnings category with the same name as the current package. + +If that warnings category has been set to "FATAL" in the calling module +then die. Otherwise return. + +=item warnings::warn($category, $message) + +Print C<$message> to STDERR. + +If the warnings category, C<$category>, has been set to "FATAL" in the +calling module then die. Otherwise return. 
+ +=item warnings::warn($object, $message) -=item warnings::enabled([$category]) +Print C<$message> to STDERR. -Returns TRUE if the warnings category C<$category> is enabled in the -calling module. Otherwise returns FALSE. +Use the name of the class for the object reference, C<$object>, as the +warnings category. -If the parameter, C<$category>, isn't supplied, the current package name -will be used. +If that warnings category has been set to "FATAL" in the scope where C<$object> +is first used then die. Otherwise return. -=item warnings::warn([$category,] $message) -If the calling module has I<not> set C<$category> to "FATAL", print -C<$message> to STDERR. -If the calling module has set C<$category> to "FATAL", print C<$message> -STDERR then die. +=item warnings::warnif($message) -If the parameter, C<$category>, isn't supplied, the current package name -will be used. +Equivalent to: + + if (warnings::enabled()) + { warnings::warn($message) } + +=item warnings::warnif($category, $message) + +Equivalent to: + + if (warnings::enabled($category)) + { warnings::warn($category, $message) } + +=item warnings::warnif($object, $message) + +Equivalent to: + + if (warnings::enabled($object)) + { warnings::warn($object, $message) } =back @@ -243,44 +303,80 @@ sub bits { sub import { shift; - ${^WARNING_BITS} |= bits(@_ ? @_ : 'all') ; + my $mask = ${^WARNING_BITS} ; + if (vec($mask, $Offsets{'all'}, 1)) { + $mask |= $Bits{'all'} ; + $mask |= $DeadBits{'all'} if vec($mask, $Offsets{'all'}+1, 1); + } + ${^WARNING_BITS} = $mask | bits(@_ ? @_ : 'all') ; } sub unimport { shift; my $mask = ${^WARNING_BITS} ; if (vec($mask, $Offsets{'all'}, 1)) { - $mask = $Bits{'all'} ; + $mask |= $Bits{'all'} ; $mask |= $DeadBits{'all'} if vec($mask, $Offsets{'all'}+1, 1); } ${^WARNING_BITS} = $mask & ~ (bits(@_ ? 
@_ : 'all') | $All) ; } -sub enabled +sub __chk { - croak("Usage: warnings::enabled([category])") - unless @_ == 1 || @_ == 0 ; - local $Carp::CarpLevel = 1 ; my $category ; my $offset ; - my $callers_bitmask = (caller(1))[9] ; - return 0 unless defined $callers_bitmask ; - + my $isobj = 0 ; if (@_) { # check the category supplied. $category = shift ; + if (ref $category) { + croak ("not an object") + if $category !~ /^([^=]+)=/ ;+ + $category = $1 ; + $isobj = 1 ; + } $offset = $Offsets{$category}; croak("unknown warnings category '$category'") unless defined $offset; } else { - $category = (caller(0))[0] ; + $category = (caller(1))[0] ; $offset = $Offsets{$category}; croak("package '$category' not registered for warnings") unless defined $offset ; } + my $this_pkg = (caller(1))[0] ; + my $i = 2 ; + my $pkg ; + + if ($isobj) { + while (do { { package DB; $pkg = (caller($i++))[0] } } ) { + last unless @DB::args && $DB::args[0] =~ /^$category=/ ; + } + $i -= 2 ; + } + else { + for ($i = 2 ; $pkg = (caller($i))[0] ; ++ $i) { + last if $pkg ne $this_pkg ; + } + $i = 2 + if !$pkg || $pkg eq $this_pkg ; + } + + my $callers_bitmask = (caller($i))[9] ; + return ($callers_bitmask, $offset, $i) ; +} + +sub enabled +{ + croak("Usage: warnings::enabled([category])") + unless @_ == 1 || @_ == 0 ; + + my ($callers_bitmask, $offset, $i) = __chk(@_) ; + + return 0 unless defined $callers_bitmask ; return vec($callers_bitmask, $offset, 1) || vec($callers_bitmask, $Offsets{'all'}, 1) ; } @@ -290,29 +386,34 @@ sub warn { croak("Usage: warnings::warn([category,] 'message')") unless @_ == 2 || @_ == 1 ; - local $Carp::CarpLevel = 1 ; - my $category ; - my $offset ; - my $callers_bitmask = (caller(1))[9] ; - - if (@_ == 2) { - $category = shift ; - $offset = $Offsets{$category}; - croak("unknown warnings category '$category'") - unless defined $offset ; - } - else { - $category = (caller(0))[0] ; - $offset = $Offsets{$category}; - croak("package '$category' not registered for 
warnings") - unless defined $offset ; - } - my $message = shift ; + my $message = pop ; + my ($callers_bitmask, $offset, $i) = __chk(@_) ; + local $Carp::CarpLevel = $i ; croak($message) if vec($callers_bitmask, $offset+1, 1) || vec($callers_bitmask, $Offsets{'all'}+1, 1) ; carp($message) ; } +sub warnif +{ + croak("Usage: warnings::warnif([category,] 'message')") + unless @_ == 2 || @_ == 1 ; + + my $message = pop ; + my ($callers_bitmask, $offset, $i) = __chk(@_) ; + local $Carp::CarpLevel = $i ; + + return + unless defined $callers_bitmask && + (vec($callers_bitmask, $offset, 1) || + vec($callers_bitmask, $Offsets{'all'}, 1)) ; + + croak($message) + if vec($callers_bitmask, $offset+1, 1) || + vec($callers_bitmask, $Offsets{'all'}+1, 1) ; + + carp($message) ; +} 1; |