diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-10-04 22:54:06 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-10-04 22:54:06 +0000 |
commit | 84f709e736e1ecec2cb204663711a2f0ea2f0e83 (patch) | |
tree | f49dca05395f0f8d5415825af4685103b1084cf0 /pod/perlebcdic.pod | |
parent | 35b2760ac6eea1581f6fe2a3565b2105801fc51a (diff) | |
download | perl-84f709e736e1ecec2cb204663711a2f0ea2f0e83.tar.gz |
Retract #12313 and #12249.
p4raw-id: //depot/perl@12338
Diffstat (limited to 'pod/perlebcdic.pod')
-rw-r--r-- | pod/perlebcdic.pod | 343 |
1 files changed, 128 insertions, 215 deletions
diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index be2a2e3d8c..c98b46c6e7 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -130,29 +130,8 @@ work with a pod2_other_format translation) through: =back - perldoc -m perlebcdic | \ - perl -ne 'if(/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)' \ - -e '{printf("%s%-9o%-9o%-9o%o\n",$1,$2,$3,$4,$5)}' - -Or, as a script, called like C<perldoc -m perlebcdic | extract.pl>: - - my $regex = qr/ - (.{33}) # any 33 characters - - (\d+)\s+ # capture some digits, discard spaces - (\d+)\s+ # ".." - (\d+)\s+ # ".." - (\d+) # capture some digits - /x; - - while ( <> ) { - if ( $_ =~ $regex ) { - printf( - "%s%-9o%-9o%-9o%o\n", - $1, $2, $3, $4, $5, - ); - } - } + perl -ne 'if(/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)' \ + -e '{printf("%s%-9o%-9o%-9o%o\n",$1,$2,$3,$4,$5)}' perlebcdic.pod If you want to retain the UTF-x code points then in script form you might want to write: @@ -163,47 +142,20 @@ might want to write: =back - my $regex = qr/ - (.{33}) # $1: any 33 characters - - (\d+)\s+ # $2, $3, $4, $5: - (\d+)\s+ # capture some digits, discard spaces - (\d+)\s+ # 4 times - (\d+)\s+ - - (\d+) # $6: capture some digits, - \.? # there may be a period, - (\d*) # $7: capture some digits if they're there, - \s+ # discard spaces - - (\d+) # $8: capture some digits - \.? # there may be a period, - (\d*) # $9: capture some digits if they're there, - /x; - - open( FH, 'perldoc -m perlebcdic |' ) || - die "Could not open perlebcdic.pod: $!"; - while ( <FH> ) { - if ( $_ =~ $regex ) { - if ( $7 ne '' && $9 ne '' ) { - printf( - "%s%-9o%-9o%-9o%-9o%-3o.%-5o%-3o.%o\n", - $1, $2, $3, $4, $5, $6, $7, $8, $9 - ); - } elsif ( $7 ne '' ) { - printf( - "%s%-9o%-9o%-9o%-9o%-3o.%-5o%o\n", - $1, $2, $3, $4, $5, $6, $7, $8 - ); - } else { - printf( - "%s%-9o%-9o%-9o%-9o%-9o%o\n", - $1, $2, $3, $4, $5, $6, $8 - ); + open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!"; + while (<FH>) { + if (/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/) { + if ($7 ne '' && $9 ne '') { + printf("%s%-9o%-9o%-9o%-9o%-3o.%-5o%-3o.%o\n",$1,$2,$3,$4,$5,$6,$7,$8,$9); + } + elsif ($7 ne '') { + printf("%s%-9o%-9o%-9o%-9o%-3o.%-5o%o\n",$1,$2,$3,$4,$5,$6,$7,$8); + } + else { + printf("%s%-9o%-9o%-9o%-9o%-9o%o\n",$1,$2,$3,$4,$5,$6,$8); } } } - close FH; If you would rather see this table listing hexadecimal values then run the table through: @@ -214,9 +166,8 @@ run the table through: =back - perldoc -m perlebcdic | \ - perl -ne 'if(/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)' \ - -e '{printf("%s%-9X%-9X%-9X%X\n",$1,$2,$3,$4,$5)}' + perl -ne 'if(/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)' \ + -e '{printf("%s%-9X%-9X%-9X%X\n",$1,$2,$3,$4,$5)}' perlebcdic.pod Or, in order to retain the UTF-x code points in hexadecimal: @@ -226,50 +177,21 @@ Or, in order to retain the UTF-x code points in hexadecimal: =back - my $regex = qr/ - (.{33}) # $1: any 33 characters - - (\d+)\s+ # $2, $3, $4, $5: - (\d+)\s+ # capture some digits, discard spaces - (\d+)\s+ # 4 times - (\d+)\s+ - - (\d+) # $6: capture some digits, - \.? # there may be a period, - (\d*) # $7: capture some digits if they're there, - \s+ # discard spaces - - (\d+) # $8: capture some digits - \.? # there may be a period, - (\d*) # $9: capture some digits if they're there, - /x; - - open( FH, 'perldoc -m perlebcdic |' ) || - die "Could not open perlebcdic.pod: $!"; + open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!"; while (<FH>) { - if ( $_ =~ $regex ) { - if ( $7 ne '' && $9 ne '' ) { - printf( - "%s%-9X%-9X%-9X%-9X%-2X.%-6X%-2X.%X\n", - $1, $2, $3, $4, $5, $6, $7, $8, $9 - ); + if (/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/) { + if ($7 ne '' && $9 ne '') { + printf("%s%-9X%-9X%-9X%-9X%-2X.%-6X%-2X.%X\n",$1,$2,$3,$4,$5,$6,$7,$8,$9); } - elsif ( $7 ne '' ) { - printf( - "%s%-9X%-9X%-9X%-9X%-2X.%-6X%X\n", - $1, $2, $3, $4, $5, $6, $7, $8 - ); + elsif ($7 ne '') { + printf("%s%-9X%-9X%-9X%-9X%-2X.%-6X%X\n",$1,$2,$3,$4,$5,$6,$7,$8); } else { - printf( - "%s%-9X%-9X%-9X%-9X%-9X%X\n", - $1, $2, $3, $4, $5, $6, $8 - ); + printf("%s%-9X%-9X%-9X%-9X%-9X%X\n",$1,$2,$3,$4,$5,$6,$8); } } } -=head2 THE SINGLE OCTET TABLE incomp- incomp- 8859-1 lete lete @@ -532,7 +454,6 @@ Or, in order to retain the UTF-x code points in hexadecimal: <SMALL LETTER thorn> 254 142 142 142 195.190 139.114 <y WITH DIAERESIS> 255 223 223 223 195.191 139.115 - If you would rather see the above table in CCSID 0037 order rather than ASCII + Latin-1 order then run the table through: @@ -542,12 +463,11 @@ ASCII + Latin-1 order then run the table through: =back - perldoc -m perlebcdic | \ - perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)' \ - -e '{push(@l,$_)}' \ - -e 'END{print map{$_->[0]}' \ - -e 'sort{$a->[1] <=> $b->[1]}' \ - -e 'map{[$_,substr($_,42,3)]}@l;}' + perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ + -e '{push(@l,$_)}' \ + -e 'END{print map{$_->[0]}' \ + -e ' sort{$a->[1] <=> $b->[1]}' \ + -e ' map{[$_,substr($_,42,3)]}@l;}' perlebcdic.pod If you would rather see it in CCSID 1047 order then change the digit 42 in the last line to 51, like this: @@ -558,12 +478,11 @@ If you would rather see it in CCSID 1047 order then change the digit =back - perldoc -m perlebcdic | \ - perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)' \ - -e '{push(@l,$_)}' \ - -e 'END{print map{$_->[0]}' \ - -e 'sort{$a->[1] <=> $b->[1]}' \ - -e 'map{[$_,substr($_,51,3)]}@l;}' + perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ + -e '{push(@l,$_)}' \ + -e 'END{print map{$_->[0]}' \ + -e ' sort{$a->[1] <=> $b->[1]}' \ + -e ' map{[$_,substr($_,51,3)]}@l;}' perlebcdic.pod If you would rather see it in POSIX-BC order then change the digit 51 in the last line to 60, like this: @@ -574,12 +493,11 @@ If you would rather see it in POSIX-BC order then change the digit =back - perldoc -m perlebcdic | \ - perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)' \ - -e '{push(@l,$_)}' \ - -e 'END{print map{$_->[0]}' \ - -e 'sort{$a->[1] <=> $b->[1]}' \ - -e 'map{[$_,substr($_,60,3)]}@l;}' + perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ + -e '{push(@l,$_)}' \ + -e 'END{print map{$_->[0]}' \ + -e ' sort{$a->[1] <=> $b->[1]}' \ + -e ' map{[$_,substr($_,60,3)]}@l;}' perlebcdic.pod =head1 IDENTIFYING CHARACTER CODE SETS @@ -588,44 +506,44 @@ To determine the character set you are running under from perl one could use the return value of ord() or chr() to test one or more character values. For example: - my $is_ascii = "A" eq chr(65); - my $is_ebcdic = "A" eq chr(193); + $is_ascii = "A" eq chr(65); + $is_ebcdic = "A" eq chr(193); Also, "\t" is a C<HORIZONTAL TABULATION> character so that: - my $is_ascii = ord("\t") == 9; - my $is_ebcdic = ord("\t") == 5; + $is_ascii = ord("\t") == 9; + $is_ebcdic = ord("\t") == 5; To distinguish EBCDIC code pages try looking at one or more of the characters that differ between them. For example: - my $is_ebcdic_37 = "\n" eq chr(37); - my $is_ebcdic_1047 = "\n" eq chr(21); + $is_ebcdic_37 = "\n" eq chr(37); + $is_ebcdic_1047 = "\n" eq chr(21); Or better still choose a character that is uniquely encoded in any of the code sets, e.g.: - my $is_ascii = ord('[') == 91; - my $is_ebcdic_37 = ord('[') == 186; - my $is_ebcdic_1047 = ord('[') == 173; - my $is_ebcdic_POSIX_BC = ord('[') == 187; + $is_ascii = ord('[') == 91; + $is_ebcdic_37 = ord('[') == 186; + $is_ebcdic_1047 = ord('[') == 173; + $is_ebcdic_POSIX_BC = ord('[') == 187; However, it would be unwise to write tests such as: - my $is_ascii = "\r" ne chr(13); # WRONG - my $is_ascii = "\n" ne chr(10); # ILL ADVISED + $is_ascii = "\r" ne chr(13); # WRONG + $is_ascii = "\n" ne chr(10); # ILL ADVISED Obviously the first of these will fail to distinguish most ASCII machines -from either a CCSID 0037, a 1047, or a POSIX-BC EBCDIC machine since "\r" eq -chr(13) under all of those coded character sets. But note too that -because "\n" is chr(13) and "\r" is chr(10) on the MacIntosh (which is an +from either a CCSID 0037, a 1047, or a POSIX-BC EBCDIC machine since "\r" eq +chr(13) under all of those coded character sets. But note too that +because "\n" is chr(13) and "\r" is chr(10) on the MacIntosh (which is an ASCII machine) the second C<$is_ascii> test will lead to trouble there. -To determine whether or not perl was built under an EBCDIC +To determine whether or not perl was built under an EBCDIC code page you can use the Config module like so: use Config; - my $is_ebcdic = $Config{'ebcdic'} eq 'define'; + $is_ebcdic = $Config{'ebcdic'} eq 'define'; =head1 CONVERSIONS @@ -638,30 +556,29 @@ The data in the table are in ASCII order hence the EBCDIC columns provide easy to use ASCII to EBCDIC operations that are also easily reversed. -For example, to convert ASCII to code page 037 take the output of the second -column from the output of recipe 0 (modified to add \\ characters) and use +For example, to convert ASCII to code page 037 take the output of the second +column from the output of recipe 0 (modified to add \\ characters) and use it in tr/// like so: - my $cp_037 = join '', - qq[\000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017], - qq[\020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037], - qq[\200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007], - qq[\220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032], - qq[\040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174], - qq[\046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254], - qq[\055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077], - qq[\370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042], - qq[\330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261], - qq[\260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244], - qq[\265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256], - qq[\136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327], - qq[\173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365], - qq[\175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377], - qq[\134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325], - qq[\060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237]; + $cp_037 = + '\000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017' . + '\020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037' . + '\200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007' . + '\220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032' . + '\040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174' . + '\046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254' . + '\055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077' . + '\370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042' . + '\330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261' . + '\260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244' . + '\265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256' . + '\136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327' . + '\173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365' . + '\175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377' . + '\134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325' . + '\060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237' ; my $ebcdic_string = $ascii_string; - eval '$ebcdic_string =~ tr/\000-\377/' . $cp_037 . '/'; To convert from EBCDIC 037 to ASCII just reverse the order of the tr/// @@ -684,12 +601,12 @@ On OS/390 or z/OS see the iconv(1) manpage. One way to invoke the iconv shell utility from within perl would be to: # OS/390 or z/OS example - my $ascii_data = `echo '$ebcdic_data'| iconv -f IBM-1047 -t ISO8859-1` + $ascii_data = `echo '$ebcdic_data'| iconv -f IBM-1047 -t ISO8859-1` or the inverse map: # OS/390 or z/OS example - my $ebcdic_data = `echo '$ascii_data'| iconv -f ISO8859-1 -t IBM-1047` + $ebcdic_data = `echo '$ascii_data'| iconv -f ISO8859-1 -t IBM-1047` For other perl based conversion options see the Convert::* modules on CPAN. @@ -704,7 +621,7 @@ care on EBCDIC machines. For example the following array will have twenty six elements on either an EBCDIC machine or an ASCII machine: - my @alphabet = ( 'A'..'Z' ); # $#alphabet == 25 + @alphabet = ('A'..'Z'); # $#alphabet == 25 The bitwise operators such as & ^ | may return different results when operating on string or character data in a perl program running @@ -712,10 +629,10 @@ on an EBCDIC machine than when run on an ASCII machine. Here is an example adapted from the one in L<perlop>: # EBCDIC-based examples - print "j p \n" ^ " a h"; # prints "JAPH\n" - print "JA" | " ph\n"; # prints "japh\n" - print "JAPH\nJunk" & "\277\277\277\277\277"; # prints "japh\n" - print 'p N$' ^ " E<H\n"; # prints "Perl\n" + print "j p \n" ^ " a h"; # prints "JAPH\n" + print "JA" | " ph\n"; # prints "japh\n" + print "JAPH\nJunk" & "\277\277\277\277\277"; # prints "japh\n"; + print 'p N$' ^ " E<H\n"; # prints "Perl\n"; An interesting property of the 32 C0 control characters in the ASCII table is that they can "literally" be constructed @@ -781,24 +698,23 @@ not one. chr() must be given an EBCDIC code number argument to yield a desired character return value on an EBCDIC machine. For example: - my $CAPITAL_LETTER_A = chr(193); + $CAPITAL_LETTER_A = chr(193); =item ord() ord() will return EBCDIC code number values on an EBCDIC machine. For example: - my $the_number_193 = ord("A"); + $the_number_193 = ord("A"); =item pack() The c and C templates for pack() are dependent upon character set encoding. Examples of usage on EBCDIC include: - my $foo; $foo = pack("CCCC",193,194,195,196); # $foo eq "ABCD" - $foo = pack("C4", 193,194,195,196); + $foo = pack("C4",193,194,195,196); # same thing $foo = pack("ccxxcc",193,194,195,196); @@ -843,7 +759,7 @@ mixed case strings. This is discussed in more detail below. See the discussion of printf() above. An example of the use of sprintf would be: - my $CAPITAL_LETTER_A = sprintf("%c",193); + $CAPITAL_LETTER_A = sprintf("%c",193); =item unpack() @@ -903,13 +819,13 @@ four coded character sets discussed in this document is as follows: sub Is_c0 { my $char = substr(shift,0,1); - if ( ord('^') == 94 ) { # ascii + if (ord('^')==94) { # ascii return $char =~ /[\000-\037]/; - } - if ( ord('^') == 176 ) { # 37 + } + if (ord('^')==176) { # 37 return $char =~ /[\000-\003\067\055-\057\026\005\045\013-\023\074\075\062\046\030\031\077\047\034-\037]/; } - if ( ord('^') == 95 || ord('^') == 106 ) { # 1047 || posix-bc + if (ord('^')==95 || ord('^')==106) { # 1047 || posix-bc return $char =~ /[\000-\003\067\055-\057\026\005\025\013-\023\074\075\062\046\030\031\077\047\034-\037]/; } } @@ -921,45 +837,46 @@ four coded character sets discussed in this document is as follows: sub Is_delete { my $char = substr(shift,0,1); - if ( ord('^') == 94 ) { # ascii + if (ord('^')==94) { # ascii return $char eq "\177"; - } else { # ebcdic + } + else { # ebcdic return $char eq "\007"; } } sub Is_c1 { my $char = substr(shift,0,1); - if ( ord('^') == 94 ) { # ascii + if (ord('^')==94) { # ascii return $char =~ /[\200-\237]/; } - if ( ord('^') == 176 ) { # 37 + if (ord('^')==176) { # 37 return $char =~ /[\040-\044\025\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\377]/; } - if ( ord('^') == 95 ) { # 1047 + if (ord('^')==95) { # 1047 return $char =~ /[\040-\045\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\377]/; } - if ( ord('^') == 106 ) { # posix-bc - return $char =~ + if (ord('^')==106) { # posix-bc + return $char =~ /[\040-\045\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\137]/; } } sub Is_latin_1 { my $char = substr(shift,0,1); - if ( ord('^') == 94 ) { # ascii + if (ord('^')==94) { # ascii return $char =~ /[\240-\377]/; } - if ( ord('^') == 176 ) { # 37 - return $char =~ + if (ord('^')==176) { # 37 + return $char =~ /[\101\252\112\261\237\262\152\265\275\264\232\212\137\312\257\274\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\375\376\373\374\255\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337]/; } - if ( ord('^') == 95 ) { # 1047 + if (ord('^')==95) { # 1047 return $char =~ /[\101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337]/; } - if ( ord('^') == 106 ) { # posix-bc - return $char =~ + if (ord('^')==106) { # posix-bc + return $char =~ /[\101\252\260\261\237\262\320\265\171\264\232\212\272\312\257\241\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\340\376\335\374\255\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\300\336\333\334\215\216\337]/; } } @@ -992,8 +909,8 @@ letters compared to the digits. If sorted on an ASCII based machine the two letter abbreviation for a physician comes before the two letter for drive, that is: - my @sorted = sort(qw(Dr. dr.)); # @sorted holds ('Dr.','dr.') on ASCII, - # but ('dr.','Dr.') on EBCDIC + @sorted = sort(qw(Dr. dr.)); # @sorted holds ('Dr.','dr.') on ASCII, + # but ('dr.','Dr.') on EBCDIC The property of lower case before uppercase letters in EBCDIC is even carried to the Latin 1 EBCDIC pages such as 0037 and 1047. @@ -1023,9 +940,9 @@ then sort(). If the data are primarily lowercase non Latin 1 then apply tr/[A-Z]/[a-z]/ before sorting. If the data are primarily UPPERCASE and include Latin-1 characters then apply: - tr/[a-z]/[A-Z]/; + tr/[a-z]/[A-Z]/; tr/[àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ]/[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ]/; - s/ß/SS/g; + s/ß/SS/g; then sort(). Do note however that such Latin-1 manipulation does not address the E<yuml> C<y WITH DIAERESIS> character that will remain at @@ -1072,7 +989,7 @@ may also be expressed as either of: where 7E is the hexadecimal ASCII code point for '~'. Here is an example of decoding such a URL under CCSID 1047: - my $url = 'http://www.pvhp.com/%7Epvhp/'; + $url = 'http://www.pvhp.com/%7Epvhp/'; # this array assumes code page 1047 my @a2e_1047 = ( 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 21, 11, 12, 13, 14, 15, @@ -1097,7 +1014,7 @@ of decoding such a URL under CCSID 1047: Conversely, here is a partial solution for the task of encoding such a URL under the 1047 code page: - my $url = 'http://www.pvhp.com/~pvhp/'; + $url = 'http://www.pvhp.com/~pvhp/'; # this array assumes code page 1047 my @e2a_1047 = ( 0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15, @@ -1117,7 +1034,7 @@ a URL under the 1047 code page: 92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159 ); - # The following regular expression does not address the + # The following regular expression does not address the # mappings for: ('.' => '%2E', '/' => '%2F', ':' => '%3A') $url =~ s/([\t "#%&\(\),;<=>\?\@\[\\\]^`{|}~])/sprintf("%%%02X",$e2a_1047[ord($1)])/ge; @@ -1134,13 +1051,10 @@ The C<u> template to pack() or unpack() will render EBCDIC data in EBCDIC characters equivalent to their ASCII counterparts. For example, the following will print "Yes indeed\n" on either an ASCII or EBCDIC computer: - my $all_byte_chrs = ''; - - $all_byte_chrs .= chr($_) foreach 0 .. 255; - - my $uuencode_byte_chrs = pack('u', $all_byte_chrs); - - (my $uu = <<' ENDOFHEREDOC') =~ s/^\s*//gm; + $all_byte_chrs = ''; + for (0..255) { $all_byte_chrs .= chr($_); } + $uuencode_byte_chrs = pack('u', $all_byte_chrs); + ($uu = <<' ENDOFHEREDOC') =~ s/^\s*//gm; M``$"`P0%!@<("0H+#`T.#Q`1$A,4%187&!D:&QP='A\@(2(C)"4F)R@I*BLL M+2XO,#$R,S0U-C<X.3H[/#T^/T!!0D-$149'2$E*2TQ-3D]045)35%565UA9 M6EM<75Y?8&%B8V1E9F=H:6IK;&UN;W!Q<G-T=79W>'EZ>WQ]?G^`@8*#A(6& @@ -1148,22 +1062,21 @@ following will print "Yes indeed\n" on either an ASCII or EBCDIC computer: MM+6VM[BYNKN\O;Z_P,'"P\3%QL?(R<K+S,W.S]#1TM/4U=;7V-G:V]S=WM_@ ?X>+CY.7FY^CIZNOL[>[O\/'R\_3U]O?X^?K[_/W^_P`` ENDOFHEREDOC - if ( $uuencode_byte_chrs eq $uu ) { + if ($uuencode_byte_chrs eq $uu) { print "Yes "; } $uudecode_byte_chrs = unpack('u', $uuencode_byte_chrs); - if ( $uudecode_byte_chrs eq $all_byte_chrs ) { + if ($uudecode_byte_chrs eq $all_byte_chrs) { print "indeed\n"; } Here is a very spartan uudecoder that will work on EBCDIC provided that the @e2a array is filled in appropriately: - #!/usr/bin/perl - my @e2a = ( - # this must be filled in - ); - $_ = <> until my($mode,$file) = /^begin\s*(\d*)\s*(\S*)/; + #!/usr/local/bin/perl + @e2a = ( # this must be filled in + ); + $_ = <> until ($mode,$file) = /^begin\s*(\d*)\s*(\S*)/; open(OUT, "> $file") if $file ne ""; while(<>) { last if /^end/; @@ -1182,7 +1095,7 @@ On ASCII encoded machines it is possible to strip characters outside of the printable set using: # This QP encoder works on ASCII only - my $qp_string =~ s/([=\x00-\x1F\x80-\xFF])/sprintf("=%02X",ord($1))/ge; + $qp_string =~ s/([=\x00-\x1F\x80-\xFF])/sprintf("=%02X",ord($1))/ge; Whereas a QP encoder that works on both ASCII and EBCDIC machines would look somewhat like the following (where the EBCDIC branch @e2a @@ -1191,14 +1104,12 @@ array is omitted for brevity): if (ord('A') == 65) { # ASCII $delete = "\x7F"; # ASCII @e2a = (0 .. 255) # ASCII to ASCII identity map - - } else { # EBCDIC + } + else { # EBCDIC $delete = "\x07"; # EBCDIC - @e2a = ( - # EBCDIC to ASCII map (as shown above) - ); + @e2a = # EBCDIC to ASCII map (as shown above) } - my $qp_string =~ + $qp_string =~ s/([^ !"\#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~$delete])/sprintf("=%02X",$e2a[ord($1)])/ge; (although in production code the substitutions might be done @@ -1233,14 +1144,14 @@ work on ASCII and EBCDIC machines: #!/usr/local/bin/perl - while ( <> ) { + while(<>){ tr/n-za-mN-ZA-M/a-zA-Z/; print; } In one-liner form: - perl -pe 'tr/n-za-mN-ZA-M/a-zA-Z/' + perl -ne 'tr/n-za-mN-ZA-M/a-zA-Z/;print' =head1 Hashing order and checksums @@ -1386,3 +1297,5 @@ Thanks also to Vickie Cooper, Philip Newton, William Raffloer, and Joe Smith. Trademarks, registered trademarks, service marks and registered service marks used in this document are the property of their respective owners. + + |