summaryrefslogtreecommitdiff
path: root/eg/unuc
diff options
context:
space:
mode:
Diffstat (limited to 'eg/unuc')
-rwxr-xr-xeg/unuc186
1 files changed, 186 insertions, 0 deletions
diff --git a/eg/unuc b/eg/unuc
new file mode 100755
index 0000000000..ae5c65285d
--- /dev/null
+++ b/eg/unuc
@@ -0,0 +1,186 @@
+#!/usr/bin/perl
+
+print STDERR "Loading proper nouns...\n";
+open(DICT,"/usr/dict/words") || die "Can't find /usr/dict/words: $!\n";
+while (<DICT>) {
+ if (/^[A-Z]/) {
+ chop;
+ ($lower = $_) =~ y/A-Z/a-z/;
+ $proper{$lower} = $_;
+ }
+}
+close DICT;
+print STDERR "Loading exceptions...\n";
+
+$prog = <<'EOT';
+while (<>) {
+ next if /[a-z]/;
+ y/A-Z/a-z/;
+ s/(\w+)/$proper{$1} ? $proper{$1} : $1/eg;
+ s/^(\s*)([a-z])/$1 . (($tmp = $2) =~ y:a-z:A-Z:,$tmp)/e;
+ s/([-.?!]["']?(\n\s*| \s*)["']?)([a-z])/$1 . (($tmp = $3) =~ y:a-z:A-Z:,$tmp)/eg;
+ s/\b([b-df-hj-np-tv-xz]+)\b/(($tmp = $1) =~ y:a-z:A-Z:,$tmp)/eg;
+ s/([a-z])'([SDT])\b/$1 . "'" . (($tmp = $2) =~ y:A-Z:a-z:,$tmp)/eg;
+EOT
+while (<DATA>) {
+ chop;
+ next if /^$/;
+ next if /^#/;
+ if (! /;$/) {
+ $foo = $_;
+ $foo =~ y/A-Z/a-z/;
+ print STDERR "Dup $_\n" if $proper{$foo};
+ $foo =~ s/([^\w ])/\\$1/g;
+ $foo =~ s/ /(\\s+)/g;
+ $foo = "\\b" . $foo if $foo =~ /^\w/; # XXX till patch 9
+ $foo .= "\\b" if $foo =~ /\w$/;
+ $i = 0;
+ ($bar = $_) =~ s/ /'$' . ++$i/eg;
+ $_ = "s/$foo/$bar/gi;";
+ }
+ $prog .= ' ' . $_ . "\n";
+}
+$prog .= "}\ncontinue {\n print;\n}\n";
+
+$/ = '';
+#print $prog;
+eval $prog; die $@ if $@;
+__END__
+A.M.
+Air Force
+Air Force Base
+Air Force Station
+American
+Apr.
+Ariane
+Aug.
+August
+Bureau of Labor Statistics
+CIT
+Caltech
+Cape Canaveral
+Challenger
+China
+Corporation
+Crippen
+Daily News in Brief
+Daniel Quayle
+Dec.
+Discovery
+Edwards
+Endeavour
+Feb.
+Ford Aerospace
+Fri.
+General Dynamics
+George Bush
+Headline News
+HOTOL
+I
+II
+III
+IV
+IX
+Institute of Technology
+JPL
+Jan.
+Jul.
+Jun.
+Kennedy Space Center
+LDEF
+Long Duration Exposure Facility
+Long March
+Mar.
+March
+Martin
+Martin Marietta
+Mercury
+Mon.
+in May
+s/\bmay (\d)/May $1/g;
+s/\boffice of (\w)/'Office of ' . (($tmp = $1) =~ y:a-z:A-Z:,$tmp)/eg;
+National Science Foundation
+NASA Select
+New Mexico
+Nov.
+OMB
+Oct.
+Office of Management and Budget
+President
+President Bush
+Richard Truly
+Rocketdyne
+Russian
+Russians
+Sat.
+Sep.
+Soviet
+Soviet Union
+Soviets
+Space Shuttle
+Sun.
+Thu.
+Tue.
+U.S.
+Union of Soviet Socialist Republics
+United States
+VI
+VII
+VIII
+Vice President
+Vice President Quayle
+Wed.
+White Sands
+Kaman Aerospace
+Aerospace Daily
+Aviation Week
+Space Technology
+Washington Post
+Los Angeles Times
+New York Times
+Aerospace Industries Association
+president of
+Johnson Space Center
+Space Services
+Inc.
+Co.
+Hughes Aircraft
+Company
+Orbital Sciences
+Swedish Space
+Arnauld
+Nicogosian
+Magellan
+Galileo
+Mir
+Jet Propulsion Laboratory
+University
+Department of Defense
+Orbital Science
+OMS
+United Press International
+United Press
+UPI
+Associated Press
+AP
+Cable News Network
+Cape York
+Zenit
+SYNCOM
+Eastern
+Western
+Test Range
+Jcsat
+Japanese Satellite Communications
+Defence Ministry
+Defense Ministry
+Skynet
+Fixed Service Structure
+Launch Processing System
+Asiasat
+Launch Control Center
+Earth
+CNES
+Glavkosmos
+Pacific
+Atlantic