summaryrefslogtreecommitdiff
path: root/lib/Lingua/KO/Hangul/Util.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lingua/KO/Hangul/Util.pm')
-rw-r--r--lib/Lingua/KO/Hangul/Util.pm278
1 files changed, 0 insertions, 278 deletions
diff --git a/lib/Lingua/KO/Hangul/Util.pm b/lib/Lingua/KO/Hangul/Util.pm
deleted file mode 100644
index 3848592903..0000000000
--- a/lib/Lingua/KO/Hangul/Util.pm
+++ /dev/null
@@ -1,278 +0,0 @@
-package Lingua::KO::Hangul::Util;
-
-use 5.006;
-use strict;
-use warnings;
-
-require Exporter;
-
-our @ISA = qw(Exporter);
-our %EXPORT_TAGS = ();
-our @EXPORT_OK = ();
-our @EXPORT = qw(
- decomposeHangul
- composeHangul
- getHangulName
- parseHangulName
-);
-our $VERSION = '0.02';
-
-our @JamoL = ( # Initial (HANGUL CHOSEONG)
- "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
- "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H",
- );
-
-our @JamoV = ( # Medial (HANGUL JUNGSEONG)
- "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
- "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
- "YU", "EU", "YI", "I",
- );
-
-our @JamoT = ( # Final (HANGUL JONGSEONG)
- "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
- "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
- "S", "SS", "NG", "J", "C", "K", "T", "P", "H",
- );
-
-our $BlockName = "HANGUL SYLLABLE ";
-
-use constant SBase => 0xAC00;
-use constant LBase => 0x1100;
-use constant VBase => 0x1161;
-use constant TBase => 0x11A7;
-use constant LCount => 19; # scalar @JamoL
-use constant VCount => 21; # scalar @JamoV
-use constant TCount => 28; # scalar @JamoT
-use constant NCount => 588; # VCount * TCount
-use constant SCount => 11172; # LCount * NCount
-use constant SFinal => 0xD7A3; # SBase -1 + SCount
-
-our(%CodeL, %CodeV, %CodeT);
-@CodeL{@JamoL} = 0 .. LCount-1;
-@CodeV{@JamoV} = 0 .. VCount-1;
-@CodeT{@JamoT} = 0 .. TCount-1;
-
-sub getHangulName {
- my $code = shift;
- return undef unless SBase <= $code && $code <= SFinal;
- my $SIndex = $code - SBase;
- my $LIndex = int( $SIndex / NCount);
- my $VIndex = int(($SIndex % NCount) / TCount);
- my $TIndex = $SIndex % TCount;
- "$BlockName$JamoL[$LIndex]$JamoV[$VIndex]$JamoT[$TIndex]";
-}
-
-sub parseHangulName {
- my $arg = shift;
- return undef unless $arg =~ s/$BlockName//o;
- return undef unless $arg =~ /^([^AEIOUWY]*)([AEIOUWY]+)([^AEIOUWY]*)$/;
- return undef unless exists $CodeL{$1}
- && exists $CodeV{$2}
- && exists $CodeT{$3};
- SBase + $CodeL{$1} * NCount + $CodeV{$2} * TCount + $CodeT{$3};
-}
-
-sub decomposeHangul {
- my $code = shift;
- return unless SBase <= $code && $code <= SFinal;
- my $SIndex = $code - SBase;
- my $LIndex = int( $SIndex / NCount);
- my $VIndex = int(($SIndex % NCount) / TCount);
- my $TIndex = $SIndex % TCount;
- my @ret = (
- LBase + $LIndex,
- VBase + $VIndex,
- $TIndex ? (TBase + $TIndex) : (),
- );
- wantarray ? @ret : pack('U*', @ret);
-}
-
-#
-# To Do:
-# s/(\p{JamoL}\p{JamoV})/toHangLV($1)/ge;
-# s/(\p{HangLV}\p{JamoT})/toHangLVT($1)/ge;
-#
-sub composeHangul {
- my $str = shift;
- return $str unless length $str;
- my(@ret);
-
- foreach my $ch (unpack('U*', $str)) # Makes list! The string be short!
- {
- push(@ret, $ch) and next unless @ret;
-
- # 1. check to see if $ret[-1] is L and $ch is V.
- my $LIndex = $ret[-1] - LBase;
- if(0 <= $LIndex && $LIndex < LCount)
- {
- my $VIndex = $ch - VBase;
- if(0 <= $VIndex && $VIndex < VCount)
- {
- $ret[-1] = SBase + ($LIndex * VCount + $VIndex) * TCount;
- next; # discard $ch
- }
- }
-
- # 2. check to see if $ret[-1] is LV and $ch is T.
- my $SIndex = $ret[-1] - SBase;
- if(0 <= $SIndex && $SIndex < SCount && $SIndex % TCount == 0)
- {
- my $TIndex = $ch - TBase;
- if(0 <= $TIndex && $TIndex < TCount)
- {
- $ret[-1] += $TIndex;
- next; # discard $ch
- }
- }
-
- # 3. just append $ch
- push(@ret, $ch);
- }
- wantarray ? @ret : pack('U*', @ret);
-}
-
-1;
-__END__
-
-=head1 NAME
-
-Lingua::KO::Hangul::Util - utility functions for Hangul Syllables
-
-=head1 SYNOPSIS
-
- use Lingua::KO::Hangul::Util;
-
- decomposeHangul(0xAC00);
- # (0x1100,0x1161) or "\x{1100}\x{1161}"
-
- composeHangul("\x{1100}\x{1161}");
- # "\x{AC00}"
-
- getHangulName(0xAC00);
- # "HANGUL SYLLABLE GA"
-
- parseHangulName("HANGUL SYLLABLE GA");
- # 0xAC00
-
-=head1 DESCRIPTION
-
-A Hangul syllable consists of Hangul Jamo.
-
-Hangul Jamo are classified into three classes:
-
- CHOSEONG (the initial sound) as a leading consonant (L),
- JUNGSEONG (the medial sound) as a vowel (V),
- JONGSEONG (the final sound) as a trailing consonant (T).
-
-Any Hangul syllable is a composition of
-
- i) CHOSEONG + JUNGSEONG (L + V)
-
- or
-
- ii) CHOSEONG + JUNGSEONG + JONGSEONG (L + V + T).
-
-Names of Hangul Syllables have a format of C<"HANGUL SYLLABLE %s">.
-
-=head2 Composition and Decomposition
-
-=over 4
-
-=item C<$string_decomposed = decomposeHangul($codepoint)>
-
-=item C<@codepoints = decomposeHangul($codepoint)>
-
-Accepts unicode codepoint integer.
-
-If the specified codepoint is of a Hangul syllable,
-returns a list of codepoints (in a list context)
-or a UTF-8 string (in a scalar context)
-of its decomposition.
-
- decomposeHangul(0xAC00) # U+AC00 is HANGUL SYLLABLE GA.
- returns "\x{1100}\x{1161}" or (0x1100, 0x1161);
-
- decomposeHangul(0xAE00) # U+AE00 is HANGUL SYLLABLE GEUL.
- returns "\x{1100}\x{1173}\x{11AF}" or (0x1100, 0x1173, 0x11AF);
-
-Otherwise, returns false (empty string or empty list).
-
- decomposeHangul(0x0041) # outside Hangul Syllables
- returns empty string or empty list.
-
-=item C<$string_composed = composeHangul($src_string)>
-
-=item C<@codepoints_composed = composeHangul($src_string)>
-
-Any sequence of an initial Jamo C<L> and a medial Jamo C<V>
-is composed into a syllable C<LV>;
-then any sequence of a syllable C<LV> and a final Jamo C<T>
-is composed into a syllable C<LVT>.
-
-Any characters other than Hangul Jamo and Hangul Syllables
-are unaffected.
-
- composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}.")
- returns "Hangul \x{AC00}\x{AE00}." or
- (0x48,0x61,0x6E,0x67,0x75,0x6C,0x20,0xAC00,0xAE00,0x2E);
-
-=back
-
-=head2 Hangul Syllable Name
-
-=over 4
-
-=item C<$name = getHangulName($codepoint)>
-
-If the specified codepoint is of a Hangul syllable,
-returns its name; otherwise returns undef.
-
- getHangulName(0xAC00) returns "HANGUL SYLLABLE GA";
- getHangulName(0x0041) returns undef.
-
-=item C<$codepoint = parseHangulName($name)>
-
-If the specified name is of a Hangul syllable,
-returns its codepoint; otherwise returns undef.
-
- parseHangulName("HANGUL SYLLABLE GEUL") returns 0xAE00;
-
- parseHangulName("LATIN SMALL LETTER A") returns undef;
-
- parseHangulName("HANGUL SYLLABLE PERL") returns undef;
- # Regrettably, HANGUL SYLLABLE PERL does not exist :-)
-
-=back
-
-=head2 EXPORT
-
-By default,
-
- decomposeHangul
- composeHangul
- getHangulName
- parseHangulName
-
-=head1 AUTHOR
-
-SADAHIRO Tomoyuki
-
- bqw10602@nifty.com
- http://homepage1.nifty.com/nomenclator/perl/
-
- Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
-
- This program is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-
-=head1 SEE ALSO
-
-=over 4
-
-=item http://www.unicode.org/unicode/reports/tr15
-
-Annex 10: Hangul, in Unicode Normalization Forms (UAX #15).
-
-=back
-
-=cut