--TEST--
Exhaustive test of Shift-JIS DoCoMo, KDDI, SoftBank encoding verification and conversion
--SKIPIF--
--FILE--
= 4) {
        /* NOTE(review): the text between `--FILE--` and `= 4) {` appears to have
         * been stripped from this copy (opening PHP tag, set-up code, the header
         * of the enclosing loop and the left-hand side of this comparison). The
         * two closing braces below belong to those missing constructs; restore
         * the head from the upstream file before running this test. */

        /* Field 0 is either two hex codepoints (stored as two consecutive
         * big-endian 32-bit units) or a single hex codepoint */
        if (sscanf($fields[0], "%x %x", $cp1, $cp2) == 2) {
            $utf32 = pack('N', $cp1) . pack('N', $cp2);
        } else {
            $utf32 = pack('N', hexdec($fields[0]));
            /* A single codepoint which has a mapping is dropped from the set of
             * codepoints expected to be rejected */
            unset($invalidCodepoints[$utf32]);
        }

        /* Fields 1-3 hold the hex byte values used as keys in $docomo, $kddi and
         * $sbEmoji respectively; a falsy (e.g. empty) field is skipped */
        if ($fields[1])
            $docomo[pack('n', hexdec($fields[1]))] = $utf32;
        if ($fields[2])
            $kddi[pack('n', hexdec($fields[2]))] = $utf32;
        if ($fields[3]) {
            $bytes = pack('n', hexdec($fields[3]));
            $sbEmoji[$bytes] = $utf32;
            unset($nonInvertibleSoftbank[$bytes]);
        }
    }
}

/* Other, vendor-specific emoji which do not appear in EmojiSources.txt
 * Most of these don't exist in Unicode and have been mapped to 'private
 * area' codepoints
 *
 * Keys are the 2-byte encoded form; values are 4-byte big-endian codepoints.
 * Entries which are also copied into $nonInvertibleDocomo map to ordinary
 * (non-private-area) codepoints; presumably converting those codepoints back
 * yields a different byte sequence -- TODO confirm against the converter */
$docomo["\xF9\x4A"] = "\x00\x0F\xEE\x16"; // PIAS PI
$docomo["\xF9\x4B"] = "\x00\x0F\xEE\x17"; // PIAS A
$docomo["\xF9\x4C"] = "\x00\x0F\xEE\x18"; // INVERSE TICKET
$docomo["\xF9\x4D"] = "\x00\x0F\xEE\x19"; // KATAKANA ABBREVIATION FOR TICKET ("chi ke")
$docomo["\xF9\x4E"] = "\x00\x0F\xEE\x1A"; // RESERVE BY PHONE
$docomo["\xF9\x4F"] = "\x00\x0F\xEE\x1B"; // P CODE
$docomo["\xF9\x53"] = "\x00\x0F\xEE\x1C"; // MOVIES 2
$docomo["\xF9\x54"] = "\x00\x0F\xEE\x1D"; // PIAS PI INVERSE
$docomo["\xF9\x58"] = "\x00\x0F\xEE\x1E"; // PIAS PI CIRCLE
$docomo["\xF9\x59"] = "\x00\x0F\xEE\x1F"; // PIAS PI SQUARE
$docomo["\xF9\x5A"] = "\x00\x0F\xEE\x20"; // CHECK
$docomo["\xF9\x5F"] = "\x00\x0F\xEE\x21"; // F
$docomo["\xF9\x60"] = "\x00\x0F\xEE\x22"; // D
$docomo["\xF9\x61"] = "\x00\x0F\xEE\x23"; // S
$docomo["\xF9\x62"] = "\x00\x0F\xEE\x24"; // C
$docomo["\xF9\x63"] = "\x00\x0F\xEE\x25"; // R
$docomo["\xF9\x64"] = "\x00\x00\x25\xEA"; // SQUARE WITH LOWER RIGHT DIAGONAL HALF BLACK
$nonInvertibleDocomo["\xF9\x64"] = "\x00\x00\x25\xEA";
$docomo["\xF9\x65"] = "\x00\x00\x25\xA0"; // BLACK SQUARE
$nonInvertibleDocomo["\xF9\x65"] = "\x00\x00\x25\xA0";
$docomo["\xF9\x66"] = "\x00\x00\x25\xBF"; // DOWNWARD TRIANGLE
$nonInvertibleDocomo["\xF9\x66"] = "\x00\x00\x25\xBF";
/* TODO: test that FEE28 converts to F966, for backwards compatibility */
$docomo["\xF9\x67"] = "\x00\x0F\xEE\x29"; // QUADRUPLE DAGGER
$docomo["\xF9\x68"] = "\x00\x0F\xEE\x2A"; // TRIPLE DAGGER
$docomo["\xF9\x69"] = "\x00\x0F\xEE\x2B"; // DOUBLE DAGGER
$docomo["\xF9\x6A"] = "\x00\x00\x20\x20"; // DAGGER
$nonInvertibleDocomo["\xF9\x6A"] = "\x00\x00\x20\x20";
/* TODO: test that FEE2C converts to F96A, for backwards compatibility */
$docomo["\xF9\x6B"] = "\x00\x0F\xEE\x2D"; // I (meaning "inexpensive")
$docomo["\xF9\x6C"] = "\x00\x0F\xEE\x2E"; // M (meaning "moderate")
$docomo["\xF9\x6D"] = "\x00\x0F\xEE\x2F"; // E (meaning "expensive")
$docomo["\xF9\x6E"] = "\x00\x0F\xEE\x30"; // VE (meaning "very expensive")
$docomo["\xF9\x6F"] = "\x00\x0F\xEE\x31"; // SPHERE
$docomo["\xF9\x70"] = "\x00\x0F\xEE\x32"; // CREDIT CARDS NOT ACCEPTED
$docomo["\xF9\x71"] = "\x00\x0F\xEE\x33"; // CHECKBOX
$docomo["\xF9\x75"] = "\x00\x0F\xEE\x10"; // I-MODE
$docomo["\xF9\x76"] = "\x00\x0F\xEE\x11"; // I-MODE WITH FRAME
$docomo["\xF9\x78"] = "\x00\x0F\xEE\x12"; // PROVIDED BY DOCOMO
$docomo["\xF9\x79"] = "\x00\x0F\xEE\x13"; // DOCOMO POINT
$docomo["\xF9\x84"] = "\x00\x00\x27\xBF"; // FREE DIAL; mapped to DOUBLE CURLY LOOP
/* U+27BF now has a mapping, so it must not be treated as an invalid codepoint */
unset($invalidCodepoints["\x00\x00\x27\xBF"]);
$docomo["\xF9\x86"] = "\x00\x0F\xE8\x2D"; // MOBILE Q
$docomo["\xF9\xB1"] = "\x00\x0F\xEE\x14"; // I-APPLI
$docomo["\xF9\xB2"] = "\x00\x0F\xEE\x15"; // I-APPLI WITH BORDER

/* KDDI-specific entries, same key/value layout as the $docomo entries above */
$kddi["\xF7\x94"] = "\x00\x0F\xEE\x40"; // EZ WEB
$kddi["\xF7\xCF"] = "\x00\x0F\xEE\x41"; // EZ PLUS
$kddi["\xF3\x70"] = "\x00\x0F\xEE\x42"; // EZ NAVIGATION
$kddi["\xF4\x78"] = "\x00\x0F\xEE\x43"; // EZ MOVIE
$kddi["\xF4\x86"] = "\x00\x0F\xEE\x44"; // CMAIL
$kddi["\xF4\x8E"] = "\x00\x0F\xEE\x45"; // JAVA (TM)
$kddi["\xF4\x8F"] = "\x00\x0F\xEE\x46"; // BREW
$kddi["\xF4\x90"] = "\x00\x0F\xEE\x47"; // EZ RING MUSIC
$kddi["\xF4\x91"] = "\x00\x0F\xEE\x48"; // EZ NAVI
$kddi["\xF4\x92"] = "\x00\x0F\xEE\x49"; // WIN
$kddi["\xF4\x93"] = "\x00\x0F\xEE\x4A"; // PREMIUM
// SIGN  (completes the "PREMIUM SIGN" label of the $kddi["\xF4\x93"] entry on the preceding line)
$kddi["\xF7\x48"] = "\x00\x0F\xE8\x2D"; // MOBILE Q
$kddi["\xF7\xA3"] = "\x00\x0F\xE8\x3C"; // PDC ("personal digital cellular")
$kddi["\xF7\xD2"] = "\x00\x0F\xEB\x89"; // OPENWAVE

/* SoftBank entries: 2-byte encoded form => 4-byte big-endian codepoint */
$sbEmoji["\xF7\xB1"] = "\x00\x00\x27\xBF"; // FREE DIAL; mapped to DOUBLE CURLY LOOP
$sbEmoji["\xF7\xF4"] = "\x00\x0F\xEE\x77"; // J-PHONE SHOP
$sbEmoji["\xF7\xF5"] = "\x00\x0F\xEE\x78"; // SKY WEB
$sbEmoji["\xF7\xF6"] = "\x00\x0F\xEE\x79"; // SKY WALKER
$sbEmoji["\xF7\xF7"] = "\x00\x0F\xEE\x7A"; // SKY MELODY
$sbEmoji["\xF7\xF8"] = "\x00\x0F\xEE\x7B"; // J-PHONE 1
$sbEmoji["\xF7\xF9"] = "\x00\x0F\xEE\x7C"; // J-PHONE 2
$sbEmoji["\xF7\xFA"] = "\x00\x0F\xEE\x7D"; // J-PHONE 3

/* 'JSky1', 'JSky2', 'VODAFONE1', 'VODAFONE2', etc.: SoftBank-only emoji with
 * no Unicode equivalent. Bytes FBD8..FBDE map one-to-one onto the seven
 * consecutive codepoints starting at 0xFEE70 */
for ($i = 0xFBD8; $i <= 0xFBDE; $i++) {
    $bytes = pack('n', $i);
    unset($nonInvertibleSoftbank[$bytes]);
    $sbEmoji[$bytes] = pack('N', $i - 0xFBD8 + 0xFEE70);
}

/* Emoji for the Shibuya department store (SoftBank only) */
unset($nonInvertibleSoftbank["\xFB\xAA"]);
$sbEmoji["\xFB\xAA"] = "\x00\x0F\xE4\xC5";

/* NOTE(review): array_merge() renumbers integer keys, and PHP casts keys such
 * as "0".."9" to integers. If $softbank contains such keys this only works
 * while they already run 0, 1, 2, ... in order -- confirm where $softbank is
 * built */
$softbank = array_merge($softbank, $sbEmoji);

/* Older SoftBank phones apparently used a second representation for emoji:
 * an escape sequence beginning with ESC. The ASCII character which follows
 * ESC '$' is one of six letters, each standing for a different ku code.
 * $escCodeToKu gives that ku value; $escCodeMaxTen gives the highest ten
 * value tried for the same letter */
$escCodeToKu = [
    'G' => 0x91,
    'E' => 0x8D,
    'F' => 0x8E,
    'O' => 0x92,
    'P' => 0x95,
    'Q' => 0x96,
];
$escCodeMaxTen = [
    'G' => 0x7A,
    'E' => 0x7A,
    'F' => 0x7A,
    'O' => 0x6D,
    'P' => 0x6C,
    'Q' => 0x5E,
];

/**
 * Build the two-byte Shift-JIS string for a ku/ten pair.
 *
 * Both arguments are byte values offset by 0x21 (0x21 is subtracted from each
 * before use).
 *
 * @param int $ku  row value, 0x21-based
 * @param int $ten cell value, 0x21-based
 * @return string  lead byte followed by trail byte
 */
function shiftJISEncode($ku, $ten) {
    $row  = $ku - 0x21;
    $cell = $ten - 0x21;

    /* Every two consecutive rows share a lead byte: the first 31 row pairs
     * use 0x81 upward, the remaining ones use 0xE0 upward */
    $pair = $row >> 1;
    $lead = chr($pair < 31 ? $pair + 0x81 : $pair - 31 + 0xE0);

    if ($row % 2 != 0) {
        /* Second row of a pair: trail bytes begin at 0x9F */
        return $lead . chr($cell + 0x9F);
    }

    /* First row of a pair: trail bytes begin at 0x40 and jump over 0x7F */
    $trail = ($cell < 63) ? $cell + 0x40 : $cell - 63 + 0x80;
    return $lead . chr($trail);
}

/* Register the ESC-sequence form of every SoftBank emoji which has one.
 * These entries are recorded in $nonInvertibleSoftbank as well as $softbank */
foreach ($escCodeToKu as $char => $ku) {
    for ($ten = 0x21; $ten <= $escCodeMaxTen[$char]; $ten++) {
        $sjis = shiftJISEncode($ku, $ten);
        if (!isset($sbEmoji[$sjis])) {
            continue;
        }
        $bytes = "\x1B\$" . $char . chr($ten);
        $unicode = $softbank[$sjis];
        $softbank[$bytes] = $unicode;
        $nonInvertibleSoftbank[$bytes] = $unicode;
    }
}

/* ESC on its own introduces an emoji escape sequence, so for Softbank it is
 * not a valid character by itself */
unset($softbank["\x1B"]);

/**
 * Run the full set of checks for one SJIS-Mobile variant and print one
 * progress line after each group of checks.
 *
 * Reads the globals $fromUnicode, $invalidCodepoints and $escCodeToKu.
 *
 * @param array  $validChars    encoded bytes => UTF-32BE string
 * @param array  $nonInvertible encoded bytes => UTF-32BE string; these keys
 *                              are removed from $validChars and checked
 *                              separately, with `false` passed as the last
 *                              argument of testAllValidChars()
 * @param string $encoding      encoding name handed to the test helpers
 */
function testSJISVariant($validChars, $nonInvertible, $encoding) {
    global $fromUnicode, $invalidCodepoints, $escCodeToKu;

    /* Lead bytes 0xE0-0xFC and 0x81-0x9F each begin a 2-byte character */
    $leadBytes = array_merge(range(0xE0, 0xFC), range(0x81, 0x9F));
    $lenTable = array_fill_keys($leadBytes, 2);
    findInvalidChars($validChars, $invalidChars, $truncated, $lenTable);

    /* Drop the entries for ESC '$' <letter> and ESC '$' <letter> 0x0F from the
     * truncated / invalid sets respectively */
    foreach (array_keys($escCodeToKu) as $char) {
        $prefix = "\x1B\$" . $char;
        unset($invalidChars[$prefix . "\x0F"]);
        unset($truncated[$prefix]);
    }

    /* Non-invertible entries are not tested with the ordinary valid
     * characters; remember which of them are ESC sequences */
    $escapes = [];
    foreach (array_keys($nonInvertible) as $bytes) {
        unset($validChars[$bytes]);
        if (substr($bytes, 0, 1) === "\x1B") {
            $escapes[] = $bytes;
        }
    }

    /* A run of ESC-encoded emoji is terminated by 0xF. The terminator is only
     * appended now, because doing it any earlier would have stopped
     * `findInvalidChars` from working as desired */
    foreach ($escapes as $bytes) {
        $nonInvertible[$bytes . "\x0F"] = $nonInvertible[$bytes];
        unset($nonInvertible[$bytes]);
    }

    testAllValidChars($validChars, $encoding, 'UTF-32BE');
    testAllValidChars($nonInvertible, $encoding, 'UTF-32BE', false);
    echo "$encoding verification and conversion works on all valid characters\n";

    testAllInvalidChars($invalidChars, $validChars, $encoding, 'UTF-32BE', "\x00\x00\x00%");
    testTruncatedChars($truncated, $encoding, 'UTF-32BE', "\x00\x00\x00%");
    echo "$encoding verification and conversion works on all invalid characters\n";

    convertAllInvalidChars($invalidCodepoints, $fromUnicode, 'UTF-32BE', $encoding, '%');
    echo "Unicode -> $encoding conversion works on all invalid codepoints\n";
}

testSJISVariant($docomo, $nonInvertibleDocomo, 'SJIS-Mobile#DOCOMO');
testSJISVariant($kddi, $nonInvertible, 'SJIS-Mobile#KDDI');
testSJISVariant($softbank, $nonInvertibleSoftbank, 'SJIS-Mobile#SOFTBANK');

?>
--EXPECT--
SJIS-Mobile#DOCOMO verification and conversion works on all valid characters
SJIS-Mobile#DOCOMO verification and conversion works on all invalid characters
Unicode -> SJIS-Mobile#DOCOMO conversion works on all invalid codepoints
SJIS-Mobile#KDDI verification and conversion works on all valid characters
SJIS-Mobile#KDDI verification and conversion works on all invalid characters
Unicode -> SJIS-Mobile#KDDI conversion works on all invalid codepoints
SJIS-Mobile#SOFTBANK verification and conversion works on all valid characters
SJIS-Mobile#SOFTBANK verification and conversion works on all invalid characters
Unicode -> SJIS-Mobile#SOFTBANK conversion works on all invalid codepoints