diff options
author | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-24 19:05:21 +0000 |
---|---|---|
committer | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-24 19:05:21 +0000 |
commit | 03de44f23e454423d676024b33ce0c4c806766bd (patch) | |
tree | 3f487886d7c2a13a9bb515d981d2cd30b7b7655f | |
parent | 3f804701b50e73eb50e43aaca4644695844f1625 (diff) | |
download | php-git-03de44f23e454423d676024b33ce0c4c806766bd.tar.gz |
- Removed out-of-date comments and one enum constant (cs_terminator) from html_tables.h and its generator.
- Other minor aesthetic improvements in the generator.
-rw-r--r-- | ext/standard/html_tables.h | 69 | ||||
-rw-r--r-- | ext/standard/html_tables/html_table_gen.php | 84 |
2 files changed, 68 insertions, 85 deletions
diff --git a/ext/standard/html_tables.h b/ext/standard/html_tables.h index 8d4de82c5a..f9674a1c37 100644 --- a/ext/standard/html_tables.h +++ b/ext/standard/html_tables.h @@ -1,4 +1,4 @@ -/* +/* +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ @@ -28,17 +28,9 @@ *************************************************************************** **************************************************************************/ -/* cs_terminator is overloaded in the following fashion: - * - It terminates the list entity maps. - * - In BG(inverse_ent_maps), it's the key of the inverse map that stores - * only the basic entities. - * - When passed to traverse_for_entities (or via php_unescape_entities with !all), - * we don't care about the encoding (UTF-8 is chosen, but it should be used - * when it doesn't matter). - */ -enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, - cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, - cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, +enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, + cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5, + cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, cs_numelems /* used to count the number of charsets */ }; #define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8) @@ -49,36 +41,36 @@ static const struct { const char *codeset; enum entity_charset charset; } charset_map[] = { - { "ISO-8859-1", cs_8859_1 }, - { "ISO8859-1", cs_8859_1 }, - { "ISO-8859-15", cs_8859_15 }, - { "ISO8859-15", cs_8859_15 }, - { "utf-8", cs_utf_8 }, + { "ISO-8859-1", cs_8859_1 }, + { "ISO8859-1", cs_8859_1 }, + { "ISO-8859-15", cs_8859_15 }, + { "ISO8859-15", cs_8859_15 }, + { "utf-8", cs_utf_8 }, { "cp1252", cs_cp1252 }, - { "Windows-1252", cs_cp1252 }, - { "1252", cs_cp1252 }, + { "Windows-1252", cs_cp1252 }, + { "1252", cs_cp1252 }, { "BIG5", cs_big5 }, - { 
"950", cs_big5 }, + { "950", cs_big5 }, { "GB2312", cs_gb2312 }, - { "936", cs_gb2312 }, + { "936", cs_gb2312 }, { "BIG5-HKSCS", cs_big5hkscs }, { "Shift_JIS", cs_sjis }, - { "SJIS", cs_sjis }, - { "932", cs_sjis }, - { "EUCJP", cs_eucjp }, - { "EUC-JP", cs_eucjp }, - { "KOI8-R", cs_koi8r }, - { "koi8-ru", cs_koi8r }, - { "koi8r", cs_koi8r }, - { "cp1251", cs_cp1251 }, - { "Windows-1251", cs_cp1251 }, - { "win-1251", cs_cp1251 }, - { "iso8859-5", cs_8859_5 }, - { "iso-8859-5", cs_8859_5 }, - { "cp866", cs_cp866 }, - { "866", cs_cp866 }, - { "ibm866", cs_cp866 }, - { "MacRoman", cs_macroman }, + { "SJIS", cs_sjis }, + { "932", cs_sjis }, + { "EUCJP", cs_eucjp }, + { "EUC-JP", cs_eucjp }, + { "KOI8-R", cs_koi8r }, + { "koi8-ru", cs_koi8r }, + { "koi8r", cs_koi8r }, + { "cp1251", cs_cp1251 }, + { "Windows-1251", cs_cp1251 }, + { "win-1251", cs_cp1251 }, + { "iso8859-5", cs_8859_5 }, + { "iso-8859-5", cs_8859_5 }, + { "cp866", cs_cp866 }, + { "866", cs_cp866 }, + { "ibm866", cs_cp866 }, + { "MacRoman", cs_macroman }, { NULL } }; @@ -476,7 +468,6 @@ static const enc_to_uni enc_to_uni_macroman = { /* {{{ Index of tables for encoding conversion */ static const enc_to_uni *const enc_to_uni_index[cs_numelems] = { NULL, - NULL, &enc_to_uni_iso88591, &enc_to_uni_win1252, &enc_to_uni_iso885915, @@ -1144,7 +1135,7 @@ typedef struct { const entity_stage3_row *table; } entity_table_opt; -/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */ +/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. 
*/ /* {{{ Start of HTML5 multi-stage table for codepoint -> entity */ diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php index 35be2d9afc..f095202bc6 100644 --- a/ext/standard/html_tables/html_table_gen.php +++ b/ext/standard/html_tables/html_table_gen.php @@ -51,17 +51,9 @@ $t = <<<CODE *************************************************************************** **************************************************************************/ -/* cs_terminator is overloaded in the following fashion: - * - It terminates the list entity maps. - * - In BG(inverse_ent_maps), it's the key of the inverse map that stores - * only the basic entities. - * - When passed to traverse_for_entities (or via php_unescape_entities with !all), - * we don't care about the encoding (UTF-8 is chosen, but it should be used - * when it doesn't matter). - */ -enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, - cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, - cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, +enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, + cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5, + cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, cs_numelems /* used to count the number of charsets */ }; #define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8) @@ -72,36 +64,36 @@ static const struct { const char *codeset; enum entity_charset charset; } charset_map[] = { - { "ISO-8859-1", cs_8859_1 }, - { "ISO8859-1", cs_8859_1 }, - { "ISO-8859-15", cs_8859_15 }, - { "ISO8859-15", cs_8859_15 }, - { "utf-8", cs_utf_8 }, + { "ISO-8859-1", cs_8859_1 }, + { "ISO8859-1", cs_8859_1 }, + { "ISO-8859-15", cs_8859_15 }, + { "ISO8859-15", cs_8859_15 }, + { "utf-8", cs_utf_8 }, { "cp1252", cs_cp1252 }, - { "Windows-1252", cs_cp1252 }, - { "1252", cs_cp1252 }, + { "Windows-1252", cs_cp1252 }, + { "1252", cs_cp1252 }, { "BIG5", cs_big5 }, - { "950", cs_big5 }, + { "950", cs_big5 }, { 
"GB2312", cs_gb2312 }, - { "936", cs_gb2312 }, + { "936", cs_gb2312 }, { "BIG5-HKSCS", cs_big5hkscs }, { "Shift_JIS", cs_sjis }, - { "SJIS", cs_sjis }, - { "932", cs_sjis }, - { "EUCJP", cs_eucjp }, - { "EUC-JP", cs_eucjp }, - { "KOI8-R", cs_koi8r }, - { "koi8-ru", cs_koi8r }, - { "koi8r", cs_koi8r }, - { "cp1251", cs_cp1251 }, - { "Windows-1251", cs_cp1251 }, - { "win-1251", cs_cp1251 }, - { "iso8859-5", cs_8859_5 }, - { "iso-8859-5", cs_8859_5 }, - { "cp866", cs_cp866 }, - { "866", cs_cp866 }, - { "ibm866", cs_cp866 }, - { "MacRoman", cs_macroman }, + { "SJIS", cs_sjis }, + { "932", cs_sjis }, + { "EUCJP", cs_eucjp }, + { "EUC-JP", cs_eucjp }, + { "KOI8-R", cs_koi8r }, + { "koi8-ru", cs_koi8r }, + { "koi8r", cs_koi8r }, + { "cp1251", cs_cp1251 }, + { "Windows-1251", cs_cp1251 }, + { "win-1251", cs_cp1251 }, + { "iso8859-5", cs_8859_5 }, + { "iso-8859-5", cs_8859_5 }, + { "cp866", cs_cp866 }, + { "866", cs_cp866 }, + { "ibm866", cs_cp866 }, + { "MacRoman", cs_macroman }, { NULL } }; @@ -132,51 +124,51 @@ echo $t; $encodings = array( array( "ident" => "iso88591", - "enumid" => 2, + "enumid" => 1, "name" => "ISO-8859-1", "file" => "mappings/8859-1.TXT", ), array( "ident" => "iso88595", - "enumid" => 6, + "enumid" => 5, "name" => "ISO-8859-5", "file" => "mappings/8859-5.TXT", ), array( "ident" => "iso885915", - "enumid" => 4, + "enumid" => 3, "name" => "ISO-8859-15", "file" => "mappings/8859-15.TXT", ), array( "ident" => "win1252", - "enumid" => 3, + "enumid" => 2, "enumident" => "cp1252", "name" => "Windows-1252", "file" => "mappings/CP1252.TXT", ), array( "ident" => "win1251", - "enumid" => 5, + "enumid" => 4, "enumident" => "cp1252", "name" => "Windows-1251", "file" => "mappings/CP1251.TXT", ), array( "ident" => "koi8r", - "enumid" => 9, + "enumid" => 8, "name" => "KOI8-R", "file" => "mappings/KOI8-R.TXT", ), array( "ident" => "cp866", - "enumid" => 7, + "enumid" => 6, "name" => "CP-866", "file" => "mappings/CP866.TXT", ), array( "ident" => "macroman", - "enumid" 
=> 8, + "enumid" => 7, "name" => "MacRoman", "file" => "mappings/ROMAN.TXT", ), @@ -336,7 +328,7 @@ foreach ($encodings as $e) { $lines = explode("\n", file_get_contents($e{'file'})); foreach ($lines as $l) { if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches)) - $map[] = array($matches[1], $matches[2], $matches[3]); + $map[] = array($matches[1], $matches[2], rtrim($matches[3])); } $mappy = array(); @@ -420,7 +412,7 @@ typedef struct { const entity_stage3_row *table; } entity_table_opt; -/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */ +/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */ CODE; |