summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gustavo André dos Santos Lopes <cataphract@php.net> 2010-10-24 19:05:21 +0000
committer: Gustavo André dos Santos Lopes <cataphract@php.net> 2010-10-24 19:05:21 +0000
commit: 03de44f23e454423d676024b33ce0c4c806766bd (patch)
tree: 3f487886d7c2a13a9bb515d981d2cd30b7b7655f
parent: 3f804701b50e73eb50e43aaca4644695844f1625 (diff)
download: php-git-03de44f23e454423d676024b33ce0c4c806766bd.tar.gz
- Removed out-of-date comments and one enum constant from html_tables.h and its generator.
- Other minor aesthetic improvements in the generator.
-rw-r--r-- ext/standard/html_tables.h 69
-rw-r--r-- ext/standard/html_tables/html_table_gen.php 84
2 files changed, 68 insertions, 85 deletions
diff --git a/ext/standard/html_tables.h b/ext/standard/html_tables.h
index 8d4de82c5a..f9674a1c37 100644
--- a/ext/standard/html_tables.h
+++ b/ext/standard/html_tables.h
@@ -1,4 +1,4 @@
-/*
+/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
@@ -28,17 +28,9 @@
***************************************************************************
**************************************************************************/
-/* cs_terminator is overloaded in the following fashion:
- * - It terminates the list entity maps.
- * - In BG(inverse_ent_maps), it's the key of the inverse map that stores
- * only the basic entities.
- * - When passed to traverse_for_entities (or via php_unescape_entities with !all),
- * we don't care about the encoding (UTF-8 is chosen, but it should be used
- * when it doesn't matter).
- */
-enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15,
- cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r,
- cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
+enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
+ cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
+ cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
cs_numelems /* used to count the number of charsets */
};
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
@@ -49,36 +41,36 @@ static const struct {
const char *codeset;
enum entity_charset charset;
} charset_map[] = {
- { "ISO-8859-1", cs_8859_1 },
- { "ISO8859-1", cs_8859_1 },
- { "ISO-8859-15", cs_8859_15 },
- { "ISO8859-15", cs_8859_15 },
- { "utf-8", cs_utf_8 },
+ { "ISO-8859-1", cs_8859_1 },
+ { "ISO8859-1", cs_8859_1 },
+ { "ISO-8859-15", cs_8859_15 },
+ { "ISO8859-15", cs_8859_15 },
+ { "utf-8", cs_utf_8 },
{ "cp1252", cs_cp1252 },
- { "Windows-1252", cs_cp1252 },
- { "1252", cs_cp1252 },
+ { "Windows-1252", cs_cp1252 },
+ { "1252", cs_cp1252 },
{ "BIG5", cs_big5 },
- { "950", cs_big5 },
+ { "950", cs_big5 },
{ "GB2312", cs_gb2312 },
- { "936", cs_gb2312 },
+ { "936", cs_gb2312 },
{ "BIG5-HKSCS", cs_big5hkscs },
{ "Shift_JIS", cs_sjis },
- { "SJIS", cs_sjis },
- { "932", cs_sjis },
- { "EUCJP", cs_eucjp },
- { "EUC-JP", cs_eucjp },
- { "KOI8-R", cs_koi8r },
- { "koi8-ru", cs_koi8r },
- { "koi8r", cs_koi8r },
- { "cp1251", cs_cp1251 },
- { "Windows-1251", cs_cp1251 },
- { "win-1251", cs_cp1251 },
- { "iso8859-5", cs_8859_5 },
- { "iso-8859-5", cs_8859_5 },
- { "cp866", cs_cp866 },
- { "866", cs_cp866 },
- { "ibm866", cs_cp866 },
- { "MacRoman", cs_macroman },
+ { "SJIS", cs_sjis },
+ { "932", cs_sjis },
+ { "EUCJP", cs_eucjp },
+ { "EUC-JP", cs_eucjp },
+ { "KOI8-R", cs_koi8r },
+ { "koi8-ru", cs_koi8r },
+ { "koi8r", cs_koi8r },
+ { "cp1251", cs_cp1251 },
+ { "Windows-1251", cs_cp1251 },
+ { "win-1251", cs_cp1251 },
+ { "iso8859-5", cs_8859_5 },
+ { "iso-8859-5", cs_8859_5 },
+ { "cp866", cs_cp866 },
+ { "866", cs_cp866 },
+ { "ibm866", cs_cp866 },
+ { "MacRoman", cs_macroman },
{ NULL }
};
@@ -476,7 +468,6 @@ static const enc_to_uni enc_to_uni_macroman = {
/* {{{ Index of tables for encoding conversion */
static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {
NULL,
- NULL,
&enc_to_uni_iso88591,
&enc_to_uni_win1252,
&enc_to_uni_iso885915,
@@ -1144,7 +1135,7 @@ typedef struct {
const entity_stage3_row *table;
} entity_table_opt;
-/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */
+/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
/* {{{ Start of HTML5 multi-stage table for codepoint -> entity */
diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php
index 35be2d9afc..f095202bc6 100644
--- a/ext/standard/html_tables/html_table_gen.php
+++ b/ext/standard/html_tables/html_table_gen.php
@@ -51,17 +51,9 @@ $t = <<<CODE
***************************************************************************
**************************************************************************/
-/* cs_terminator is overloaded in the following fashion:
- * - It terminates the list entity maps.
- * - In BG(inverse_ent_maps), it's the key of the inverse map that stores
- * only the basic entities.
- * - When passed to traverse_for_entities (or via php_unescape_entities with !all),
- * we don't care about the encoding (UTF-8 is chosen, but it should be used
- * when it doesn't matter).
- */
-enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15,
- cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r,
- cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
+enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
+ cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
+ cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
cs_numelems /* used to count the number of charsets */
};
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
@@ -72,36 +64,36 @@ static const struct {
const char *codeset;
enum entity_charset charset;
} charset_map[] = {
- { "ISO-8859-1", cs_8859_1 },
- { "ISO8859-1", cs_8859_1 },
- { "ISO-8859-15", cs_8859_15 },
- { "ISO8859-15", cs_8859_15 },
- { "utf-8", cs_utf_8 },
+ { "ISO-8859-1", cs_8859_1 },
+ { "ISO8859-1", cs_8859_1 },
+ { "ISO-8859-15", cs_8859_15 },
+ { "ISO8859-15", cs_8859_15 },
+ { "utf-8", cs_utf_8 },
{ "cp1252", cs_cp1252 },
- { "Windows-1252", cs_cp1252 },
- { "1252", cs_cp1252 },
+ { "Windows-1252", cs_cp1252 },
+ { "1252", cs_cp1252 },
{ "BIG5", cs_big5 },
- { "950", cs_big5 },
+ { "950", cs_big5 },
{ "GB2312", cs_gb2312 },
- { "936", cs_gb2312 },
+ { "936", cs_gb2312 },
{ "BIG5-HKSCS", cs_big5hkscs },
{ "Shift_JIS", cs_sjis },
- { "SJIS", cs_sjis },
- { "932", cs_sjis },
- { "EUCJP", cs_eucjp },
- { "EUC-JP", cs_eucjp },
- { "KOI8-R", cs_koi8r },
- { "koi8-ru", cs_koi8r },
- { "koi8r", cs_koi8r },
- { "cp1251", cs_cp1251 },
- { "Windows-1251", cs_cp1251 },
- { "win-1251", cs_cp1251 },
- { "iso8859-5", cs_8859_5 },
- { "iso-8859-5", cs_8859_5 },
- { "cp866", cs_cp866 },
- { "866", cs_cp866 },
- { "ibm866", cs_cp866 },
- { "MacRoman", cs_macroman },
+ { "SJIS", cs_sjis },
+ { "932", cs_sjis },
+ { "EUCJP", cs_eucjp },
+ { "EUC-JP", cs_eucjp },
+ { "KOI8-R", cs_koi8r },
+ { "koi8-ru", cs_koi8r },
+ { "koi8r", cs_koi8r },
+ { "cp1251", cs_cp1251 },
+ { "Windows-1251", cs_cp1251 },
+ { "win-1251", cs_cp1251 },
+ { "iso8859-5", cs_8859_5 },
+ { "iso-8859-5", cs_8859_5 },
+ { "cp866", cs_cp866 },
+ { "866", cs_cp866 },
+ { "ibm866", cs_cp866 },
+ { "MacRoman", cs_macroman },
{ NULL }
};
@@ -132,51 +124,51 @@ echo $t;
$encodings = array(
array(
"ident" => "iso88591",
- "enumid" => 2,
+ "enumid" => 1,
"name" => "ISO-8859-1",
"file" => "mappings/8859-1.TXT",
),
array(
"ident" => "iso88595",
- "enumid" => 6,
+ "enumid" => 5,
"name" => "ISO-8859-5",
"file" => "mappings/8859-5.TXT",
),
array(
"ident" => "iso885915",
- "enumid" => 4,
+ "enumid" => 3,
"name" => "ISO-8859-15",
"file" => "mappings/8859-15.TXT",
),
array(
"ident" => "win1252",
- "enumid" => 3,
+ "enumid" => 2,
"enumident" => "cp1252",
"name" => "Windows-1252",
"file" => "mappings/CP1252.TXT",
),
array(
"ident" => "win1251",
- "enumid" => 5,
+ "enumid" => 4,
"enumident" => "cp1252",
"name" => "Windows-1251",
"file" => "mappings/CP1251.TXT",
),
array(
"ident" => "koi8r",
- "enumid" => 9,
+ "enumid" => 8,
"name" => "KOI8-R",
"file" => "mappings/KOI8-R.TXT",
),
array(
"ident" => "cp866",
- "enumid" => 7,
+ "enumid" => 6,
"name" => "CP-866",
"file" => "mappings/CP866.TXT",
),
array(
"ident" => "macroman",
- "enumid" => 8,
+ "enumid" => 7,
"name" => "MacRoman",
"file" => "mappings/ROMAN.TXT",
),
@@ -336,7 +328,7 @@ foreach ($encodings as $e) {
$lines = explode("\n", file_get_contents($e{'file'}));
foreach ($lines as $l) {
if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches))
- $map[] = array($matches[1], $matches[2], $matches[3]);
+ $map[] = array($matches[1], $matches[2], rtrim($matches[3]));
}
$mappy = array();
@@ -420,7 +412,7 @@ typedef struct {
const entity_stage3_row *table;
} entity_table_opt;
-/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */
+/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
CODE;