diff options
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | enc/trans/iso2022.trans | 2 | ||||
-rw-r--r-- | enc/trans/newline.trans | 3 | ||||
-rw-r--r-- | enc/trans/utf_16_32.trans | 8 | ||||
-rw-r--r-- | tool/transcode-tblgen.rb | 3 | ||||
-rw-r--r-- | transcode.c | 5 | ||||
-rw-r--r-- | transcode_data.h | 5 |
7 files changed, 40 insertions, 4 deletions
@@ -1,3 +1,21 @@ +Tue Sep 2 02:36:20 2008 Tanaka Akira <akr@fsij.org> + + * tool/transcode-tblgen.rb: record offsets array as index of + byte_array to avoid relocation. + + * transcode.c (transcode_restartable0): add byte_array to get offsets + array. + + * transcode_data.h (BYTE_LOOKUP_BASE): change return type to + uintptr_t. + (rb_transcoder): add fields: byte_array, word_array and word_size. + + * enc/trans/newline.trans: follow rb_transcoder change. + + * enc/trans/iso2022.trans: ditto. + + * enc/trans/utf_16_32.trans: ditto. + Tue Sep 2 02:05:14 2008 Tanaka Akira <akr@fsij.org> * tool/transcode-tblgen.rb: make infos arrays and BYTE_LOOKUPs into diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans index 8a94d70040..a11424c2c5 100644 --- a/enc/trans/iso2022.trans +++ b/enc/trans/iso2022.trans @@ -70,6 +70,7 @@ fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, u static const rb_transcoder rb_ISO_2022_JP_to_EUC_JP = { "ISO-2022-JP", "EUC-JP", iso2022jp_to_eucjp, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ @@ -142,6 +143,7 @@ finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o) static const rb_transcoder rb_EUC_JP_to_ISO_2022_JP = { "EUC-JP", "ISO-2022-JP", eucjp_to_iso2022jp, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 3, /* max_input */ 5, /* max_output */ diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 536cba108c..6431a7d951 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -55,6 +55,7 @@ fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, un static const rb_transcoder rb_universal_newline = { "universal_newline", "", universal_newline, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ @@ -65,6 +66,7 @@ rb_universal_newline = { static const rb_transcoder rb_crlf_newline = { "", "crlf_newline", crlf_newline, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 1, /* max_input */ 2, /* max_output */ @@ -75,6 +77,7 @@ rb_crlf_newline = { static const rb_transcoder rb_cr_newline = { "", "cr_newline", cr_newline, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans index 2bbee65b7f..bb6db9d5e4 100644 --- a/enc/trans/utf_16_32.trans +++ b/enc/trans/utf_16_32.trans @@ -262,6 +262,7 @@ fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned static const rb_transcoder rb_from_UTF_16BE = { "UTF-16BE", "UTF-8", from_UTF_16BE, + byte_array, word_array, sizeof(uintptr_t), 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -272,6 +273,7 @@ rb_from_UTF_16BE = { static const rb_transcoder rb_to_UTF_16BE = { "UTF-8", "UTF-16BE", to_UTF_16BE, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -282,6 +284,7 @@ rb_to_UTF_16BE = { static const rb_transcoder rb_from_UTF_16LE = { "UTF-16LE", "UTF-8", from_UTF_16LE, + byte_array, word_array, sizeof(uintptr_t), 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -292,6 +295,7 @@ rb_from_UTF_16LE = { static const rb_transcoder rb_to_UTF_16LE = { "UTF-8", "UTF-16LE", to_UTF_16BE, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -302,6 +306,7 @@ rb_to_UTF_16LE = { static const rb_transcoder rb_from_UTF_32BE = { "UTF-32BE", "UTF-8", from_UTF_32BE, + byte_array, word_array, sizeof(uintptr_t), 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -312,6 +317,7 @@ rb_from_UTF_32BE = { static const rb_transcoder rb_to_UTF_32BE = { "UTF-8", "UTF-32BE", to_UTF_16BE, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -322,6 +328,7 @@ rb_to_UTF_32BE = { static const rb_transcoder rb_from_UTF_32LE = { "UTF-32LE", "UTF-8", from_UTF_32LE, + byte_array, word_array, sizeof(uintptr_t), 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ @@ -332,6 +339,7 @@ rb_from_UTF_32LE = { static const rb_transcoder rb_to_UTF_32LE = { "UTF-8", "UTF-32LE", to_UTF_16BE, + byte_array, word_array, sizeof(uintptr_t), 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index 4702600895..f6c9b0118a 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -334,7 +334,7 @@ End end size = bytes_code[/\[\d+\]/][1...-1].to_i bytes_code.sub!(/^(\};\n\z)/) { - "\#define #{offsets_name} (byte_array+#{size})\n" + + "\#define #{offsets_name} #{size}\n" + format_offsets(min,max,offsets) + "\n" + $1 } @@ -587,6 +587,7 @@ def transcode_tblgen(from, to, map) static const rb_transcoder #{transcoder_name} = { #{c_esc from}, #{c_esc to}, #{real_tree_name}, + byte_array, word_array, sizeof(uintptr_t), #{input_unit_length}, /* input_unit_length */ #{max_input}, /* max_input */ #{max_output}, /* max_output */ diff --git a/transcode.c b/transcode.c index f4409a39ab..bc1597e5b0 100644 --- a/transcode.c +++ b/transcode.c @@ -493,10 +493,11 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, next_byte = (unsigned char)*in_p++; follow_byte: - if (next_byte < BYTE_LOOKUP_BASE(next_table)[0] || BYTE_LOOKUP_BASE(next_table)[1] < next_byte) +#define BL_BASE(next_table) (tr->byte_array + BYTE_LOOKUP_BASE(next_table)) + if (next_byte < BL_BASE(next_table)[0] || BL_BASE(next_table)[1] < next_byte) next_info = INVALID; else { - unsigned int next_offset = BYTE_LOOKUP_BASE(next_table)[2+next_byte-BYTE_LOOKUP_BASE(next_table)[0]]; + unsigned int next_offset = BL_BASE(next_table)[2+next_byte-BL_BASE(next_table)[0]]; next_info = (VALUE)BYTE_LOOKUP_INFO(next_table)[next_offset]; } follow_info: diff --git a/transcode_data.h b/transcode_data.h index 8287cf66e0..a40391614c 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -18,7 +18,7 @@ typedef unsigned char base_element; typedef uintptr_t BYTE_LOOKUP[2]; -#define BYTE_LOOKUP_BASE(bl) ((const base_element *)(((uintptr_t *)(bl))[0])) +#define BYTE_LOOKUP_BASE(bl) (((uintptr_t *)(bl))[0]) #define BYTE_LOOKUP_INFO(bl) ((const struct byte_lookup *const *)(((uintptr_t *)(bl))[1])) #ifndef PType @@ -107,6 +107,9 @@ struct rb_transcoder { const char *from_encoding; const char *to_encoding; uintptr_t conv_tree_start; + const unsigned char *byte_array; + const uintptr_t *word_array; + int word_size; int input_unit_length; int max_input; int max_output; |