summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa <handa@m17n.org>2003-09-08 12:53:41 +0000
committerKenichi Handa <handa@m17n.org>2003-09-08 12:53:41 +0000
commit8f924df7df019cce90537647de2627581043b5c4 (patch)
tree6c40bd05679425e710d6b2e5649eae3da5e40a52 /src
parent463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (diff)
parent9d4807432a01f9b3cc519fcfa3ea92a70ffa7f43 (diff)
downloademacs-8f924df7df019cce90537647de2627581043b5c4.tar.gz
*** empty log message ***
Diffstat (limited to 'src')
-rw-r--r--src/.gdbinit31
-rw-r--r--src/ChangeLog.222061
-rw-r--r--src/Makefile.in153
-rw-r--r--src/abbrev.c14
-rw-r--r--src/alloc.c81
-rw-r--r--src/buffer.c52
-rw-r--r--src/buffer.h23
-rw-r--r--src/bytecode.c17
-rw-r--r--src/callproc.c180
-rw-r--r--src/casefiddle.c201
-rw-r--r--src/casetab.c87
-rw-r--r--src/category.c222
-rw-r--r--src/category.h19
-rw-r--r--src/ccl.c517
-rw-r--r--src/ccl.h35
-rw-r--r--src/character.c975
-rw-r--r--src/character.h652
-rw-r--r--src/charset.c3203
-rw-r--r--src/charset.h1297
-rw-r--r--src/chartab.c965
-rw-r--r--src/cmds.c21
-rw-r--r--src/coding.c11740
-rw-r--r--src/coding.h889
-rw-r--r--src/composite.c207
-rw-r--r--src/composite.h24
-rw-r--r--src/data.c119
-rw-r--r--src/dired.c1
-rw-r--r--src/dispextern.h41
-rw-r--r--src/dispnew.c2
-rw-r--r--src/disptab.h10
-rw-r--r--src/doc.c2
-rw-r--r--src/doprnt.c2
-rw-r--r--src/dosfns.c2
-rw-r--r--src/editfns.c50
-rw-r--r--src/emacs.c19
-rw-r--r--src/fileio.c533
-rw-r--r--src/filelock.c2
-rw-r--r--src/fns.c713
-rw-r--r--src/fontset.c1745
-rw-r--r--src/fontset.h58
-rw-r--r--src/frame.c12
-rw-r--r--src/frame.h2
-rw-r--r--src/indent.c4
-rw-r--r--src/insdel.c56
-rw-r--r--src/keyboard.c20
-rw-r--r--src/keymap.c363
-rw-r--r--src/lisp.h235
-rw-r--r--src/lread.c710
-rw-r--r--src/marker.c2
-rw-r--r--src/minibuf.c29
-rw-r--r--src/msdos.c10
-rw-r--r--src/print.c51
-rw-r--r--src/process.c130
-rw-r--r--src/puresize.h2
-rw-r--r--src/regex.c525
-rw-r--r--src/regex.h10
-rw-r--r--src/search.c129
-rw-r--r--src/syntax.c903
-rw-r--r--src/syntax.h43
-rw-r--r--src/term.c28
-rw-r--r--src/w16select.c2
-rw-r--r--src/w32bdf.c2
-rw-r--r--src/w32console.c2
-rw-r--r--src/w32term.c20
-rw-r--r--src/xdisp.c250
-rw-r--r--src/xfaces.c707
-rw-r--r--src/xfns.c106
-rw-r--r--src/xterm.c200
-rw-r--r--src/xterm.h3
69 files changed, 18549 insertions, 12972 deletions
diff --git a/src/.gdbinit b/src/.gdbinit
index ca6f5f255a2..ca6648e162f 100644
--- a/src/.gdbinit
+++ b/src/.gdbinit
@@ -1,4 +1,4 @@
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2003
# Free Software Foundation, Inc.
#
# This file is part of GNU Emacs.
@@ -237,10 +237,10 @@ end
define xchartable
print (struct Lisp_Char_Table *) (($ & $valmask) | gdb_data_seg_bits)
-printf "Purpose: "
-output (char*)&((struct Lisp_Symbol *) ((((int) $->purpose) & $valmask) | gdb_data_seg_bits))->name->data
-printf " %d extra slots", ($->size & 0x1ff) - 388
+printf " %d extra slots", ($->size & 0x1ff) - 68
echo \n
+printf "Purpose: "
+xprintsym $->purpose
end
document xchartable
Print the address of the char-table $, and its purpose.
@@ -332,6 +332,29 @@ document xprintsym
Print argument as a symbol.
end
+define xcoding
+ set $tmp = (struct Lisp_Hash_Table *) ((Vcoding_system_hash_table & $valmask) | gdb_data_seg_bits)
+ set $tmp = (struct Lisp_Vector *) (($tmp->key_and_value & $valmask) | gdb_data_seg_bits)
+ set $name = $tmp->contents[$arg0 * 2]
+ print $name
+ pr
+ print $tmp->contents[$arg0 * 2 + 1]
+ pr
+end
+document xcoding
+ Print the name and attributes of coding system that has ID (argument).
+end
+
+define xcharset
+ set $tmp = (struct Lisp_Hash_Table *) ((Vcharset_hash_table & $valmask) | gdb_data_seg_bits)
+ set $tmp = (struct Lisp_Vector *) (($tmp->key_and_value & $valmask) | gdb_data_seg_bits)
+ p $tmp->contents[$arg0->hash_index * 2]
+ pr
+end
+document xcharset
+ Print the name of charset that has ID (argument).
+end
+
define xbacktrace
set $bt = backtrace_list
while $bt
diff --git a/src/ChangeLog.22 b/src/ChangeLog.22
new file mode 100644
index 00000000000..355ffa20515
--- /dev/null
+++ b/src/ChangeLog.22
@@ -0,0 +1,2061 @@
+2003-07-09 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_sjis): Check bytes more rigidly.
+
+2003-06-26 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (choose_write_coding_system): Return a decided coding
+ system.
+ (Fwrite_region): Set Vlast_coding_system_used to the return value
+ of choose_write_coding_system.
+
+2003-06-06 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fset_charset_priority): Pay attention to duplicated
+ arguments.
+
+ * coding.c (QCcategory): New variable.
+ (syms_of_coding): Defsym it. Set all elements of
+ Vcoding_category_table and their symbol values.
+ (Fset_coding_system_priority): Doc fix. Update symbol qvalues of
+ coding-category-XXX, and coding-category-list.
+ (Fdefine_coding_system_internal): Add category in the plist.
+
+2003-06-05 Kenichi Handa <handa@m17n.org>
+
+ * callproc.c (Fcall_process): Handle carryover correctly.
+
+ * coding.c (decode_coding_iso_2022): Fix handling of invalid
+ bytes.
+ (raw_text_coding_system): Check NILP (coding_system).
+ (coding_inherit_eol_type): Check NILP (coding_system) and
+ NILP (parent).
+ (consume_chars): Fix for the case of raw-text.
+
+ * process.c (read_process_output): Handle carryover correctly.
+
+2003-06-02 Dave Love <fx@gnu.org>
+
+ * regex.c (re_search_2): Fix last change.
+
+2003-05-30 Kenichi Handa <handa@m17n.org>
+
+ * regex.c (GET_CHAR_BEFORE_2): Check multibyte, not
+ target_multibyte. Even in a unibyte case, return a converted
+ multibyte char.
+ (GET_CHAR_AFTER): New macro.
+ (PATFETCH): Translate via multibyte char.
+ (HANDLE_UNIBYTE_RANGE): Delete this macro.
+ (SETUP_MULTIBYTE_RANGE): New macro.
+ (regex_compile): Setup compiled code so that its multibyteness
+ matches that of a target. Fix the handling of "[X-YZ]" using
+ SETUP_MULTIBYTE_RANGE.
+ (analyse_first) <charset>: For filling fastmap for all multibyte
+ characters, don't check by BASE_LEADING_CODE_P.
+ (re_search_2): Don't check RE_TARGET_MULTIBYTE_P (bufp). It is
+ the same as RE_MULTIBYTE_P (bufp) now.
+ (mutually_exclusive_p): Check by (! multibyte ||
+ IS_REAL_ASCII (c)).
+ (TARGET_CHAR_AND_LENGTH): Delete this macro.
+ (TRANSLATE_VIA_MULTIBYTE): New macro.
+ (re_match_2_internal): Don't check RE_TARGET_MULTIBYTE_P (bufp).
+ It is the same as RE_MULTIBYTE_P (bufp) now.
+ <exactn>: Translate via multibyte.
+ <anychar>: Fetch a character by RE_STRING_CHAR_AND_LENGTH. Don't
+ translate it.
+ <charset, charset_not>: Fetch a character by
+ RE_STRING_CHAR_AND_LENGTH. Translate via multibyte.
+ <duplicate>: Call bcmp_translate with the last arg `multibyte'.
+ <wordbound, notwordbound, wordbeg, wordend, syntaxspec,
+ notsyntaxspec, categoryspec, notcategoryspec> Fetch a character
+ by GET_CHAR_AFTER.
+ (bcmp_translate): Likewise.
+
+ * search.c (compile_pattern): Check the member target_multibyte,
+ not the member multibyte of buf.
+
+ * lread.c (read1): While reading a string, set force_singlebyte
+ and force_multibyte correctly.
+
+ * charset.c (Fset_unibyte_charset): Fix setting up of
+ unibyte_to_multibyte_table.
+ (init_charset_once): Likewise.
+
+2003-05-29 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (setup_coding_system): If coding has
+ post-read-conversion or pre-write-conversion, set
+ CODING_REQUIRE_DECODING_MASK and CODING_REQUIRE_ENCODING_MASK
+ respectively.
+ (decode_coding_gap): Run post-read-conversion if any.
+
+ * fileio.c (Finsert_file_contents): Even if we read into a
+ unibyte buffer, check if we must decode the result or not.
+
+2003-05-29 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (make_conversion_work_buffer): Change the work buffer
+ name to the same one as that of Emacs 21.
+
+2003-05-28 Kenichi Handa <handa@m17n.org>
+
+ * coding.h (make_conversion_work_buffer): Prototype adjusted.
+ (code_conversion_restore): Don't extern it.
+
+ * coding.c (detected_mask): Delete unused variable.
+ (decode_coding_iso_2022): Pay attention to the byte sequence of
+ CTEXT extended segment, and retain those bytes as is.
+ (decode_coding_ccl): Delete unused variable `valids'.
+ (setup_coding_system): Delete unused variable `category'.
+ (consume_chars): Delete unused variable `category'. Make it work
+ for non-multibyte case.
+ (make_conversion_work_buffer): Argument changed.
+ (saved_coding): Delete unused variable.
+ (code_conversion_restore): Don't check saved_coding->destination.
+ (code_conversion_save): New function.
+ (decode_coding_gap, encode_coding_gap): Call code_conversion_save
+ instead of record_unwind_protect.
+ (decode_coding_object, encode_coding_object): Likewise. Recover
+ PT.
+ (detect_coding_system): Delete unused variable `mask'.
+ (Fdefine_coding_system_internal): Delete unsed vaiable id;
+
+ * fileio.c (kill_workbuf_unwind): New function.
+ (Finsert_file_contents): On replacing, call
+ make_conversion_work_buffer with correct args, and call
+ record_unwind_protect with the first arg kill_workbuf_unwind.
+
+ * lisp.h (Fgenerate_new_buffer_name): EXFUN it.
+
+2003-05-20 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (BASE_FONTSET_P): Check FONTSET_BASE, not
+ FONTSET_NAME.
+ (fontset_add): Fix for the case that TO is less than TO1.
+ (Ffontset_info): Don't use fallback fontset on checking the
+ default fontset.
+ (dump_fontset): New function for debugging.
+
+ * coding.c (Fdefine_coding_system_internal): Fix for the case that
+ coding_type is Qcharset.
+
+2003-05-07 Kenichi Handa <handa@m17n.org>
+
+ * chartab.c (map_sub_char_table): New argument DEFAULT_VAL.
+ (map_char_table): Don't inherit the value from the parent on
+ initializing VAL. Adjusted for the above change.
+
+2003-05-06 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Qsignature, Qendian): Delete these variables.
+ (syms_of_coding): Don't initialize them.
+ (CATEGORY_MASK_UTF_16_AUTO): New macro.
+ (detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO in
+ detect_info->found.
+ (decode_coding_utf_16): Don't detect BOM here.
+ (encode_coding_utf_16): Produce BOM if CODING_UTF_16_BOM (coding)
+ is NOT utf_16_without_bom.
+ (setup_coding_system): For a coding system of type utf-16, check
+ if the attribute :endian is Qbig or not (not nil or not), and set
+ CODING_REQUIRE_DETECTION_MASK if BOM detection is required.
+ (detect_coding): If coding type is utf-16 and BOM detection is
+ required, detect it.
+ (Fdefine_coding_system_internal): For a coding system of type
+ utf-16, check if the attribute :endian is Qbig or not (not nil or
+ not).
+
+2003-05-06 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (coding_set_source): Fix for the case that the current
+ buffer is different from coding->src_object.
+ (decode_coding_object): Don't use the conversion work buffer if
+ DST_OBJECT is a buffer.
+
+2003-05-04 Dave Love <fx@gnu.org>
+
+ * lread.c (read_emacs_mule_char) [len==2]: Index
+ emacs_mule_charset correctly.
+
+2003-02-16 Dave Love <fx@gnu.org>
+
+ * coding.c (Qbig5, Vbig5_coding_system, CATEGORY_MASK_BIG5)
+ (detect_coding_big5, decode_coding_big5, encode_coding_big5)
+ (Fdecode_big5_char, Fencode_big5_char): Deleted. (Big5 no longer
+ treated specially.)
+ (setup_coding_system, coding_category, CATEGORY_MASK_ANY)
+ (detected_mask): Remove Big5 bits.
+
+2003-04-09 Kenichi Handa <handa@m17n.org>
+
+ The following changes are to make the font rescaling facility
+ compatible with Emacs 21.
+
+ * xfaces.c (Vface_font_rescale_alist): Renamed from
+ Vface_resizing_fonts.
+ (struct font_name): Rename member resizing_ratio to rescale_ratio.
+ (font_rescale_ratio): Renamed from font_resizing_ratio.
+ (split_font_name): Set font->rescale_ratio.
+ (better_font_p): Pay attention to font->rescale_ratio.
+ (build_scalable_font_name): Likewise. Change RESX, and RESY
+ fields.
+ (syms_of_xfaces): Declare Vface_font_rescale_alist as a Lisp
+ variable.
+
+2003-03-28 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig)
+ (Qutf_16_le): Remove these variables.
+ (syms_of_coding): Don't DEFSYM them.
+ (decode_coding_utf_16): Fix handling of BOM.
+ (encode_coding_utf_16): Fix handling of BOM.
+
+2003-03-14 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (Finsert_file_contents): On replacing, before decoding
+ the file into the work buffer, set point of the work buffer to the
+ end.
+
+2003-02-13 Dave Love <fx@gnu.org>
+
+ * coding.c (Fcheck_coding_systems_region): Fix type errors.
+
+2003-02-04 Dave Love <fx@gnu.org>
+
+ * xterm.c (XTread_socket): Check Lisp types for Vx_keysym_table
+ and fix C types.
+
+2003-01-31 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (SKIP_GLYPHS): New macro.
+ (set_cursor_from_row): Pay attention to string display properties.
+
+ * category.c (copy_category_entry): Fix for the case that RANGE
+ is an integer.
+
+ * xterm.c (x_encode_char): Call ccl_driver with the last arg Qnil.
+
+ * w32term.c (w32_encode_char): Call ccl_driver with the last arg
+ Qnil.
+
+2003-01-30 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fcharset_id_internal): New function.
+ (syms_of_charset): Defsubr it.
+
+ * coding.c (decode_coding_ccl, encode_coding_ccl): Call ccl_driver
+ with the last arg charset_list acquired from coding.
+ (Fdefine_coding_system_internal): For ccl-based coding system, fix
+ the attribute coding_attr_ccl_valids.
+
+ * coding.h (enum define_coding_ccl_arg_index): Set the first
+ member coding_arg_ccl_decoder to coding_arg_max.
+
+ * ccl.h (ccl_driver): Prototype adjusted.
+
+ * ccl.c (CCL_DECODE_CHAR, CCL_ENCODE_CHAR): New macros.
+ (ccl_driver): New arg CHARSET_LIST. Use the above macros instead
+ of DECODE_CAHR, ENCODE_CHAR, CHAR_CHARSET.
+ (Fccl_execute): Call ccl_driver with the last arg Qnil.
+ (Fccl_execute_on_string): Likewise.
+
+2003-01-11 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (ENCODE_CHAR): If the method is SUBSET or SUPERSET,
+ call encode_char.
+
+ * charset.c (encode_char): Fix handling of methods SUBSET and
+ SUPERSET.
+
+ * xterm.c (x_new_fontset): Fix previous change.
+
+2003-01-10 Dave Love <fx@gnu.org>
+
+ * composite.c (syms_of_composite): Make composition_hash_table
+ weak.
+
+2003-01-10 Kenichi Handa <handa@m17n.org>
+
+ * dispextern.h (check_face_attributes, generate_ascii_font_name)
+ (font_name_registry): Don't extern them.
+ (split_font_name_into_vector, build_font_name_from_vector): Extern
+ them.
+
+ * fontset.h (Qfontset): Don't extern it.
+ (new_fontset_from_font_name): Extern it.
+
+ * fontset.c: Give 8 extra slots to fontset objects.
+ (Qfontset_info): New variable.
+ (syms_of_fontset): Defsym it.
+ (FONTSET_FALLBACK): New macro.
+ (fontset_face): Try also the default fontset.
+ (make_fontset): Realize a fallback fontset from the default
+ fontset.
+ (generate_ascii_font_name): Moved from xfaces.c. Rewritten by
+ using split_font_name_into_vector and build_font_name_from_vector.
+ (Fset_fontset_font): Access the elements of font_spec by enum
+ FONT_SPEC_INDEX. If font_spec is a string, extract the registry
+ name by using split_font_name_into_vector.
+ (Fnew_fontset): If no ASCII font is specified in FONTLIST,
+ generate a proper font name from the fontset name. Update
+ Vfontset_alias_alist.
+ (n_auto_fontsets): New variable.
+ (new_fontset_from_font_name): New function.
+ (Ffont_info): Store the information about fonts generated from the
+ default fontset in the first extra slot of the returned
+ char-table.
+
+ * xfaces.c (generate_ascii_font_name): Moved to fontset.c.
+ (font_name_registry): Function deleted.
+ (split_font_name_into_vector): New function.
+ (build_font_name_from_vector): New function.
+ (font_list): The argument REGISTRY is now a list of registry
+ names.
+ (choose_face_font): If we are choosing an ASCII font, and ATTRS
+ specifies an explicit font name, return the name as is. Make a
+ list of registy names.
+
+ * xfns.c (x_set_font, x_create_tip_frame): Adjusted to the change
+ of x_new_fontset.
+ (Fx_create_frame): Don't call x_new_fontset here. Just use
+ x_list_fonts to check the existence of fonts.
+
+ * xterm.h (x_new_fontset): Prototype adjusted.
+
+ * xterm.c (x_new_fontset): Change the arg FONTSETNAME to Lisp
+ string. Use new_fontset_from_font_name to create a fontset from a
+ font name.
+
+2003-01-07 Dave Love <fx@gnu.org>
+
+ * Makefile.in: Fix some dependencies.
+
+ * keymap.c (Fapropos_internal): Don't gcpro apropos_predicate but
+ set it to nil before returning.
+
+ * composite.c (update_compositions): Fix type error.
+
+ * syntax.c (skip_chars, skip_syntaxes): Fix type errors.
+
+2003-01-07 Kenichi Handa <handa@etl.go.jp>
+
+ * xterm.c (x_new_font): Optimize for the case that the font is
+ already set for the frame.
+
+2003-01-06 Kenichi Handa <handa@m17n.org>
+
+ * chartab.c (char_table_ascii): Check if the char table contents
+ is sub-char-table or not.
+ (char_table_set): Fix argument to char_table_ascii.
+ (char_table_set_range): Likewise.
+
+ * coding.c (CATEGORY_MASK_RAW_TEXT): New macro.
+ (detect_coding_utf_8, detect_coding_utf_16)
+ (detect_coding_emacs_mule, detect_coding_iso_2022)
+ (detect_coding_sjis, detect_coding_big5)
+ (detect_coding_ccl, detect_coding_charset): Change argument MASK
+ to DETECT_INFO. Update DETECT_INFO and return 1 if the byte
+ sequence is valid in this coding system. Callers changed.
+ (MAX_ANNOTATION_LENGTH): New macro.
+ (ADD_ANNOTATION_DATA): New macro.
+ (ADD_COMPOSITION_DATA): Argument changed. Callers changed. Call
+ ADD_ANNOTATION_DATA. The format of annotation data changed.
+ (ADD_CHARSET_DATA): New macro.
+ (emacs_mule_char): New argument ID. Callers changed.
+ (decode_coding_emacs_mule, decode_coding_iso_2022)
+ (decode_coding_sjis, decode_coding_big5, decode_coding_charset):
+ Produce charset annotation data in coding->charbuf.
+ (encode_coding_emacs_mule, encode_coding_iso_2022): Pay attention
+ to charset annotation data in coding->charbuf.
+ (setup_coding_system): Add CODING_ANNOTATE_CHARSET_MASK
+ coding->common_flags if the coding system is iso-2022 based and
+ uses designation.
+ (produce_composition): Adjusted for the new annotation data
+ format.
+ (produce_charset): New function.
+ (produce_annotation): Handle charset annotation.
+ (handle_composition_annotation, handle_charset_annotation): New
+ functions.
+ (consume_chars): Handle charset annotation. Utilize the above two
+ functions.
+ (encode_coding_object): If SRC_OBJECT and DST_OBJECT are the same
+ buffer, get the deleted text as a string and set
+ coding->src_object to that string.
+ (detect_coding, detect_coding_system): Use the new struct
+ coding_detection_info.
+
+ * coding.h (struct coding_detection_info): New structure.
+ (struct coding_system): Prototype of the member `detector'
+ adjusted.
+ (CODING_ANNOTATE_CHARSET_MASK): New macro.
+
+2003-01-06 Kenichi Handa <handa@etl.go.jp>
+
+ * insdel.c (insert_from_gap): Fix argument to offset_intervals.
+
+2003-01-03 Dave Love <fx@gnu.org>
+
+ * keymap.c (apropos_predicate, apropos_accumulate): Declare
+ static.
+ (Fapropos_internal): Don't gcpro apropos_accumulate. Set result
+ to new local and nullify apropos_accumulate before returning.
+ (syms_of_keymap): Staticpro and initialize apropos_accumulate.
+
+2002-12-05 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fdefine_charset_internal): Setup charset.fast_map
+ correctly.
+
+2002-11-26 Dave Love <fx@gnu.org>
+
+ * fns.c (Flanginfo): Call synchronize_system_time_locale.
+
+2002-11-07 Kenichi Handa <handa@m17n.org>
+
+ The following changes are to make character composition happen
+ automatically on displaying.
+
+ * Makefile.in (lisp, shortlisp): Add composite.elc
+
+ * composite.h (Qauto_composed, Vauto_composition_function,
+ Qauto_composition_function): Extern them.
+
+ * composite.c (Vcomposition_function_table,
+ Qcomposition_function_table): Delete variables.
+ (Qauto_composed, Vauto_composition_function,
+ Qauto_composition_function): New variables.
+ (run_composition_function): Don't call
+ compose-chars-after-function.
+ (update_compositions): Clear `auto-composed' text property.
+ (compose_chars_in_text): Delete this function.
+ (syms_of_composite): Staticpro Qauto_composed and
+ Qauto_composition_function. Declare Vauto_composition_function as
+ a Lisp variable.
+
+ * dispextern.h (enum prop_idx): Add member AUTO_COMPOSED_PROP_IDX.
+
+ * xdisp.c (it_props): Add an entry for Qauto_composed.
+ (handle_auto_composed_prop): New function.
+
+ * xselect.c (selection_data_to_lisp_data): Don't call
+ compose_chars_in_text.
+
+2002-11-06 Dave Love <fx@gnu.org>
+
+ * keyboard.c (read_char): Modify checking around use of
+ Vkeyboard_translate_table.
+
+ * xterm.c (XTread_socket): Check Lisp types for Vx_keysym_table
+ and fix C types.
+
+2002-11-06 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_utf_8): When eol_type is Qdos, handle
+ the case that the last byte is '\r' correctly.
+ (decode_coding_emacs_mule): Likewise.
+ (decode_coding_iso_2022): Likewise.
+ (decode_coding_sjis): Likewise.
+ (decode_coding_big5): Likewise.
+ (decode_coding_charset): Likewise.
+ (produce_chars): Likewise.
+ (decode_coding): Flushing out the unprocessed data correctly.
+ (decode_coding_gap): Set CODING_MODE_LAST_BLOCK bit of
+ coding->mode.
+
+2002-10-31 Dave Love <fx@gnu.org>
+
+ * xterm.c (XTread_socket): Fix changes for defined keysyms. Add
+ XK_ISO... case.
+ (xaw_scroll_callback): Revert last change.
+
+2002-10-30 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fset_charset_priority): Update
+ Viso_2022_charset_list.
+
+2002-10-29 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (Vface_resizing_fonts): New variable.
+ (struct font_name): New member `resizing_ratio'.
+ (font_resizing_ratio): New function.
+ (split_font_name): Set font->resizing_ratio.
+ (better_font_p): Pay attention to font->resizing_ratio.
+ (build_scalable_font_name): Likewise. Don't change POINT_SIZE,
+ RESX, and RESY fields.
+ (try_alternative_families): Try scalable fonts if
+ Vscalable_fonts_allowed is not Qt.
+ (syms_of_xfaces): Declare Vface_resizing_fonts as a Lisp variable.
+
+2002-10-29 Dave Love <fx@gnu.org>
+
+ * xterm.c (xaw_scroll_callback): Cast correctly.
+
+2002-10-28 Dave Love <fx@gnu.org>
+
+ * keyboard.c (lispy_accent_codes, lispy_accent_keys): Extend.
+ (lispy_kana_keys): Comment out.
+ (make_lispy_event) [XK_kana_A]: Comment out.
+
+ * xterm.c (xaw_scroll_callback): Cast call_data.
+ (XTread_socket): Deal with ASCII keysyms.
+ (syms_of_xterm) <Vx_keysym_table>: Fix args of make_hash_table.
+
+2002-10-27 Dave Love <fx@gnu.org>
+
+ * xterm.c (Vx_keysym_table): New.
+ (syms_of_xterm): Initialize it.
+ (XTread_socket): Use it.
+ From head: Eliminate incorrect optimization that tried to avoid
+ decoding the output of X*LookupString.
+ (x_get_font_repertory): Delete charset declaration.
+
+2002-10-16 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding): Fix previous change.
+ (detect_coding_charset): If only ASCII bytes are found, return 0.
+ (detect_coding_system): Fix previous change.
+ (Fdefine_coding_system_internal): Setup CODING_ATTR_ASCII_COMPAT
+ (attrs) correctly.
+
+2002-10-15 Dave Love <fx@gnu.org>
+
+ * coding.c (Fcheck_coding_system): Doc fix.
+
+ * editfns.c (Finsert_byte): Return a proper value.
+
+2002-10-14 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (decode_coding): Fix args to translate_chars. Pay
+ attention to Vstandard_translation_table_for_decode.
+ (encode_coding): Fix args to translate_chars. Pay attention to
+ Vstandard_translation_table_for_encode.
+
+ * data.c (Faset): Check NEWELT by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+ * editfns.c (general_insert_function): Check VAL by ASCII_CHAR_P,
+ not by SINGLE_BYTE_CHAR_P.
+
+ * fns.c (concat): Check CH by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+ * insdel.c (copy_text): Check C by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+ * keymap.c (Ftext_char_description): Check C by ASCII_CHAR_P, not
+ by SINGLE_BYTE_CHAR_P.
+
+ * search.c (Freplace_match): Check C by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+2002-10-14 Dave Love <fx@gnu.org>
+
+ * fns.c (Fstring_as_multibyte, Fstring_to_multibyte): Doc fix.
+
+2002-10-10 Dave Love <fx@gnu.org>
+
+ * fns.c (Flanginfo): Fix typo.
+
+ * unexelf.c (unexec): Make last change conditional on Irix 6.5.
+
+2002-10-10 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding_utf_8): Check incomplete byte sequence.
+ Don't update *mask when correctly detected.
+ (detect_coding_utf_16): Likewise.
+ (detect_coding_emacs_mule): Likewise.
+ (detect_coding_iso_2022): Likewise.
+ (detect_coding_sjis): Likewise.
+ (detect_coding_big5): Likewise.
+ (detect_coding_ccl): Likewise.
+ (decode_coding_sjis): Fix decoding of katakana-jisx0201.
+ (detect_eol): Delete the argument CODING, and add the argument
+ CATEGORY.
+ (detect_coding): Adjusted for the changes above.
+ (detect_coding_system): Likewise.
+
+2002-10-09 Kenichi Handa <handa@m17n.org>
+
+ * character.c (char_string): Renamed from
+ char_string_with_unification. Pay attention to
+ CHAR_MODIFIER_MASK.
+ (string_char): Renamed from string_char.
+
+ * character.h (CHAR_STRING): Call char_string if C is greater than
+ MAX_3_BYTE_CHAR.
+ (CHAR_STRING_ADVANCE): Likewise.
+ (STRING_CHAR): Call string_char instead of
+ string_char_with_unification.
+ (STRING_CHAR_AND_LENGTH): Likewise.
+ (STRING_CHAR_ADVANCE): Likewise.
+
+2002-10-09 Dave Love <fx@gnu.org>
+
+ * coding.c (decode_coding_utf_8): Treat surrogates as invalid.
+
+2002-10-07 Kenichi Handa <handa@m17n.org>
+
+ * keymap.c (push_key_description): Pay attention to
+ force_multibyte.
+
+ * regex.c (re_search_2): Fix for the case of unibyte buffer.
+
+2002-10-06 Dave Love <fx@gnu.org>
+
+ * charset.c (define_charset_internal): Rename `supprementary'.
+
+ * Makefile.in (lisp, shortlisp): Remove latin-N.
+
+2002-10-05 Dave Love <fx@gnu.org>
+
+ * xfns.c (x_window, x_window): Use use_xim.
+
+ * xterm.c (use_xim): Initialize.
+ (xim_open_dpy, xim_initialize, xim_close_dpy): Use use_xim.
+ (x_term_init): Maybe set use_xim.
+
+ * xterm.h (use_xim) [HAVE_X_I18N]: Declare.
+
+2002-10-01 Kenichi Handa <handa@m17n.org>
+
+ * search.c (search_buffer): Fix case-fold-search of multibyte
+ characters.
+ (boyer_moore): Rename the last argument to char_high_bits.
+
+2002-09-27 Kenichi Handa <handa@etl.go.jp>
+
+ * xdisp.c (display_string): Fix for the case of zero width glyph.
+
+ * xfns.c (x_set_font): Change the error message of the case that
+ x_new_fontset returns Qt.
+
+ * xfaces.c (set_lface_from_font_name): Reject the default fontset.
+ (Finternal_set_lisp_face_attribute): Use signal_error for the
+ error of invalid fontset.
+
+ * xterm.c (x_new_fontset): If FONTSETNAME specifies the default
+ fontset, return Qt.
+
+2002-09-19 Kenichi Handa <handa@etl.go.jp>
+
+ * regex.c (re_search_2): Fix previous change.
+
+2002-09-18 Kenichi Handa <handa@etl.go.jp>
+
+ * syntax.c (skip_syntaxes): Fix previous change.
+
+2002-09-13 Kenichi Handa <handa@etl.go.jp>
+
+ * syntax.c (skip_chars): Fix previous change.
+ (skip_syntaxes): Fix previous change.
+
+2002-09-06 Dave Love <fx@gnu.org>
+
+ * config.in: Restore it.
+
+2002-09-05 Dave Love <fx@gnu.org>
+
+ * config.in: Removed (now auto-generated).
+
+ * s/usg5-4.h: Fix last change.
+
+ * unexelf.c (unexec): Make .got handling not SGI-specific.
+
+ * syntax.c (syms_of_syntax) <multibyte-syntax-as-symbol>: Doc fix.
+
+ * regex.c: Use `ifdef HAVE_ALLOCA_H', not `if HAVE_ALLOCA_H'.
+
+ * keyboard.c (read_key_sequence): Fix type error.
+
+ * buffer.c (Fset_buffer_multibyte, Fset_buffer_multibyte): Fix
+ type error.
+
+ * fontset.c (fontset_add): Return Lisp_Object.
+
+2002-09-03 Dave Love <fx@gnu.org>
+
+ * charset.h (charset_ordered_list_tick): Declare extern.
+
+2002-09-03 Kenichi Handa <handa@etl.go.jp>
+
+ The following changes (and some of 2002-08-20 changes of mine) are
+ for handling syntax, category, and case conversion for unibyte
+ characters by converting them to multibyte on the fly. With these
+ changes, we don't have to setup syntax and case tables for unibyte
+ characters in each language environment.
+
+ * abbrev.c (Fexpand_abbrev): Convert a unibyte character to
+ multibyte if necessary.
+
+ * bytecode.c (Fbyte_code): Likewise.
+
+ * character.h (LEADING_CODE_LATIN_1_MIN)
+ (LEADING_CODE_LATIN_1_MAX): New macros.
+ (unibyte_to_multibyte_table): Extern it.
+ (unibyte_char_to_multibyte): New macro.
+ (MAKE_CHAR_MULTIBYTE): Use unibyte_to_multibyte_table.
+ (CHAR_LEADING_CODE): New macro.
+ (FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE): New macro.
+
+ * character.c (unibyte_to_multibyte_table): New variable.
+ (unibyte_char_to_multibyte): Move to character.h and defined as
+ macro.
+ (multibyte_char_to_unibyte): If C is an eight-bit character,
+ convert it to the corresponding byte value.
+
+ * charset.c (Fset_unibyte_charset): If the dimension of CHARSET is
+ not 1, singals an error. Update the elements of
+ unibyte_to_multibyte_table.
+ (init_charset_once): Initialize unibyte_to_multibyte_table.
+ (syms_of_charset): Define the charset `iso-8859-1'.
+
+ * casefiddle.c (casify_object): Fix previous change.
+
+ * cmds.c (internal_self_insert): In a multibyte buffer, insert C
+ as is without converting it to unibyte. In a unibyte buffer,
+ convert C to multibyte before checking the syntax.
+
+ * lisp.h (unibyte_char_to_multibyte): Extern deleted.
+
+ * minibuf.c (Fminibuffer_complete_word): Use the macro
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE.
+
+ * regex.h (struct re_pattern_buffer): New member target_multibyte.
+
+ * regex.c (RE_TARGET_MULTIBYTE_P): New macro.
+ (GET_CHAR_BEFORE_2): Check target_multibyte, not multibyte. If
+ that is zero, convert an eight-bit char to multibyte.
+ (MAKE_CHAR_MULTIBYTE, CHAR_LEADING_CODE): New dummy new macros for
+ non-emacs case.
+ (PATFETCH): Convert an eight-bit char to multibyte.
+ (HANDLE_UNIBYTE_RANGE): New macro.
+ (regex_compile): Setup the compiled pattern for multibyte chars
+ even if the given regex string is unibyte. Use PATFETCH_RAW
+ instead of PATFETCH in many places. To handle `charset'
+ specification of unibyte, call HANDLE_UNIBYTE_RANGE. Use bitmap
+ only for ASCII chars.
+ (analyse_first) <exactn>: Simplified because the compiled pattern
+ is multibyte.
+ <charset_not>: Setup fastmap from bitmap only for ASCII chars.
+ <charset>: Use CHAR_LEADING_CODE to get leading codes.
+ <categoryspec>: If multibyte, setup fastmap only for ASCII chars
+ here.
+ (re_compile_fastmap) [emacs]: Call analyse_first with the arg
+ multibyte always 1.
+ (re_search_2) In emacs, set the locale variable multibyte to 1,
+ otherwise to 0. New local variable target_multibyte. Check it
+ to decide the multibyteness of STR1 and STR2. If
+ target_multibyte is zero, convert unibyte chars to multibyte
+ before translating and checking fastmap.
+ (TARGET_CHAR_AND_LENGTH): New macro.
+ (re_match_2_internal): In emacs, set the locale variable multibyte
+ to 1, otherwise to 0. New local variable target_multibyte. Check
+ it to decide the multibyteness of STR1 and STR2. Use
+ TARGET_CHAR_AND_LENGTH to fetch a character from D.
+ <charset, charset_not>: If multibyte is nonzero, check fastmap
+ only for ASCII chars. Call bcmp_translate with
+ target_multibyte, not with multibyte.
+ <begline>: Declare the local variable C as `unsigned'.
+ (bcmp_translate): Change the last arg name to target_multibyte.
+
+ * search.c (compile_pattern_1): Don't adjust the multibyteness of
+ the regexp pattern and the matching target. Set cp->buf.multibyte
+ to the multibyteness of the regexp pattern. Set
+ cp->but.target_multibyte to the multibyteness of the matching
+ target.
+ (wordify): Use FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE instead of
+ FETCH_STRING_CHAR_ADVANCE.
+ (Freplace_match): Convert unibyte chars to multibyte.
+
+ * syntax.c (char_quoted): Use FETCH_CHAR_AS_MULTIBYTE to convert
+ unibyte chars to multibyte.
+ (back_comment): Likewise.
+ (scan_words): Likewise.
+ (skip_chars): The arg syntaxp is deleted, and the code for
+ handling syntaxes is moved to skip_syntaxes. Callers changed.
+ Fix the case that the multibyteness of STRING and the current
+ buffer doesn't match.
+ (skip_syntaxes): New function.
+ (SYNTAX_WITH_MULTIBYTE_CHECK): Check C by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+ (Fforward_comment): Use FETCH_CHAR_AS_MULTIBYTE to convert unibyte
+ chars to multibyte.
+ (scan_lists): Likewise.
+ (Fbackward_prefix_chars): Likewise.
+ (scan_sexps_forward): Likewise.
+
+2002-08-23 Kenichi Handa <handa@etl.go.jp>
+
+ * xfaces.c (QCfontset): New variable.
+ (LFACE_FONTSET): New macro.
+ (check_lface_attrs): Check also LFACE_FONTSET_INDEX.
+ (set_lface_from_font_name): Setup LFACE_FONTSET (lface).
+ (Finternal_set_lisp_face_attribute): Handle QCfontset.
+ (Finternal_get_lisp_face_attribute): Likewise.
+ (lface_same_font_attributes_p): Fix checking of LFACE_FONT_INDEX,
+ check also LFACE_FONTSET_INDEX.
+ (face_fontset): Check attrs[LFACE_FONTSET_INDEX], not
+ attrs[LFACE_FONT_INDEX].
+ (syms_of_xfaces): Intern and staticpro QCfontset.
+
+ * dispextern.h (enum lface_attribute_index): New member
+ LFACE_FONTSET_INDEX.
+
+ * fns.c (base64_encode_1): Handle eight-bit chars correctly.
+
+2002-08-21 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (coding_set_destination): Fix coding->destination for
+ the case converting a region.
+ (encode_coding_utf_8): Encode eight-bit chars as single byte.
+ (encode_coding_object): Fix coding->dst_pos and
+ coding->dst_pos_byte for the case converting a region.
+
+ * insdel.c (insert_from_gap): Make it work even if PT != GTP.
+
+ * character.h (BYTE8_STRING): New macro.
+
+ * fns.c (base64_decode_1): Insert eight-bit chars correctly.
+
+2002-08-20 Kenichi Handa <handa@etl.go.jp>
+
+ * xdisp.c (get_next_display_element): Don't display unibyte 8-bit
+ characters by octal form.
+
+ * abbrev.c (Fexpand_abbrev): Fix for the multibyte case.
+
+ * buffer.h (_fetch_multibyte_char_len): Extern deleted.
+ (FETCH_MULTIBYTE_CHAR): Don't use _fetch_multibyte_char_len.
+ (BUF_FETCH_MULTIBYTE_CHAR): Likewise.
+ (FETCH_CHAR_AS_MULTIBYTE): New macro.
+
+ * casetab.c (set_canon, set_identity, shuffle): Simplified.
+
+ * casefiddle.c (casify_object): Simplified. Handle the case that
+ the case conversion change the byte length.
+ (casify_region): Likewise
+
+ * character.h (MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE): New
+ macros.
+
+ * character.c (_fetch_multibyte_char_len): This variable deleted.
+ (syms_of_character): Setup Vprintable_chars.
+
+ * editfns.c (Fchar_equal): Fix for the unibyte case.
+ (Finsert_byte): New function.
+ (syms_of_editfns): Defsubr it.
+
+ * keyboard.c (read_key_sequence): Use ~CHAR_MODIFIER_MASK instead
+ of direct code 0x3ffff.
+
+ * search.c (Freplace_match): Fix for the unibyte case.
+
+2002-08-19 Kenichi Handa <handa@etl.go.jp>
+
+ * lread.c (safe_to_load_p): Fix the logic.
+
+ * syntax.c (scan_words): Don't treat characters belonging to
+ different scripts as constituting a word.
+
+ * editfns.c (Fformat): Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P.
+
+ * fontset.c (Fset_fontset_font): Treat `ascii' as charset, not
+ script.
+
+ * emacs.c (main): In the case of --unibyte, instead of aborting on
+ finding non-empty buffer, make it unibyte.
+
+2002-08-18 Kenichi Handa <handa@etl.go.jp>
+
+ * xterm.c (x_new_fontset): Call `create-fontset-from-ascii-font'
+ to create a fontset.
+
+2002-08-18 Dave Love <fx@gnu.org>
+
+ * character.c (Funibyte_char_to_multibyte): Doc fix.
+
+ * xfns.c [HAVE_STDLIB_H]: Fix last change.
+
+2002-08-15 Kenichi Handa <handa@etl.go.jp>
+
+ * fontset.c (fontset_add): Make the type `int'.
+ (fontset_id_valid_p): Define it if FONTSET_DEBUG is defined.
+
+ * character.c (unibyte_char_to_multibyte): Refer to
+ charset_unibyte, not charset_primary.
+ (multibyte_char_to_unibyte): Likewise.
+ (Funibyte_char_to_multibyte): Likewise.
+
+ * charset.h: (charset_unibyte): Extern it instead of
+ charset_primary.
+
+ * charset.c (charset_unibyte): Renamed from charset_primary.
+ (Funibyte_charset): Renamed from Fprimary_charset.
+ (Fset_unibyte_charset): Renamed from Fset_primary_charset.
+ (syms_of_charset): Adjusted for the above changes.
+
+ * w32term.c (x_produce_glyphs): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P. Fix the logic of handling non-ASCII char when
+ it->multibyte_p is zero.
+
+ * lisp.h (nonascii_insert_offset, Vnonascii_translation_table):
+ Extern deleted.
+
+2002-08-08 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (Fdefine_coding_system_internal): Fix category setting
+ for a coding system of type iso-2022.
+
+2002-08-02 Kenichi Handa <handa@etl.go.jp>
+
+ * fontset.h (FS_LOAD_FONT): Call fs_load_font with the arg CHARSET
+ -1.
+
+2002-08-01 Kenichi Handa <handa@etl.go.jp>
+
+ * syntax.c (Vnext_word_boundary_function_table): New variable.
+ (syms_of_syntax): Declare it as a Lisp variable.
+ (scan_words): Call functions in Vnext_word_boundary_function_table
+ if any.
+
+ * xterm.c (x_load_font): Initialize fontp->fontset to -1.
+
+ * fontset.c (fs_load_font): If fontp->charset is not negative,
+ return fontp without setting its members.
+
+2002-07-31 Dave Love <fx@gnu.org>
+
+ * config.in: Generated with autoheader.
+
+ * xfns.c [HAVE_STDLIB_H]: Change logic (instead of fixing typo).
+
+ * m/sparc.h (HAVE_ALLOCA): Delete.
+
+ * s/irix6-5.h: Don't include strings.h.
+ (bcopy, bzero, bcmp): Don't undef.
+
+ * s/irix6-0.h (bcopy, bzero, bcmp): Don't undef.
+
+ * s/usg5-4.h (NO_SIOCTL_H): Don't define.
+ (TIOCSIGSEND): Don't test IRIX6.
+ (bcopy, bzero, bcmp): Define conditionally.
+
+2002-07-31 Kenichi Handa <handa@etl.go.jp>
+
+ * buffer.c (Qas, Qmake, Qto): New variables.
+ (Fset_buffer_multibyte): New optional arg METHOD. Caller changed.
+ (syms_of_buffer): Intern and staticpro Qas, Qmake, and Qto.
+
+ * callproc.c (Fcall_process): Don't call insert_1_both directly if
+ we are inserting a process output into a multibyte buffer.
+
+ * character.h (CHAR_TO_BYTE8): If C is not eight-bit char, call
+ multibyte_char_to_unibyte.
+
+ * character.c (Funibyte_char_to_multibyte): If C can't be decoded
+ by the primary charset, make it eight-bit char.
+ (Fmultibyte_char_to_unibyte): Call CHAR_TO_BYTE8.
+
+ * charset.c: (charset_eight_bit, Qeight_bit_control): New
+ variables.
+ (charset_8_bit__control, charset_8_bit_graphic,
+ Qeight_bit_control, Qeight_bit_graphic): These variables deleted.
+ (define_charset_internal): New function.
+ (syms_of_charset): Call define_charset_internal for pre-defined
+ charsets.
+
+ * charset.h (charset_8_bit): Extern it.
+
+ * coding.c (make_conversion_work_buffer): Adjusted for the change
+ of Fset_buffer_multibyte.
+ (encode_coding_raw_text): Increment p0 in the loop.
+
+ * lisp.h (Fset_buffer_multibyte): Prototype adjusted.
+
+ * xdisp.c (setup_echo_area_for_printing, set_message_1): Adjusted
+ for the change of Fset_buffer_multibyte.
+
+ * fns.c (Fstring_to_multibyte): New function.
+ (syms_of_fns): Declare Fstring_to_multibyte as Lisp subroutine.
+
+2002-07-30 Dave Love <fx@gnu.org>
+
+ * xfns.c (x_put_x_image): Declare args.
+
+ * xfaces.c (font_name_registry, choose_face_font): Delete unused
+ vars.
+ (try_font_list): Declare an arg.
+
+ * xdisp.c (message2_nolog, set_message): Declare an arg.
+
+ * terminfo.c (tparam): Declare an arg. Use P_ to declare tparm.
+
+ * syntax.c (scan_sexps_forward): Declare an arg.
+
+ * scroll.c (calculate_scrolling, calculate_direct_scrolling):
+ Declare an arg.
+
+ * lisp.h (Fnew_fontset): Declare.
+
+ * keymap.c (push_key_description): Call CHARACTERP correctly.
+
+ * fontset.c (fontset_add): Declare args. Call make_number
+ correctly.
+ (face_for_char): Delete unused vars.
+ (Fset_fontset_font): Doc fix. Delete unused vars.
+
+ * doc.c (Fsubstitute_command_keys): Delete unused vars.
+
+ * composite.c (update_compositions): Declare arg.
+
+ * cm.c (calccost, cmgoto): Declare args.
+
+ * charset.c: Remove `emacs' conditional. Doc fixes.
+ (map_char_table_for_charset): Declare.
+
+ * character.c (syms_of_character) <translation-table-vector>: Doc
+ fix.
+
+ * ccl.c: Remove `emacs' conditional. Include hash table stuff
+ from trunk.
+
+2002-07-26 Kenichi Handa <handa@etl.go.jp>
+
+ The following changes are to allow specifying multiple font
+ patterns for a character range (specified by script or charset).
+
+ * Makefile.in (abbrev.o): Depend on syntax.h.
+ (xfaces.o): Depend on charset.h.
+
+ * alloc.c (Fmake_string): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P.
+
+ * ccl.c (Fccl_execute_on_string): Add `const' to local variables.
+
+ * character.h (Vchar_script_table): Extern it.
+
+ * character.c (Vscript_alist): This variable deleted.
+ (Vchar_script_table, Qchar_script_table): New variable.
+ (syms_of_character): Declare Vchar_script_table as a lisp variable
+ and initialize it.
+
+ * chartab.c (Fmake_char_table): Doc fixed. If PURPOSE doesn't
+ have property char-table-extra-slots, make no extra slot.
+
+ * dispextern.h (struct face): Member `charset' deleted.
+ (FACE_SUITABLE_FOR_CHAR_P): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P.
+ (FACE_FOR_CHAR): Likewise.
+ (choose_face_font, lookup_non_ascii_face, font_name_registry): Add
+ prototypes
+ (lookup_face, lookup_named_face, lookup_derived_face): Prototype
+ fixed.
+ (generate_ascii_font_name): Renamed from generate_ascii_font.
+
+ * fontset.h (get_font_repertory_func): New prototype.
+ (make_fontset_for_ascii_face, fs_load_font): Prototypes fixed.
+ (FS_LOAD_FONT): Call fs_load_font with the 3rd arg charset_ascii.
+
+ * fontset.c (Qprepend, Qappend): New variables.
+ (FONTSET_CHARSET_ALIST, FONTSET_FACE_ALIST): These macros deleted.
+ (FONTSET_NOFONT_FACE, FONTSET_REPERTORY): New macros.
+ (FONTSET_REF): Optimize if FONTSET is Vdefault_fontset.
+ (FONTSET_REF_AND_RANGE, FONTSET_ADD): New macros.
+ (fontset_ref_and_range, fontset_add, reorder_font_vector)
+ (load_font_get_repertory): New functions.
+ (fontset_set): This function deleted.
+ (fontset_face): New arg FACE. Return face ID, not face.
+ Completely re-written to handle new fontset structure. Caller
+ changed.
+ (free_face_fontset): Use ASET istead of AREF (X) = Y.
+ (face_for_char): Don't call lookup_face.
+ (make_fontset_for_ascii_face): New arg FACE.
+ (fs_load_font): New arg CHARSET_ID. Don't check
+ Vfont_encoding_alist here.
+ (find_font_encoding): New function.
+ (list_fontsets): Use STRINGP, not ! NILP.
+ (accumulate_script_ranges): New function.
+ (Fset_fontset_font, Fnew_fontset, Ffontset_info): Completely
+ re-written to handle new fontset structure.
+ (Ffontset_font): Return a copy of element.
+ (syms_of_fontset): Define symbols Qprepend and Qappend. Fix
+ docstring of font-encoding-alist.
+
+ * lisp.h (CHAR_TABLE_REF): Remove unnecessary check (IDX >= 0).
+ (Fset_fotset_font): Fix arguments to 5.
+
+ * msdos.c (XMenuActivate): Adjuted for the change of
+ lookup_derived_face.
+
+ * xdisp.c (message_dolog, set_message_1, extend_face_to_end_of_line):
+ Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P.
+ (highlight_trailing_whitespace): Adjusted for the change of
+ lookup_named_face.
+
+ * xfaces.c: Include charset.h.
+ (load_face_font): Argument C deleted. Caller changed.
+ (generate_ascii_font_name): Renamed from generate_ascii_font.
+ (font_name_registry): New function.
+ (cache_face): Store ascii faces before non-ascii faces in buckets.
+ (lookup_face): Arguments C and BASE_FACE deleted. Caller changed.
+ Lookup only ascii faces.
+ (lookup_non_ascii_face): New function.
+ (lookup_named_face): Argument C deleted. Caller changed.
+ (lookup_derived_face): Argument C deleted. Caller changed.
+ (try_font_list): New arg PATTERN. Caller changed. If PATTERN is
+ a string, just call font_list with it.
+ (choose_face_font): Arguments FACE and C deleted. New arg
+ FONT_SPEC. Caller changed.
+ (realize_face): Arguments C and BASE_FACE deleted. Caller
+ (realize_x_face): Likewise.
+ (realize_non_ascii_face): New function.
+ (realize_x_face): Call load_face_font here.
+ (realize_tty_face): Argument C deleted. Caller changed.
+ (compute_char_face): If CH is not ascii, call FACE_FOR_CHAR to
+ get a face ID.
+ (dump_realized_face): Don't print charset of FACE.
+
+ * xfns.c (x_set_font): Always call x_new_fontset and
+ store_frame_parameter.
+ (Fx_create_frame): Call x_new_fontset, not x_new_font.
+ (syms_of_xfns): Set get_font_repertory_func to
+ x_get_font_repertory.
+
+ * xterm.h (x_get_font_repertory): Extern it.
+
+ * xterm.c (x_produce_glyphs): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P. Fix the logic of handling non-ASCII char when
+ it->multibyte_p is zero.
+ (XTread_socket): Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P.
+ (x_new_fontset): If FONTSETNAME doesn't match any existing
+ fontsets, create a new one.
+ (x_get_font_repertory): New function.
+
+2002-07-25 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (Ffind_coding_systems_region_internal): Detect an
+ ASCII only string correctly.
+
+ * lread.c (Fload): Don't load with Qload_force_doc_strings t if
+ version is 0.
+
+2002-07-24 Kenichi Handa <handa@etl.go.jp>
+
+ * lread.c: Include "coding.h".
+ (Qget_emacs_mule_file_char, Qload_force_doc_strings,
+ load_each_byte, unread_char): New variables.
+ (readchar_backlog): This variable deleted.
+ (readchar): Return a character unless load_each_byte is nonzero.
+ Handle the case that readcharfun is Qget_emacs_mule_file_char or a
+ cons. If unread_char is not -1, simply return it.
+ (unreadchar): Handle the case that readcharfun is
+ Qget_emacs_mule_file_char or a cons. Set unread_char if
+ necessary.
+ (read_multibyte): This function deleted.
+ (readbyte_for_lambda, readbyte_from_file, readbyte_from_string)
+ (read_emacs_mule_char): New functions.
+ (Fload): Even if the file doesn't have the extention ".elc", if
+ safe_to_load_p returns a positive version number, assume that the
+ file contains bytecompiled code. If the version is less than 22,
+ load the file while decoding multibyte sequences by emacs-mule.
+ (readevalloop): Don't use readchar_backlog.
+ (Fread): Likewise. Pay attention to the case that STREAM is a
+ cons.
+ (Fread_from_string): Pay attention to the case that STREAM is a
+ cons.
+ (read_escape): The arg BYTEREP deleted.
+ (read1): Set load_each_byte to 1 temporarily while handling
+ #@NUMBER. Don't call read_multibyte.
+ (read_vector): Call Fread with a cons. If readcharfun is
+ Qget_emacs_mule_file_char, decode the read string by emacs-mule.
+ (read_list): If doc_reference is 2, make the cdr part string as
+ unibyte.
+ (syms_of_lread): Intern and staticpro Qget_emacs_mule_file_char
+ and Qload_force_doc_strings.
+
+2002-07-23 Kenichi Handa <handa@etl.go.jp>
+
+ * xdisp.c (face_before_or_after_it_pos): Call
+ FETCH_MULTIBYTE_CHAR with byte postion, not char position.
+
+2002-07-22 Kenichi Handa <handa@etl.go.jp>
+
+ * character.h (TRAILING_CODE_P): New macro.
+ (MAYBE_UNIFY_CHAR): Adjusted for the change of Funify_charset.
+ (string_char_with_unification): Fix prototype.
+ (Vscript_alist): Extern it.
+
+ * character.c (Vscript_alist): New variable.
+ (string_char_with_unification): Add `const' to local variables.
+ (str_as_unibyte): Likewise.
+ (string_escape_byte8): Likewise.
+ (syms_of_character): Declare script-alist as a Lisp variable.
+
+ * charset.h (Vcharset_ordered_list): Extern it.
+ (charset_ordered_list_tick): Extern it.
+ (EMACS_MULE_LEADING_CODE_PRIVATE_11)
+ (EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ (EMACS_MULE_LEADING_CODE_PRIVATE_21)
+ (EMACS_MULE_LEADING_CODE_PRIVATE_22): New macros
+ (Funify_charset): Adjusted for the change of Funify_charset.
+
+ * charset.c (charset_ordered_list_tick): New variable.
+ (Fdefine_charset_internal): Increment charset_ordered_list_tick.
+ (Funify_charset): New optional arg DEUNIFY. If it is non-nil,
+ deunify intead of unify a charset.
+ (string_xstring_p): Add `const' to local variables.
+ (find_charsets_in_text): Add `const' to arguemnts and local
+ variables.
+ (encode_char): Adjusted for the change of Funify_charset. Fix
+ detecting of invalid code.
+ (Fset_charset_priority): Increment charset_ordered_list_tick.
+ (Fmap_charset_chars): Fix handling of default value for FROM_CODE
+ and TO_CODE.
+
+ * coding.c (LEADING_CODE_PRIVATE_11, LEADING_CODE_PRIVATE_12)
+ (LEADING_CODE_PRIVATE_21, LEADING_CODE_PRIVATE_22): Macros
+ deleted. Callers changed to use
+ EMACS_MULE_LEADING_CODE_PRIVATE_11, etc.
+ (decode_coding_ccl): Add `const' to local variables.
+ (consume_chars): Likewise.
+ (Ffind_coding_systems_region_internal): Likewise.
+ (Fcheck_coding_systems_region): Likewise.
+
+ * print.c (print_object): Use octal form for printing the
+ contents of a bool vector.
+
+2002-07-18 Dave Love <fx@gnu.org>
+
+ * lread.c (Fload) <!load_dangerous_libraries>: Don't leak fd.
+ <version == 20>: Refuse to load.
+
+2002-07-17 Dave Love <fx@gnu.org>
+
+ * fns.c: Move coding.h.
+ (Qcodeset, Qdays, Qmonths): New.
+ (concat): Use CHARACTERP instead of INTERGERP.
+ (Flocale_codeset): Deleted.
+ (Flanginfo): New function.
+ (syms_of_fns): Changed accordingly.
+
+ * coding.c (adjust_coding_eol_type): Fix eol_type/eol_seen mixup.
+
+2002-07-16 Dave Love <fx@gnu.org>
+
+ * casetab.c (init_casetab_once, init_casetab_once): Fix
+ CHAR_TABLE_SET call.
+
+ * category.c (Fmodify_category_entry): Fix CATEGORY_MEMBER call.
+
+ * character.c (syms_of_character): Fix CHAR_TABLE_SET call.
+
+ * charset.c (Fmap_charset_chars): Check args. Convert Lisp types.
+ (load_charset_map, Fdeclare_equiv_charset, Fencode_char)
+ (Fset_charset_priority, syms_of_charset): Convert Lisp types.
+
+ * charset.h (CHECK_CHARSET_GET_ID): Use XINT on AREF result.
+
+ * coding.c (ENCODE_DESIGNATION, decode_eol)
+ (make_conversion_work_buffer, code_conversion_restore)
+ (Fdefine_coding_system_internal): Convert Lisp types.
+ (code_conversion_restore): Use EQ, not ==.
+ (Fencode_coding_string): Fix code_convert_string call.
+
+ * coding.h (code_convert_region): Fix prototype.
+
+ * dispextern.h (redraw_frame, redraw_garbaged_frames): Removed.
+
+ * fontset.c (fontset_ref, fontset_set, fs_load_font)
+ (Ffontset_info): Convert Lisp types.
+
+ * syntax.h (SYNTAX_ENTRY_INT): Don't use make_number.
+
+ * xterm.c (note_mouse_movement): Fix call of window_from_coordinates.
+
+ * xdisp.c (display_mode_element): Fix call of Fset_text_properties.
+
+ * chartab.c: Include "...h", not <...h> in some cases.
+
+ * callproc.c (Fcall_process): Remove unused variables.
+
+2002-07-12 Dave Love <fx@gnu.org>
+
+ * coding.c (Fset_coding_system_priority): Allow null arg list.
+
+2002-07-03 Dave Love <fx@gnu.org>
+
+ * minibuf.c (Fminibuffer_complete_word): Remove unused var.
+ (Fself_insert_and_exit): Use CHARACTERP.
+
+ * callproc.c (Fcall_process): Remove unused vars.
+
+ * xterm.c (XTread_socket): Add extra dead keysyms.
+
+ * xdisp.c (decode_mode_spec_coding): Use CHARACTERP.
+
+ * dispextern.h: Remove prototypes for redraw_frame,
+ redraw_garbaged_frames.
+
+ * cmds.c (Fself_insert_command): Use CHARACTERP.
+
+ * chartab.c (make_sub_char_table): Remove unused var.
+ (Fset_char_table_default, Fmap_char_table): Doc fix.
+
+ * keymap.c (access_keymap): Remove generic char code.
+ (push_key_description): Use CHARACTERP.
+
+2002-07-01 Dave Love <fx@gnu.org>
+
+ * charset.c: Doc fixes.
+ (Funify_charset): Extra checking.
+
+2002-06-24 Dave Love <fx@gnu.org>
+
+ * lread.c: Remove some unused variables.
+ (safe_to_load_p): If safe, return the magic number version byte.
+ (Fload): Maybe use load-with-code-conversion.
+
+2002-06-12 Kenichi Handa <handa@etl.go.jp>
+
+ * category.c (Fmodify_category_entry): Don't modify the contents
+ of category_set for characters out of the range. Avoid
+ unnecessary modification.
+
+ * character.h (MAYBE_UNIFY_CHAR): Adjusted for the change of
+ Vchar_unify_table. The default value of the table is now nil.
+
+ * character.c (syms_of_character): Setup Vchar_width_table for
+ eight-bit-control and raw-byte chars.
+
+ * charset.h (enum define_charset_arg_index): Delete
+ charset_arg_parents and add charset_arg_subset and
+ charset_arg_superset.
+ (enum charset_attr_index): Delete charset_parents and add
+ charset_subset and charset_superset.
+ (enum charset_method): Delete CHARSET_METHOD_INHERIT and add
+ CHARSET_METHOD_SUBSET and CHARSET_METHOD_SUPERSET.
+ (CHARSET_ATTR_PARENTS, CHARSET_PARENTS): Macros deleted.
+ (CHARSET_ATTR_SUBSET, CHARSET_ATTR_SUPERSET, CHARSET_SUBSET)
+ (CHARSET_SUPERSET): New macros.
+ (charset_work): Extern it.
+ (ENCODE_CHAR): Use charset_work.
+ (CHAR_CHARSET_P): Adjusted for the change of encoder format.
+ (map_charset_chars): Extern it.
+
+ * charset.c (load_charset_map): Set the default value of encoder
+ and deunifier char-tables to nil.
+ (map_charset_chars): Argument changed. Callers changed. Use
+ map_char_table_for_charset instead of map_char_table.
+ (Fmap_charset_chars): New optional args from_code and to_code.
+ (Fdefine_charset_internal): Adjusted for the change of
+ `define-charset' (:parents -> :subset or :superset).
+ (charset_work): New variable.
+ (encode_char): Adjusted for the change of
+ Fdefine_charset_internal.
+ (syms_of_charset): Likewise.
+ (Ffind_charset_string): Setup the vector `charsets' correctly.
+
+ * chartab.c (sub_char_table_ref_and_range): New arg defalt. Fix
+ the previous change.
+ (char_table_ref_and_range): Adjusted for the above change.
+ (map_sub_char_table_for_charset): New function.
+ (map_char_table_for_charset): New function.
+
+ * keymap.c (describe_vector): Handle a char-table directly here.
+ (describe_char_table): Deleted.
+
+ * lisp.h (map_charset_chars): Deleted.
+
+2002-06-11 Dave Love <fx@gnu.org>
+
+ * fns.c (count_combining): Comment out (unused).
+ (Flocale_codeset): New.
+ (syms_of_fns): Defsubr it.
+
+ * config.in (HAVE_PTY_H, HAVE_SIZE_T, HAVE_LANGINFO_CODESET): New.
+ (size_t): Removed.
+
+2002-06-06 Dave Love <fx@gnu.org>
+
+ * Makefile.in (chartab.o): Depend on charset.h
+
+2002-06-03 Kenichi Handa <handa@etl.go.jp>
+
+ * character.c (syms_of_character): Set the default value of
+ Vprintable_chars to Qnil.
+
+2002-05-31 Dave Love <fx@gnu.org>
+
+ * Makefile.in (lisp, shortlisp): Change indian.elc to indian.el.
+
+2002-05-31 Kenichi Handa <handa@etl.go.jp>
+
+ * charset.c (load_charset_map): Handle the case that from < to
+ correctly.
+
+ * coding.c (encode_coding_emacs_mule): Pay attention to raw-8-bit
+ chars.
+ (encode_coding_iso_2022): Likewise.
+ (encode_coding_sjis): Likewise.
+ (encode_coding_big5): Likewise.
+ (encode_coding_charset): Likewise.
+
+2002-05-30 Kenichi Handa <handa@etl.go.jp>
+
+ * Makefile.in (lisp): Change chinese.elc to chinese.el. They are
+ not bytecompiled now.
+ (shortlisp): Likewise.
+
+ * charset.c (charset_jisx0201_roman, charset_jisx0208_1978)
+ (charset_jisx0208): New variables.
+ (Fdefine_charset_internal): Setup them if appropriate.
+ (init_charset_once): Initialize them to -1.
+
+ * charset.h (charset_jisx0201_roman, charset_jisx0208_1978,
+ charset_jisx0208): Extern them.
+
+ * coding.c (CODING_ISO_FLAG_USE_ROMAN): New macro
+ (CODING_ISO_FLAG_USE_OLDJIS): New macro.
+ (CODING_ISO_FLAG_FULL_SUPPORT): Macro definition changed.
+ (setup_iso_safe_charsets): Fix arguemtns to Fassq.
+ (DECODE_DESIGNATION): Pay attention to CODING_ISO_FLAG_USE_ROMAN
+ and CODING_ISO_FLAG_USE_OLDJIS.
+ (ENCODE_ISO_CHARACTER_DIMENSION1): Likewise.
+ (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise.
+ (encode_coding_iso_2022): Change the 1st arg to
+ ENCODE_ISO_CHARACTER to a variable.
+
+2002-05-29 Kenichi Handa <handa@etl.go.jp>
+
+ * charset.h (enum define_charset_arg_index): New enums
+ charset_arg_min_code and charset_arg_max_code.
+ (struct charset): New member char_index_offset.
+
+ * charset.c (CODE_POINT_TO_INDEX): Take charset->char_index_offset
+ into account.
+ (INDEX_TO_CODE_POINT): Likewise.
+ (Fdefine_charset_internal): Handle args[charset_arg_min_code] and
+ args[charset_arg_max_code]. Setup charset.char_index_offset.
+ (syms_of_charset): Fix args to Fdefine_charset_internal.
+
+2002-05-27 Dave Love <fx@gnu.org>
+
+ * coding.c (decode_coding_utf_8): Reject overlong sequences.
+
+2002-05-26 Dave Love <fx@gnu.org>
+
+ * coding.c: Doc fixes.
+ (Fcoding_system_aliases): Fix return value.
+ (Qmac): Remove (duplicated) definition.
+
+2002-05-25 Dave Love <fx@gnu.org>
+
+ * charset.c (Fcharset_priority_list, Fset_charset_priority): New
+ functions.
+
+ * character.c (Fstring): Doc fix.
+
+ * charset.c (Fdefine_charset_alias): Update Vcharset_list.
+
+ * fontset.c (Ffontset_info): Doc fix. Return charset names, not
+ ids.
+ (font-encoding-alist): Doc fix.
+
+2002-05-24 Dave Love <fx@gnu.org>
+
+ * term.c (costs_set): Declare static, non-initialized for pcc.
+ (encode_terminal_code): Remove ensued var.
+
+ * keyboard.c (kbd_buffer_store_event): Fix interrupt_signal decl
+ for K&R.
+
+ * xterm.c (xlwmenu_window_p): Fix prototype for K&R.
+
+ * coding.c (setup_iso_safe_charsets): Fix arg decl for K&R.
+ (suffixes): Moved out of make_subsidiaries for K&R.
+
+ * charset.c (map_charset_chars): Fix c_function declaration for
+ K&R.
+
+ * lisp.h (DEFUN) [!PROTOTYPES]: Remove spurious `args'.
+
+2002-05-23 Dave Love <fx@gnu.org>
+
+ * data.c (Fchar_or_string_p): Doc fix. Use CHARACTERP.
+
+ * category.c (Fmodify_category_entry): Doc fix. Remove unused
+ vars.
+
+2002-05-23 Yong Lu <lyongu@asia-infonet.com>
+
+ * charset.c (Fdefine_charset_internal): Fix argument to bzero.
+
+ * coding.c (Fdefine_coding_system_internal): Fix previous change.
+ (decode_coding_charset): Workaround for the bug of GCC 2.96.
+
+2002-05-23 Kenichi Handa <handa@etl.go.jp>
+
+ * Makefile.in (lisp): Change cyrillic.elc to cyrillic.el,
+ vietnamese.elc to vietnamese.el. They are not bytecompiled now.
+ (shortlisp): Likewise.
+
+2002-05-22 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (decode_coding_charset): Adjusted for the change of
+ Fdefine_coding_system_internal.
+ (Fdefine_coding_system_internal): For a coding system of
+ `charset' type, store a list of charset IDs in
+ `charset_attr_charset_valids' element of coding attributes.
+
+ * charset.c (Fmake_char): Fix previous change.
+
+2002-05-21 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars.
+ (emacs_mule_char): New arg src. Delete arg `composition'. Caller
+ changed. Handle 2-byte and 3-byte charsets correctly.
+ (DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from
+ DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed.
+ (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro.
+ (DECODE_EMACS_MULE_21_COMPOSITION): Call
+ DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation
+ sequence.
+ (decode_coding_emacs_mule): Handle composition correctly. Rewind
+ `src' and `consumed_chars' correctly before calling
+ emacs_mule_char.
+ (DECODE_COMPOSITION_START): Correctly handle the case of altchar
+ and alt&rule composition.
+ (decode_coding_iso_2022): Handle composition correctly.
+ (init_coding_once): Setup emacs_mule_bytes for private charsets.
+
+ * charset.c (Fdefine_charset_internal): Fix bug for the case of
+ re-defining a charset. If the charset has :emacs-mule-id, setup
+ emacs_mule_bytes.
+ (Fmake_char): If CODE1 is nil, use the minimum code of the
+ charset.
+
+2002-05-20 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (encode_coding_iso_2022): If coding requires safe
+ encoding, produce a character specified by
+ CODING_INHIBIT_CHARACTER_SUBSTITUTION.
+ (encode_coding_sjis): Likewise.
+ (encode_coding_big5): Likewise.
+ (encode_coding_charset): Likewise.
+
+2002-05-17 Dave Love <fx@gnu.org>
+
+ * xterm.c (XSetIMValues): Declare.
+
+ * process.c: Conditionally include sys/wait.h, pty.h.
+
+ * print.c (print_object): Fix print format for 64-bit
+ systems.
+
+ * keyboard.c (modify_event_symbol): Fix print format for 64-bit
+ systems.
+
+ * buffer.c (emacs_strerror): Declare.
+ (MMAP_ALLOCATED_P, mmap_enlarge, syms_of_buffer): Import changes
+ from trunk.
+
+ * fontset.c (Fclear_face_cache): Declare.
+ (accumulate_font_info): Commented-out (unused).
+ (face_for_char, Fset_fontset_font, Ffontset_info): Remove unused
+ variables.
+
+ * character.h (string_escape_byte8): Declare.
+
+ * charset.c (load_charset_map, load_charset_map_from_file): Remove
+ unused vars.
+ (Fdefine_charset_internal, Fsplit_char, syms_of_charset)
+ (Fmap_charset_chars): Doc fix.
+
+ * coding.c (Vchar_coding_system_table, Qchar_coding_system):
+ Removed.
+ (Fset_coding_system_priority, Fset_coding_system_priority)
+ (Fdefine_coding_system_internal): Doc fix.
+
+2002-05-16 Dave Love <fx@gnu.org>
+
+ * s/osf5-0.h (C_SWITCH_SYSTEM) [!__GNUC__]: Remove -nointrinsics.
+
+2002-05-16 Kenichi Handa <handa@etl.go.jp>
+
+ * character.c (string_escape_byte8): Make multibyte string with
+ correct size.
+
+ * charset.c (Fmake_char): Delete unnecessary code.
+
+2002-05-14 Kenichi Handa <handa@etl.go.jp>
+
+ * xfns.c (x_encode_text): Allocate coding.destination here, and
+ call encode_coding_object with dst_object Qnil.
+
+ * buffer.c (Fset_buffer_multibyte): Convert 8-bit bytes to
+ multibyte form correctly.
+
+ * fontset.c (fs_load_font): Check fontp->full_name (not fontname)
+ against Vfont_encoding_alist.
+
+ * coding.c (Fdecode_sjis_char): Fix typo (0x7F->0xFF). Fix the
+ handling of charset list.
+ (encode_coding_iso_2022): Setup coding->safe_charsets in advance.
+ (decode_coding_object): Move point to coding->dst_pos before
+ calling post-read-conversion function.
+ (encode_coding_object): Give correct arguments to
+ pre-write-conversion. Ignore the return value of
+ pre-write-conversion function. Pay attention to the case that
+ pre-write-conversion changes the current buffer. If dst_object is
+ Qt, even if coding->src_bytes is zero, allocate at least one byte
+ to coding->destination.
+
+ * coding.h (JIS_TO_SJIS): Fix typo (j1->s1, j2->s2).
+
+ * charset.c (Fmake_char): Make it more backward compatible.
+ (Fmap_charset_chars): Fix docstring.
+
+2002-05-13 Dave Love <fx@gnu.org>
+
+ * coding.c: Doc fixes.
+ (Fdefine_coding_system_alias): Use names, not symbols, in
+ coding-system-alist.
+
+2002-05-13 Kenichi Handa <handa@etl.go.jp>
+
+ * fontset.c (free_realized_fontsets): Call Fclear_face_cache instead
+ of calling free_realized_face.
+
+2002-05-10 Yong Lu <lyongu@asia-infonet.com>
+
+ * charset.c (load_charset_map): Fix previous change.
+ (read_hex): Don't treat SPC as a comment starter.
+ (decode_char): If CODE_POINT_TO_INDEX retruns -1, always return
+ -1.
+ (Fdecode_char): Fix typo.
+
+2002-05-10 Kenichi Handa <handa@etl.go.jp>
+
+ * charset.h (struct charset): New member `code_space_mask'.
+
+ * coding.c (coding_set_source): Delete the local variable
+ beg_byte.
+ (encode_coding_charset): Delete the local variable charset.
+ (Fdefine_coding_system_internal): Likewise.
+ (Fdefine_coding_system_internal): Setup
+ attrs[coding_attr_charset_valids] correctly.
+
+ * charset.c (CODE_POINT_TO_INDEX): Utilize `code_space_mask'
+ member to check if CODE is valid or not.
+ (Fdefine_charset_internal): Initialize `code_space_mask' member.
+ (encode_char): Before calling CODE_POINT_TO_INDEX, check if CODE
+ is within the range of charset->min_code and carset->max_code.
+
+2002-05-09 Dave Love <fx@gnu.org>
+
+ * syntax.h (syntax_temp) [!__GNUC__]: Declare.
+
+ * dispextern.h (generate_ascii_font): Fix return type.
+
+ * xfaces.c (generate_ascii_font): Fix arg declaration.
+
+ * coding.c (coding_inherit_eol_type)
+ (Fset_terminal_coding_system_internal)
+ (Fset_safe_terminal_coding_system_internal): Fix arg declarations.
+
+2002-05-08 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (decode_coding_charset, encode_coding_charset): Handle
+ multiple charsets correctly.
+
+2002-05-07 Kenichi Handa <handa@etl.go.jp>
+
+ * search.c (boyer_moore): Fix handling of mulitbyte character
+ translation.
+
+ * xdisp.c (display_mode_element): When the variable `elt' is
+ changed, update `this' and `lisp_string'.
+
+2002-05-07 Kenichi Handa <handa@etl.go.jp>
+
+ * buffer.c (Fset_buffer_multibyte): Fix 8-bit char handling.
+
+ * callproc.c (Fcall_process): Be sure to give the current buffer
+ to decode_coding_c_string. Update PT and PT_BYTE after the
+ insertion.
+
+ * charset.c (struct charset_map_entries): New struct.
+ (load_charset_map): Renamed from parse_charset_map. New args
+ entries and n_entries. Caller changed.
+ (load_charset_map_from_file): Renamed from load_charset_map.
+ Caller changed. New arg control_flag. Call load_charset_map at
+ the tail.
+ (load_charset_map_from_vector): New function.
+ (Fdefine_charset_internal): Setup charset.compact_codes_p.
+ (encode_char): If the charset is compact, change a character index
+ to a code point.
+
+ * coding.c (coding_alloc_by_making_gap): Check the case that the
+ source and destination are the same correctly.
+ (decode_coding_raw_text): Set coding->consumed_char and
+ coding->consumed to 0.
+ (produce_chars): If coding->chars_at_source is nonzero, update
+ coding->consumed_char and coding->consumed before calling
+ alloc_destination.
+ (Fdefine_coding_system_alias): Register ALIAS in
+ Vcoding_system_alist.
+ (syms_of_coding): Define `no-convesion' coding system at the tail.
+
+ * fileio.c (Finsert_file_contents): Set coding_system instead of
+ val. If the current buffer is multibyte, always call
+ decode_coding_gap.
+
+ * xfaces.c (try_font_list): Give higher priority to fontset's
+ family than face's family.
+
+2002-04-18 Kenichi Handa <handa@etl.go.jp>
+
+ * callproc.c (Fcall_process): Be sure to give the current buffer
+ to decode_coding_c_string.
+
+ * xfaces.c (try_font_list): Give a family specified in a fontset
+ higher priority than a family specified in a face.
+
+2002-04-09 Kenichi Handa <handa@etl.go.jp>
+
+ * fileio.c (Finsert_file_contents): Fix calculation of `inserted'.
+ Fix arguments to insert_from_buffer.
+
+ * xdisp.c (display_mode_element): Fix calculation of `bytepos'.
+
+2002-03-11 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (produce_chars): Set the variable `multibytep' correctly.
+ (decode_coding_gap): Set coding->dst_multibyte correctly.
+
+2002-03-07 Kenichi Handa <handa@etl.go.jp>
+
+ * coding.c (encode_coding_utf_8): Initialize produced_chars to 0.
+ (decode_coding_utf_16): Fix converting high and low bytes to
+ code-point.
+ (encode_coding_utf_16): Substitute coding->default_char for
+ non-Unicode characters.
+ (decode_coding): Don't call record_insert here.
+ (setup_coding_system): Initialize `surrogate' of
+ coding->spec.utf_16 to 0.
+ (EMIT_ONE_BYTE): Fix for multibyte case.
+
+ * insdel.c (insert_from_gap): Call record_insert.
+
+2002-03-04 Kenichi Handa <handa@etl.go.jp>
+
+ * casefiddle.c (casify_region): Fix multibyte case.
+
+ * character.c (c_string_width): Add return type `int'.
+ (char_string_with_unification): Arg ADVANCED deleted.
+
+ * character.h (CHAR_VALID_P): Don't call CHARACTERP.
+ (CHAR_STRING): Adjusted for the change of
+ char_string_with_unification.
+ (CHAR_STRING_ADVANCE): Make it do-while statement.
+
+ * chartab.c (sub_char_table_set_range): Optimized for the case
+ DEPTH == 3. Add workaround code for a GCC optimization bug.
+
+ * charset.c (parse_charset_map): Remove an unused variable.
+
+ * coding.c: Delete unused variables.
+
+ * fileio.c (Finsert_file_contents): Set coding_system to Qnil
+ earlier. If inserted is zero and the coding system doesn't
+ require flushing, don't call decode_coding_gap.
+
+ * syntax.h (SET_RAW_SYNTAX_ENTRY): Don't call make_number.
+
+2002-03-01 Kenichi Handa <handa@etl.go.jp>
+
+ The following changes are for using Unicode as an internal
+ character model, and use UTF-8 format for buffer/string
+ representation.
+
+ * .gdbinit (xchartable): Adjusted for the change of char table
+ structure.
+ (xsubchartable, xcoding, xcharset, xcurbuf): New commands.
+
+ * Makefile.in (obj): Add character.o and chartab.o.
+ (lisp, shortlisp): Remove utf-8.elc:
+ (*.o): For many files, change dependency on charset.h to
+ character.h, and add dependency on character.h.
+ (character.o, chartab.o): New targets.
+
+ * abbrev.c, bytecode.c, casefiddle.c, cmds.c, dispnew.c, doc.c,
+ doprnt.c, dosfns.c, frame.c, marker.c, minibuf.c, msdos.c,
+ w16select.c, w32bdf.c, w32console.c: Include "character.h" instead
+ of "charset.h".
+
+ * dired.c, filelock.c: Include "character.h".
+
+ * alloc.c: Include "character.h" instead of "charset.h".
+ (Fmake_char_table): Moved to chartab.c.
+ (make_sub_char_table): Likewise.
+ (syms_of_alloc): Remove defsubr for Smake_char_table.
+
+ * buffer.c: Include "character.h" instead of "charset.h", don't
+ include "coding.h".
+ (Fset_buffer_multibyte): Adjuted for UTF-8.
+
+ * buffer.h: EXFUN Fbuffer_live_p.
+
+ * callproc.c: Include "character.h" instead of "charset.h".
+ (Fcall_process): Big change for the new code-conversion APIs.
+
+ * casetab.c: Include "character.h" instead of "charset.h".
+ (set_canon, set_identity, shuffle): Adjusted for the new
+ map_char_table spec.
+ (init_casetab_once): Call CHAR_TABLE_SET instead of directly
+ accessing the char table structure.
+
+ * chartab.c: New file that implements char table.
+
+ * category.c: Include "character.h".
+ (copy_category_entry): New function.
+ (copy_category_table): Call map_char_table and copy_category_entry.
+ (Fmake_category_table): Initialize all top-vel slots.
+ (char_category_set): New function.
+ (modify_lower_category_set): Deleted.
+ (Fmodify_category_entry): Call char_table_ref_and_range.
+
+ * category.h (CATEGORY_SET): Just call char_category_set.
+
+ * ccl.c: Include "character.h".
+ (Qccl, Qcclp): New variables.
+ (CCL_WRITE_CHAR): Alway treat the arg CH as a character even if
+ it's less than 256.
+ (CCL_WRITE_MULTIBYTE_CHAR): Deleted.
+ (CCL_WRITE_STRING, CCL_READ_CHAR): Adjusted for the change of SRC
+ and DST type.
+ (ccl_driver): Types of arguments changed. Code adjusted for that.
+ (Fccl_execute, Fccl_execute_on_string): Adjusted for the change of
+ ccl_driver.
+ (syms_of_ccl): Intern and staticpro Qccl and Qcclp.
+
+ * ccl.h (struct ccl_program): Members eol_type and multibyte
+ deleted. New members src_multibyte, dst_multibyte, consumed, and
+ produced.
+ (struct ccl_spec): Members decoder and encoder deleted. New
+ memeber ccl.
+ (CODING_SPEC_CCL_PROGRAM): New macro.
+ (ccl_driver): Prototype updated.
+ (Qccl, Qcclp, Fccl_program_p): Extern them.
+ (CHECK_CCL_PROGRAM): New macro.
+
+ * character.c, character.h, chartab.c: New files.
+
+ * charset.c: Mostly re-written. Character and multibyte sequence
+ handling codes are moved to character.c.
+
+ * charset.h: Mostly re-written. Character and multibyte sequence
+ handling codes are moved to character.h.
+
+ * coding.c, coding.h: Mostly re-written.
+
+ * composite.c: Include "character.h" instead of "charset.h".
+ (CHAR_WIDTH): Moved to character.h.
+ (HASH_KEY, HASH_VALUE): Deleted.
+
+ * composite.h (enum composition_method): Order of enumeration
+ symbols changed.
+
+ * data.c: Include "character.h" instead of "charset.h".
+ (Faref): Call CHAR_TABLE_REF for a char table.
+ (Faset): Call CHAR_TABLE_SET for a char table.
+
+ * dispextern.h (free_realized_face, check_face_attribytes,
+ generate_ascii_font): Extern them.
+ (free_realized_multibyte_face): Extern deleted.
+
+ * disptab.h (DISP_CHAR_VECTOR): Adjusted for the change of char
+ table structure.
+
+ * editfns.c: Include "character.h" instead of "charset.h".
+ (Fchar_to_string): Always call CHAR_STRING.
+
+ * emacs.c (main): Call init_charset_once, init_charset,
+ syms_of_chartab, and syms_of_character.
+
+ * fileio.c: Include "character.h" instead of "charset.h".
+ (Finsert_file_contents): Big change for the new code-conversion
+ API.
+ (choose_write_coding_system): Likewise.
+ (Fwrite_region): Likewise.
+ (build_annotations_2): Deleted.
+ (e_write): Big change for the new code-conversion API.
+
+ * fns.c: Include "character.h" instead of "charset.h".
+ (copy_sub_char_table): Moved to chartab.c.
+ (Fcopy_sequence): Call copy_char_table for a char table.
+ (concat): Delete codes calling count_multibyte.
+ (string_char_to_byte): Adjusted for the new multibyte form.
+ (string_byte_to_char): Likewise.
+ (internal_equal): Adjusted for the change of char table structure.
+ (Fchar_table_subtype, Fchar_table_parent, Fset_char_table_parent,
+ Fchar_table_extra_slot, Fset_char_table_extra_slot,
+ Fchar_table_range, Fset_char_table_range, Fset_char_table_default,
+ char_table_translate, optimize_sub_char_table,
+ Foptimize_char_table, map_char_table, Fmap_char_table): Moved to
+ chartab.c.
+ (char_table_ref_and_index): Deleted.
+ (HASH_KEY, HASH_VALUE): Moved to lisp.h.
+ (Fmd5): Call preferred_coding_system instead of accessing
+ Vcoding_category_list. Adjusted for the new code-conversion API.
+ (syms_of_fns): Defsubr for char table related functions moved to
+ chartab.c.
+
+ * fontset.c: Mostly re-written.
+
+ * fontset.h (struct font_info): Type of the member encoding_type
+ changed.
+ (enum FONT_SPEC_INDEX): New enum.
+ (fontset_font_pattern, fs_load_font): Prototype updated.
+ (FS_LOAD_FONT): Adjusted for the change of fs_load_font.
+
+ * indent.c: Include "character.h" instead of "charset.h".
+ (MULTIBYTE_BYTES_WIDTH): Call CHAR_WIDTH instead of
+ WIDTH_BY_CHAR_HEAD.
+
+ * insdel.c: Include "character.h" instead of "charset.h".
+ (copy_text): Don't refer to Vnonascii_translation_table.
+ (insert_from_gap): New function.
+
+ * keyboard.c: Include "character.h" instead of "charset.h".
+ (command_loop_1): Never call direct_output_forward_char before
+ a non-ASCII character.
+ (read_char): If Vkeyboard_translate_table is a char table, always
+ translated a character.
+
+ * keymap.c: Include "character.h".
+ (store_in_keymap): Handle the case that IDX is a cons.
+ (Fdefine_key): Handle the case that KEY is a cons and the car part
+ is also a cons (range).
+ (push_key_description): Adjusted for the new character code.
+ (describe_vector): Call describe_char_table for a char table.
+ (describe_char_table): New function.
+
+ * keymap.h (describe_char_table): Extern it.
+
+ * lisp.h (enum pvec_type): New member PVEC_SUB_CHAR_TABLE.
+ (XSUB_CHAR_TABLE, XSETSUB_CHAR_TABLE): New macros.
+ (CHAR_TABLE_ORDINARY_SLOTS, CHAR_TABLE_SINGLE_BYTE_SLOTS,
+ SUB_CHAR_TABLE_ORDINARY_SLOTS, SUB_CHAR_TABLE_STANDARD_SLOTS):
+ Deleted.
+ (CHAR_TABLE_REF, CHAR_TABLE_SET): Adjusted for the new char table
+ structure.
+ (CHAR_TABLE_TRANSLATE): Just call char_table_translate.
+ (CHARTAB_SIZE_BITS_0, CHARTAB_SIZE_BITS_1, CHARTAB_SIZE_BITS_2,
+ CHARTAB_SIZE_BITS_3): New macros.
+ (chartab_size): Extern it.
+ (struct Lisp_Char_Table): Re-designed.
+ (struct Lisp_Sub_Char_Table): New structure.
+ (HASH_KEY, HASH_VALUE): Moved from fns.c.
+ (CHARACTERBITS): Defined as 22.
+ (GLYPH_MASK_FACE, GLYPH_MASK_CHAR): Adjusted for the above change.
+ (SUB_CHAR_TABLE_P): Check PVEC_CHAR_TABLE.
+ (GC_SUB_CHAR_TABLE_P): New macro.
+ (Fencode_coding_string, Fdecode_coding_string): EXFUN Updated.
+ (code_convert_string_norecord): Extern deleted.
+ (init_character_once, syms_of_character, init_charset,
+ syms_of_composite, Qeq, Fmakehash, insert_from_gap): Extern them.
+
+ * lread.c: Include "character.h".
+ (read_multibyte): New arg NBYTES.
+ (read_escape): The meaning of returned *BYTEREP changed.
+ (to_multibyte): Deleted.
+ (read1): Adjuted the handling of char table and string.
+
+ * print.c: Include "character.h" instead of "charset.h".
+ (print_string): Convert 8-bit raw bytes to octal form by
+ string_escape_byte8.
+ (print_object): Adjusted for the new multibyte form. Print 8-bit
+ raw bytes always in octal form. Handle sub char table correctly.
+
+ * process.c: Include "character.h" instead of "charset.h".
+ (read_process_output): Adjusted for the new code-conversion API.
+ (send_process): Likewise.
+
+ * puresize.h (BASE_PURESIZE): Increased.
+
+ * regex.c: Include "character.h" instead of "charset.h".
+ (BYTE8_TO_CHAR, CHAR_BYTE8_P) [not emacs]: New dummy macros.
+ (regex_compile): Accept a range whose starting and ending
+ character have different leading bytes.
+ (analyse_first): Adjusted for the above change.
+
+ * search.c: Include "character.h" instead of "charset.h".
+ (search_buffer, boyer_moore): Adjusted for the new multibyte form.
+ (Freplace_match): Adjusted for the change of
+ multibyte_char_to_unibyte.
+
+ * syntax.c: Include "character.h" instead of "charset.h".
+ (syntax_parent_lookup): Deleted.
+ (Fmodify_syntax_entry): Accept a cons as CHAR.
+ (skip_chars): Adjusted for the new multibyte form.
+ (init_syntax_once): Call char_table_set_range instead of directly
+ accessing the structure of a char table.
+
+ * syntax.h (SET_RAW_SYNTAX_ENTRY): Call CHAR_TABLE_SET.
+ (SYNTAX_ENTRY_FOLLOW_PARENT): Macro deleted.
+ (SET_RAW_SYNTAX_ENTRY_RANGE): New macro.
+ (SYNTAX_ENTRY_INT): Call CHAR_TABLE_REF.
+
+ * term.c: Include "buffer.h" and "character.h".
+ (encode_terminal_code): Adjusted for the new code-conversion API.
+ (write_glyphs): Likewise.
+ (produce_glyphs): Call CHAR_WIDTH instead of CHARSET_WIDTH.
+
+ * w32term.c (x_new_font): Adjusted for the change of FS_LOAD_FONT.
+
+ * xdisp.c: Include "character.h".
+ (get_next_display_element): Adjusted for the new multibyte form.
+ (disp_char_vector): Adjusted for the new char table structure.
+ (decode_mode_spec_coding): Adjusted for the new structure of
+ coding system.
+ (decode_mode_spec): Adjusted for the new code-conversion API.
+
+ * xfaces.c: Include "character.h" instead of "charset.h".
+ (load_face_font): Adjusted for the change of choose_face_font and
+ FS_LOAD_FONT.
+ (generate_ascii_font): New function.
+ (set_lface_from_font_name): Adjusted for the change of
+ FS_LOAD_FONT.
+ (set_font_frame_param): Adjusted for the change of
+ choose_face_font.
+ (free_realized_face): Make it public.
+ (free_realized_faces_for_fontset): Renamed from
+ free_realized_multibyte_face. Free also faces realized for ASCII.
+ (choose_face_font): Argments changed. Adjusted for the change of
+ fontset_font_pattern and FS_LOAD_FONT.
+
+ * xfns.c: Include "character.h".
+ (x_encode_text): Adjusted for the new code-conversion API.
+
+ * xselect.c: Don't include "charset.h".
+ (selection_data_to_lisp_data): Adjusted for the new code
+ covnersion API.
+
+ * xterm.c: Include "character.h".
+ (x_encode_char): New argument CHARSET. Caller changed.
+ (x_get_char_face_and_encoding): Call ENCODE_CHAR instead of
+ SPLIT_CHAR.
+ (x_get_glyph_face_and_encoding): Likewise.
+ (x_produce_glyphs): Don't check Vnonascii_translation_table Call
+ CHAR_WIDTH instead of CHARSET_WIDTH.
+ (XTread_socket): Adjusted for the new code-conversion API.
+ (x_new_font): Adjusted for the change of FS_LOAD_FONT.
+ (x_load_font): Adjusted for the change of struct font.
+
+;; Local Variables:
+;; coding: iso-2022-7bit
+;; End:
+
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ Copying and distribution of this file, with or without modification,
+ are permitted provided the copyright notice and this notice are preserved.
diff --git a/src/Makefile.in b/src/Makefile.in
index 1961dfcdb13..0f3ac171442 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -572,7 +572,7 @@ XMENU_OBJ = xmenu.o
/* lastfile must follow all files
whose initialized data areas should be dumped as pure by dump-emacs. */
obj= dispnew.o frame.o scroll.o xdisp.o $(XMENU_OBJ) window.o \
- charset.o coding.o category.o ccl.o \
+ charset.o coding.o category.o ccl.o character.o chartab.o \
cm.o term.o xfaces.o $(XOBJ) \
emacs.o keyboard.o macros.o keymap.o sysdep.o \
buffer.o filelock.o insdel.o marker.o \
@@ -690,6 +690,7 @@ lisp= \
${lispsource}buff-menu.elc \
${lispsource}button.elc \
${lispsource}emacs-lisp/byte-run.elc \
+ ${lispsource}composite.elc \
${lispsource}cus-face.elc \
${lispsource}cus-start.elc \
${lispsource}custom.elc \
@@ -716,20 +717,10 @@ lisp= \
${lispsource}international/mule-conf.el \
${lispsource}international/mule-cmds.elc \
${lispsource}international/characters.elc \
- ${lispsource}international/ucs-tables.elc \
- ${lispsource}international/utf-8.elc \
- ${lispsource}international/utf-16.elc \
- ${lispsource}international/latin-1.el \
- ${lispsource}international/latin-2.el \
- ${lispsource}international/latin-3.el \
- ${lispsource}international/latin-4.el \
- ${lispsource}international/latin-5.el \
- ${lispsource}international/latin-8.el \
- ${lispsource}international/latin-9.el \
${lispsource}case-table.elc \
- ${lispsource}language/chinese.elc \
- ${lispsource}language/cyrillic.elc \
- ${lispsource}language/indian.elc \
+ ${lispsource}language/chinese.el \
+ ${lispsource}language/cyrillic.el \
+ ${lispsource}language/indian.el \
${lispsource}language/devanagari.el \
${lispsource}language/malayalam.el \
${lispsource}language/tamil.el \
@@ -746,7 +737,7 @@ lisp= \
${lispsource}language/lao.el \
${lispsource}language/thai.el \
${lispsource}language/tibetan.elc \
- ${lispsource}language/vietnamese.elc \
+ ${lispsource}language/vietnamese.el \
${lispsource}language/misc-lang.el \
${lispsource}language/utf-8-lang.el \
${lispsource}language/georgian.el \
@@ -781,6 +772,7 @@ shortlisp= \
../lisp/buff-menu.elc \
../lisp/button.elc \
../lisp/emacs-lisp/byte-run.elc \
+ ../lisp/composite.elc \
../lisp/cus-face.elc \
../lisp/cus-start.elc \
../lisp/custom.elc \
@@ -805,20 +797,10 @@ shortlisp= \
../lisp/international/mule-conf.el \
../lisp/international/mule-cmds.elc \
../lisp/international/characters.elc \
- ../lisp/international/ucs-tables.elc \
- ../lisp/international/utf-8.elc \
- ../lisp/international/utf-16.elc \
- ../lisp/international/latin-1.el \
- ../lisp/international/latin-2.el \
- ../lisp/international/latin-3.el \
- ../lisp/international/latin-4.el \
- ../lisp/international/latin-5.el \
- ../lisp/international/latin-8.el \
- ../lisp/international/latin-9.el \
../lisp/case-table.elc \
- ../lisp/language/chinese.elc \
- ../lisp/language/cyrillic.elc \
- ../lisp/language/indian.elc \
+ ../lisp/language/chinese.el \
+ ../lisp/language/cyrillic.el \
+ ../lisp/language/indian.el \
../lisp/language/devanagari.el \
../lisp/language/malayalam.el \
../lisp/language/tamil.el \
@@ -835,7 +817,7 @@ shortlisp= \
../lisp/language/lao.el \
../lisp/language/thai.el \
../lisp/language/tibetan.elc \
- ../lisp/language/vietnamese.elc \
+ ../lisp/language/vietnamese.el \
../lisp/language/misc-lang.el \
../lisp/language/utf-8-lang.el \
../lisp/language/georgian.el \
@@ -1063,64 +1045,70 @@ alloca.o : alloca.s $(config_h)
it is so often changed in ways that do not require any recompilation
and so rarely changed in ways that do require any. */
-abbrev.o: abbrev.c buffer.h window.h dispextern.h commands.h charset.h \
+abbrev.o: abbrev.c buffer.h window.h dispextern.h commands.h character.h \
$(config_h)
buffer.o: buffer.c buffer.h region-cache.h commands.h window.h \
- dispextern.h $(INTERVAL_SRC) blockinput.h atimer.h systime.h charset.h \
+ dispextern.h $(INTERVAL_SRC) blockinput.h atimer.h systime.h character.h \
$(config_h)
callint.o: callint.c window.h commands.h buffer.h \
keyboard.h dispextern.h $(config_h)
callproc.o: callproc.c epaths.h buffer.h commands.h $(config_h) \
- process.h systty.h syssignal.h charset.h coding.h ccl.h msdos.h \
+ process.h systty.h syssignal.h character.h coding.h ccl.h msdos.h \
composite.h
-casefiddle.o: casefiddle.c syntax.h commands.h buffer.h composite.h $(config_h)
+casefiddle.o: casefiddle.c syntax.h commands.h buffer.h character.h \
+ composite.h $(config_h)
casetab.o: casetab.c buffer.h $(config_h)
-category.o: category.c category.h buffer.h charset.h $(config_h)
-ccl.o: ccl.c ccl.h charset.h coding.h $(config_h)
-charset.o: charset.c charset.h buffer.h coding.h composite.h disptab.h \
- $(config_h)
-coding.o: coding.c coding.h ccl.h buffer.h charset.h $(config_h)
+category.o: category.c category.h buffer.h charset.h character.h $(config_h)
+ccl.o: ccl.c ccl.h charset.h character.h coding.h $(config_h)
+character.o: character.c character.h buffer.h charset.h composite.h disptab.h \
+ $(config.h)
+charset.o: charset.c charset.h character.h buffer.h coding.h composite.h \
+ disptab.h $(config_h)
+chartab.o: charset.h character.h $(config.h)
+coding.o: coding.c coding.h ccl.h buffer.h character.h charset.h composite.h \
+ $(config_h)
cm.o: cm.c cm.h termhooks.h $(config_h)
-cmds.o: cmds.c syntax.h buffer.h charset.h commands.h window.h $(config_h) \
+cmds.o: cmds.c syntax.h buffer.h character.h commands.h window.h $(config_h) \
msdos.h dispextern.h
pre-crt0.o: pre-crt0.c
ecrt0.o: ecrt0.c $(config_h)
CRT0_COMPILE ${srcdir}/ecrt0.c
-dired.o: dired.c commands.h buffer.h $(config_h) charset.h coding.h regex.h \
- systime.h
+dired.o: dired.c commands.h buffer.h $(config_h) character.h charset.h \
+ coding.h regex.h systime.h
dispnew.o: dispnew.c systty.h systime.h commands.h process.h frame.h \
window.h buffer.h dispextern.h termchar.h termopts.h termhooks.h cm.h \
disptab.h \
- xterm.h blockinput.h atimer.h charset.h msdos.h composite.h keyboard.h \
+ xterm.h blockinput.h atimer.h character.h msdos.h composite.h keyboard.h \
$(config_h)
-doc.o: doc.c $(config_h) epaths.h buffer.h keyboard.h charset.h
-doprnt.o: doprnt.c charset.h $(config_h)
+doc.o: doc.c $(config_h) epaths.h buffer.h keyboard.h character.h
+doprnt.o: doprnt.c character.h $(config_h)
dosfns.o: buffer.h termchar.h termhooks.h frame.h msdos.h dosfns.h $(config_h)
-editfns.o: editfns.c window.h buffer.h systime.h $(INTERVAL_SRC) charset.h \
+editfns.o: editfns.c window.h buffer.h systime.h $(INTERVAL_SRC) character.h \
coding.h dispextern.h $(config_h)
emacs.o: emacs.c commands.h systty.h syssignal.h blockinput.h process.h \
termhooks.h buffer.h atimer.h systime.h $(INTERVAL_SRC) $(config_h)
-fileio.o: fileio.c window.h buffer.h systime.h $(INTERVAL_SRC) charset.h \
+fileio.o: fileio.c window.h buffer.h systime.h $(INTERVAL_SRC) character.h \
coding.h ccl.h msdos.h dispextern.h $(config_h)
-filelock.o: filelock.c buffer.h systime.h epaths.h $(config_h)
+filelock.o: filelock.c buffer.h character.h charset.h coding.h systime.h \
+ epaths.h $(config_h)
filemode.o: filemode.c $(config_h)
frame.o: frame.c xterm.h window.h frame.h termhooks.h commands.h keyboard.h \
- blockinput.h atimer.h systime.h buffer.h charset.h fontset.h \
+ blockinput.h atimer.h systime.h buffer.h character.h fontset.h \
msdos.h dosfns.h dispextern.h $(config_h)
-fontset.o: dispextern.h fontset.h fontset.c ccl.h buffer.h charset.h frame.h \
- keyboard.h $(config_h)
+fontset.o: dispextern.h fontset.h fontset.c ccl.h buffer.h character.h \
+ charset.h frame.h keyboard.h $(config_h)
getloadavg.o: getloadavg.c $(config_h)
indent.o: indent.c frame.h window.h indent.h buffer.h $(config_h) termchar.h \
- termopts.h disptab.h region-cache.h charset.h composite.h dispextern.h \
- keyboard.h
-insdel.o: insdel.c window.h buffer.h $(INTERVAL_SRC) blockinput.h charset.h \
+ termopts.h disptab.h region-cache.h character.h category.h composite.h \
+ dispextern.h keyboard.h
+insdel.o: insdel.c window.h buffer.h $(INTERVAL_SRC) blockinput.h character.h \
dispextern.h atimer.h systime.h $(config_h)
-keyboard.o: keyboard.c termchar.h termhooks.h termopts.h buffer.h charset.h \
+keyboard.o: keyboard.c termchar.h termhooks.h termopts.h buffer.h character.h \
commands.h frame.h window.h macros.h disptab.h keyboard.h syssignal.h \
systty.h systime.h dispextern.h syntax.h $(INTERVAL_SRC) blockinput.h \
atimer.h xterm.h puresize.h msdos.h $(config_h)
keymap.o: keymap.c buffer.h commands.h keyboard.h termhooks.h blockinput.h \
- atimer.h systime.h puresize.h charset.h intervals.h $(config_h)
+ atimer.h systime.h puresize.h character.h intervals.h $(config_h)
lastfile.o: lastfile.c $(config_h)
macros.o: macros.c window.h buffer.h commands.h macros.h keyboard.h \
dispextern.h $(config_h)
@@ -1128,32 +1116,35 @@ malloc.o: malloc.c $(config_h)
gmalloc.o: gmalloc.c $(config_h)
ralloc.o: ralloc.c $(config_h)
vm-limit.o: vm-limit.c mem-limits.h $(config_h)
-marker.o: marker.c buffer.h charset.h $(config_h)
+marker.o: marker.c buffer.h character.h $(config_h)
md5.o: md5.c md5.h $(config_h)
minibuf.o: minibuf.c syntax.h dispextern.h frame.h window.h keyboard.h \
- buffer.h commands.h charset.h msdos.h $(config_h)
+ buffer.h commands.h character.h msdos.h $(config_h)
mktime.o: mktime.c $(config_h)
msdos.o: msdos.c msdos.h dosfns.h systime.h termhooks.h dispextern.h frame.h \
- termopts.h termchar.h charset.h coding.h ccl.h disptab.h window.h \
+ termopts.h termchar.h character.h coding.h ccl.h disptab.h window.h \
keyboard.h $(config_h)
process.o: process.c process.h buffer.h window.h termhooks.h termopts.h \
commands.h syssignal.h systime.h systty.h syswait.h frame.h dispextern.h \
blockinput.h atimer.h charset.h coding.h ccl.h msdos.h composite.h \
keyboard.h $(config_h)
-regex.o: regex.c syntax.h buffer.h $(config_h) regex.h category.h charset.h
+regex.o: regex.c syntax.h buffer.h $(config_h) regex.h category.h character.h \
+ charset.h
region-cache.o: region-cache.c buffer.h region-cache.h
scroll.o: scroll.c termchar.h dispextern.h frame.h msdos.h keyboard.h \
$(config_h)
search.o: search.c regex.h commands.h buffer.h region-cache.h syntax.h \
- blockinput.h atimer.h systime.h category.h charset.h composite.h $(config_h)
+ blockinput.h atimer.h systime.h category.h character.h composite.h \
+ $(config_h)
strftime.o: strftime.c $(config_h)
-syntax.o: syntax.c syntax.h buffer.h commands.h category.h charset.h \
+syntax.o: syntax.c syntax.h buffer.h commands.h category.h character.h \
composite.h $(config_h)
sysdep.o: sysdep.c syssignal.h systty.h systime.h syswait.h blockinput.h \
process.h dispextern.h termhooks.h termchar.h termopts.h \
frame.h atimer.h window.h msdos.h dosfns.h keyboard.h $(config_h)
term.o: term.c termchar.h termhooks.h termopts.h $(config_h) cm.h frame.h \
- disptab.h dispextern.h keyboard.h charset.h coding.h ccl.h msdos.h
+ disptab.h dispextern.h keyboard.h character.h charset.h coding.h ccl.h \
+ msdos.h keymap.h
termcap.o: termcap.c $(config_h)
terminfo.o: terminfo.c $(config_h)
tparam.o: tparam.c $(config_h)
@@ -1170,20 +1161,22 @@ window.o: window.c indent.h commands.h frame.h window.h buffer.h termchar.h \
termhooks.h disptab.h keyboard.h dispextern.h msdos.h composite.h \
$(config_h)
xdisp.o: xdisp.c macros.h commands.h process.h indent.h buffer.h dispextern.h coding.h \
- termchar.h frame.h window.h disptab.h termhooks.h charset.h $(config_h) \
- msdos.h composite.h fontset.h blockinput.h atimer.h systime.h keymap.h
+ termchar.h frame.h window.h disptab.h termhooks.h character.h charset.h \
+ $(config_h) msdos.h composite.h fontset.h blockinput.h atimer.h systime.h \
+ keymap.h region-cache.h
xfaces.o: xfaces.c dispextern.h frame.h xterm.h buffer.h blockinput.h \
- window.h charset.h msdos.h dosfns.h composite.h atimer.h systime.h $(config_h)
+ window.h character.h charset.h msdos.h dosfns.h composite.h atimer.h \
+ systime.h fontset.h $(config_h) $(INTERVAL_SRC)
xfns.o: xfns.c buffer.h frame.h window.h keyboard.h xterm.h dispextern.h \
$(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h systime.h epaths.h \
- charset.h gtkutil.h $(config_h)
+ character.h charset.h coding.h gtkutil.h $(config_h) termhooks.h
xmenu.o: xmenu.c xterm.h termhooks.h window.h dispextern.h frame.h buffer.h \
- keyboard.h $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h systime.h \
- gtkutil.h msdos.h $(config_h)
+ charset.h keyboard.h $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h \
+ systime.h gtkutil.h msdos.h $(config_h) coding.h
xterm.o: xterm.c xterm.h termhooks.h termopts.h termchar.h window.h buffer.h \
- dispextern.h frame.h disptab.h blockinput.h atimer.h systime.h syssignal.h \
- keyboard.h gnu.h charset.h ccl.h fontset.h composite.h \
- coding.h process.h gtkutil.h $(config_h)
+ dispextern.h frame.h disptab.h blockinput.h atimer.h systime.h syssignal.h \
+ keyboard.h gnu.h character.h charset.h ccl.h fontset.h composite.h \
+ coding.h process.h gtkutil.h $(config_h)
xselect.o: xselect.c process.h dispextern.h frame.h xterm.h blockinput.h \
buffer.h atimer.h systime.h $(config_h)
xrdb.o: xrdb.c $(config_h) epaths.h
@@ -1198,24 +1191,26 @@ atimer.o: atimer.c atimer.h systime.h $(config_h)
/* The files of Lisp proper */
alloc.o: alloc.c process.h frame.h window.h buffer.h puresize.h syssignal.h keyboard.h \
- blockinput.h atimer.h systime.h charset.h dispextern.h $(config_h) $(INTERVAL_SRC)
-bytecode.o: bytecode.c buffer.h syntax.h charset.h $(config_h)
-data.o: data.c buffer.h puresize.h charset.h syssignal.h keyboard.h $(config_h)
+ blockinput.h atimer.h systime.h character.h dispextern.h $(config_h) \
+ $(INTERVAL_SRC)
+bytecode.o: bytecode.c buffer.h syntax.h character.h $(config_h)
+data.o: data.c buffer.h puresize.h character.h syssignal.h keyboard.h \
+ $(config_h)
eval.o: eval.c commands.h keyboard.h blockinput.h atimer.h systime.h \
$(config_h)
floatfns.o: floatfns.c $(config_h)
-fns.o: fns.c commands.h $(config_h) frame.h buffer.h charset.h keyboard.h \
+fns.o: fns.c commands.h $(config_h) frame.h buffer.h character.h keyboard.h \
frame.h window.h dispextern.h $(INTERVAL_SRC) coding.h
-print.o: print.c process.h frame.h window.h buffer.h keyboard.h charset.h \
+print.o: print.c process.h frame.h window.h buffer.h keyboard.h character.h \
$(config_h) dispextern.h msdos.h composite.h
-lread.o: lread.c commands.h keyboard.h buffer.h epaths.h charset.h $(config_h) \
- termhooks.h coding.h msdos.h
+lread.o: lread.c commands.h keyboard.h buffer.h epaths.h character.h \
+ charset.h $(config_h) termhooks.h coding.h msdos.h
/* Text properties support */
textprop.o: textprop.c buffer.h window.h dispextern.h $(INTERVAL_SRC) \
$(config_h)
intervals.o: intervals.c buffer.h $(INTERVAL_SRC) keyboard.h puresize.h $(config_h)
-composite.o: composite.c buffer.h charset.h $(INTERVAL_SRC) $(config_h)
+composite.o: composite.c buffer.h character.h $(INTERVAL_SRC) $(config_h)
/* System-specific programs to be made.
OTHER_FILES and OBJECTS_MACHINE
diff --git a/src/abbrev.c b/src/abbrev.c
index dabc03b2a55..e3e0e28210b 100644
--- a/src/abbrev.c
+++ b/src/abbrev.c
@@ -27,7 +27,7 @@ Boston, MA 02111-1307, USA. */
#include "commands.h"
#include "buffer.h"
#include "window.h"
-#include "charset.h"
+#include "character.h"
#include "syntax.h"
/* An abbrev table is an obarray.
@@ -385,9 +385,15 @@ Returns the abbrev symbol, if expansion took place. */)
int pos = wordstart_byte;
/* Find the initial. */
- while (pos < PT_BYTE
- && SYNTAX (*BUF_BYTE_ADDRESS (current_buffer, pos)) != Sword)
- pos++;
+ if (multibyte)
+ while (pos < PT_BYTE
+ && SYNTAX (FETCH_MULTIBYTE_CHAR (pos)) != Sword)
+ INC_POS (pos);
+ else
+ while (pos < PT_BYTE
+ && (SYNTAX (*BUF_BYTE_ADDRESS (current_buffer, pos))
+ != Sword))
+ pos++;
/* Change just that. */
pos = BYTE_TO_CHAR (pos);
diff --git a/src/alloc.c b/src/alloc.c
index 102bc637b58..4ebb97aec18 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -52,7 +52,7 @@ Boston, MA 02111-1307, USA. */
#include "keyboard.h"
#include "frame.h"
#include "blockinput.h"
-#include "charset.h"
+#include "character.h"
#include "syssignal.h"
#include <setjmp.h>
@@ -766,6 +766,23 @@ lisp_align_malloc (nbytes, type)
mallopt (M_MMAP_MAX, MMAP_MAX_AREAS);
#endif
+ /* If the memory just allocated cannot be addressed thru a Lisp
+ object's pointer, and it needs to be, that's equivalent to
+ running out of memory. */
+ if (type != MEM_TYPE_NON_LISP)
+ {
+ Lisp_Object tem;
+ char *end = (char *) base + ABLOCKS_BYTES - 1;
+ XSETCONS (tem, end);
+ if ((char *) XCONS (tem) != end)
+ {
+ lisp_malloc_loser = base;
+ free (base);
+ UNBLOCK_INPUT;
+ memory_full ();
+ }
+ }
+
/* Initialize the blocks and put them on the free list.
Is `base' was not properly aligned, we can't use the last block. */
for (i = 0; i < (aligned ? ABLOCKS_SIZE : ABLOCKS_SIZE - 1); i++)
@@ -788,21 +805,6 @@ lisp_align_malloc (nbytes, type)
val = free_ablock;
free_ablock = free_ablock->x.next_free;
- /* If the memory just allocated cannot be addressed thru a Lisp
- object's pointer, and it needs to be,
- that's equivalent to running out of memory. */
- if (val && type != MEM_TYPE_NON_LISP)
- {
- Lisp_Object tem;
- XSETCONS (tem, (char *) val + nbytes - 1);
- if ((char *) XCONS (tem) != (char *) val + nbytes - 1)
- {
- lisp_malloc_loser = val;
- free (val);
- val = 0;
- }
- }
-
#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
if (val && type != MEM_TYPE_NON_LISP)
mem_insert (val, (char *) val + nbytes, type);
@@ -1896,7 +1898,7 @@ Both LENGTH and INIT must be numbers. */)
CHECK_NUMBER (init);
c = XINT (init);
- if (SINGLE_BYTE_CHAR_P (c))
+ if (ASCII_CHAR_P (c))
{
nbytes = XINT (length);
val = make_uninit_string (nbytes);
@@ -2622,49 +2624,6 @@ See also the function `vector'. */)
}
-DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
- doc: /* Return a newly created char-table, with purpose PURPOSE.
-Each element is initialized to INIT, which defaults to nil.
-PURPOSE should be a symbol which has a `char-table-extra-slots' property.
-The property's value should be an integer between 0 and 10. */)
- (purpose, init)
- register Lisp_Object purpose, init;
-{
- Lisp_Object vector;
- Lisp_Object n;
- CHECK_SYMBOL (purpose);
- n = Fget (purpose, Qchar_table_extra_slots);
- CHECK_NUMBER (n);
- if (XINT (n) < 0 || XINT (n) > 10)
- args_out_of_range (n, Qnil);
- /* Add 2 to the size for the defalt and parent slots. */
- vector = Fmake_vector (make_number (CHAR_TABLE_STANDARD_SLOTS + XINT (n)),
- init);
- XCHAR_TABLE (vector)->top = Qt;
- XCHAR_TABLE (vector)->parent = Qnil;
- XCHAR_TABLE (vector)->purpose = purpose;
- XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
- return vector;
-}
-
-
-/* Return a newly created sub char table with default value DEFALT.
- Since a sub char table does not appear as a top level Emacs Lisp
- object, we don't need a Lisp interface to make it. */
-
-Lisp_Object
-make_sub_char_table (defalt)
- Lisp_Object defalt;
-{
- Lisp_Object vector
- = Fmake_vector (make_number (SUB_CHAR_TABLE_STANDARD_SLOTS), Qnil);
- XCHAR_TABLE (vector)->top = Qnil;
- XCHAR_TABLE (vector)->defalt = defalt;
- XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
- return vector;
-}
-
-
DEFUN ("vector", Fvector, Svector, 0, MANY, 0,
doc: /* Return a newly created vector with specified arguments as elements.
Any number of arguments, even zero arguments, are allowed.
@@ -5024,6 +4983,7 @@ mark_object (arg)
since all markable slots in current buffer marked anyway. */
/* Don't need to do Lisp_Objfwd, since the places they point
are protected with staticpro. */
+ case Lisp_Misc_Save_Value:
break;
case Lisp_Misc_Overlay:
@@ -5771,7 +5731,6 @@ The time is in seconds as a floating point value. */);
defsubr (&Smake_byte_code);
defsubr (&Smake_list);
defsubr (&Smake_vector);
- defsubr (&Smake_char_table);
defsubr (&Smake_string);
defsubr (&Smake_bool_vector);
defsubr (&Smake_symbol);
diff --git a/src/buffer.c b/src/buffer.c
index 40ee3f071de..b7efdbe5504 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -45,7 +45,7 @@ extern int errno;
#include "window.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "region-cache.h"
#include "indent.h"
#include "blockinput.h"
@@ -186,6 +186,7 @@ static void free_buffer_text P_ ((struct buffer *b));
static struct Lisp_Overlay * copy_overlays P_ ((struct buffer *, struct Lisp_Overlay *));
static void modify_overlay P_ ((struct buffer *, EMACS_INT, EMACS_INT));
+extern char * emacs_strerror P_ ((int));
/* For debugging; temporary. See set_buffer_internal. */
/* Lisp_Object Qlisp_mode, Vcheck_symbol; */
@@ -2078,8 +2079,10 @@ DEFUN ("set-buffer-multibyte", Fset_buffer_multibyte, Sset_buffer_multibyte,
doc: /* Set the multibyte flag of the current buffer to FLAG.
If FLAG is t, this makes the buffer a multibyte buffer.
If FLAG is nil, this makes the buffer a single-byte buffer.
-The buffer contents remain unchanged as a sequence of bytes
-but the contents viewed as characters do change. */)
+In these cases, the buffer contents remain unchanged as a sequence of
+bytes but the contents viewed as characters do change.
+If FLAG is `to', this makes the buffer a multibyte buffer by changing
+all eight-bit bytes to eight-bit characters. */)
(flag)
Lisp_Object flag;
{
@@ -2149,11 +2152,11 @@ but the contents viewed as characters do change. */)
p = GAP_END_ADDR;
stop = Z;
}
- if (MULTIBYTE_STR_AS_UNIBYTE_P (p, bytes))
- p += bytes, pos += bytes;
- else
+ if (ASCII_BYTE_P (*p))
+ p++, pos++;
+ else if (CHAR_BYTE8_HEAD_P (*p))
{
- c = STRING_CHAR (p, stop - pos);
+ c = STRING_CHAR_AND_LENGTH (p, stop - pos, bytes);
/* Delete all bytes for this 8-bit character but the
last one, and change the last one to the charcter
code. */
@@ -2168,6 +2171,11 @@ but the contents viewed as characters do change. */)
zv -= bytes;
stop = Z;
}
+ else
+ {
+ bytes = BYTES_BY_CHAR_HEAD (*p);
+ p += bytes, pos += bytes;
+ }
}
if (narrowed)
Fnarrow_to_region (make_number (begv), make_number (zv));
@@ -2176,13 +2184,14 @@ but the contents viewed as characters do change. */)
{
int pt = PT;
int pos, stop;
- unsigned char *p;
+ unsigned char *p, *pend;
/* Be sure not to have a multibyte sequence striding over the GAP.
- Ex: We change this: "...abc\201 _GAP_ \241def..."
- to: "...abc _GAP_ \201\241def..." */
+ Ex: We change this: "...abc\302 _GAP_ \241def..."
+ to: "...abc _GAP_ \302\241def..." */
- if (GPT_BYTE > 1 && GPT_BYTE < Z_BYTE
+ if (EQ (flag, Qt)
+ && GPT_BYTE > 1 && GPT_BYTE < Z_BYTE
&& ! CHAR_HEAD_P (*(GAP_END_ADDR)))
{
unsigned char *p = GPT_ADDR - 1;
@@ -2201,6 +2210,7 @@ but the contents viewed as characters do change. */)
pos = BEG;
stop = GPT;
p = BEG_ADDR;
+ pend = GPT_ADDR;
while (1)
{
int bytes;
@@ -2210,16 +2220,21 @@ but the contents viewed as characters do change. */)
if (pos == Z)
break;
p = GAP_END_ADDR;
+ pend = Z_ADDR;
stop = Z;
}
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p, stop - pos, bytes))
+ if (ASCII_BYTE_P (*p))
+ p++, pos++;
+ else if (EQ (flag, Qt) && (bytes = MULTIBYTE_LENGTH (p, pend)) > 0)
p += bytes, pos += bytes;
else
{
unsigned char tmp[MAX_MULTIBYTE_LENGTH];
+ int c;
- bytes = CHAR_STRING (*p, tmp);
+ c = BYTE8_TO_CHAR (*p);
+ bytes = CHAR_STRING (c, tmp);
*p = tmp[0];
TEMP_SET_PT_BOTH (pos + 1, pos + 1);
bytes--;
@@ -2233,6 +2248,7 @@ but the contents viewed as characters do change. */)
zv += bytes;
if (pos <= pt)
pt += bytes;
+ pend = Z_ADDR;
stop = Z;
}
}
@@ -3741,15 +3757,13 @@ buffer. */)
end = OVERLAY_END (overlay);
if (OVERLAY_POSITION (end) < b->overlay_center)
{
- if (b->overlays_after)
- XOVERLAY (overlay)->next = b->overlays_after;
- b->overlays_after = XOVERLAY (overlay);
+ XOVERLAY (overlay)->next = b->overlays_after;
+ b->overlays_after = XOVERLAY (overlay);
}
else
{
- if (b->overlays_before)
- XOVERLAY (overlay)->next = b->overlays_before;
- b->overlays_before = XOVERLAY (overlay);
+ XOVERLAY (overlay)->next = b->overlays_before;
+ b->overlays_before = XOVERLAY (overlay);
}
/* This puts it in the right list, and in the right order. */
diff --git a/src/buffer.h b/src/buffer.h
index 8c340a23d23..b52f83a6e06 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -319,7 +319,6 @@ else
/* Variables used locally in FETCH_MULTIBYTE_CHAR. */
extern unsigned char *_fetch_multibyte_char_p;
-extern int _fetch_multibyte_char_len;
/* Return character code of multi-byte form at position POS. If POS
doesn't point the head of valid multi-byte form, only the byte at
@@ -327,10 +326,18 @@ extern int _fetch_multibyte_char_len;
#define FETCH_MULTIBYTE_CHAR(pos) \
(_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \
- + (pos) + BEG_ADDR - BEG_BYTE), \
- _fetch_multibyte_char_len \
- = ((pos) >= GPT_BYTE ? ZV_BYTE : GPT_BYTE) - (pos), \
- STRING_CHAR (_fetch_multibyte_char_p, _fetch_multibyte_char_len))
+ + (pos) + BEG_ADDR - BEG_BYTE), \
+ STRING_CHAR (_fetch_multibyte_char_p, 0))
+
+/* Return character at position POS. If the current buffer is unibyte
+ and the character is not ASCII, make the returning character
+ multibyte. */
+
+#define FETCH_CHAR_AS_MULTIBYTE(pos) \
+ (!NILP (current_buffer->enable_multibyte_characters) \
+ ? FETCH_MULTIBYTE_CHAR ((pos)) \
+ : unibyte_char_to_multibyte (FETCH_BYTE ((pos))))
+
/* Macros for accessing a character or byte,
or converting between byte positions and addresses,
@@ -379,10 +386,7 @@ extern int _fetch_multibyte_char_len;
(_fetch_multibyte_char_p \
= (((pos) >= BUF_GPT_BYTE (buf) ? BUF_GAP_SIZE (buf) : 0) \
+ (pos) + BUF_BEG_ADDR (buf) - BEG_BYTE), \
- _fetch_multibyte_char_len \
- = (((pos) >= BUF_GPT_BYTE (buf) ? BUF_ZV_BYTE (buf) : BUF_GPT_BYTE (buf)) \
- - (pos)), \
- STRING_CHAR (_fetch_multibyte_char_p, _fetch_multibyte_char_len))
+ STRING_CHAR (_fetch_multibyte_char_p, 0))
/* Define the actual buffer data structures. */
@@ -820,6 +824,7 @@ extern void buffer_slot_type_mismatch P_ ((int));
extern void fix_overlays_before P_ ((struct buffer *, EMACS_INT, EMACS_INT));
extern void mmap_set_vars P_ ((int));
+EXFUN (Fbuffer_live_p, 1);
EXFUN (Fbuffer_name, 1);
EXFUN (Fget_file_buffer, 1);
EXFUN (Fnext_overlay_change, 1);
diff --git a/src/bytecode.c b/src/bytecode.c
index 141f5adda84..f3a07dced35 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -37,7 +37,7 @@ by Hallvard:
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "syntax.h"
#include "window.h"
@@ -1437,10 +1437,17 @@ If the third argument is incorrect, Emacs may crash. */)
break;
case Bchar_syntax:
- BEFORE_POTENTIAL_GC ();
- CHECK_NUMBER (TOP);
- AFTER_POTENTIAL_GC ();
- XSETFASTINT (TOP, syntax_code_spec[(int) SYNTAX (XINT (TOP))]);
+ {
+ int c;
+
+ BEFORE_POTENTIAL_GC ();
+ CHECK_CHARACTER (TOP);
+ AFTER_POTENTIAL_GC ();
+ c = XFASTINT (TOP);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ MAKE_CHAR_MULTIBYTE (c);
+ XSETFASTINT (TOP, syntax_code_spec[(int) SYNTAX (c)]);
+ }
break;
case Bbuffer_substring:
diff --git a/src/callproc.c b/src/callproc.c
index d92176ccd91..20b3ee22add 100644
--- a/src/callproc.c
+++ b/src/callproc.c
@@ -75,7 +75,7 @@ extern int errno;
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "ccl.h"
#include "coding.h"
#include "composite.h"
@@ -408,12 +408,8 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
{
argument_coding.src_multibyte = STRING_MULTIBYTE (args[i]);
if (CODING_REQUIRE_ENCODING (&argument_coding))
- {
- /* We must encode this argument. */
- args[i] = encode_coding_string (args[i], &argument_coding, 1);
- if (argument_coding.type == coding_type_ccl)
- setup_ccl_program (&(argument_coding.spec.ccl.encoder), Qnil);
- }
+ /* We must encode this argument. */
+ args[i] = encode_coding_string (&argument_coding, args[i], 1);
new_argv[i - 3] = SDATA (args[i]);
}
UNGCPRO;
@@ -721,19 +717,15 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
else
val = Qnil;
}
- setup_coding_system (Fcheck_coding_system (val), &process_coding);
+ Fcheck_coding_system (val);
/* In unibyte mode, character code conversion should not take
place but EOL conversion should. So, setup raw-text or one
of the subsidiary according to the information just setup. */
if (NILP (current_buffer->enable_multibyte_characters)
&& !NILP (val))
- setup_raw_text_coding_system (&process_coding);
+ val = raw_text_coding_system (val);
+ setup_coding_system (val, &process_coding);
}
- process_coding.src_multibyte = 0;
- process_coding.dst_multibyte
- = (BUFFERP (buffer)
- ? ! NILP (XBUFFER (buffer)->enable_multibyte_characters)
- : ! NILP (current_buffer->enable_multibyte_characters));
immediate_quit = 1;
QUIT;
@@ -745,12 +737,8 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
int carryover = 0;
int display_on_the_fly = !NILP (display) && INTERACTIVE;
struct coding_system saved_coding;
- int pt_orig = PT, pt_byte_orig = PT_BYTE;
- int inserted;
saved_coding = process_coding;
- if (process_coding.composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (&process_coding, PT);
while (1)
{
/* Repeatedly read until we've filled as much as possible
@@ -783,128 +771,45 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
if (!NILP (buffer))
{
- if (! CODING_MAY_REQUIRE_DECODING (&process_coding))
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! CODING_MAY_REQUIRE_DECODING (&process_coding))
insert_1_both (bufptr, nread, nread, 0, 1, 0);
else
{ /* We have to decode the input. */
- int size;
- char *decoding_buf;
-
- repeat_decoding:
- size = decoding_buffer_size (&process_coding, nread);
- decoding_buf = (char *) xmalloc (size);
-
- /* We can't use the macro CODING_REQUIRE_DETECTION
- because it always returns nonzero if the coding
- system requires EOL detection. Here, we have to
- check only whether or not the coding system
- requires text-encoding detection. */
- if (process_coding.type == coding_type_undecided)
- {
- detect_coding (&process_coding, bufptr, nread);
- if (process_coding.composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (&process_coding, PT);
- }
- if (process_coding.cmp_data)
- process_coding.cmp_data->char_offset = PT;
-
- decode_coding (&process_coding, bufptr, decoding_buf,
- nread, size);
+ Lisp_Object buf;
+ XSETBUFFER (buf, current_buffer);
+ decode_coding_c_string (&process_coding, bufptr, nread,
+ buf);
if (display_on_the_fly
- && saved_coding.type == coding_type_undecided
- && process_coding.type != coding_type_undecided)
+ && CODING_REQUIRE_DETECTION (&saved_coding)
+ && ! CODING_REQUIRE_DETECTION (&process_coding))
{
/* We have detected some coding system. But,
there's a possibility that the detection was
done by insufficient data. So, we give up
displaying on the fly. */
- xfree (decoding_buf);
+ if (process_coding.produced > 0)
+ del_range_2 (process_coding.dst_pos,
+ process_coding.dst_pos_byte,
+ process_coding.dst_pos
+ + process_coding.produced_char,
+ process_coding.dst_pos_byte
+ + process_coding.produced, 0);
display_on_the_fly = 0;
process_coding = saved_coding;
carryover = nread;
continue;
}
- if (process_coding.produced > 0)
- insert_1_both (decoding_buf, process_coding.produced_char,
- process_coding.produced, 0, 1, 0);
- xfree (decoding_buf);
-
- if (process_coding.result == CODING_FINISH_INCONSISTENT_EOL)
- {
- Lisp_Object eol_type, coding;
-
- if (process_coding.eol_type == CODING_EOL_CR)
- {
- /* CRs have been replaced with LFs. Undo
- that in the text inserted above. */
- unsigned char *p;
-
- move_gap_both (PT, PT_BYTE);
-
- p = BYTE_POS_ADDR (pt_byte_orig);
- for (; p < GPT_ADDR; ++p)
- if (*p == '\n')
- *p = '\r';
- }
- else if (process_coding.eol_type == CODING_EOL_CRLF)
- {
- /* CR LFs have been replaced with LFs. Undo
- that by inserting CRs in front of LFs in
- the text inserted above. */
- EMACS_INT bytepos, old_pt, old_pt_byte, nCR;
-
- old_pt = PT;
- old_pt_byte = PT_BYTE;
- nCR = 0;
-
- for (bytepos = PT_BYTE - 1;
- bytepos >= pt_byte_orig;
- --bytepos)
- if (FETCH_BYTE (bytepos) == '\n')
- {
- EMACS_INT charpos = BYTE_TO_CHAR (bytepos);
- TEMP_SET_PT_BOTH (charpos, bytepos);
- insert_1_both ("\r", 1, 1, 0, 1, 0);
- ++nCR;
- }
-
- TEMP_SET_PT_BOTH (old_pt + nCR, old_pt_byte + nCR);
- }
-
- /* Set the coding system symbol to that for
- Unix-like EOL. */
- eol_type = Fget (saved_coding.symbol, Qeol_type);
- if (VECTORP (eol_type)
- && ASIZE (eol_type) == 3
- && SYMBOLP (AREF (eol_type, CODING_EOL_LF)))
- coding = AREF (eol_type, CODING_EOL_LF);
- else
- coding = saved_coding.symbol;
-
- process_coding.symbol = coding;
- process_coding.eol_type = CODING_EOL_LF;
- process_coding.mode
- &= ~CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
-
- nread -= process_coding.consumed;
- carryover = nread;
+ TEMP_SET_PT_BOTH (PT + process_coding.produced_char,
+ PT_BYTE + process_coding.produced);
+ carryover = process_coding.carryover_bytes;
if (carryover > 0)
/* As CARRYOVER should not be that large, we had
better avoid overhead of bcopy. */
- BCOPY_SHORT (bufptr + process_coding.consumed, bufptr,
- carryover);
- if (process_coding.result == CODING_FINISH_INSUFFICIENT_CMP)
- {
- /* The decoding ended because of insufficient data
- area to record information about composition.
- We must try decoding with additional data area
- before reading more output for the process. */
- coding_allocate_composition_data (&process_coding, PT);
- goto repeat_decoding;
- }
+ BCOPY_SHORT (process_coding.carryover, bufptr,
+ process_coding.carryover_bytes);
}
}
@@ -935,33 +840,12 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
}
give_up: ;
- if (!NILP (buffer)
- && process_coding.cmp_data)
- {
- coding_restore_composition (&process_coding, Fcurrent_buffer ());
- coding_free_composition_data (&process_coding);
- }
-
- {
- int post_read_count = SPECPDL_INDEX ();
-
- record_unwind_protect (save_excursion_restore, save_excursion_save ());
- inserted = PT - pt_orig;
- TEMP_SET_PT_BOTH (pt_orig, pt_byte_orig);
- if (SYMBOLP (process_coding.post_read_conversion)
- && !NILP (Ffboundp (process_coding.post_read_conversion)))
- call1 (process_coding.post_read_conversion, make_number (inserted));
-
- Vlast_coding_system_used = process_coding.symbol;
-
- /* If the caller required, let the buffer inherit the
- coding-system used to decode the process output. */
- if (inherit_process_coding_system)
- call1 (intern ("after-insert-file-set-buffer-file-coding-system"),
- make_number (total_read));
-
- unbind_to (post_read_count, Qnil);
- }
+ Vlast_coding_system_used = CODING_ID_NAME (process_coding.id);
+ /* If the caller required, let the buffer inherit the
+ coding-system used to decode the process output. */
+ if (inherit_process_coding_system)
+ call1 (intern ("after-insert-file-set-buffer-file-coding-system"),
+ make_number (total_read));
}
/* Wait for it to terminate, unless it already has. */
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 8b92d39cbb3..1e502af9c02 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -22,7 +22,7 @@ Boston, MA 02111-1307, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "commands.h"
#include "syntax.h"
#include "composite.h"
@@ -37,7 +37,7 @@ casify_object (flag, obj)
enum case_action flag;
Lisp_Object obj;
{
- register int i, c, len;
+ register int c, c1;
register int inword = flag == CASE_DOWN;
/* If the case table is flagged as modified, rescan it. */
@@ -51,13 +51,18 @@ casify_object (flag, obj)
int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
| CHAR_SHIFT | CHAR_CTL | CHAR_META);
int flags = XINT (obj) & flagbits;
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
- c = DOWNCASE (XFASTINT (obj) & ~flagbits);
- if (inword)
- XSETFASTINT (obj, c | flags);
- else if (c == (XFASTINT (obj) & ~flagbits))
+ c1 = XFASTINT (obj) & ~flagbits;
+ if (! multibyte)
+ MAKE_CHAR_MULTIBYTE (c1);
+ c = DOWNCASE (c1);
+ if (inword || c == c1)
{
- c = UPCASE1 ((XFASTINT (obj) & ~flagbits));
+ if (! inword)
+ c = UPCASE1 (c1);
+ if (! multibyte)
+ MAKE_CHAR_UNIBYTE (c);
XSETFASTINT (obj, c | flags);
}
return obj;
@@ -66,66 +71,43 @@ casify_object (flag, obj)
if (STRINGP (obj))
{
int multibyte = STRING_MULTIBYTE (obj);
+ int i, i_byte, len;
+ int size = SCHARS (obj);
obj = Fcopy_sequence (obj);
- len = SBYTES (obj);
-
- /* Scan all single-byte characters from start of string. */
- for (i = 0; i < len;)
+ for (i = i_byte = 0; i < size; i++, i_byte += len)
{
- c = SREF (obj, i);
-
- if (multibyte && c >= 0x80)
- /* A multibyte character can't be handled in this
- simple loop. */
- break;
+ if (multibyte)
+ c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
+ else
+ {
+ c = SREF (obj, i_byte);
+ len = 1;
+ MAKE_CHAR_MULTIBYTE (c);
+ }
+ c1 = c;
if (inword && flag != CASE_CAPITALIZE_UP)
c = DOWNCASE (c);
else if (!UPPERCASEP (c)
&& (!inword || flag != CASE_CAPITALIZE_UP))
- c = UPCASE1 (c);
- /* If this char won't fit in a single-byte string.
- fall out to the multibyte case. */
- if (multibyte ? ! ASCII_BYTE_P (c)
- : ! SINGLE_BYTE_CHAR_P (c))
- break;
-
- SSET (obj, i, c);
+ c = UPCASE1 (c1);
if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c) == Sword;
- i++;
- }
-
- /* If we didn't do the whole string as single-byte,
- scan the rest in a more complex way. */
- if (i < len)
- {
- /* The work is not yet finished because of a multibyte
- character just encountered. */
- int fromlen, j_byte = i;
- char *buf
- = (char *) alloca ((len - i) * MAX_MULTIBYTE_LENGTH + i);
-
- /* Copy data already handled. */
- bcopy (SDATA (obj), buf, i);
-
- /* From now on, I counts bytes. */
- while (i < len)
+ inword = (SYNTAX (c) == Sword);
+ if (c != c1)
{
- c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i,
- len - i, fromlen);
- if (inword && flag != CASE_CAPITALIZE_UP)
- c = DOWNCASE (c);
- else if (!UPPERCASEP (c)
- && (!inword || flag != CASE_CAPITALIZE_UP))
- c = UPCASE1 (c);
- i += fromlen;
- j_byte += CHAR_STRING (c, buf + j_byte);
- if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c) == Sword;
+ if (! multibyte)
+ {
+ MAKE_CHAR_UNIBYTE (c);
+ SSET (obj, i_byte, c);
+ }
+ else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c))
+ SSET (obj, i_byte, c);
+ else
+ {
+ Faset (obj, make_number (i), make_number (c));
+ i_byte += CHAR_BYTES (c) - len;
+ }
}
- obj = make_multibyte_string (buf, SCHARS (obj),
- j_byte);
}
return obj;
}
@@ -187,13 +169,14 @@ casify_region (flag, b, e)
enum case_action flag;
Lisp_Object b, e;
{
- register int i;
register int c;
register int inword = flag == CASE_DOWN;
register int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int start, end;
int start_byte, end_byte;
int changed = 0;
+ int opoint = PT;
+ int opoint_byte = PT_BYTE;
if (EQ (b, e))
/* Not modifying because nothing marked */
@@ -211,82 +194,66 @@ casify_region (flag, b, e)
start_byte = CHAR_TO_BYTE (start);
end_byte = CHAR_TO_BYTE (end);
- for (i = start_byte; i < end_byte; i++, start++)
+ while (start < end)
{
- int c2;
- c = c2 = FETCH_BYTE (i);
- if (multibyte && c >= 0x80)
- /* A multibyte character can't be handled in this simple loop. */
- break;
+ int c2, len;
+
+ if (multibyte)
+ {
+ c = FETCH_MULTIBYTE_CHAR (start_byte);
+ len = CHAR_BYTES (c);
+ }
+ else
+ {
+ c = FETCH_BYTE (start_byte);
+ MAKE_CHAR_MULTIBYTE (c);
+ len = 1;
+ }
+ c2 = c;
if (inword && flag != CASE_CAPITALIZE_UP)
c = DOWNCASE (c);
else if (!UPPERCASEP (c)
&& (!inword || flag != CASE_CAPITALIZE_UP))
c = UPCASE1 (c);
- FETCH_BYTE (i) = c;
- if (c != c2)
- changed = 1;
if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c) == Sword && (inword || !SYNTAX_PREFIX (c));
- }
- if (i < end_byte)
- {
- /* The work is not yet finished because of a multibyte character
- just encountered. */
- int opoint = PT;
- int opoint_byte = PT_BYTE;
- int c2;
-
- while (i < end_byte)
+ inword = ((SYNTAX (c) == Sword) && (inword || !SYNTAX_PREFIX (c)));
+ if (c != c2)
{
- if ((c = FETCH_BYTE (i)) >= 0x80)
- c = FETCH_MULTIBYTE_CHAR (i);
- c2 = c;
- if (inword && flag != CASE_CAPITALIZE_UP)
- c2 = DOWNCASE (c);
- else if (!UPPERCASEP (c)
- && (!inword || flag != CASE_CAPITALIZE_UP))
- c2 = UPCASE1 (c);
- if (c != c2)
+ changed = 1;
+ if (! multibyte)
{
- int fromlen, tolen, j;
+ MAKE_CHAR_UNIBYTE (c);
+ FETCH_BYTE (start_byte) = c;
+ }
+ else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
+ FETCH_BYTE (start_byte) = c;
+ else if (len == CHAR_BYTES (c))
+ {
+ int j;
unsigned char str[MAX_MULTIBYTE_LENGTH];
- changed = 1;
- /* Handle the most likely case */
- if (c < 0400 && c2 < 0400)
- FETCH_BYTE (i) = c2;
- else if (fromlen = CHAR_STRING (c, str),
- tolen = CHAR_STRING (c2, str),
- fromlen == tolen)
- {
- for (j = 0; j < tolen; ++j)
- FETCH_BYTE (i + j) = str[j];
- }
- else
- {
- error ("Can't casify letters that change length");
-#if 0 /* This is approximately what we'd like to be able to do here */
- if (tolen < fromlen)
- del_range_1 (i + tolen, i + fromlen, 0, 0);
- else if (tolen > fromlen)
- {
- TEMP_SET_PT (i + fromlen);
- insert_1 (str + fromlen, tolen - fromlen, 1, 0, 0);
- }
-#endif
- }
+ CHAR_STRING (c, str);
+ for (j = 0; j < len; ++j)
+ FETCH_BYTE (start_byte + j) = str[j];
+ }
+ else
+ {
+ TEMP_SET_PT_BOTH (start, start_byte);
+ del_range_2 (start, start_byte, start + 1, start_byte + len, 0);
+ insert_char (c);
+ len = CHAR_BYTES (c);
}
- if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c2) == Sword;
- INC_BOTH (start, i);
}
- TEMP_SET_PT_BOTH (opoint, opoint_byte);
+ start++;
+ start_byte += len;
}
- start = XFASTINT (b);
+ if (PT != opoint)
+ TEMP_SET_PT_BOTH (opoint, opoint_byte);
+
if (changed)
{
+ start = XFASTINT (b);
signal_after_change (start, end - start, end - start);
update_compositions (start, end, CHECK_ALL);
}
diff --git a/src/casetab.c b/src/casetab.c
index 9f9c4f8c5b2..64b0b4cbb29 100644
--- a/src/casetab.c
+++ b/src/casetab.c
@@ -23,7 +23,7 @@ Boston, MA 02111-1307, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
Lisp_Object Qcase_table_p, Qcase_table;
Lisp_Object Vascii_downcase_table, Vascii_upcase_table;
@@ -138,8 +138,8 @@ set_case_table (table, standard)
if (NILP (up))
{
up = Fmake_char_table (Qcase_table, Qnil);
- map_char_table (set_identity, Qnil, table, table, up, 0, indices);
- map_char_table (shuffle, Qnil, table, table, up, 0, indices);
+ map_char_table (set_identity, Qnil, table, up);
+ map_char_table (shuffle, Qnil, table, up);
XCHAR_TABLE (table)->extras[0] = up;
}
@@ -147,14 +147,14 @@ set_case_table (table, standard)
{
canon = Fmake_char_table (Qcase_table, Qnil);
XCHAR_TABLE (table)->extras[1] = canon;
- map_char_table (set_canon, Qnil, table, table, table, 0, indices);
+ map_char_table (set_canon, Qnil, table, table);
}
if (NILP (eqv))
{
eqv = Fmake_char_table (Qcase_table, Qnil);
- map_char_table (set_identity, Qnil, canon, canon, eqv, 0, indices);
- map_char_table (shuffle, Qnil, canon, canon, eqv, 0, indices);
+ map_char_table (set_identity, Qnil, canon, eqv);
+ map_char_table (shuffle, Qnil, canon, eqv);
XCHAR_TABLE (table)->extras[2] = eqv;
}
@@ -176,30 +176,45 @@ set_case_table (table, standard)
/* The following functions are called in map_char_table. */
-/* Set CANON char-table element for C to a translated ELT by UP and
- DOWN char-tables. This is done only when ELT is a character. The
- char-tables CANON, UP, and DOWN are in CASE_TABLE. */
+/* Set CANON char-table element for characters in RANGE to a
+ translated ELT by UP and DOWN char-tables. This is done only when
+ ELT is a character. The char-tables CANON, UP, and DOWN are in
+ CASE_TABLE. */
static void
-set_canon (case_table, c, elt)
- Lisp_Object case_table, c, elt;
+set_canon (case_table, range, elt)
+ Lisp_Object case_table, range, elt;
{
Lisp_Object up = XCHAR_TABLE (case_table)->extras[0];
Lisp_Object canon = XCHAR_TABLE (case_table)->extras[1];
if (NATNUMP (elt))
- Faset (canon, c, Faref (case_table, Faref (up, elt)));
+ Fset_char_table_range (canon, range, Faref (case_table, Faref (up, elt)));
}
-/* Set elements of char-table TABLE for C to C itself. This is done
- only when ELT is a character. This is called in map_char_table. */
+/* Set elements of char-table TABLE for C to C itself. C may be a
+ cons specifying a character range. In that case, set characters in
+ that range to themselves. This is done only when ELT is a
+ character. This is called in map_char_table. */
static void
set_identity (table, c, elt)
Lisp_Object table, c, elt;
{
if (NATNUMP (elt))
- Faset (table, c, c);
+ {
+ int from, to;
+
+ if (CONSP (c))
+ {
+ from = XINT (XCAR (c));
+ to = XINT (XCDR (c));
+ }
+ else
+ from = to = XINT (c);
+ for (; from <= to; from++)
+ CHAR_TABLE_SET (table, from, make_number (from));
+ }
}
/* Permute the elements of TABLE (which is initially an identity
@@ -211,11 +226,25 @@ static void
shuffle (table, c, elt)
Lisp_Object table, c, elt;
{
- if (NATNUMP (elt) && !EQ (c, elt))
+ if (NATNUMP (elt))
{
Lisp_Object tem = Faref (table, elt);
- Faset (table, elt, c);
- Faset (table, c, tem);
+ int from, to;
+
+ if (CONSP (c))
+ {
+ from = XINT (XCAR (c));
+ to = XINT (XCDR (c));
+ }
+ else
+ from = to = XINT (c);
+
+ for (; from <= to; from++)
+ if (from != XINT (elt))
+ {
+ Faset (table, elt, make_number (from));
+ Faset (table, make_number (from), tem);
+ }
}
}
@@ -240,22 +269,24 @@ init_casetab_once ()
Vascii_downcase_table = down;
XCHAR_TABLE (down)->purpose = Qcase_table;
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- XSETFASTINT (XCHAR_TABLE (down)->contents[i],
- (i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i);
+ for (i = 0; i < 128; i++)
+ {
+ int c = (i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i;
+ CHAR_TABLE_SET (down, i, make_number (c));
+ }
XCHAR_TABLE (down)->extras[1] = Fcopy_sequence (down);
up = Fmake_char_table (Qcase_table, Qnil);
XCHAR_TABLE (down)->extras[0] = up;
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- XSETFASTINT (XCHAR_TABLE (up)->contents[i],
- ((i >= 'A' && i <= 'Z')
- ? i + ('a' - 'A')
- : ((i >= 'a' && i <= 'z')
- ? i + ('A' - 'a')
- : i)));
+ for (i = 0; i < 128; i++)
+ {
+ int c = ((i >= 'A' && i <= 'Z') ? i + ('a' - 'A')
+ : ((i >= 'a' && i <= 'z') ? i + ('A' - 'a')
+ : i));;
+ CHAR_TABLE_SET (up, i, make_number (c));
+ }
XCHAR_TABLE (down)->extras[2] = Fcopy_sequence (up);
}
diff --git a/src/category.c b/src/category.c
index 4846ae8f7f6..89e4d907378 100644
--- a/src/category.c
+++ b/src/category.c
@@ -1,6 +1,9 @@
/* GNU Emacs routines to deal with category tables.
Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -27,6 +30,7 @@ Boston, MA 02111-1307, USA. */
#include <ctype.h>
#include "lisp.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "category.h"
#include "keymap.h"
@@ -186,6 +190,18 @@ This is the one used for new buffers. */)
return Vstandard_category_table;
}
+
+static void
+copy_category_entry (table, c, val)
+ Lisp_Object table, c, val;
+{
+ val = Fcopy_sequence (val);
+ if (CONSP (c))
+ char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
+ else
+ char_table_set (table, XINT (c), val);
+}
+
/* Return a copy of category table TABLE. We can't simply use the
function copy-sequence because no contents should be shared between
the original and the copy. This function is called recursively by
@@ -195,44 +211,14 @@ Lisp_Object
copy_category_table (table)
Lisp_Object table;
{
- Lisp_Object tmp;
- int i, to;
+ table = copy_char_table (table);
- if (!NILP (XCHAR_TABLE (table)->top))
- {
- /* TABLE is a top level char table.
- At first, make a copy of tree structure of the table. */
- table = Fcopy_sequence (table);
-
- /* Then, copy elements for single byte characters one by one. */
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
- XCHAR_TABLE (table)->contents[i] = Fcopy_sequence (tmp);
- to = CHAR_TABLE_ORDINARY_SLOTS;
-
- /* Also copy the first (and sole) extra slot. It is a vector
- containing docstring of each category. */
- Fset_char_table_extra_slot
- (table, make_number (0),
- Fcopy_sequence (Fchar_table_extra_slot (table, make_number (0))));
- }
- else
- {
- i = 32;
- to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
- }
-
- /* If the table has non-nil default value, copy it. */
- if (!NILP (tmp = XCHAR_TABLE (table)->defalt))
- XCHAR_TABLE (table)->defalt = Fcopy_sequence (tmp);
-
- /* At last, copy the remaining elements while paying attention to a
- sub char table. */
- for (; i < to; i++)
- if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
- XCHAR_TABLE (table)->contents[i]
- = (SUB_CHAR_TABLE_P (tmp)
- ? copy_category_table (tmp) : Fcopy_sequence (tmp));
+ if (! NILP (XCHAR_TABLE (table)->defalt))
+ XCHAR_TABLE (table)->defalt
+ = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
+ XCHAR_TABLE (table)->extras[0]
+ = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
+ map_char_table (copy_category_entry, Qnil, table, table);
return table;
}
@@ -258,9 +244,12 @@ DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
()
{
Lisp_Object val;
+ int i;
val = Fmake_char_table (Qcategory_table, Qnil);
XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
+ for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
+ XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
Fset_char_table_extra_slot (val, make_number (0),
Fmake_vector (make_number (95), Qnil));
return val;
@@ -281,6 +270,13 @@ DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
}
+Lisp_Object
+char_category_set (c)
+ int c;
+{
+ return CHAR_TABLE_REF (current_buffer->category_table, c);
+}
+
DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
doc: /* Return the category set of CHAR. */)
(ch)
@@ -313,34 +309,6 @@ The return value is a string containing those same categories. */)
return build_string (str);
}
-/* Modify all category sets stored under sub char-table TABLE so that
- they contain (SET_VALUE is t) or don't contain (SET_VALUE is nil)
- CATEGORY. */
-
-void
-modify_lower_category_set (table, category, set_value)
- Lisp_Object table, category, set_value;
-{
- Lisp_Object val;
- int i;
-
- val = XCHAR_TABLE (table)->defalt;
- if (!CATEGORY_SET_P (val))
- val = MAKE_CATEGORY_SET;
- SET_CATEGORY_SET (val, category, set_value);
- XCHAR_TABLE (table)->defalt = val;
-
- for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++)
- {
- val = XCHAR_TABLE (table)->contents[i];
-
- if (CATEGORY_SET_P (val))
- SET_CATEGORY_SET (val, category, set_value);
- else if (SUB_CHAR_TABLE_P (val))
- modify_lower_category_set (val, category, set_value);
- }
-}
-
void
set_category_set (category_set, category, val)
Lisp_Object category_set, category, val;
@@ -360,113 +328,55 @@ DEFUN ("modify-category-entry", Fmodify_category_entry,
Smodify_category_entry, 2, 4, 0,
doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
The category is changed only for table TABLE, which defaults to
- the current buffer's category table.
+the current buffer's category table.
+CHARACTER can be either a single character or a cons representing the
+lower and upper ends of an inclusive character range to modify.
If optional fourth argument RESET is non-nil,
then delete CATEGORY from the category set instead of adding it. */)
(character, category, table, reset)
Lisp_Object character, category, table, reset;
{
- int c, charset, c1, c2;
Lisp_Object set_value; /* Actual value to be set in category sets. */
- Lisp_Object val, category_set;
+ Lisp_Object category_set;
+ int start, end;
+ int from, to;
- CHECK_NUMBER (character);
- c = XINT (character);
- CHECK_CATEGORY (category);
- table = check_category_table (table);
-
- if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
- error ("Undefined category: %c", XFASTINT (category));
-
- set_value = NILP (reset) ? Qt : Qnil;
-
- if (c < CHAR_TABLE_SINGLE_BYTE_SLOTS)
+ if (INTEGERP (character))
{
- val = XCHAR_TABLE (table)->contents[c];
- if (!CATEGORY_SET_P (val))
- XCHAR_TABLE (table)->contents[c] = (val = MAKE_CATEGORY_SET);
- SET_CATEGORY_SET (val, category, set_value);
- return Qnil;
+ CHECK_CHARACTER (character);
+ start = end = XFASTINT (character);
}
-
- SPLIT_CHAR (c, charset, c1, c2);
-
- /* The top level table. */
- val = XCHAR_TABLE (table)->contents[charset + 128];
- if (CATEGORY_SET_P (val))
- category_set = val;
- else if (!SUB_CHAR_TABLE_P (val))
- {
- category_set = val = MAKE_CATEGORY_SET;
- XCHAR_TABLE (table)->contents[charset + 128] = category_set;
- }
-
- if (c1 <= 0)
- {
- /* Only a charset is specified. */
- if (SUB_CHAR_TABLE_P (val))
- /* All characters in CHARSET should be the same as for having
- CATEGORY or not. */
- modify_lower_category_set (val, category, set_value);
- else
- SET_CATEGORY_SET (category_set, category, set_value);
- return Qnil;
- }
-
- /* The second level table. */
- if (!SUB_CHAR_TABLE_P (val))
+ else
{
- val = make_sub_char_table (Qnil);
- XCHAR_TABLE (table)->contents[charset + 128] = val;
- /* We must set default category set of CHARSET in `defalt' slot. */
- XCHAR_TABLE (val)->defalt = category_set;
+ CHECK_CONS (character);
+ CHECK_CHARACTER_CAR (character);
+ CHECK_CHARACTER_CDR (character);
+ start = XFASTINT (XCAR (character));
+ end = XFASTINT (XCDR (character));
}
- table = val;
- val = XCHAR_TABLE (table)->contents[c1];
- if (CATEGORY_SET_P (val))
- category_set = val;
- else if (!SUB_CHAR_TABLE_P (val))
- {
- category_set = val = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
- XCHAR_TABLE (table)->contents[c1] = category_set;
- }
+ CHECK_CATEGORY (category);
+ table = check_category_table (table);
- if (c2 <= 0)
- {
- if (SUB_CHAR_TABLE_P (val))
- /* All characters in C1 group of CHARSET should be the same as
- for CATEGORY. */
- modify_lower_category_set (val, category, set_value);
- else
- SET_CATEGORY_SET (category_set, category, set_value);
- return Qnil;
- }
+ if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
+ error ("Undefined category: %c", XFASTINT (category));
- /* The third (bottom) level table. */
- if (!SUB_CHAR_TABLE_P (val))
- {
- val = make_sub_char_table (Qnil);
- XCHAR_TABLE (table)->contents[c1] = val;
- /* We must set default category set of CHARSET and C1 in
- `defalt' slot. */
- XCHAR_TABLE (val)->defalt = category_set;
- }
- table = val;
+ set_value = NILP (reset) ? Qt : Qnil;
- val = XCHAR_TABLE (table)->contents[c2];
- if (CATEGORY_SET_P (val))
- category_set = val;
- else if (!SUB_CHAR_TABLE_P (val))
+ while (start <= end)
{
- category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
- XCHAR_TABLE (table)->contents[c2] = category_set;
+ category_set = char_table_ref_and_range (table, start, &from, &to);
+ if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
+ {
+ category_set = Fcopy_sequence (category_set);
+ SET_CATEGORY_SET (category_set, category, set_value);
+ if (to > end)
+ char_table_set_range (table, start, end, category_set);
+ else
+ char_table_set_range (table, start, to, category_set);
+ }
+ start = to + 1;
}
- else
- /* This should never happen. */
- error ("Invalid category table");
-
- SET_CATEGORY_SET (category_set, category, set_value);
return Qnil;
}
diff --git a/src/category.h b/src/category.h
index d48d99df805..0b909de7ddb 100644
--- a/src/category.h
+++ b/src/category.h
@@ -1,6 +1,9 @@
/* Declarations having to do with Emacs category tables.
Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -95,21 +98,7 @@ extern Lisp_Object _temp_category_set;
#define Vstandard_category_table buffer_defaults.category_table
/* Return the category set of character C in the current category table. */
-#ifdef __GNUC__
-#define CATEGORY_SET(c) \
- ({ Lisp_Object table = current_buffer->category_table; \
- Lisp_Object temp; \
- if ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS) \
- while (NILP (temp = XCHAR_TABLE (table)->contents[(unsigned char) c]) \
- && NILP (temp = XCHAR_TABLE (table)->defalt)) \
- table = XCHAR_TABLE (table)->parent; \
- else \
- temp = Faref (table, make_number (c)); \
- temp; })
-#else
-#define CATEGORY_SET(c) \
- Faref (current_buffer->category_table, make_number (c))
-#endif
+#define CATEGORY_SET(c) char_category_set (c)
/* Return the doc string of CATEGORY in category table TABLE. */
#define CATEGORY_DOCSTRING(table, category) \
diff --git a/src/ccl.c b/src/ccl.c
index 4a47ca063f3..b9dd47ff17c 100644
--- a/src/ccl.c
+++ b/src/ccl.c
@@ -1,7 +1,10 @@
/* CCL (Code Conversion Language) interpreter.
Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
Copyright (C) 2001, 2002 Free Software Foundation, Inc.
- Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -25,10 +28,13 @@ Boston, MA 02111-1307, USA. */
#include <stdio.h>
#include "lisp.h"
+#include "character.h"
#include "charset.h"
#include "ccl.h"
#include "coding.h"
+Lisp_Object Qccl, Qcclp;
+
/* This contains all code conversion map available to CCL. */
Lisp_Object Vcode_conversion_map_vector;
@@ -62,6 +68,8 @@ Lisp_Object Vtranslation_hash_table_vector;
#define GET_HASH_TABLE(id) \
(XHASH_TABLE (XCDR(XVECTOR(Vtranslation_hash_table_vector)->contents[(id)])))
+extern int charset_unicode;
+
/* CCL (Code Conversion Language) is a simple language which has
operations on one input buffer, one output buffer, and 7 registers.
The syntax of CCL is described in `ccl.el'. Emacs Lisp function
@@ -720,56 +728,24 @@ while(0)
/* Encode one character CH to multibyte form and write to the current
output buffer. If CH is less than 256, CH is written as is. */
-#define CCL_WRITE_CHAR(ch) \
- do { \
- int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
- { \
- if (bytes == 1) \
- { \
- *dst++ = (ch); \
- if (extra_bytes && (ch) >= 0x80 && (ch) < 0xA0) \
- /* We may have to convert this eight-bit char to \
- multibyte form later. */ \
- extra_bytes++; \
- } \
- else if (CHAR_VALID_P (ch, 0)) \
- dst += CHAR_STRING (ch, dst); \
- else \
- CCL_INVALID_CMD; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
- } while (0)
-
-/* Encode one character CH to multibyte form and write to the current
- output buffer. The output bytes always forms a valid multibyte
- sequence. */
-#define CCL_WRITE_MULTIBYTE_CHAR(ch) \
- do { \
- int bytes = CHAR_BYTES (ch); \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
- { \
- if (CHAR_VALID_P ((ch), 0)) \
- dst += CHAR_STRING ((ch), dst); \
- else \
- CCL_INVALID_CMD; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
+#define CCL_WRITE_CHAR(ch) \
+ do { \
+ if (! dst) \
+ CCL_INVALID_CMD; \
+ else if (dst < dst_end) \
+ *dst++ = (ch); \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Write a string at ccl_prog[IC] of length LEN to the current output
buffer. */
#define CCL_WRITE_STRING(len) \
do { \
+ int i; \
if (!dst) \
CCL_INVALID_CMD; \
- else if (dst + len <= (dst_bytes ? dst_end : src)) \
+ else if (dst + len <= dst_end) \
for (i = 0; i < len; i++) \
*dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \
>> ((2 - (i % 3)) * 8)) & 0xFF; \
@@ -777,78 +753,55 @@ while(0)
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
-/* Read one byte from the current input buffer into REGth register. */
-#define CCL_READ_CHAR(REG) \
- do { \
- if (!src) \
- CCL_INVALID_CMD; \
- else if (src < src_end) \
- { \
- REG = *src++; \
- if (REG == '\n' \
- && ccl->eol_type != CODING_EOL_LF) \
- { \
- /* We are encoding. */ \
- if (ccl->eol_type == CODING_EOL_CRLF) \
- { \
- if (ccl->cr_consumed) \
- ccl->cr_consumed = 0; \
- else \
- { \
- ccl->cr_consumed = 1; \
- REG = '\r'; \
- src--; \
- } \
- } \
- else \
- REG = '\r'; \
- } \
- if (REG == LEADING_CODE_8_BIT_CONTROL \
- && ccl->multibyte) \
- REG = *src++ - 0x20; \
- } \
- else if (ccl->last_block) \
- { \
- ic = ccl->eof_ic; \
- goto ccl_repeat; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
- } while (0)
-
-
-/* Set C to the character code made from CHARSET and CODE. This is
- like MAKE_CHAR but check the validity of CHARSET and CODE. If they
- are not valid, set C to (CODE & 0xFF) because that is usually the
- case that CCL_ReadMultibyteChar2 read an invalid code and it set
- CODE to that invalid byte. */
-
-#define CCL_MAKE_CHAR(charset, code, c) \
+/* Read one byte from the current input buffer into Rth register. */
+#define CCL_READ_CHAR(r) \
+ do { \
+ if (! src) \
+ CCL_INVALID_CMD; \
+ else if (src < src_end) \
+ r = *src++; \
+ else if (ccl->last_block) \
+ { \
+ ic = ccl->eof_ic; \
+ goto ccl_repeat; \
+ } \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
+ } while (0)
+
+/* Decode CODE by a charset whose id is ID. If ID is 0, return CODE
+ as is for backward compatibility. Assume that we can use the
+ variable `charset'. */
+
+#define CCL_DECODE_CHAR(id, code) \
+ ((id) == 0 ? (code) \
+ : (charset = CHARSET_FROM_ID ((id)), DECODE_CHAR (charset, (code))))
+
+/* Encode character C by some of charsets in CHARSET_LIST. Set ID to
+ the id of the used charset, ENCODED to the resulf of encoding.
+ Assume that we can use the variable `charset'. */
+
+#define CCL_ENCODE_CHAR(c, charset_list, id, encoded) \
do { \
- if (charset == CHARSET_ASCII) \
- c = code & 0xFF; \
- else if (CHARSET_DEFINED_P (charset) \
- && (code & 0x7F) >= 32 \
- && (code < 256 || ((code >> 7) & 0x7F) >= 32)) \
- { \
- int c1 = code & 0x7F, c2 = 0; \
+ unsigned code; \
\
- if (code >= 256) \
- c2 = c1, c1 = (code >> 7) & 0x7F; \
- c = MAKE_CHAR (charset, c1, c2); \
+ charset = char_charset ((c), (charset_list), &code); \
+ if (! charset && ! NILP (charset_list)) \
+ charset = char_charset ((c), Qnil, &code); \
+ if (charset) \
+ { \
+ (id) = CHARSET_ID (charset); \
+ (encoded) = code; \
} \
- else \
- c = code & 0xFF; \
- } while (0)
-
+ } while (0)
-/* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting
- text goes to a place pointed by DESTINATION, the length of which
- should not exceed DST_BYTES. The bytes actually processed is
- returned as *CONSUMED. The return value is the length of the
- resulting text. As a side effect, the contents of CCL registers
- are updated. If SOURCE or DESTINATION is NULL, only operations on
- registers are permitted. */
+/* Execute CCL code on characters at SOURCE (length SRC_SIZE). The
+ resulting text goes to a place pointed by DESTINATION, the length
+ of which should not exceed DST_SIZE. As a side effect, how many
+ characters are consumed and produced are recorded in CCL->consumed
+ and CCL->produced, and the contents of CCL registers are updated.
+ If SOURCE or DESTINATION is NULL, only operations on registers are
+ permitted. */
#ifdef CCL_DEBUG
#define CCL_DEBUG_BACKTRACE_LEN 256
@@ -865,34 +818,30 @@ struct ccl_prog_stack
/* For the moment, we only support depth 256 of stack. */
static struct ccl_prog_stack ccl_prog_stack_struct[256];
-int
-ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
+void
+ccl_driver (ccl, source, destination, src_size, dst_size, charset_list)
struct ccl_program *ccl;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
- int *consumed;
+ int *source, *destination;
+ int src_size, dst_size;
+ Lisp_Object charset_list;
{
register int *reg = ccl->reg;
register int ic = ccl->ic;
register int code = 0, field1, field2;
register Lisp_Object *ccl_prog = ccl->prog;
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned char *dst = destination, *dst_end = dst + dst_bytes;
+ int *src = source, *src_end = src + src_size;
+ int *dst = destination, *dst_end = dst + dst_size;
int jump_address;
int i = 0, j, op;
int stack_idx = ccl->stack_idx;
/* Instruction counter of the current CCL code. */
int this_ic = 0;
- /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But,
- each of them will be converted to multibyte form of 2-byte
- sequence. For that conversion, we remember how many more bytes
- we must keep in DESTINATION in this variable. */
- int extra_bytes = ccl->eight_bit_control;
+ struct charset *charset;
if (ic >= ccl->eof_ic)
ic = CCL_HEADER_MAIN;
- if (ccl->buf_magnification == 0) /* We can't produce any bytes. */
+ if (ccl->buf_magnification == 0) /* We can't read/produce any bytes. */
dst = NULL;
/* Set mapping stack pointer. */
@@ -917,8 +866,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
/* We can't just signal Qquit, instead break the loop as if
the whole data is processed. Don't reset Vquit_flag, it
must be handled later at a safer place. */
- if (consumed)
- src = source + src_bytes;
+ if (src)
+ src = source + src_size;
ccl->status = CCL_STAT_QUIT;
break;
}
@@ -1233,8 +1182,22 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_LE: reg[rrr] = i <= j; break;
case CCL_GE: reg[rrr] = i >= j; break;
case CCL_NE: reg[rrr] = i != j; break;
- case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break;
- case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break;
+ case CCL_DECODE_SJIS:
+ {
+ i = (i << 8) | j;
+ SJIS_TO_JIS (i);
+ reg[rrr] = i >> 8;
+ reg[7] = i & 0xFF;
+ break;
+ }
+ case CCL_ENCODE_SJIS:
+ {
+ i = (i << 8) | j;
+ JIS_TO_SJIS (i);
+ reg[rrr] = i >> 8;
+ reg[7] = i & 0xFF;
+ break;
+ }
default: CCL_INVALID_CMD;
}
code &= 0x1F;
@@ -1254,165 +1217,29 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_ReadMultibyteChar2:
if (!src)
CCL_INVALID_CMD;
-
- if (src >= src_end)
- {
- src++;
- goto ccl_read_multibyte_character_suspend;
- }
-
- if (!ccl->multibyte)
- {
- int bytes;
- if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
- {
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = *src++;
- break;
- }
- }
- i = *src++;
- if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
- {
- /* We are encoding. */
- if (ccl->eol_type == CODING_EOL_CRLF)
- {
- if (ccl->cr_consumed)
- ccl->cr_consumed = 0;
- else
- {
- ccl->cr_consumed = 1;
- i = '\r';
- src--;
- }
- }
- else
- i = '\r';
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i < 0x80)
- {
- /* ASCII */
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
- {
- int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
-
- if (dimension == 0)
- {
- /* `i' is a leading code for an undefined charset. */
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else if (src + dimension > src_end)
- goto ccl_read_multibyte_character_suspend;
- else
- {
- reg[RRR] = i;
- i = (*src++ & 0x7F);
- if (dimension == 1)
- reg[rrr] = i;
- else
- reg[rrr] = ((i << 7) | (*src++ & 0x7F));
- }
- }
- else if ((i == LEADING_CODE_PRIVATE_11)
- || (i == LEADING_CODE_PRIVATE_12))
- {
- if ((src + 1) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- reg[rrr] = (*src++ & 0x7F);
- }
- else if ((i == LEADING_CODE_PRIVATE_21)
- || (i == LEADING_CODE_PRIVATE_22))
- {
- if ((src + 2) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- i = (*src++ & 0x7F);
- reg[rrr] = ((i << 7) | (*src & 0x7F));
- src++;
- }
- else if (i == LEADING_CODE_8_BIT_CONTROL)
- {
- if (src >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = (*src++ - 0x20);
- }
- else if (i >= 0xA0)
- {
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else
- {
- /* INVALID CODE. Return a single byte character. */
- reg[RRR] = CHARSET_ASCII;
- reg[rrr] = i;
- }
- break;
-
- ccl_read_multibyte_character_suspend:
- if (src <= src_end && !ccl->multibyte && ccl->last_block)
- {
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = i;
- break;
- }
- src--;
- if (ccl->last_block)
- {
- ic = ccl->eof_ic;
- goto ccl_repeat;
- }
- else
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC);
-
+ CCL_READ_CHAR (i);
+ CCL_ENCODE_CHAR (i, charset_list, reg[RRR], reg[rrr]);
break;
case CCL_WriteMultibyteChar2:
- i = reg[RRR]; /* charset */
- if (i == CHARSET_ASCII
- || i == CHARSET_8_BIT_CONTROL
- || i == CHARSET_8_BIT_GRAPHIC)
- i = reg[rrr] & 0xFF;
- else if (CHARSET_DIMENSION (i) == 1)
- i = ((i - 0x70) << 7) | (reg[rrr] & 0x7F);
- else if (i < MIN_CHARSET_PRIVATE_DIMENSION2)
- i = ((i - 0x8F) << 14) | reg[rrr];
- else
- i = ((i - 0xE0) << 14) | reg[rrr];
-
- CCL_WRITE_MULTIBYTE_CHAR (i);
-
+ if (! dst)
+ CCL_INVALID_CMD;
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
+ CCL_WRITE_CHAR (i);
break;
case CCL_TranslateCharacter:
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
- op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]),
- i, -1, 0, 0);
- SPLIT_CHAR (op, reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
-
- reg[rrr] = i;
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
+ op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), i);
+ CCL_ENCODE_CHAR (op, charset_list, reg[RRR], reg[rrr]);
break;
case CCL_TranslateCharacterConstTbl:
op = XINT (ccl_prog[ic]); /* table */
ic++;
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
- op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0);
- SPLIT_CHAR (op, reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
-
- reg[rrr] = i;
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
+ op = translate_char (GET_TRANSLATION_TABLE (op), i);
+ CCL_ENCODE_CHAR (op, charset_list, reg[RRR], reg[rrr]);
break;
case CCL_LookupIntConstTbl:
@@ -1426,12 +1253,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
{
Lisp_Object opl;
opl = HASH_VALUE (h, op);
- if (!CHAR_VALID_P (XINT (opl), 0))
+ if (! CHARACTERP (XINT (opl)))
CCL_INVALID_CMD;
- SPLIT_CHAR (XINT (opl), reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
- reg[rrr] = i;
+ reg[RRR] = charset_unicode;
+ reg[rrr] = op;
reg[7] = 1; /* r7 true for success */
}
else
@@ -1442,7 +1267,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_LookupCharConstTbl:
op = XINT (ccl_prog[ic]); /* table */
ic++;
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
{
struct Lisp_Hash_Table *h = GET_HASH_TABLE (op);
@@ -1876,10 +1701,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
}
msglen = strlen (msg);
- if (dst + msglen <= (dst_bytes ? dst_end : src))
+ if (dst + msglen <= dst_end)
{
- bcopy (msg, dst, msglen);
- dst += msglen;
+ for (i = 0; i < msglen; i++)
+ *dst++ = msg[i];
}
if (ccl->status == CCL_STAT_INVALID_CMD)
@@ -1905,10 +1730,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
ccl->ic = ic;
ccl->stack_idx = stack_idx;
ccl->prog = ccl_prog;
- ccl->eight_bit_control = (extra_bytes > 1);
- if (consumed)
- *consumed = src - source;
- return (dst ? dst - destination : 0);
+ ccl->consumed = src - source;
+ ccl->produced = dst - destination;
}
/* Resolve symbols in the specified CCL code (Lisp vector). This
@@ -2058,7 +1881,6 @@ setup_ccl_program (ccl, ccl_prog)
ccl->private_state = 0;
ccl->status = 0;
ccl->stack_idx = 0;
- ccl->eol_type = CODING_EOL_LF;
ccl->suppress_error = 0;
ccl->eight_bit_control = 0;
return 0;
@@ -2120,7 +1942,7 @@ programs. */)
? XINT (AREF (reg, i))
: 0);
- ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0);
+ ccl_driver (&ccl, NULL, NULL, 0, 0, Qnil);
QUIT;
if (ccl.status != CCL_STAT_SUCCESS)
error ("Error in CCL program at %dth code", ccl.ic);
@@ -2161,10 +1983,13 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */
{
Lisp_Object val;
struct ccl_program ccl;
- int i, produced;
+ int i;
int outbufsize;
- char *outbuf;
- struct gcpro gcpro1, gcpro2;
+ unsigned char *outbuf, *outp;
+ int str_chars, str_bytes;
+#define CCL_EXECUTE_BUF_SIZE 1024
+ int source[CCL_EXECUTE_BUF_SIZE], destination[CCL_EXECUTE_BUF_SIZE];
+ int consumed_chars, consumed_bytes, produced_chars;
if (setup_ccl_program (&ccl, ccl_prog) < 0)
error ("Invalid CCL program");
@@ -2174,7 +1999,8 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */
error ("Length of vector STATUS is not 9");
CHECK_STRING (str);
- GCPRO2 (status, str);
+ str_chars = SCHARS (str);
+ str_bytes = SBYTES (str);
for (i = 0; i < 8; i++)
{
@@ -2189,34 +2015,89 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */
if (ccl.ic < i && i < ccl.size)
ccl.ic = i;
}
- outbufsize = SBYTES (str) * ccl.buf_magnification + 256;
- outbuf = (char *) xmalloc (outbufsize);
- ccl.last_block = NILP (contin);
- ccl.multibyte = STRING_MULTIBYTE (str);
- produced = ccl_driver (&ccl, SDATA (str), outbuf,
- SBYTES (str), outbufsize, (int *) 0);
- for (i = 0; i < 8; i++)
- XSET (AREF (status, i), Lisp_Int, ccl.reg[i]);
- XSETINT (AREF (status, 8), ccl.ic);
- UNGCPRO;
- if (NILP (unibyte_p))
+ outbufsize = (ccl.buf_magnification
+ ? str_bytes * ccl.buf_magnification + 256
+ : str_bytes + 256);
+ outp = outbuf = (unsigned char *) xmalloc (outbufsize);
+
+ consumed_chars = consumed_bytes = 0;
+ produced_chars = 0;
+ while (consumed_bytes < str_bytes)
{
- int nchars;
+ const unsigned char *p = SDATA (str) + consumed_bytes;
+ const unsigned char *endp = SDATA (str) + str_bytes;
+ int i = 0;
+ int *src, src_size;
+
+ if (endp - p == str_chars - consumed_chars)
+ while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
+ source[i++] = *p++;
+ else
+ while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
+ source[i++] = STRING_CHAR_ADVANCE (p);
+ consumed_chars += i;
+ consumed_bytes = p - SDATA (str);
+
+ if (consumed_bytes == str_bytes)
+ ccl.last_block = NILP (contin);
+ src = source;
+ src_size = i;
+ while (1)
+ {
+ ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE,
+ Qnil);
+ if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
+ break;
+ produced_chars += ccl.produced;
+ if (NILP (unibyte_p))
+ {
+ if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced
+ > outbufsize)
+ {
+ int offset = outp - outbuf;
+ outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced;
+ outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
+ outp = outbuf + offset;
+ }
+ for (i = 0; i < ccl.produced; i++)
+ CHAR_STRING_ADVANCE (destination[i], outp);
+ }
+ else
+ {
+ if (outp - outbuf + ccl.produced > outbufsize)
+ {
+ int offset = outp - outbuf;
+ outbufsize += ccl.produced;
+ outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
+ outp = outbuf + offset;
+ }
+ for (i = 0; i < ccl.produced; i++)
+ *outp++ = destination[i];
+ }
+ src += ccl.consumed;
+ src_size -= ccl.consumed;
+ }
- produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars);
- val = make_multibyte_string (outbuf, nchars, produced);
+ if (ccl.status != CCL_STAT_SUSPEND_BY_SRC)
+ break;
}
- else
- val = make_unibyte_string (outbuf, produced);
- xfree (outbuf);
- QUIT;
- if (ccl.status == CCL_STAT_SUSPEND_BY_DST)
- error ("Output buffer for the CCL programs overflow");
+
if (ccl.status != CCL_STAT_SUCCESS
&& ccl.status != CCL_STAT_SUSPEND_BY_SRC)
error ("Error in CCL program at %dth code", ccl.ic);
+ for (i = 0; i < 8; i++)
+ XSET (XVECTOR (status)->contents[i], Lisp_Int, ccl.reg[i]);
+ XSETINT (XVECTOR (status)->contents[8], ccl.ic);
+
+ if (NILP (unibyte_p))
+ val = make_multibyte_string ((char *) outbuf, produced_chars,
+ outp - outbuf);
+ else
+ val = make_unibyte_string ((char *) outbuf, produced_chars);
+ xfree (outbuf);
+
return val;
}
@@ -2361,6 +2242,12 @@ syms_of_ccl ()
staticpro (&Vccl_program_table);
Vccl_program_table = Fmake_vector (make_number (32), Qnil);
+ Qccl = intern ("ccl");
+ staticpro (&Qccl);
+
+ Qcclp = intern ("cclp");
+ staticpro (&Qcclp);
+
Qccl_program = intern ("ccl-program");
staticpro (&Qccl_program);
diff --git a/src/ccl.h b/src/ccl.h
index bfd4757b41a..e1b8285df19 100644
--- a/src/ccl.h
+++ b/src/ccl.h
@@ -1,6 +1,9 @@
/* Header for CCL (Code Conversion Language) interpreter.
Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -53,16 +56,14 @@ struct ccl_program {
many times bigger the output buffer
should be than the input buffer. */
int stack_idx; /* How deep the call of CCL_Call is nested. */
- int eol_type; /* When the CCL program is used for
- encoding by a coding system, set to
- the eol_type of the coding system.
- In other cases, always
- CODING_EOL_LF. */
- int multibyte; /* 1 if the source text is multibyte. */
+ int src_multibyte; /* 1 if the input buffer is multibyte. */
+ int dst_multibyte; /* 1 if the output buffer is multibyte. */
int cr_consumed; /* Flag for encoding DOS-like EOL
format when the CCL program is used
for encoding by a coding
system. */
+ int consumed;
+ int produced;
int suppress_error; /* If nonzero, don't insert error
message in the output. */
int eight_bit_control; /* If nonzero, ccl_driver counts all
@@ -76,13 +77,13 @@ struct ccl_program {
coding_system. */
struct ccl_spec {
- struct ccl_program decoder;
- struct ccl_program encoder;
- unsigned char valid_codes[256];
+ struct ccl_program ccl;
int cr_carryover; /* CR carryover flag. */
unsigned char eight_bit_carryover[MAX_MULTIBYTE_LENGTH];
};
+#define CODING_SPEC_CCL_PROGRAM(coding) ((coding)->spec.ccl.ccl)
+
/* Alist of fontname patterns vs corresponding CCL program. */
extern Lisp_Object Vfont_ccl_encoder_alist;
@@ -90,8 +91,8 @@ extern Lisp_Object Vfont_ccl_encoder_alist;
execution of ccl program CCL_PROG (symbol or vector). */
extern int setup_ccl_program P_ ((struct ccl_program *, Lisp_Object));
-extern int ccl_driver P_ ((struct ccl_program *, unsigned char *,
- unsigned char *, int, int, int *));
+extern void ccl_driver P_ ((struct ccl_program *, int *, int *, int, int,
+ Lisp_Object));
/* Vector of CCL program names vs corresponding program data. */
extern Lisp_Object Vccl_program_table;
@@ -100,4 +101,14 @@ extern Lisp_Object Vccl_program_table;
is an index for Vccl_protram_table. */
extern Lisp_Object Qccl_program_idx;
+extern Lisp_Object Qccl, Qcclp;
+
+EXFUN (Fccl_program_p, 1);
+
+#define CHECK_CCL_PROGRAM(x) \
+ do { \
+ if (NILP (Fccl_program_p (x))) \
+ x = wrong_type_argument (Qcclp, (x)); \
+ } while (0);
+
#endif /* EMACS_CCL_H */
diff --git a/src/character.c b/src/character.c
new file mode 100644
index 00000000000..b25aff8083c
--- /dev/null
+++ b/src/character.c
@@ -0,0 +1,975 @@
+/* Basic character support.
+ Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* At first, see the document in `character.h' to understand the code
+ in this file. */
+
+#ifdef emacs
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef emacs
+
+#include <sys/types.h>
+#include "lisp.h"
+#include "character.h"
+#include "buffer.h"
+#include "charset.h"
+#include "composite.h"
+#include "disptab.h"
+
+#else /* not emacs */
+
+#include "mulelib.h"
+
+#endif /* emacs */
+
+Lisp_Object Qcharacterp;
+
+/* Vector of translation table ever defined.
+ ID of a translation table is used to index this vector. */
+Lisp_Object Vtranslation_table_vector;
+
+/* A char-table for characters which may invoke auto-filling. */
+Lisp_Object Vauto_fill_chars;
+
+Lisp_Object Qauto_fill_chars;
+
+Lisp_Object Vchar_unify_table;
+
+/* A char-table. An element is non-nil iff the corresponding
+ character has a printable glyph. */
+Lisp_Object Vprintable_chars;
+
+/* A char-table. An elemnent is a column-width of the corresponding
+ character. */
+Lisp_Object Vchar_width_table;
+
+/* A char-table. An element is a symbol indicating the direction
+ property of corresponding character. */
+Lisp_Object Vchar_direction_table;
+
+/* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */
+unsigned char *_fetch_multibyte_char_p;
+
+/* Char table of scripts. */
+Lisp_Object Vchar_script_table;
+
+static Lisp_Object Qchar_script_table;
+
+/* Mapping table from unibyte chars to multibyte chars. */
+int unibyte_to_multibyte_table[256];
+
+
+
+int
+char_string (c, p)
+ int c;
+ unsigned char *p;
+{
+ int bytes;
+
+ if (c & CHAR_MODIFIER_MASK)
+ {
+ /* As a character not less than 256 can't have modifier bits, we
+ just ignore the bits. */
+ if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+ {
+ /* For Meta, Shift, and Control modifiers, we need special care. */
+ if (c & CHAR_META)
+ {
+ /* Move the meta bit to the right place for a string. */
+ c = (c & ~CHAR_META) | 0x80;
+ }
+ if (c & CHAR_SHIFT)
+ {
+ /* Shift modifier is valid only with [A-Za-z]. */
+ if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+ c &= ~CHAR_SHIFT;
+ else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+ c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ }
+ if (c & CHAR_CTL)
+ {
+ /* Simulate the code in lread.c. */
+ /* Allow `\C- ' and `\C-?'. */
+ if (c == (CHAR_CTL | ' '))
+ c = 0;
+ else if (c == (CHAR_CTL | '?'))
+ c = 127;
+ /* ASCII control chars are made from letters (both cases),
+ as well as the non-letters within 0100...0137. */
+ else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ }
+ }
+
+ /* If C still has any modifier bits, just ignore it. */
+ c &= ~CHAR_MODIFIER_MASK;
+ }
+
+ MAYBE_UNIFY_CHAR (c);
+
+ if (c <= MAX_3_BYTE_CHAR)
+ {
+ bytes = CHAR_STRING (c, p);
+ }
+ else if (c <= MAX_4_BYTE_CHAR)
+ {
+ p[0] = (0xF0 | (c >> 18));
+ p[1] = (0x80 | ((c >> 12) & 0x3F));
+ p[2] = (0x80 | ((c >> 6) & 0x3F));
+ p[3] = (0x80 | (c & 0x3F));
+ bytes = 4;
+ }
+ else if (c <= MAX_5_BYTE_CHAR)
+ {
+ p[0] = 0xF8;
+ p[1] = (0x80 | ((c >> 18) & 0x0F));
+ p[2] = (0x80 | ((c >> 12) & 0x3F));
+ p[3] = (0x80 | ((c >> 6) & 0x3F));
+ p[4] = (0x80 | (c & 0x3F));
+ bytes = 5;
+ }
+ else
+ {
+ c = CHAR_TO_BYTE8 (c);
+ bytes = BYTE8_STRING (c, p);
+ }
+
+ return bytes;
+}
+
+
+int
+string_char (p, advanced, len)
+ const unsigned char *p;
+ const unsigned char **advanced;
+ int *len;
+{
+ int c;
+ const unsigned char *saved_p = p;
+
+ if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
+ {
+ c = STRING_CHAR_ADVANCE (p);
+ }
+ else if (! (*p & 0x08))
+ {
+ c = ((((p)[0] & 0xF) << 18)
+ | (((p)[1] & 0x3F) << 12)
+ | (((p)[2] & 0x3F) << 6)
+ | ((p)[3] & 0x3F));
+ p += 4;
+ }
+ else
+ {
+ c = ((((p)[1] & 0x3F) << 18)
+ | (((p)[2] & 0x3F) << 12)
+ | (((p)[3] & 0x3F) << 6)
+ | ((p)[4] & 0x3F));
+ p += 5;
+ }
+
+ MAYBE_UNIFY_CHAR (c);
+
+ if (len)
+ *len = p - saved_p;
+ if (advanced)
+ *advanced = p;
+ return c;
+}
+
+
+/* Translate character C by translation table TABLE. If C is
+ negative, translate a character specified by CHARSET and CODE. If
+ no translation is found in TABLE, return the untranslated
+ character. */
+
+int
+translate_char (table, c)
+ Lisp_Object table;
+ int c;
+{
+ Lisp_Object ch;
+
+ if (! CHAR_TABLE_P (table))
+ return c;
+ ch = CHAR_TABLE_REF (table, c);
+ if (! CHARACTERP (ch))
+ return c;
+ return XINT (ch);
+}
+
+/* Convert the multibyte character C to unibyte 8-bit character based
+ on the current value of charset_unibyte. If dimension of
+ charset_unibyte is more than one, return (C & 0xFF).
+
+ The argument REV_TBL is now ignored. It will be removed in the
+ future. */
+
+int
+multibyte_char_to_unibyte (c, rev_tbl)
+ int c;
+ Lisp_Object rev_tbl;
+{
+ struct charset *charset;
+ unsigned c1;
+
+ if (CHAR_BYTE8_P (c))
+ return CHAR_TO_BYTE8 (c);
+ charset = CHARSET_FROM_ID (charset_unibyte);
+ c1 = ENCODE_CHAR (charset, c);
+ return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+}
+
+
+DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
+ doc: /* Return non-nil if OBJECT is a character. */)
+ (object, ignore)
+ Lisp_Object object, ignore;
+{
+ return (CHARACTERP (object) ? Qt : Qnil);
+}
+
+DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
+ doc: /* Return the character of the maximum code. */)
+ ()
+{
+ return make_number (MAX_CHAR);
+}
+
+DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
+ Sunibyte_char_to_multibyte, 1, 1, 0,
+ doc: /* Convert the unibyte character CH to multibyte character.
+The multibyte character is a result of decoding CH by
+the current unibyte charset (see `unibyte-charset'). */)
+ (ch)
+ Lisp_Object ch;
+{
+ int c;
+ struct charset *charset;
+
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ if (c >= 0400)
+ error ("Invalid unibyte character: %d", c);
+ charset = CHARSET_FROM_ID (charset_unibyte);
+ c = DECODE_CHAR (charset, c);
+ if (c < 0)
+ c = BYTE8_TO_CHAR (XFASTINT (ch));
+ return make_number (c);
+}
+
+DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
+ Smultibyte_char_to_unibyte, 1, 1, 0,
+ doc: /* Convert the multibyte character CH to unibyte character.\n\
+The unibyte character is a result of encoding CH by
+the current primary charset (value of `charset-primary'). */)
+ (ch)
+ Lisp_Object ch;
+{
+ int c;
+
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ c = CHAR_TO_BYTE8 (c);
+ return make_number (c);
+}
+
+DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
+ doc: /* Return 1 regardless of the argument CHAR.
+This is now an obsolete function. We keep it just for backward compatibility. */)
+ (ch)
+ Lisp_Object ch;
+{
+ CHECK_CHARACTER (ch);
+ return make_number (1);
+}
+
+DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
+ doc: /* Return width of CHAR when displayed in the current buffer.
+The width is measured by how many columns it occupies on the screen.
+Tab is taken to occupy `tab-width' columns. */)
+ (ch)
+ Lisp_Object ch;
+{
+ Lisp_Object disp;
+ int c, width;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
+
+ CHECK_CHARACTER (ch);
+ c = XINT (ch);
+
+ /* Get the way the display table would display it. */
+ disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
+
+ if (VECTORP (disp))
+ width = ASIZE (disp);
+ else
+ width = CHAR_WIDTH (c);
+
+ return make_number (width);
+}
+
+/* Return width of string STR of length LEN when displayed in the
+ current buffer. The width is measured by how many columns it
+ occupies on the screen. If PRECISION > 0, return the width of
+ longest substring that doesn't exceed PRECISION, and set number of
+ characters and bytes of the substring in *NCHARS and *NBYTES
+ respectively. */
+
+int
+c_string_width (str, len, precision, nchars, nbytes)
+ const unsigned char *str;
+ int precision, *nchars, *nbytes;
+{
+ int i = 0, i_byte = 0;
+ int width = 0;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
+
+ while (i_byte < len)
+ {
+ int bytes, thiswidth;
+ Lisp_Object val;
+ int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+
+ if (dp)
+ {
+ val = DISP_CHAR_VECTOR (dp, c);
+ if (VECTORP (val))
+ thiswidth = XVECTOR (val)->size;
+ else
+ thiswidth = CHAR_WIDTH (c);
+ }
+ else
+ {
+ thiswidth = CHAR_WIDTH (c);
+ }
+
+ if (precision > 0
+ && (width + thiswidth > precision))
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ return width;
+ }
+ i++;
+ i_byte += bytes;
+ width += thiswidth;
+ }
+
+ if (precision > 0)
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ }
+
+ return width;
+}
+
+/* Return width of string STR of length LEN when displayed in the
+ current buffer. The width is measured by how many columns it
+ occupies on the screen. */
+
+int
+strwidth (str, len)
+ unsigned char *str;
+ int len;
+{
+ return c_string_width (str, len, -1, NULL, NULL);
+}
+
+/* Return width of Lisp string STRING when displayed in the current
+ buffer. The width is measured by how many columns it occupies on
+ the screen while paying attention to compositions. If PRECISION >
+ 0, return the width of longest substring that doesn't exceed
+ PRECISION, and set number of characters and bytes of the substring
+ in *NCHARS and *NBYTES respectively. */
+
+int
+lisp_string_width (string, precision, nchars, nbytes)
+ Lisp_Object string;
+ int precision, *nchars, *nbytes;
+{
+ int len = SCHARS (string);
+ unsigned char *str = SDATA (string);
+ int i = 0, i_byte = 0;
+ int width = 0;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
+
+ while (i < len)
+ {
+ int chars, bytes, thiswidth;
+ Lisp_Object val;
+ int cmp_id;
+ int ignore, end;
+
+ if (find_composition (i, -1, &ignore, &end, &val, string)
+ && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
+ >= 0))
+ {
+ thiswidth = composition_table[cmp_id]->width;
+ chars = end - i;
+ bytes = string_char_to_byte (string, end) - i_byte;
+ }
+ else if (dp)
+ {
+ int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+
+ chars = 1;
+ val = DISP_CHAR_VECTOR (dp, c);
+ if (VECTORP (val))
+ thiswidth = XVECTOR (val)->size;
+ else
+ thiswidth = CHAR_WIDTH (c);
+ }
+ else
+ {
+ int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+
+ chars = 1;
+ thiswidth = CHAR_WIDTH (c);
+ }
+
+ if (precision > 0
+ && (width + thiswidth > precision))
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ return width;
+ }
+ i += chars;
+ i_byte += bytes;
+ width += thiswidth;
+ }
+
+ if (precision > 0)
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ }
+
+ return width;
+}
+
+DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
+ doc: /* Return width of STRING when displayed in the current buffer.
+Width is measured by how many columns it occupies on the screen.
+When calculating width of a multibyte character in STRING,
+only the base leading-code is considered; the validity of
+the following bytes is not checked. Tabs in STRING are always
+taken to occupy `tab-width' columns. */)
+ (str)
+ Lisp_Object str;
+{
+ Lisp_Object val;
+
+ CHECK_STRING (str);
+ XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
+ return val;
+}
+
+DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
+ doc: /* Return the direction of CHAR.
+The returned value is 0 for left-to-right and 1 for right-to-left. */)
+ (ch)
+ Lisp_Object ch;
+{
+ int c;
+
+ CHECK_CHARACTER (ch);
+ c = XINT (ch);
+ return CHAR_TABLE_REF (Vchar_direction_table, c);
+}
+
+DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
+ doc: /* Return number of characters between BEG and END.
+This is now an obsolete function. We keep it just for backward compatibility. */)
+ (beg, end)
+ Lisp_Object beg, end;
+{
+ int from, to;
+
+ CHECK_NUMBER_COERCE_MARKER (beg);
+ CHECK_NUMBER_COERCE_MARKER (end);
+
+ from = min (XFASTINT (beg), XFASTINT (end));
+ to = max (XFASTINT (beg), XFASTINT (end));
+
+ return make_number (to - from);
+}
+
+/* Return the number of characters in the NBYTES bytes at PTR.
+ This works by looking at the contents and checking for multibyte
+ sequences while assuming that there's no invalid sequence.
+ However, if the current buffer has enable-multibyte-characters =
+ nil, we treat each byte as a character. */
+
+int
+chars_in_text (ptr, nbytes)
+ const unsigned char *ptr;
+ int nbytes;
+{
+ /* current_buffer is null at early stages of Emacs initialization. */
+ if (current_buffer == 0
+ || NILP (current_buffer->enable_multibyte_characters))
+ return nbytes;
+
+ return multibyte_chars_in_text (ptr, nbytes);
+}
+
+/* Return the number of characters in the NBYTES bytes at PTR.
+ This works by looking at the contents and checking for multibyte
+ sequences while assuming that there's no invalid sequence. It
+ ignores enable-multibyte-characters. */
+
+int
+multibyte_chars_in_text (ptr, nbytes)
+ const unsigned char *ptr;
+ int nbytes;
+{
+ const unsigned char *endp = ptr + nbytes;
+ int chars = 0;
+
+ while (ptr < endp)
+ {
+ int len = MULTIBYTE_LENGTH (ptr, endp);
+
+ if (len == 0)
+ abort ();
+ ptr += len;
+ chars++;
+ }
+
+ return chars;
+}
+
+/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
+ characters and bytes in it, and store them in *NCHARS and *NBYTES
+ respectively. On counting bytes, pay attention to that 8-bit
+ characters not constructing a valid multibyte sequence are
+ represented by 2-byte in a multibyte text. */
+
+void
+parse_str_as_multibyte (str, len, nchars, nbytes)
+ const unsigned char *str;
+ int len, *nchars, *nbytes;
+{
+ const unsigned char *endp = str + len;
+ int n, chars = 0, bytes = 0;
+
+ if (len >= MAX_MULTIBYTE_LENGTH)
+ {
+ const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ while (str < adjusted_endp)
+ {
+ if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
+ str += n, bytes += n;
+ else
+ str++, bytes += 2;
+ chars++;
+ }
+ }
+ while (str < endp)
+ {
+ if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
+ str += n, bytes += n;
+ else
+ str++, bytes += 2;
+ chars++;
+ }
+
+ *nchars = chars;
+ *nbytes = bytes;
+ return;
+}
+
+/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
+ It actually converts only such 8-bit characters that don't contruct
+ a multibyte sequence to multibyte forms of Latin-1 characters. If
+ NCHARS is nonzero, set *NCHARS to the number of characters in the
+ text. It is assured that we can use LEN bytes at STR as a work
+ area and that is enough. Return the number of bytes of the
+ resulting text. */
+
+int
+str_as_multibyte (str, len, nbytes, nchars)
+ unsigned char *str;
+ int len, nbytes, *nchars;
+{
+ unsigned char *p = str, *endp = str + nbytes;
+ unsigned char *to;
+ int chars = 0;
+ int n;
+
+ if (nbytes >= MAX_MULTIBYTE_LENGTH)
+ {
+ unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ while (p < adjusted_endp
+ && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+ p += n, chars++;
+ }
+ while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+ p += n, chars++;
+ if (nchars)
+ *nchars = chars;
+ if (p == endp)
+ return nbytes;
+
+ to = p;
+ nbytes = endp - p;
+ endp = str + len;
+ safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
+ p = endp - nbytes;
+
+ if (nbytes >= MAX_MULTIBYTE_LENGTH)
+ {
+ unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ while (p < adjusted_endp)
+ {
+ if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+ {
+ while (n--)
+ *to++ = *p++;
+ }
+ else
+ {
+ int c = *p++;
+ c = BYTE8_TO_CHAR (c);
+ to += CHAR_STRING (c, to);
+ }
+ }
+ chars++;
+ }
+ while (p < endp)
+ {
+ if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+ {
+ while (n--)
+ *to++ = *p++;
+ }
+ else
+ {
+ int c = *p++;
+ c = BYTE8_TO_CHAR (c);
+ to += CHAR_STRING (c, to);
+ }
+ chars++;
+ }
+ if (nchars)
+ *nchars = chars;
+ return (to - str);
+}
+
+/* Parse unibyte string at STR of LEN bytes, and return the number of
+ bytes it may ocupy when converted to multibyte string by
+ `str_to_multibyte'. */
+
+int
+parse_str_to_multibyte (str, len)
+ unsigned char *str;
+ int len;
+{
+ unsigned char *endp = str + len;
+ int bytes;
+
+ for (bytes = 0; str < endp; str++)
+ bytes += (*str < 0x80) ? 1 : 2;
+ return bytes;
+}
+
+
+/* Convert unibyte text at STR of NBYTES bytes to a multibyte text
+ that contains the same single-byte characters. It actually
+ converts all 8-bit characters to multibyte forms. It is assured
+ that we can use LEN bytes at STR as a work area and that is
+ enough. */
+
+int
+str_to_multibyte (str, len, bytes)
+ unsigned char *str;
+ int len, bytes;
+{
+ unsigned char *p = str, *endp = str + bytes;
+ unsigned char *to;
+
+ while (p < endp && *p < 0x80) p++;
+ if (p == endp)
+ return bytes;
+ to = p;
+ bytes = endp - p;
+ endp = str + len;
+ safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
+ p = endp - bytes;
+ while (p < endp)
+ {
+ int c = *p++;
+
+ if (c >= 0x80)
+ c = BYTE8_TO_CHAR (c);
+ to += CHAR_STRING (c, to);
+ }
+ return (to - str);
+}
+
+/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
+ actually converts characters in the range 0x80..0xFF to
+ unibyte. */
+
+int
+str_as_unibyte (str, bytes)
+ unsigned char *str;
+ int bytes;
+{
+ const unsigned char *p = str, *endp = str + bytes;
+ unsigned char *to;
+ int c, len;
+
+ while (p < endp)
+ {
+ c = *p;
+ len = BYTES_BY_CHAR_HEAD (c);
+ if (CHAR_BYTE8_HEAD_P (c))
+ break;
+ p += len;
+ }
+ to = str + (p - str);
+ while (p < endp)
+ {
+ c = *p;
+ len = BYTES_BY_CHAR_HEAD (c);
+ if (CHAR_BYTE8_HEAD_P (c))
+ {
+ c = STRING_CHAR_ADVANCE (p);
+ *to++ = CHAR_TO_BYTE8 (c);
+ }
+ else
+ {
+ while (len--) *to++ = *p++;
+ }
+ }
+ return (to - str);
+}
+
+int
+string_count_byte8 (string)
+ Lisp_Object string;
+{
+ int multibyte = STRING_MULTIBYTE (string);
+ int nbytes = SBYTES (string);
+ unsigned char *p = SDATA (string);
+ unsigned char *pend = p + nbytes;
+ int count = 0;
+ int c, len;
+
+ if (multibyte)
+ while (p < pend)
+ {
+ c = *p;
+ len = BYTES_BY_CHAR_HEAD (c);
+
+ if (CHAR_BYTE8_HEAD_P (c))
+ count++;
+ p += len;
+ }
+ else
+ while (p < pend)
+ {
+ if (*p++ >= 0x80)
+ count++;
+ }
+ return count;
+}
+
+
+Lisp_Object
+string_escape_byte8 (string)
+ Lisp_Object string;
+{
+ int nchars = SCHARS (string);
+ int nbytes = SBYTES (string);
+ int multibyte = STRING_MULTIBYTE (string);
+ int byte8_count;
+ const unsigned char *src, *src_end;
+ unsigned char *dst;
+ Lisp_Object val;
+ int c, len;
+
+ if (multibyte && nchars == nbytes)
+ return string;
+
+ byte8_count = string_count_byte8 (string);
+
+ if (byte8_count == 0)
+ return string;
+
+ if (multibyte)
+ /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
+ val = make_uninit_multibyte_string (nchars + byte8_count * 3,
+ nbytes + byte8_count * 2);
+ else
+ /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
+ val = make_uninit_string (nbytes + byte8_count * 3);
+
+ src = SDATA (string);
+ src_end = src + nbytes;
+ dst = SDATA (val);
+ if (multibyte)
+ while (src < src_end)
+ {
+ c = *src;
+ len = BYTES_BY_CHAR_HEAD (c);
+
+ if (CHAR_BYTE8_HEAD_P (c))
+ {
+ c = STRING_CHAR_ADVANCE (src);
+ c = CHAR_TO_BYTE8 (c);
+ sprintf ((char *) dst, "\\%03o", c);
+ dst += 4;
+ }
+ else
+ while (len--) *dst++ = *src++;
+ }
+ else
+ while (src < src_end)
+ {
+ c = *src++;
+ if (c >= 0x80)
+ {
+ sprintf ((char *) dst, "\\%03o", c);
+ dst += 4;
+ }
+ else
+ *dst++ = c;
+ }
+ return val;
+}
+
+
+DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
+ doc: /*
+Concatenate all the argument characters and make the result a string.
+usage: (string &rest CHARACTERS) */)
+ (n, args)
+ int n;
+ Lisp_Object *args;
+{
+ int i;
+ unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
+ unsigned char *p = buf;
+ int c;
+
+ for (i = 0; i < n; i++)
+ {
+ CHECK_CHARACTER (args[i]);
+ c = XINT (args[i]);
+ p += CHAR_STRING (c, p);
+ }
+
+ return make_string_from_bytes ((char *) buf, n, p - buf);
+}
+
+void
+init_character_once ()
+{
+}
+
+#ifdef emacs
+
+void
+syms_of_character ()
+{
+ DEFSYM (Qcharacterp, "characterp");
+ DEFSYM (Qauto_fill_chars, "auto-fill-chars");
+
+ staticpro (&Vchar_unify_table);
+ Vchar_unify_table = Qnil;
+
+ defsubr (&Smax_char);
+ defsubr (&Scharacterp);
+ defsubr (&Sunibyte_char_to_multibyte);
+ defsubr (&Smultibyte_char_to_unibyte);
+ defsubr (&Schar_bytes);
+ defsubr (&Schar_width);
+ defsubr (&Sstring_width);
+ defsubr (&Schar_direction);
+ defsubr (&Schars_in_region);
+ defsubr (&Sstring);
+
+ DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
+ doc: /*
+Vector recording all translation tables ever defined.
+Each element is a pair (SYMBOL . TABLE) relating the table to the
+symbol naming it. The ID of a translation table is an index into this vector. */);
+ Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
+
+ DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
+ doc: /*
+A char-table for characters which invoke auto-filling.
+Such characters have value t in this table. */);
+ Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
+ CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
+ CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
+
+ DEFVAR_LISP ("char-width-table", &Vchar_width_table,
+ doc: /*
+A char-table for width (columns) of each character. */);
+ Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
+ char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
+ char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
+ make_number (4));
+
+ DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
+ doc: /* A char-table for direction of each character. */);
+ Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
+
+ DEFVAR_LISP ("printable-chars", &Vprintable_chars,
+ doc: /* A char-table for each printable character. */);
+ Vprintable_chars = Fmake_char_table (Qnil, Qnil);
+ Fset_char_table_range (Vprintable_chars,
+ Fcons (make_number (32), make_number (126)), Qt);
+ Fset_char_table_range (Vprintable_chars,
+ Fcons (make_number (160),
+ make_number (MAX_5_BYTE_CHAR)), Qt);
+
+ DEFVAR_LISP ("char-script-table", &Vchar_script_table,
+ doc: /* Char table of script symbols.
+It has one extra slot whose value is a list of script symbols. */);
+
+ /* Intern this now in case it isn't already done.
+ Setting this variable twice is harmless.
+ But don't staticpro it here--that is done in alloc.c. */
+ Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ DEFSYM (Qchar_script_table, "char-script-table");
+ Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
+ Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
+}
+
+#endif /* emacs */
diff --git a/src/character.h b/src/character.h
new file mode 100644
index 00000000000..db9de8c3d5b
--- /dev/null
+++ b/src/character.h
@@ -0,0 +1,652 @@
+/* Header for multibyte character handler.
+ Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef EMACS_CHARACTER_H
+#define EMACS_CHARACTER_H
+
+/* character code 1st byte byte sequence
+ -------------- -------- -------------
+ 0-7F 00..7F 0xxxxxxx
+ 80-7FF C2..DF 110xxxxx 10xxxxxx
+ 800-FFFF E0..EF 1110xxxx 10xxxxxx 10xxxxxx
+ 10000-1FFFFF F0..F7 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ 200000-3FFF7F F8 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ invalid F9..FF
+
+ raw-8-bit
+ 3FFF80-3FFFFF C0..C1 1100000x 10xxxxxx
+*/
+
+/* Maximum character code ((1 << CHARACTERBITS) - 1). */
+#define MAX_CHAR 0x3FFFFF
+
+/* Maximum Unicode character code. */
+#define MAX_UNICODE_CHAR 0x10FFFF
+
+/* Maximum N-byte character codes. */
+#define MAX_1_BYTE_CHAR 0x7F
+#define MAX_2_BYTE_CHAR 0x7FF
+#define MAX_3_BYTE_CHAR 0xFFFF
+#define MAX_4_BYTE_CHAR 0x1FFFFF
+#define MAX_5_BYTE_CHAR 0x3FFF7F
+
+/* Leading code range of Latin-1 chars. */
+#define LEADING_CODE_LATIN_1_MIN 0xC2
+#define LEADING_CODE_LATIN_1_MAX 0xC3
+
+/* Nonzero iff C is a character that corresponds to a raw 8-bit
+ byte. */
+#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
+
+/* Return the character code for raw 8-bit byte BYTE. */
+#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
+
+/* Return the raw 8-bit byte for character C. */
+#define CHAR_TO_BYTE8(c) \
+ (CHAR_BYTE8_P (c) \
+ ? (c) - 0x3FFF00 \
+ : multibyte_char_to_unibyte (c, Qnil))
+
+/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
+ that corresponds to a raw 8-bit byte. */
+#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
+
+/* Mapping table from unibyte chars to multibyte chars. */
+extern int unibyte_to_multibyte_table[256];
+
+/* Convert the unibyte character C to the corresponding multibyte
+ character. If C can't be converted, return C. */
+#define unibyte_char_to_multibyte(c) \
+ ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
+
+/* If C is not ASCII, make it unibyte. */
+#define MAKE_CHAR_UNIBYTE(c) \
+ do { \
+ if (! ASCII_CHAR_P (c)) \
+ c = CHAR_TO_BYTE8 (c); \
+ } while (0)
+
+
+/* If C is not ASCII, make it multibyte. It assumes C < 256. */
+#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])
+
+/* This is the maximum byte length of multibyte form. */
+#define MAX_MULTIBYTE_LENGTH 5
+
+/* Return a Lisp character whose character code is C. */
+#define make_char(c) make_number (c)
+
+/* Nonzero iff C is an ASCII byte. */
+#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
+
+/* Nonzero iff X is a character. */
+#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
+
+/* Nonzero iff C is valid as a character code. GENERICP is not used
+ now. */
+#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
+
+/* Check if Lisp object X is a character or not. */
+#define CHECK_CHARACTER(x) \
+ do { \
+ if (! CHARACTERP(x)) x = wrong_type_argument (Qcharacterp, (x)); \
+ } while (0)
+
+#define CHECK_CHARACTER_CAR(x) \
+ do { \
+ Lisp_Object tmp = XCAR (x); \
+ CHECK_CHARACTER (tmp); \
+ XSETCAR ((x), tmp); \
+ } while (0)
+
+#define CHECK_CHARACTER_CDR(x) \
+ do { \
+ Lisp_Object tmp = XCDR (x); \
+ CHECK_CHARACTER (tmp); \
+ XSETCDR ((x), tmp); \
+ } while (0)
+
+/* Nonzero iff C is an ASCII character. */
+#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
+
+/* Nonzero iff C is a character of code less than 0x100. */
+#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
+
+/* Nonzero if character C has a printable glyph. */
+#define CHAR_PRINTABLE_P(c) \
+ (((c) >= 32 && ((c) < 127) \
+ || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
+
+/* Return byte length of multibyte form for character C. */
+#define CHAR_BYTES(c) \
+ ( (c) <= MAX_1_BYTE_CHAR ? 1 \
+ : (c) <= MAX_2_BYTE_CHAR ? 2 \
+ : (c) <= MAX_3_BYTE_CHAR ? 3 \
+ : (c) <= MAX_4_BYTE_CHAR ? 4 \
+ : (c) <= MAX_5_BYTE_CHAR ? 5 \
+ : 2)
+
+
+/* Return the leading code of multibyte form of C. */
+#define CHAR_LEADING_CODE(c) \
+ ((c) <= MAX_1_BYTE_CHAR ? c \
+ : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6)) \
+ : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12)) \
+ : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18)) \
+ : (c) <= MAX_5_BYTE_CHAR ? 0xF8 \
+ : (0xC0 | (((c) >> 6) & 0x01)))
+
+
+/* Store multibyte form of the character C in P. The caller should
+ allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
+ Returns the length of the multibyte form. */
+
+#define CHAR_STRING(c, p) \
+ ((unsigned) (c) <= MAX_1_BYTE_CHAR \
+ ? ((p)[0] = (c), \
+ 1) \
+ : (unsigned) (c) <= MAX_2_BYTE_CHAR \
+ ? ((p)[0] = (0xC0 | ((c) >> 6)), \
+ (p)[1] = (0x80 | ((c) & 0x3F)), \
+ 2) \
+ : (unsigned) (c) <= MAX_3_BYTE_CHAR \
+ ? ((p)[0] = (0xE0 | ((c) >> 12)), \
+ (p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \
+ (p)[2] = (0x80 | ((c) & 0x3F)), \
+ 3) \
+ : char_string (c, p))
+
+/* Store multibyte form of byte B in P. The caller should allocate at
+ least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
+ length of the multibyte form. */
+
+#define BYTE8_STRING(b, p) \
+ ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)), \
+ (p)[1] = (0x80 | ((c) & 0x3F)), \
+ 2)
+
+
+/* Store multibyte form of the character C in P. The caller should
+ allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
+ And, advance P to the end of the multibyte form. */
+
+#define CHAR_STRING_ADVANCE(c, p) \
+ do { \
+ if ((c) <= MAX_1_BYTE_CHAR) \
+ *(p)++ = (c); \
+ else if ((c) <= MAX_2_BYTE_CHAR) \
+ *(p)++ = (0xC0 | ((c) >> 6)), \
+ *(p)++ = (0x80 | ((c) & 0x3F)); \
+ else if ((c) <= MAX_3_BYTE_CHAR) \
+ *(p)++ = (0xE0 | ((c) >> 12)), \
+ *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
+ *(p)++ = (0x80 | ((c) & 0x3F)); \
+ else \
+ (p) += char_string ((c), (p)); \
+ } while (0)
+
+
+/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
+ form. */
+#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
+
+/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
+ multibyte form. */
+#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
+
+/* Nonzero iff BYTE starts a character in a multibyte form.
+ This is equivalent to:
+ (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte)) */
+#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
+
+/* Just kept for backward compatibility. This macro will be removed
+ in the future. */
+#define BASE_LEADING_CODE_P LEADING_CODE_P
+
+/* How many bytes a character that starts with BYTE occupies in a
+ multibyte form. */
+#define BYTES_BY_CHAR_HEAD(byte) \
+ (!((byte) & 0x80) ? 1 \
+ : !((byte) & 0x20) ? 2 \
+ : !((byte) & 0x10) ? 3 \
+ : !((byte) & 0x08) ? 4 \
+ : 5)
+
+
+/* Return the length of the multi-byte form at string STR of length
+ LEN while assuming that STR points a valid multi-byte form. As
+ this macro isn't necessary anymore, all callers will be changed to
+ use BYTES_BY_CHAR_HEAD directly in the future. */
+
+#define MULTIBYTE_FORM_LENGTH(str, len) \
+ BYTES_BY_CHAR_HEAD (*(str))
+
+/* Parse multibyte string STR of length LENGTH and set BYTES to the
+ byte length of a character at STR while assuming that STR points a
+ valid multibyte form. As this macro isn't necessary anymore, all
+ callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
+ future. */
+
+#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
+ (bytes) = BYTES_BY_CHAR_HEAD (*(str))
+
+/* The byte length of multibyte form at unibyte string P ending at
+ PEND. If STR doesn't point a valid multibyte form, return 0. */
+
+#define MULTIBYTE_LENGTH(p, pend) \
+ (p >= pend ? 0 \
+ : !((p)[0] & 0x80) ? 1 \
+ : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0 \
+ : ((p)[0] & 0xE0) == 0xC0 ? 2 \
+ : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0 \
+ : ((p)[0] & 0xF0) == 0xE0 ? 3 \
+ : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0 \
+ : ((p)[0] & 0xF8) == 0xF0 ? 4 \
+ : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0 \
+ : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
+ : 0)
+
+
+/* Like MULTIBYTE_LENGTH but don't check the ending address. */
+
+#define MULTIBYTE_LENGTH_NO_CHECK(p) \
+ (!((p)[0] & 0x80) ? 1 \
+ : ((p)[1] & 0xC0) != 0x80 ? 0 \
+ : ((p)[0] & 0xE0) == 0xC0 ? 2 \
+ : ((p)[2] & 0xC0) != 0x80 ? 0 \
+ : ((p)[0] & 0xF0) == 0xE0 ? 3 \
+ : ((p)[3] & 0xC0) != 0x80 ? 0 \
+ : ((p)[0] & 0xF8) == 0xF0 ? 4 \
+ : ((p)[4] & 0xC0) != 0x80 ? 0 \
+ : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
+ : 0)
+
+/* If P is before LIMIT, advance P to the next character boundary. It
+ assumes that P is already at a character boundary of the sane
+ mulitbyte form whose end address is LIMIT. */
+
+#define NEXT_CHAR_BOUNDARY(p, limit) \
+ do { \
+ if ((p) < (limit)) \
+ (p) += BYTES_BY_CHAR_HEAD (*(p)); \
+ } while (0)
+
+
+/* If P is after LIMIT, advance P to the previous character boundary.
+ It assumes that P is already at a character boundary of the sane
+ mulitbyte form whose beginning address is LIMIT. */
+
+#define PREV_CHAR_BOUNDARY(p, limit) \
+ do { \
+ if ((p) > (limit)) \
+ { \
+ const unsigned char *p0 = (p); \
+ do { \
+ p0--; \
+ } while (p0 >= limit && ! CHAR_HEAD_P (*p0)); \
+ (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1; \
+ } \
+ } while (0)
+
+/* Return the character code of character whose multibyte form is at
+ P. The argument LEN is ignored. It will be removed in the
+ future. */
+
+#define STRING_CHAR(p, len) \
+ (!((p)[0] & 0x80) \
+ ? (p)[0] \
+ : ! ((p)[0] & 0x20) \
+ ? (((((p)[0] & 0x1F) << 6) \
+ | ((p)[1] & 0x3F)) \
+ + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
+ : ! ((p)[0] & 0x10) \
+ ? ((((p)[0] & 0x0F) << 12) \
+ | (((p)[1] & 0x3F) << 6) \
+ | ((p)[2] & 0x3F)) \
+ : string_char ((p), NULL, NULL))
+
+
+/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
+ form. The argument LEN is ignored. It will be removed in the
+ future. */
+
+#define STRING_CHAR_AND_LENGTH(p, len, actual_len) \
+ (!((p)[0] & 0x80) \
+ ? ((actual_len) = 1, (p)[0]) \
+ : ! ((p)[0] & 0x20) \
+ ? ((actual_len) = 2, \
+ (((((p)[0] & 0x1F) << 6) \
+ | ((p)[1] & 0x3F)) \
+ + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
+ : ! ((p)[0] & 0x10) \
+ ? ((actual_len) = 3, \
+ ((((p)[0] & 0x0F) << 12) \
+ | (((p)[1] & 0x3F) << 6) \
+ | ((p)[2] & 0x3F))) \
+ : string_char ((p), NULL, &actual_len))
+
+
+/* Like STRING_CHAR but advacen P to the end of multibyte form. */
+
+#define STRING_CHAR_ADVANCE(p) \
+ (!((p)[0] & 0x80) \
+ ? *(p)++ \
+ : ! ((p)[0] & 0x20) \
+ ? ((p) += 2, \
+ ((((p)[-2] & 0x1F) << 6) \
+ | ((p)[-1] & 0x3F) \
+ | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
+ : ! ((p)[0] & 0x10) \
+ ? ((p) += 3, \
+ ((((p)[-3] & 0x0F) << 12) \
+ | (((p)[-2] & 0x3F) << 6) \
+ | ((p)[-1] & 0x3F))) \
+ : string_char ((p), &(p), NULL))
+
+
+/* Fetch the "next" character from Lisp string STRING at byte position
+ BYTEIDX, character position CHARIDX. Store it into OUTPUT.
+
+ All the args must be side-effect-free.
+ BYTEIDX and CHARIDX must be lvalues;
+ we increment them past the character fetched. */
+
+#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ CHARIDX++; \
+ if (STRING_MULTIBYTE (STRING)) \
+ { \
+ unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int len; \
+ \
+ OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ } \
+ else \
+ OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ } \
+ else
+
+/* Like FETCH_STRING_CHAR_ADVANCE */
+
+#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ CHARIDX++; \
+ if (STRING_MULTIBYTE (STRING)) \
+ { \
+ unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int len; \
+ \
+ OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ } \
+ else \
+ { \
+ OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ MAKE_CHAR_MULTIBYTE (OUTPUT); \
+ } \
+ } \
+ else
+
+
+/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
+
+#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int len; \
+ \
+ OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ CHARIDX++; \
+ } \
+ else
+
+
+/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
+ buffer. */
+
+#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ CHARIDX++; \
+ if (!NILP (current_buffer->enable_multibyte_characters)) \
+ { \
+ unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
+ int len; \
+ \
+ OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ } \
+ else \
+ { \
+ OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
+ BYTEIDX++; \
+ } \
+ } \
+ else
+
+
+/* Like FETCH_CHAR_ADVANCE but assumes STRING is multibyte. */
+
+#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
+ int len; \
+ \
+ OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ CHARIDX++; \
+ } \
+ else
+
+
+/* Increase the buffer byte position POS_BYTE of the current buffer to
+ the next character boundary. No range checking of POS. */
+
+#define INC_POS(pos_byte) \
+ do { \
+ unsigned char *p = BYTE_POS_ADDR (pos_byte); \
+ pos_byte += BYTES_BY_CHAR_HEAD (*p); \
+ } while (0)
+
+
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
+ the previous character boundary. No range checking of POS. */
+
+#define DEC_POS(pos_byte) \
+ do { \
+ unsigned char *p; \
+ \
+ pos_byte--; \
+ if (pos_byte < GPT_BYTE) \
+ p = BEG_ADDR + pos_byte - 1; \
+ else \
+ p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \
+ while (!CHAR_HEAD_P (*p)) \
+ { \
+ p--; \
+ pos_byte--; \
+ } \
+ } while (0)
+
+/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
+
+#define INC_BOTH(charpos, bytepos) \
+ do \
+ { \
+ (charpos)++; \
+ if (NILP (current_buffer->enable_multibyte_characters)) \
+ (bytepos)++; \
+ else \
+ INC_POS ((bytepos)); \
+ } \
+ while (0)
+
+
+/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
+
+#define DEC_BOTH(charpos, bytepos) \
+ do \
+ { \
+ (charpos)--; \
+ if (NILP (current_buffer->enable_multibyte_characters)) \
+ (bytepos)--; \
+ else \
+ DEC_POS ((bytepos)); \
+ } \
+ while (0)
+
+
+/* Increase the buffer byte position POS_BYTE of the current buffer to
+ the next character boundary. This macro relies on the fact that
+ *GPT_ADDR and *Z_ADDR are always accessible and the values are
+ '\0'. No range checking of POS_BYTE. */
+
+#define BUF_INC_POS(buf, pos_byte) \
+ do { \
+ unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
+ pos_byte += BYTES_BY_CHAR_HEAD (*p); \
+ } while (0)
+
+
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
+ the previous character boundary. No range checking of POS_BYTE. */
+
+#define BUF_DEC_POS(buf, pos_byte) \
+ do { \
+ unsigned char *p; \
+ pos_byte--; \
+ if (pos_byte < BUF_GPT_BYTE (buf)) \
+ p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
+ else \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
+ while (!CHAR_HEAD_P (*p)) \
+ { \
+ p--; \
+ pos_byte--; \
+ } \
+ } while (0)
+
+
+#define MAYBE_UNIFY_CHAR(c) \
+ if (c > MAX_UNICODE_CHAR \
+ && CHAR_TABLE_P (Vchar_unify_table)) \
+ { \
+ Lisp_Object val; \
+ int unified; \
+ \
+ val = CHAR_TABLE_REF (Vchar_unify_table, c); \
+ if (! NILP (val)) \
+ { \
+ if (SYMBOLP (val)) \
+ { \
+ Funify_charset (val, Qnil, Qnil); \
+ val = CHAR_TABLE_REF (Vchar_unify_table, c); \
+ } \
+ if ((unified = XINT (val)) >= 0) \
+ c = unified; \
+ } \
+ } \
+ else
+
+
+/* Return the width of ASCII character C. The width is measured by
+ how many columns occupied on the screen when displayed in the
+ current buffer. */
+
+#define ASCII_CHAR_WIDTH(c) \
+ (c < 0x20 \
+ ? (c == '\t' \
+ ? XFASTINT (current_buffer->tab_width) \
+ : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
+ : (c < 0x7f \
+ ? 1 \
+ : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
+
+/* Return the width of character C. The width is measured by how many
+ columns occupied on the screen when displayed in the current
+ buffer. */
+
+#define CHAR_WIDTH(c) \
+ (ASCII_CHAR_P (c) \
+ ? ASCII_CHAR_WIDTH (c) \
+ : XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
+
+extern int char_resolve_modifier_mask P_ ((int));
+extern int char_string P_ ((int, unsigned char *));
+extern int string_char P_ ((const unsigned char *,
+ const unsigned char **, int *));
+
+extern int translate_char P_ ((Lisp_Object, int c));
+extern int char_printable_p P_ ((int c));
+extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
+ int *));
+extern int parse_str_to_multibyte P_ ((unsigned char *, int));
+extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
+extern int str_to_multibyte P_ ((unsigned char *, int, int));
+extern int str_as_unibyte P_ ((unsigned char *, int));
+extern int strwidth P_ ((unsigned char *, int));
+extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
+extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
+
+extern Lisp_Object Vprintable_chars;
+
+extern Lisp_Object Qcharacterp, Qauto_fill_chars;
+extern Lisp_Object Vtranslation_table_vector;
+extern Lisp_Object Vchar_width_table;
+extern Lisp_Object Vchar_direction_table;
+extern Lisp_Object Vchar_unify_table;
+
+extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));
+
+/* Return a translation table of id number ID. */
+#define GET_TRANSLATION_TABLE(id) \
+ (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
+
+/* A char-table for characters which may invoke auto-filling. */
+extern Lisp_Object Vauto_fill_chars;
+
+extern Lisp_Object Vchar_script_table;
+
+/* Copy LEN bytes from FROM to TO. This macro should be used only
+ when a caller knows that LEN is short and the obvious copy loop is
+ faster than calling bcopy which has some overhead. Copying a
+ multibyte sequence of a character is the typical case. */
+
+#define BCOPY_SHORT(from, to, len) \
+ do { \
+ int i = len; \
+ unsigned char *from_p = from, *to_p = to; \
+ while (i--) *to_p++ = *from_p++; \
+ } while (0)
+
+#define DEFSYM(sym, name) \
+ do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
+
+#endif /* EMACS_CHARACTER_H */
diff --git a/src/charset.c b/src/charset.c
index af5c6ff7068..19c75538340 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -1,7 +1,10 @@
-/* Basic multilingual character support.
- Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+/* Basic character set support.
+ Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -20,717 +23,1251 @@ along with GNU Emacs; see the file COPYING. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-/* At first, see the document in `charset.h' to understand the code in
- this file. */
-
-#ifdef emacs
#include <config.h>
-#endif
#include <stdio.h>
-
-#ifdef emacs
-
+#include <unistd.h>
+#include <ctype.h>
#include <sys/types.h>
#include "lisp.h"
-#include "buffer.h"
+#include "character.h"
#include "charset.h"
-#include "composite.h"
#include "coding.h"
#include "disptab.h"
+#include "buffer.h"
-#else /* not emacs */
+/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
-#include "mulelib.h"
+ A coded character set ("charset" hereafter) is a meaningful
+ collection (i.e. language, culture, functionality, etc.) of
+ characters. Emacs handles multiple charsets at once. In Emacs Lisp
+ code, a charset is represented by a symbol. In C code, a charset is
+ represented by its ID number or by a pointer to a struct charset.
-#endif /* emacs */
+ The actual information about each charset is stored in two places.
+ Lispy information is stored in the hash table Vcharset_hash_table as
+ a vector (charset attributes). The other information is stored in
+ charset_table as a struct charset.
-Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
-Lisp_Object Qunknown;
+*/
-/* Declaration of special leading-codes. */
-EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
-EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
-EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
-EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
+/* List of all charsets. This variable is used only from Emacs
+ Lisp. */
+Lisp_Object Vcharset_list;
-/* Declaration of special charsets. The values are set by
- Fsetup_special_charsets. */
-int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
-int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
-int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
-int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
-int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
-int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
-int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
+/* Hash table that contains attributes of each charset. Keys are
+ charset symbols, and values are vectors of charset attributes. */
+Lisp_Object Vcharset_hash_table;
-Lisp_Object Qcharset_table;
+/* Table of struct charset. */
+struct charset *charset_table;
-/* A char-table containing information of each character set. */
-Lisp_Object Vcharset_table;
+static int charset_table_size;
+int charset_table_used;
-/* A vector of charset symbol indexed by charset-id. This is used
- only for returning charset symbol from C functions. */
-Lisp_Object Vcharset_symbol_table;
+Lisp_Object Qcharsetp;
-/* A list of charset symbols ever defined. */
-Lisp_Object Vcharset_list;
+/* Special charset symbols. */
+Lisp_Object Qascii;
+Lisp_Object Qeight_bit;
+Lisp_Object Qiso_8859_1;
+Lisp_Object Qunicode;
-/* Vector of translation table ever defined.
- ID of a translation table is used to index this vector. */
-Lisp_Object Vtranslation_table_vector;
+/* The corresponding charsets. */
+int charset_ascii;
+int charset_eight_bit;
+int charset_iso_8859_1;
+int charset_unicode;
-/* A char-table for characters which may invoke auto-filling. */
-Lisp_Object Vauto_fill_chars;
+/* The other special charsets. */
+int charset_jisx0201_roman;
+int charset_jisx0208_1978;
+int charset_jisx0208;
-Lisp_Object Qauto_fill_chars;
+/* Value of charset attribute `charset-iso-plane'. */
+Lisp_Object Qgl, Qgr;
-/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
-int bytes_by_char_head[256];
-int width_by_char_head[256];
+/* Charset of unibyte characters. */
+int charset_unibyte;
-/* Mapping table from ISO2022's charset (specified by DIMENSION,
- CHARS, and FINAL-CHAR) to Emacs' charset. */
-int iso_charset_table[2][2][128];
+/* List of charsets ordered by the priority. */
+Lisp_Object Vcharset_ordered_list;
+
+/* Incremented everytime we change Vcharset_ordered_list. This is
+ unsigned short so that it fits in Lisp_Int and never matches
+ -1. */
+unsigned short charset_ordered_list_tick;
-/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
-unsigned char *_fetch_multibyte_char_p;
-int _fetch_multibyte_char_len;
+/* List of iso-2022 charsets. */
+Lisp_Object Viso_2022_charset_list;
-/* Offset to add to a non-ASCII value when inserting it. */
-EMACS_INT nonascii_insert_offset;
+/* List of emacs-mule charsets. */
+Lisp_Object Vemacs_mule_charset_list;
-/* Translation table for converting non-ASCII unibyte characters
- to multibyte codes, or nil. */
-Lisp_Object Vnonascii_translation_table;
+struct charset *emacs_mule_charset[256];
+
+/* Mapping table from ISO2022's charset (specified by DIMENSION,
+ CHARS, and FINAL-CHAR) to Emacs' charset. */
+int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
+
+Lisp_Object Vcharset_map_directory;
+
+Lisp_Object Vchar_unified_charset_table;
+
+/* Defined in chartab.c */
+extern void
+map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
+ Lisp_Object function, Lisp_Object table,
+ Lisp_Object arg, struct charset *charset,
+ unsigned from, unsigned to));
+
+#define CODE_POINT_TO_INDEX(charset, code) \
+ ((charset)->code_linear_p \
+ ? (code) - (charset)->min_code \
+ : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
+ && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
+ && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
+ && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
+ ? (((((code) >> 24) - (charset)->code_space[12]) \
+ * (charset)->code_space[11]) \
+ + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
+ * (charset)->code_space[7]) \
+ + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
+ * (charset)->code_space[3]) \
+ + (((code) & 0xFF) - (charset)->code_space[0]) \
+ - ((charset)->char_index_offset)) \
+ : -1)
+
+
+/* Convert the character index IDX to code-point CODE for CHARSET.
+ It is assumed that IDX is in a valid range. */
+
+#define INDEX_TO_CODE_POINT(charset, idx) \
+ ((charset)->code_linear_p \
+ ? (idx) + (charset)->min_code \
+ : (idx += (charset)->char_index_offset, \
+ (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
+ | (((charset)->code_space[4] \
+ + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
+ << 8) \
+ | (((charset)->code_space[8] \
+ + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
+ << 16) \
+ | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
+ << 24))))
-/* List of all possible generic characters. */
-Lisp_Object Vgeneric_character_list;
-void
-invalid_character (c)
- int c;
-{
- error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
-}
-/* Parse string STR of length LENGTH and fetch information of a
- character at STR. Set BYTES to the byte length the character
- occupies, CHARSET, C1, C2 to proper values of the character. */
-
-#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
- do { \
- (c1) = *(str); \
- (bytes) = BYTES_BY_CHAR_HEAD (c1); \
- if ((bytes) == 1) \
- (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
- else if ((bytes) == 2) \
- { \
- if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
- (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
- else \
- (charset) = (c1), (c1) = (str)[1] & 0x7F; \
- } \
- else if ((bytes) == 3) \
- { \
- if ((c1) < LEADING_CODE_PRIVATE_11) \
- (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
- else \
- (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
- } \
- else \
- (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
- } while (0)
-
-/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
- Note that this intentionally allows invalid components, such
- as 0xA0 0xA0, because there exist many files that contain
- such invalid byte sequences, especially in EUC-GB. */
-#define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
- ((charset) == CHARSET_ASCII \
- ? ((c1) >= 0 && (c1) <= 0x7F) \
- : ((charset) == CHARSET_8_BIT_CONTROL \
- ? ((c1) >= 0x80 && (c1) <= 0x9F) \
- : ((charset) == CHARSET_8_BIT_GRAPHIC \
- ? ((c1) >= 0x80 && (c1) <= 0xFF) \
- : (CHARSET_DIMENSION (charset) == 1 \
- ? ((c1) >= 0x20 && (c1) <= 0x7F) \
- : ((c1) >= 0x20 && (c1) <= 0x7F \
- && (c2) >= 0x20 && (c2) <= 0x7F)))))
-
-/* Store multi-byte form of the character C in STR. The caller should
- allocate at least 4-byte area at STR in advance. Returns the
- length of the multi-byte form. If C is an invalid character code,
- return -1. */
+/* Set to 1 to warn that a charset map is loaded and thus a buffer
+ text and a string data may be relocated. */
+int charset_map_loaded;
-int
-char_to_string_1 (c, str)
- int c;
- unsigned char *str;
+struct charset_map_entries
{
- unsigned char *p = str;
+ struct {
+ unsigned from, to;
+ int c;
+ } entry[0x10000];
+ struct charset_map_entries *next;
+};
+
+/* Load the mapping information for CHARSET from ENTRIES.
+
+ If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
+
+ If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
+ CHARSET->decoder, and CHARSET->encoder.
+
+ If CONTROL_FLAG is 2, setup CHARSET->deunifier and
+ Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
+ setup it too. */
+
+static void
+load_charset_map (charset, entries, n_entries, control_flag)
+ struct charset *charset;
+ struct charset_map_entries *entries;
+ int n_entries;
+ int control_flag;
+{
+ Lisp_Object vec, table;
+ unsigned max_code = CHARSET_MAX_CODE (charset);
+ int ascii_compatible_p = charset->ascii_compatible_p;
+ int min_char, max_char, nonascii_min_char;
+ int i;
+ unsigned char *fast_map = charset->fast_map;
- if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
+ if (n_entries <= 0)
+ return;
+
+ if (control_flag > 0)
{
- /* Multibyte character can't have a modifier bit. */
- if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
- return -1;
+ int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
+
+ table = Fmake_char_table (Qnil, Qnil);
+ if (control_flag == 1)
+ vec = Fmake_vector (make_number (n), make_number (-1));
+ else if (! CHAR_TABLE_P (Vchar_unify_table))
+ Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
+
+ charset_map_loaded = 1;
+ }
- /* For Meta, Shift, and Control modifiers, we need special care. */
- if (c & CHAR_META)
+ min_char = max_char = entries->entry[0].c;
+ nonascii_min_char = MAX_CHAR;
+ for (i = 0; i < n_entries; i++)
+ {
+ unsigned from, to;
+ int from_index, to_index;
+ int from_c, to_c;
+ int idx = i % 0x10000;
+
+ if (i > 0 && idx == 0)
+ entries = entries->next;
+ from = entries->entry[idx].from;
+ to = entries->entry[idx].to;
+ from_c = entries->entry[idx].c;
+ from_index = CODE_POINT_TO_INDEX (charset, from);
+ if (from == to)
{
- /* Move the meta bit to the right place for a string. */
- c = (c & ~CHAR_META) | 0x80;
+ to_index = from_index;
+ to_c = from_c;
}
- if (c & CHAR_SHIFT)
+ else
{
- /* Shift modifier is valid only with [A-Za-z]. */
- if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
- c &= ~CHAR_SHIFT;
- else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
- c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ to_index = CODE_POINT_TO_INDEX (charset, to);
+ to_c = from_c + (to_index - from_index);
}
- if (c & CHAR_CTL)
+ if (from_index < 0 || to_index < 0)
+ continue;
+
+ if (control_flag < 2)
{
- /* Simulate the code in lread.c. */
- /* Allow `\C- ' and `\C-?'. */
- if (c == (CHAR_CTL | ' '))
- c = 0;
- else if (c == (CHAR_CTL | '?'))
- c = 127;
- /* ASCII control chars are made from letters (both cases),
- as well as the non-letters within 0100...0137. */
- else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
- c &= (037 | (~0177 & ~CHAR_CTL));
- else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
- c &= (037 | (~0177 & ~CHAR_CTL));
+ int c;
+
+ if (to_c > max_char)
+ max_char = to_c;
+ else if (from_c < min_char)
+ min_char = from_c;
+ if (ascii_compatible_p)
+ {
+ if (! ASCII_BYTE_P (from_c))
+ {
+ if (from_c < nonascii_min_char)
+ nonascii_min_char = from_c;
+ }
+ else if (! ASCII_BYTE_P (to_c))
+ {
+ nonascii_min_char = 0x80;
+ }
+ }
+
+ for (c = from_c; c <= to_c; c++)
+ CHARSET_FAST_MAP_SET (c, fast_map);
+
+ if (control_flag == 1)
+ {
+ unsigned code = from;
+
+ if (CHARSET_COMPACT_CODES_P (charset))
+ while (1)
+ {
+ ASET (vec, from_index, make_number (from_c));
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (code));
+ if (from_index == to_index)
+ break;
+ from_index++, from_c++;
+ code = INDEX_TO_CODE_POINT (charset, from_index);
+ }
+ else
+ for (; from_index <= to_index; from_index++, from_c++)
+ {
+ ASET (vec, from_index, make_number (from_c));
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (from_index));
+ }
+ }
}
+ else
+ {
+ unsigned code = from;
- /* If C still has any modifier bits, just ignore it. */
- c &= ~CHAR_MODIFIER_MASK;
+ while (1)
+ {
+ int c1 = DECODE_CHAR (charset, code);
+
+ if (c1 >= 0)
+ {
+ CHAR_TABLE_SET (table, from_c, make_number (c1));
+ CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
+ if (CHAR_TABLE_P (Vchar_unified_charset_table))
+ CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
+ CHARSET_NAME (charset));
+ }
+ if (from_index == to_index)
+ break;
+ from_index++, from_c++;
+ code = INDEX_TO_CODE_POINT (charset, from_index);
+ }
+ }
}
- if (SINGLE_BYTE_CHAR_P (c))
+ if (control_flag < 2)
{
- if (ASCII_BYTE_P (c) || c >= 0xA0)
- *p++ = c;
- else
+ CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
+ ? nonascii_min_char : min_char);
+ CHARSET_MAX_CHAR (charset) = max_char;
+ if (control_flag == 1)
{
- *p++ = LEADING_CODE_8_BIT_CONTROL;
- *p++ = c + 0x20;
+ CHARSET_DECODER (charset) = vec;
+ CHARSET_ENCODER (charset) = table;
}
}
- else if (CHAR_VALID_P (c, 0))
+ else
+ CHARSET_DEUNIFIER (charset) = table;
+}
+
+
+/* Read a hexadecimal number (preceded by "0x") from the file FP while
+ paying attention to comment charcter '#'. */
+
+static INLINE unsigned
+read_hex (fp, eof)
+ FILE *fp;
+ int *eof;
+{
+ int c;
+ unsigned n;
+
+ while ((c = getc (fp)) != EOF)
{
- int charset, c1, c2;
-
- SPLIT_CHAR (c, charset, c1, c2);
-
- if (charset >= LEADING_CODE_EXT_11)
- *p++ = (charset < LEADING_CODE_EXT_12
- ? LEADING_CODE_PRIVATE_11
- : (charset < LEADING_CODE_EXT_21
- ? LEADING_CODE_PRIVATE_12
- : (charset < LEADING_CODE_EXT_22
- ? LEADING_CODE_PRIVATE_21
- : LEADING_CODE_PRIVATE_22)));
- *p++ = charset;
- if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
- return -1;
- if (c1)
+ if (c == '#')
+ {
+ while ((c = getc (fp)) != EOF && c != '\n');
+ }
+ else if (c == '0')
{
- *p++ = c1 | 0x80;
- if (c2 > 0)
- *p++ = c2 | 0x80;
+ if ((c = getc (fp)) == EOF || c == 'x')
+ break;
}
}
+ if (c == EOF)
+ {
+ *eof = 1;
+ return 0;
+ }
+ *eof = 0;
+ n = 0;
+ if (c == 'x')
+ while ((c = getc (fp)) != EOF && isxdigit (c))
+ n = ((n << 4)
+ | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
else
- return -1;
-
- return (p - str);
+ while ((c = getc (fp)) != EOF && isdigit (c))
+ n = (n * 10) + c - '0';
+ if (c != EOF)
+ ungetc (c, fp);
+ return n;
}
-/* Store multi-byte form of the character C in STR. The caller should
- allocate at least 4-byte area at STR in advance. Returns the
- length of the multi-byte form. If C is an invalid character code,
- signal an error.
+/* Return a mapping vector for CHARSET loaded from MAPFILE.
+ Each line of MAPFILE has this form
+ 0xAAAA 0xCCCC
+ where 0xAAAA is a code-point and 0xCCCC is the corresponding
+ character code, or this form
+ 0xAAAA-0xBBBB 0xCCCC
+ where 0xAAAA and 0xBBBB are code-points specifying a range, and
+ 0xCCCC is the first character code of the range.
- Use macro `CHAR_STRING (C, STR)' instead of calling this function
- directly if C can be an ASCII character. */
+ The returned vector has this form:
+ [ CODE1 CHAR1 CODE2 CHAR2 .... ]
+ where CODE1 is a code-point or a cons of code-points specifying a
+ range. */
-int
-char_to_string (c, str)
- int c;
- unsigned char *str;
+extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
+
+static void
+load_charset_map_from_file (charset, mapfile, control_flag)
+ struct charset *charset;
+ Lisp_Object mapfile;
+ int control_flag;
{
- int len;
- len = char_to_string_1 (c, str);
- if (len == -1)
- invalid_character (c);
- return len;
-}
+ unsigned min_code = CHARSET_MIN_CODE (charset);
+ unsigned max_code = CHARSET_MAX_CODE (charset);
+ int fd;
+ FILE *fp;
+ int eof;
+ Lisp_Object suffixes;
+ struct charset_map_entries *head, *entries;
+ int n_entries;
+
+ suffixes = Fcons (build_string (".map"),
+ Fcons (build_string (".TXT"), Qnil));
+
+ fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
+ NULL, Qnil);
+ if (fd < 0
+ || ! (fp = fdopen (fd, "r")))
+ {
+ add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
+ return;
+ }
+ head = entries = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ n_entries = 0;
+ eof = 0;
+ while (1)
+ {
+ unsigned from, to;
+ int c;
+ int idx;
-/* Return the non-ASCII character corresponding to multi-byte form at
- STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
- length of the multibyte form in *ACTUAL_LEN.
+ from = read_hex (fp, &eof);
+ if (eof)
+ break;
+ if (getc (fp) == '-')
+ to = read_hex (fp, &eof);
+ else
+ to = from;
+ c = (int) read_hex (fp, &eof);
- Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
- this function directly if you want ot handle ASCII characters as
- well. */
+ if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
+ continue;
-int
-string_to_char (str, len, actual_len)
- const unsigned char *str;
- int len, *actual_len;
-{
- int c, bytes, charset, c1, c2;
+ if (n_entries > 0 && (n_entries % 0x10000) == 0)
+ {
+ entries->next = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ entries = entries->next;
+ }
+ idx = n_entries % 0x10000;
+ entries->entry[idx].from = from;
+ entries->entry[idx].to = to;
+ entries->entry[idx].c = c;
+ n_entries++;
+ }
+ fclose (fp);
+ close (fd);
- SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
- c = MAKE_CHAR (charset, c1, c2);
- if (actual_len)
- *actual_len = bytes;
- return c;
+ load_charset_map (charset, head, n_entries, control_flag);
}
-/* Return the length of the multi-byte form at string STR of length LEN.
- Use the macro MULTIBYTE_FORM_LENGTH instead. */
-int
-multibyte_form_length (str, len)
- const unsigned char *str;
- int len;
+static void
+load_charset_map_from_vector (charset, vec, control_flag)
+ struct charset *charset;
+ Lisp_Object vec;
+ int control_flag;
{
- int bytes;
+ unsigned min_code = CHARSET_MIN_CODE (charset);
+ unsigned max_code = CHARSET_MAX_CODE (charset);
+ struct charset_map_entries *head, *entries;
+ int n_entries;
+ int len = ASIZE (vec);
+ int i;
- PARSE_MULTIBYTE_SEQ (str, len, bytes);
- return bytes;
-}
+ if (len % 2 == 1)
+ {
+ add_to_log ("Failure in loading charset map: %V", vec, Qnil);
+ return;
+ }
-/* Check multibyte form at string STR of length LEN and set variables
- pointed by CHARSET, C1, and C2 to charset and position codes of the
- character at STR, and return 0. If there's no multibyte character,
- return -1. This should be used only in the macro SPLIT_STRING
- which checks range of STR in advance. */
+ head = entries = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ n_entries = 0;
+ for (i = 0; i < len; i += 2)
+ {
+ Lisp_Object val, val2;
+ unsigned from, to;
+ int c;
+ int idx;
-int
-split_string (str, len, charset, c1, c2)
- const unsigned char *str;
- unsigned char *c1, *c2;
- int len, *charset;
-{
- register int bytes, cs, code1, code2 = -1;
+ val = AREF (vec, i);
+ if (CONSP (val))
+ {
+ val2 = XCDR (val);
+ val = XCAR (val);
+ CHECK_NATNUM (val);
+ CHECK_NATNUM (val2);
+ from = XFASTINT (val);
+ to = XFASTINT (val2);
+ }
+ else
+ {
+ CHECK_NATNUM (val);
+ from = to = XFASTINT (val);
+ }
+ val = AREF (vec, i + 1);
+ CHECK_NATNUM (val);
+ c = XFASTINT (val);
- SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
- if (cs == CHARSET_ASCII)
- return -1;
- *charset = cs;
- *c1 = code1;
- *c2 = code2;
- return 0;
+ if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
+ continue;
+
+ if ((n_entries % 0x10000) == 0)
+ {
+ entries->next = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ entries = entries->next;
+ }
+ idx = n_entries % 0x10000;
+ entries->entry[idx].from = from;
+ entries->entry[idx].to = to;
+ entries->entry[idx].c = c;
+ n_entries++;
+ }
+
+ load_charset_map (charset, head, n_entries, control_flag);
}
-/* Return 1 iff character C has valid printable glyph.
- Use the macro CHAR_PRINTABLE_P instead. */
-int
-char_printable_p (c)
- int c;
+static void
+load_charset (charset)
+ struct charset *charset;
{
- int charset, c1, c2;
-
- if (ASCII_BYTE_P (c))
- return 1;
- else if (SINGLE_BYTE_CHAR_P (c))
- return 0;
- else if (c >= MAX_CHAR)
- return 0;
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
+ {
+ Lisp_Object map;
- SPLIT_CHAR (c, charset, c1, c2);
- if (! CHARSET_DEFINED_P (charset))
- return 0;
- if (CHARSET_CHARS (charset) == 94
- ? c1 <= 32 || c1 >= 127
- : c1 < 32)
- return 0;
- if (CHARSET_DIMENSION (charset) == 2
- && (CHARSET_CHARS (charset) == 94
- ? c2 <= 32 || c2 >= 127
- : c2 < 32))
- return 0;
- return 1;
+ map = CHARSET_MAP (charset);
+ if (STRINGP (map))
+ load_charset_map_from_file (charset, map, 1);
+ else
+ load_charset_map_from_vector (charset, map, 1);
+ CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
+ }
}
-/* Translate character C by translation table TABLE. If C
- is negative, translate a character specified by CHARSET, C1, and C2
- (C1 and C2 are code points of the character). If no translation is
- found in TABLE, return C. */
-int
-translate_char (table, c, charset, c1, c2)
- Lisp_Object table;
- int c, charset, c1, c2;
+
+DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
+ doc: /* Return non-nil if and only if OBJECT is a charset.*/)
+ (object)
+ Lisp_Object object;
{
- Lisp_Object ch;
- int alt_charset, alt_c1, alt_c2, dimension;
-
- if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
- if (!CHAR_TABLE_P (table)
- || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
- return c;
-
- SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
- dimension = CHARSET_DIMENSION (alt_charset);
- if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
- /* CH is not a generic character, just return it. */
- return XFASTINT (ch);
-
- /* Since CH is a generic character, we must return a specific
- charater which has the same position codes as C from CH. */
- if (charset < 0)
- SPLIT_CHAR (c, charset, c1, c2);
- if (dimension != CHARSET_DIMENSION (charset))
- /* We can't make such a character because of dimension mismatch. */
- return c;
- return MAKE_CHAR (alt_charset, c1, c2);
+ return (CHARSETP (object) ? Qt : Qnil);
}
-/* Convert the unibyte character C to multibyte based on
- Vnonascii_translation_table or nonascii_insert_offset. If they can't
- convert C to a valid multibyte character, convert it based on
- DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
-int
-unibyte_char_to_multibyte (c)
- int c;
+void
+map_charset_chars (c_function, function, arg,
+ charset, from, to)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+ Lisp_Object function, arg;
+ struct charset *charset;
+ unsigned from, to;
{
- if (c < 0400 && c >= 0200)
+ Lisp_Object range;
+ int partial;
+
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
+ load_charset (charset);
+
+ partial = (from > CHARSET_MIN_CODE (charset)
+ || to < CHARSET_MAX_CODE (charset));
+
+ if (CHARSET_UNIFIED_P (charset)
+ && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
{
- int c_save = c;
+ map_char_table_for_charset (c_function, function,
+ CHARSET_DEUNIFIER (charset), arg,
+ partial ? charset : NULL, from, to);
+ }
- if (! NILP (Vnonascii_translation_table))
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
+ {
+ int from_idx = CODE_POINT_TO_INDEX (charset, from);
+ int to_idx = CODE_POINT_TO_INDEX (charset, to);
+ int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
+ int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
+
+ range = Fcons (make_number (from_c), make_number (to_c));
+ if (NILP (function))
+ (*c_function) (range, arg);
+ else
+ call2 (function, range, arg);
+ }
+ else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
+ {
+ if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
+ return;
+ if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
{
- c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
- if (c >= 0400 && ! char_valid_p (c, 0))
- c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
+ range = Fcons (make_number (from), make_number (to));
+ if (to >= 128)
+ XSETCAR (range, make_number (127));
+
+ if (NILP (function))
+ (*c_function) (range, arg);
+ else
+ call2 (function, range, arg);
}
- else if (c >= 0240 && nonascii_insert_offset > 0)
+ map_char_table_for_charset (c_function, function,
+ CHARSET_ENCODER (charset), arg,
+ partial ? charset : NULL, from, to);
+ }
+ else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
+ {
+ Lisp_Object subset_info;
+ int offset;
+
+ subset_info = CHARSET_SUBSET (charset);
+ charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+ offset = XINT (AREF (subset_info, 3));
+ from -= offset;
+ if (from < XFASTINT (AREF (subset_info, 1)))
+ from = XFASTINT (AREF (subset_info, 1));
+ to -= offset;
+ if (to > XFASTINT (AREF (subset_info, 2)))
+ to = XFASTINT (AREF (subset_info, 2));
+ map_charset_chars (c_function, function, arg, charset, from, to);
+ }
+ else /* i.e. CHARSET_METHOD_SUPERSET */
+ {
+ Lisp_Object parents;
+
+ for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
+ parents = XCDR (parents))
{
- c += nonascii_insert_offset;
- if (c < 0400 || ! char_valid_p (c, 0))
- c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
+ int offset;
+ unsigned this_from, this_to;
+
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
+ offset = XINT (XCDR (XCAR (parents)));
+ this_from = from - offset;
+ this_to = to - offset;
+ if (this_from < CHARSET_MIN_CODE (charset))
+ this_from = CHARSET_MIN_CODE (charset);
+ if (this_to > CHARSET_MAX_CODE (charset))
+ this_to = CHARSET_MAX_CODE (charset);
+ map_charset_chars (c_function, function, arg, charset, from, to);
}
- else if (c >= 0240)
- c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
}
- return c;
}
+DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
+ doc: /* Call FUNCTION for all characters in CHARSET.
+FUNCTION is called with an argument RANGE and the optional 3rd
+argument ARG.
-/* Convert the multibyte character C to unibyte 8-bit character based
- on Vnonascii_translation_table or nonascii_insert_offset. If
- REV_TBL is non-nil, it should be a reverse table of
- Vnonascii_translation_table, i.e. what given by:
- Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
+RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
+characters contained in CHARSET.
-int
-multibyte_char_to_unibyte (c, rev_tbl)
- int c;
- Lisp_Object rev_tbl;
+The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
+range of code points of target characters. */)
+ (function, charset, arg, from_code, to_code)
+ Lisp_Object function, charset, arg, from_code, to_code;
{
- if (!SINGLE_BYTE_CHAR_P (c))
+ struct charset *cs;
+ unsigned from, to;
+
+ CHECK_CHARSET_GET_CHARSET (charset, cs);
+ if (NILP (from_code))
+ from = CHARSET_MIN_CODE (cs);
+ else
+ {
+ CHECK_NATNUM (from_code);
+ from = XINT (from_code);
+ if (from < CHARSET_MIN_CODE (cs))
+ from = CHARSET_MIN_CODE (cs);
+ }
+ if (NILP (to_code))
+ to = CHARSET_MAX_CODE (cs);
+ else
+ {
+ CHECK_NATNUM (to_code);
+ to = XINT (to_code);
+ if (to > CHARSET_MAX_CODE (cs))
+ to = CHARSET_MAX_CODE (cs);
+ }
+ map_charset_chars (NULL, function, arg, cs, from, to);
+ return Qnil;
+}
+
+
+/* Define a charset according to the arguments. The Nth argument is
+ the Nth attribute of the charset (the last attribute `charset-id'
+ is not included). See the docstring of `define-charset' for the
+ detail. */
+
+DEFUN ("define-charset-internal", Fdefine_charset_internal,
+ Sdefine_charset_internal, charset_arg_max, MANY, 0,
+ doc: /* For internal use only.
+usage: (define-charset-internal ...) */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
+{
+ /* Charset attr vector. */
+ Lisp_Object attrs;
+ Lisp_Object val;
+ unsigned hash_code;
+ struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
+ int i, j;
+ struct charset charset;
+ int id;
+ int dimension;
+ int new_definition_p;
+ int nchars;
+
+ if (nargs != charset_arg_max)
+ return Fsignal (Qwrong_number_of_arguments,
+ Fcons (intern ("define-charset-internal"),
+ make_number (nargs)));
+
+ attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
+
+ CHECK_SYMBOL (args[charset_arg_name]);
+ ASET (attrs, charset_name, args[charset_arg_name]);
+
+ val = args[charset_arg_code_space];
+ for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
{
- int c_save = c;
+ int min_byte, max_byte;
+
+ min_byte = XINT (Faref (val, make_number (i * 2)));
+ max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
+ if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
+ error ("Invalid :code-space value");
+ charset.code_space[i * 4] = min_byte;
+ charset.code_space[i * 4 + 1] = max_byte;
+ charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
+ nchars *= charset.code_space[i * 4 + 2];
+ charset.code_space[i * 4 + 3] = nchars;
+ if (max_byte > 0)
+ dimension = i + 1;
+ }
- if (! CHAR_TABLE_P (rev_tbl)
- && CHAR_TABLE_P (Vnonascii_translation_table))
- rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
- make_number (0));
- if (CHAR_TABLE_P (rev_tbl))
+ val = args[charset_arg_dimension];
+ if (NILP (val))
+ charset.dimension = dimension;
+ else
+ {
+ CHECK_NATNUM (val);
+ charset.dimension = XINT (val);
+ if (charset.dimension < 1 || charset.dimension > 4)
+ args_out_of_range_3 (val, make_number (1), make_number (4));
+ }
+
+ charset.code_linear_p
+ = (charset.dimension == 1
+ || (charset.code_space[2] == 256
+ && (charset.dimension == 2
+ || (charset.code_space[6] == 256
+ && (charset.dimension == 3
+ || charset.code_space[10] == 256)))));
+
+ if (! charset.code_linear_p)
+ {
+ charset.code_space_mask = (unsigned char *) xmalloc (256);
+ bzero (charset.code_space_mask, 256);
+ for (i = 0; i < 4; i++)
+ for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
+ j++)
+ charset.code_space_mask[j] |= (1 << i);
+ }
+
+ charset.iso_chars_96 = charset.code_space[2] == 96;
+
+ charset.min_code = (charset.code_space[0]
+ | (charset.code_space[4] << 8)
+ | (charset.code_space[8] << 16)
+ | (charset.code_space[12] << 24));
+ charset.max_code = (charset.code_space[1]
+ | (charset.code_space[5] << 8)
+ | (charset.code_space[9] << 16)
+ | (charset.code_space[13] << 24));
+ charset.char_index_offset = 0;
+
+ val = args[charset_arg_min_code];
+ if (! NILP (val))
+ {
+ unsigned code;
+
+ if (INTEGERP (val))
+ code = XINT (val);
+ else
{
- Lisp_Object temp;
- temp = Faref (rev_tbl, make_number (c));
- if (INTEGERP (temp))
- c = XINT (temp);
- if (c >= 256)
- c = (c_save & 0177) + 0200;
+ CHECK_CONS (val);
+ CHECK_NUMBER_CAR (val);
+ CHECK_NUMBER_CDR (val);
+ code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
}
+ if (code < charset.min_code
+ || code > charset.max_code)
+ args_out_of_range_3 (make_number (charset.min_code),
+ make_number (charset.max_code), val);
+ charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
+ charset.min_code = code;
+ }
+
+ val = args[charset_arg_max_code];
+ if (! NILP (val))
+ {
+ unsigned code;
+
+ if (INTEGERP (val))
+ code = XINT (val);
else
{
- if (nonascii_insert_offset > 0)
- c -= nonascii_insert_offset;
- if (c < 128 || c >= 256)
- c = (c_save & 0177) + 0200;
+ CHECK_CONS (val);
+ CHECK_NUMBER_CAR (val);
+ CHECK_NUMBER_CDR (val);
+ code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
}
+ if (code < charset.min_code
+ || code > charset.max_code)
+ args_out_of_range_3 (make_number (charset.min_code),
+ make_number (charset.max_code), val);
+ charset.max_code = code;
}
- return c;
-}
+ charset.compact_codes_p = charset.max_code < 0x1000000;
-
-/* Update the table Vcharset_table with the given arguments (see the
- document of `define-charset' for the meaning of each argument).
- Several other table contents are also updated. The caller should
- check the validity of CHARSET-ID and the remaining arguments in
- advance. */
+ val = args[charset_arg_invalid_code];
+ if (NILP (val))
+ {
+ if (charset.min_code > 0)
+ charset.invalid_code = 0;
+ else
+ {
+ XSETINT (val, charset.max_code + 1);
+ if (XINT (val) == charset.max_code + 1)
+ charset.invalid_code = charset.max_code + 1;
+ else
+ error ("Attribute :invalid-code must be specified");
+ }
+ }
+ else
+ {
+ CHECK_NATNUM (val);
+ charset.invalid_code = XFASTINT (val);
+ }
-void
-update_charset_table (charset_id, dimension, chars, width, direction,
- iso_final_char, iso_graphic_plane,
- short_name, long_name, description)
- Lisp_Object charset_id, dimension, chars, width, direction;
- Lisp_Object iso_final_char, iso_graphic_plane;
- Lisp_Object short_name, long_name, description;
-{
- int charset = XINT (charset_id);
- int bytes;
- unsigned char leading_code_base, leading_code_ext;
-
- if (NILP (CHARSET_TABLE_ENTRY (charset)))
- CHARSET_TABLE_ENTRY (charset)
- = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
-
- if (NILP (long_name))
- long_name = short_name;
- if (NILP (description))
- description = long_name;
-
- /* Get byte length of multibyte form, base leading-code, and
- extended leading-code of the charset. See the comment under the
- title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
- bytes = XINT (dimension);
- if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
+ val = args[charset_arg_iso_final];
+ if (NILP (val))
+ charset.iso_final = -1;
+ else
{
- /* Official charset, it doesn't have an extended leading-code. */
- if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
- bytes += 1; /* For a base leading-code. */
- leading_code_base = charset;
- leading_code_ext = 0;
+ CHECK_NUMBER (val);
+ if (XINT (val) < '0' || XINT (val) > 127)
+ error ("Invalid iso-final-char: %d", XINT (val));
+ charset.iso_final = XINT (val);
}
+
+ val = args[charset_arg_iso_revision];
+ if (NILP (val))
+ charset.iso_revision = -1;
else
{
- /* Private charset. */
- bytes += 2; /* For base and extended leading-codes. */
- leading_code_base
- = (charset < LEADING_CODE_EXT_12
- ? LEADING_CODE_PRIVATE_11
- : (charset < LEADING_CODE_EXT_21
- ? LEADING_CODE_PRIVATE_12
- : (charset < LEADING_CODE_EXT_22
- ? LEADING_CODE_PRIVATE_21
- : LEADING_CODE_PRIVATE_22)));
- leading_code_ext = charset;
- if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
- error ("Invalid dimension for the charset-ID %d", charset);
+ CHECK_NUMBER (val);
+ if (XINT (val) > 63)
+ args_out_of_range (make_number (63), val);
+ charset.iso_revision = XINT (val);
}
- CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
- CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
- CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
- CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
- CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
- CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
- CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
- = make_number (leading_code_base);
- CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
- = make_number (leading_code_ext);
- CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
- CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
- = iso_graphic_plane;
- CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
- CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
- CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
- CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
+ val = args[charset_arg_emacs_mule_id];
+ if (NILP (val))
+ charset.emacs_mule_id = -1;
+ else
+ {
+ CHECK_NATNUM (val);
+ if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
+ error ("Invalid emacs-mule-id: %d", XINT (val));
+ charset.emacs_mule_id = XINT (val);
+ }
- {
- /* If we have already defined a charset which has the same
- DIMENSION, CHARS and ISO-FINAL-CHAR but the different
- DIRECTION, we must update the entry REVERSE-CHARSET of both
- charsets. If there's no such charset, the value of the entry
- is set to nil. */
- int i;
-
- for (i = 0; i <= MAX_CHARSET; i++)
- if (!NILP (CHARSET_TABLE_ENTRY (i)))
+ charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
+
+ charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
+
+ charset.unified_p = 0;
+
+ bzero (charset.fast_map, sizeof (charset.fast_map));
+
+ if (! NILP (args[charset_arg_code_offset]))
+ {
+ val = args[charset_arg_code_offset];
+ CHECK_NUMBER (val);
+
+ charset.method = CHARSET_METHOD_OFFSET;
+ charset.code_offset = XINT (val);
+
+ i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+ charset.min_char = i + charset.code_offset;
+ i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
+ charset.max_char = i + charset.code_offset;
+ if (charset.max_char > MAX_CHAR)
+ error ("Unsupported max char: %d", charset.max_char);
+
+ i = (charset.min_char >> 7) << 7;
+ for (; i < 0x10000 && i <= charset.max_char; i += 128)
+ CHARSET_FAST_MAP_SET (i, charset.fast_map);
+ i = (i >> 12) << 12;
+ for (; i <= charset.max_char; i += 0x1000)
+ CHARSET_FAST_MAP_SET (i, charset.fast_map);
+ }
+ else if (! NILP (args[charset_arg_map]))
+ {
+ val = args[charset_arg_map];
+ ASET (attrs, charset_map, val);
+ if (STRINGP (val))
+ load_charset_map_from_file (&charset, val, 0);
+ else
+ load_charset_map_from_vector (&charset, val, 0);
+ charset.method = CHARSET_METHOD_MAP_DEFERRED;
+ }
+ else if (! NILP (args[charset_arg_subset]))
+ {
+ Lisp_Object parent;
+ Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
+ struct charset *parent_charset;
+
+ val = args[charset_arg_subset];
+ parent = Fcar (val);
+ CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
+ parent_min_code = Fnth (make_number (1), val);
+ CHECK_NATNUM (parent_min_code);
+ parent_max_code = Fnth (make_number (2), val);
+ CHECK_NATNUM (parent_max_code);
+ parent_code_offset = Fnth (make_number (3), val);
+ CHECK_NUMBER (parent_code_offset);
+ val = Fmake_vector (make_number (4), Qnil);
+ ASET (val, 0, make_number (parent_charset->id));
+ ASET (val, 1, parent_min_code);
+ ASET (val, 2, parent_max_code);
+ ASET (val, 3, parent_code_offset);
+ ASET (attrs, charset_subset, val);
+
+ charset.method = CHARSET_METHOD_SUBSET;
+ /* Here, we just copy the parent's fast_map. It's not accurate,
+ but at least it works for quickly detecting which character
+ DOESN'T belong to this charset. */
+ for (i = 0; i < 190; i++)
+ charset.fast_map[i] = parent_charset->fast_map[i];
+
+ /* We also copy these for parents. */
+ charset.min_char = parent_charset->min_char;
+ charset.max_char = parent_charset->max_char;
+ }
+ else if (! NILP (args[charset_arg_superset]))
+ {
+ val = args[charset_arg_superset];
+ charset.method = CHARSET_METHOD_SUPERSET;
+ val = Fcopy_sequence (val);
+ ASET (attrs, charset_superset, val);
+
+ charset.min_char = MAX_CHAR;
+ charset.max_char = 0;
+ for (; ! NILP (val); val = Fcdr (val))
{
- if (CHARSET_DIMENSION (i) == XINT (dimension)
- && CHARSET_CHARS (i) == XINT (chars)
- && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
- && CHARSET_DIRECTION (i) != XINT (direction))
+ Lisp_Object elt, car_part, cdr_part;
+ int this_id, offset;
+ struct charset *this_charset;
+
+ elt = Fcar (val);
+ if (CONSP (elt))
{
- CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
- = make_number (i);
- CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
- break;
+ car_part = XCAR (elt);
+ cdr_part = XCDR (elt);
+ CHECK_CHARSET_GET_ID (car_part, this_id);
+ CHECK_NUMBER (cdr_part);
+ offset = XINT (cdr_part);
}
+ else
+ {
+ CHECK_CHARSET_GET_ID (elt, this_id);
+ offset = 0;
+ }
+ XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
+
+ this_charset = CHARSET_FROM_ID (this_id);
+ if (charset.min_char > this_charset->min_char)
+ charset.min_char = this_charset->min_char;
+ if (charset.max_char < this_charset->max_char)
+ charset.max_char = this_charset->max_char;
+ for (i = 0; i < 190; i++)
+ charset.fast_map[i] |= this_charset->fast_map[i];
}
- if (i > MAX_CHARSET)
- /* No such a charset. */
- CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
- = make_number (-1);
- }
+ }
+ else
+ error ("None of :code-offset, :map, :parents are specified");
+
+ val = args[charset_arg_unify_map];
+ if (! NILP (val) && !STRINGP (val))
+ CHECK_VECTOR (val);
+ ASET (attrs, charset_unify_map, val);
- if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
- && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
+ CHECK_LIST (args[charset_arg_plist]);
+ ASET (attrs, charset_plist, args[charset_arg_plist]);
+
+ charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
+ &hash_code);
+ if (charset.hash_index >= 0)
{
- bytes_by_char_head[leading_code_base] = bytes;
- width_by_char_head[leading_code_base] = XINT (width);
-
- /* Update table emacs_code_class. */
- emacs_code_class[charset] = (bytes == 2
- ? EMACS_leading_code_2
- : (bytes == 3
- ? EMACS_leading_code_3
- : EMACS_leading_code_4));
+ new_definition_p = 0;
+ id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
+ HASH_VALUE (hash_table, charset.hash_index) = attrs;
+ }
+ else
+ {
+ charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
+ hash_code);
+ if (charset_table_used == charset_table_size)
+ {
+ struct charset *new_table
+ = (struct charset *) xmalloc (sizeof (struct charset)
+ * (charset_table_size + 16));
+ bcopy (charset_table, new_table,
+ sizeof (struct charset) * charset_table_size);
+ charset_table_size += 16;
+ charset_table = new_table;
+ }
+ id = charset_table_used++;
+ new_definition_p = 1;
+ }
+
+ ASET (attrs, charset_id, make_number (id));
+ charset.id = id;
+ charset_table[id] = charset;
+
+ if (charset.iso_final >= 0)
+ {
+ ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
+ charset.iso_final) = id;
+ if (new_definition_p)
+ Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
+ Fcons (make_number (id), Qnil));
+ if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
+ charset_jisx0201_roman = id;
+ else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
+ charset_jisx0208_1978 = id;
+ else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
+ charset_jisx0208 = id;
+ }
+
+ if (charset.emacs_mule_id >= 0)
+ {
+ emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
+ if (charset.emacs_mule_id < 0xA0)
+ emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
+ if (new_definition_p)
+ Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
+ Fcons (make_number (id), Qnil));
}
- /* Update table iso_charset_table. */
- if (XINT (iso_final_char) >= 0
- && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
- ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
+ if (new_definition_p)
+ {
+ Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
+ Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
+ Fcons (make_number (id), Qnil));
+ charset_ordered_list_tick++;
+ }
+
+ return Qnil;
}
-#ifdef emacs
-/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
- is invalid. */
-int
-get_charset_id (charset_symbol)
- Lisp_Object charset_symbol;
+/* Same as Fdefine_charset_internal but arguments are more convenient
+ to call from C (typically in syms_of_charset). This can define a
+ charset of `offset' method only. Return the ID of the new
+ charset. */
+
+static int
+define_charset_internal (name, dimension, code_space, min_code, max_code,
+ iso_final, iso_revision, emacs_mule_id,
+ ascii_compatible, supplementary,
+ code_offset)
+ Lisp_Object name;
+ int dimension;
+ unsigned char *code_space;
+ unsigned min_code, max_code;
+ int iso_final, iso_revision, emacs_mule_id;
+ int ascii_compatible, supplementary;
+ int code_offset;
{
+ Lisp_Object args[charset_arg_max];
+ Lisp_Object plist[14];
Lisp_Object val;
- int charset;
-
- /* This originally used a ?: operator, but reportedly the HP-UX
- compiler version HP92453-01 A.10.32.22 miscompiles that. */
- if (SYMBOLP (charset_symbol)
- && VECTORP (val = Fget (charset_symbol, Qcharset))
- && CHARSET_VALID_P (charset =
- XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
- return charset;
- else
- return -1;
+ int i;
+
+ args[charset_arg_name] = name;
+ args[charset_arg_dimension] = make_number (dimension);
+ val = Fmake_vector (make_number (8), make_number (0));
+ for (i = 0; i < 8; i++)
+ ASET (val, i, make_number (code_space[i]));
+ args[charset_arg_code_space] = val;
+ args[charset_arg_min_code] = make_number (min_code);
+ args[charset_arg_max_code] = make_number (max_code);
+ args[charset_arg_iso_final]
+ = (iso_final < 0 ? Qnil : make_number (iso_final));
+ args[charset_arg_iso_revision] = make_number (iso_revision);
+ args[charset_arg_emacs_mule_id]
+ = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
+ args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
+ args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
+ args[charset_arg_invalid_code] = Qnil;
+ args[charset_arg_code_offset] = make_number (code_offset);
+ args[charset_arg_map] = Qnil;
+ args[charset_arg_subset] = Qnil;
+ args[charset_arg_superset] = Qnil;
+ args[charset_arg_unify_map] = Qnil;
+
+ plist[0] = intern (":name");
+ plist[1] = args[charset_arg_name];
+ plist[2] = intern (":dimension");
+ plist[3] = args[charset_arg_dimension];
+ plist[4] = intern (":code-space");
+ plist[5] = args[charset_arg_code_space];
+ plist[6] = intern (":iso-final-char");
+ plist[7] = args[charset_arg_iso_final];
+ plist[8] = intern (":emacs-mule-id");
+ plist[9] = args[charset_arg_emacs_mule_id];
+ plist[10] = intern (":ascii-compatible-p");
+ plist[11] = args[charset_arg_ascii_compatible_p];
+ plist[12] = intern (":code-offset");
+ plist[13] = args[charset_arg_code_offset];
+
+ args[charset_arg_plist] = Flist (14, plist);
+ Fdefine_charset_internal (charset_arg_max, args);
+
+ return XINT (CHARSET_SYMBOL_ID (name));
}
-/* Return an identification number for a new private charset of
- DIMENSION and WIDTH. If there's no more room for the new charset,
- return 0. */
-Lisp_Object
-get_new_private_charset_id (dimension, width)
- int dimension, width;
+
+DEFUN ("define-charset-alias", Fdefine_charset_alias,
+ Sdefine_charset_alias, 2, 2, 0,
+ doc: /* Define ALIAS as an alias for charset CHARSET. */)
+ (alias, charset)
+ Lisp_Object alias, charset;
{
- int charset, from, to;
+ Lisp_Object attr;
- if (dimension == 1)
- {
- from = LEADING_CODE_EXT_11;
- to = LEADING_CODE_EXT_21;
- }
- else
+ CHECK_CHARSET_GET_ATTR (charset, attr);
+ Fputhash (alias, attr, Vcharset_hash_table);
+ Vcharset_list = Fcons (alias, Vcharset_list);
+ return Qnil;
+}
+
+
+DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
+ doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
+ ()
+{
+ return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
+}
+
+
+DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
+ 1, 1, 0,
+ doc: /* Set the unibyte charset to CHARSET.
+This determines how unibyte/multibyte conversion is done. See also
+function `unibyte-charset'. */)
+ (charset)
+ Lisp_Object charset;
+{
+ struct charset *cs;
+ int i, c;
+
+ CHECK_CHARSET_GET_CHARSET (charset, cs);
+ if (! cs->ascii_compatible_p
+ || cs->dimension != 1)
+ error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
+ charset_unibyte = cs->id;
+ for (i = 128; i < 256; i++)
{
- from = LEADING_CODE_EXT_21;
- to = LEADING_CODE_EXT_MAX + 1;
+ c = DECODE_CHAR (cs, i);
+ unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
}
- for (charset = from; charset < to; charset++)
- if (!CHARSET_DEFINED_P (charset)) break;
+ return Qnil;
+}
+
- return make_number (charset < to ? charset : 0);
+DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
+ doc: /* Return the property list of CHARSET. */)
+ (charset)
+ Lisp_Object charset;
+{
+ Lisp_Object attrs;
+
+ CHECK_CHARSET_GET_ATTR (charset, attrs);
+ return CHARSET_ATTR_PLIST (attrs);
}
-DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
- doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
-If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
- treated as a private charset.
-INFO-VECTOR is a vector of the format:
- [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
- SHORT-NAME LONG-NAME DESCRIPTION]
-The meanings of each elements is as follows:
-DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
-CHARS (integer) is the number of characters in a dimension: 94 or 96.
-WIDTH (integer) is the number of columns a character in the charset
-occupies on the screen: one of 0, 1, and 2.
-
-DIRECTION (integer) is the rendering direction of characters in the
-charset when rendering. If 0, render from left to right, else
-render from right to left.
-
-ISO-FINAL-CHAR (character) is the final character of the
-corresponding ISO 2022 charset.
-It may be -1 if the charset is internal use only.
-
-ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
-while encoding to variants of ISO 2022 coding system, one of the
-following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
-It may be -1 if the charset is internal use only.
-
-SHORT-NAME (string) is the short name to refer to the charset.
-
-LONG-NAME (string) is the long name to refer to the charset.
-
-DESCRIPTION (string) is the description string of the charset. */)
- (charset_id, charset_symbol, info_vector)
- Lisp_Object charset_id, charset_symbol, info_vector;
+
+DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
+ doc: /* Set CHARSET's property list to PLIST. */)
+ (charset, plist)
+ Lisp_Object charset, plist;
{
- Lisp_Object *vec;
+ Lisp_Object attrs;
+
+ CHECK_CHARSET_GET_ATTR (charset, attrs);
+ CHARSET_ATTR_PLIST (attrs) = plist;
+ return plist;
+}
- if (!NILP (charset_id))
- CHECK_NUMBER (charset_id);
- CHECK_SYMBOL (charset_symbol);
- CHECK_VECTOR (info_vector);
- if (! NILP (charset_id))
+DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
+ doc: /* Unify characters of CHARSET with Unicode.
+This means reading the relevant file and installing the table defined
+by CHARSET's `:unify-map' property.
+
+Optional second arg UNIFY-MAP is a file name string or a vector. It has
+the same meaning as the `:unify-map' attribute in the function
+`define-charset' (which see).
+
+Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
+ (charset, unify_map, deunify)
+ Lisp_Object charset, unify_map, deunify;
+{
+ int id;
+ struct charset *cs;
+
+ CHECK_CHARSET_GET_ID (charset, id);
+ cs = CHARSET_FROM_ID (id);
+ if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
+ load_charset (cs);
+ if (NILP (deunify)
+ ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
+ : ! CHARSET_UNIFIED_P (cs))
+ return Qnil;
+
+ CHARSET_UNIFIED_P (cs) = 0;
+ if (NILP (deunify))
{
- if (! CHARSET_VALID_P (XINT (charset_id)))
- error ("Invalid CHARSET: %d", XINT (charset_id));
- else if (CHARSET_DEFINED_P (XINT (charset_id)))
- error ("Already defined charset: %d", XINT (charset_id));
+ if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
+ error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
+ if (NILP (unify_map))
+ unify_map = CHARSET_UNIFY_MAP (cs);
+ if (STRINGP (unify_map))
+ load_charset_map_from_file (cs, unify_map, 2);
+ else if (VECTORP (unify_map))
+ load_charset_map_from_vector (cs, unify_map, 2);
+ else if (NILP (unify_map))
+ error ("No unify-map for charset");
+ else
+ error ("Bad unify-map arg");
+ CHARSET_UNIFIED_P (cs) = 1;
}
-
- vec = XVECTOR (info_vector)->contents;
- if (XVECTOR (info_vector)->size != 9
- || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
- || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
- || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
- || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
- || !INTEGERP (vec[4])
- || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
- || !INTEGERP (vec[5])
- || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
- || !STRINGP (vec[6])
- || !STRINGP (vec[7])
- || !STRINGP (vec[8]))
- error ("Invalid info-vector argument for defining charset %s",
- SDATA (SYMBOL_NAME (charset_symbol)));
-
- if (NILP (charset_id))
+ else if (CHAR_TABLE_P (Vchar_unify_table))
{
- charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
- if (XINT (charset_id) == 0)
- error ("There's no room for a new private charset %s",
- SDATA (SYMBOL_NAME (charset_symbol)));
+ int min_code = CHARSET_MIN_CODE (cs);
+ int max_code = CHARSET_MAX_CODE (cs);
+ int min_char = DECODE_CHAR (cs, min_code);
+ int max_char = DECODE_CHAR (cs, max_code);
+
+ char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
}
- update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
- vec[4], vec[5], vec[6], vec[7], vec[8]);
- Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
- CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
- Vcharset_list = Fcons (charset_symbol, Vcharset_list);
- Fupdate_coding_systems_internal ();
return Qnil;
}
-DEFUN ("generic-character-list", Fgeneric_character_list,
- Sgeneric_character_list, 0, 0, 0,
- doc: /* Return a list of all possible generic characters.
-It includes a generic character for a charset not yet defined. */)
- ()
-{
- return Vgeneric_character_list;
-}
-
DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
Sget_unused_iso_final_char, 2, 2, 0,
- doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
+ doc: /*
+Return an unsed ISO final char for a charset of DIMENISION and CHARS.
DIMENSION is the number of bytes to represent a character: 1 or 2.
CHARS is the number of characters in a dimension: 94 or 96.
@@ -744,143 +1281,149 @@ return nil. */)
CHECK_NUMBER (dimension);
CHECK_NUMBER (chars);
- if (XINT (dimension) != 1 && XINT (dimension) != 2)
- error ("Invalid charset dimension %d, it should be 1 or 2",
- XINT (dimension));
+ if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
+ args_out_of_range_3 (dimension, make_number (1), make_number (3));
if (XINT (chars) != 94 && XINT (chars) != 96)
- error ("Invalid charset chars %d, it should be 94 or 96",
- XINT (chars));
+ args_out_of_range_3 (chars, make_number (94), make_number (96));
for (final_char = '0'; final_char <= '?'; final_char++)
- {
- if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
- break;
- }
+ if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
+ break;
return (final_char <= '?' ? make_number (final_char) : Qnil);
}
-DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
- 4, 4, 0,
- doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
-CHARSET should be defined by `defined-charset' in advance. */)
- (dimension, chars, final_char, charset_symbol)
- Lisp_Object dimension, chars, final_char, charset_symbol;
+static void
+check_iso_charset_parameter (dimension, chars, final_char)
+ Lisp_Object dimension, chars, final_char;
{
- int charset;
-
- CHECK_NUMBER (dimension);
- CHECK_NUMBER (chars);
- CHECK_NUMBER (final_char);
- CHECK_SYMBOL (charset_symbol);
+ CHECK_NATNUM (dimension);
+ CHECK_NATNUM (chars);
+ CHECK_NATNUM (final_char);
- if (XINT (dimension) != 1 && XINT (dimension) != 2)
- error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
+ if (XINT (dimension) > 3)
+ error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
if (XINT (chars) != 94 && XINT (chars) != 96)
error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
- if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
+ if (XINT (final_char) < '0' || XINT (final_char) > '~')
error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
- if ((charset = get_charset_id (charset_symbol)) < 0)
- error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset_symbol)));
+}
- ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
+
+DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
+ 4, 4, 0,
+ doc: /*
+Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
+CHARSET should be defined by `define-charset' in advance. */)
+ (dimension, chars, final_char, charset)
+ Lisp_Object dimension, chars, final_char, charset;
+{
+ int id;
+
+ CHECK_CHARSET_GET_ID (charset, id);
+ check_iso_charset_parameter (dimension, chars, final_char);
+
+ ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), XINT (final_char)) = id;
return Qnil;
}
+
/* Return information about charsets in the text at PTR of NBYTES
bytes, which are NCHARS characters. The value is:
0: Each character is represented by one byte. This is always
- true for unibyte text.
- 1: No charsets other than ascii eight-bit-control,
- eight-bit-graphic, and latin-1 are found.
- 2: Otherwise.
+ true for a unibyte string. For a multibyte string, true if
+ it contains only ASCII characters.
+
+ 1: No charsets other than ascii, control-1, and latin-1 are
+ found.
- In addition, if CHARSETS is nonzero, for each found charset N, set
- CHARSETS[N] to 1. For that, callers should allocate CHARSETS
- (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
- table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
- 1 (note that there's no charset whose ID is 1). */
+ 2: Otherwise.
+*/
int
-find_charset_in_text (ptr, nchars, nbytes, charsets, table)
- const unsigned char *ptr;
- int nchars, nbytes, *charsets;
- Lisp_Object table;
+string_xstring_p (string)
+ Lisp_Object string;
{
- if (nchars == nbytes)
- {
- if (charsets && nbytes > 0)
- {
- const unsigned char *endp = ptr + nbytes;
- int maskbits = 0;
+ const unsigned char *p = SDATA (string);
+ const unsigned char *endp = p + SBYTES (string);
+ struct charset *charset;
- while (ptr < endp && maskbits != 7)
- {
- maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
- ptr++;
- }
+ if (SCHARS (string) == SBYTES (string))
+ return 0;
- if (maskbits & 1)
- charsets[CHARSET_ASCII] = 1;
- if (maskbits & 2)
- charsets[CHARSET_8_BIT_CONTROL] = 1;
- if (maskbits & 4)
- charsets[CHARSET_8_BIT_GRAPHIC] = 1;
- }
- return 0;
- }
- else
+ charset = CHARSET_FROM_ID (charset_iso_8859_1);
+ while (p < endp)
{
- int return_val = 1;
- int bytes, charset, c1, c2;
+ int c = STRING_CHAR_ADVANCE (p);
- if (! CHAR_TABLE_P (table))
- table = Qnil;
+ /* Fixme: comparison of unsigned expression < 0 is always false */
+ if (ENCODE_CHAR (charset, c) < 0)
+ return 2;
+ }
+ return 1;
+}
- while (nchars-- > 0)
- {
- SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
- ptr += bytes;
- if (!CHARSET_DEFINED_P (charset))
- charset = 1;
- else if (! NILP (table))
- {
- int c = translate_char (table, -1, charset, c1, c2);
- if (c >= 0)
- charset = CHAR_CHARSET (c);
- }
+/* Find charsets in the string at PTR of NCHARS and NBYTES.
- if (return_val == 1
- && charset != CHARSET_ASCII
- && charset != CHARSET_8_BIT_CONTROL
- && charset != CHARSET_8_BIT_GRAPHIC
- && charset != charset_latin_iso8859_1)
- return_val = 2;
+ CHARSETS is a vector. Each element is a cons of CHARSET and
+ FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
+ FOUND-FLAG t (or nil) means that the corresponding charset is
+ already found (or not yet found).
- if (charsets)
- charsets[charset] = 1;
- else if (return_val == 2)
- break;
+ It may lookup a translation table TABLE if supplied. */
+
+static void
+find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
+ const unsigned char *ptr;
+ int nchars, nbytes;
+ Lisp_Object charsets, table;
+{
+ const unsigned char *pend = ptr + nbytes;
+ int ncharsets = ASIZE (charsets);
+
+ if (nchars == nbytes)
+ return;
+
+ while (ptr < pend)
+ {
+ int c = STRING_CHAR_ADVANCE (ptr);
+ int i;
+ int all_found = 1;
+ Lisp_Object elt;
+
+ if (!NILP (table))
+ c = translate_char (table, c);
+ for (i = 0; i < ncharsets; i++)
+ {
+ elt = AREF (charsets, i);
+ if (NILP (XCDR (elt)))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
+
+ if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
+ XSETCDR (elt, Qt);
+ else
+ all_found = 0;
+ }
}
- return return_val;
+ if (all_found)
+ break;
}
}
+/* Fixme: returns nil for unibyte. */
DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
2, 3, 0,
doc: /* Return a list of charsets in the region between BEG and END.
BEG and END are buffer positions.
Optional arg TABLE if non-nil is a translation table to look up.
-If the region contains invalid multibyte characters,
-`unknown' is included in the returned list.
-
If the current buffer is unibyte, the returned list may contain
only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
(beg, end, table)
Lisp_Object beg, end, table;
{
- int charsets[MAX_CHARSET + 1];
+ Lisp_Object charsets;
int from, from_byte, to, stop, stop_byte, i;
Lisp_Object val;
@@ -898,11 +1441,14 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
from_byte = CHAR_TO_BYTE (from);
- bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
+ charsets = Fmake_vector (make_number (charset_table_used), Qnil);
+ for (i = 0; i < charset_table_used; i++)
+ ASET (charsets, i, Fcons (make_number (i), Qnil));
+
while (1)
{
- find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
- stop_byte - from_byte, charsets, table);
+ find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
+ stop_byte - from_byte, charsets, table);
if (stop < to)
{
from = stop, from_byte = stop_byte;
@@ -913,856 +1459,613 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
}
val = Qnil;
- if (charsets[1])
- val = Fcons (Qunknown, val);
- for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
- if (charsets[i])
- val = Fcons (CHARSET_SYMBOL (i), val);
- if (charsets[0])
- val = Fcons (Qascii, val);
+ for (i = charset_table_used - 1; i >= 0; i--)
+ if (!NILP (XCDR (AREF (charsets, i))))
+ val = Fcons (CHARSET_NAME (charset_table + i), val);
return val;
}
+/* Fixme: returns nil for unibyte. */
DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1, 2, 0,
doc: /* Return a list of charsets in STR.
Optional arg TABLE if non-nil is a translation table to look up.
-If the string contains invalid multibyte characters,
-`unknown' is included in the returned list.
-
If STR is unibyte, the returned list may contain
-only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
+only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
(str, table)
Lisp_Object str, table;
{
- int charsets[MAX_CHARSET + 1];
+ Lisp_Object charsets;
int i;
Lisp_Object val;
CHECK_STRING (str);
- bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
- find_charset_in_text (SDATA (str), SCHARS (str),
- SBYTES (str), charsets, table);
+ charsets = Fmake_vector (make_number (charset_table_used), Qnil);
+ for (i = 0; i < charset_table_used; i++)
+ ASET (charsets, i, Fcons (make_number (i), Qnil));
+ find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
+ charsets, table);
val = Qnil;
- if (charsets[1])
- val = Fcons (Qunknown, val);
- for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
- if (charsets[i])
- val = Fcons (CHARSET_SYMBOL (i), val);
- if (charsets[0])
- val = Fcons (Qascii, val);
+ for (i = charset_table_used - 1; i >= 0; i--)
+ if (!NILP (XCDR (AREF (charsets, i))))
+ val = Fcons (CHARSET_NAME (charset_table + i), val);
return val;
}
-DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
- doc: /* Return a character made from arguments.
-Internal use only. */)
- (charset, code1, code2)
- Lisp_Object charset, code1, code2;
+
+/* Return a character correponding to the code-point CODE of
+ CHARSET. */
+
+int
+decode_char (charset, code)
+ struct charset *charset;
+ unsigned code;
{
- int charset_id, c1, c2;
+ int c, char_index;
+ enum charset_method method = CHARSET_METHOD (charset);
- CHECK_NUMBER (charset);
- charset_id = XINT (charset);
- if (!CHARSET_DEFINED_P (charset_id))
- error ("Invalid charset ID: %d", XINT (charset));
+ if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
+ return -1;
- if (NILP (code1))
- c1 = 0;
- else
- {
- CHECK_NUMBER (code1);
- c1 = XINT (code1);
- }
- if (NILP (code2))
- c2 = 0;
- else
+ if (method == CHARSET_METHOD_MAP_DEFERRED)
{
- CHECK_NUMBER (code2);
- c2 = XINT (code2);
+ load_charset (charset);
+ method = CHARSET_METHOD (charset);
}
- if (charset_id == CHARSET_ASCII)
+ if (method == CHARSET_METHOD_SUBSET)
{
- if (c1 < 0 || c1 > 0x7F)
- goto invalid_code_posints;
- return make_number (c1);
+ Lisp_Object subset_info;
+
+ subset_info = CHARSET_SUBSET (charset);
+ charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+ code -= XINT (AREF (subset_info, 3));
+ if (code < XFASTINT (AREF (subset_info, 1))
+ || code > XFASTINT (AREF (subset_info, 2)))
+ c = -1;
+ else
+ c = DECODE_CHAR (charset, code);
}
- else if (charset_id == CHARSET_8_BIT_CONTROL)
+ else if (method == CHARSET_METHOD_SUPERSET)
{
- if (NILP (code1))
- c1 = 0x80;
- else if (c1 < 0x80 || c1 > 0x9F)
- goto invalid_code_posints;
- return make_number (c1);
+ Lisp_Object parents;
+
+ parents = CHARSET_SUPERSET (charset);
+ c = -1;
+ for (; CONSP (parents); parents = XCDR (parents))
+ {
+ int id = XINT (XCAR (XCAR (parents)));
+ int code_offset = XINT (XCDR (XCAR (parents)));
+ unsigned this_code = code - code_offset;
+
+ charset = CHARSET_FROM_ID (id);
+ if ((c = DECODE_CHAR (charset, this_code)) >= 0)
+ break;
+ }
}
- else if (charset_id == CHARSET_8_BIT_GRAPHIC)
+ else
{
- if (NILP (code1))
- c1 = 0xA0;
- else if (c1 < 0xA0 || c1 > 0xFF)
- goto invalid_code_posints;
- return make_number (c1);
- }
- else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
- goto invalid_code_posints;
- c1 &= 0x7F;
- c2 &= 0x7F;
- if (c1 == 0
- ? c2 != 0
- : (c2 == 0
- ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
- : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
- goto invalid_code_posints;
- return make_number (MAKE_CHAR (charset_id, c1, c2));
-
- invalid_code_posints:
- error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
-}
+ char_index = CODE_POINT_TO_INDEX (charset, code);
+ if (char_index < 0)
+ return -1;
-DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
- doc: /* Return list of charset and one or two position-codes of CHAR.
-If CHAR is invalid as a character code,
-return a list of symbol `unknown' and CHAR. */)
- (ch)
- Lisp_Object ch;
-{
- int c, charset, c1, c2;
+ if (method == CHARSET_METHOD_MAP)
+ {
+ Lisp_Object decoder;
- CHECK_NUMBER (ch);
- c = XFASTINT (ch);
- if (!CHAR_VALID_P (c, 1))
- return Fcons (Qunknown, Fcons (ch, Qnil));
- SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- return (c2 >= 0
- ? Fcons (CHARSET_SYMBOL (charset),
- Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
- : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
-}
+ decoder = CHARSET_DECODER (charset);
+ if (! VECTORP (decoder))
+ return -1;
+ c = XINT (AREF (decoder, char_index));
+ }
+ else
+ {
+ c = char_index + CHARSET_CODE_OFFSET (charset);
+ }
+ }
-DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
- doc: /* Return charset of CHAR. */)
- (ch)
- Lisp_Object ch;
-{
- CHECK_NUMBER (ch);
+ if (CHARSET_UNIFIED_P (charset)
+ && c >= 0)
+ {
+ MAYBE_UNIFY_CHAR (c);
+ }
- return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
+ return c;
}
-DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
- doc: /* Return charset of a character in the current buffer at position POS.
-If POS is nil, it defauls to the current point.
-If POS is out of range, the value is nil. */)
- (pos)
- Lisp_Object pos;
-{
- Lisp_Object ch;
- int charset;
-
- ch = Fchar_after (pos);
- if (! INTEGERP (ch))
- return ch;
- charset = CHAR_CHARSET (XINT (ch));
- return CHARSET_SYMBOL (charset);
-}
+/* Variable used temporarily by the macro ENCODE_CHAR. */
+Lisp_Object charset_work;
-DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
- doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
+/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
+ CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
+ use CHARSET's strict_max_char instead of max_char. */
-ISO 2022's designation sequence (escape sequence) distinguishes charsets
-by their DIMENSION, CHARS, and FINAL-CHAR,
-where as Emacs distinguishes them by charset symbol.
-See the documentation of the function `charset-info' for the meanings of
-DIMENSION, CHARS, and FINAL-CHAR. */)
- (dimension, chars, final_char)
- Lisp_Object dimension, chars, final_char;
-{
- int charset;
-
- CHECK_NUMBER (dimension);
- CHECK_NUMBER (chars);
- CHECK_NUMBER (final_char);
-
- if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
- return Qnil;
- return CHARSET_SYMBOL (charset);
-}
-
-/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
- generic character. If GENERICP is zero, return nonzero iff C is a
- valid normal character. Do not call this function directly,
- instead use macro CHAR_VALID_P. */
-int
-char_valid_p (c, genericp)
- int c, genericp;
+unsigned
+encode_char (charset, c)
+ struct charset *charset;
+ int c;
{
- int charset, c1, c2;
+ unsigned code;
+ enum charset_method method = CHARSET_METHOD (charset);
- if (c < 0 || c >= MAX_CHAR)
- return 0;
- if (SINGLE_BYTE_CHAR_P (c))
- return 1;
- SPLIT_CHAR (c, charset, c1, c2);
- if (genericp)
+ if (CHARSET_UNIFIED_P (charset))
{
- if (c1)
- {
- if (c2 <= 0) c2 = 0x20;
- }
- else
+ Lisp_Object deunifier, deunified;
+
+ deunifier = CHARSET_DEUNIFIER (charset);
+ if (! CHAR_TABLE_P (deunifier))
{
- if (c2 <= 0) c1 = c2 = 0x20;
+ Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
+ deunifier = CHARSET_DEUNIFIER (charset);
}
+ deunified = CHAR_TABLE_REF (deunifier, c);
+ if (! NILP (deunified))
+ c = XINT (deunified);
}
- return (CHARSET_DEFINED_P (charset)
- && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
-}
-DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
- doc: /* Return t if OBJECT is a valid normal character.
-If optional arg GENERICP is non-nil, also return t if OBJECT is
-a valid generic character. */)
- (object, genericp)
- Lisp_Object object, genericp;
-{
- if (! NATNUMP (object))
- return Qnil;
- return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
-}
+ if (method == CHARSET_METHOD_SUBSET)
+ {
+ Lisp_Object subset_info;
+ struct charset *this_charset;
+
+ subset_info = CHARSET_SUBSET (charset);
+ this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+ code = ENCODE_CHAR (this_charset, c);
+ if (code == CHARSET_INVALID_CODE (this_charset)
+ || code < XFASTINT (AREF (subset_info, 1))
+ || code > XFASTINT (AREF (subset_info, 2)))
+ return CHARSET_INVALID_CODE (charset);
+ code += XINT (AREF (subset_info, 3));
+ return code;
+ }
-DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
- Sunibyte_char_to_multibyte, 1, 1, 0,
- doc: /* Convert the unibyte character CH to multibyte character.
-The conversion is done based on `nonascii-translation-table' (which see)
- or `nonascii-insert-offset' (which see). */)
- (ch)
- Lisp_Object ch;
-{
- int c;
+ if (method == CHARSET_METHOD_SUPERSET)
+ {
+ Lisp_Object parents;
- CHECK_NUMBER (ch);
- c = XINT (ch);
- if (c < 0 || c >= 0400)
- error ("Invalid unibyte character: %d", c);
- c = unibyte_char_to_multibyte (c);
- if (c < 0)
- error ("Can't convert to multibyte character: %d", XINT (ch));
- return make_number (c);
-}
+ parents = CHARSET_SUPERSET (charset);
+ for (; CONSP (parents); parents = XCDR (parents))
+ {
+ int id = XINT (XCAR (XCAR (parents)));
+ int code_offset = XINT (XCDR (XCAR (parents)));
+ struct charset *this_charset = CHARSET_FROM_ID (id);
-DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
- Smultibyte_char_to_unibyte, 1, 1, 0,
- doc: /* Convert the multibyte character CH to unibyte character.
-The conversion is done based on `nonascii-translation-table' (which see)
- or `nonascii-insert-offset' (which see). */)
- (ch)
- Lisp_Object ch;
-{
- int c;
+ code = ENCODE_CHAR (this_charset, c);
+ if (code != CHARSET_INVALID_CODE (this_charset))
+ return code + code_offset;
+ }
+ return CHARSET_INVALID_CODE (charset);
+ }
- CHECK_NUMBER (ch);
- c = XINT (ch);
- if (! CHAR_VALID_P (c, 0))
- error ("Invalid multibyte character: %d", c);
- c = multibyte_char_to_unibyte (c, Qnil);
- if (c < 0)
- error ("Can't convert to unibyte character: %d", XINT (ch));
- return make_number (c);
-}
+ if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
+ || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
+ return CHARSET_INVALID_CODE (charset);
-DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
- doc: /* Return 1 regardless of the argument CHAR. */)
- (ch)
- Lisp_Object ch;
-{
- CHECK_NUMBER (ch);
- return make_number (1);
-}
+ if (method == CHARSET_METHOD_MAP_DEFERRED)
+ {
+ load_charset (charset);
+ method = CHARSET_METHOD (charset);
+ }
-/* Return how many bytes C will occupy in a multibyte buffer.
- Don't call this function directly, instead use macro CHAR_BYTES. */
-int
-char_bytes (c)
- int c;
-{
- int charset;
+ if (method == CHARSET_METHOD_MAP)
+ {
+ Lisp_Object encoder;
+ Lisp_Object val;
- if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
- return 1;
- if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
- return 1;
+ encoder = CHARSET_ENCODER (charset);
+ if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
+ return CHARSET_INVALID_CODE (charset);
+ val = CHAR_TABLE_REF (encoder, c);
+ if (NILP (val))
+ return CHARSET_INVALID_CODE (charset);
+ code = XINT (val);
+ if (! CHARSET_COMPACT_CODES_P (charset))
+ code = INDEX_TO_CODE_POINT (charset, code);
+ }
+ else /* method == CHARSET_METHOD_OFFSET */
+ {
+ code = c - CHARSET_CODE_OFFSET (charset);
+ code = INDEX_TO_CODE_POINT (charset, code);
+ }
- charset = CHAR_CHARSET (c);
- return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
+ return code;
}
-/* Return the width of character of which multi-byte form starts with
- C. The width is measured by how many columns occupied on the
- screen when displayed in the current buffer. */
-
-#define ONE_BYTE_CHAR_WIDTH(c) \
- (c < 0x20 \
- ? (c == '\t' \
- ? XFASTINT (current_buffer->tab_width) \
- : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
- : (c < 0x7f \
- ? 1 \
- : (c == 0x7F \
- ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
- : ((! NILP (current_buffer->enable_multibyte_characters) \
- && BASE_LEADING_CODE_P (c)) \
- ? WIDTH_BY_CHAR_HEAD (c) \
- : 4))))
-
-DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
- doc: /* Return width of CHAR when displayed in the current buffer.
-The width is measured by how many columns it occupies on the screen.
-Tab is taken to occupy `tab-width' columns. */)
- (ch)
- Lisp_Object ch;
-{
- Lisp_Object val, disp;
- int c;
- struct Lisp_Char_Table *dp = buffer_display_table ();
- CHECK_NUMBER (ch);
+DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
+ doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
+Return nil if CODE-POINT is not valid in CHARSET.
- c = XINT (ch);
+CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
- /* Get the way the display table would display it. */
- disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
+Optional argument RESTRICTION specifies a way to map the pair of CCS
+and CODE-POINT to a chracter. Currently not supported and just ignored. */)
+ (charset, code_point, restriction)
+ Lisp_Object charset, code_point, restriction;
+{
+ int c, id;
+ unsigned code;
+ struct charset *charsetp;
- if (VECTORP (disp))
- XSETINT (val, XVECTOR (disp)->size);
- else if (SINGLE_BYTE_CHAR_P (c))
- XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
+ CHECK_CHARSET_GET_ID (charset, id);
+ if (CONSP (code_point))
+ {
+ CHECK_NATNUM_CAR (code_point);
+ CHECK_NATNUM_CDR (code_point);
+ code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
+ }
else
{
- int charset = CHAR_CHARSET (c);
-
- XSETFASTINT (val, CHARSET_WIDTH (charset));
+ CHECK_NATNUM (code_point);
+ code = XINT (code_point);
}
- return val;
+ charsetp = CHARSET_FROM_ID (id);
+ c = DECODE_CHAR (charsetp, code);
+ return (c >= 0 ? make_number (c) : Qnil);
}
-/* Return width of string STR of length LEN when displayed in the
- current buffer. The width is measured by how many columns it
- occupies on the screen. */
-int
-strwidth (str, len)
- unsigned char *str;
- int len;
+DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
+ doc: /* Encode the character CH into a code-point of CHARSET.
+Return nil if CHARSET doesn't include CH.
+
+Optional argument RESTRICTION specifies a way to map CHAR to a
+code-point in CCS. Currently not supported and just ignored. */)
+ (ch, charset, restriction)
+ Lisp_Object ch, charset, restriction;
{
- return c_string_width (str, len, -1, NULL, NULL);
+ int id;
+ unsigned code;
+ struct charset *charsetp;
+
+ CHECK_CHARSET_GET_ID (charset, id);
+ CHECK_NATNUM (ch);
+ charsetp = CHARSET_FROM_ID (id);
+ code = ENCODE_CHAR (charsetp, XINT (ch));
+ if (code == CHARSET_INVALID_CODE (charsetp))
+ return Qnil;
+ if (code > 0x7FFFFFF)
+ return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
+ return make_number (code);
}
-/* Return width of string STR of length LEN when displayed in the
- current buffer. The width is measured by how many columns it
- occupies on the screen. If PRECISION > 0, return the width of
- longest substring that doesn't exceed PRECISION, and set number of
- characters and bytes of the substring in *NCHARS and *NBYTES
- respectively. */
-int
-c_string_width (str, len, precision, nchars, nbytes)
- const unsigned char *str;
- int len, precision, *nchars, *nbytes;
+DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
+ doc:
+ /* Return a character of CHARSET whose position codes are CODEn.
+
+CODE1 through CODE4 are optional, but if you don't supply sufficient
+position codes, it is assumed that the minimum code in each dimension
+is specified. */)
+ (charset, code1, code2, code3, code4)
+ Lisp_Object charset, code1, code2, code3, code4;
{
- int i = 0, i_byte = 0;
- int width = 0;
- int chars;
- struct Lisp_Char_Table *dp = buffer_display_table ();
+ int id, dimension;
+ struct charset *charsetp;
+ unsigned code;
+ int c;
- while (i_byte < len)
+ CHECK_CHARSET_GET_ID (charset, id);
+ charsetp = CHARSET_FROM_ID (id);
+
+ dimension = CHARSET_DIMENSION (charsetp);
+ if (NILP (code1))
+ code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
+ ? 0 : CHARSET_MIN_CODE (charsetp));
+ else
{
- int bytes, thiswidth;
- Lisp_Object val;
+ CHECK_NATNUM (code1);
+ if (XFASTINT (code1) >= 0x100)
+ args_out_of_range (make_number (0xFF), code1);
+ code = XFASTINT (code1);
- if (dp)
+ if (dimension > 1)
{
- int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
-
- chars = 1;
- val = DISP_CHAR_VECTOR (dp, c);
- if (VECTORP (val))
- thiswidth = XVECTOR (val)->size;
+ code <<= 8;
+ if (NILP (code2))
+ code |= charsetp->code_space[(dimension - 2) * 4];
else
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
- else
- {
- chars = 1;
- PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
+ {
+ CHECK_NATNUM (code2);
+ if (XFASTINT (code2) >= 0x100)
+ args_out_of_range (make_number (0xFF), code2);
+ code |= XFASTINT (code2);
+ }
- if (precision > 0
- && (width + thiswidth > precision))
- {
- *nchars = i;
- *nbytes = i_byte;
- return width;
+ if (dimension > 2)
+ {
+ code <<= 8;
+ if (NILP (code3))
+ code |= charsetp->code_space[(dimension - 3) * 4];
+ else
+ {
+ CHECK_NATNUM (code3);
+ if (XFASTINT (code3) >= 0x100)
+ args_out_of_range (make_number (0xFF), code3);
+ code |= XFASTINT (code3);
+ }
+
+ if (dimension > 3)
+ {
+ code <<= 8;
+ if (NILP (code4))
+ code |= charsetp->code_space[0];
+ else
+ {
+ CHECK_NATNUM (code4);
+ if (XFASTINT (code4) >= 0x100)
+ args_out_of_range (make_number (0xFF), code4);
+ code |= XFASTINT (code4);
+ }
+ }
+ }
}
- i++;
- i_byte += bytes;
- width += thiswidth;
- }
-
- if (precision > 0)
- {
- *nchars = i;
- *nbytes = i_byte;
}
- return width;
+ if (CHARSET_ISO_FINAL (charsetp) >= 0)
+ code &= 0x7F7F7F7F;
+ c = DECODE_CHAR (charsetp, code);
+ if (c < 0)
+ error ("Invalid code(s)");
+ return make_number (c);
}
-/* Return width of Lisp string STRING when displayed in the current
- buffer. The width is measured by how many columns it occupies on
- the screen while paying attention to compositions. If PRECISION >
- 0, return the width of longest substring that doesn't exceed
- PRECISION, and set number of characters and bytes of the substring
- in *NCHARS and *NBYTES respectively. */
-int
-lisp_string_width (string, precision, nchars, nbytes)
- Lisp_Object string;
- int precision, *nchars, *nbytes;
-{
- int len = SCHARS (string);
- int len_byte = SBYTES (string);
- const unsigned char *str = SDATA (string);
- int i = 0, i_byte = 0;
- int width = 0;
- struct Lisp_Char_Table *dp = buffer_display_table ();
-
- while (i < len)
- {
- int chars, bytes, thiswidth;
- Lisp_Object val;
- int cmp_id;
- int ignore, end;
+/* Return the first charset in CHARSET_LIST that contains C.
+ CHARSET_LIST is a list of charset IDs. If it is nil, use
+ Vcharset_ordered_list. */
- if (find_composition (i, -1, &ignore, &end, &val, string)
- && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
- >= 0))
- {
- thiswidth = composition_table[cmp_id]->width;
- chars = end - i;
- bytes = string_char_to_byte (string, end) - i_byte;
- }
- else if (dp)
- {
- int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+struct charset *
+char_charset (c, charset_list, code_return)
+ int c;
+ Lisp_Object charset_list;
+ unsigned *code_return;
+{
+ if (NILP (charset_list))
+ charset_list = Vcharset_ordered_list;
- chars = 1;
- val = DISP_CHAR_VECTOR (dp, c);
- if (VECTORP (val))
- thiswidth = XVECTOR (val)->size;
- else
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
- else
- {
- chars = 1;
- PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
+ while (CONSP (charset_list))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ unsigned code = ENCODE_CHAR (charset, c);
- if (precision > 0
- && (width + thiswidth > precision))
+ if (code != CHARSET_INVALID_CODE (charset))
{
- *nchars = i;
- *nbytes = i_byte;
- return width;
+ if (code_return)
+ *code_return = code;
+ return charset;
}
- i += chars;
- i_byte += bytes;
- width += thiswidth;
- }
-
- if (precision > 0)
- {
- *nchars = i;
- *nbytes = i_byte;
+ charset_list = XCDR (charset_list);
}
-
- return width;
+ return NULL;
}
-DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
- doc: /* Return width of STRING when displayed in the current buffer.
-Width is measured by how many columns it occupies on the screen.
-When calculating width of a multibyte character in STRING,
-only the base leading-code is considered; the validity of
-the following bytes is not checked. Tabs in STRING are always
-taken to occupy `tab-width' columns. */)
- (str)
- Lisp_Object str;
+
+/* Fixme: `unknown' can't happen now? */
+DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
+ doc: /*Return list of charset and one to three position-codes of CHAR.
+If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */)
+ (ch)
+ Lisp_Object ch;
{
+ struct charset *charset;
+ int c, dimension;
+ unsigned code;
Lisp_Object val;
- CHECK_STRING (str);
- XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
- return val;
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ charset = CHAR_CHARSET (c);
+ if (! charset)
+ return Fcons (intern ("unknown"), Fcons (ch, Qnil));
+
+ code = ENCODE_CHAR (charset, c);
+ if (code == CHARSET_INVALID_CODE (charset))
+ abort ();
+ dimension = CHARSET_DIMENSION (charset);
+ val = (dimension == 1 ? Fcons (make_number (code), Qnil)
+ : dimension == 2 ? Fcons (make_number (code >> 8),
+ Fcons (make_number (code & 0xFF), Qnil))
+ : Fcons (make_number (code >> 16),
+ Fcons (make_number ((code >> 8) & 0xFF),
+ Fcons (make_number (code & 0xFF), Qnil))));
+ return Fcons (CHARSET_NAME (charset), val);
}
-DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
- doc: /* Return the direction of CHAR.
-The returned value is 0 for left-to-right and 1 for right-to-left. */)
+
+DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
+ doc: /* Return the charset of highest priority that contains CHAR. */)
(ch)
Lisp_Object ch;
{
- int charset;
+ struct charset *charset;
- CHECK_NUMBER (ch);
- charset = CHAR_CHARSET (XFASTINT (ch));
- if (!CHARSET_DEFINED_P (charset))
- invalid_character (XINT (ch));
- return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
+ CHECK_CHARACTER (ch);
+ charset = CHAR_CHARSET (XINT (ch));
+ return (CHARSET_NAME (charset));
}
-DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
- doc: /* Return number of characters between BEG and END. */)
- (beg, end)
- Lisp_Object beg, end;
-{
- int from, to;
-
- CHECK_NUMBER_COERCE_MARKER (beg);
- CHECK_NUMBER_COERCE_MARKER (end);
- from = min (XFASTINT (beg), XFASTINT (end));
- to = max (XFASTINT (beg), XFASTINT (end));
+DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
+ doc: /*
+Return charset of a character in the current buffer at position POS.
+If POS is nil, it defauls to the current point.
+If POS is out of range, the value is nil. */)
+ (pos)
+ Lisp_Object pos;
+{
+ Lisp_Object ch;
+ struct charset *charset;
- return make_number (to - from);
+ ch = Fchar_after (pos);
+ if (! INTEGERP (ch))
+ return ch;
+ charset = CHAR_CHARSET (XINT (ch));
+ return (CHARSET_NAME (charset));
}
-/* Return the number of characters in the NBYTES bytes at PTR.
- This works by looking at the contents and checking for multibyte sequences.
- However, if the current buffer has enable-multibyte-characters = nil,
- we treat each byte as a character. */
-int
-chars_in_text (ptr, nbytes)
- const unsigned char *ptr;
- int nbytes;
+DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
+ doc: /*
+Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
+
+ISO 2022's designation sequence (escape sequence) distinguishes charsets
+by their DIMENSION, CHARS, and FINAL-CHAR,
+where as Emacs distinguishes them by charset symbol.
+See the documentation of the function `charset-info' for the meanings of
+DIMENSION, CHARS, and FINAL-CHAR. */)
+ (dimension, chars, final_char)
+ Lisp_Object dimension, chars, final_char;
{
- /* current_buffer is null at early stages of Emacs initialization. */
- if (current_buffer == 0
- || NILP (current_buffer->enable_multibyte_characters))
- return nbytes;
+ int id;
- return multibyte_chars_in_text (ptr, nbytes);
+ check_iso_charset_parameter (dimension, chars, final_char);
+ id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
+ XFASTINT (final_char));
+ return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
}
-/* Return the number of characters in the NBYTES bytes at PTR.
- This works by looking at the contents and checking for multibyte sequences.
- It ignores enable-multibyte-characters. */
-int
-multibyte_chars_in_text (ptr, nbytes)
- const unsigned char *ptr;
- int nbytes;
+DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
+ 0, 0, 0,
+ doc: /*
+Clear encoder and decoder of charsets that are loaded from mapfiles. */)
+ ()
{
- const unsigned char *endp;
- int chars, bytes;
-
- endp = ptr + nbytes;
- chars = 0;
+ int i;
+ struct charset *charset;
+ Lisp_Object attrs;
- while (ptr < endp)
+ for (i = 0; i < charset_table_used; i++)
{
- PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
- ptr += bytes;
- chars++;
- }
-
- return chars;
-}
+ charset = CHARSET_FROM_ID (i);
+ attrs = CHARSET_ATTRIBUTES (charset);
-/* Parse unibyte text at STR of LEN bytes as multibyte text, and
- count the numbers of characters and bytes in it. On counting
- bytes, pay attention to the fact that 8-bit characters in the range
- 0x80..0x9F are represented by 2 bytes in multibyte text. */
-void
-parse_str_as_multibyte (str, len, nchars, nbytes)
- const unsigned char *str;
- int len, *nchars, *nbytes;
-{
- const unsigned char *endp = str + len;
- int n, chars = 0, bytes = 0;
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
+ {
+ CHARSET_ATTR_DECODER (attrs) = Qnil;
+ CHARSET_ATTR_ENCODER (attrs) = Qnil;
+ CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
+ }
- while (str < endp)
- {
- if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
- str += n, bytes += n;
- else
- str++, bytes += 2;
- chars++;
+ if (CHARSET_UNIFIED_P (charset))
+ CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
}
- *nchars = chars;
- *nbytes = bytes;
- return;
-}
-/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
- It actually converts only 8-bit characters in the range 0x80..0x9F
- that don't contruct multibyte characters to multibyte forms. If
- NCHARS is nonzero, set *NCHARS to the number of characters in the
- text. It is assured that we can use LEN bytes at STR as a work
- area and that is enough. Return the number of bytes of the
- resulting text. */
-
-int
-str_as_multibyte (str, len, nbytes, nchars)
- unsigned char *str;
- int len, nbytes, *nchars;
-{
- unsigned char *p = str, *endp = str + nbytes;
- unsigned char *to;
- int chars = 0;
- int n;
-
- while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
- p += n, chars++;
- if (nchars)
- *nchars = chars;
- if (p == endp)
- return nbytes;
-
- to = p;
- nbytes = endp - p;
- endp = str + len;
- safe_bcopy (p, endp - nbytes, nbytes);
- p = endp - nbytes;
- while (p < endp)
+ if (CHAR_TABLE_P (Vchar_unified_charset_table))
{
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
- {
- while (n--)
- *to++ = *p++;
- }
- else
- {
- *to++ = LEADING_CODE_8_BIT_CONTROL;
- *to++ = *p++ + 0x20;
- }
- chars++;
+ Foptimize_char_table (Vchar_unified_charset_table);
+ Vchar_unify_table = Vchar_unified_charset_table;
+ Vchar_unified_charset_table = Qnil;
}
- if (nchars)
- *nchars = chars;
- return (to - str);
-}
-
-/* Parse unibyte string at STR of LEN bytes, and return the number of
- bytes it may ocupy when converted to multibyte string by
- `str_to_multibyte'. */
-int
-parse_str_to_multibyte (str, len)
- unsigned char *str;
- int len;
-{
- unsigned char *endp = str + len;
- int bytes;
-
- for (bytes = 0; str < endp; str++)
- bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
- return bytes;
+ return Qnil;
}
-/* Convert unibyte text at STR of NBYTES bytes to multibyte text
- that contains the same single-byte characters. It actually
- converts all 8-bit characters to multibyte forms. It is assured
- that we can use LEN bytes at STR as a work area and that is
- enough. */
-
-int
-str_to_multibyte (str, len, bytes)
- unsigned char *str;
- int len, bytes;
+DEFUN ("charset-priority-list", Fcharset_priority_list,
+ Scharset_priority_list, 0, 1, 0,
+ doc: /* Return the list of charsets ordered by priority.
+HIGHESTP non-nil means just return the highest priority one. */)
+ (highestp)
+ Lisp_Object highestp;
{
- unsigned char *p = str, *endp = str + bytes;
- unsigned char *to;
-
- while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
- if (p == endp)
- return bytes;
- to = p;
- bytes = endp - p;
- endp = str + len;
- safe_bcopy (p, endp - bytes, bytes);
- p = endp - bytes;
- while (p < endp)
- {
- if (*p < 0x80 || *p >= 0xA0)
- *to++ = *p++;
- else
- *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
- }
- return (to - str);
-}
+ Lisp_Object val = Qnil, list = Vcharset_ordered_list;
-/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
- actually converts only 8-bit characters in the range 0x80..0x9F to
- unibyte forms. */
+ if (!NILP (highestp))
+ return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
-int
-str_as_unibyte (str, bytes)
- unsigned char *str;
- int bytes;
-{
- unsigned char *p = str, *endp = str + bytes;
- unsigned char *to = str;
-
- while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
- to = p;
- while (p < endp)
+ while (!NILP (list))
{
- if (*p == LEADING_CODE_8_BIT_CONTROL)
- *to++ = *(p + 1) - 0x20, p += 2;
- else
- *to++ = *p++;
+ val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
+ list = XCDR (list);
}
- return (to - str);
+ return Fnreverse (val);
}
-
-DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
- doc: /* Concatenate all the argument characters and make the result a string.
-usage: (string &rest CHARACTERS) */)
- (n, args)
- int n;
+DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
+ 1, MANY, 0,
+ doc: /* Assign higher priority to the charsets given as arguments.
+usage: (set-charset-priority &rest charsets) */)
+ (nargs, args)
+ int nargs;
Lisp_Object *args;
{
- int i;
- unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
- unsigned char *p = buf;
- int c;
- int multibyte = 0;
+ Lisp_Object new_head, old_list, arglist[2];
+ int i, id;
- for (i = 0; i < n; i++)
+ old_list = Fcopy_sequence (Vcharset_ordered_list);
+ new_head = Qnil;
+ for (i = 0; i < nargs; i++)
{
- CHECK_NUMBER (args[i]);
- if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
- multibyte = 1;
+ CHECK_CHARSET_GET_ID (args[i], id);
+ if (! NILP (Fmemq (make_number (id), old_list)))
+ {
+ old_list = Fdelq (make_number (id), old_list);
+ new_head = Fcons (make_number (id), new_head);
+ }
}
+ arglist[0] = Fnreverse (new_head);
+ arglist[1] = old_list;
+ Vcharset_ordered_list = Fnconc (2, arglist);
+ charset_ordered_list_tick++;
- for (i = 0; i < n; i++)
+ for (old_list = Vcharset_ordered_list, new_head = Qnil;
+ CONSP (old_list); old_list = XCDR (old_list))
{
- c = XINT (args[i]);
- if (multibyte)
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
+ if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
+ new_head = Fcons (XCAR (old_list), new_head);
}
+ Viso_2022_charset_list = Fnreverse (new_head);
- return make_string_from_bytes (buf, n, p - buf);
+ return Qnil;
}
-#endif /* emacs */
-
-int
-charset_id_internal (charset_name)
- char *charset_name;
+DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
+ 0, 1, 0,
+ doc: /* Internal use only.
+Return charset identification number of CHARSET. */)
+ (charset)
+ Lisp_Object charset;
{
- Lisp_Object val;
+ int id;
- val= Fget (intern (charset_name), Qcharset);
- if (!VECTORP (val))
- error ("Charset %s is not defined", charset_name);
-
- return (XINT (XVECTOR (val)->contents[0]));
+ CHECK_CHARSET_GET_ID (charset, id);
+ return make_number (id);
}
-DEFUN ("setup-special-charsets", Fsetup_special_charsets,
- Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
- ()
+
+void
+init_charset ()
{
- charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
- charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
- charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
- charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
- charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
- charset_big5_1 = charset_id_internal ("chinese-big5-1");
- charset_big5_2 = charset_id_internal ("chinese-big5-2");
- return Qnil;
+
}
+
void
init_charset_once ()
{
int i, j, k;
- staticpro (&Vcharset_table);
- staticpro (&Vcharset_symbol_table);
- staticpro (&Vgeneric_character_list);
-
- /* This has to be done here, before we call Fmake_char_table. */
- Qcharset_table = intern ("charset-table");
- staticpro (&Qcharset_table);
-
- /* Intern this now in case it isn't already done.
- Setting this variable twice is harmless.
- But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
-
- /* Now we are ready to set up this property, so we can
- create the charset table. */
- Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
- Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
-
- Qunknown = intern ("unknown");
- staticpro (&Qunknown);
- Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
- Qunknown);
-
- /* Setup tables. */
- for (i = 0; i < 2; i++)
- for (j = 0; j < 2; j++)
- for (k = 0; k < 128; k++)
- iso_charset_table [i][j][k] = -1;
+ for (i = 0; i < ISO_MAX_DIMENSION; i++)
+ for (j = 0; j < ISO_MAX_CHARS; j++)
+ for (k = 0; k < ISO_MAX_FINAL; k++)
+ iso_charset_table[i][j][k] = -1;
for (i = 0; i < 256; i++)
- bytes_by_char_head[i] = 1;
- bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
- bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
- bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
- bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
+ emacs_mule_charset[i] = NULL;
+
+ charset_jisx0201_roman = -1;
+ charset_jisx0208_1978 = -1;
+ charset_jisx0208 = -1;
for (i = 0; i < 128; i++)
- width_by_char_head[i] = 1;
+ unibyte_to_multibyte_table[i] = i;
for (; i < 256; i++)
- width_by_char_head[i] = 4;
- width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
- width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
- width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
- width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
-
- {
- Lisp_Object val;
-
- val = Qnil;
- for (i = 0x81; i < 0x90; i++)
- val = Fcons (make_number ((i - 0x70) << 7), val);
- for (; i < 0x9A; i++)
- val = Fcons (make_number ((i - 0x8F) << 14), val);
- for (i = 0xA0; i < 0xF0; i++)
- val = Fcons (make_number ((i - 0x70) << 7), val);
- for (; i < 0xFF; i++)
- val = Fcons (make_number ((i - 0xE0) << 14), val);
- Vgeneric_character_list = Fnreverse (val);
- }
-
- nonascii_insert_offset = 0;
- Vnonascii_translation_table = Qnil;
+ unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
}
#ifdef emacs
@@ -1770,141 +2073,93 @@ init_charset_once ()
void
syms_of_charset ()
{
- Qcharset = intern ("charset");
- staticpro (&Qcharset);
-
- Qascii = intern ("ascii");
- staticpro (&Qascii);
-
- Qeight_bit_control = intern ("eight-bit-control");
- staticpro (&Qeight_bit_control);
-
- Qeight_bit_graphic = intern ("eight-bit-graphic");
- staticpro (&Qeight_bit_graphic);
-
- /* Define special charsets ascii, eight-bit-control, and
- eight-bit-graphic. */
- update_charset_table (make_number (CHARSET_ASCII),
- make_number (1), make_number (94),
- make_number (1),
- make_number (0),
- make_number ('B'),
- make_number (0),
- build_string ("ASCII"),
- Qnil, /* same as above */
- build_string ("ASCII (ISO646 IRV)"));
- CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
- Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
-
- update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
- make_number (1), make_number (96),
- make_number (4),
- make_number (0),
- make_number (-1),
- make_number (-1),
- build_string ("8-bit control code (0x80..0x9F)"),
- Qnil, /* same as above */
- Qnil); /* same as above */
- CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
- Fput (Qeight_bit_control, Qcharset,
- CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
-
- update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
- make_number (1), make_number (96),
- make_number (4),
- make_number (0),
- make_number (-1),
- make_number (-1),
- build_string ("8-bit graphic char (0xA0..0xFF)"),
- Qnil, /* same as above */
- Qnil); /* same as above */
- CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
- Fput (Qeight_bit_graphic, Qcharset,
- CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
-
- Qauto_fill_chars = intern ("auto-fill-chars");
- staticpro (&Qauto_fill_chars);
- Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
-
- defsubr (&Sdefine_charset);
- defsubr (&Sgeneric_character_list);
+ char *p;
+
+ DEFSYM (Qcharsetp, "charsetp");
+
+ DEFSYM (Qascii, "ascii");
+ DEFSYM (Qunicode, "unicode");
+ DEFSYM (Qeight_bit, "eight-bit");
+ DEFSYM (Qiso_8859_1, "iso-8859-1");
+
+ DEFSYM (Qgl, "gl");
+ DEFSYM (Qgr, "gr");
+
+ p = (char *) xmalloc (30000);
+
+ staticpro (&Vcharset_ordered_list);
+ Vcharset_ordered_list = Qnil;
+
+ staticpro (&Viso_2022_charset_list);
+ Viso_2022_charset_list = Qnil;
+
+ staticpro (&Vemacs_mule_charset_list);
+ Vemacs_mule_charset_list = Qnil;
+
+ staticpro (&Vcharset_hash_table);
+ {
+ Lisp_Object args[2];
+ args[0] = QCtest;
+ args[1] = Qeq;
+ Vcharset_hash_table = Fmake_hash_table (2, args);
+ }
+
+ charset_table_size = 128;
+ charset_table = ((struct charset *)
+ xmalloc (sizeof (struct charset) * charset_table_size));
+ charset_table_used = 0;
+
+ staticpro (&Vchar_unified_charset_table);
+ Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
+
+ defsubr (&Scharsetp);
+ defsubr (&Smap_charset_chars);
+ defsubr (&Sdefine_charset_internal);
+ defsubr (&Sdefine_charset_alias);
+ defsubr (&Sunibyte_charset);
+ defsubr (&Sset_unibyte_charset);
+ defsubr (&Scharset_plist);
+ defsubr (&Sset_charset_plist);
+ defsubr (&Sunify_charset);
defsubr (&Sget_unused_iso_final_char);
defsubr (&Sdeclare_equiv_charset);
defsubr (&Sfind_charset_region);
defsubr (&Sfind_charset_string);
- defsubr (&Smake_char_internal);
+ defsubr (&Sdecode_char);
+ defsubr (&Sencode_char);
defsubr (&Ssplit_char);
+ defsubr (&Smake_char);
defsubr (&Schar_charset);
defsubr (&Scharset_after);
defsubr (&Siso_charset);
- defsubr (&Schar_valid_p);
- defsubr (&Sunibyte_char_to_multibyte);
- defsubr (&Smultibyte_char_to_unibyte);
- defsubr (&Schar_bytes);
- defsubr (&Schar_width);
- defsubr (&Sstring_width);
- defsubr (&Schar_direction);
- defsubr (&Schars_in_region);
- defsubr (&Sstring);
- defsubr (&Ssetup_special_charsets);
+ defsubr (&Sclear_charset_maps);
+ defsubr (&Scharset_priority_list);
+ defsubr (&Sset_charset_priority);
+ defsubr (&Scharset_id_internal);
+
+ DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
+ doc: /* Directory of charset map files that come with GNU Emacs.
+The default value is sub-directory "charsets" of `data-directory'. */);
+ Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
+ Vdata_directory);
DEFVAR_LISP ("charset-list", &Vcharset_list,
- doc: /* List of charsets ever defined. */);
- Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
- Fcons (Qeight_bit_graphic, Qnil)));
-
- DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
- doc: /* Vector of cons cell of a symbol and translation table ever defined.
-An ID of a translation table is an index of this vector. */);
- Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
-
- DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
- doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
- leading_code_private_11 = LEADING_CODE_PRIVATE_11;
-
- DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
- doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
- leading_code_private_12 = LEADING_CODE_PRIVATE_12;
-
- DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
- doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
- leading_code_private_21 = LEADING_CODE_PRIVATE_21;
-
- DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
- doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
- leading_code_private_22 = LEADING_CODE_PRIVATE_22;
-
- DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
- doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
-This is used for converting unibyte text to multibyte,
-and for inserting character codes specified by number.
-
-This serves to convert a Latin-1 or similar 8-bit character code
-to the corresponding Emacs multibyte character code.
-Typically the value should be (- (make-char CHARSET 0) 128),
-for your choice of character set.
-If `nonascii-translation-table' is non-nil, it overrides this variable. */);
- nonascii_insert_offset = 0;
-
- DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
- doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
-This is used for converting unibyte text to multibyte,
-and for inserting character codes specified by number.
-
-Conversion is performed only when multibyte characters are enabled,
-and it serves to convert a Latin-1 or similar 8-bit character code
-to the corresponding Emacs character code.
-
-If this is nil, `nonascii-insert-offset' is used instead.
-See also the docstring of `make-translation-table'. */);
- Vnonascii_translation_table = Qnil;
-
- DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
- doc: /* A char-table for characters which invoke auto-filling.
-Such characters have value t in this table. */);
- Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
- CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
- CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
+ doc: /* List of all charsets ever defined. */);
+ Vcharset_list = Qnil;
+
+ charset_ascii
+ = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
+ 0, 127, 'B', -1, 0, 1, 0, 0);
+ charset_iso_8859_1
+ = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
+ 0, 255, -1, -1, -1, 1, 0, 0);
+ charset_unicode
+ = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
+ 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
+ charset_eight_bit
+ = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
+ 128, 255, -1, 0, -1, 0, 0,
+ MAX_5_BYTE_CHAR + 1);
}
#endif /* emacs */
diff --git a/src/charset.h b/src/charset.h
index fd8905e47d8..1ecbb49027b 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -1,7 +1,10 @@
-/* Header for multibyte character handler.
+/* Header for charset handler.
Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -23,850 +26,510 @@ Boston, MA 02111-1307, USA. */
#ifndef EMACS_CHARSET_H
#define EMACS_CHARSET_H
-/* #define BYTE_COMBINING_DEBUG */
-
-/*** GENERAL NOTE on CHARACTER SET (CHARSET) ***
-
- A character set ("charset" hereafter) is a meaningful collection
- (i.e. language, culture, functionality, etc) of characters. Emacs
- handles multiple charsets at once. Each charset corresponds to one
- of the ISO charsets. Emacs identifies a charset by a unique
- identification number, whereas ISO identifies a charset by a triplet
- of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying
- "charset" means an identification number (integer value).
-
- The value range of charsets is 0x00, 0x81..0xFE. There are four
- kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
- 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94
- characters.
-
- Within Emacs Lisp, a charset is treated as a symbol which has a
- property `charset'. The property value is a vector containing
- various information about the charset. For readability of C code,
- we use the following convention for C variable names:
- charset_symbol: Emacs Lisp symbol of a charset
- charset_id: Emacs Lisp integer of an identification number of a charset
- charset: C integer of an identification number of a charset
-
- Each charset (except for ascii) is assigned a base leading-code
- (range 0x80..0x9E). In addition, a charset of greater than 0xA0
- (whose base leading-code is 0x9A..0x9D) is assigned an extended
- leading-code (range 0xA0..0xFE). In this case, each base
- leading-code specifies the allowable range of extended leading-code
- as shown in the table below. A leading-code is used to represent a
- character in Emacs' buffer and string.
-
- We call a charset which has extended leading-code a "private
- charset" because those are mainly for a charset which is not yet
- registered by ISO. On the contrary, we call a charset which does
- not have extended leading-code an "official charset".
-
- ---------------------------------------------------------------------------
- charset dimension base leading-code extended leading-code
- ---------------------------------------------------------------------------
- 0x00 official dim1 -- none -- -- none --
- (ASCII)
- 0x01..0x7F --never used--
- 0x80 official dim1 -- none -- -- none --
- (eight-bit-graphic)
- 0x81..0x8F official dim1 same as charset -- none --
- 0x90..0x99 official dim2 same as charset -- none --
- 0x9A..0x9D --never used--
- 0x9E official dim1 same as charset -- none --
- (eight-bit-control)
- 0x9F --never used--
- 0xA0..0xDF private dim1 0x9A same as charset
- of 1-column width
- 0xE0..0xEF private dim1 0x9B same as charset
- of 2-column width
- 0xF0..0xF4 private dim2 0x9C same as charset
- of 1-column width
- 0xF5..0xFE private dim2 0x9D same as charset
- of 2-column width
- 0xFF --never used--
- ---------------------------------------------------------------------------
-
-*/
-
-/* Definition of special leading-codes. */
-/* Leading-code followed by extended leading-code. */
-#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
-#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
-#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
-#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
-
-#define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */
-
-/* Extended leading-code. */
-/* Start of each extended leading-codes. */
-#define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */
-#define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */
-#define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */
-#define LEADING_CODE_EXT_22 0xF5 /* follows LEADING_CODE_PRIVATE_22 */
-/* Maximum value of extended leading-codes. */
-#define LEADING_CODE_EXT_MAX 0xFE
-
-/* Definition of minimum/maximum charset of each DIMENSION. */
-#define MIN_CHARSET_OFFICIAL_DIMENSION1 0x80
-#define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F
-#define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90
-#define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99
-#define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
-#define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21
-
-/* Maximum value of overall charset identification number. */
-#define MAX_CHARSET 0xFE
-
-/* Definition of special charsets. */
-#define CHARSET_ASCII 0 /* 0x00..0x7F */
-#define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */
-#define CHARSET_8_BIT_GRAPHIC 0x80 /* 0xA0..0xFF */
-
-extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
-extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
-extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
-extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
-extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
-extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
-extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
-
-/* Check if CH is an ASCII character or a base leading-code.
- Nowadays, any byte can be the first byte of a character in a
- multibyte buffer/string. So this macro name is not appropriate. */
-#define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
-
-/*** GENERAL NOTE on CHARACTER REPRESENTATION ***
-
- Firstly, the term "character" or "char" is used for a multilingual
- character (of course, including ASCII characters), not for a byte in
- computer memory. We use the term "code" or "byte" for the latter
- case.
-
- A character is identified by charset and one or two POSITION-CODEs.
- POSITION-CODE is the position of the character in the charset. A
- character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1.
- A character of DIMENSION2 charset has two POSITION-CODE:
- POSITION-CODE-1 and POSITION-CODE-2. The code range of
- POSITION-CODE is 0x20..0x7F.
-
- Emacs has two kinds of representation of a character: multi-byte
- form (for buffers and strings) and single-word form (for character
- objects in Emacs Lisp). The latter is called "character code"
- hereafter. Both representations encode the information of charset
- and POSITION-CODE but in a different way (for instance, the MSB of
- POSITION-CODE is set in multi-byte form).
-
- For details of the multi-byte form, see the section "2. Emacs
- internal format handlers" of `coding.c'.
-
- Emacs uses 19 bits for a character code. The bits are divided into
- 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits).
-
- A character code of DIMENSION1 character uses FIELD2 to hold charset
- and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2
- character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold
- POSITION-CODE-1 and POSITION-CODE-2 respectively.
-
- More precisely...
-
- FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control,
- and eight-bit-graphic) is "charset - 0x70". This is to make all
- character codes except for ASCII and 8-bit codes greater than 256.
- So, the range of FIELD2 of DIMENSION1 character is 0, 1, or
- 0x11..0x7F.
-
- FIELD1 of DIMENSION2 character is "charset - 0x8F" for official
- charset and "charset - 0xE0" for private charset. So, the range of
- FIELD1 of DIMENSION2 character is 0x01..0x1E.
-
- -----------------------------------------------------------------------------
- charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit)
- -----------------------------------------------------------------------------
- ascii 0 0 0x00..0x7F
- eight-bit-control 0 1 0x00..0x1F
- eight-bit-graphic 0 1 0x20..0x7F
- DIMENSION1 0 charset - 0x70 POSITION-CODE-1
- DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2
- DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2
- -----------------------------------------------------------------------------
- "(o)": official, "(p)": private
- -----------------------------------------------------------------------------
-*/
-
-/* Masks of each field of character code. */
-#define CHAR_FIELD1_MASK (0x1F << 14)
-#define CHAR_FIELD2_MASK (0x7F << 7)
-#define CHAR_FIELD3_MASK 0x7F
-
-/* Macros to access each field of character C. */
-#define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14)
-#define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7)
-#define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK)
-
-/* Minimum character code of character of each DIMENSION. */
-#define MIN_CHAR_OFFICIAL_DIMENSION1 \
- ((0x81 - 0x70) << 7)
-#define MIN_CHAR_PRIVATE_DIMENSION1 \
- ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7)
-#define MIN_CHAR_OFFICIAL_DIMENSION2 \
- ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
-#define MIN_CHAR_PRIVATE_DIMENSION2 \
- ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
-/* Maximum character code currently used plus 1. */
-#define MAX_CHAR (0x1F << 14)
-
-/* 1 if C is a single byte character, else 0. */
-#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
-
-/* 1 if BYTE is an ASCII character in itself, in multibyte mode. */
-#define ASCII_BYTE_P(byte) ((byte) < 0x80)
-
-/* A char-table containing information on each character set.
-
- Unlike ordinary char-tables, this doesn't contain any nested tables.
- Only the top level elements are used. Each element is a vector of
- the following information:
- CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
- LEADING-CODE-BASE, LEADING-CODE-EXT,
- ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
- REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
- PLIST.
-
- CHARSET-ID (integer) is the identification number of the charset.
-
- BYTES (integer) is the length of the multi-byte form of a character
- in the charset: one of 1, 2, 3, and 4.
-
- DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
-
- CHARS (integer) is the number of characters in a dimension: 94 or 96.
-
- WIDTH (integer) is the number of columns a character in the charset
- occupies on the screen: one of 0, 1, and 2..
-
- DIRECTION (integer) is the rendering direction of characters in the
- charset when rendering. If 0, render from left to right, else
- render from right to left.
-
- LEADING-CODE-BASE (integer) is the base leading-code for the
- charset.
-
- LEADING-CODE-EXT (integer) is the extended leading-code for the
- charset. All charsets of less than 0xA0 have the value 0.
-
- ISO-FINAL-CHAR (character) is the final character of the
- corresponding ISO 2022 charset. It is -1 for such a character
- that is used only internally (e.g. `eight-bit-control').
-
- ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
- while encoding to variants of ISO 2022 coding system, one of the
- following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It
- is -1 for such a character that is used only internally
- (e.g. `eight-bit-control').
-
- REVERSE-CHARSET (integer) is the charset which differs only in
- LEFT-TO-RIGHT value from the charset. If there's no such a
- charset, the value is -1.
-
- SHORT-NAME (string) is the short name to refer to the charset.
-
- LONG-NAME (string) is the long name to refer to the charset.
-
- DESCRIPTION (string) is the description string of the charset.
-
- PLIST (property list) may contain any type of information a user
- wants to put and get by functions `put-charset-property' and
- `get-charset-property' respectively. */
-extern Lisp_Object Vcharset_table;
-
-/* Macros to access various information of CHARSET in Vcharset_table.
- We provide these macros for efficiency. No range check of CHARSET. */
-
-/* Return entry of CHARSET (C integer) in Vcharset_table. */
-#define CHARSET_TABLE_ENTRY(charset) \
- XCHAR_TABLE (Vcharset_table)->contents[((charset) == CHARSET_ASCII \
- ? 0 : (charset) + 128)]
-
-/* Return information INFO-IDX of CHARSET. */
-#define CHARSET_TABLE_INFO(charset, info_idx) \
- XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx]
-
-#define CHARSET_ID_IDX (0)
-#define CHARSET_BYTES_IDX (1)
-#define CHARSET_DIMENSION_IDX (2)
-#define CHARSET_CHARS_IDX (3)
-#define CHARSET_WIDTH_IDX (4)
-#define CHARSET_DIRECTION_IDX (5)
-#define CHARSET_LEADING_CODE_BASE_IDX (6)
-#define CHARSET_LEADING_CODE_EXT_IDX (7)
-#define CHARSET_ISO_FINAL_CHAR_IDX (8)
-#define CHARSET_ISO_GRAPHIC_PLANE_IDX (9)
-#define CHARSET_REVERSE_CHARSET_IDX (10)
-#define CHARSET_SHORT_NAME_IDX (11)
-#define CHARSET_LONG_NAME_IDX (12)
-#define CHARSET_DESCRIPTION_IDX (13)
-#define CHARSET_PLIST_IDX (14)
-/* Size of a vector of each entry of Vcharset_table. */
-#define CHARSET_MAX_IDX (15)
-
-/* And several more macros to be used frequently. */
-#define CHARSET_BYTES(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX))
-#define CHARSET_DIMENSION(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX))
-#define CHARSET_CHARS(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX))
-#define CHARSET_WIDTH(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX))
-#define CHARSET_DIRECTION(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX))
-#define CHARSET_LEADING_CODE_BASE(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX))
-#define CHARSET_LEADING_CODE_EXT(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX))
-#define CHARSET_ISO_FINAL_CHAR(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX))
-#define CHARSET_ISO_GRAPHIC_PLANE(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX))
-#define CHARSET_REVERSE_CHARSET(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX))
-
-/* Macros to specify direction of a charset. */
-#define CHARSET_DIRECTION_LEFT_TO_RIGHT 0
-#define CHARSET_DIRECTION_RIGHT_TO_LEFT 1
-
-/* A vector of charset symbol indexed by charset-id. This is used
- only for returning charset symbol from C functions. */
-extern Lisp_Object Vcharset_symbol_table;
-
-/* Return symbol of CHARSET. */
-#define CHARSET_SYMBOL(charset) \
- XVECTOR (Vcharset_symbol_table)->contents[charset]
-
-/* 1 if CHARSET is in valid value range, else 0. */
-#define CHARSET_VALID_P(charset) \
- ((charset) == 0 \
- || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
- || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \
- && (charset) <= MAX_CHARSET) \
- || ((charset) == CHARSET_8_BIT_CONTROL) \
- || ((charset) == CHARSET_8_BIT_GRAPHIC))
-
-/* 1 if CHARSET is already defined, else 0. */
-#define CHARSET_DEFINED_P(charset) \
- (((charset) >= 0) && ((charset) <= MAX_CHARSET) \
- && !NILP (CHARSET_TABLE_ENTRY (charset)))
-
-/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
- Vcharset_table can be retrieved only by the first byte of
- multi-byte form (an ASCII code or a base leading-code), we provide
- here tables to be used by macros BYTES_BY_CHAR_HEAD and
- WIDTH_BY_CHAR_HEAD for faster information retrieval. */
-extern int bytes_by_char_head[256];
-extern int width_by_char_head[256];
-
-#define BYTES_BY_CHAR_HEAD(char_head) \
- (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head])
-#define WIDTH_BY_CHAR_HEAD(char_head) \
- (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head])
-
-/* Charset of the character C. */
-#define CHAR_CHARSET(c) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? (ASCII_BYTE_P (c) \
- ? CHARSET_ASCII \
- : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \
- : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? CHAR_FIELD2 (c) + 0x70 \
- : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \
- ? CHAR_FIELD1 (c) + 0x8F \
- : CHAR_FIELD1 (c) + 0xE0)))
+/* Index to arguments of Fdefine_charset_internal. */
+
+enum define_charset_arg_index
+ {
+ charset_arg_name,
+ charset_arg_dimension,
+ charset_arg_code_space,
+ charset_arg_min_code,
+ charset_arg_max_code,
+ charset_arg_iso_final,
+ charset_arg_iso_revision,
+ charset_arg_emacs_mule_id,
+ charset_arg_ascii_compatible_p,
+ charset_arg_supplementary_p,
+ charset_arg_invalid_code,
+ charset_arg_code_offset,
+ charset_arg_map,
+ charset_arg_subset,
+ charset_arg_superset,
+ charset_arg_unify_map,
+ charset_arg_plist,
+ charset_arg_max
+ };
+
+
+/* Indices to charset attributes vector. */
+
+enum charset_attr_index
+ {
+ /* ID number of the charset. */
+ charset_id,
+
+ /* Name of the charset (symbol). */
+ charset_name,
+
+ /* Property list of the charset. */
+ charset_plist,
+
+ /* If the method of the charset is `MAP_DEFERRED', the value is a
+ mapping vector or a file name that contains mapping vector.
+ Otherwise, nil. */
+ charset_map,
+
+ /* If the method of the charset is `MAP', the value is a vector
+ that maps code points of the charset to characters. The vector
+ is indexed by a character index. A character index is
+ calculated from a code point and the code-space table of the
+ charset. */
+ charset_decoder,
+
+ /* If the method of the charset is `MAP', the value is a
+ char-table that maps characters of the charset to code
+ points. */
+ charset_encoder,
+
+ /* If the method of the charset is `SUBSET', the value is a vector
+ that has this form:
+
+ [ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]
+
+ CHARSET-ID is an ID number of a parent charset. MIN-CODE and
+ MAX-CODE specify the range of characters inherited from the
+ parent. OFFSET is an integer value to add to a code point of
+ the parent charset to get the corresponding code point of this
+ charset. */
+ charset_subset,
+
+ /* If the method of the charset is `SUPERSET', the value is a list
+ whose elements have this form:
+
+ (CHARSET-ID . OFFSET)
+
+ CHARSET-IDs are ID numbers of parent charsets. OFFSET is an
+ integer value to add to a code point of the parent charset to
+ get the corresponding code point of this charset. */
+ charset_superset,
+
+ /* The value is a mapping vector or a file name that contains the
+ mapping. This defines how characters in the charset should be
+ unified with Unicode. The value of the member
+ `charset_deunifier' is created from this information. */
+ charset_unify_map,
+
+ /* If characters in the charset must be unified Unicode, the value
+ is a char table that maps a character code in the charset to
+ the corresponding Unicode character. */
+ charset_deunifier,
+
+ /* The length of the charset attribute vector. */
+ charset_attr_max
+ };
+
+/* Methods for converting code points and characters of charsets. */
+
+enum charset_method
+ {
+ /* For a charset of this method, a character code is calculated
+ from a character index (which is calculated from a code point)
+ simply by adding an offset value. */
+ CHARSET_METHOD_OFFSET,
+
+ /* For a charset of this method, a decoder vector and an encoder
+ char-table is used for code point <-> character code
+ conversion. */
+ CHARSET_METHOD_MAP,
+
+ /* Same as above but decoder and encoder are loaded from a file on
+ demand. Once loaded, the method is changed to
+ CHARSET_METHOD_MAP. */
+ CHARSET_METHOD_MAP_DEFERRED,
+
+ /* A charset of this method is a subset of another charset. */
+ CHARSET_METHOD_SUBSET,
+
+ /* A charset of this method is a superset of other charsets. */
+ CHARSET_METHOD_SUPERSET
+ };
+
+struct charset
+{
+ /* Index to charset_table. */
+ int id;
-/* Check if two characters C1 and C2 belong to the same charset. */
-#define SAME_CHARSET_P(c1, c2) \
- (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \
- : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))
-
-/* Return a character of which charset is CHARSET and position-codes
- are C1 and C2. DIMENSION1 character ignores C2. */
-#define MAKE_CHAR(charset, c1, c2) \
- ((charset) == CHARSET_ASCII \
- ? (c1) & 0x7F \
- : (((charset) == CHARSET_8_BIT_CONTROL \
- || (charset) == CHARSET_8_BIT_GRAPHIC) \
- ? ((c1) & 0x7F) | 0x80 \
- : ((CHARSET_DEFINED_P (charset) \
- ? CHARSET_DIMENSION (charset) == 1 \
- : (charset) < MIN_CHARSET_PRIVATE_DIMENSION2) \
- ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : ((c1) & 0x7F)) \
- : ((((charset) \
- - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
- << 14) \
- | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \
- | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7))))))
-
-
-/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
- generic character. If GENERICP is zero, return nonzero iff C is a
- valid normal character. */
-#define CHAR_VALID_P(c, genericp) \
- ((c) >= 0 \
- && (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp)))
-
-/* This default value is used when nonascii-translation-table or
- nonascii-insert-offset fail to convert unibyte character to a valid
- multibyte character. This makes a Latin-1 character. */
-
-#define DEFAULT_NONASCII_INSERT_OFFSET 0x800
-
-/* Parse multibyte string STR of length LENGTH and set BYTES to the
- byte length of a character at STR. */
-
-#ifdef BYTE_COMBINING_DEBUG
-
-#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
- do { \
- int i = 1; \
- while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \
- (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
- if ((bytes) > i) \
- abort (); \
- } while (0)
+ /* Index to Vcharset_hash_table. */
+ int hash_index;
+
+ /* Dimension of the charset: 1, 2, 3, or 4. */
+ int dimension;
+
+ /* Byte code range of each dimension. <code_space>[4N] is a mininum
+ byte code of the (N+1)th dimension, <code_space>[4N+1] is a
+ maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is
+ (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3]
+ is a number of characters containd in the first to (N+1)th
+ dismesions. We get `char-index' of a `code-point' from this
+ information. */
+ int code_space[16];
-#else /* not BYTE_COMBINING_DEBUG */
-
-#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
- ((void)(length), (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]))
-
-#endif /* not BYTE_COMBINING_DEBUG */
-
-#define VALID_LEADING_CODE_P(code) \
- (! NILP (CHARSET_TABLE_ENTRY (code)))
-
-/* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes)
- is valid as a multibyte form. If valid, by a side effect, BYTES is
- set to the byte length of the multibyte form. */
-
-#define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \
- (((str)[0] < 0x80 || (str)[0] >= 0xA0) \
- ? ((bytes) = 1) \
- : (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])), \
- ((bytes) <= (length) \
- && !CHAR_HEAD_P ((str)[1]) \
- && ((bytes) == 2 \
- ? (str)[0] != LEADING_CODE_8_BIT_CONTROL \
- : (!CHAR_HEAD_P ((str)[2]) \
- && ((bytes) == 3 \
- ? (((str)[0] != LEADING_CODE_PRIVATE_11 \
- && (str)[0] != LEADING_CODE_PRIVATE_12) \
- || VALID_LEADING_CODE_P (str[1])) \
- : (!CHAR_HEAD_P ((str)[3]) \
- && VALID_LEADING_CODE_P (str[1]))))))))
-
-
-/* Return 1 iff the byte sequence at multibyte string STR is valid as
- a unibyte form. By a side effect, BYTES is set to the byte length
- of one character at STR. */
-
-#define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \
- ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \
- (str)[0] != LEADING_CODE_8_BIT_CONTROL)
-
-/* The charset of character C is stored in CHARSET, and the
- position-codes of C are stored in C1 and C2.
- We store -1 in C2 if the dimension of the charset is 1. */
-
-#define SPLIT_CHAR(c, charset, c1, c2) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((charset \
- = (ASCII_BYTE_P (c) \
- ? CHARSET_ASCII \
- : ((c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC))), \
- c1 = (c), c2 = -1) \
- : ((c) & CHAR_FIELD1_MASK \
- ? (charset = (CHAR_FIELD1 (c) \
- + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \
- c1 = CHAR_FIELD2 (c), \
- c2 = CHAR_FIELD3 (c)) \
- : (charset = CHAR_FIELD2 (c) + 0x70, \
- c1 = CHAR_FIELD3 (c), \
- c2 = -1)))
-
-/* Return 1 iff character C has valid printable glyph. */
-#define CHAR_PRINTABLE_P(c) (ASCII_BYTE_P (c) || char_printable_p (c))
-
-/* The charset of the character at STR is stored in CHARSET, and the
- position-codes are stored in C1 and C2.
- We store -1 in C2 if the character is just 2 bytes. */
-
-#define SPLIT_STRING(str, len, charset, c1, c2) \
- ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
- || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \
- || split_string (str, len, &charset, &c1, &c2) < 0) \
- ? c1 = *(str), charset = CHARSET_ASCII \
- : charset)
-
-/* Mapping table from ISO2022's charset (specified by DIMENSION,
- CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by
- macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
-extern int iso_charset_table[2][2][128];
-
-#define ISO_CHARSET_TABLE(dimension, chars, final_char) \
- iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)]
-
-#define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1)
-
-/* Return how many bytes C will occupy in a multibyte buffer. */
-#define CHAR_BYTES(c) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((ASCII_BYTE_P (c) || (c) >= 0xA0) ? 1 : 2) \
- : char_bytes (c))
-
-/* The following two macros CHAR_STRING and STRING_CHAR are the main
- entry points to convert between Emacs's two types of character
- representations: multi-byte form and single-word form (character
- code). */
-
-/* Store multi-byte form of the character C in STR. The caller should
- allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in
- advance. Returns the length of the multi-byte form. If C is an
- invalid character code, signal an error. */
-
-#define CHAR_STRING(c, str) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((ASCII_BYTE_P (c) || c >= 0xA0) \
- ? (*(str) = (unsigned char)(c), 1) \
- : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \
- : char_to_string (c, (unsigned char *) str))
-
-/* Like CHAR_STRING but don't signal an error if C is invalid.
- Value is -1 in this case. */
-
-#define CHAR_STRING_NO_SIGNAL(c, str) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((ASCII_BYTE_P (c) || c >= 0xA0) \
- ? (*(str) = (unsigned char)(c), 1) \
- : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \
- : char_to_string_1 (c, (unsigned char *) str))
-
-/* Return a character code of the character of which multi-byte form
- is at STR and the length is LEN. If STR doesn't contain valid
- multi-byte form, only the first byte in STR is returned. */
-
-#define STRING_CHAR(str, len) \
- (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- ? (unsigned char) *(str) \
- : string_to_char (str, len, 0))
-
-/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
- length of the multi-byte form. Just to know the length, use
- MULTIBYTE_FORM_LENGTH. */
-
-#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
- (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- ? ((actual_len) = 1), (unsigned char) *(str) \
- : string_to_char (str, len, &(actual_len)))
-
-/* Fetch the "next" character from Lisp string STRING at byte position
- BYTEIDX, character position CHARIDX. Store it into OUTPUT.
-
- All the args must be side-effect-free.
- BYTEIDX and CHARIDX must be lvalues;
- we increment them past the character fetched. */
-
-#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
-if (1) \
- { \
- CHARIDX++; \
- if (STRING_MULTIBYTE (STRING)) \
- { \
- const unsigned char *ptr = SDATA (STRING) + BYTEIDX; \
- int space_left = SBYTES (STRING) - BYTEIDX; \
- int actual_len; \
- \
- OUTPUT = STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \
- BYTEIDX += actual_len; \
- } \
- else \
- OUTPUT = SREF (STRING, BYTEIDX++); \
- } \
-else
-
-/* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte. */
-
-#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
-if (1) \
- { \
- const unsigned char *fetch_string_char_ptr = SDATA (STRING) + BYTEIDX; \
- int fetch_string_char_space_left = SBYTES (STRING) - BYTEIDX; \
- int actual_len; \
- \
- OUTPUT \
- = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
- fetch_string_char_space_left, actual_len); \
- \
- BYTEIDX += actual_len; \
- CHARIDX++; \
- } \
-else
-
-/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
- buffer. */
-
-#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
-if (1) \
- { \
- CHARIDX++; \
- if (!NILP (current_buffer->enable_multibyte_characters)) \
- { \
- unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
- int space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) - BYTEIDX); \
- int actual_len; \
- \
- OUTPUT= STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \
- BYTEIDX += actual_len; \
- } \
- else \
- { \
- OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
- BYTEIDX++; \
- } \
- } \
-else
-
-/* Return the length of the multi-byte form at string STR of length LEN. */
-
-#define MULTIBYTE_FORM_LENGTH(str, len) \
- (BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) == 1 \
- ? 1 \
- : multibyte_form_length (str, len))
-
-/* If P is before LIMIT, advance P to the next character boundary. It
- assumes that P is already at a character boundary of the sane
- mulitbyte form whose end address is LIMIT. */
-
-#define NEXT_CHAR_BOUNDARY(p, limit) \
- do { \
- if ((p) < (limit)) \
- (p) += BYTES_BY_CHAR_HEAD (*(p)); \
- } while (0)
+ /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit
+ of code_space_mask[B] is set. This array is used to quickly
+ check if a code-point is in a valid range. */
+ unsigned char *code_space_mask;
+ /* 1 if there's no gap in code-points. */
+ int code_linear_p;
-/* If P is after LIMIT, advance P to the previous character boundary.
- It assumes that P is already at a character boundary of the sane
- mulitbyte form whose beginning address is LIMIT. */
+ /* If the charset is treated as 94-chars in ISO-2022, the value is 0.
+ If the charset is treated as 96-chars in ISO-2022, the value is 1. */
+ int iso_chars_96;
-#define PREV_CHAR_BOUNDARY(p, limit) \
- do { \
- if ((p) > (limit)) \
- { \
- const unsigned char *p0 = (p); \
- do { \
- p0--; \
- } while (p0 >= limit && ! CHAR_HEAD_P (*p0)); \
- (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1; \
- } \
- } while (0)
+ /* ISO final byte of the charset: 48..127. It may be -1 if the
+ charset doesn't conform to ISO-2022. */
+ int iso_final;
+ /* ISO revision number of the charset. */
+ int iso_revision;
-#ifdef emacs
+ /* If the charset is identical to what supported by Emacs 21 and the
+ priors, the identification number of the charset used in those
+ version. Otherwise, -1. */
+ int emacs_mule_id;
+
+ /* Nonzero iff the charset is compatible with ASCII. */
+ int ascii_compatible_p;
-/* Increase the buffer byte position POS_BYTE of the current buffer to
- the next character boundary. This macro relies on the fact that
- *GPT_ADDR and *Z_ADDR are always accessible and the values are
- '\0'. No range checking of POS. */
+ /* Nonzero iff the charset is supplementary. */
+ int supplementary_p;
+
+ /* Nonzero iff all the code points are representable by Lisp_Int. */
+ int compact_codes_p;
+
+ /* The method for encoding/decoding characters of the charset. */
+ enum charset_method method;
+
+ /* Mininum and Maximum code points of the charset. */
+ unsigned min_code, max_code;
+
+ /* Offset value used by macros CODE_POINT_TO_INDEX and
+ INDEX_TO_CODE_POINT. . */
+ unsigned char_index_offset;
+
+ /* Mininum and Maximum character codes of the charset. If the
+ charset is compatible with ASCII, min_char is a minimum non-ASCII
+ character of the charset. If the method of charset is
+ CHARSET_METHOD_OFFSET, even if the charset is unified, min_char
+ and max_char doesn't change. */
+ int min_char, max_char;
+
+ /* The code returned by ENCODE_CHAR if a character is not encodable
+ by the charset. */
+ unsigned invalid_code;
+
+ /* If the method of the charset is CHARSET_METHOD_MAP, this is a
+ table of bits used to quickly and roughly guess if a character
+ belongs to the charset.
+
+ The first 64 elements are 512 bits for characters less than
+ 0x10000. Each bit corresponds to 128-character block. The last
+ 126 elements are 1008 bits for the greater characters
+ (0x10000..0x3FFFFF). Each bit corresponds to 4096-character
+ block.
+
+ If a bit is 1, at least one character in the corresponding block is
+ in this charset. */
+ unsigned char fast_map[190];
+
+ /* Offset value to calculate a character code from code-point, and
+ visa versa. */
+ int code_offset;
+
+ int unified_p;
+};
+
+/* Hash table of charset symbols vs. the correponding attribute
+ vectors. */
+extern Lisp_Object Vcharset_hash_table;
+
+/* Table of struct charset. */
+extern struct charset *charset_table;
+extern int charset_table_used;
+
+#define CHARSET_FROM_ID(id) (charset_table + (id))
+
+extern Lisp_Object Vcharset_ordered_list;
+
+/* Incremented everytime we change the priority of charsets. */
+extern unsigned short charset_ordered_list_tick;
+
+extern Lisp_Object Vcharset_list;
+extern Lisp_Object Viso_2022_charset_list;
+extern Lisp_Object Vemacs_mule_charset_list;
+
+extern struct charset *emacs_mule_charset[256];
+
+
+/* Macros to access information about charset. */
+
+/* Return the attribute vector of charset whose symbol is SYMBOL. */
+#define CHARSET_SYMBOL_ATTRIBUTES(symbol) \
+ Fgethash ((symbol), Vcharset_hash_table, Qnil)
+
+#define CHARSET_ATTR_ID(attrs) AREF ((attrs), charset_id)
+#define CHARSET_ATTR_NAME(attrs) AREF ((attrs), charset_name)
+#define CHARSET_ATTR_PLIST(attrs) AREF ((attrs), charset_plist)
+#define CHARSET_ATTR_MAP(attrs) AREF ((attrs), charset_map)
+#define CHARSET_ATTR_DECODER(attrs) AREF ((attrs), charset_decoder)
+#define CHARSET_ATTR_ENCODER(attrs) AREF ((attrs), charset_encoder)
+#define CHARSET_ATTR_SUBSET(attrs) AREF ((attrs), charset_subset)
+#define CHARSET_ATTR_SUPERSET(attrs) AREF ((attrs), charset_superset)
+#define CHARSET_ATTR_UNIFY_MAP(attrs) AREF ((attrs), charset_unify_map)
+#define CHARSET_ATTR_DEUNIFIER(attrs) AREF ((attrs), charset_deunifier)
+
+#define CHARSET_SYMBOL_ID(symbol) \
+ CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))
+
+/* Return an index to Vcharset_hash_table of the charset whose symbol
+ is SYMBOL. */
+#define CHARSET_SYMBOL_HASH_INDEX(symbol) \
+ hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL)
+
+/* Return the attribute vector of CHARSET. */
+#define CHARSET_ATTRIBUTES(charset) \
+ (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index))
+
+#define CHARSET_ID(charset) ((charset)->id)
+#define CHARSET_HASH_INDEX(charset) ((charset)->hash_index)
+#define CHARSET_DIMENSION(charset) ((charset)->dimension)
+#define CHARSET_CODE_SPACE(charset) ((charset)->code_space)
+#define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p)
+#define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96)
+#define CHARSET_ISO_FINAL(charset) ((charset)->iso_final)
+#define CHARSET_ISO_PLANE(charset) ((charset)->iso_plane)
+#define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision)
+#define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id)
+#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
+#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
+#define CHARSET_METHOD(charset) ((charset)->method)
+#define CHARSET_MIN_CODE(charset) ((charset)->min_code)
+#define CHARSET_MAX_CODE(charset) ((charset)->max_code)
+#define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code)
+#define CHARSET_MIN_CHAR(charset) ((charset)->min_char)
+#define CHARSET_MAX_CHAR(charset) ((charset)->max_char)
+#define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset)
+#define CHARSET_UNIFIED_P(charset) ((charset)->unified_p)
+
+#define CHARSET_NAME(charset) \
+ (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_MAP(charset) \
+ (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_DECODER(charset) \
+ (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_ENCODER(charset) \
+ (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_SUBSET(charset) \
+ (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_SUPERSET(charset) \
+ (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_UNIFY_MAP(charset) \
+ (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_DEUNIFIER(charset) \
+ (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))
+
+
+/* Nonzero iff OBJ is a valid charset symbol. */
+#define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0)
+
+/* Check if X is a valid charset symbol. If not, signal an error. */
+#define CHECK_CHARSET(x) \
+ do { \
+ if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0) \
+ x = wrong_type_argument (Qcharsetp, (x)); \
+ } while (0)
-#ifdef BYTE_COMBINING_DEBUG
-#define INC_POS(pos_byte) \
- do { \
- unsigned char *p = BYTE_POS_ADDR (pos_byte); \
- if (BASE_LEADING_CODE_P (*p)) \
- { \
- int len, bytes; \
- len = Z_BYTE - pos_byte; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- pos_byte += bytes; \
- } \
- else \
- pos_byte++; \
+/* Check if X is a valid charset symbol. If valid, set ID to the id
+ number of the charset. Otherwise, signal an error. */
+#define CHECK_CHARSET_GET_ID(x, id) \
+ do { \
+ int idx; \
+ \
+ if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \
+ x = wrong_type_argument (Qcharsetp, (x)); \
+ id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \
+ charset_id)); \
} while (0)
-#else /* not BYTE_COMBINING_DEBUG */
-#define INC_POS(pos_byte) \
- do { \
- unsigned char *p = BYTE_POS_ADDR (pos_byte); \
- pos_byte += BYTES_BY_CHAR_HEAD (*p); \
+/* Check if X is a valid charset symbol. If valid, set ATTR to the
+ attr vector of the charset. Otherwise, signal an error. */
+#define CHECK_CHARSET_GET_ATTR(x, attr) \
+ do { \
+ if (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \
+ x = wrong_type_argument (Qcharsetp, (x)); \
} while (0)
-#endif /* not BYTE_COMBINING_DEBUG */
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
- the previous character boundary. No range checking of POS. */
-#define DEC_POS(pos_byte) \
- do { \
- unsigned char *p, *p_min; \
- \
- pos_byte--; \
- if (pos_byte < GPT_BYTE) \
- p = BEG_ADDR + pos_byte - BEG_BYTE, p_min = BEG_ADDR; \
- else \
- p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE, p_min = GAP_END_ADDR;\
- if (p > p_min && !CHAR_HEAD_P (*p)) \
- { \
- unsigned char *pend = p--; \
- int len, bytes; \
- if (p_min < p - MAX_MULTIBYTE_LENGTH) \
- p_min = p - MAX_MULTIBYTE_LENGTH; \
- while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
- len = pend + 1 - p; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- if (bytes == len) \
- pos_byte -= len - 1; \
- } \
+#define CHECK_CHARSET_GET_CHARSET(x, charset) \
+ do { \
+ int id; \
+ CHECK_CHARSET_GET_ID (x, id); \
+ charset = CHARSET_FROM_ID (id); \
} while (0)
-/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
-#define INC_BOTH(charpos, bytepos) \
-do \
- { \
- (charpos)++; \
- if (NILP (current_buffer->enable_multibyte_characters)) \
- (bytepos)++; \
- else \
- INC_POS ((bytepos)); \
- } \
-while (0)
-
-/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
-
-#define DEC_BOTH(charpos, bytepos) \
-do \
- { \
- (charpos)--; \
- if (NILP (current_buffer->enable_multibyte_characters)) \
- (bytepos)--; \
- else \
- DEC_POS ((bytepos)); \
- } \
-while (0)
+/* Lookup Vcharset_order_list and return the first charset that
+ contains the character C. */
+#define CHAR_CHARSET(c) \
+ char_charset ((c), Qnil, NULL)
-/* Increase the buffer byte position POS_BYTE of the current buffer to
- the next character boundary. This macro relies on the fact that
- *GPT_ADDR and *Z_ADDR are always accessible and the values are
- '\0'. No range checking of POS_BYTE. */
+#if 0
+/* Char-table of charset-sets. Each element is a bool vector indexed
+ by a charset ID. */
+extern Lisp_Object Vchar_charset_set;
-#ifdef BYTE_COMBINING_DEBUG
+/* Charset-bag of character C. */
+#define CHAR_CHARSET_SET(c) \
+ CHAR_TABLE_REF (Vchar_charset_set, c)
-#define BUF_INC_POS(buf, pos_byte) \
+/* Check if two characters C1 and C2 belong to the same charset. */
+#define SAME_CHARSET_P(c1, c2) \
+ intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))
+
+#endif
+
+
+/* Return a character correponding to the code-point CODE of CHARSET.
+ Try some optimization before calling decode_char. */
+
+#define DECODE_CHAR(charset, code) \
+ ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p) \
+ ? (code) \
+ : ((code) < (charset)->min_code || (code) > (charset)->max_code) \
+ ? -1 \
+ : (charset)->unified_p \
+ ? decode_char ((charset), (code)) \
+ : (charset)->method == CHARSET_METHOD_OFFSET \
+ ? ((charset)->code_linear_p \
+ ? (code) - (charset)->min_code + (charset)->code_offset \
+ : decode_char ((charset), (code))) \
+ : (charset)->method == CHARSET_METHOD_MAP \
+ ? ((charset)->code_linear_p \
+ ? XINT (AREF (CHARSET_DECODER (charset), \
+ (code) - (charset)->min_code)) \
+ : decode_char ((charset), (code))) \
+ : decode_char ((charset), (code)))
+
+
+extern Lisp_Object charset_work;
+
+/* Return a code point of CHAR in CHARSET.
+ Try some optimization before calling encode_char. */
+
+#define ENCODE_CHAR(charset, c) \
+ ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \
+ ? (c) \
+ : ((charset)->unified_p \
+ || (charset)->method == CHARSET_METHOD_SUBSET \
+ || (charset)->method == CHARSET_METHOD_SUPERSET) \
+ ? encode_char ((charset), (c)) \
+ : ((c) < (charset)->min_char || (c) > (charset)->max_char) \
+ ? (charset)->invalid_code \
+ : (charset)->method == CHARSET_METHOD_OFFSET \
+ ? ((charset)->code_linear_p \
+ ? (c) - (charset)->code_offset + (charset)->min_code \
+ : encode_char ((charset), (c))) \
+ : (charset)->method == CHARSET_METHOD_MAP \
+ ? ((charset)->compact_codes_p \
+ ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \
+ (NILP (charset_work) \
+ ? (charset)->invalid_code \
+ : XFASTINT (charset_work))) \
+ : encode_char ((charset), (c))) \
+ : encode_char ((charset), (c)))
+
+
+/* Set to 1 when a charset map is loaded to warn that a buffer text
+ and a string data may be relocated. */
+extern int charset_map_loaded;
+
+
+/* Set CHARSET to the charset highest priority of C, CODE to the
+ code-point of C in CHARSET. */
+#define SPLIT_CHAR(c, charset, code) \
+ ((charset) = char_charset ((c), Qnil, &(code)))
+
+
+#define ISO_MAX_DIMENSION 3
+#define ISO_MAX_CHARS 2
+#define ISO_MAX_FINAL 0x80 /* only 0x30..0xFF are used */
+
+/* Mapping table from ISO2022's charset (specified by DIMENSION,
+ CHARS, and FINAL_CHAR) to Emacs' charset ID. Should be accessed by
+ macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
+extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
+
+/* A charset of type iso2022 who has DIMENSION, CHARS, and FINAL
+ (final character). */
+#define ISO_CHARSET_TABLE(dimension, chars_96, final) \
+ iso_charset_table[(dimension) - 1][(chars_96)][(final)]
+
+/* Nonzero iff the charset who has FAST_MAP may contain C. */
+#define CHARSET_FAST_MAP_REF(c, fast_map) \
+ ((c) < 0x10000 \
+ ? fast_map[(c) >> 10] & (1 << (((c) >> 7) & 7)) \
+ : fast_map[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))
+
+#define CHARSET_FAST_MAP_SET(c, fast_map) \
do { \
- unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
- if (BASE_LEADING_CODE_P (*p)) \
- { \
- int len, bytes; \
- len = BUF_Z_BYTE (buf) - pos_byte; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- pos_byte += bytes; \
- } \
+ if ((c) < 0x10000) \
+ (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \
else \
- pos_byte++; \
+ (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \
} while (0)
-#else /* not BYTE_COMBINING_DEBUG */
-#define BUF_INC_POS(buf, pos_byte) \
- do { \
- unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
- pos_byte += BYTES_BY_CHAR_HEAD (*p); \
- } while (0)
-#endif /* not BYTE_COMBINING_DEBUG */
+/* 1 iff CHARSET may contain the character C. */
+#define CHAR_CHARSET_P(c, charset) \
+ ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \
+ || (CHARSET_UNIFIED_P (charset) \
+ ? encode_char ((charset), (c)) != (charset)->invalid_code \
+ : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \
+ && ((charset)->method == CHARSET_METHOD_OFFSET \
+ ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \
+ : ((charset)->method == CHARSET_METHOD_MAP \
+ && (charset)->compact_codes_p) \
+ ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
+ : encode_char ((charset), (c)) != (charset)->invalid_code))))
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
- the previous character boundary. No range checking of POS_BYTE. */
-#define BUF_DEC_POS(buf, pos_byte) \
- do { \
- unsigned char *p, *p_min; \
- pos_byte--; \
- if (pos_byte < BUF_GPT_BYTE (buf)) \
- { \
- p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
- p_min = BUF_BEG_ADDR (buf); \
- } \
- else \
- { \
- p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
- p_min = BUF_GAP_END_ADDR (buf); \
- } \
- if (p > p_min && !CHAR_HEAD_P (*p)) \
- { \
- unsigned char *pend = p--; \
- int len, bytes; \
- if (p_min < p - MAX_MULTIBYTE_LENGTH) \
- p_min = p - MAX_MULTIBYTE_LENGTH; \
- while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
- len = pend + 1 - p; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- if (bytes == len) \
- pos_byte -= len - 1; \
- } \
- } while (0)
+
+/* Special macros for emacs-mule encoding. */
-#endif /* emacs */
-
-/* This is the maximum byte length of multi-byte sequence. */
-#define MAX_MULTIBYTE_LENGTH 4
-
-extern void invalid_character P_ ((int));
-
-extern int translate_char P_ ((Lisp_Object, int, int, int, int));
-extern int split_string P_ ((const unsigned char *, int, int *,
- unsigned char *, unsigned char *));
-extern int char_to_string P_ ((int, unsigned char *));
-extern int char_to_string_1 P_ ((int, unsigned char *));
-extern int string_to_char P_ ((const unsigned char *, int, int *));
-extern int char_printable_p P_ ((int c));
-extern int multibyte_form_length P_ ((const unsigned char *, int));
-extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
- int *));
-extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
-extern int parse_str_to_multibyte P_ ((unsigned char *, int));
-extern int str_to_multibyte P_ ((unsigned char *, int, int));
-extern int str_as_unibyte P_ ((unsigned char *, int));
-extern int get_charset_id P_ ((Lisp_Object));
-extern int find_charset_in_text P_ ((const unsigned char *, int, int, int *,
- Lisp_Object));
-extern int strwidth P_ ((unsigned char *, int));
-extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
-extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
-extern int char_bytes P_ ((int));
-extern int char_valid_p P_ ((int, int));
-
-EXFUN (Funibyte_char_to_multibyte, 1);
-
-extern Lisp_Object Vtranslation_table_vector;
-
-/* Return a translation table of id number ID. */
-#define GET_TRANSLATION_TABLE(id) \
- (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
-
-/* A char-table for characters which may invoke auto-filling. */
-extern Lisp_Object Vauto_fill_chars;
-
-/* Copy LEN bytes from FROM to TO. This macro should be used only
- when a caller knows that LEN is short and the obvious copy loop is
- faster than calling bcopy which has some overhead. Copying a
- multibyte sequence of a multibyte character is the typical case. */
-
-#define BCOPY_SHORT(from, to, len) \
- do { \
- int i = len; \
- const unsigned char *from_p = from; \
- unsigned char *to_p = to; \
- while (i--) *to_p++ = *from_p++; \
- } while (0)
+/* Leading-code followed by extended leading-code. DIMENSION/COLUMN */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_11 0x9A /* 1/1 */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_12 0x9B /* 1/2 */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_21 0x9C /* 2/2 */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_22 0x9D /* 2/2 */
+
+extern struct charset *emacs_mule_charset[256];
+
+
+
+extern Lisp_Object Qcharsetp;
+
+extern Lisp_Object Qascii, Qunicode;
+extern int charset_ascii, charset_eight_bit;
+extern int charset_iso_8859_1;
+extern int charset_jisx0201_roman;
+extern int charset_jisx0208_1978;
+extern int charset_jisx0208;
+
+extern int charset_unibyte;
+
+extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *));
+extern Lisp_Object charset_attributes P_ ((int));
+
+extern int decode_char P_ ((struct charset *, unsigned));
+extern unsigned encode_char P_ ((struct charset *, int));
+extern int string_xstring_p P_ ((Lisp_Object));
+
+extern void map_charset_chars P_ ((void (*) (Lisp_Object, Lisp_Object),
+ Lisp_Object, Lisp_Object,
+ struct charset *, unsigned, unsigned));
+
+EXFUN (Funify_charset, 3);
#endif /* EMACS_CHARSET_H */
diff --git a/src/chartab.c b/src/chartab.c
new file mode 100644
index 00000000000..1288d49929f
--- /dev/null
+++ b/src/chartab.c
@@ -0,0 +1,965 @@
+/* chartab.c -- char-table support
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <config.h>
+#include "lisp.h"
+#include "character.h"
+#include "charset.h"
+#include "ccl.h"
+
+/* 64/16/32/128 */
+
+/* Number of elements in Nth level char-table. */
+const int chartab_size[4] =
+ { (1 << CHARTAB_SIZE_BITS_0),
+ (1 << CHARTAB_SIZE_BITS_1),
+ (1 << CHARTAB_SIZE_BITS_2),
+ (1 << CHARTAB_SIZE_BITS_3) };
+
+/* Number of characters each element of Nth level char-table
+ covers. */
+const int chartab_chars[4] =
+ { (1 << (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
+ (1 << (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
+ (1 << CHARTAB_SIZE_BITS_3),
+ 1 };
+
+/* Number of characters (in bits) each element of Nth level char-table
+ covers. */
+const int chartab_bits[4] =
+ { (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
+ (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
+ CHARTAB_SIZE_BITS_3,
+ 0 };
+
+#define CHARTAB_IDX(c, depth, min_char) \
+ (((c) - (min_char)) >> chartab_bits[(depth)])
+
+
+DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
+ doc: /* Return a newly created char-table, with purpose PURPOSE.
+Each element is initialized to INIT, which defaults to nil.
+
+PURPOSE should be a symbol. If it has a `char-table-extra-slots'
+property, the property's value should be an integer between 0 and 10
+that specifies how many extra slots the char-table has. Otherwise,
+the char-table has no extra slot. */)
+ (purpose, init)
+ register Lisp_Object purpose, init;
+{
+ Lisp_Object vector;
+ Lisp_Object n;
+ int n_extras;
+ int size;
+
+ CHECK_SYMBOL (purpose);
+ n = Fget (purpose, Qchar_table_extra_slots);
+ if (NILP (n))
+ n_extras = 0;
+ else
+ {
+ CHECK_NATNUM (n);
+ n_extras = XINT (n);
+ if (n_extras > 10)
+ args_out_of_range (n, Qnil);
+ }
+
+ size = VECSIZE (struct Lisp_Char_Table) - 1 + n_extras;
+ vector = Fmake_vector (make_number (size), init);
+ XCHAR_TABLE (vector)->parent = Qnil;
+ XCHAR_TABLE (vector)->purpose = purpose;
+ XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
+ return vector;
+}
+
+static Lisp_Object
+make_sub_char_table (depth, min_char, defalt)
+ int depth, min_char;
+ Lisp_Object defalt;
+{
+ Lisp_Object table;
+ int size = VECSIZE (struct Lisp_Sub_Char_Table) - 1 + chartab_size[depth];
+
+ table = Fmake_vector (make_number (size), defalt);
+ XSUB_CHAR_TABLE (table)->depth = make_number (depth);
+ XSUB_CHAR_TABLE (table)->min_char = make_number (min_char);
+ XSETSUB_CHAR_TABLE (table, XSUB_CHAR_TABLE (table));
+
+ return table;
+}
+
+static Lisp_Object
+char_table_ascii (table)
+ Lisp_Object table;
+{
+ Lisp_Object sub;
+
+ sub = XCHAR_TABLE (table)->contents[0];
+ if (! SUB_CHAR_TABLE_P (sub))
+ return sub;
+ sub = XSUB_CHAR_TABLE (sub)->contents[0];
+ if (! SUB_CHAR_TABLE_P (sub))
+ return sub;
+ return XSUB_CHAR_TABLE (sub)->contents[0];
+}
+
+Lisp_Object
+copy_sub_char_table (table)
+ Lisp_Object table;
+{
+ Lisp_Object copy;
+ int depth = XINT (XSUB_CHAR_TABLE (table)->depth);
+ int min_char = XINT (XSUB_CHAR_TABLE (table)->min_char);
+ Lisp_Object val;
+ int i;
+
+ copy = make_sub_char_table (depth, min_char, Qnil);
+ /* Recursively copy any sub char-tables. */
+ for (i = 0; i < chartab_size[depth]; i++)
+ {
+ val = XSUB_CHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (val))
+ XSUB_CHAR_TABLE (copy)->contents[i] = copy_sub_char_table (val);
+ else
+ XSUB_CHAR_TABLE (copy)->contents[i] = val;
+ }
+
+ return copy;
+}
+
+
+Lisp_Object
+copy_char_table (table)
+ Lisp_Object table;
+{
+ Lisp_Object copy;
+ int size = XCHAR_TABLE (table)->size & PSEUDOVECTOR_SIZE_MASK;
+ int i;
+
+ copy = Fmake_vector (make_number (size), Qnil);
+ XCHAR_TABLE (copy)->defalt = XCHAR_TABLE (table)->defalt;
+ XCHAR_TABLE (copy)->parent = XCHAR_TABLE (table)->parent;
+ XCHAR_TABLE (copy)->purpose = XCHAR_TABLE (table)->purpose;
+ XCHAR_TABLE (copy)->ascii = XCHAR_TABLE (table)->ascii;
+ for (i = 0; i < chartab_size[0]; i++)
+ XCHAR_TABLE (copy)->contents[i]
+ = (SUB_CHAR_TABLE_P (XCHAR_TABLE (table)->contents[i])
+ ? copy_sub_char_table (XCHAR_TABLE (table)->contents[i])
+ : XCHAR_TABLE (table)->contents[i]);
+ if (SUB_CHAR_TABLE_P (XCHAR_TABLE (copy)->ascii))
+ XCHAR_TABLE (copy)->ascii = char_table_ascii (copy);
+ size -= VECSIZE (struct Lisp_Char_Table) - 1;
+ for (i = 0; i < size; i++)
+ XCHAR_TABLE (copy)->extras[i] = XCHAR_TABLE (table)->extras[i];
+
+ XSETCHAR_TABLE (copy, XCHAR_TABLE (copy));
+ return copy;
+}
+
+Lisp_Object
+sub_char_table_ref (table, c)
+ Lisp_Object table;
+ int c;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int min_char = XINT (tbl->min_char);
+ Lisp_Object val;
+
+ val = tbl->contents[CHARTAB_IDX (c, depth, min_char)];
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref (val, c);
+ return val;
+}
+
+Lisp_Object
+char_table_ref (table, c)
+ Lisp_Object table;
+ int c;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+ Lisp_Object val;
+
+ if (ASCII_CHAR_P (c))
+ {
+ val = tbl->ascii;
+ if (SUB_CHAR_TABLE_P (val))
+ val = XSUB_CHAR_TABLE (val)->contents[c];
+ }
+ else
+ {
+ val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref (val, c);
+ }
+ if (NILP (val))
+ {
+ val = tbl->defalt;
+ if (NILP (val) && CHAR_TABLE_P (tbl->parent))
+ val = char_table_ref (tbl->parent, c);
+ }
+ return val;
+}
+
+static Lisp_Object
+sub_char_table_ref_and_range (table, c, from, to, defalt)
+ Lisp_Object table;
+ int c;
+ int *from, *to;
+ Lisp_Object defalt;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int min_char = XINT (tbl->min_char);
+ int max_char = min_char + chartab_chars[depth - 1] - 1;
+ int index = CHARTAB_IDX (c, depth, min_char);
+ Lisp_Object val;
+
+ val = tbl->contents[index];
+ *from = min_char + index * chartab_chars[depth];
+ *to = *from + chartab_chars[depth] - 1;
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref_and_range (val, c, from, to, defalt);
+ else if (NILP (val))
+ val = defalt;
+
+ while (*from > min_char
+ && *from == min_char + index * chartab_chars[depth])
+ {
+ Lisp_Object this_val;
+ int this_from = *from - chartab_chars[depth];
+ int this_to = *from - 1;
+
+ index--;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_to,
+ &this_from, &this_to,
+ defalt);
+ else if (NILP (this_val))
+ this_val = defalt;
+
+ if (! EQ (this_val, val))
+ break;
+ *from = this_from;
+ }
+ index = CHARTAB_IDX (c, depth, min_char);
+ while (*to < max_char
+ && *to == min_char + (index + 1) * chartab_chars[depth] - 1)
+ {
+ Lisp_Object this_val;
+ int this_from = *to + 1;
+ int this_to = this_from + chartab_chars[depth] - 1;
+
+ index++;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_from,
+ &this_from, &this_to,
+ defalt);
+ else if (NILP (this_val))
+ this_val = defalt;
+ if (! EQ (this_val, val))
+ break;
+ *to = this_to;
+ }
+
+ return val;
+}
+
+
+/* Return the value for C in char-table TABLE. Set *FROM and *TO to
+ the range of characters (containing C) that have the same value as
+ C. It is not assured that the value of (*FROM - 1) and (*TO + 1)
+ is different from that of C. */
+
+Lisp_Object
+char_table_ref_and_range (table, c, from, to)
+ Lisp_Object table;
+ int c;
+ int *from, *to;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+ int index = CHARTAB_IDX (c, 0, 0);
+ Lisp_Object val;
+
+ val = tbl->contents[index];
+ *from = index * chartab_chars[0];
+ *to = *from + chartab_chars[0] - 1;
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt);
+ else if (NILP (val))
+ val = tbl->defalt;
+
+ while (*from > 0 && *from == index * chartab_chars[0])
+ {
+ Lisp_Object this_val;
+ int this_from = *from - chartab_chars[0];
+ int this_to = *from - 1;
+
+ index--;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_to,
+ &this_from, &this_to,
+ tbl->defalt);
+ else if (NILP (this_val))
+ this_val = tbl->defalt;
+
+ if (! EQ (this_val, val))
+ break;
+ *from = this_from;
+ }
+ while (*to < MAX_CHAR && *to == (index + 1) * chartab_chars[0] - 1)
+ {
+ Lisp_Object this_val;
+ int this_from = *to + 1;
+ int this_to = this_from + chartab_chars[0] - 1;
+
+ index++;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_from,
+ &this_from, &this_to,
+ tbl->defalt);
+ else if (NILP (this_val))
+ this_val = tbl->defalt;
+ if (! EQ (this_val, val))
+ break;
+ *to = this_to;
+ }
+
+ return val;
+}
+
+
+#define ASET_RANGE(ARRAY, FROM, TO, LIMIT, VAL) \
+ do { \
+ int limit = (TO) < (LIMIT) ? (TO) : (LIMIT); \
+ for (; (FROM) < limit; (FROM)++) (ARRAY)->contents[(FROM)] = (VAL); \
+ } while (0)
+
+#define GET_SUB_CHAR_TABLE(TABLE, SUBTABLE, IDX, DEPTH, MIN_CHAR) \
+ do { \
+ (SUBTABLE) = (TABLE)->contents[(IDX)]; \
+ if (!SUB_CHAR_TABLE_P (SUBTABLE)) \
+ (SUBTABLE) = make_sub_char_table ((DEPTH), (MIN_CHAR), (SUBTABLE)); \
+ } while (0)
+
+
+static void
+sub_char_table_set (table, c, val)
+ Lisp_Object table;
+ int c;
+ Lisp_Object val;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT ((tbl)->depth);
+ int min_char = XINT ((tbl)->min_char);
+ int i = CHARTAB_IDX (c, depth, min_char);
+ Lisp_Object sub;
+
+ if (depth == 3)
+ tbl->contents[i] = val;
+ else
+ {
+ sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ sub = make_sub_char_table (depth + 1,
+ min_char + i * chartab_chars[depth], sub);
+ tbl->contents[i] = sub;
+ }
+ sub_char_table_set (sub, c, val);
+ }
+}
+
+Lisp_Object
+char_table_set (table, c, val)
+ Lisp_Object table;
+ int c;
+ Lisp_Object val;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+
+ if (ASCII_CHAR_P (c)
+ && SUB_CHAR_TABLE_P (tbl->ascii))
+ {
+ XSUB_CHAR_TABLE (tbl->ascii)->contents[c] = val;
+ }
+ else
+ {
+ int i = CHARTAB_IDX (c, 0, 0);
+ Lisp_Object sub;
+
+ sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ sub = make_sub_char_table (1, i * chartab_chars[0], sub);
+ tbl->contents[i] = sub;
+ }
+ sub_char_table_set (sub, c, val);
+ if (ASCII_CHAR_P (c))
+ tbl->ascii = char_table_ascii (table);
+ }
+ return val;
+}
+
+static void
+sub_char_table_set_range (table, depth, min_char, from, to, val)
+ Lisp_Object *table;
+ int depth;
+ int min_char;
+ int from, to;
+ Lisp_Object val;
+{
+ int max_char = min_char + chartab_chars[depth] - 1;
+
+ if (depth == 3 || (from <= min_char && to >= max_char))
+ *table = val;
+ else
+ {
+ int i, j;
+
+ depth++;
+ if (! SUB_CHAR_TABLE_P (*table))
+ *table = make_sub_char_table (depth, min_char, *table);
+ if (from < min_char)
+ from = min_char;
+ if (to > max_char)
+ to = max_char;
+ i = CHARTAB_IDX (from, depth, min_char);
+ j = CHARTAB_IDX (to, depth, min_char);
+ min_char += chartab_chars[depth] * i;
+ for (; i <= j; i++, min_char += chartab_chars[depth])
+ sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i,
+ depth, min_char, from, to, val);
+ }
+}
+
+
+Lisp_Object
+char_table_set_range (table, from, to, val)
+ Lisp_Object table;
+ int from, to;
+ Lisp_Object val;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+ Lisp_Object *contents = tbl->contents;
+ int i, min_char;
+
+ if (from == to)
+ char_table_set (table, from, val);
+ else
+ {
+ for (i = CHARTAB_IDX (from, 0, 0), min_char = i * chartab_chars[0];
+ min_char <= to;
+ i++, min_char += chartab_chars[0])
+ sub_char_table_set_range (contents + i, 0, min_char, from, to, val);
+ if (ASCII_CHAR_P (from))
+ tbl->ascii = char_table_ascii (table);
+ }
+ return val;
+}
+
+
+DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
+ 1, 1, 0,
+ doc: /*
+Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
+ (char_table)
+ Lisp_Object char_table;
+{
+ CHECK_CHAR_TABLE (char_table);
+
+ return XCHAR_TABLE (char_table)->purpose;
+}
+
+DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
+ 1, 1, 0,
+ doc: /* Return the parent char-table of CHAR-TABLE.
+The value is either nil or another char-table.
+If CHAR-TABLE holds nil for a given character,
+then the actual applicable value is inherited from the parent char-table
+\(or from its parents, if necessary). */)
+ (char_table)
+ Lisp_Object char_table;
+{
+ CHECK_CHAR_TABLE (char_table);
+
+ return XCHAR_TABLE (char_table)->parent;
+}
+
+DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
+ 2, 2, 0,
+ doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
+PARENT must be either nil or another char-table. */)
+ (char_table, parent)
+ Lisp_Object char_table, parent;
+{
+ Lisp_Object temp;
+
+ CHECK_CHAR_TABLE (char_table);
+
+ if (!NILP (parent))
+ {
+ CHECK_CHAR_TABLE (parent);
+
+ for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
+ if (EQ (temp, char_table))
+ error ("Attempt to make a chartable be its own parent");
+ }
+
+ XCHAR_TABLE (char_table)->parent = parent;
+
+ return parent;
+}
+
+DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
+ 2, 2, 0,
+ doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
+ (char_table, n)
+ Lisp_Object char_table, n;
+{
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_NUMBER (n);
+ if (XINT (n) < 0
+ || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
+ args_out_of_range (char_table, n);
+
+ return XCHAR_TABLE (char_table)->extras[XINT (n)];
+}
+
+DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
+ Sset_char_table_extra_slot,
+ 3, 3, 0,
+ doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
+ (char_table, n, value)
+ Lisp_Object char_table, n, value;
+{
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_NUMBER (n);
+ if (XINT (n) < 0
+ || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
+ args_out_of_range (char_table, n);
+
+ return XCHAR_TABLE (char_table)->extras[XINT (n)] = value;
+}
+
+DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
+ 2, 2, 0,
+ doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
+RANGE should be nil (for the default value),
+a cons of character codes (for characters in the range), or a character code. */)
+ (char_table, range)
+ Lisp_Object char_table, range;
+{
+ Lisp_Object val;
+ CHECK_CHAR_TABLE (char_table);
+
+ if (EQ (range, Qnil))
+ val = XCHAR_TABLE (char_table)->defalt;
+ else if (INTEGERP (range))
+ val = CHAR_TABLE_REF (char_table, XINT (range));
+ else if (CONSP (range))
+ {
+ int from, to;
+
+ CHECK_CHARACTER_CAR (range);
+ CHECK_CHARACTER_CDR (range);
+ val = char_table_ref_and_range (char_table, XINT (XCAR (range)),
+ &from, &to);
+ /* Not yet implemented. */
+ }
+ else
+ error ("Invalid RANGE argument to `char-table-range'");
+ return val;
+}
+
+DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
+ 3, 3, 0,
+ doc: /*
+Set the value in CHAR-TABLE for characters specified by RANGE to VALUE.
+RANGE should be t (for all characters), nil (for the default value),
+a cons of character codes (for characters in the range), or a character code. */)
+ (char_table, range, value)
+ Lisp_Object char_table, range, value;
+{
+ CHECK_CHAR_TABLE (char_table);
+ if (EQ (range, Qt))
+ {
+ int i;
+
+ XCHAR_TABLE (char_table)->ascii = Qnil;
+ for (i = 0; i < chartab_size[0]; i++)
+ XCHAR_TABLE (char_table)->contents[i] = Qnil;
+ XCHAR_TABLE (char_table)->defalt = value;
+ }
+ else if (EQ (range, Qnil))
+ XCHAR_TABLE (char_table)->defalt = value;
+ else if (INTEGERP (range))
+ char_table_set (char_table, XINT (range), value);
+ else if (CONSP (range))
+ {
+ CHECK_CHARACTER_CAR (range);
+ CHECK_CHARACTER_CDR (range);
+ char_table_set_range (char_table,
+ XINT (XCAR (range)), XINT (XCDR (range)), value);
+ }
+ else
+ error ("Invalid RANGE argument to `set-char-table-range'");
+
+ return value;
+}
+
+DEFUN ("set-char-table-default", Fset_char_table_default,
+ Sset_char_table_default, 3, 3, 0,
+ doc: /*
+This function is obsolete and has no effect. */)
+ (char_table, ch, value)
+ Lisp_Object char_table, ch, value;
+{
+ return Qnil;
+}
+
+/* Look up the element in TABLE at index CH, and return it as an
+ integer. If the element is nil, return CH itself. (Actually we do
+ that for any non-integer.) */
+
+int
+char_table_translate (table, ch)
+ Lisp_Object table;
+ int ch;
+{
+ Lisp_Object value;
+ value = Faref (table, make_number (ch));
+ if (! INTEGERP (value)) /* fixme: use CHARACTERP? */
+ return ch;
+ return XINT (value);
+}
+
+static Lisp_Object
+optimize_sub_char_table (table)
+ Lisp_Object table;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ Lisp_Object elt, this;
+ int i;
+
+ elt = XSUB_CHAR_TABLE (table)->contents[0];
+ if (SUB_CHAR_TABLE_P (elt))
+ elt = XSUB_CHAR_TABLE (table)->contents[0] = optimize_sub_char_table (elt);
+ if (SUB_CHAR_TABLE_P (elt))
+ return table;
+ for (i = 1; i < chartab_size[depth]; i++)
+ {
+ this = XSUB_CHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ this = XSUB_CHAR_TABLE (table)->contents[i]
+ = optimize_sub_char_table (this);
+ if (SUB_CHAR_TABLE_P (this)
+ || NILP (Fequal (this, elt)))
+ break;
+ }
+
+ return (i < chartab_size[depth] ? table : elt);
+}
+
+DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
+ 1, 1, 0,
+ doc: /* Optimize CHAR-TABLE. */)
+ (char_table)
+ Lisp_Object char_table;
+{
+ Lisp_Object elt;
+ int i;
+
+ CHECK_CHAR_TABLE (char_table);
+
+ for (i = 0; i < chartab_size[0]; i++)
+ {
+ elt = XCHAR_TABLE (char_table)->contents[i];
+ if (SUB_CHAR_TABLE_P (elt))
+ XCHAR_TABLE (char_table)->contents[i] = optimize_sub_char_table (elt);
+ }
+ return Qnil;
+}
+
+
+static Lisp_Object
+map_sub_char_table (c_function, function, table, arg, val, range,
+ default_val, parent)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg, val, range, default_val, parent;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int i, c;
+
+ for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth];
+ i++, c += chartab_chars[depth])
+ {
+ Lisp_Object this;
+
+ this = tbl->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ val = map_sub_char_table (c_function, function, this, arg, val, range,
+ default_val, parent);
+ else
+ {
+ if (NILP (this))
+ this = default_val;
+ if (NILP (this) && ! NILP (parent))
+ this = CHAR_TABLE_REF (parent, c);
+ if (NILP (Fequal (val, this)))
+ {
+ if (! NILP (val))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (depth == 3
+ && EQ (XCAR (range), XCDR (range)))
+ {
+ if (c_function)
+ (*c_function) (arg, XCAR (range), val);
+ else
+ call2 (function, XCAR (range), val);
+ }
+ else
+ {
+ if (c_function)
+ (*c_function) (arg, range, val);
+ else
+ call2 (function, range, val);
+ }
+ }
+ val = this;
+ XSETCAR (range, make_number (c));
+ }
+ }
+ }
+ return val;
+}
+
+
+/* Map C_FUNCTION or FUNCTION over TABLE, calling it for each
+ character or group of characters that share a value.
+
+ ARG is passed to C_FUNCTION when that is called. */
+
+void
+map_char_table (c_function, function, table, arg)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg;
+{
+ Lisp_Object range, val;
+ int c, i;
+
+ range = Fcons (make_number (0), Qnil);
+ val = XCHAR_TABLE (table)->ascii;
+ if (SUB_CHAR_TABLE_P (val))
+ val = XSUB_CHAR_TABLE (val)->contents[0];
+
+ for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
+ {
+ Lisp_Object this;
+
+ this = XCHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ val = map_sub_char_table (c_function, function, this, arg, val, range,
+ XCHAR_TABLE (table)->defalt,
+ XCHAR_TABLE (table)->parent);
+ else
+ {
+ if (NILP (this))
+ this = XCHAR_TABLE (table)->defalt;
+ if (NILP (this) && ! NILP (XCHAR_TABLE (table)->parent))
+ this = CHAR_TABLE_REF (XCHAR_TABLE (table)->parent, c);
+ if (NILP (Fequal (val, this)))
+ {
+ if (! NILP (val))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range, val);
+ else
+ call2 (function, range, val);
+ }
+ val = this;
+ XSETCAR (range, make_number (c));
+ }
+ }
+ }
+
+ if (! NILP (val))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range, val);
+ else
+ call2 (function, range, val);
+ }
+}
+
+DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
+ 2, 2, 0,
+ doc: /*
+Call FUNCTION for each character in CHAR-TABLE that has non-nil value.
+FUNCTION is called with two arguments--a key and a value.
+The key is a character code or a cons of character codes specifying a
+range of characters that have the same value. */)
+ (function, char_table)
+ Lisp_Object function, char_table;
+{
+ CHECK_CHAR_TABLE (char_table);
+
+ map_char_table (NULL, function, char_table, char_table);
+ return Qnil;
+}
+
+
+static void
+map_sub_char_table_for_charset (c_function, function, table, arg, range,
+ charset, from, to)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg, range;
+ struct charset *charset;
+ unsigned from, to;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int c, i;
+
+ if (depth < 3)
+ for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth];
+ i++, c += chartab_chars[depth])
+ {
+ Lisp_Object this;
+
+ this = tbl->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ map_sub_char_table_for_charset (c_function, function, this, arg,
+ range, charset, from, to);
+ else
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+ XSETCAR (range, Qnil);
+ }
+ }
+ else
+ for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth]; i++, c ++)
+ {
+ Lisp_Object this;
+ unsigned code;
+
+ this = tbl->contents[i];
+ if (NILP (this)
+ || (charset
+ && (code = ENCODE_CHAR (charset, c),
+ (code < from || code > to))))
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (range, arg);
+ else
+ call2 (function, range, arg);
+ XSETCAR (range, Qnil);
+ }
+ }
+ else
+ {
+ if (NILP (XCAR (range)))
+ XSETCAR (range, make_number (c));
+ }
+ }
+}
+
+
+void
+map_char_table_for_charset (c_function, function, table, arg,
+ charset, from, to)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg;
+ struct charset *charset;
+ unsigned from, to;
+{
+ Lisp_Object range;
+ int c, i;
+
+ range = Fcons (Qnil, Qnil);
+
+ for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
+ {
+ Lisp_Object this;
+
+ this = XCHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ map_sub_char_table_for_charset (c_function, function, this, arg,
+ range, charset, from, to);
+ else
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+ XSETCAR (range, Qnil);
+ }
+ }
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+}
+
+
+void
+syms_of_chartab ()
+{
+ defsubr (&Smake_char_table);
+ defsubr (&Schar_table_parent);
+ defsubr (&Schar_table_subtype);
+ defsubr (&Sset_char_table_parent);
+ defsubr (&Schar_table_extra_slot);
+ defsubr (&Sset_char_table_extra_slot);
+ defsubr (&Schar_table_range);
+ defsubr (&Sset_char_table_range);
+ defsubr (&Sset_char_table_default);
+ defsubr (&Soptimize_char_table);
+ defsubr (&Smap_char_table);
+}
diff --git a/src/cmds.c b/src/cmds.c
index 4d7228e88ad..2e63b2fbb52 100644
--- a/src/cmds.c
+++ b/src/cmds.c
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "syntax.h"
#include "window.h"
#include "keyboard.h"
@@ -326,11 +326,11 @@ Whichever character you type to run this command is inserted. */)
CHECK_NUMBER (n);
/* Barf if the key that invoked this was not a character. */
- if (!INTEGERP (last_command_char))
+ if (!CHARACTERP (last_command_char))
bitch_at_user ();
{
int character = translate_char (Vtranslation_table_for_input,
- XINT (last_command_char), 0, 0, 0);
+ XINT (last_command_char));
if (XINT (n) >= 2 && NILP (current_buffer->overwrite_mode))
{
int modified_char = character;
@@ -394,7 +394,6 @@ internal_self_insert (c, noautofill)
/* At first, get multi-byte form of C in STR. */
if (!NILP (current_buffer->enable_multibyte_characters))
{
- c = unibyte_char_to_multibyte (c);
len = CHAR_STRING (c, str);
if (len == 1)
/* If C has modifier bits, this makes C an appropriate
@@ -471,10 +470,19 @@ internal_self_insert (c, noautofill)
}
hairy = 2;
}
+
+ if (NILP (current_buffer->enable_multibyte_characters))
+ MAKE_CHAR_MULTIBYTE (c);
+ synt = SYNTAX (c);
+
if (!NILP (current_buffer->abbrev_mode)
- && SYNTAX (c) != Sword
+ && synt != Sword
&& NILP (current_buffer->read_only)
- && PT > BEGV && SYNTAX (XFASTINT (Fprevious_char ())) == Sword)
+ && PT > BEGV
+ && (!NILP (current_buffer->enable_multibyte_characters)
+ ? SYNTAX (XFASTINT (Fprevious_char ())) == Sword
+ : (SYNTAX (unibyte_char_to_multibyte (XFASTINT (Fprevious_char ())))
+ == Sword)))
{
int modiff = MODIFF;
Lisp_Object sym;
@@ -542,7 +550,6 @@ internal_self_insert (c, noautofill)
Vself_insert_face = Qnil;
}
- synt = SYNTAX (c);
if ((synt == Sclose || synt == Smath)
&& !NILP (Vblink_paren_function) && INTERACTIVE
&& !noautofill)
diff --git a/src/coding.c b/src/coding.c
index b06bf79a4bf..c3804630d72 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,7 +1,10 @@
-/* Coding system handler (conversion, detection, and etc).
- Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
- Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
+/* Coding system handler (conversion, detection, etc).
+ Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -24,374 +27,309 @@ Boston, MA 02111-1307, USA. */
0. General comments
1. Preamble
- 2. Emacs' internal format (emacs-mule) handlers
- 3. ISO2022 handlers
- 4. Shift-JIS and BIG5 handlers
- 5. CCL handlers
- 6. End-of-line handlers
- 7. C library functions
- 8. Emacs Lisp library functions
- 9. Post-amble
+ 2. Emacs' internal format (emacs-utf-8) handlers
+ 3. UTF-8 handlers
+ 4. UTF-16 handlers
+ 5. Charset-base coding systems handlers
+ 6. emacs-mule (old Emacs' internal format) handlers
+ 7. ISO2022 handlers
+ 8. Shift-JIS and BIG5 handlers
+ 9. CCL handlers
+ 10. C library functions
+ 11. Emacs Lisp library functions
+ 12. Postamble
*/
-/*** 0. General comments ***/
+/*** 0. General comments ***
-/*** GENERAL NOTE on CODING SYSTEMS ***
+CODING SYSTEM
- A coding system is an encoding mechanism for one or more character
- sets. Here's a list of coding systems which Emacs can handle. When
- we say "decode", it means converting some other coding system to
- Emacs' internal format (emacs-mule), and when we say "encode",
- it means converting the coding system emacs-mule to some other
+ A coding system is an object for an encoding mechanism that contains
+ information about how to convert byte sequences to character
+ sequences and vice versa. When we say "decode", it means converting
+ a byte sequence of a specific coding system into a character
+ sequence that is represented by Emacs' internal coding system
+ `emacs-utf-8', and when we say "encode", it means converting a
+ character sequence of emacs-utf-8 to a byte sequence of a specific
coding system.
- 0. Emacs' internal format (emacs-mule)
+ In Emacs Lisp, a coding system is represented by a Lisp symbol. In
+ C level, a coding system is represented by a vector of attributes
+ stored in the hash table Vcharset_hash_table. The conversion from
+ coding system symbol to attributes vector is done by looking up
+ Vcharset_hash_table by the symbol.
- Emacs itself holds a multi-lingual character in buffers and strings
- in a special format. Details are described in section 2.
+ Coding systems are classified into the following types depending on
+ the encoding mechanism. Here's a brief description of the types.
- 1. ISO2022
+ o UTF-8
+
+ o UTF-16
+
+ o Charset-base coding system
+
+ A coding system defined by one or more (coded) character sets.
+ Decoding and encoding are done by a code converter defined for each
+ character set.
+
+ o Old Emacs internal format (emacs-mule)
+
+ The coding system adopted by old versions of Emacs (20 and 21).
+
+ o ISO2022-base coding system
The most famous coding system for multiple character sets. X's
- Compound Text, various EUCs (Extended Unix Code), and coding
- systems used in Internet communication such as ISO-2022-JP are
- all variants of ISO2022. Details are described in section 3.
+ Compound Text, various EUCs (Extended Unix Code), and coding systems
+ used in the Internet communication such as ISO-2022-JP are all
+ variants of ISO2022.
- 2. SJIS (or Shift-JIS or MS-Kanji-Code)
+ o SJIS (or Shift-JIS or MS-Kanji-Code)
A coding system to encode character sets: ASCII, JISX0201, and
JISX0208. Widely used for PC's in Japan. Details are described in
- section 4.
+ section 8.
- 3. BIG5
+ o BIG5
- A coding system to encode the character sets ASCII and Big5. Widely
+ A coding system to encode character sets: ASCII and Big5. Widely
used for Chinese (mainly in Taiwan and Hong Kong). Details are
- described in section 4. In this file, when we write "BIG5"
- (all uppercase), we mean the coding system, and when we write
- "Big5" (capitalized), we mean the character set.
+ described in section 8. In this file, when we write "big5" (all
+ lowercase), we mean the coding system, and when we write "Big5"
+ (capitalized), we mean the character set.
- 4. Raw text
+ o CCL
- A coding system for text containing random 8-bit code. Emacs does
- no code conversion on such text except for end-of-line format.
+ If a user wants to decode/encode text encoded in a coding system
+ not listed above, he can supply a decoder and an encoder for it in
+ CCL (Code Conversion Language) programs. Emacs executes the CCL
+ program while decoding/encoding.
- 5. Other
+ o Raw-text
- If a user wants to read/write text encoded in a coding system not
- listed above, he can supply a decoder and an encoder for it as CCL
- (Code Conversion Language) programs. Emacs executes the CCL program
- while reading/writing.
+ A coding system for text containing raw eight-bit data. Emacs
+ treats each byte of source text as a character (except for
+ end-of-line conversion).
- Emacs represents a coding system by a Lisp symbol that has a property
- `coding-system'. But, before actually using the coding system, the
- information about it is set in a structure of type `struct
- coding_system' for rapid processing. See section 6 for more details.
+ o No-conversion
+
+ Like raw text, but don't do end-of-line conversion.
-*/
-/*** GENERAL NOTES on END-OF-LINE FORMAT ***
+END-OF-LINE FORMAT
- How end-of-line of text is encoded depends on the operating system.
- For instance, Unix's format is just one byte of `line-feed' code,
+ How text end-of-line is encoded depends on operating system. For
+ instance, Unix's format is just one byte of LF (line-feed) code,
whereas DOS's format is two-byte sequence of `carriage-return' and
`line-feed' codes. MacOS's format is usually one byte of
`carriage-return'.
Since text character encoding and end-of-line encoding are
- independent, any coding system described above can have any
- end-of-line format. So Emacs has information about end-of-line
- format in each coding-system. See section 6 for more details.
+ independent, any coding system described above can take any format
+ of end-of-line (except for no-conversion).
+
+STRUCT CODING_SYSTEM
+
+ Before using a coding system for code conversion (i.e. decoding and
+ encoding), we setup a structure of type `struct coding_system'.
+ This structure keeps various information about a specific code
+ conversion (e.g. the location of source and destination data).
*/
+/* COMMON MACROS */
+
+
/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
- These functions check if a text between SRC and SRC_END is encoded
- in the coding system category XXX. Each returns an integer value in
- which appropriate flag bits for the category XXX are set. The flag
- bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
- template for these functions. If MULTIBYTEP is nonzero, 8-bit codes
- of the range 0x80..0x9F are in multibyte form. */
+ These functions check if a byte sequence specified as a source in
+ CODING conforms to the format of XXX, and update the members of
+ DETECT_INFO.
+
+ Return 1 if the byte sequence conforms to XXX, otherwise return 0.
+
+ Below is the template of these functions. */
+
#if 0
-int
-detect_coding_emacs_mule (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+static int
+detect_coding_XXX (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- ...
+ unsigned char *src = coding->source;
+ unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
+ ...;
+
+ while (1)
+ {
+ /* Get one byte from the source. If the souce is exausted, jump
+ to no_more_source:. */
+ ONE_MORE_BYTE (c);
+
+ if (! __C_conforms_to_XXX___ (c))
+ break;
+ if (! __C_strongly_suggests_XXX__ (c))
+ found = CATEGORY_MASK_XXX;
+ }
+ /* The byte sequence is invalid for XXX. */
+ detect_info->rejected |= CATEGORY_MASK_XXX;
+ return 0;
+
+ no_more_source:
+ /* The source exausted successfully. */
+ detect_info->found |= found;
+ return 1;
}
#endif
/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
- These functions decode SRC_BYTES length of unibyte text at SOURCE
- encoded in CODING to Emacs' internal format. The resulting
- multibyte text goes to a place pointed to by DESTINATION, the length
- of which should not exceed DST_BYTES.
+ These functions decode a byte sequence specified as a source by
+ CODING. The resulting multibyte text goes to a place pointed to by
+ CODING->charbuf, the length of which should not exceed
+ CODING->charbuf_size;
- These functions set the information about original and decoded texts
- in the members `produced', `produced_char', `consumed', and
- `consumed_char' of the structure *CODING. They also set the member
- `result' to one of CODING_FINISH_XXX indicating how the decoding
- finished.
+ These functions set the information of original and decoded texts in
+ CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
+ They also set CODING->result to one of CODING_RESULT_XXX indicating
+ how the decoding is finished.
- DST_BYTES zero means that the source area and destination area are
- overlapped, which means that we can produce a decoded text until it
- reaches the head of the not-yet-decoded source text.
+ Below is the template of these functions. */
- Below is a template for these functions. */
#if 0
static void
-decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_XXXX (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- ...
+ unsigned char *src = coding->source + coding->consumed;
+ unsigned char *src_end = coding->source + coding->src_bytes;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source code, or
+ when there's no room in CHARBUF for a decoded character. */
+ unsigned char *src_base;
+ /* A buffer to produce decoded characters. */
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size;
+ int multibytep = coding->src_multibyte;
+
+ while (1)
+ {
+ src_base = src;
+ if (charbuf < charbuf_end)
+ /* No more room to produce a decoded character. */
+ break;
+ ONE_MORE_BYTE (c);
+ /* Decode it. */
+ }
+
+ no_more_source:
+ if (src_base < src_end
+ && coding->mode & CODING_MODE_LAST_BLOCK)
+ /* If the source ends by partial bytes to construct a character,
+ treat them as eight-bit raw data. */
+ while (src_base < src_end && charbuf < charbuf_end)
+ *charbuf++ = *src_base++;
+ /* Remember how many bytes and characters we consumed. If the
+ source is multibyte, the bytes and chars are not identical. */
+ coding->consumed = coding->consumed_char = src_base - coding->source;
+ /* Remember how many characters we produced. */
+ coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
- These functions encode SRC_BYTES length text at SOURCE from Emacs'
- internal multibyte format to CODING. The resulting unibyte text
+ These functions encode SRC_BYTES length text at SOURCE of Emacs'
+ internal multibyte format by CODING. The resulting byte sequence
goes to a place pointed to by DESTINATION, the length of which
should not exceed DST_BYTES.
- These functions set the information about original and encoded texts
- in the members `produced', `produced_char', `consumed', and
- `consumed_char' of the structure *CODING. They also set the member
- `result' to one of CODING_FINISH_XXX indicating how the encoding
- finished.
+ These functions set the information of original and encoded texts in
+ the members produced, produced_char, consumed, and consumed_char of
+ the structure *CODING. They also set the member result to one of
+ CODING_RESULT_XXX indicating how the encoding finished.
- DST_BYTES zero means that the source area and destination area are
- overlapped, which means that we can produce encoded text until it
- reaches at the head of the not-yet-encoded source text.
+ DST_BYTES zero means that source area and destination area are
+ overlapped, which means that we can produce a encoded text until it
+ reaches at the head of not-yet-encoded source text.
- Below is a template for these functions. */
+ Below is a template of these functions. */
#if 0
static void
-encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
+encode_coding_XXX (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- ...
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
+ int produced_chars = 0;
+
+ for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
+ {
+ int c = *charbuf;
+ /* Encode C into DST, and increment DST. */
+ }
+ label_no_more_destination:
+ /* How many chars and bytes we produced. */
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
}
#endif
-/*** COMMONLY USED MACROS ***/
-
-/* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
- get one, two, and three bytes from the source text respectively.
- If there are not enough bytes in the source, they jump to
- `label_end_of_loop'. The caller should set variables `coding',
- `src' and `src_end' to appropriate pointer in advance. These
- macros are called from decoding routines `decode_coding_XXX', thus
- it is assumed that the source text is unibyte. */
-
-#define ONE_MORE_BYTE(c1) \
- do { \
- if (src >= src_end) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- c1 = *src++; \
- } while (0)
-
-#define TWO_MORE_BYTES(c1, c2) \
- do { \
- if (src + 1 >= src_end) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- c1 = *src++; \
- c2 = *src++; \
- } while (0)
-
-
-/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
- form if MULTIBYTEP is nonzero. */
-
-#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \
- do { \
- if (src >= src_end) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- c1 = *src++; \
- if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
- c1 = *src++ - 0x20; \
- } while (0)
-
-/* Set C to the next character at the source text pointed by `src'.
- If there are not enough characters in the source, jump to
- `label_end_of_loop'. The caller should set variables `coding'
- `src', `src_end', and `translation_table' to appropriate pointers
- in advance. This macro is used in encoding routines
- `encode_coding_XXX', thus it assumes that the source text is in
- multibyte form except for 8-bit characters. 8-bit characters are
- in multibyte form if coding->src_multibyte is nonzero, else they
- are represented by a single byte. */
-
-#define ONE_MORE_CHAR(c) \
- do { \
- int len = src_end - src; \
- int bytes; \
- if (len <= 0) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- if (coding->src_multibyte \
- || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \
- c = STRING_CHAR_AND_LENGTH (src, len, bytes); \
- else \
- c = *src, bytes = 1; \
- if (!NILP (translation_table)) \
- c = translate_char (translation_table, c, -1, 0, 0); \
- src += bytes; \
- } while (0)
-
-
-/* Produce a multibyte form of character C to `dst'. Jump to
- `label_end_of_loop' if there's not enough space at `dst'.
-
- If we are now in the middle of a composition sequence, the decoded
- character may be ALTCHAR (for the current composition). In that
- case, the character goes to coding->cmp_data->data instead of
- `dst'.
-
- This macro is used in decoding routines. */
-
-#define EMIT_CHAR(c) \
- do { \
- if (! COMPOSING_P (coding) \
- || coding->composing == COMPOSITION_RELATIVE \
- || coding->composing == COMPOSITION_WITH_RULE) \
- { \
- int bytes = CHAR_BYTES (c); \
- if ((dst + bytes) > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- dst += CHAR_STRING (c, dst); \
- coding->produced_char++; \
- } \
- \
- if (COMPOSING_P (coding) \
- && coding->composing != COMPOSITION_RELATIVE) \
- { \
- CODING_ADD_COMPOSITION_COMPONENT (coding, c); \
- coding->composition_rule_follows \
- = coding->composing != COMPOSITION_WITH_ALTCHARS; \
- } \
- } while (0)
-
-
-#define EMIT_ONE_BYTE(c) \
- do { \
- if (dst >= (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- *dst++ = c; \
- } while (0)
-
-#define EMIT_TWO_BYTES(c1, c2) \
- do { \
- if (dst + 2 > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- *dst++ = c1, *dst++ = c2; \
- } while (0)
-
-#define EMIT_BYTES(from, to) \
- do { \
- if (dst + (to - from) > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- while (from < to) \
- *dst++ = *from++; \
- } while (0)
-
/*** 1. Preamble ***/
-#ifdef emacs
#include <config.h>
-#endif
-
#include <stdio.h>
-#ifdef emacs
-
#include "lisp.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
-#include "composite.h"
#include "ccl.h"
+#include "composite.h"
#include "coding.h"
#include "window.h"
-#include "intervals.h"
-
-#else /* not emacs */
-#include "mulelib.h"
+Lisp_Object Vcoding_system_hash_table;
-#endif /* not emacs */
-
-Lisp_Object Qcoding_system, Qeol_type;
+Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
+Lisp_Object Qunix, Qdos;
+extern Lisp_Object Qmac; /* frame.c */
Lisp_Object Qbuffer_file_coding_system;
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
+Lisp_Object Qdefault_char;
Lisp_Object Qno_conversion, Qundecided;
+Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
+Lisp_Object Qbig, Qlittle;
Lisp_Object Qcoding_system_history;
-Lisp_Object Qsafe_chars;
Lisp_Object Qvalid_codes;
+Lisp_Object QCcategory;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
-Lisp_Object Vselect_safe_coding_system_function;
-
int coding_system_require_warning;
+Lisp_Object Vselect_safe_coding_system_function;
+
/* Mnemonic string for each format of end-of-line. */
Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic string to indicate format of end-of-line is not yet
decided. */
Lisp_Object eol_mnemonic_undecided;
-/* Format of end-of-line decided by system. This is CODING_EOL_LF on
- Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
-int system_eol_type;
-
#ifdef emacs
-/* Information about which coding system is safe for which chars.
- The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
-
- GENERIC-LIST is a list of generic coding systems which can encode
- any characters.
-
- NON-GENERIC-ALIST is an alist of non generic coding systems vs the
- corresponding char table that contains safe chars. */
-Lisp_Object Vcoding_system_safe_chars;
-
Lisp_Object Vcoding_system_list, Vcoding_system_alist;
Lisp_Object Qcoding_system_p, Qcoding_system_error;
@@ -399,8 +337,7 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
/* Coding system emacs-mule and raw-text are for converting only
end-of-line format. */
Lisp_Object Qemacs_mule, Qraw_text;
-
-Lisp_Object Qutf_8;
+Lisp_Object Qutf_8_emacs;
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
@@ -434,9 +371,6 @@ struct coding_system safe_terminal_coding;
/* Coding system of what is sent from terminal keyboard. */
struct coding_system keyboard_coding;
-/* Default coding system to be used to write a file. */
-struct coding_system default_buffer_file_coding;
-
Lisp_Object Vfile_coding_system_alist;
Lisp_Object Vprocess_coding_system_alist;
Lisp_Object Vnetwork_coding_system_alist;
@@ -445,42 +379,6 @@ Lisp_Object Vlocale_coding_system;
#endif /* emacs */
-Lisp_Object Qcoding_category, Qcoding_category_index;
-
-/* List of symbols `coding-category-xxx' ordered by priority. */
-Lisp_Object Vcoding_category_list;
-
-/* Table of coding categories (Lisp symbols). */
-Lisp_Object Vcoding_category_table;
-
-/* Table of names of symbol for each coding-category. */
-char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
- "coding-category-emacs-mule",
- "coding-category-sjis",
- "coding-category-iso-7",
- "coding-category-iso-7-tight",
- "coding-category-iso-8-1",
- "coding-category-iso-8-2",
- "coding-category-iso-7-else",
- "coding-category-iso-8-else",
- "coding-category-ccl",
- "coding-category-big5",
- "coding-category-utf-8",
- "coding-category-utf-16-be",
- "coding-category-utf-16-le",
- "coding-category-raw-text",
- "coding-category-binary"
-};
-
-/* Table of pointers to coding systems corresponding to each coding
- categories. */
-struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
-
-/* Table of coding category masks. Nth element is a mask for a coding
- category of which priority is Nth. */
-static
-int coding_priorities[CODING_CATEGORY_IDX_MAX];
-
/* Flag to tell if we look up translation table on character code
conversion. */
Lisp_Object Venable_character_translation;
@@ -495,7 +393,7 @@ Lisp_Object Qtranslation_table_for_decode;
Lisp_Object Qtranslation_table_for_encode;
/* Alist of charsets vs revision number. */
-Lisp_Object Vcharset_revision_alist;
+static Lisp_Object Vcharset_revision_table;
/* Default coding systems used for process I/O. */
Lisp_Object Vdefault_process_coding_system;
@@ -509,28 +407,1115 @@ Lisp_Object Vtranslation_table_for_input;
to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
-Lisp_Object Qchar_coding_system;
+/* Two special coding systems. */
+Lisp_Object Vsjis_coding_system;
+Lisp_Object Vbig5_coding_system;
+
+
+static int detect_coding_utf_8 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_8 P_ ((struct coding_system *));
+static int encode_coding_utf_8 P_ ((struct coding_system *));
+
+static int detect_coding_utf_16 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_16 P_ ((struct coding_system *));
+static int encode_coding_utf_16 P_ ((struct coding_system *));
+
+static int detect_coding_iso_2022 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_iso_2022 P_ ((struct coding_system *));
+static int encode_coding_iso_2022 P_ ((struct coding_system *));
+
+static int detect_coding_emacs_mule P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_emacs_mule P_ ((struct coding_system *));
+static int encode_coding_emacs_mule P_ ((struct coding_system *));
+
+static int detect_coding_sjis P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_sjis P_ ((struct coding_system *));
+static int encode_coding_sjis P_ ((struct coding_system *));
+
+static int detect_coding_big5 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_big5 P_ ((struct coding_system *));
+static int encode_coding_big5 P_ ((struct coding_system *));
+
+static int detect_coding_ccl P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_ccl P_ ((struct coding_system *));
+static int encode_coding_ccl P_ ((struct coding_system *));
+
+static void decode_coding_raw_text P_ ((struct coding_system *));
+static int encode_coding_raw_text P_ ((struct coding_system *));
+
+
+/* ISO2022 section */
+
+#define CODING_ISO_INITIAL(coding, reg) \
+ (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
+ coding_attr_iso_initial), \
+ reg)))
+
+
+#define CODING_ISO_REQUEST(coding, charset_id) \
+ ((charset_id <= (coding)->max_charset_id \
+ ? (coding)->safe_charsets[charset_id] \
+ : -1))
+
+
+#define CODING_ISO_FLAGS(coding) \
+ ((coding)->spec.iso_2022.flags)
+#define CODING_ISO_DESIGNATION(coding, reg) \
+ ((coding)->spec.iso_2022.current_designation[reg])
+#define CODING_ISO_INVOCATION(coding, plane) \
+ ((coding)->spec.iso_2022.current_invocation[plane])
+#define CODING_ISO_SINGLE_SHIFTING(coding) \
+ ((coding)->spec.iso_2022.single_shifting)
+#define CODING_ISO_BOL(coding) \
+ ((coding)->spec.iso_2022.bol)
+#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
+ CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
+
+/* Control characters of ISO2022. */
+ /* code */ /* function */
+#define ISO_CODE_LF 0x0A /* line-feed */
+#define ISO_CODE_CR 0x0D /* carriage-return */
+#define ISO_CODE_SO 0x0E /* shift-out */
+#define ISO_CODE_SI 0x0F /* shift-in */
+#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
+#define ISO_CODE_ESC 0x1B /* escape */
+#define ISO_CODE_SS2 0x8E /* single-shift-2 */
+#define ISO_CODE_SS3 0x8F /* single-shift-3 */
+#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
+
+/* All code (1-byte) of ISO2022 is classified into one of the
+ followings. */
+enum iso_code_class_type
+ {
+ ISO_control_0, /* Control codes in the range
+ 0x00..0x1F and 0x7F, except for the
+ following 5 codes. */
+ ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
+ ISO_shift_out, /* ISO_CODE_SO (0x0E) */
+ ISO_shift_in, /* ISO_CODE_SI (0x0F) */
+ ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
+ ISO_escape, /* ISO_CODE_SO (0x1B) */
+ ISO_control_1, /* Control codes in the range
+ 0x80..0x9F, except for the
+ following 3 codes. */
+ ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
+ ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
+ ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
+ ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
+ ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
+ ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
+ ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
+ };
-/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
- its validity. */
+/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
+ `iso-flags' attribute of an iso2022 coding system. */
-Lisp_Object
-coding_safe_chars (coding_system)
- Lisp_Object coding_system;
+/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
+ instead of the correct short-form sequence (e.g. ESC $ A). */
+#define CODING_ISO_FLAG_LONG_FORM 0x0001
+
+/* If set, reset graphic planes and registers at end-of-line to the
+ initial state. */
+#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
+
+/* If set, reset graphic planes and registers before any control
+ characters to the initial state. */
+#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
+
+/* If set, encode by 7-bit environment. */
+#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
+
+/* If set, use locking-shift function. */
+#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
+
+/* If set, use single-shift function. Overwrite
+ CODING_ISO_FLAG_LOCKING_SHIFT. */
+#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
+
+/* If set, use designation escape sequence. */
+#define CODING_ISO_FLAG_DESIGNATION 0x0040
+
+/* If set, produce revision number sequence. */
+#define CODING_ISO_FLAG_REVISION 0x0080
+
+/* If set, produce ISO6429's direction specifying sequence. */
+#define CODING_ISO_FLAG_DIRECTION 0x0100
+
+/* If set, assume designation states are reset at beginning of line on
+ output. */
+#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
+
+/* If set, designation sequence should be placed at beginning of line
+ on output. */
+#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
+
+/* If set, do not encode unsafe charactes on output. */
+#define CODING_ISO_FLAG_SAFE 0x0800
+
+/* If set, extra latin codes (128..159) are accepted as a valid code
+ on input. */
+#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
+
+#define CODING_ISO_FLAG_COMPOSITION 0x2000
+
+#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
+
+#define CODING_ISO_FLAG_USE_ROMAN 0x8000
+
+#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
+
+#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
+
+/* A character to be produced on output if encoding of the original
+ character is prohibited by CODING_ISO_FLAG_SAFE. */
+#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
+
+
+/* UTF-16 section */
+#define CODING_UTF_16_BOM(coding) \
+ ((coding)->spec.utf_16.bom)
+
+#define CODING_UTF_16_ENDIAN(coding) \
+ ((coding)->spec.utf_16.endian)
+
+#define CODING_UTF_16_SURROGATE(coding) \
+ ((coding)->spec.utf_16.surrogate)
+
+
+/* CCL section */
+#define CODING_CCL_DECODER(coding) \
+ AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
+#define CODING_CCL_ENCODER(coding) \
+ AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
+#define CODING_CCL_VALIDS(coding) \
+ (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
+
+/* Index for each coding category in `coding_categories' */
+
+enum coding_category
+ {
+ coding_category_iso_7,
+ coding_category_iso_7_tight,
+ coding_category_iso_8_1,
+ coding_category_iso_8_2,
+ coding_category_iso_7_else,
+ coding_category_iso_8_else,
+ coding_category_utf_8,
+ coding_category_utf_16_auto,
+ coding_category_utf_16_be,
+ coding_category_utf_16_le,
+ coding_category_utf_16_be_nosig,
+ coding_category_utf_16_le_nosig,
+ coding_category_charset,
+ coding_category_sjis,
+ coding_category_big5,
+ coding_category_ccl,
+ coding_category_emacs_mule,
+ /* All above are targets of code detection. */
+ coding_category_raw_text,
+ coding_category_undecided,
+ coding_category_max
+ };
+
+/* Definitions of flag bits used in detect_coding_XXXX. */
+#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
+#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
+#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
+#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
+#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
+#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
+#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
+#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
+#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
+#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
+#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
+#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
+#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
+#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
+#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
+#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
+#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
+#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
+
+/* This value is returned if detect_coding_mask () find nothing other
+ than ASCII characters. */
+#define CATEGORY_MASK_ANY \
+ (CATEGORY_MASK_ISO_7 \
+ | CATEGORY_MASK_ISO_7_TIGHT \
+ | CATEGORY_MASK_ISO_8_1 \
+ | CATEGORY_MASK_ISO_8_2 \
+ | CATEGORY_MASK_ISO_7_ELSE \
+ | CATEGORY_MASK_ISO_8_ELSE \
+ | CATEGORY_MASK_UTF_8 \
+ | CATEGORY_MASK_UTF_16_BE \
+ | CATEGORY_MASK_UTF_16_LE \
+ | CATEGORY_MASK_UTF_16_BE_NOSIG \
+ | CATEGORY_MASK_UTF_16_LE_NOSIG \
+ | CATEGORY_MASK_CHARSET \
+ | CATEGORY_MASK_SJIS \
+ | CATEGORY_MASK_BIG5 \
+ | CATEGORY_MASK_CCL \
+ | CATEGORY_MASK_EMACS_MULE)
+
+
+#define CATEGORY_MASK_ISO_7BIT \
+ (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
+
+#define CATEGORY_MASK_ISO_8BIT \
+ (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
+
+#define CATEGORY_MASK_ISO_ELSE \
+ (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
+
+#define CATEGORY_MASK_ISO_ESCAPE \
+ (CATEGORY_MASK_ISO_7 \
+ | CATEGORY_MASK_ISO_7_TIGHT \
+ | CATEGORY_MASK_ISO_7_ELSE \
+ | CATEGORY_MASK_ISO_8_ELSE)
+
+#define CATEGORY_MASK_ISO \
+ ( CATEGORY_MASK_ISO_7BIT \
+ | CATEGORY_MASK_ISO_8BIT \
+ | CATEGORY_MASK_ISO_ELSE)
+
+#define CATEGORY_MASK_UTF_16 \
+ (CATEGORY_MASK_UTF_16_BE \
+ | CATEGORY_MASK_UTF_16_LE \
+ | CATEGORY_MASK_UTF_16_BE_NOSIG \
+ | CATEGORY_MASK_UTF_16_LE_NOSIG)
+
+
+/* List of symbols `coding-category-xxx' ordered by priority. This
+ variable is exposed to Emacs Lisp. */
+static Lisp_Object Vcoding_category_list;
+
+/* Table of coding categories (Lisp symbols). This variable is for
+ internal use oly. */
+static Lisp_Object Vcoding_category_table;
+
+/* Table of coding-categories ordered by priority. */
+static enum coding_category coding_priorities[coding_category_max];
+
+/* Nth element is a coding context for the coding system bound to the
+ Nth coding category. */
+static struct coding_system coding_categories[coding_category_max];
+
+/*** Commonly used macros and functions ***/
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
+ do { \
+ attrs = CODING_ID_ATTRS (coding->id); \
+ eol_type = CODING_ID_EOL_TYPE (coding->id); \
+ if (VECTORP (eol_type)) \
+ eol_type = Qunix; \
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
+ } while (0)
+
+
+/* Safely get one byte from the source text pointed by SRC which ends
+ at SRC_END, and set C to that byte. If there are not enough bytes
+ in the source, it jumps to `no_more_source'. The caller
+ should declare and set these variables appropriately in advance:
+ src, src_end, multibytep
+*/
+
+#define ONE_MORE_BYTE(c) \
+ do { \
+ if (src == src_end) \
+ { \
+ if (src_base < src) \
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
+ goto no_more_source; \
+ } \
+ c = *src++; \
+ if (multibytep && (c & 0x80)) \
+ { \
+ if ((c & 0xFE) != 0xC0) \
+ error ("Undecodable char found"); \
+ c = ((c & 1) << 6) | *src++; \
+ } \
+ consumed_chars++; \
+ } while (0)
+
+
+#define ONE_MORE_BYTE_NO_CHECK(c) \
+ do { \
+ c = *src++; \
+ if (multibytep && (c & 0x80)) \
+ { \
+ if ((c & 0xFE) != 0xC0) \
+ error ("Undecodable char found"); \
+ c = ((c & 1) << 6) | *src++; \
+ } \
+ consumed_chars++; \
+ } while (0)
+
+
+/* Store a byte C in the place pointed by DST and increment DST to the
+ next free point, and increment PRODUCED_CHARS. The caller should
+ assure that C is 0..127, and declare and set the variable `dst'
+ appropriately in advance.
+*/
+
+
+#define EMIT_ONE_ASCII_BYTE(c) \
+ do { \
+ produced_chars++; \
+ *dst++ = (c); \
+ } while (0)
+
+
+/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
+
+#define EMIT_TWO_ASCII_BYTES(c1, c2) \
+ do { \
+ produced_chars += 2; \
+ *dst++ = (c1), *dst++ = (c2); \
+ } while (0)
+
+
+/* Store a byte C in the place pointed by DST and increment DST to the
+ next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
+ nonzero, store in an appropriate multibyte from. The caller should
+ declare and set the variables `dst' and `multibytep' appropriately
+ in advance. */
+
+#define EMIT_ONE_BYTE(c) \
+ do { \
+ produced_chars++; \
+ if (multibytep) \
+ { \
+ int ch = (c); \
+ if (ch >= 0x80) \
+ ch = BYTE8_TO_CHAR (ch); \
+ CHAR_STRING_ADVANCE (ch, dst); \
+ } \
+ else \
+ *dst++ = (c); \
+ } while (0)
+
+
+/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
+
+#define EMIT_TWO_BYTES(c1, c2) \
+ do { \
+ produced_chars += 2; \
+ if (multibytep) \
+ { \
+ int ch; \
+ \
+ ch = (c1); \
+ if (ch >= 0x80) \
+ ch = BYTE8_TO_CHAR (ch); \
+ CHAR_STRING_ADVANCE (ch, dst); \
+ ch = (c2); \
+ if (ch >= 0x80) \
+ ch = BYTE8_TO_CHAR (ch); \
+ CHAR_STRING_ADVANCE (ch, dst); \
+ } \
+ else \
+ { \
+ *dst++ = (c1); \
+ *dst++ = (c2); \
+ } \
+ } while (0)
+
+
+#define EMIT_THREE_BYTES(c1, c2, c3) \
+ do { \
+ EMIT_ONE_BYTE (c1); \
+ EMIT_TWO_BYTES (c2, c3); \
+ } while (0)
+
+
+#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
+ do { \
+ EMIT_TWO_BYTES (c1, c2); \
+ EMIT_TWO_BYTES (c3, c4); \
+ } while (0)
+
+
+#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
+ do { \
+ charset_map_loaded = 0; \
+ c = DECODE_CHAR (charset, code); \
+ if (charset_map_loaded) \
+ { \
+ const unsigned char *orig = coding->source; \
+ EMACS_INT offset; \
+ \
+ coding_set_source (coding); \
+ offset = coding->source - orig; \
+ src += offset; \
+ src_base += offset; \
+ src_end += offset; \
+ } \
+ } while (0)
+
+
+#define ASSURE_DESTINATION(bytes) \
+ do { \
+ if (dst + (bytes) >= dst_end) \
+ { \
+ int more_bytes = charbuf_end - charbuf + (bytes); \
+ \
+ dst = alloc_destination (coding, more_bytes, dst); \
+ dst_end = coding->destination + coding->dst_bytes; \
+ } \
+ } while (0)
+
+
+
+static void
+coding_set_source (coding)
+ struct coding_system *coding;
+{
+ if (BUFFERP (coding->src_object))
+ {
+ struct buffer *buf = XBUFFER (coding->src_object);
+
+ if (coding->src_pos < 0)
+ coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
+ else
+ coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
+ }
+ else if (STRINGP (coding->src_object))
+ {
+ coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
+ }
+ else
+ /* Otherwise, the source is C string and is never relocated
+ automatically. Thus we don't have to update anything. */
+ ;
+}
+
+static void
+coding_set_destination (coding)
+ struct coding_system *coding;
+{
+ if (BUFFERP (coding->dst_object))
+ {
+ if (coding->src_pos < 0)
+ {
+ coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
+ coding->dst_bytes = (GAP_END_ADDR
+ - (coding->src_bytes - coding->consumed)
+ - coding->destination);
+ }
+ else
+ {
+ /* We are sure that coding->dst_pos_byte is before the gap
+ of the buffer. */
+ coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
+ + coding->dst_pos_byte - 1);
+ coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
+ - coding->destination);
+ }
+ }
+ else
+ /* Otherwise, the destination is C string and is never relocated
+ automatically. Thus we don't have to update anything. */
+ ;
+}
+
+
+static void
+coding_alloc_by_realloc (coding, bytes)
+ struct coding_system *coding;
+ EMACS_INT bytes;
+{
+ coding->destination = (unsigned char *) xrealloc (coding->destination,
+ coding->dst_bytes + bytes);
+ coding->dst_bytes += bytes;
+}
+
+static void
+coding_alloc_by_making_gap (coding, bytes)
+ struct coding_system *coding;
+ EMACS_INT bytes;
+{
+ if (BUFFERP (coding->dst_object)
+ && EQ (coding->src_object, coding->dst_object))
+ {
+ EMACS_INT add = coding->src_bytes - coding->consumed;
+
+ GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+ make_gap (bytes);
+ GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
+ }
+ else
+ {
+ Lisp_Object this_buffer;
+
+ this_buffer = Fcurrent_buffer ();
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ make_gap (bytes);
+ set_buffer_internal (XBUFFER (this_buffer));
+ }
+}
+
+
+static unsigned char *
+alloc_destination (coding, nbytes, dst)
+ struct coding_system *coding;
+ int nbytes;
+ unsigned char *dst;
+{
+ EMACS_INT offset = dst - coding->destination;
+
+ if (BUFFERP (coding->dst_object))
+ coding_alloc_by_making_gap (coding, nbytes);
+ else
+ coding_alloc_by_realloc (coding, nbytes);
+ coding->result = CODING_RESULT_SUCCESS;
+ coding_set_destination (coding);
+ dst = coding->destination + offset;
+ return dst;
+}
+
+/** Macros for annotations. */
+
+/* Maximum length of annotation data (sum of annotations for
+ composition and charset). */
+#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
+
+/* An annotation data is stored in the array coding->charbuf in this
+ format:
+ [ -LENGTH ANNOTATION_MASK FROM TO ... ]
+ LENGTH is the number of elements in the annotation.
+ ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
+ FROM and TO specify the range of text annotated. They are relative
+ to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
+
+ The format of the following elements depend on ANNOTATION_MASK.
+
+ In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
+ follows:
+ ... METHOD [ COMPOSITION-COMPONENTS ... ]
+ METHOD is one of enum composition_method.
+ Optionnal COMPOSITION-COMPONENTS are characters and composition
+ rules.
+
+ In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
+ follows. */
+
+#define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
+ do { \
+ *(buf)++ = -(len); \
+ *(buf)++ = (mask); \
+ *(buf)++ = (from); \
+ *(buf)++ = (to); \
+ coding->annotated = 1; \
+ } while (0);
+
+#define ADD_COMPOSITION_DATA(buf, from, to, method) \
+ do { \
+ ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
+ *buf++ = method; \
+ } while (0)
+
+
+#define ADD_CHARSET_DATA(buf, from, to, id) \
+ do { \
+ ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
+ *buf++ = id; \
+ } while (0)
+
+
+/*** 2. Emacs' internal format (emacs-utf-8) ***/
+
+
+
+
+/*** 3. UTF-8 ***/
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in UTF-8. If it is, return 1, else
+ return 0. */
+
+#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
+#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
+#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
+#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
+#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
+#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
+
+static int
+detect_coding_utf_8 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
+ int incomplete;
+
+ detect_info->checked |= CATEGORY_MASK_UTF_8;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
+
+ while (1)
+ {
+ int c, c1, c2, c3, c4;
+
+ incomplete = 0;
+ ONE_MORE_BYTE (c);
+ if (UTF_8_1_OCTET_P (c))
+ continue;
+ incomplete = 1;
+ ONE_MORE_BYTE (c1);
+ if (! UTF_8_EXTRA_OCTET_P (c1))
+ break;
+ if (UTF_8_2_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ ONE_MORE_BYTE (c2);
+ if (! UTF_8_EXTRA_OCTET_P (c2))
+ break;
+ if (UTF_8_3_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ ONE_MORE_BYTE (c3);
+ if (! UTF_8_EXTRA_OCTET_P (c3))
+ break;
+ if (UTF_8_4_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ ONE_MORE_BYTE (c4);
+ if (! UTF_8_EXTRA_OCTET_P (c4))
+ break;
+ if (UTF_8_5_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ break;
+ }
+ detect_info->rejected |= CATEGORY_MASK_UTF_8;
+ return 0;
+
+ no_more_source:
+ if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_UTF_8;
+ return 0;
+ }
+ detect_info->found |= found;
+ return 1;
+}
+
+
+static void
+decode_coding_utf_8 (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object attr, eol_type, charset_list;
+
+ CODING_GET_INFO (coding, attr, eol_type, charset_list);
+
+ while (1)
+ {
+ int c, c1, c2, c3, c4, c5;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c1);
+ if (UTF_8_1_OCTET_P(c1))
+ {
+ c = c1;
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src == src_end)
+ {
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
+ }
+ if (*src == '\n')
+ ONE_MORE_BYTE (c);
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ }
+ else
+ {
+ ONE_MORE_BYTE (c2);
+ if (! UTF_8_EXTRA_OCTET_P (c2))
+ goto invalid_code;
+ if (UTF_8_2_OCTET_LEADING_P (c1))
+ {
+ c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
+ /* Reject overlong sequences here and below. Encoders
+ producing them are incorrect, they can be misleading,
+ and they mess up read/write invariance. */
+ if (c < 128)
+ goto invalid_code;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c3);
+ if (! UTF_8_EXTRA_OCTET_P (c3))
+ goto invalid_code;
+ if (UTF_8_3_OCTET_LEADING_P (c1))
+ {
+ c = (((c1 & 0xF) << 12)
+ | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
+ if (c < 0x800
+ || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
+ goto invalid_code;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c4);
+ if (! UTF_8_EXTRA_OCTET_P (c4))
+ goto invalid_code;
+ if (UTF_8_4_OCTET_LEADING_P (c1))
+ {
+ c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
+ | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
+ if (c < 0x10000)
+ goto invalid_code;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c5);
+ if (! UTF_8_EXTRA_OCTET_P (c5))
+ goto invalid_code;
+ if (UTF_8_5_OCTET_LEADING_P (c1))
+ {
+ c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
+ | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
+ | (c5 & 0x3F));
+ if ((c > MAX_CHAR) || (c < 0x200000))
+ goto invalid_code;
+ }
+ else
+ goto invalid_code;
+ }
+ }
+ }
+ }
+
+ *charbuf++ = c;
+ continue;
+
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ coding->errors++;
+ }
+
+ no_more_source:
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
+
+
+static int
+encode_coding_utf_8 (coding)
+ struct coding_system *coding;
{
- Lisp_Object coding_spec, plist, safe_chars;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int produced_chars = 0;
+ int c;
+
+ if (multibytep)
+ {
+ int safe_room = MAX_MULTIBYTE_LENGTH * 2;
+
+ while (charbuf < charbuf_end)
+ {
+ unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
+
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
+ {
+ CHAR_STRING_ADVANCE (c, pend);
+ for (p = str; p < pend; p++)
+ EMIT_ONE_BYTE (*p);
+ }
+ }
+ }
+ else
+ {
+ int safe_room = MAX_MULTIBYTE_LENGTH;
- coding_spec = Fget (coding_system, Qcoding_system);
- plist = XVECTOR (coding_spec)->contents[3];
- safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
- return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ dst += CHAR_STRING (c, dst);
+ produced_chars++;
+ }
+ }
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-#define CODING_SAFE_CHAR_P(safe_chars, c) \
- (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in one of UTF-16 based coding systems.
+ If it is, return 1, else return 0. */
+
+#define UTF_16_HIGH_SURROGATE_P(val) \
+ (((val) & 0xFC00) == 0xD800)
+
+#define UTF_16_LOW_SURROGATE_P(val) \
+ (((val) & 0xFC00) == 0xDC00)
+
+#define UTF_16_INVALID_P(val) \
+ (((val) == 0xFFFE) \
+ || ((val) == 0xFFFF) \
+ || UTF_16_LOW_SURROGATE_P (val))
+
+
+static int
+detect_coding_utf_16 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int c1, c2;
+
+ detect_info->checked |= CATEGORY_MASK_UTF_16;
+
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && (coding->src_bytes & 1))
+ {
+ detect_info->rejected |= CATEGORY_MASK_UTF_16;
+ return 0;
+ }
+ ONE_MORE_BYTE (c1);
+ ONE_MORE_BYTE (c2);
+
+ if ((c1 == 0xFF) && (c2 == 0xFE))
+ {
+ detect_info->found |= (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_AUTO);
+ detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
+ }
+ else if ((c1 == 0xFE) && (c2 == 0xFF))
+ {
+ detect_info->found |= (CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_AUTO);
+ detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
+ }
+ no_more_source:
+ return 1;
+}
+
+static void
+decode_coding_utf_16 (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
+ enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
+ int surrogate = CODING_UTF_16_SURROGATE (coding);
+ Lisp_Object attr, eol_type, charset_list;
+
+ CODING_GET_INFO (coding, attr, eol_type, charset_list);
+
+ if (bom == utf_16_with_bom)
+ {
+ int c, c1, c2;
+
+ src_base = src;
+ ONE_MORE_BYTE (c1);
+ ONE_MORE_BYTE (c2);
+ c = (c1 << 8) | c2;
+
+ if (endian == utf_16_big_endian
+ ? c != 0xFEFF : c != 0xFFFE)
+ {
+ /* The first two bytes are not BOM. Treat them as bytes
+ for a normal character. */
+ src = src_base;
+ coding->errors++;
+ }
+ CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+ }
+ else if (bom == utf_16_detect_bom)
+ {
+ /* We have already tried to detect BOM and failed in
+ detect_coding. */
+ CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+ }
+
+ while (1)
+ {
+ int c, c1, c2;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf + 2 >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c1);
+ ONE_MORE_BYTE (c2);
+ c = (endian == utf_16_big_endian
+ ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
+ if (surrogate)
+ {
+ if (! UTF_16_LOW_SURROGATE_P (c))
+ {
+ if (endian == utf_16_big_endian)
+ c1 = surrogate >> 8, c2 = surrogate & 0xFF;
+ else
+ c1 = surrogate & 0xFF, c2 = surrogate >> 8;
+ *charbuf++ = c1;
+ *charbuf++ = c2;
+ coding->errors++;
+ if (UTF_16_HIGH_SURROGATE_P (c))
+ CODING_UTF_16_SURROGATE (coding) = surrogate = c;
+ else
+ *charbuf++ = c;
+ }
+ else
+ {
+ c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
+ CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
+ *charbuf++ = c;
+ }
+ }
+ else
+ {
+ if (UTF_16_HIGH_SURROGATE_P (c))
+ CODING_UTF_16_SURROGATE (coding) = surrogate = c;
+ else
+ *charbuf++ = c;
+ }
+ }
+
+ no_more_source:
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
+
+static int
+encode_coding_utf_16 (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 8;
+ enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
+ int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list;
+ int c;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+
+ if (bom != utf_16_without_bom)
+ {
+ ASSURE_DESTINATION (safe_room);
+ if (big_endian)
+ EMIT_TWO_BYTES (0xFE, 0xFF);
+ else
+ EMIT_TWO_BYTES (0xFF, 0xFE);
+ CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+ }
+
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (c >= MAX_UNICODE_CHAR)
+ c = coding->default_char;
+
+ if (c < 0x10000)
+ {
+ if (big_endian)
+ EMIT_TWO_BYTES (c >> 8, c & 0xFF);
+ else
+ EMIT_TWO_BYTES (c & 0xFF, c >> 8);
+ }
+ else
+ {
+ int c1, c2;
+
+ c -= 0x10000;
+ c1 = (c >> 10) + 0xD800;
+ c2 = (c & 0x3FF) + 0xDC00;
+ if (big_endian)
+ EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
+ else
+ EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
+ }
+ }
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced = dst - coding->destination;
+ coding->produced_char += produced_chars;
+ return 0;
+}
-/*** 2. Emacs internal format (emacs-mule) handlers ***/
+/*** 6. Old Emacs' internal format (emacs-mule) ***/
/* Emacs' internal format for representation of multiple character
sets is a kind of multi-byte encoding, i.e. characters are
@@ -572,7 +1557,7 @@ coding_safe_chars (coding_system)
In that case, a sequence of one-byte codes has a slightly different
form.
- Firstly, all characters in eight-bit-control are represented by
+ At first, all characters in eight-bit-control are represented by
one-byte sequences which are their 8-bit code.
Next, character composition data are represented by the byte
@@ -581,12 +1566,12 @@ coding_safe_chars (coding_system)
METHOD is 0xF0 plus one of composition method (enum
composition_method),
- BYTES is 0xA0 plus the byte length of these composition data,
+ BYTES is 0xA0 plus a byte length of this composition data,
- CHARS is 0xA0 plus the number of characters composed by these
+ CHARS is 0x20 plus a number of characters composed by this
data,
- COMPONENTs are characters of multibyte form or composition
+ COMPONENTs are characters of multibye form or composition
rules encoded by two-byte of ASCII codes.
In addition, for backward compatibility, the following formats are
@@ -603,575 +1588,565 @@ coding_safe_chars (coding_system)
represents a composition rule.
*/
-enum emacs_code_class_type emacs_code_class[256];
+char emacs_mule_bytes[256];
+
+int
+emacs_mule_char (coding, src, nbytes, nchars, id)
+ struct coding_system *coding;
+ unsigned char *src;
+ int *nbytes, *nchars, *id;
+{
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base = src;
+ int multibytep = coding->src_multibyte;
+ struct charset *charset;
+ unsigned code;
+ int c;
+ int consumed_chars = 0;
+
+ ONE_MORE_BYTE (c);
+ switch (emacs_mule_bytes[c])
+ {
+ case 2:
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ code = c & 0x7F;
+ break;
+
+ case 3:
+ if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
+ || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ {
+ ONE_MORE_BYTE (c);
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ code = c & 0x7F;
+ }
+ else
+ {
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ code = (c & 0x7F) << 8;
+ ONE_MORE_BYTE (c);
+ code |= c & 0x7F;
+ }
+ break;
+
+ case 4:
+ ONE_MORE_BYTE (c);
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ code = (c & 0x7F) << 8;
+ ONE_MORE_BYTE (c);
+ code |= c & 0x7F;
+ break;
+
+ case 1:
+ code = c;
+ charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
+ ? charset_ascii : charset_eight_bit);
+ break;
+
+ default:
+ abort ();
+ }
+ c = DECODE_CHAR (charset, code);
+ if (c < 0)
+ goto invalid_code;
+ *nbytes = src - src_base;
+ *nchars = consumed_chars;
+ if (id)
+ *id = charset->id;
+ return c;
+
+ no_more_source:
+ return -2;
+
+ invalid_code:
+ return -1;
+}
+
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in Emacs' internal format. If it is,
- return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
+ Check if a text is encoded in `emacs-mule'. If it is, return 1,
+ else return 0. */
static int
-detect_coding_emacs_mule (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_emacs_mule (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- unsigned char c;
- int composing = 0;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int c;
+ int found = 0;
+ int incomplete;
+
+ detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ incomplete = 0;
+ ONE_MORE_BYTE (c);
+ incomplete = 1;
- if (composing)
+ if (c == 0x80)
{
- if (c < 0xA0)
- composing = 0;
- else if (c == 0xA0)
+ /* Perhaps the start of composite character. We simple skip
+ it because analyzing it is too heavy for detecting. But,
+ at least, we check that the composite character
+ constitues of more than 4 bytes. */
+ const unsigned char *src_base;
+
+ repeat:
+ src_base = src;
+ do
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- c &= 0x7F;
+ ONE_MORE_BYTE (c);
}
- else
- c -= 0x20;
+ while (c >= 0xA0);
+
+ if (src - src_base <= 4)
+ break;
+ found = CATEGORY_MASK_EMACS_MULE;
+ if (c == 0x80)
+ goto repeat;
}
- if (c < 0x20)
+ if (c < 0x80)
{
- if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- return 0;
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
+ break;
}
- else if (c >= 0x80 && c < 0xA0)
+ else
{
- if (c == 0x80)
- /* Old leading code for a composite character. */
- composing = 1;
- else
- {
- unsigned char *src_base = src - 1;
- int bytes;
+ const unsigned char *src_base = src - 1;
- if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
- bytes))
- return 0;
- src = src_base + bytes;
+ do
+ {
+ ONE_MORE_BYTE (c);
}
+ while (c >= 0xA0);
+ if (src - src_base != emacs_mule_bytes[*src_base])
+ break;
+ found = CATEGORY_MASK_EMACS_MULE;
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_EMACS_MULE;
+ detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
+ return 0;
+
+ no_more_source:
+ if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
+ return 0;
+ }
+ detect_info->found |= found;
+ return 1;
}
-/* Record the starting position START and METHOD of one composition. */
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
-#define CODING_ADD_COMPOSITION_START(coding, start, method) \
- do { \
- struct composition_data *cmp_data = coding->cmp_data; \
- int *data = cmp_data->data + cmp_data->used; \
- coding->cmp_data_start = cmp_data->used; \
- data[0] = -1; \
- data[1] = cmp_data->char_offset + start; \
- data[3] = (int) method; \
- cmp_data->used += 4; \
+/* Decode a character represented as a component of composition
+ sequence of Emacs 20/21 style at SRC. Set C to that character and
+ update SRC to the head of next character (or an encoded composition
+ rule). If SRC doesn't points a composition component, set C to -1.
+ If SRC points an invalid byte sequence, global exit by a return
+ value 0. */
+
+#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
+ if (1) \
+ { \
+ int c; \
+ int nbytes, nchars; \
+ \
+ if (src == src_end) \
+ break; \
+ c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
+ if (c < 0) \
+ { \
+ if (c == -2) \
+ break; \
+ goto invalid_code; \
+ } \
+ *buf++ = c; \
+ src += nbytes; \
+ consumed_chars += nchars; \
+ } \
+ else
+
+
+/* Decode a composition rule represented as a component of composition
+ sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
+ and increment BUF. If SRC points an invalid byte sequence, set C
+ to -1. */
+
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
+ do { \
+ int c, gref, nref; \
+ \
+ if (src >= src_end) \
+ goto invalid_code; \
+ ONE_MORE_BYTE_NO_CHECK (c); \
+ c -= 0x20; \
+ if (c < 0 || c >= 81) \
+ goto invalid_code; \
+ \
+ gref = c / 9, nref = c % 9; \
+ *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
} while (0)
-/* Record the ending position END of the current composition. */
-#define CODING_ADD_COMPOSITION_END(coding, end) \
- do { \
- struct composition_data *cmp_data = coding->cmp_data; \
- int *data = cmp_data->data + coding->cmp_data_start; \
- data[0] = cmp_data->used - coding->cmp_data_start; \
- data[2] = cmp_data->char_offset + end; \
+/* Decode a composition rule represented as a component of composition
+ sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
+ and increment BUF. If SRC points an invalid byte sequence, set C
+ to -1. */
+
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
+ do { \
+ int gref, nref; \
+ \
+ if (src + 1>= src_end) \
+ goto invalid_code; \
+ ONE_MORE_BYTE_NO_CHECK (gref); \
+ gref -= 0x20; \
+ ONE_MORE_BYTE_NO_CHECK (nref); \
+ nref -= 0x20; \
+ if (gref < 0 || gref >= 81 \
+ || nref < 0 || nref >= 81) \
+ goto invalid_code; \
+ *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
} while (0)
-/* Record one COMPONENT (alternate character or composition rule). */
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
+#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
do { \
- coding->cmp_data->data[coding->cmp_data->used++] = component; \
- if (coding->cmp_data->used - coding->cmp_data_start \
- == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \
+ /* Emacs 21 style format. The first three bytes at SRC are \
+ (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
+ the byte length of this composition information, CHARS is the \
+ number of characters composed by this composition. */ \
+ enum composition_method method = c - 0xF2; \
+ int *charbuf_base = charbuf; \
+ int from, to; \
+ int consumed_chars_limit; \
+ int nbytes, nchars; \
+ \
+ ONE_MORE_BYTE (c); \
+ nbytes = c - 0xA0; \
+ if (nbytes < 3) \
+ goto invalid_code; \
+ ONE_MORE_BYTE (c); \
+ nchars = c - 0xA0; \
+ from = coding->produced + char_offset; \
+ to = from + nchars; \
+ ADD_COMPOSITION_DATA (charbuf, from, to, method); \
+ consumed_chars_limit = consumed_chars_base + nbytes; \
+ if (method != COMPOSITION_RELATIVE) \
{ \
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
- coding->composing = COMPOSITION_NO; \
+ int i = 0; \
+ while (consumed_chars < consumed_chars_limit) \
+ { \
+ if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
+ DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
+ else \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
+ i++; \
+ } \
+ if (consumed_chars < consumed_chars_limit) \
+ goto invalid_code; \
+ charbuf_base[0] -= i; \
} \
} while (0)
-/* Get one byte from a data pointed by SRC and increment SRC. If SRC
- is not less than SRC_END, return -1 without incrementing Src. */
-
-#define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
-
+#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
+ do { \
+ /* Emacs 20 style format for relative composition. */ \
+ /* Store multibyte form of characters to be composed. */ \
+ enum composition_method method = COMPOSITION_RELATIVE; \
+ int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
+ int *buf = components; \
+ int i, j; \
+ int from, to; \
+ \
+ src = src_base; \
+ ONE_MORE_BYTE (c); /* skip 0x80 */ \
+ for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
+ if (i < 2) \
+ goto invalid_code; \
+ from = coding->produced_char + char_offset; \
+ to = from + i; \
+ ADD_COMPOSITION_DATA (charbuf, from, to, method); \
+ for (j = 0; j < i; j++) \
+ *charbuf++ = components[j]; \
+ } while (0)
-/* Decode a character represented as a component of composition
- sequence of Emacs 20 style at SRC. Set C to that character, store
- its multibyte form sequence at P, and set P to the end of that
- sequence. If no valid character is found, set C to -1. */
-#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
+#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
do { \
- int bytes; \
+ /* Emacs 20 style format for rule-base composition. */ \
+ /* Store multibyte form of characters to be composed. */ \
+ enum composition_method method = COMPOSITION_WITH_RULE; \
+ int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
+ int *buf = components; \
+ int i, j; \
+ int from, to; \
\
- c = SAFE_ONE_MORE_BYTE (); \
- if (c < 0) \
- break; \
- if (CHAR_HEAD_P (c)) \
- c = -1; \
- else if (c == 0xA0) \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
+ for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
{ \
- c = SAFE_ONE_MORE_BYTE (); \
- if (c < 0xA0) \
- c = -1; \
- else \
- { \
- c -= 0xA0; \
- *p++ = c; \
- } \
- } \
- else if (BASE_LEADING_CODE_P (c - 0x20)) \
- { \
- unsigned char *p0 = p; \
- \
- c -= 0x20; \
- *p++ = c; \
- bytes = BYTES_BY_CHAR_HEAD (c); \
- while (--bytes) \
- { \
- c = SAFE_ONE_MORE_BYTE (); \
- if (c < 0) \
- break; \
- *p++ = c; \
- } \
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \
- || (coding->flags /* We are recovering a file. */ \
- && p0[0] == LEADING_CODE_8_BIT_CONTROL \
- && ! CHAR_HEAD_P (p0[1]))) \
- c = STRING_CHAR (p0, bytes); \
- else \
- c = -1; \
+ DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
} \
- else \
- c = -1; \
+ if (i < 1 || (buf - components) % 2 == 0) \
+ goto invalid_code; \
+ if (charbuf + i + (i / 2) + 1 < charbuf_end) \
+ goto no_more_source; \
+ from = coding->produced_char + char_offset; \
+ to = from + i; \
+ ADD_COMPOSITION_DATA (buf, from, to, method); \
+ for (j = 0; j < i; j++) \
+ *charbuf++ = components[j]; \
+ for (j = 0; j < i; j += 2) \
+ *charbuf++ = components[j]; \
} while (0)
-/* Decode a composition rule represented as a component of composition
- sequence of Emacs 20 style at SRC. Set C to the rule. If not
- valid rule is found, set C to -1. */
-
-#define DECODE_EMACS_MULE_COMPOSITION_RULE(c) \
- do { \
- c = SAFE_ONE_MORE_BYTE (); \
- c -= 0xA0; \
- if (c < 0 || c >= 81) \
- c = -1; \
- else \
- { \
- gref = c / 9, nref = c % 9; \
- c = COMPOSITION_ENCODE_RULE (gref, nref); \
- } \
- } while (0)
-
-
-/* Decode composition sequence encoded by `emacs-mule' at the source
- pointed by SRC. SRC_END is the end of source. Store information
- of the composition in CODING->cmp_data.
-
- For backward compatibility, decode also a composition sequence of
- Emacs 20 style. In that case, the composition sequence contains
- characters that should be extracted into a buffer or string. Store
- those characters at *DESTINATION in multibyte form.
-
- If we encounter an invalid byte sequence, return 0.
- If we encounter an insufficient source or destination, or
- insufficient space in CODING->cmp_data, return 1.
- Otherwise, return consumed bytes in the source.
-
-*/
-static INLINE int
-decode_composition_emacs_mule (coding, src, src_end,
- destination, dst_end, dst_bytes)
+static void
+decode_coding_emacs_mule (coding)
struct coding_system *coding;
- unsigned char *src, *src_end, **destination, *dst_end;
- int dst_bytes;
{
- unsigned char *dst = *destination;
- int method, data_len, nchars;
- unsigned char *src_base = src++;
- /* Store components of composition. */
- int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
- int ncomponent;
- /* Store multibyte form of characters to be composed. This is for
- Emacs 20 style composition sequence. */
- unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
- unsigned char *bufp = buf;
- int c, i, gref, nref;
-
- if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
- >= COMPOSITION_DATA_SIZE)
- {
- coding->result = CODING_FINISH_INSUFFICIENT_CMP;
- return -1;
- }
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object attrs, eol_type, charset_list;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
- ONE_MORE_BYTE (c);
- if (c - 0xF0 >= COMPOSITION_RELATIVE
- && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
- {
- int with_rule;
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
- method = c - 0xF0;
- with_rule = (method == COMPOSITION_WITH_RULE
- || method == COMPOSITION_WITH_RULE_ALTCHARS);
- ONE_MORE_BYTE (c);
- data_len = c - 0xA0;
- if (data_len < 4
- || src_base + data_len > src_end)
- return 0;
- ONE_MORE_BYTE (c);
- nchars = c - 0xA0;
- if (c < 1)
- return 0;
- for (ncomponent = 0; src < src_base + data_len; ncomponent++)
- {
- /* If it is longer than this, it can't be valid. */
- if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
- return 0;
-
- if (ncomponent % 2 && with_rule)
- {
- ONE_MORE_BYTE (gref);
- gref -= 32;
- ONE_MORE_BYTE (nref);
- nref -= 32;
- c = COMPOSITION_ENCODE_RULE (gref, nref);
- }
- else
- {
- int bytes;
- if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
- || (coding->flags /* We are recovering a file. */
- && src[0] == LEADING_CODE_8_BIT_CONTROL
- && ! CHAR_HEAD_P (src[1])))
- c = STRING_CHAR (src, bytes);
- else
- c = *src, bytes = 1;
- src += bytes;
- }
- component[ncomponent] = c;
- }
- }
- else
- {
- /* This may be an old Emacs 20 style format. See the comment at
- the section 2 of this file. */
- while (src < src_end && !CHAR_HEAD_P (*src)) src++;
- if (src == src_end
- && !(coding->mode & CODING_MODE_LAST_BLOCK))
- goto label_end_of_loop;
-
- src_end = src;
- src = src_base + 1;
- if (c < 0xC0)
- {
- method = COMPOSITION_RELATIVE;
- for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
- {
- DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
- if (c < 0)
- break;
- component[ncomponent++] = c;
- }
- if (ncomponent < 2)
- return 0;
- nchars = ncomponent;
- }
- else if (c == 0xFF)
- {
- method = COMPOSITION_WITH_RULE;
- src++;
- DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
- if (c < 0)
- return 0;
- component[0] = c;
- for (ncomponent = 1;
- ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
- {
- DECODE_EMACS_MULE_COMPOSITION_RULE (c);
- if (c < 0)
- break;
- component[ncomponent++] = c;
- DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
- if (c < 0)
- break;
- component[ncomponent++] = c;
- }
- if (ncomponent < 3)
- return 0;
- nchars = (ncomponent + 1) / 2;
- }
- else
- return 0;
- }
-
- if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
+ while (1)
{
- CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
- for (i = 0; i < ncomponent; i++)
- CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
- if (buf < bufp)
- {
- unsigned char *p = buf;
- EMIT_BYTES (p, bufp);
- *destination += bufp - buf;
- coding->produced_char += nchars;
- }
- return (src - src_base);
- }
- label_end_of_loop:
- return -1;
-}
+ int c;
-/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
+ src_base = src;
+ consumed_chars_base = consumed_chars;
-static void
-decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
- struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
-{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code, or
- when there's not enough destination area to produce a
- character. */
- unsigned char *src_base;
+ if (charbuf >= charbuf_end)
+ break;
- coding->produced_char = 0;
- while ((src_base = src) < src_end)
- {
- unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
- int bytes;
+ ONE_MORE_BYTE (c);
- if (*src == '\r')
+ if (c < 0x80)
{
- int c = *src++;
-
- if (coding->eol_type == CODING_EOL_CR)
- c = '\n';
- else if (coding->eol_type == CODING_EOL_CRLF)
+ if (c == '\r')
{
- ONE_MORE_BYTE (c);
- if (c != '\n')
+ if (EQ (eol_type, Qdos))
{
- src--;
- c = '\r';
+ if (src == src_end)
+ {
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
+ }
+ if (*src == '\n')
+ ONE_MORE_BYTE (c);
}
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
}
- *dst++ = c;
- coding->produced_char++;
- continue;
+ *charbuf++ = c;
+ char_offset++;
}
- else if (*src == '\n')
+ else if (c == 0x80)
{
- if ((coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF)
- && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- *dst++ = *src++;
- coding->produced_char++;
- continue;
+ ONE_MORE_BYTE (c);
+ if (c - 0xF2 >= COMPOSITION_RELATIVE
+ && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
+ DECODE_EMACS_MULE_21_COMPOSITION (c);
+ else if (c < 0xC0)
+ DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
+ else if (c == 0xFF)
+ DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
+ else
+ goto invalid_code;
}
- else if (*src == 0x80 && coding->cmp_data)
+ else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
{
- /* Start of composition data. */
- int consumed = decode_composition_emacs_mule (coding, src, src_end,
- &dst, dst_end,
- dst_bytes);
- if (consumed < 0)
- goto label_end_of_loop;
- else if (consumed > 0)
+ int nbytes, nchars;
+ int id;
+
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
+ if (c < 0)
{
- src += consumed;
- continue;
+ if (c == -2)
+ break;
+ goto invalid_code;
}
- bytes = CHAR_STRING (*src, tmp);
- p = tmp;
- src++;
- }
- else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
- || (coding->flags /* We are recovering a file. */
- && src[0] == LEADING_CODE_8_BIT_CONTROL
- && ! CHAR_HEAD_P (src[1])))
- {
- p = src;
- src += bytes;
- }
- else
- {
- bytes = CHAR_STRING (*src, tmp);
- p = tmp;
- src++;
- }
- if (dst + bytes >= (dst_bytes ? dst_end : src))
- {
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- break;
+ if (last_id != id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ last_id = id;
+ last_offset = char_offset;
+ }
+ *charbuf++ = c;
+ src += nbytes;
+ consumed_chars += nchars;
+ char_offset++;
}
- while (bytes--) *dst++ = *p++;
- coding->produced_char++;
+ continue;
+
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
-}
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
-/* Encode composition data stored at DATA into a special byte sequence
- starting by 0x80. Update CODING->cmp_data_start and maybe
- CODING->cmp_data for the next call. */
-
-#define ENCODE_COMPOSITION_EMACS_MULE(coding, data) \
- do { \
- unsigned char buf[1024], *p0 = buf, *p; \
- int len = data[0]; \
- int i; \
- \
- buf[0] = 0x80; \
- buf[1] = 0xF0 + data[3]; /* METHOD */ \
- buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */ \
- p = buf + 4; \
- if (data[3] == COMPOSITION_WITH_RULE \
- || data[3] == COMPOSITION_WITH_RULE_ALTCHARS) \
- { \
- p += CHAR_STRING (data[4], p); \
- for (i = 5; i < len; i += 2) \
- { \
- int gref, nref; \
- COMPOSITION_DECODE_RULE (data[i], gref, nref); \
- *p++ = 0x20 + gref; \
- *p++ = 0x20 + nref; \
- p += CHAR_STRING (data[i + 1], p); \
- } \
- } \
- else \
- { \
- for (i = 4; i < len; i++) \
- p += CHAR_STRING (data[i], p); \
- } \
- buf[2] = 0xA0 + (p - buf); /* COMPONENTS-BYTES */ \
- \
- if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- while (p0 < p) \
- *dst++ = *p0++; \
- coding->cmp_data_start += data[0]; \
- if (coding->cmp_data_start == coding->cmp_data->used \
- && coding->cmp_data->next) \
- { \
- coding->cmp_data = coding->cmp_data->next; \
- coding->cmp_data_start = 0; \
- } \
- } while (0)
+#define EMACS_MULE_LEADING_CODES(id, codes) \
+ do { \
+ if (id < 0xA0) \
+ codes[0] = id, codes[1] = 0; \
+ else if (id < 0xE0) \
+ codes[0] = 0x9A, codes[1] = id; \
+ else if (id < 0xF0) \
+ codes[0] = 0x9B, codes[1] = id; \
+ else if (id < 0xF5) \
+ codes[0] = 0x9C, codes[1] = id; \
+ else \
+ codes[0] = 0x9D, codes[1] = id; \
+ } while (0);
-static void encode_eol P_ ((struct coding_system *, const unsigned char *,
- unsigned char *, int, int));
-static void
-encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
+static int
+encode_coding_emacs_mule (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- unsigned char *src_base;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 8;
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list;
int c;
- int char_offset;
- int *data;
-
- Lisp_Object translation_table;
+ int preferred_charset_id = -1;
- translation_table = Qnil;
-
- /* Optimization for the case that there's no composition. */
- if (!coding->cmp_data || coding->cmp_data->used == 0)
- {
- encode_eol (coding, source, destination, src_bytes, dst_bytes);
- return;
- }
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
- char_offset = coding->cmp_data->char_offset;
- data = coding->cmp_data->data + coding->cmp_data_start;
- while (1)
+ while (charbuf < charbuf_end)
{
- src_base = src;
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
- /* If SRC starts a composition, encode the information about the
- composition in advance. */
- if (coding->cmp_data_start < coding->cmp_data->used
- && char_offset + coding->consumed_char == data[1])
+ if (c < 0)
{
- ENCODE_COMPOSITION_EMACS_MULE (coding, data);
- char_offset = coding->cmp_data->char_offset;
- data = coding->cmp_data->data + coding->cmp_data_start;
+ /* Handle an annotation. */
+ switch (*charbuf)
+ {
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ /* Not yet implemented. */
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ preferred_charset_id = charbuf[3];
+ if (preferred_charset_id >= 0
+ && NILP (Fmemq (make_number (preferred_charset_id),
+ charset_list)))
+ preferred_charset_id = -1;
+ break;
+ default:
+ abort ();
+ }
+ charbuf += -c - 1;
+ continue;
}
- ONE_MORE_CHAR (c);
- if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
- || coding->eol_type == CODING_EOL_CR))
+ if (ASCII_CHAR_P (c))
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
{
- if (coding->eol_type == CODING_EOL_CRLF)
- EMIT_TWO_BYTES ('\r', c);
- else
- EMIT_ONE_BYTE ('\r');
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
- else if (SINGLE_BYTE_CHAR_P (c))
+ else
{
- if (coding->flags && ! ASCII_BYTE_P (c))
- {
- /* As we are auto saving, retain the multibyte form for
- 8-bit chars. */
- unsigned char buf[MAX_MULTIBYTE_LENGTH];
- int bytes = CHAR_STRING (c, buf);
+ struct charset *charset;
+ unsigned code;
+ int dimension;
+ int emacs_mule_id;
+ unsigned char leading_codes[2];
- if (bytes == 1)
- EMIT_ONE_BYTE (buf[0]);
- else
- EMIT_TWO_BYTES (buf[0], buf[1]);
+ if (preferred_charset_id >= 0)
+ {
+ charset = CHARSET_FROM_ID (preferred_charset_id);
+ if (! CHAR_CHARSET_P (c, charset))
+ charset = char_charset (c, charset_list, NULL);
}
else
- EMIT_ONE_BYTE (c);
+ charset = char_charset (c, charset_list, &code);
+ if (! charset)
+ {
+ c = coding->default_char;
+ if (ASCII_CHAR_P (c))
+ {
+ EMIT_ONE_ASCII_BYTE (c);
+ continue;
+ }
+ charset = char_charset (c, charset_list, &code);
+ }
+ dimension = CHARSET_DIMENSION (charset);
+ emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
+ EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
+ EMIT_ONE_BYTE (leading_codes[0]);
+ if (leading_codes[1])
+ EMIT_ONE_BYTE (leading_codes[1]);
+ if (dimension == 1)
+ EMIT_ONE_BYTE (code);
+ else
+ {
+ EMIT_ONE_BYTE (code >> 8);
+ EMIT_ONE_BYTE (code & 0xFF);
+ }
}
- else
- EMIT_BYTES (src_base, src);
- coding->consumed_char++;
}
- label_end_of_loop:
- coding->consumed = src_base - source;
- coding->produced = coding->produced_char = dst - destination;
- return;
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/*** 3. ISO2022 handlers ***/
+/*** 7. ISO2022 handlers ***/
/* The following note describes the coding system ISO2022 briefly.
Since the intention of this note is to help understand the
@@ -1301,7 +2276,7 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
7-bit environment, non-locking-shift, and non-single-shift.
Note (**): If <F> is '@', 'A', or 'B', the intermediate character
- '(' can be omitted. We refer to this as "short-form" hereafter.
+ '(' must be omitted. We refer to this as "short-form" hereafter.
Now you may notice that there are a lot of ways of encoding the
same multilingual text in ISO2022. Actually, there exist many
@@ -1331,10 +2306,10 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
Since these are not standard escape sequences of any ISO standard,
the use of them with these meanings is restricted to Emacs only.
- (*) This form is used only in Emacs 20.5 and older versions,
- but the newer versions can safely decode it.
+ (*) This form is used only in Emacs 20.7 and older versions,
+ but newer versions can safely decode it.
(**) This form is used only in Emacs 21.1 and newer versions,
- and the older versions can't decode it.
+ and older versions can't decode it.
Here's a list of example usages of these composition escape
sequences (categorized by `enum composition_method').
@@ -1350,209 +2325,231 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
enum iso_code_class_type iso_code_class[256];
-#define CHARSET_OK(idx, charset, c) \
- (coding_system_table[idx] \
- && (charset == CHARSET_ASCII \
- || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
- CODING_SAFE_CHAR_P (safe_chars, c))) \
- && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
- charset) \
- != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
+#define SAFE_CHARSET_P(coding, id) \
+ ((id) <= (coding)->max_charset_id \
+ && (coding)->safe_charsets[id] >= 0)
+
+
+#define SHIFT_OUT_OK(category) \
+ (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
+
+static void
+setup_iso_safe_charsets (attrs)
+ Lisp_Object attrs;
+{
+ Lisp_Object charset_list, safe_charsets;
+ Lisp_Object request;
+ Lisp_Object reg_usage;
+ Lisp_Object tail;
+ int reg94, reg96;
+ int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+ int max_charset_id;
+
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
+ && ! EQ (charset_list, Viso_2022_charset_list))
+ {
+ CODING_ATTR_CHARSET_LIST (attrs)
+ = charset_list = Viso_2022_charset_list;
+ ASET (attrs, coding_attr_safe_charsets, Qnil);
+ }
+
+ if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
+ return;
+
+ max_charset_id = 0;
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ {
+ int id = XINT (XCAR (tail));
+ if (max_charset_id < id)
+ max_charset_id = id;
+ }
-#define SHIFT_OUT_OK(idx) \
- (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
+ safe_charsets = Fmake_string (make_number (max_charset_id + 1),
+ make_number (255));
+ request = AREF (attrs, coding_attr_iso_request);
+ reg_usage = AREF (attrs, coding_attr_iso_usage);
+ reg94 = XINT (XCAR (reg_usage));
+ reg96 = XINT (XCDR (reg_usage));
+
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ {
+ Lisp_Object id;
+ Lisp_Object reg;
+ struct charset *charset;
+
+ id = XCAR (tail);
+ charset = CHARSET_FROM_ID (XINT (id));
+ reg = Fcdr (Fassq (id, request));
+ if (! NILP (reg))
+ SSET (safe_charsets, XINT (id), XINT (reg));
+ else if (charset->iso_chars_96)
+ {
+ if (reg96 < 4)
+ SSET (safe_charsets, XINT (id), reg96);
+ }
+ else
+ {
+ if (reg94 < 4)
+ SSET (safe_charsets, XINT (id), reg94);
+ }
+ }
+ ASET (attrs, coding_attr_safe_charsets, safe_charsets);
+}
-#define COMPOSITION_OK(idx) \
- (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in ISO2022. If it is, return an
- integer in which appropriate flag bits any of:
- CODING_CATEGORY_MASK_ISO_7
- CODING_CATEGORY_MASK_ISO_7_TIGHT
- CODING_CATEGORY_MASK_ISO_8_1
- CODING_CATEGORY_MASK_ISO_8_2
- CODING_CATEGORY_MASK_ISO_7_ELSE
- CODING_CATEGORY_MASK_ISO_8_ELSE
- are set. If a code which should never appear in ISO2022 is found,
- returns 0. */
+ Check if a text is encoded in one of ISO-2022 based codig systems.
+ If it is, return 1, else return 0. */
static int
-detect_coding_iso2022 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_iso_2022 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- int mask = CODING_CATEGORY_MASK_ISO;
- int mask_found = 0;
- int reg[4], shift_out = 0, single_shifting = 0;
- int c, c1, charset;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
- Lisp_Object safe_chars;
-
- reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
- while (mask && src < src_end)
- {
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- retry:
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int single_shifting = 0;
+ int id;
+ int c, c1;
+ int consumed_chars = 0;
+ int i;
+ int rejected = 0;
+ int found = 0;
+
+ detect_info->checked |= CATEGORY_MASK_ISO;
+
+ for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
+ {
+ struct coding_system *this = &(coding_categories[i]);
+ Lisp_Object attrs, val;
+
+ attrs = CODING_ID_ATTRS (this->id);
+ if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
+ && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
+ setup_iso_safe_charsets (attrs);
+ val = CODING_ATTR_SAFE_CHARSETS (attrs);
+ this->max_charset_id = SCHARS (val) - 1;
+ this->safe_charsets = (char *) SDATA (val);
+ }
+
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
+
+ while (rejected != CATEGORY_MASK_ISO)
+ {
+ ONE_MORE_BYTE (c);
switch (c)
{
case ISO_CODE_ESC:
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c >= '(' && c <= '/')
{
/* Designation sequence for a charset of dimension 1. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
- || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
+ || (id = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
break;
- reg[(c - '(') % 4] = charset;
}
else if (c == '$')
{
/* Designation sequence for a charset of dimension 2. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
- reg[0] = charset = iso_charset_table[1][0][c];
+ id = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
- || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
+ || (id = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
break;
- reg[(c - '(') % 4] = charset;
}
else
- /* Invalid designation sequence. Just ignore. */
+ /* Invalid designation sequence. Just ignore it. */
break;
}
else if (c == 'N' || c == 'O')
{
/* ESC <Fe> for SS2 or SS3. */
- mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ single_shifting = 1;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
break;
}
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
- mask_found |= CODING_CATEGORY_MASK_ISO_7;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
+ found |= CATEGORY_MASK_ISO;
break;
}
else
- /* Invalid escape sequence. Just ignore. */
- break;
+ {
+ /* Invalid escape sequence. Just ignore it. */
+ break;
+ }
/* We found a valid designation sequence for CHARSET. */
- mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
- c = MAKE_CHAR (charset, 0, 0);
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_7;
+ rejected |= CATEGORY_MASK_ISO_8BIT;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
+ id))
+ found |= CATEGORY_MASK_ISO_7;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ rejected |= CATEGORY_MASK_ISO_7;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
+ id))
+ found |= CATEGORY_MASK_ISO_7_TIGHT;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ rejected |= CATEGORY_MASK_ISO_7_TIGHT;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
+ id))
+ found |= CATEGORY_MASK_ISO_7_ELSE;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+ rejected |= CATEGORY_MASK_ISO_7_ELSE;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
+ id))
+ found |= CATEGORY_MASK_ISO_8_ELSE;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
+ rejected |= CATEGORY_MASK_ISO_8_ELSE;
break;
case ISO_CODE_SO:
- if (inhibit_iso_escape_detection)
- break;
- single_shifting = 0;
- if (shift_out == 0
- && (reg[1] >= 0
- || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
- || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
- {
- /* Locking shift out. */
- mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
- mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
- }
- break;
-
case ISO_CODE_SI:
+ /* Locking shift out/in. */
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- if (shift_out == 1)
- {
- /* Locking shift in. */
- mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
- mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
- }
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
+ found |= CATEGORY_MASK_ISO_ELSE;
break;
case ISO_CODE_CSI:
+ /* Control sequence introducer. */
single_shifting = 0;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
+ found |= CATEGORY_MASK_ISO_8_ELSE;
+ goto check_extra_latin;
+
+
case ISO_CODE_SS2:
case ISO_CODE_SS3:
- {
- int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
-
- if (inhibit_iso_escape_detection)
- break;
- if (c != ISO_CODE_CSI)
- {
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
- & CODING_FLAG_ISO_SINGLE_SHIFT)
- newmask |= CODING_CATEGORY_MASK_ISO_8_1;
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
- & CODING_FLAG_ISO_SINGLE_SHIFT)
- newmask |= CODING_CATEGORY_MASK_ISO_8_2;
- single_shifting = 1;
- }
- if (VECTORP (Vlatin_extra_code_table)
- && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
- {
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_1;
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_2;
- }
- mask &= newmask;
- mask_found |= newmask;
- }
- break;
+ /* Single shift. */
+ if (inhibit_iso_escape_detection)
+ break;
+ single_shifting = 1;
+ rejected |= CATEGORY_MASK_ISO_7BIT;
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+ & CODING_ISO_FLAG_SINGLE_SHIFT)
+ found |= CATEGORY_MASK_ISO_8_1;
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
+ & CODING_ISO_FLAG_SINGLE_SHIFT)
+ found |= CATEGORY_MASK_ISO_8_2;
+ goto check_extra_latin;
default:
if (c < 0x80)
@@ -1560,210 +2557,205 @@ detect_coding_iso2022 (src, src_end, multibytep)
single_shifting = 0;
break;
}
- else if (c < 0xA0)
+ if (c >= 0xA0)
{
- single_shifting = 0;
- if (VECTORP (Vlatin_extra_code_table)
- && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
- {
- int newmask = 0;
-
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_1;
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_2;
- mask &= newmask;
- mask_found |= newmask;
- }
- else
- return 0;
- }
- else
- {
- mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
- | CODING_CATEGORY_MASK_ISO_7_ELSE);
- mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
+ found |= CATEGORY_MASK_ISO_8_1;
/* Check the length of succeeding codes of the range
- 0xA0..0FF. If the byte length is odd, we exclude
- CODING_CATEGORY_MASK_ISO_8_2. We can check this only
- when we are not single shifting. */
- if (!single_shifting
- && mask & CODING_CATEGORY_MASK_ISO_8_2)
+ 0xA0..0FF. If the byte length is even, we include
+ CATEGORY_MASK_ISO_8_2 in `found'. We can check this
+ only when we are not single shifting. */
+ if (! single_shifting
+ && ! (rejected & CATEGORY_MASK_ISO_8_2))
{
int i = 1;
-
- c = -1;
while (src < src_end)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c < 0xA0)
break;
i++;
}
if (i & 1 && src < src_end)
- mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+ rejected |= CATEGORY_MASK_ISO_8_2;
else
- mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
- if (c >= 0)
- /* This means that we have read one extra byte. */
- goto retry;
+ found |= CATEGORY_MASK_ISO_8_2;
}
+ break;
}
- break;
+ check_extra_latin:
+ single_shifting = 0;
+ if (! VECTORP (Vlatin_extra_code_table)
+ || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+ {
+ rejected = CATEGORY_MASK_ISO;
+ break;
+ }
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+ & CODING_ISO_FLAG_LATIN_EXTRA)
+ found |= CATEGORY_MASK_ISO_8_1;
+ else
+ rejected |= CATEGORY_MASK_ISO_8_1;
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
+ & CODING_ISO_FLAG_LATIN_EXTRA)
+ found |= CATEGORY_MASK_ISO_8_2;
+ else
+ rejected |= CATEGORY_MASK_ISO_8_2;
}
}
- label_end_of_loop:
- return (mask & mask_found);
-}
+ detect_info->rejected |= CATEGORY_MASK_ISO;
+ return 0;
-/* Decode a character of which charset is CHARSET, the 1st position
- code is C1, the 2nd position code is C2, and return the decoded
- character code. If the variable `translation_table' is non-nil,
- returned the translated code. */
+ no_more_source:
+ detect_info->rejected |= rejected;
+ detect_info->found |= (found & ~rejected);
+ return 1;
+}
-#define DECODE_ISO_CHARACTER(charset, c1, c2) \
- (NILP (translation_table) \
- ? MAKE_CHAR (charset, c1, c2) \
- : translate_char (translation_table, -1, charset, c1, c2))
/* Set designation state into CODING. */
-#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
- do { \
- int charset, c; \
- \
- if (final_char < '0' || final_char >= 128) \
- goto label_invalid_code; \
- charset = ISO_CHARSET_TABLE (make_number (dimension), \
- make_number (chars), \
- make_number (final_char)); \
- c = MAKE_CHAR (charset, 0, 0); \
- if (charset >= 0 \
- && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
- || CODING_SAFE_CHAR_P (safe_chars, c))) \
- { \
- if (coding->spec.iso2022.last_invalid_designation_register == 0 \
- && reg == 0 \
- && charset == CHARSET_ASCII) \
- { \
- /* We should insert this designation sequence as is so \
- that it is surely written back to a file. */ \
- coding->spec.iso2022.last_invalid_designation_register = -1; \
- goto label_invalid_code; \
- } \
- coding->spec.iso2022.last_invalid_designation_register = -1; \
- if ((coding->mode & CODING_MODE_DIRECTION) \
- && CHARSET_REVERSE_CHARSET (charset) >= 0) \
- charset = CHARSET_REVERSE_CHARSET (charset); \
- CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
- } \
- else \
- { \
- coding->spec.iso2022.last_invalid_designation_register = reg; \
- goto label_invalid_code; \
- } \
+#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
+ do { \
+ int id, prev; \
+ \
+ if (final < '0' || final >= 128 \
+ || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
+ || !SAFE_CHARSET_P (coding, id)) \
+ { \
+ CODING_ISO_DESIGNATION (coding, reg) = -2; \
+ goto invalid_code; \
+ } \
+ prev = CODING_ISO_DESIGNATION (coding, reg); \
+ if (id == charset_jisx0201_roman) \
+ { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
+ id = charset_ascii; \
+ } \
+ else if (id == charset_jisx0208_1978) \
+ { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
+ id = charset_jisx0208; \
+ } \
+ CODING_ISO_DESIGNATION (coding, reg) = id; \
+ /* If there was an invalid designation to REG previously, and this \
+ designation is ASCII to REG, we should keep this designation \
+ sequence. */ \
+ if (prev == -2 && id == charset_ascii) \
+ goto invalid_code; \
} while (0)
-/* Allocate a memory block for storing information about compositions.
- The block is chained to the already allocated blocks. */
-void
-coding_allocate_composition_data (coding, char_offset)
- struct coding_system *coding;
- int char_offset;
-{
- struct composition_data *cmp_data
- = (struct composition_data *) xmalloc (sizeof *cmp_data);
-
- cmp_data->char_offset = char_offset;
- cmp_data->used = 0;
- cmp_data->prev = coding->cmp_data;
- cmp_data->next = NULL;
- if (coding->cmp_data)
- coding->cmp_data->next = cmp_data;
- coding->cmp_data = cmp_data;
- coding->cmp_data_start = 0;
-}
+#define MAYBE_FINISH_COMPOSITION() \
+ do { \
+ int i; \
+ if (composition_state == COMPOSING_NO) \
+ break; \
+ /* It is assured that we have enough room for producing \
+ characters stored in the table `components'. */ \
+ if (charbuf + component_idx > charbuf_end) \
+ goto no_more_source; \
+ composition_state = COMPOSING_NO; \
+ if (method == COMPOSITION_RELATIVE \
+ || method == COMPOSITION_WITH_ALTCHARS) \
+ { \
+ for (i = 0; i < component_idx; i++) \
+ *charbuf++ = components[i]; \
+ char_offset += component_idx; \
+ } \
+ else \
+ { \
+ for (i = 0; i < component_idx; i += 2) \
+ *charbuf++ = components[i]; \
+ char_offset += (component_idx / 2) + 1; \
+ } \
+ } while (0)
+
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
- ESC 3 : altchar composition : ESC 3 ALT ... ESC 0 CHAR ... ESC 1
- ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1
+ ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
+ ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
*/
-#define DECODE_COMPOSITION_START(c1) \
- do { \
- if (coding->composing == COMPOSITION_DISABLED) \
- { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = c1 & 0x7f; \
- coding->produced_char += 2; \
- } \
- else if (!COMPOSING_P (coding)) \
- { \
- /* This is surely the start of a composition. We must be sure \
- that coding->cmp_data has enough space to store the \
- information about the composition. If not, terminate the \
- current decoding loop, allocate one more memory block for \
- coding->cmp_data in the caller, then start the decoding \
- loop again. We can't allocate memory here directly because \
- it may cause buffer/string relocation. */ \
- if (!coding->cmp_data \
- || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
- >= COMPOSITION_DATA_SIZE)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
- goto label_end_of_loop; \
- } \
- coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
- : c1 == '2' ? COMPOSITION_WITH_RULE \
- : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
- : COMPOSITION_WITH_RULE_ALTCHARS); \
- CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
- coding->composing); \
- coding->composition_rule_follows = 0; \
- } \
- else \
- { \
- /* We are already handling a composition. If the method is \
- the following two, the codes following the current escape \
- sequence are actual characters stored in a buffer. */ \
- if (coding->composing == COMPOSITION_WITH_ALTCHARS \
- || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
- { \
- coding->composing = COMPOSITION_RELATIVE; \
- coding->composition_rule_follows = 0; \
- } \
- } \
+#define DECODE_COMPOSITION_START(c1) \
+ do { \
+ if (c1 == '0' \
+ && composition_state == COMPOSING_COMPONENT_RULE) \
+ { \
+ component_len = component_idx; \
+ composition_state = COMPOSING_CHAR; \
+ } \
+ else \
+ { \
+ const unsigned char *p; \
+ \
+ MAYBE_FINISH_COMPOSITION (); \
+ if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
+ goto no_more_source; \
+ for (p = src; p < src_end - 1; p++) \
+ if (*p == ISO_CODE_ESC && p[1] == '1') \
+ break; \
+ if (p == src_end - 1) \
+ { \
+ if (coding->mode & CODING_MODE_LAST_BLOCK) \
+ goto invalid_code; \
+ goto no_more_source; \
+ } \
+ \
+ /* This is surely the start of a composition. */ \
+ method = (c1 == '0' ? COMPOSITION_RELATIVE \
+ : c1 == '2' ? COMPOSITION_WITH_RULE \
+ : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
+ : COMPOSITION_WITH_RULE_ALTCHARS); \
+ composition_state = (c1 <= '2' ? COMPOSING_CHAR \
+ : COMPOSING_COMPONENT_CHAR); \
+ component_idx = component_len = 0; \
+ } \
} while (0)
-/* Handle composition end sequence ESC 1. */
-#define DECODE_COMPOSITION_END(c1) \
+/* Handle compositoin end sequence ESC 1. */
+
+#define DECODE_COMPOSITION_END() \
do { \
- if (! COMPOSING_P (coding)) \
+ int nchars = (component_len > 0 ? component_idx - component_len \
+ : method == COMPOSITION_RELATIVE ? component_idx \
+ : (component_idx + 1) / 2); \
+ int i; \
+ int *saved_charbuf = charbuf; \
+ int from = char_offset; \
+ int to = from + nchars; \
+ \
+ ADD_COMPOSITION_DATA (charbuf, from, to, method); \
+ if (method != COMPOSITION_RELATIVE) \
{ \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = c1; \
- coding->produced_char += 2; \
+ if (component_len == 0) \
+ for (i = 0; i < component_idx; i++) \
+ *charbuf++ = components[i]; \
+ else \
+ for (i = 0; i < component_len; i++) \
+ *charbuf++ = components[i]; \
+ *saved_charbuf = saved_charbuf - charbuf; \
} \
+ if (method == COMPOSITION_WITH_RULE) \
+ for (i = 0; i < component_idx; i += 2, char_offset++) \
+ *charbuf++ = components[i]; \
else \
- { \
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
- coding->composing = COMPOSITION_NO; \
- } \
+ for (i = component_len; i < component_idx; i++, char_offset++) \
+ *charbuf++ = components[i]; \
+ coding->annotated = 1; \
+ composition_state = COMPOSING_NO; \
} while (0)
+
/* Decode a composition rule from the byte C1 (and maybe one more byte
from SRC) and store one encoded composition rule in
coding->cmp_data. */
#define DECODE_COMPOSITION_RULE(c1) \
do { \
- int rule = 0; \
(c1) -= 32; \
if (c1 < 81) /* old format (before ver.21) */ \
{ \
@@ -1771,167 +2763,173 @@ coding_allocate_composition_data (coding, char_offset)
int nref = (c1) % 9; \
if (gref == 4) gref = 10; \
if (nref == 4) nref = 10; \
- rule = COMPOSITION_ENCODE_RULE (gref, nref); \
+ c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
} \
else if (c1 < 93) /* new format (after ver.21) */ \
{ \
ONE_MORE_BYTE (c2); \
- rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
+ c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
} \
- CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
- coding->composition_rule_follows = 0; \
+ else \
+ c1 = 0; \
} while (0)
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
static void
-decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_iso_2022 (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end
+ = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
- int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code
- (within macro ONE_MORE_BYTE), or when there's not enough
- destination area to produce a character (within macro
- EMIT_CHAR). */
- unsigned char *src_base;
- int c, charset;
- Lisp_Object translation_table;
- Lisp_Object safe_chars;
-
- safe_chars = coding_safe_chars (coding->symbol);
-
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_decode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
- }
-
- coding->result = CODING_FINISH_NORMAL;
+ int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ struct charset *charset;
+ int c;
+ /* For handling composition sequence. */
+#define COMPOSING_NO 0
+#define COMPOSING_CHAR 1
+#define COMPOSING_RULE 2
+#define COMPOSING_COMPONENT_CHAR 3
+#define COMPOSING_COMPONENT_RULE 4
+
+ int composition_state = COMPOSING_NO;
+ enum composition_method method;
+ int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
+ int component_idx;
+ int component_len;
+ Lisp_Object attrs, eol_type, charset_list;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ setup_iso_safe_charsets (attrs);
while (1)
{
int c1, c2;
src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
ONE_MORE_BYTE (c1);
- /* We produce no character or one character. */
+ /* We produce at most one character. */
switch (iso_code_class [c1])
{
case ISO_0x20_or_0x7F:
- if (COMPOSING_P (coding) && coding->composition_rule_follows)
+ if (composition_state != COMPOSING_NO)
{
- DECODE_COMPOSITION_RULE (c1);
- continue;
- }
- if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
- {
- /* This is SPACE or DEL. */
- charset = CHARSET_ASCII;
- break;
+ if (composition_state == COMPOSING_RULE
+ || composition_state == COMPOSING_COMPONENT_RULE)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
}
- /* This is a graphic character, we fall down ... */
+ if (charset_id_0 < 0
+ || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
+ /* This is SPACE or DEL. */
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_0);
+ break;
case ISO_graphic_plane_0:
- if (COMPOSING_P (coding) && coding->composition_rule_follows)
+ if (composition_state != COMPOSING_NO)
{
- DECODE_COMPOSITION_RULE (c1);
- continue;
+ if (composition_state == COMPOSING_RULE
+ || composition_state == COMPOSING_COMPONENT_RULE)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
}
- charset = charset0;
+ charset = CHARSET_FROM_ID (charset_id_0);
break;
case ISO_0xA0_or_0xFF:
- if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
- || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
- goto label_invalid_code;
+ if (charset_id_1 < 0
+ || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
+ || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+ goto invalid_code;
/* This is a graphic character, we fall down ... */
case ISO_graphic_plane_1:
- if (charset1 < 0)
- goto label_invalid_code;
- charset = charset1;
+ if (charset_id_1 < 0)
+ goto invalid_code;
+ charset = CHARSET_FROM_ID (charset_id_1);
break;
- case ISO_control_0:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
-
- /* All ISO2022 control characters in this class have the
- same representation in Emacs internal format. */
- if (c1 == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- && (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF))
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- charset = CHARSET_ASCII;
- break;
-
- case ISO_control_1:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
- goto label_invalid_code;
-
case ISO_carriage_return:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
-
- if (coding->eol_type == CODING_EOL_CR)
- c1 = '\n';
- else if (coding->eol_type == CODING_EOL_CRLF)
+ if (c1 == '\r')
{
- ONE_MORE_BYTE (c1);
- if (c1 != ISO_CODE_LF)
+ if (EQ (eol_type, Qdos))
{
- src--;
- c1 = '\r';
+ if (src == src_end)
+ {
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
+ }
+ if (*src == '\n')
+ ONE_MORE_BYTE (c1);
}
+ else if (EQ (eol_type, Qmac))
+ c1 = '\n';
}
- charset = CHARSET_ASCII;
+ /* fall through */
+
+ case ISO_control_0:
+ MAYBE_FINISH_COMPOSITION ();
+ charset = CHARSET_FROM_ID (charset_ascii);
break;
+ case ISO_control_1:
+ MAYBE_FINISH_COMPOSITION ();
+ goto invalid_code;
+
case ISO_shift_out:
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 1) < 0)
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 1;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case ISO_shift_in:
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 0;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case ISO_single_shift_2_7:
case ISO_single_shift_2:
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
- goto label_invalid_code;
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
+ goto invalid_code;
/* SS2 is handled as an escape sequence of ESC 'N' */
c1 = 'N';
goto label_escape_sequence;
case ISO_single_shift_3:
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
- goto label_invalid_code;
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
+ goto invalid_code;
/* SS2 is handled as an escape sequence of ESC 'O' */
c1 = 'O';
goto label_escape_sequence;
@@ -1944,7 +2942,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
case ISO_escape:
ONE_MORE_BYTE (c1);
label_escape_sequence:
- /* Escape sequences handled by Emacs are invocation,
+ /* Escape sequences handled here are invocation,
designation, direction specification, and character
composition specification. */
switch (c1)
@@ -1952,89 +2950,93 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
case '&': /* revision of following character set */
ONE_MORE_BYTE (c1);
if (!(c1 >= '@' && c1 <= '~'))
- goto label_invalid_code;
+ goto invalid_code;
ONE_MORE_BYTE (c1);
if (c1 != ISO_CODE_ESC)
- goto label_invalid_code;
+ goto invalid_code;
ONE_MORE_BYTE (c1);
goto label_escape_sequence;
case '$': /* designation of 2-byte character set */
- if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
- goto label_invalid_code;
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
+ goto invalid_code;
ONE_MORE_BYTE (c1);
if (c1 >= '@' && c1 <= 'B')
{ /* designation of JISX0208.1978, GB2312.1980,
or JISX0208.1980 */
- DECODE_DESIGNATION (0, 2, 94, c1);
+ DECODE_DESIGNATION (0, 2, 0, c1);
}
else if (c1 >= 0x28 && c1 <= 0x2B)
{ /* designation of DIMENSION2_CHARS94 character set */
ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
+ DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
}
else if (c1 >= 0x2C && c1 <= 0x2F)
{ /* designation of DIMENSION2_CHARS96 character set */
ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
+ DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
}
else
- goto label_invalid_code;
+ goto invalid_code;
/* We must update these variables now. */
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
continue;
case 'n': /* invocation of locking-shift-2 */
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 2) < 0)
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 2;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case 'o': /* invocation of locking-shift-3 */
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 3) < 0)
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 3;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case 'N': /* invocation of single-shift-2 */
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
- goto label_invalid_code;
- charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 2) < 0)
+ goto invalid_code;
+ charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
- goto label_invalid_code;
+ goto invalid_code;
break;
case 'O': /* invocation of single-shift-3 */
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
- goto label_invalid_code;
- charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 3) < 0)
+ goto invalid_code;
+ charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
- goto label_invalid_code;
+ goto invalid_code;
break;
case '0': case '2': case '3': case '4': /* start composition */
+ if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
+ goto invalid_code;
DECODE_COMPOSITION_START (c1);
continue;
case '1': /* end composition */
- DECODE_COMPOSITION_END (c1);
+ if (composition_state == COMPOSING_NO)
+ goto invalid_code;
+ DECODE_COMPOSITION_END ();
continue;
case '[': /* specification of direction */
- if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
- goto label_invalid_code;
+ if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
+ goto invalid_code;
/* For the moment, nested direction is not supported.
So, `coding->mode & CODING_MODE_DIRECTION' zero means
- left-to-right, and nonzero means right-to-left. */
+ left-to-right, and nozero means right-to-left. */
ONE_MORE_BYTE (c1);
switch (c1)
{
@@ -2047,7 +3049,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
if (c1 == ']')
coding->mode &= ~CODING_MODE_DIRECTION;
else
- goto label_invalid_code;
+ goto invalid_code;
break;
case '2': /* start of right-to-left direction */
@@ -2055,17 +3057,15 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
if (c1 == ']')
coding->mode |= CODING_MODE_DIRECTION;
else
- goto label_invalid_code;
+ goto invalid_code;
break;
default:
- goto label_invalid_code;
+ goto invalid_code;
}
continue;
case '%':
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
ONE_MORE_BYTE (c1);
if (c1 == '/')
{
@@ -2074,46 +3074,40 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
We keep these bytes as is for the moment.
They may be decoded by post-read-conversion. */
int dim, M, L;
- int size, required;
- int produced_chars;
-
+ int size;
+
ONE_MORE_BYTE (dim);
ONE_MORE_BYTE (M);
ONE_MORE_BYTE (L);
size = ((M - 128) * 128) + (L - 128);
- required = 8 + size * 2;
- if (dst + required > (dst_bytes ? dst_end : src))
- goto label_end_of_loop;
- *dst++ = ISO_CODE_ESC;
- *dst++ = '%';
- *dst++ = '/';
- *dst++ = dim;
- produced_chars = 4;
- dst += CHAR_STRING (M, dst), produced_chars++;
- dst += CHAR_STRING (L, dst), produced_chars++;
+ if (charbuf + 8 + size > charbuf_end)
+ goto break_loop;
+ *charbuf++ = ISO_CODE_ESC;
+ *charbuf++ = '%';
+ *charbuf++ = '/';
+ *charbuf++ = dim;
+ *charbuf++ = BYTE8_TO_CHAR (M);
+ *charbuf++ = BYTE8_TO_CHAR (L);
while (size-- > 0)
{
ONE_MORE_BYTE (c1);
- dst += CHAR_STRING (c1, dst), produced_chars++;
+ *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
}
- coding->produced_char += produced_chars;
}
else if (c1 == 'G')
{
- unsigned char *d = dst;
- int produced_chars;
-
/* XFree86 extension for embedding UTF-8 in CTEXT:
ESC % G --UTF-8-BYTES-- ESC % @
We keep these bytes as is for the moment.
They may be decoded by post-read-conversion. */
- if (d + 6 > (dst_bytes ? dst_end : src))
- goto label_end_of_loop;
- *d++ = ISO_CODE_ESC;
- *d++ = '%';
- *d++ = 'G';
- produced_chars = 3;
- while (d + 1 < (dst_bytes ? dst_end : src))
+ int *p = charbuf;
+
+ if (p + 6 > charbuf_end)
+ goto break_loop;
+ *p++ = ISO_CODE_ESC;
+ *p++ = '%';
+ *p++ = 'G';
+ while (p < charbuf_end)
{
ONE_MORE_BYTE (c1);
if (c1 == ISO_CODE_ESC
@@ -2121,69 +3115,119 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
&& src[0] == '%'
&& src[1] == '@')
break;
- d += CHAR_STRING (c1, d), produced_chars++;
+ *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
}
- if (d + 3 > (dst_bytes ? dst_end : src))
- goto label_end_of_loop;
- *d++ = ISO_CODE_ESC;
- *d++ = '%';
- *d++ = '@';
- dst = d;
- coding->produced_char += produced_chars + 3;
+ if (p + 3 > charbuf_end)
+ goto break_loop;
+ *p++ = ISO_CODE_ESC;
+ *p++ = '%';
+ *p++ = '@';
+ charbuf = p;
}
else
- goto label_invalid_code;
+ goto invalid_code;
continue;
+ break;
default:
- if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
- goto label_invalid_code;
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
+ goto invalid_code;
if (c1 >= 0x28 && c1 <= 0x2B)
{ /* designation of DIMENSION1_CHARS94 character set */
ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
+ DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
}
else if (c1 >= 0x2C && c1 <= 0x2F)
{ /* designation of DIMENSION1_CHARS96 character set */
ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
+ DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
}
else
- goto label_invalid_code;
+ goto invalid_code;
/* We must update these variables now. */
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
continue;
}
}
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
+ }
+
/* Now we know CHARSET and 1st position code C1 of a character.
- Produce a multibyte sequence for that character while getting
- 2nd position code C2 if necessary. */
- if (CHARSET_DIMENSION (charset) == 2)
+ Produce a decoded character while getting 2nd position code
+ C2 if necessary. */
+ c1 &= 0x7F;
+ if (CHARSET_DIMENSION (charset) > 1)
{
ONE_MORE_BYTE (c2);
- if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
/* C2 is not in a valid range. */
- goto label_invalid_code;
+ goto invalid_code;
+ c1 = (c1 << 8) | (c2 & 0x7F);
+ if (CHARSET_DIMENSION (charset) > 2)
+ {
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
+ /* C2 is not in a valid range. */
+ goto invalid_code;
+ c1 = (c1 << 8) | (c2 & 0x7F);
+ }
+ }
+
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
+ if (c < 0)
+ {
+ MAYBE_FINISH_COMPOSITION ();
+ for (; src_base < src; src_base++, char_offset++)
+ {
+ if (ASCII_BYTE_P (*src_base))
+ *charbuf++ = *src_base;
+ else
+ *charbuf++ = BYTE8_TO_CHAR (*src_base);
+ }
+ }
+ else if (composition_state == COMPOSING_NO)
+ {
+ *charbuf++ = c;
+ char_offset++;
+ }
+ else
+ {
+ components[component_idx++] = c;
+ if (method == COMPOSITION_WITH_RULE
+ || (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && composition_state == COMPOSING_COMPONENT_CHAR))
+ composition_state++;
}
- c = DECODE_ISO_CHARACTER (charset, c1, c2);
- EMIT_CHAR (c);
continue;
- label_invalid_code:
- coding->errors++;
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
+ invalid_code:
+ MAYBE_FINISH_COMPOSITION ();
src = src_base;
- c = *src++;
- EMIT_CHAR (c);
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
+ continue;
+
+ break_loop:
+ break;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
- return;
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
@@ -2191,9 +3235,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
/*
It is not enough to say just "ISO2022" on encoding, we have to
- specify more details. In Emacs, each ISO2022 coding system
+ specify more details. In Emacs, each coding system of ISO2022
variant has the following specifications:
- 1. Initial designation to G0 through G3.
+ 1. Initial designation to G0 thru G3.
2. Allows short-form designation?
3. ASCII should be designated to G0 before control characters?
4. ASCII should be designated to G0 at end of line?
@@ -2203,8 +3247,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
And the following two are only for Japanese:
8. Use ASCII in place of JIS0201-1976-Roman?
9. Use JISX0208-1983 in place of JISX0208-1978?
- These specifications are encoded in `coding->flags' as flag bits
- defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
+ These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
+ defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
details.
*/
@@ -2215,115 +3259,136 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
#define ENCODE_DESIGNATION(charset, reg, coding) \
do { \
- unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
+ unsigned char final_char = CHARSET_ISO_FINAL (charset); \
char *intermediate_char_94 = "()*+"; \
char *intermediate_char_96 = ",-./"; \
- int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
- \
- if (revision < 255) \
+ int revision = -1; \
+ int c; \
+ \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
+ revision = CHARSET_ISO_REVISION (charset); \
+ \
+ if (revision >= 0) \
{ \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = '&'; \
- *dst++ = '@' + revision; \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
+ EMIT_ONE_BYTE ('@' + revision); \
} \
- *dst++ = ISO_CODE_ESC; \
+ EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
if (CHARSET_DIMENSION (charset) == 1) \
{ \
- if (CHARSET_CHARS (charset) == 94) \
- *dst++ = (unsigned char) (intermediate_char_94[reg]); \
+ if (! CHARSET_ISO_CHARS_96 (charset)) \
+ c = intermediate_char_94[reg]; \
else \
- *dst++ = (unsigned char) (intermediate_char_96[reg]); \
+ c = intermediate_char_96[reg]; \
+ EMIT_ONE_ASCII_BYTE (c); \
} \
else \
{ \
- *dst++ = '$'; \
- if (CHARSET_CHARS (charset) == 94) \
+ EMIT_ONE_ASCII_BYTE ('$'); \
+ if (! CHARSET_ISO_CHARS_96 (charset)) \
{ \
- if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
|| reg != 0 \
|| final_char < '@' || final_char > 'B') \
- *dst++ = (unsigned char) (intermediate_char_94[reg]); \
+ EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
} \
else \
- *dst++ = (unsigned char) (intermediate_char_96[reg]); \
+ EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
} \
- *dst++ = final_char; \
- CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
+ EMIT_ONE_ASCII_BYTE (final_char); \
+ \
+ CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
} while (0)
+
/* The following two macros produce codes (control character or escape
sequence) for ISO2022 single-shift functions (single-shift-2 and
single-shift-3). */
-#define ENCODE_SINGLE_SHIFT_2 \
- do { \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
- else \
- *dst++ = ISO_CODE_SS2; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
+#define ENCODE_SINGLE_SHIFT_2 \
+ do { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
+ else \
+ EMIT_ONE_BYTE (ISO_CODE_SS2); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
-#define ENCODE_SINGLE_SHIFT_3 \
- do { \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
- else \
- *dst++ = ISO_CODE_SS3; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
+
+#define ENCODE_SINGLE_SHIFT_3 \
+ do { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
+ else \
+ EMIT_ONE_BYTE (ISO_CODE_SS3); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
+
/* The following four macros produce codes (control character or
escape sequence) for ISO2022 locking-shift functions (shift-in,
shift-out, locking-shift-2, and locking-shift-3). */
-#define ENCODE_SHIFT_IN \
- do { \
- *dst++ = ISO_CODE_SI; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
+#define ENCODE_SHIFT_IN \
+ do { \
+ EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
+ CODING_ISO_INVOCATION (coding, 0) = 0; \
} while (0)
-#define ENCODE_SHIFT_OUT \
- do { \
- *dst++ = ISO_CODE_SO; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
+
+#define ENCODE_SHIFT_OUT \
+ do { \
+ EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
+ CODING_ISO_INVOCATION (coding, 0) = 1; \
} while (0)
-#define ENCODE_LOCKING_SHIFT_2 \
- do { \
- *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
+
+#define ENCODE_LOCKING_SHIFT_2 \
+ do { \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
+ CODING_ISO_INVOCATION (coding, 0) = 2; \
} while (0)
-#define ENCODE_LOCKING_SHIFT_3 \
- do { \
- *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
+
+#define ENCODE_LOCKING_SHIFT_3 \
+ do { \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
+ CODING_ISO_INVOCATION (coding, 0) = 3; \
} while (0)
+
/* Produce codes for a DIMENSION1 character whose character set is
CHARSET and whose position-code is C1. Designation and invocation
sequences are also produced in advance if necessary. */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
do { \
- if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
+ int id = CHARSET_ID (charset); \
+ \
+ if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
+ && id == charset_ascii) \
{ \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = c1 & 0x7F; \
+ id = charset_jisx0201_roman; \
+ charset = CHARSET_FROM_ID (id); \
+ } \
+ \
+ if (CODING_ISO_SINGLE_SHIFTING (coding)) \
+ { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
else \
- *dst++ = c1 | 0x80; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
+ EMIT_ONE_BYTE (c1 | 0x80); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
{ \
- *dst++ = c1 & 0x7F; \
+ EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
{ \
- *dst++ = c1 | 0x80; \
+ EMIT_ONE_BYTE (c1 | 0x80); \
break; \
} \
else \
@@ -2331,32 +3396,43 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
must invoke it, or, at first, designate it to some graphic \
register. Then repeat the loop to actually produce the \
character. */ \
- dst = encode_invocation_designation (charset, coding, dst); \
+ dst = encode_invocation_designation (charset, coding, dst, \
+ &produced_chars); \
} while (1)
+
/* Produce codes for a DIMENSION2 character whose character set is
CHARSET and whose position-codes are C1 and C2. Designation and
invocation codes are also produced in advance if necessary. */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
do { \
- if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
+ int id = CHARSET_ID (charset); \
+ \
+ if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
+ && id == charset_jisx0208) \
{ \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
+ id = charset_jisx0208_1978; \
+ charset = CHARSET_FROM_ID (id); \
+ } \
+ \
+ if (CODING_ISO_SINGLE_SHIFTING (coding)) \
+ { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
else \
- *dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
+ EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
{ \
- *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
+ EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
{ \
- *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
+ EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
break; \
} \
else \
@@ -2364,73 +3440,49 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
must invoke it, or, at first, designate it to some graphic \
register. Then repeat the loop to actually produce the \
character. */ \
- dst = encode_invocation_designation (charset, coding, dst); \
+ dst = encode_invocation_designation (charset, coding, dst, \
+ &produced_chars); \
} while (1)
-#define ENCODE_ISO_CHARACTER(c) \
- do { \
- int charset, c1, c2; \
- \
- SPLIT_CHAR (c, charset, c1, c2); \
- if (CHARSET_DEFINED_P (charset)) \
- { \
- if (CHARSET_DIMENSION (charset) == 1) \
- { \
- if (charset == CHARSET_ASCII \
- && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
- charset = charset_latin_jisx0201; \
- ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \
- } \
- else \
- { \
- if (charset == charset_jisx0208 \
- && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
- charset = charset_jisx0208_1978; \
- ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \
- } \
- } \
- else \
- { \
- *dst++ = c1; \
- if (c2 >= 0) \
- *dst++ = c2; \
- } \
- } while (0)
-
-
-/* Instead of encoding character C, produce one or two `?'s. */
-#define ENCODE_UNSAFE_CHARACTER(c) \
- do { \
- ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
- if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
- ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
+#define ENCODE_ISO_CHARACTER(charset, c) \
+ do { \
+ int code = ENCODE_CHAR ((charset),(c)); \
+ \
+ if (CHARSET_DIMENSION (charset) == 1) \
+ ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
+ else \
+ ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
} while (0)
/* Produce designation and invocation codes at a place pointed by DST
- to use CHARSET. The element `spec.iso2022' of *CODING is updated.
+ to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
Return new DST. */
unsigned char *
-encode_invocation_designation (charset, coding, dst)
- int charset;
+encode_invocation_designation (charset, coding, dst, p_nchars)
+ struct charset *charset;
struct coding_system *coding;
unsigned char *dst;
+ int *p_nchars;
{
+ int multibytep = coding->dst_multibyte;
+ int produced_chars = *p_nchars;
int reg; /* graphic register number */
+ int id = CHARSET_ID (charset);
/* At first, check designations. */
for (reg = 0; reg < 4; reg++)
- if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
+ if (id == CODING_ISO_DESIGNATION (coding, reg))
break;
if (reg >= 4)
{
/* CHARSET is not yet designated to any graphic registers. */
/* At first check the requested designation. */
- reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
- if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
+ reg = CODING_ISO_REQUEST (coding, id);
+ if (reg < 0)
/* Since CHARSET requests no special designation, designate it
to graphic register 0. */
reg = 0;
@@ -2438,8 +3490,8 @@ encode_invocation_designation (charset, coding, dst)
ENCODE_DESIGNATION (charset, reg, coding);
}
- if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
- && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
+ if (CODING_ISO_INVOCATION (coding, 0) != reg
+ && CODING_ISO_INVOCATION (coding, 1) != reg)
{
/* Since the graphic register REG is not invoked to any graphic
planes, invoke it to graphic plane 0. */
@@ -2454,14 +3506,14 @@ encode_invocation_designation (charset, coding, dst)
break;
case 2: /* graphic register 2 */
- if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
ENCODE_SINGLE_SHIFT_2;
else
ENCODE_LOCKING_SHIFT_2;
break;
case 3: /* graphic register 3 */
- if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
ENCODE_SINGLE_SHIFT_3;
else
ENCODE_LOCKING_SHIFT_3;
@@ -2469,98 +3521,55 @@ encode_invocation_designation (charset, coding, dst)
}
}
+ *p_nchars = produced_chars;
return dst;
}
-/* Produce 2-byte codes for encoded composition rule RULE. */
-
-#define ENCODE_COMPOSITION_RULE(rule) \
- do { \
- int gref, nref; \
- COMPOSITION_DECODE_RULE (rule, gref, nref); \
- *dst++ = 32 + 81 + gref; \
- *dst++ = 32 + nref; \
- } while (0)
-
-/* Produce codes for indicating the start of a composition sequence
- (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers
- which specify information about the composition. See the comment
- in coding.h for the format of DATA. */
-
-#define ENCODE_COMPOSITION_START(coding, data) \
+/* The following three macros produce codes for indicating direction
+ of text. */
+#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
do { \
- coding->composing = data[3]; \
- *dst++ = ISO_CODE_ESC; \
- if (coding->composing == COMPOSITION_RELATIVE) \
- *dst++ = '0'; \
+ if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
else \
- { \
- *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \
- ? '3' : '4'); \
- coding->cmp_data_index = coding->cmp_data_start + 4; \
- coding->composition_rule_follows = 0; \
- } \
+ EMIT_ONE_BYTE (ISO_CODE_CSI); \
} while (0)
-/* Produce codes for indicating the end of the current composition. */
-#define ENCODE_COMPOSITION_END(coding, data) \
- do { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = '1'; \
- coding->cmp_data_start += data[0]; \
- coding->composing = COMPOSITION_NO; \
- if (coding->cmp_data_start == coding->cmp_data->used \
- && coding->cmp_data->next) \
- { \
- coding->cmp_data = coding->cmp_data->next; \
- coding->cmp_data_start = 0; \
- } \
+#define ENCODE_DIRECTION_R2L() \
+ do { \
+ ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
+ EMIT_TWO_ASCII_BYTES ('2', ']'); \
} while (0)
-/* Produce composition start sequence ESC 0. Here, this sequence
- doesn't mean the start of a new composition but means that we have
- just produced components (alternate chars and composition rules) of
- the composition and the actual text follows in SRC. */
-#define ENCODE_COMPOSITION_FAKE_START(coding) \
+#define ENCODE_DIRECTION_L2R() \
do { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = '0'; \
- coding->composing = COMPOSITION_RELATIVE; \
+ ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
+ EMIT_TWO_ASCII_BYTES ('0', ']'); \
} while (0)
-/* The following three macros produce codes for indicating direction
- of text. */
-#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
- do { \
- if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = '['; \
- else \
- *dst++ = ISO_CODE_CSI; \
- } while (0)
-
-#define ENCODE_DIRECTION_R2L \
- ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']'
-
-#define ENCODE_DIRECTION_L2R \
- ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']'
/* Produce codes for designation and invocation to reset the graphic
planes and registers to initial state. */
-#define ENCODE_RESET_PLANE_AND_REGISTER \
- do { \
- int reg; \
- if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
- ENCODE_SHIFT_IN; \
- for (reg = 0; reg < 4; reg++) \
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
- && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
- != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
- ENCODE_DESIGNATION \
- (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
+#define ENCODE_RESET_PLANE_AND_REGISTER() \
+ do { \
+ int reg; \
+ struct charset *charset; \
+ \
+ if (CODING_ISO_INVOCATION (coding, 0) != 0) \
+ ENCODE_SHIFT_IN; \
+ for (reg = 0; reg < 4; reg++) \
+ if (CODING_ISO_INITIAL (coding, reg) >= 0 \
+ && (CODING_ISO_DESIGNATION (coding, reg) \
+ != CODING_ISO_INITIAL (coding, reg))) \
+ { \
+ charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
+ ENCODE_DESIGNATION (charset, reg, coding); \
+ } \
} while (0)
+
/* Produce designation sequences of charsets in the line started from
SRC to a place pointed by DST, and return updated DST.
@@ -2568,40 +3577,51 @@ encode_invocation_designation (charset, coding, dst)
find all the necessary designations. */
static unsigned char *
-encode_designation_at_bol (coding, translation_table, src, src_end, dst)
+encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
struct coding_system *coding;
- Lisp_Object translation_table;
- unsigned char *src, *src_end, *dst;
+ int *charbuf, *charbuf_end;
+ unsigned char *dst;
{
- int charset, c, found = 0, reg;
+ struct charset *charset;
/* Table of charsets to be designated to each graphic register. */
int r[4];
+ int c, found = 0, reg;
+ int produced_chars = 0;
+ int multibytep = coding->dst_multibyte;
+ Lisp_Object attrs;
+ Lisp_Object charset_list;
+
+ attrs = CODING_ID_ATTRS (coding->id);
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ if (EQ (charset_list, Qiso_2022))
+ charset_list = Viso_2022_charset_list;
for (reg = 0; reg < 4; reg++)
r[reg] = -1;
while (found < 4)
{
- ONE_MORE_CHAR (c);
+ int id;
+
+ c = *charbuf++;
if (c == '\n')
break;
-
- charset = CHAR_CHARSET (c);
- reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
- if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
+ charset = char_charset (c, charset_list, NULL);
+ id = CHARSET_ID (charset);
+ reg = CODING_ISO_REQUEST (coding, id);
+ if (reg >= 0 && r[reg] < 0)
{
found++;
- r[reg] = charset;
+ r[reg] = id;
}
}
- label_end_of_loop:
if (found)
{
for (reg = 0; reg < 4; reg++)
if (r[reg] >= 0
- && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
- ENCODE_DESIGNATION (r[reg], reg, coding);
+ && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
+ ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
}
return dst;
@@ -2609,187 +3629,156 @@ encode_designation_at_bol (coding, translation_table, src, src_end, dst)
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
-static void
-encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
+static int
+encode_coding_iso_2022 (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 20, we subtract 19
- from DST_END to assure overflow checking is necessary only at the
- head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 19;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text to
- analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
- there's not enough destination area to produce encoded codes
- (within macro EMIT_BYTES). */
- unsigned char *src_base;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 16;
+ int bol_designation
+ = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
+ && CODING_ISO_BOL (coding));
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list;
+ int ascii_compatible;
int c;
- Lisp_Object translation_table;
- Lisp_Object safe_chars;
+ int preferred_charset_id = -1;
- if (coding->flags & CODING_FLAG_ISO_SAFE)
- coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ setup_iso_safe_charsets (attrs);
+ /* Charset list may have been changed. */
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
+ coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
- safe_chars = coding_safe_chars (coding->symbol);
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
+ while (charbuf < charbuf_end)
{
- translation_table = coding->translation_table_for_encode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_encode;
- }
+ ASSURE_DESTINATION (safe_room);
- coding->consumed_char = 0;
- coding->errors = 0;
- while (1)
- {
- src_base = src;
-
- if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
+ if (bol_designation)
{
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- break;
- }
+ unsigned char *dst_prev = dst;
- if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
- && CODING_SPEC_ISO_BOL (coding))
- {
/* We have to produce designation sequences if any now. */
- dst = encode_designation_at_bol (coding, translation_table,
- src, src_end, dst);
- CODING_SPEC_ISO_BOL (coding) = 0;
+ dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
+ bol_designation = 0;
+ /* We are sure that designation sequences are all ASCII bytes. */
+ produced_chars += dst - dst_prev;
}
- /* Check composition start and end. */
- if (coding->composing != COMPOSITION_DISABLED
- && coding->cmp_data_start < coding->cmp_data->used)
- {
- struct composition_data *cmp_data = coding->cmp_data;
- int *data = cmp_data->data + coding->cmp_data_start;
- int this_pos = cmp_data->char_offset + coding->consumed_char;
+ c = *charbuf++;
- if (coding->composing == COMPOSITION_RELATIVE)
- {
- if (this_pos == data[2])
- {
- ENCODE_COMPOSITION_END (coding, data);
- cmp_data = coding->cmp_data;
- data = cmp_data->data + coding->cmp_data_start;
- }
- }
- else if (COMPOSING_P (coding))
- {
- /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR */
- if (coding->cmp_data_index == coding->cmp_data_start + data[0])
- /* We have consumed components of the composition.
- What follows in SRC is the composition's base
- text. */
- ENCODE_COMPOSITION_FAKE_START (coding);
- else
- {
- int c = cmp_data->data[coding->cmp_data_index++];
- if (coding->composition_rule_follows)
- {
- ENCODE_COMPOSITION_RULE (c);
- coding->composition_rule_follows = 0;
- }
- else
- {
- if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
- && ! CODING_SAFE_CHAR_P (safe_chars, c))
- ENCODE_UNSAFE_CHARACTER (c);
- else
- ENCODE_ISO_CHARACTER (c);
- if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
- coding->composition_rule_follows = 1;
- }
- continue;
- }
- }
- if (!COMPOSING_P (coding))
+ if (c < 0)
+ {
+ /* Handle an annotation. */
+ switch (*charbuf)
{
- if (this_pos == data[1])
- {
- ENCODE_COMPOSITION_START (coding, data);
- continue;
- }
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ /* Not yet implemented. */
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ preferred_charset_id = charbuf[3];
+ if (preferred_charset_id >= 0
+ && NILP (Fmemq (make_number (preferred_charset_id),
+ charset_list)))
+ preferred_charset_id = -1;
+ break;
+ default:
+ abort ();
}
+ charbuf += -c - 1;
+ continue;
}
- ONE_MORE_CHAR (c);
-
/* Now encode the character C. */
if (c < 0x20 || c == 0x7F)
{
- if (c == '\r')
+ if (c == '\n'
+ || (c == '\r' && EQ (eol_type, Qmac)))
{
- if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
+ ENCODE_RESET_PLANE_AND_REGISTER ();
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
{
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c;
- continue;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ CODING_ISO_DESIGNATION (coding, i)
+ = CODING_ISO_INITIAL (coding, i);
}
- /* fall down to treat '\r' as '\n' ... */
- c = '\n';
- }
- if (c == '\n')
- {
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
- bcopy (coding->spec.iso2022.initial_designation,
- coding->spec.iso2022.current_designation,
- sizeof coding->spec.iso2022.initial_designation);
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- *dst++ = ISO_CODE_LF;
- else if (coding->eol_type == CODING_EOL_CRLF)
- *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
- else
- *dst++ = ISO_CODE_CR;
- CODING_SPEC_ISO_BOL (coding) = 1;
+ bol_designation
+ = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
}
+ else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
+ ENCODE_RESET_PLANE_AND_REGISTER ();
+ EMIT_ONE_ASCII_BYTE (c);
+ }
+ else if (ASCII_CHAR_P (c))
+ {
+ if (ascii_compatible)
+ EMIT_ONE_ASCII_BYTE (c);
else
{
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c;
+ struct charset *charset = CHARSET_FROM_ID (charset_ascii);
+ ENCODE_ISO_CHARACTER (charset, c);
}
}
- else if (ASCII_BYTE_P (c))
- ENCODE_ISO_CHARACTER (c);
- else if (SINGLE_BYTE_CHAR_P (c))
+ else if (CHAR_BYTE8_P (c))
{
- *dst++ = c;
- coding->errors++;
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
- else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
- && ! CODING_SAFE_CHAR_P (safe_chars, c))
- ENCODE_UNSAFE_CHARACTER (c);
else
- ENCODE_ISO_CHARACTER (c);
+ {
+ struct charset *charset;
- coding->consumed_char++;
+ if (preferred_charset_id >= 0)
+ {
+ charset = CHARSET_FROM_ID (preferred_charset_id);
+ if (! CHAR_CHARSET_P (c, charset))
+ charset = char_charset (c, charset_list, NULL);
+ }
+ else
+ charset = char_charset (c, charset_list, NULL);
+ if (!charset)
+ {
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ {
+ c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
+ }
+ else
+ {
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, NULL);
+ }
+ }
+ ENCODE_ISO_CHARACTER (charset, c);
+ }
}
- label_end_of_loop:
- coding->consumed = src_base - source;
- coding->produced = coding->produced_char = dst - destination;
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
+ {
+ ASSURE_DESTINATION (safe_room);
+ ENCODE_RESET_PLANE_AND_REGISTER ();
+ }
+ coding->result = CODING_RESULT_SUCCESS;
+ CODING_ISO_BOL (coding) = bol_designation;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/*** 4. SJIS and BIG5 handlers ***/
+/*** 8,9. SJIS and BIG5 handlers ***/
-/* Although SJIS and BIG5 are not ISO coding systems, they are used
+/* Although SJIS and BIG5 are not ISO's coding system, they are used
quite widely. So, for the moment, Emacs supports them in the bare
C code. But, in the future, they may be supported only by CCL. */
@@ -2798,12 +3787,12 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
as is. A character of charset katakana-jisx0201 is encoded by
"position-code + 0x80". A character of charset japanese-jisx0208
is encoded in 2-byte but two position-codes are divided and shifted
- so that it fits in the range below.
+ so that it fit in the range below.
--- CODE RANGE of SJIS ---
(character set) (range)
ASCII 0x00 .. 0x7F
- KATAKANA-JISX0201 0xA1 .. 0xDF
+ KATAKANA-JISX0201 0xA0 .. 0xDF
JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
(2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
-------------------------------
@@ -2812,7 +3801,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
/* BIG5 is a coding system encoding two character sets: ASCII and
Big5. An ASCII character is encoded as is. Big5 is a two-byte
- character set and is encoded in two bytes.
+ character set and is encoded in two-byte.
--- CODE RANGE of BIG5 ---
(character set) (range)
@@ -2821,315 +3810,321 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
(2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
--------------------------
- Since the number of characters in Big5 is larger than maximum
- characters in Emacs' charset (96x96), it can't be handled as one
- charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
- and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
- contains frequently used characters and the latter contains less
- frequently used characters. */
-
-/* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
- are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
- C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
- format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
-
-/* Number of Big5 characters which have the same code in 1st byte. */
-#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
-
-#define DECODE_BIG5(b1, b2, charset, c1, c2) \
- do { \
- unsigned int temp \
- = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62); \
- if (b1 < 0xC9) \
- charset = charset_big5_1; \
- else \
- { \
- charset = charset_big5_2; \
- temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; \
- } \
- c1 = temp / (0xFF - 0xA1) + 0x21; \
- c2 = temp % (0xFF - 0xA1) + 0x21; \
- } while (0)
-
-#define ENCODE_BIG5(charset, c1, c2, b1, b2) \
- do { \
- unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21); \
- if (charset == charset_big5_2) \
- temp += BIG5_SAME_ROW * (0xC9 - 0xA1); \
- b1 = temp / BIG5_SAME_ROW + 0xA1; \
- b2 = temp % BIG5_SAME_ROW; \
- b2 += b2 < 0x3F ? 0x40 : 0x62; \
- } while (0)
+ */
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in SJIS. If it is, return
- CODING_CATEGORY_MASK_SJIS, else return 0. */
+ CATEGORY_MASK_SJIS, else return 0. */
static int
-detect_coding_sjis (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_sjis (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
int c;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ int incomplete;
+
+ detect_info->checked |= CATEGORY_MASK_SJIS;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ incomplete = 0;
+ ONE_MORE_BYTE (c);
+ incomplete = 1;
if (c < 0x80)
continue;
- if (c == 0x80 || c == 0xA0 || c > 0xEF)
- return 0;
- if (c <= 0x9F || c >= 0xE0)
+ if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c < 0x40 || c == 0x7F || c > 0xFC)
- return 0;
+ break;
+ found = CATEGORY_MASK_SJIS;
}
+ else if (c >= 0xA0 && c < 0xE0)
+ found = CATEGORY_MASK_SJIS;
+ else
+ break;
+ }
+ detect_info->rejected |= CATEGORY_MASK_SJIS;
+ return 0;
+
+ no_more_source:
+ if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_SJIS;
+ return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_SJIS;
+ detect_info->found |= found;
+ return 1;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in BIG5. If it is, return
- CODING_CATEGORY_MASK_BIG5, else return 0. */
+ CATEGORY_MASK_BIG5, else return 0. */
static int
-detect_coding_big5 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_big5 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
int c;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
-
- while (1)
- {
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (c < 0x80)
- continue;
- if (c < 0xA1 || c > 0xFE)
- return 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
- return 0;
- }
- label_end_of_loop:
- return CODING_CATEGORY_MASK_BIG5;
-}
+ int incomplete;
-/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in UTF-8. If it is, return
- CODING_CATEGORY_MASK_UTF_8, else return 0. */
-
-#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
-#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
-#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
-#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
-#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
-#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
-#define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
-
-static int
-detect_coding_utf_8 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
-{
- unsigned char c;
- int seq_maybe_bytes;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ detect_info->checked |= CATEGORY_MASK_BIG5;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (UTF_8_1_OCTET_P (c))
+ incomplete = 0;
+ ONE_MORE_BYTE (c);
+ incomplete = 1;
+ if (c < 0x80)
continue;
- else if (UTF_8_2_OCTET_LEADING_P (c))
- seq_maybe_bytes = 1;
- else if (UTF_8_3_OCTET_LEADING_P (c))
- seq_maybe_bytes = 2;
- else if (UTF_8_4_OCTET_LEADING_P (c))
- seq_maybe_bytes = 3;
- else if (UTF_8_5_OCTET_LEADING_P (c))
- seq_maybe_bytes = 4;
- else if (UTF_8_6_OCTET_LEADING_P (c))
- seq_maybe_bytes = 5;
- else
- return 0;
-
- do
+ if (c >= 0xA1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (!UTF_8_EXTRA_OCTET_P (c))
+ ONE_MORE_BYTE (c);
+ if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
return 0;
- seq_maybe_bytes--;
+ found = CATEGORY_MASK_BIG5;
}
- while (seq_maybe_bytes > 0);
+ else
+ break;
}
-
- label_end_of_loop:
- return CODING_CATEGORY_MASK_UTF_8;
-}
-
-/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
- Little Endian (otherwise). If it is, return
- CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
- else return 0. */
-
-#define UTF_16_INVALID_P(val) \
- (((val) == 0xFFFE) \
- || ((val) == 0xFFFF))
-
-#define UTF_16_HIGH_SURROGATE_P(val) \
- (((val) & 0xD800) == 0xD800)
-
-#define UTF_16_LOW_SURROGATE_P(val) \
- (((val) & 0xDC00) == 0xDC00)
-
-static int
-detect_coding_utf_16 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
-{
- unsigned char c1, c2;
- /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
-
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
-
- if ((c1 == 0xFF) && (c2 == 0xFE))
- return CODING_CATEGORY_MASK_UTF_16_LE;
- else if ((c1 == 0xFE) && (c2 == 0xFF))
- return CODING_CATEGORY_MASK_UTF_16_BE;
-
- label_end_of_loop:
+ detect_info->rejected |= CATEGORY_MASK_BIG5;
return 0;
+
+ no_more_source:
+ if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_BIG5;
+ return 0;
+ }
+ detect_info->found |= found;
+ return 1;
}
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
static void
-decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, sjis_p)
+decode_coding_sjis (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
- int sjis_p;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code
- (within macro ONE_MORE_BYTE), or when there's not enough
- destination area to produce a character (within macro
- EMIT_CHAR). */
- unsigned char *src_base;
- Lisp_Object translation_table;
-
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_decode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
- }
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ struct charset *charset_roman, *charset_kanji, *charset_kana;
+ Lisp_Object attrs, eol_type, charset_list, val;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
- coding->produced_char = 0;
while (1)
{
- int c, charset, c1, c2;
+ int c, c1;
src_base = src;
- ONE_MORE_BYTE (c1);
+ consumed_chars_base = consumed_chars;
- if (c1 < 0x80)
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c);
+
+ if (c == '\r')
{
- charset = CHARSET_ASCII;
- if (c1 < 0x20)
+ if (EQ (eol_type, Qdos))
{
- if (c1 == '\r')
- {
- if (coding->eol_type == CODING_EOL_CRLF)
- {
- ONE_MORE_BYTE (c2);
- if (c2 == '\n')
- c1 = c2;
- else
- /* To process C2 again, SRC is subtracted by 1. */
- src--;
- }
- else if (coding->eol_type == CODING_EOL_CR)
- c1 = '\n';
- }
- else if (c1 == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- && (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF))
+ if (src == src_end)
{
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
}
+ if (*src == '\n')
+ ONE_MORE_BYTE (c);
}
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
}
else
- {
- if (sjis_p)
+ {
+ struct charset *charset;
+
+ if (c < 0x80)
+ charset = charset_roman;
+ else
{
- if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
- goto label_invalid_code;
- if (c1 <= 0x9F || c1 >= 0xE0)
+ if (c >= 0xF0)
+ goto invalid_code;
+ if (c < 0xA0 || c >= 0xE0)
{
/* SJIS -> JISX0208 */
- ONE_MORE_BYTE (c2);
- if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
- goto label_invalid_code;
- DECODE_SJIS (c1, c2, c1, c2);
- charset = charset_jisx0208;
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
+ goto invalid_code;
+ c = (c << 8) | c1;
+ SJIS_TO_JIS (c);
+ charset = charset_kanji;
+ }
+ else if (c > 0xA0)
+ {
+ /* SJIS -> JISX0201-Kana */
+ c &= 0x7F;
+ charset = charset_kana;
}
else
- /* SJIS -> JISX0201-Kana */
- charset = charset_katakana_jisx0201;
+ goto invalid_code;
+ }
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
}
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
+ }
+ *charbuf++ = c;
+ char_offset++;
+ continue;
+
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
+ }
+
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
+
+static void
+decode_coding_big5 (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ struct charset *charset_roman, *charset_big5;
+ Lisp_Object attrs, eol_type, charset_list, val;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
+
+ while (1)
+ {
+ int c, c1;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c);
+
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src == src_end)
+ {
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
+ }
+ if (*src == '\n')
+ ONE_MORE_BYTE (c);
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ else
+ {
+ struct charset *charset;
+ if (c < 0x80)
+ charset = charset_roman;
else
{
/* BIG5 -> Big5 */
- if (c1 < 0xA0 || c1 > 0xFE)
- goto label_invalid_code;
- ONE_MORE_BYTE (c2);
- if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
- goto label_invalid_code;
- DECODE_BIG5 (c1, c2, charset, c1, c2);
+ if (c < 0xA1 || c > 0xFE)
+ goto invalid_code;
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
+ goto invalid_code;
+ c = c << 8 | c1;
+ charset = charset_big5;
}
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
}
- c = DECODE_ISO_CHARACTER (charset, c1, c2);
- EMIT_CHAR (c);
+ *charbuf++ = c;
+ char_offset++;
continue;
- label_invalid_code:
- coding->errors++;
+ invalid_code:
src = src_base;
- c = *src++;
- EMIT_CHAR (c);
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
- return;
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
@@ -3140,815 +4135,913 @@ decode_coding_sjis_big5 (coding, source, destination,
charsets are produced without any encoding. If SJIS_P is 1, encode
SJIS text, else encode BIG5 text. */
-static void
-encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, sjis_p)
+static int
+encode_coding_sjis (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
- int sjis_p;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text to
- analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
- there's not enough destination area to produce encoded codes
- (within macro EMIT_BYTES). */
- unsigned char *src_base;
- Lisp_Object translation_table;
-
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_encode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_encode;
- }
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 4;
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list, val;
+ int ascii_compatible;
+ struct charset *charset_roman, *charset_kanji, *charset_kana;
+ int c;
- while (1)
- {
- int c, charset, c1, c2;
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
- src_base = src;
- ONE_MORE_CHAR (c);
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
/* Now encode the character C. */
- if (SINGLE_BYTE_CHAR_P (c))
+ if (ASCII_CHAR_P (c) && ascii_compatible)
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
{
- switch (c)
+ unsigned code;
+ struct charset *charset = char_charset (c, charset_list, &code);
+
+ if (!charset)
{
- case '\r':
- if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
{
- EMIT_ONE_BYTE (c);
- break;
+ code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
}
- c = '\n';
- case '\n':
- if (coding->eol_type == CODING_EOL_CRLF)
+ else
{
- EMIT_TWO_BYTES ('\r', c);
- break;
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, &code);
}
- else if (coding->eol_type == CODING_EOL_CR)
- c = '\r';
- default:
- EMIT_ONE_BYTE (c);
}
+ if (code == CHARSET_INVALID_CODE (charset))
+ abort ();
+ if (charset == charset_kanji)
+ {
+ int c1, c2;
+ JIS_TO_SJIS (code);
+ c1 = code >> 8, c2 = code & 0xFF;
+ EMIT_TWO_BYTES (c1, c2);
+ }
+ else if (charset == charset_kana)
+ EMIT_ONE_BYTE (code | 0x80);
+ else
+ EMIT_ONE_ASCII_BYTE (code & 0x7F);
+ }
+ }
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
+}
+
+static int
+encode_coding_big5 (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 4;
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list, val;
+ int ascii_compatible;
+ struct charset *charset_roman, *charset_big5;
+ int c;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ /* Now encode the character C. */
+ if (ASCII_CHAR_P (c) && ascii_compatible)
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
else
{
- SPLIT_CHAR (c, charset, c1, c2);
- if (sjis_p)
+ unsigned code;
+ struct charset *charset = char_charset (c, charset_list, &code);
+
+ if (! charset)
{
- if (charset == charset_jisx0208
- || charset == charset_jisx0208_1978)
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
{
- ENCODE_SJIS (c1, c2, c1, c2);
- EMIT_TWO_BYTES (c1, c2);
+ code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
}
- else if (charset == charset_katakana_jisx0201)
- EMIT_ONE_BYTE (c1 | 0x80);
- else if (charset == charset_latin_jisx0201)
- EMIT_ONE_BYTE (c1);
- else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+ else
{
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
- if (CHARSET_WIDTH (charset) > 1)
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, &code);
}
- else
- /* There's no way other than producing the internal
- codes as is. */
- EMIT_BYTES (src_base, src);
}
- else
+ if (code == CHARSET_INVALID_CODE (charset))
+ abort ();
+ if (charset == charset_big5)
{
- if (charset == charset_big5_1 || charset == charset_big5_2)
- {
- ENCODE_BIG5 (charset, c1, c2, c1, c2);
- EMIT_TWO_BYTES (c1, c2);
- }
- else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
- {
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
- if (CHARSET_WIDTH (charset) > 1)
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
- }
- else
- /* There's no way other than producing the internal
- codes as is. */
- EMIT_BYTES (src_base, src);
+ int c1, c2;
+
+ c1 = code >> 8, c2 = code & 0xFF;
+ EMIT_TWO_BYTES (c1, c2);
}
+ else
+ EMIT_ONE_ASCII_BYTE (code & 0x7F);
}
- coding->consumed_char++;
}
-
- label_end_of_loop:
- coding->consumed = src_base - source;
- coding->produced = coding->produced_char = dst - destination;
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/*** 5. CCL handlers ***/
+/*** 10. CCL handlers ***/
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in a coding system of which
encoder/decoder are written in CCL program. If it is, return
- CODING_CATEGORY_MASK_CCL, else return 0. */
+ CATEGORY_MASK_CCL, else return 0. */
static int
-detect_coding_ccl (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_ccl (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- unsigned char *valid;
- int c;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
+ unsigned char *valids = CODING_CCL_VALIDS (coding);
+ int head_ascii = coding->head_ascii;
+ Lisp_Object attrs;
+
+ detect_info->checked |= CATEGORY_MASK_CCL;
+
+ coding = &coding_categories[coding_category_ccl];
+ attrs = CODING_ID_ATTRS (coding->id);
+ if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ src += head_ascii;
- /* No coding system is assigned to coding-category-ccl. */
- if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
- return 0;
-
- valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (! valid[c])
- return 0;
+ int c;
+ ONE_MORE_BYTE (c);
+ if (! valids[c])
+ break;
+ if ((valids[c] > 1))
+ found = CATEGORY_MASK_CCL;
+ }
+ detect_info->rejected |= CATEGORY_MASK_CCL;
+ return 0;
+
+ no_more_source:
+ detect_info->found |= found;
+ return 1;
+}
+
+static void
+decode_coding_ccl (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size;
+ int consumed_chars = 0;
+ int multibytep = coding->src_multibyte;
+ struct ccl_program ccl;
+ int source_charbuf[1024];
+ int source_byteidx[1024];
+ Lisp_Object attrs, eol_type, charset_list;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
+
+ while (src < src_end)
+ {
+ const unsigned char *p = src;
+ int *source, *source_end;
+ int i = 0;
+
+ if (multibytep)
+ while (i < 1024 && p < src_end)
+ {
+ source_byteidx[i] = p - src;
+ source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+ }
+ else
+ while (i < 1024 && p < src_end)
+ source_charbuf[i++] = *p++;
+
+ if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
+ ccl.last_block = 1;
+
+ source = source_charbuf;
+ source_end = source + i;
+ while (source < source_end)
+ {
+ ccl_driver (&ccl, source, charbuf,
+ source_end - source, charbuf_end - charbuf,
+ charset_list);
+ source += ccl.consumed;
+ charbuf += ccl.produced;
+ if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
+ break;
+ }
+ if (source < source_end)
+ src += source_byteidx[source - source_charbuf];
+ else
+ src = p;
+ consumed_chars += source - source_charbuf;
+
+ if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
+ && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
+ break;
+ }
+
+ switch (ccl.status)
+ {
+ case CCL_STAT_SUSPEND_BY_SRC:
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ break;
+ case CCL_STAT_SUSPEND_BY_DST:
+ break;
+ case CCL_STAT_QUIT:
+ case CCL_STAT_INVALID_CMD:
+ coding->result = CODING_RESULT_INTERRUPT;
+ break;
+ default:
+ coding->result = CODING_RESULT_SUCCESS;
+ break;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_CCL;
+ coding->consumed_char += consumed_chars;
+ coding->consumed = src - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
+static int
+encode_coding_ccl (coding)
+ struct coding_system *coding;
+{
+ struct ccl_program ccl;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ unsigned char *adjusted_dst_end = dst_end - 1;
+ int destination_charbuf[1024];
+ int i, produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
+
+ ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
+ ccl.dst_multibyte = coding->dst_multibyte;
+
+ while (charbuf < charbuf_end && dst < adjusted_dst_end)
+ {
+ int dst_bytes = dst_end - dst;
+ if (dst_bytes > 1024)
+ dst_bytes = 1024;
+
+ ccl_driver (&ccl, charbuf, destination_charbuf,
+ charbuf_end - charbuf, dst_bytes, charset_list);
+ charbuf += ccl.consumed;
+ if (multibytep)
+ for (i = 0; i < ccl.produced; i++)
+ EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
+ else
+ {
+ for (i = 0; i < ccl.produced; i++)
+ *dst++ = destination_charbuf[i] & 0xFF;
+ produced_chars += ccl.produced;
+ }
+ }
+
+ switch (ccl.status)
+ {
+ case CCL_STAT_SUSPEND_BY_SRC:
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ break;
+ case CCL_STAT_SUSPEND_BY_DST:
+ coding->result = CODING_RESULT_INSUFFICIENT_DST;
+ break;
+ case CCL_STAT_QUIT:
+ case CCL_STAT_INVALID_CMD:
+ coding->result = CODING_RESULT_INTERRUPT;
+ break;
+ default:
+ coding->result = CODING_RESULT_SUCCESS;
+ break;
+ }
+
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
+}
+
+
-/*** 6. End-of-line handlers ***/
+/*** 10, 11. no-conversion handlers ***/
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
static void
-decode_eol (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_raw_text (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *dst = destination;
- unsigned char *src_end = src + src_bytes;
- unsigned char *dst_end = dst + dst_bytes;
- Lisp_Object translation_table;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code
- (within macro ONE_MORE_BYTE), or when there's not enough
- destination area to produce a character (within macro
- EMIT_CHAR). */
- unsigned char *src_base;
+ coding->chars_at_source = 1;
+ coding->consumed_char = 0;
+ coding->consumed = 0;
+ coding->result = CODING_RESULT_SUCCESS;
+}
+
+static int
+encode_coding_raw_text (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = coding->charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int produced_chars = 0;
int c;
- translation_table = Qnil;
- switch (coding->eol_type)
+ if (multibytep)
{
- case CODING_EOL_CRLF:
- while (1)
+ int safe_room = MAX_MULTIBYTE_LENGTH * 2;
+
+ if (coding->src_multibyte)
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (ASCII_CHAR_P (c))
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
+ {
+ unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
+
+ CHAR_STRING_ADVANCE (c, p1);
+ while (p0 < p1)
+ {
+ EMIT_ONE_BYTE (*p0);
+ p0++;
+ }
+ }
+ }
+ else
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ EMIT_ONE_BYTE (c);
+ }
+ }
+ else
+ {
+ if (coding->src_multibyte)
{
- src_base = src;
- ONE_MORE_BYTE (c);
- if (c == '\r')
- {
- ONE_MORE_BYTE (c);
- if (c != '\n')
- {
- src--;
- c = '\r';
- }
- }
- else if (c == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
+ int safe_room = MAX_MULTIBYTE_LENGTH;
+
+ while (charbuf < charbuf_end)
{
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (ASCII_CHAR_P (c))
+ *dst++ = c;
+ else if (CHAR_BYTE8_P (c))
+ *dst++ = CHAR_TO_BYTE8 (c);
+ else
+ CHAR_STRING_ADVANCE (c, dst);
+ produced_chars++;
}
- EMIT_CHAR (c);
}
- break;
+ else
+ {
+ ASSURE_DESTINATION (charbuf_end - charbuf);
+ while (charbuf < charbuf_end && dst < dst_end)
+ *dst++ = *charbuf++;
+ produced_chars = dst - (coding->destination + coding->dst_bytes);
+ }
+ }
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
+}
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in a charset-based coding system. If it
+ is, return 1, else return 0. */
- case CODING_EOL_CR:
- while (1)
+static int
+detect_coding_charset (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ Lisp_Object attrs, valids;
+ int found = 0;
+
+ detect_info->checked |= CATEGORY_MASK_CHARSET;
+
+ coding = &coding_categories[coding_category_charset];
+ attrs = CODING_ID_ATTRS (coding->id);
+ valids = AREF (attrs, coding_attr_charset_valids);
+
+ if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ src += coding->head_ascii;
+
+ while (1)
+ {
+ int c;
+
+ ONE_MORE_BYTE (c);
+ if (NILP (AREF (valids, c)))
+ break;
+ if (c >= 0x80)
+ found = CATEGORY_MASK_CHARSET;
+ }
+ detect_info->rejected |= CATEGORY_MASK_CHARSET;
+ return 0;
+
+ no_more_source:
+ detect_info->found |= found;
+ return 1;
+}
+
+static void
+decode_coding_charset (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object attrs, eol_type, charset_list, valids;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ valids = AREF (attrs, coding_attr_charset_valids);
+
+ while (1)
+ {
+ int c;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c);
+ if (c == '\r')
{
- src_base = src;
- ONE_MORE_BYTE (c);
- if (c == '\n')
+ /* Here we assume that no charset maps '\r' to something
+ else. */
+ if (EQ (eol_type, Qdos))
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ if (src == src_end)
{
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
}
+ if (*src == '\n')
+ ONE_MORE_BYTE (c);
}
- else if (c == '\r')
+ else if (EQ (eol_type, Qmac))
c = '\n';
- EMIT_CHAR (c);
}
- break;
-
- default: /* no need for EOL handling */
- while (1)
+ else
{
- src_base = src;
- ONE_MORE_BYTE (c);
- EMIT_CHAR (c);
+ Lisp_Object val;
+ struct charset *charset;
+ int dim;
+ int len = 1;
+ unsigned code = c;
+
+ val = AREF (valids, c);
+ if (NILP (val))
+ goto invalid_code;
+ if (INTEGERP (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (val));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
+ {
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base, src_end,
+ charset, code, c);
+ }
+ else
+ {
+ /* VAL is a list of charset IDs. It is assured that the
+ list is sorted by charset dimensions (smaller one
+ comes first). */
+ while (CONSP (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
+ {
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base,
+ src_end, charset, code, c);
+ if (c >= 0)
+ break;
+ val = XCDR (val);
+ }
+ }
+ if (c < 0)
+ goto invalid_code;
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
+ }
}
+ *charbuf++ = c;
+ char_offset++;
+ continue;
+
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
- return;
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
-/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
- format of end-of-line according to `coding->eol_type'. It also
- convert multibyte form 8-bit characters to unibyte if
- CODING->src_multibyte is nonzero. If `coding->mode &
- CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
- also means end-of-line. */
-
-static void
-encode_eol (coding, source, destination, src_bytes, dst_bytes)
+static int
+encode_coding_charset (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- unsigned char *dst = destination;
- const unsigned char *src_end = src + src_bytes;
- unsigned char *dst_end = dst + dst_bytes;
- Lisp_Object translation_table;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text to
- analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
- there's not enough destination area to produce encoded codes
- (within macro EMIT_BYTES). */
- const unsigned char *src_base;
- unsigned char *tmp;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = MAX_MULTIBYTE_LENGTH;
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list;
+ int ascii_compatible;
int c;
- int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
- translation_table = Qnil;
- if (coding->src_multibyte
- && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
- {
- src_end--;
- src_bytes--;
- coding->result = CODING_FINISH_INSUFFICIENT_SRC;
- }
+ CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
- if (coding->eol_type == CODING_EOL_CRLF)
- {
- while (src < src_end)
- {
- src_base = src;
- c = *src++;
- if (c >= 0x20)
- EMIT_ONE_BYTE (c);
- else if (c == '\n' || (c == '\r' && selective_display))
- EMIT_TWO_BYTES ('\r', '\n');
- else
- EMIT_ONE_BYTE (c);
- }
- src_base = src;
- label_end_of_loop:
- ;
- }
- else
+ while (charbuf < charbuf_end)
{
- if (!dst_bytes || src_bytes <= dst_bytes)
+ struct charset *charset;
+ unsigned code;
+
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (ascii_compatible && ASCII_CHAR_P (c))
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
{
- safe_bcopy (src, dst, src_bytes);
- src_base = src_end;
- dst += src_bytes;
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
else
{
- if (coding->src_multibyte
- && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
- dst_bytes--;
- safe_bcopy (src, dst, dst_bytes);
- src_base = src + dst_bytes;
- dst = destination + dst_bytes;
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- }
- if (coding->eol_type == CODING_EOL_CR)
- {
- for (tmp = destination; tmp < dst; tmp++)
- if (*tmp == '\n') *tmp = '\r';
- }
- else if (selective_display)
- {
- for (tmp = destination; tmp < dst; tmp++)
- if (*tmp == '\r') *tmp = '\n';
+ charset = char_charset (c, charset_list, &code);
+ if (charset)
+ {
+ if (CHARSET_DIMENSION (charset) == 1)
+ EMIT_ONE_BYTE (code);
+ else if (CHARSET_DIMENSION (charset) == 2)
+ EMIT_TWO_BYTES (code >> 8, code & 0xFF);
+ else if (CHARSET_DIMENSION (charset) == 3)
+ EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
+ else
+ EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
+ (code >> 8) & 0xFF, code & 0xFF);
+ }
+ else
+ {
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ else
+ c = coding->default_char;
+ EMIT_ONE_BYTE (c);
+ }
}
}
- if (coding->src_multibyte)
- dst = destination + str_as_unibyte (destination, dst - destination);
- coding->consumed = src_base - source;
- coding->produced = dst - destination;
- coding->produced_char = coding->produced;
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
/*** 7. C library functions ***/
-/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
- has a property `coding-system'. The value of this property is a
- vector of length 5 (called the coding-vector). Among elements of
- this vector, the first (element[0]) and the fifth (element[4])
- carry important information for decoding/encoding. Before
- decoding/encoding, this information should be set in fields of a
- structure of type `coding_system'.
-
- The value of the property `coding-system' can be a symbol of another
- subsidiary coding-system. In that case, Emacs gets coding-vector
- from that symbol.
-
- `element[0]' contains information to be set in `coding->type'. The
- value and its meaning is as follows:
-
- 0 -- coding_type_emacs_mule
- 1 -- coding_type_sjis
- 2 -- coding_type_iso2022
- 3 -- coding_type_big5
- 4 -- coding_type_ccl encoder/decoder written in CCL
- nil -- coding_type_no_conversion
- t -- coding_type_undecided (automatic conversion on decoding,
- no-conversion on encoding)
-
- `element[4]' contains information to be set in `coding->flags' and
- `coding->spec'. The meaning varies by `coding->type'.
-
- If `coding->type' is `coding_type_iso2022', element[4] is a vector
- of length 32 (of which the first 13 sub-elements are used now).
- Meanings of these sub-elements are:
-
- sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
- If the value is an integer of valid charset, the charset is
- assumed to be designated to graphic register N initially.
-
- If the value is minus, it is a minus value of charset which
- reserves graphic register N, which means that the charset is
- not designated initially but should be designated to graphic
- register N just before encoding a character in that charset.
-
- If the value is nil, graphic register N is never used on
- encoding.
-
- sub-element[N] where N is 4 through 11: to be set in `coding->flags'
- Each value takes t or nil. See the section ISO2022 of
- `coding.h' for more information.
-
- If `coding->type' is `coding_type_big5', element[4] is t to denote
- BIG5-ETen or nil to denote BIG5-HKU.
-
- If `coding->type' takes the other value, element[4] is ignored.
-
- Emacs Lisp's coding systems also carry information about format of
- end-of-line in a value of property `eol-type'. If the value is
- integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
- means CODING_EOL_CR. If it is not integer, it should be a vector
- of subsidiary coding systems of which property `eol-type' has one
- of the above values.
-
-*/
-
-/* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
- and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
- is setup so that no conversion is necessary and return -1, else
- return 0. */
+/* Setup coding context CODING from information about CODING_SYSTEM.
+ If CODING_SYSTEM is nil, `no-conversion' is assumed. If
+ CODING_SYSTEM is invalid, signal an error. */
-int
+void
setup_coding_system (coding_system, coding)
Lisp_Object coding_system;
struct coding_system *coding;
{
- Lisp_Object coding_spec, coding_type, eol_type, plist;
+ Lisp_Object attrs;
+ Lisp_Object eol_type;
+ Lisp_Object coding_type;
Lisp_Object val;
- /* At first, zero clear all members. */
- bzero (coding, sizeof (struct coding_system));
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
- /* Initialize some fields required for all kinds of coding systems. */
- coding->symbol = coding_system;
- coding->heading_ascii = -1;
- coding->post_read_conversion = coding->pre_write_conversion = Qnil;
- coding->composing = COMPOSITION_DISABLED;
- coding->cmp_data = NULL;
+ CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
- if (NILP (coding_system))
- goto label_invalid_coding_system;
+ attrs = CODING_ID_ATTRS (coding->id);
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
- coding_spec = Fget (coding_system, Qcoding_system);
+ coding->mode = 0;
+ coding->head_ascii = -1;
+ coding->common_flags
+ = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
+ if (! NILP (CODING_ATTR_POST_READ (attrs)))
+ coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
+ if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
+ if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
+ coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
- if (!VECTORP (coding_spec)
- || XVECTOR (coding_spec)->size != 5
- || !CONSP (XVECTOR (coding_spec)->contents[3]))
- goto label_invalid_coding_system;
+ val = CODING_ATTR_SAFE_CHARSETS (attrs);
+ coding->max_charset_id = SCHARS (val) - 1;
+ coding->safe_charsets = (char *) SDATA (val);
+ coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
- eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
- if (VECTORP (eol_type))
+ coding_type = CODING_ATTR_TYPE (attrs);
+ if (EQ (coding_type, Qundecided))
{
- coding->eol_type = CODING_EOL_UNDECIDED;
- coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
+ coding->detector = NULL;
+ coding->decoder = decode_coding_raw_text;
+ coding->encoder = encode_coding_raw_text;
+ coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
}
- else if (XFASTINT (eol_type) == 1)
+ else if (EQ (coding_type, Qiso_2022))
{
- coding->eol_type = CODING_EOL_CRLF;
+ int i;
+ int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+
+ /* Invoke graphic register 0 to plane 0. */
+ CODING_ISO_INVOCATION (coding, 0) = 0;
+ /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
+ CODING_ISO_INVOCATION (coding, 1)
+ = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
+ /* Setup the initial status of designation. */
+ for (i = 0; i < 4; i++)
+ CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
+ /* Not single shifting initially. */
+ CODING_ISO_SINGLE_SHIFTING (coding) = 0;
+ /* Beginning of buffer should also be regarded as bol. */
+ CODING_ISO_BOL (coding) = 1;
+ coding->detector = detect_coding_iso_2022;
+ coding->decoder = decode_coding_iso_2022;
+ coding->encoder = encode_coding_iso_2022;
+ if (flags & CODING_ISO_FLAG_SAFE)
+ coding->mode |= CODING_MODE_SAFE_ENCODING;
coding->common_flags
- = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
+ | CODING_REQUIRE_FLUSHING_MASK);
+ if (flags & CODING_ISO_FLAG_COMPOSITION)
+ coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
+ if (flags & CODING_ISO_FLAG_DESIGNATION)
+ coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
+ if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
+ {
+ setup_iso_safe_charsets (attrs);
+ val = CODING_ATTR_SAFE_CHARSETS (attrs);
+ coding->max_charset_id = SCHARS (val) - 1;
+ coding->safe_charsets = (char *) SDATA (val);
+ }
+ CODING_ISO_FLAGS (coding) = flags;
}
- else if (XFASTINT (eol_type) == 2)
+ else if (EQ (coding_type, Qcharset))
{
- coding->eol_type = CODING_EOL_CR;
+ coding->detector = detect_coding_charset;
+ coding->decoder = decode_coding_charset;
+ coding->encoder = encode_coding_charset;
coding->common_flags
- = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
}
- else
- coding->eol_type = CODING_EOL_LF;
-
- coding_type = XVECTOR (coding_spec)->contents[0];
- /* Try short cut. */
- if (SYMBOLP (coding_type))
+ else if (EQ (coding_type, Qutf_8))
{
- if (EQ (coding_type, Qt))
- {
- coding->type = coding_type_undecided;
- coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
- }
- else
- coding->type = coding_type_no_conversion;
- /* Initialize this member. Any thing other than
- CODING_CATEGORY_IDX_UTF_16_BE and
- CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
- special treatment in detect_eol. */
- coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
-
- return 0;
- }
-
- /* Get values of coding system properties:
- `post-read-conversion', `pre-write-conversion',
- `translation-table-for-decode', `translation-table-for-encode'. */
- plist = XVECTOR (coding_spec)->contents[3];
- /* Pre & post conversion functions should be disabled if
- inhibit_eol_conversion is nonzero. This is the case that a code
- conversion function is called while those functions are running. */
- if (! inhibit_pre_post_conversion)
- {
- coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
- coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
- }
- val = Fplist_get (plist, Qtranslation_table_for_decode);
- if (SYMBOLP (val))
- val = Fget (val, Qtranslation_table_for_decode);
- coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
- val = Fplist_get (plist, Qtranslation_table_for_encode);
- if (SYMBOLP (val))
- val = Fget (val, Qtranslation_table_for_encode);
- coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
- val = Fplist_get (plist, Qcoding_category);
- if (!NILP (val))
- {
- val = Fget (val, Qcoding_category_index);
- if (INTEGERP (val))
- coding->category_idx = XINT (val);
- else
- goto label_invalid_coding_system;
+ coding->detector = detect_coding_utf_8;
+ coding->decoder = decode_coding_utf_8;
+ coding->encoder = encode_coding_utf_8;
+ coding->common_flags
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ }
+ else if (EQ (coding_type, Qutf_16))
+ {
+ val = AREF (attrs, coding_attr_utf_16_bom);
+ CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
+ : EQ (val, Qt) ? utf_16_with_bom
+ : utf_16_without_bom);
+ val = AREF (attrs, coding_attr_utf_16_endian);
+ CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
+ : utf_16_little_endian);
+ CODING_UTF_16_SURROGATE (coding) = 0;
+ coding->detector = detect_coding_utf_16;
+ coding->decoder = decode_coding_utf_16;
+ coding->encoder = encode_coding_utf_16;
+ coding->common_flags
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
+ coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
}
- else
- goto label_invalid_coding_system;
-
- /* If the coding system has non-nil `composition' property, enable
- composition handling. */
- val = Fplist_get (plist, Qcomposition);
- if (!NILP (val))
- coding->composing = COMPOSITION_NO;
-
- switch (XFASTINT (coding_type))
+ else if (EQ (coding_type, Qccl))
{
- case 0:
- coding->type = coding_type_emacs_mule;
- coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- if (!NILP (coding->post_read_conversion))
- coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
- if (!NILP (coding->pre_write_conversion))
- coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
- break;
-
- case 1:
- coding->type = coding_type_sjis;
+ coding->detector = detect_coding_ccl;
+ coding->decoder = decode_coding_ccl;
+ coding->encoder = encode_coding_ccl;
coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- break;
-
- case 2:
- coding->type = coding_type_iso2022;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
+ | CODING_REQUIRE_FLUSHING_MASK);
+ }
+ else if (EQ (coding_type, Qemacs_mule))
+ {
+ coding->detector = detect_coding_emacs_mule;
+ coding->decoder = decode_coding_emacs_mule;
+ coding->encoder = encode_coding_emacs_mule;
coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- {
- Lisp_Object val, temp;
- Lisp_Object *flags;
- int i, charset, reg_bits = 0;
-
- val = XVECTOR (coding_spec)->contents[4];
-
- if (!VECTORP (val) || XVECTOR (val)->size != 32)
- goto label_invalid_coding_system;
-
- flags = XVECTOR (val)->contents;
- coding->flags
- = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
- | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
- | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
- | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
- | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
- | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
- | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
- | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
- | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
- | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
- | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
- | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
- | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
- );
-
- /* Invoke graphic register 0 to plane 0. */
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
- /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
- CODING_SPEC_ISO_INVOCATION (coding, 1)
- = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
- /* Not single shifting at first. */
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
- /* Beginning of buffer should also be regarded as bol. */
- CODING_SPEC_ISO_BOL (coding) = 1;
-
- for (charset = 0; charset <= MAX_CHARSET; charset++)
- CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
- val = Vcharset_revision_alist;
- while (CONSP (val))
- {
- charset = get_charset_id (Fcar_safe (XCAR (val)));
- if (charset >= 0
- && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
- && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
- CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
- val = XCDR (val);
- }
-
- /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
- FLAGS[REG] can be one of below:
- integer CHARSET: CHARSET occupies register I,
- t: designate nothing to REG initially, but can be used
- by any charsets,
- list of integer, nil, or t: designate the first
- element (if integer) to REG initially, the remaining
- elements (if integer) is designated to REG on request,
- if an element is t, REG can be used by any charsets,
- nil: REG is never used. */
- for (charset = 0; charset <= MAX_CHARSET; charset++)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
- for (i = 0; i < 4; i++)
- {
- if ((INTEGERP (flags[i])
- && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
- || (charset = get_charset_id (flags[i])) >= 0)
- {
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
- }
- else if (EQ (flags[i], Qt))
- {
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
- reg_bits |= 1 << i;
- coding->flags |= CODING_FLAG_ISO_DESIGNATION;
- }
- else if (CONSP (flags[i]))
- {
- Lisp_Object tail;
- tail = flags[i];
-
- coding->flags |= CODING_FLAG_ISO_DESIGNATION;
- if ((INTEGERP (XCAR (tail))
- && (charset = XINT (XCAR (tail)),
- CHARSET_VALID_P (charset)))
- || (charset = get_charset_id (XCAR (tail))) >= 0)
- {
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
- }
- else
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
- tail = XCDR (tail);
- while (CONSP (tail))
- {
- if ((INTEGERP (XCAR (tail))
- && (charset = XINT (XCAR (tail)),
- CHARSET_VALID_P (charset)))
- || (charset = get_charset_id (XCAR (tail))) >= 0)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = i;
- else if (EQ (XCAR (tail), Qt))
- reg_bits |= 1 << i;
- tail = XCDR (tail);
- }
- }
- else
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
-
- CODING_SPEC_ISO_DESIGNATION (coding, i)
- = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
- }
-
- if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
- {
- /* REG 1 can be used only by locking shift in 7-bit env. */
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
- reg_bits &= ~2;
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
- /* Without any shifting, only REG 0 and 1 can be used. */
- reg_bits &= 3;
- }
-
- if (reg_bits)
- for (charset = 0; charset <= MAX_CHARSET; charset++)
- {
- if (CHARSET_DEFINED_P (charset)
- && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
- {
- /* There exist some default graphic registers to be
- used by CHARSET. */
-
- /* We had better avoid designating a charset of
- CHARS96 to REG 0 as far as possible. */
- if (CHARSET_CHARS (charset) == 96)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = (reg_bits & 2
- ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
- else
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = (reg_bits & 1
- ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
- }
- }
- }
- coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
- coding->spec.iso2022.last_invalid_designation_register = -1;
- break;
-
- case 3:
- coding->type = coding_type_big5;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
+ && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
+ {
+ Lisp_Object tail, safe_charsets;
+ int max_charset_id = 0;
+
+ for (tail = Vemacs_mule_charset_list; CONSP (tail);
+ tail = XCDR (tail))
+ if (max_charset_id < XFASTINT (XCAR (tail)))
+ max_charset_id = XFASTINT (XCAR (tail));
+ safe_charsets = Fmake_string (make_number (max_charset_id + 1),
+ make_number (255));
+ for (tail = Vemacs_mule_charset_list; CONSP (tail);
+ tail = XCDR (tail))
+ SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
+ coding->max_charset_id = max_charset_id;
+ coding->safe_charsets = (char *) SDATA (safe_charsets);
+ }
+ }
+ else if (EQ (coding_type, Qshift_jis))
+ {
+ coding->detector = detect_coding_sjis;
+ coding->decoder = decode_coding_sjis;
+ coding->encoder = encode_coding_sjis;
coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- coding->flags
- = (NILP (XVECTOR (coding_spec)->contents[4])
- ? CODING_FLAG_BIG5_HKU
- : CODING_FLAG_BIG5_ETEN);
- break;
-
- case 4:
- coding->type = coding_type_ccl;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ }
+ else if (EQ (coding_type, Qbig5))
+ {
+ coding->detector = detect_coding_big5;
+ coding->decoder = decode_coding_big5;
+ coding->encoder = encode_coding_big5;
coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- {
- val = XVECTOR (coding_spec)->contents[4];
- if (! CONSP (val)
- || setup_ccl_program (&(coding->spec.ccl.decoder),
- XCAR (val)) < 0
- || setup_ccl_program (&(coding->spec.ccl.encoder),
- XCDR (val)) < 0)
- goto label_invalid_coding_system;
-
- bzero (coding->spec.ccl.valid_codes, 256);
- val = Fplist_get (plist, Qvalid_codes);
- if (CONSP (val))
- {
- Lisp_Object this;
-
- for (; CONSP (val); val = XCDR (val))
- {
- this = XCAR (val);
- if (INTEGERP (this)
- && XINT (this) >= 0 && XINT (this) < 256)
- coding->spec.ccl.valid_codes[XINT (this)] = 1;
- else if (CONSP (this)
- && INTEGERP (XCAR (this))
- && INTEGERP (XCDR (this)))
- {
- int start = XINT (XCAR (this));
- int end = XINT (XCDR (this));
-
- if (start >= 0 && start <= end && end < 256)
- while (start <= end)
- coding->spec.ccl.valid_codes[start++] = 1;
- }
- }
- }
- }
- coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
- coding->spec.ccl.cr_carryover = 0;
- coding->spec.ccl.eight_bit_carryover[0] = 0;
- break;
-
- case 5:
- coding->type = coding_type_raw_text;
- break;
-
- default:
- goto label_invalid_coding_system;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ }
+ else /* EQ (coding_type, Qraw_text) */
+ {
+ coding->detector = NULL;
+ coding->decoder = decode_coding_raw_text;
+ coding->encoder = encode_coding_raw_text;
}
- return 0;
- label_invalid_coding_system:
- coding->type = coding_type_no_conversion;
- coding->category_idx = CODING_CATEGORY_IDX_BINARY;
- coding->common_flags = 0;
- coding->eol_type = CODING_EOL_LF;
- coding->pre_write_conversion = coding->post_read_conversion = Qnil;
- return -1;
+ return;
}
-/* Free memory blocks allocated for storing composition information. */
+/* Return raw-text or one of its subsidiaries that has the same
+ eol_type as CODING-SYSTEM. */
-void
-coding_free_composition_data (coding)
- struct coding_system *coding;
+Lisp_Object
+raw_text_coding_system (coding_system)
+ Lisp_Object coding_system;
{
- struct composition_data *cmp_data = coding->cmp_data, *next;
-
- if (!cmp_data)
- return;
- /* Memory blocks are chained. At first, rewind to the first, then,
- free blocks one by one. */
- while (cmp_data->prev)
- cmp_data = cmp_data->prev;
- while (cmp_data)
- {
- next = cmp_data->next;
- xfree (cmp_data);
- cmp_data = next;
- }
- coding->cmp_data = NULL;
-}
+ Lisp_Object spec, attrs;
+ Lisp_Object eol_type, raw_text_eol_type;
-/* Set `char_offset' member of all memory blocks pointed by
- coding->cmp_data to POS. */
+ if (NILP (coding_system))
+ return Qraw_text;
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ attrs = AREF (spec, 0);
-void
-coding_adjust_composition_offset (coding, pos)
- struct coding_system *coding;
- int pos;
-{
- struct composition_data *cmp_data;
+ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
+ return coding_system;
- for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
- cmp_data->char_offset = pos;
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
+ return Qraw_text;
+ spec = CODING_SYSTEM_SPEC (Qraw_text);
+ raw_text_eol_type = AREF (spec, 2);
+ return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
+ : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
+ : AREF (raw_text_eol_type, 2));
}
-/* Setup raw-text or one of its subsidiaries in the structure
- coding_system CODING according to the already setup value eol_type
- in CODING. CODING should be setup for some coding system in
- advance. */
-void
-setup_raw_text_coding_system (coding)
- struct coding_system *coding;
+/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
+ does, return one of the subsidiary that has the same eol-spec as
+ PARENT. Otherwise, return CODING_SYSTEM. */
+
+Lisp_Object
+coding_inherit_eol_type (coding_system, parent)
+ Lisp_Object coding_system, parent;
{
- if (coding->type != coding_type_raw_text)
- {
- coding->symbol = Qraw_text;
- coding->type = coding_type_raw_text;
- if (coding->eol_type != CODING_EOL_UNDECIDED)
- {
- Lisp_Object subsidiaries;
- subsidiaries = Fget (Qraw_text, Qeol_type);
+ Lisp_Object spec, attrs, eol_type;
- if (VECTORP (subsidiaries)
- && XVECTOR (subsidiaries)->size == 3)
- coding->symbol
- = XVECTOR (subsidiaries)->contents[coding->eol_type];
- }
- setup_coding_system (coding->symbol, coding);
- }
- return;
+ if (NILP (coding_system))
+ coding_system = Qraw_text;
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ attrs = AREF (spec, 0);
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type)
+ && ! NILP (parent))
+ {
+ Lisp_Object parent_spec;
+ Lisp_Object parent_eol_type;
+
+ parent_spec
+ = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
+ parent_eol_type = AREF (parent_spec, 2);
+ if (EQ (parent_eol_type, Qunix))
+ coding_system = AREF (eol_type, 0);
+ else if (EQ (parent_eol_type, Qdos))
+ coding_system = AREF (eol_type, 1);
+ else if (EQ (parent_eol_type, Qmac))
+ coding_system = AREF (eol_type, 2);
+ }
+ return coding_system;
}
/* Emacs has a mechanism to automatically detect a coding system if it
@@ -4001,14 +5094,14 @@ setup_raw_text_coding_system (coding)
o coding-category-iso-7-else
The category for a coding system which has the same code range
- as ISO2022 of 7-bit environment but uses locking shift or
+ as ISO2022 of 7-bit environemnt but uses locking shift or
single shift functions. Assigned the coding-system (Lisp
symbol) `iso-2022-7bit-lock' by default.
o coding-category-iso-8-else
The category for a coding system which has the same code range
- as ISO2022 of 8-bit environment but uses locking shift or
+ as ISO2022 of 8-bit environemnt but uses locking shift or
single shift functions. Assigned the coding-system (Lisp
symbol) `iso-2022-8bit-ss2' by default.
@@ -4051,2267 +5144,1514 @@ setup_raw_text_coding_system (coding)
`no-conversion' by default.
Each of them is a Lisp symbol and the value is an actual
- `coding-system' (this is also a Lisp symbol) assigned by a user.
+ `coding-system's (this is also a Lisp symbol) assigned by a user.
What Emacs does actually is to detect a category of coding system.
Then, it uses a `coding-system' assigned to it. If Emacs can't
- decide a single possible category, it selects a category of the
+ decide only one possible category, it selects a category of the
highest priority. Priorities of categories are also specified by a
user in a Lisp variable `coding-category-list'.
*/
-static
-int ascii_skip_code[256];
+#define EOL_SEEN_NONE 0
+#define EOL_SEEN_LF 1
+#define EOL_SEEN_CR 2
+#define EOL_SEEN_CRLF 4
-/* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
- If it detects possible coding systems, return an integer in which
- appropriate flag bits are set. Flag bits are defined by macros
- CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
- it should point the table `coding_priorities'. In that case, only
- the flag bit for a coding system of the highest priority is set in
- the returned value. If MULTIBYTEP is nonzero, 8-bit codes of the
- range 0x80..0x9F are in multibyte form.
+/* Detect how end-of-line of a text of length SRC_BYTES pointed by
+ SOURCE is encoded. If CATEGORY is one of
+ coding_category_utf_16_XXXX, assume that CR and LF are encoded by
+ two-byte, else they are encoded by one-byte.
+
+ Return one of EOL_SEEN_XXX. */
- How many ASCII characters are at the head is returned as *SKIP. */
+#define MAX_EOL_CHECK_COUNT 3
static int
-detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
+detect_eol (source, src_bytes, category)
unsigned char *source;
- int src_bytes, *priorities, *skip;
- int multibytep;
+ EMACS_INT src_bytes;
+ enum coding_category category;
{
- register unsigned char c;
- unsigned char *src = source, *src_end = source + src_bytes;
- unsigned int mask, utf16_examined_p, iso2022_examined_p;
- int i;
+ unsigned char *src = source, *src_end = src + src_bytes;
+ unsigned char c;
+ int total = 0;
+ int eol_seen = EOL_SEEN_NONE;
- /* At first, skip all ASCII characters and control characters except
- for three ISO2022 specific control characters. */
- ascii_skip_code[ISO_CODE_SO] = 0;
- ascii_skip_code[ISO_CODE_SI] = 0;
- ascii_skip_code[ISO_CODE_ESC] = 0;
-
- label_loop_detect_coding:
- while (src < src_end && ascii_skip_code[*src]) src++;
- *skip = src - source;
-
- if (src >= src_end)
- /* We found nothing other than ASCII. There's nothing to do. */
- return 0;
-
- c = *src;
- /* The text seems to be encoded in some multilingual coding system.
- Now, try to find in which coding system the text is encoded. */
- if (c < 0x80)
- {
- /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
- /* C is an ISO2022 specific control code of C0. */
- mask = detect_coding_iso2022 (src, src_end, multibytep);
- if (mask == 0)
- {
- /* No valid ISO2022 code follows C. Try again. */
- src++;
- if (c == ISO_CODE_ESC)
- ascii_skip_code[ISO_CODE_ESC] = 1;
- else
- ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
- goto label_loop_detect_coding;
- }
- if (priorities)
+ if ((1 << category) & CATEGORY_MASK_UTF_16)
+ {
+ int msb, lsb;
+
+ msb = category == (coding_category_utf_16_le
+ | coding_category_utf_16_le_nosig);
+ lsb = 1 - msb;
+
+ while (src + 1 < src_end)
{
- for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
+ c = src[lsb];
+ if (src[msb] == 0 && (c == '\n' || c == '\r'))
{
- if (mask & priorities[i])
- return priorities[i];
+ int this_eol;
+
+ if (c == '\n')
+ this_eol = EOL_SEEN_LF;
+ else if (src + 3 >= src_end
+ || src[msb + 2] != 0
+ || src[lsb + 2] != '\n')
+ this_eol = EOL_SEEN_CR;
+ else
+ this_eol = EOL_SEEN_CRLF;
+
+ if (eol_seen == EOL_SEEN_NONE)
+ /* This is the first end-of-line. */
+ eol_seen = this_eol;
+ else if (eol_seen != this_eol)
+ {
+ /* The found type is different from what found before. */
+ eol_seen = EOL_SEEN_LF;
+ break;
+ }
+ if (++total == MAX_EOL_CHECK_COUNT)
+ break;
}
- return CODING_CATEGORY_MASK_RAW_TEXT;
+ src += 2;
}
}
else
{
- int try;
-
- if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
- c = src[1] - 0x20;
-
- if (c < 0xA0)
- {
- /* C is the first byte of SJIS character code,
- or a leading-code of Emacs' internal format (emacs-mule),
- or the first byte of UTF-16. */
- try = (CODING_CATEGORY_MASK_SJIS
- | CODING_CATEGORY_MASK_EMACS_MULE
- | CODING_CATEGORY_MASK_UTF_16_BE
- | CODING_CATEGORY_MASK_UTF_16_LE);
-
- /* Or, if C is a special latin extra code,
- or is an ISO2022 specific control code of C1 (SS2 or SS3),
- or is an ISO2022 control-sequence-introducer (CSI),
- we should also consider the possibility of ISO2022 codings. */
- if ((VECTORP (Vlatin_extra_code_table)
- && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
- || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
- || (c == ISO_CODE_CSI
- && (src < src_end
- && (*src == ']'
- || ((*src == '0' || *src == '1' || *src == '2')
- && src + 1 < src_end
- && src[1] == ']')))))
- try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
- | CODING_CATEGORY_MASK_ISO_8BIT);
- }
- else
- /* C is a character of ISO2022 in graphic plane right,
- or a SJIS's 1-byte character code (i.e. JISX0201),
- or the first byte of BIG5's 2-byte code,
- or the first byte of UTF-8/16. */
- try = (CODING_CATEGORY_MASK_ISO_8_ELSE
- | CODING_CATEGORY_MASK_ISO_8BIT
- | CODING_CATEGORY_MASK_SJIS
- | CODING_CATEGORY_MASK_BIG5
- | CODING_CATEGORY_MASK_UTF_8
- | CODING_CATEGORY_MASK_UTF_16_BE
- | CODING_CATEGORY_MASK_UTF_16_LE);
-
- /* Or, we may have to consider the possibility of CCL. */
- if (coding_system_table[CODING_CATEGORY_IDX_CCL]
- && (coding_system_table[CODING_CATEGORY_IDX_CCL]
- ->spec.ccl.valid_codes)[c])
- try |= CODING_CATEGORY_MASK_CCL;
-
- mask = 0;
- utf16_examined_p = iso2022_examined_p = 0;
- if (priorities)
+ while (src < src_end)
{
- for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
+ c = *src++;
+ if (c == '\n' || c == '\r')
{
- if (!iso2022_examined_p
- && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
- {
- mask |= detect_coding_iso2022 (src, src_end, multibytep);
- iso2022_examined_p = 1;
- }
- else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
- mask |= detect_coding_sjis (src, src_end, multibytep);
- else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
- mask |= detect_coding_utf_8 (src, src_end, multibytep);
- else if (!utf16_examined_p
- && (priorities[i] & try &
- CODING_CATEGORY_MASK_UTF_16_BE_LE))
+ int this_eol;
+
+ if (c == '\n')
+ this_eol = EOL_SEEN_LF;
+ else if (src >= src_end || *src != '\n')
+ this_eol = EOL_SEEN_CR;
+ else
+ this_eol = EOL_SEEN_CRLF, src++;
+
+ if (eol_seen == EOL_SEEN_NONE)
+ /* This is the first end-of-line. */
+ eol_seen = this_eol;
+ else if (eol_seen != this_eol)
{
- mask |= detect_coding_utf_16 (src, src_end, multibytep);
- utf16_examined_p = 1;
+ /* The found type is different from what found before. */
+ eol_seen = EOL_SEEN_LF;
+ break;
}
- else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
- mask |= detect_coding_big5 (src, src_end, multibytep);
- else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
- mask |= detect_coding_emacs_mule (src, src_end, multibytep);
- else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
- mask |= detect_coding_ccl (src, src_end, multibytep);
- else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
- mask |= CODING_CATEGORY_MASK_RAW_TEXT;
- else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
- mask |= CODING_CATEGORY_MASK_BINARY;
- if (mask & priorities[i])
- return priorities[i];
+ if (++total == MAX_EOL_CHECK_COUNT)
+ break;
}
- return CODING_CATEGORY_MASK_RAW_TEXT;
}
- if (try & CODING_CATEGORY_MASK_ISO)
- mask |= detect_coding_iso2022 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_SJIS)
- mask |= detect_coding_sjis (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_BIG5)
- mask |= detect_coding_big5 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_UTF_8)
- mask |= detect_coding_utf_8 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
- mask |= detect_coding_utf_16 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_EMACS_MULE)
- mask |= detect_coding_emacs_mule (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_CCL)
- mask |= detect_coding_ccl (src, src_end, multibytep);
- }
- return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
+ }
+ return eol_seen;
}
-/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
- The information of the detected coding system is set in CODING. */
-void
-detect_coding (coding, src, src_bytes)
+static void
+adjust_coding_eol_type (coding, eol_seen)
struct coding_system *coding;
- const unsigned char *src;
- int src_bytes;
+ int eol_seen;
{
- unsigned int idx;
- int skip, mask;
- Lisp_Object val;
-
- val = Vcoding_category_list;
- mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
- coding->src_multibyte);
- coding->heading_ascii = skip;
-
- if (!mask) return;
-
- /* We found a single coding system of the highest priority in MASK. */
- idx = 0;
- while (mask && ! (mask & 1)) mask >>= 1, idx++;
- if (! mask)
- idx = CODING_CATEGORY_IDX_RAW_TEXT;
-
- val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
-
- if (coding->eol_type != CODING_EOL_UNDECIDED)
- {
- Lisp_Object tmp;
-
- tmp = Fget (val, Qeol_type);
- if (VECTORP (tmp))
- val = XVECTOR (tmp)->contents[coding->eol_type];
- }
-
- /* Setup this new coding system while preserving some slots. */
- {
- int src_multibyte = coding->src_multibyte;
- int dst_multibyte = coding->dst_multibyte;
-
- setup_coding_system (val, coding);
- coding->src_multibyte = src_multibyte;
- coding->dst_multibyte = dst_multibyte;
- coding->heading_ascii = skip;
- }
+ Lisp_Object eol_type;
+
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (eol_seen & EOL_SEEN_LF)
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
+ else if (eol_seen & EOL_SEEN_CRLF)
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
+ else if (eol_seen & EOL_SEEN_CR)
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
}
-/* Detect how end-of-line of a text of length SRC_BYTES pointed by
- SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
- CODING_EOL_CR, and CODING_EOL_UNDECIDED.
+/* Detect how a text specified in CODING is encoded. If a coding
+ system is detected, update fields of CODING by the detected coding
+ system. */
- How many non-eol characters are at the head is returned as *SKIP. */
-
-#define MAX_EOL_CHECK_COUNT 3
-
-static int
-detect_eol_type (source, src_bytes, skip)
- unsigned char *source;
- int src_bytes, *skip;
+void
+detect_coding (coding)
+ struct coding_system *coding;
{
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned char c;
- int total = 0; /* How many end-of-lines are found so far. */
- int eol_type = CODING_EOL_UNDECIDED;
- int this_eol_type;
+ const unsigned char *src, *src_end;
+ Lisp_Object attrs, coding_type;
+
+ coding->consumed = coding->consumed_char = 0;
+ coding->produced = coding->produced_char = 0;
+ coding_set_source (coding);
- *skip = 0;
+ src_end = coding->source + coding->src_bytes;
- while (src < src_end && total < MAX_EOL_CHECK_COUNT)
+ /* If we have not yet decided the text encoding type, detect it
+ now. */
+ if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
{
- c = *src++;
- if (c == '\n' || c == '\r')
- {
- if (*skip == 0)
- *skip = src - 1 - source;
- total++;
- if (c == '\n')
- this_eol_type = CODING_EOL_LF;
- else if (src >= src_end || *src != '\n')
- this_eol_type = CODING_EOL_CR;
- else
- this_eol_type = CODING_EOL_CRLF, src++;
+ int c, i;
- if (eol_type == CODING_EOL_UNDECIDED)
- /* This is the first end-of-line. */
- eol_type = this_eol_type;
- else if (eol_type != this_eol_type)
- {
- /* The found type is different from what found before. */
- eol_type = CODING_EOL_INCONSISTENT;
- break;
- }
+ for (src = coding->source; src < src_end; src++)
+ {
+ c = *src;
+ if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
+ || c == ISO_CODE_SI
+ || c == ISO_CODE_SO)))
+ break;
}
- }
-
- if (*skip == 0)
- *skip = src_end - source;
- return eol_type;
-}
+ coding->head_ascii = src - (coding->source + coding->consumed);
-/* Like detect_eol_type, but detect EOL type in 2-octet
- big-endian/little-endian format for coding systems utf-16-be and
- utf-16-le. */
-
-static int
-detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
- unsigned char *source;
- int src_bytes, *skip, big_endian_p;
-{
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned int c1, c2;
- int total = 0; /* How many end-of-lines are found so far. */
- int eol_type = CODING_EOL_UNDECIDED;
- int this_eol_type;
- int msb, lsb;
-
- if (big_endian_p)
- msb = 0, lsb = 1;
- else
- msb = 1, lsb = 0;
-
- *skip = 0;
-
- while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
- {
- c1 = (src[msb] << 8) | (src[lsb]);
- src += 2;
-
- if (c1 == '\n' || c1 == '\r')
+ if (coding->head_ascii < coding->src_bytes)
{
- if (*skip == 0)
- *skip = src - 2 - source;
- total++;
- if (c1 == '\n')
- {
- this_eol_type = CODING_EOL_LF;
- }
- else
+ struct coding_detection_info detect_info;
+ enum coding_category category;
+ struct coding_system *this;
+
+ detect_info.checked = detect_info.found = detect_info.rejected = 0;
+ for (i = 0; i < coding_category_raw_text; i++)
{
- if ((src + 1) >= src_end)
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (this->id < 0)
{
- this_eol_type = CODING_EOL_CR;
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
}
- else
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
{
- c2 = (src[msb] << 8) | (src[lsb]);
- if (c2 == '\n')
- this_eol_type = CODING_EOL_CRLF, src += 2;
- else
- this_eol_type = CODING_EOL_CR;
+ if (detect_info.found & (1 << category))
+ break;
}
+ else if ((*(this->detector)) (coding, &detect_info)
+ && detect_info.found & (1 << category))
+ break;
}
-
- if (eol_type == CODING_EOL_UNDECIDED)
- /* This is the first end-of-line. */
- eol_type = this_eol_type;
- else if (eol_type != this_eol_type)
- {
- /* The found type is different from what found before. */
- eol_type = CODING_EOL_INCONSISTENT;
- break;
- }
+ if (i < coding_category_raw_text)
+ setup_coding_system (CODING_ID_NAME (this->id), coding);
+ else if (detect_info.rejected == CATEGORY_MASK_ANY)
+ setup_coding_system (Qraw_text, coding);
+ else if (detect_info.rejected)
+ for (i = 0; i < coding_category_raw_text; i++)
+ if (! (detect_info.rejected & (1 << coding_priorities[i])))
+ {
+ this = coding_categories + coding_priorities[i];
+ setup_coding_system (CODING_ID_NAME (this->id), coding);
+ break;
+ }
}
}
-
- if (*skip == 0)
- *skip = src_end - source;
- return eol_type;
-}
-
-/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
- is encoded. If it detects an appropriate format of end-of-line, it
- sets the information in *CODING. */
-
-void
-detect_eol (coding, src, src_bytes)
- struct coding_system *coding;
- const unsigned char *src;
- int src_bytes;
-{
- Lisp_Object val;
- int skip;
- int eol_type;
-
- switch (coding->category_idx)
- {
- case CODING_CATEGORY_IDX_UTF_16_BE:
- eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
- break;
- case CODING_CATEGORY_IDX_UTF_16_LE:
- eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
- break;
- default:
- eol_type = detect_eol_type (src, src_bytes, &skip);
- break;
- }
-
- if (coding->heading_ascii > skip)
- coding->heading_ascii = skip;
- else
- skip = coding->heading_ascii;
-
- if (eol_type == CODING_EOL_UNDECIDED)
- return;
- if (eol_type == CODING_EOL_INCONSISTENT)
+ else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
{
-#if 0
- /* This code is suppressed until we find a better way to
- distinguish raw text file and binary file. */
+ Lisp_Object coding_systems;
+ struct coding_detection_info detect_info;
- /* If we have already detected that the coding is raw-text, the
- coding should actually be no-conversion. */
- if (coding->type == coding_type_raw_text)
+ coding_systems
+ = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
+ detect_info.found = detect_info.rejected = 0;
+ if (CONSP (coding_systems)
+ && detect_coding_utf_16 (coding, &detect_info)
+ && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_BE)))
{
- setup_coding_system (Qno_conversion, coding);
- return;
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ setup_coding_system (XCAR (coding_systems), coding);
+ else
+ setup_coding_system (XCDR (coding_systems), coding);
}
- /* Else, let's decode only text code anyway. */
-#endif /* 0 */
- eol_type = CODING_EOL_LF;
}
- val = Fget (coding->symbol, Qeol_type);
- if (VECTORP (val) && XVECTOR (val)->size == 3)
+ attrs = CODING_ID_ATTRS (coding->id);
+ coding_type = CODING_ATTR_TYPE (attrs);
+
+ /* If we have not yet decided the EOL type, detect it now. But, the
+ detection is impossible for a CCL based coding system, in which
+ case, we detct the EOL type after decoding. */
+ if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
+ && ! EQ (coding_type, Qccl))
{
- int src_multibyte = coding->src_multibyte;
- int dst_multibyte = coding->dst_multibyte;
- struct composition_data *cmp_data = coding->cmp_data;
+ int eol_seen = detect_eol (coding->source, coding->src_bytes,
+ XINT (CODING_ATTR_CATEGORY (attrs)));
- setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
- coding->src_multibyte = src_multibyte;
- coding->dst_multibyte = dst_multibyte;
- coding->heading_ascii = skip;
- coding->cmp_data = cmp_data;
+ if (eol_seen != EOL_SEEN_NONE)
+ adjust_coding_eol_type (coding, eol_seen);
}
}
-#define CONVERSION_BUFFER_EXTRA_ROOM 256
-#define DECODING_BUFFER_MAG(coding) \
- (coding->type == coding_type_iso2022 \
- ? 3 \
- : (coding->type == coding_type_ccl \
- ? coding->spec.ccl.decoder.buf_magnification \
- : 2))
-
-/* Return maximum size (bytes) of a buffer enough for decoding
- SRC_BYTES of text encoded in CODING. */
-
-int
-decoding_buffer_size (coding, src_bytes)
+static void
+decode_eol (coding)
struct coding_system *coding;
- int src_bytes;
{
- return (src_bytes * DECODING_BUFFER_MAG (coding)
- + CONVERSION_BUFFER_EXTRA_ROOM);
-}
-
-/* Return maximum size (bytes) of a buffer enough for encoding
- SRC_BYTES of text to CODING. */
+ if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
+ {
+ unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
+ unsigned char *pend = p + coding->produced;
+ int eol_seen = EOL_SEEN_NONE;
-int
-encoding_buffer_size (coding, src_bytes)
- struct coding_system *coding;
- int src_bytes;
-{
- int magnification;
+ for (; p < pend; p++)
+ {
+ if (*p == '\n')
+ eol_seen |= EOL_SEEN_LF;
+ else if (*p == '\r')
+ {
+ if (p + 1 < pend && *(p + 1) == '\n')
+ {
+ eol_seen |= EOL_SEEN_CRLF;
+ p++;
+ }
+ else
+ eol_seen |= EOL_SEEN_CR;
+ }
+ }
+ if (eol_seen != EOL_SEEN_NONE)
+ adjust_coding_eol_type (coding, eol_seen);
+ }
- if (coding->type == coding_type_ccl)
+ if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
{
- magnification = coding->spec.ccl.encoder.buf_magnification;
- if (coding->eol_type == CODING_EOL_CRLF)
- magnification *= 2;
+ unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
+ unsigned char *pend = p + coding->produced;
+
+ for (; p < pend; p++)
+ if (*p == '\r')
+ *p = '\n';
}
- else if (CODING_REQUIRE_ENCODING (coding))
- magnification = 3;
- else
- magnification = 1;
+ else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
+ {
+ unsigned char *p, *pbeg, *pend;
+ Lisp_Object undo_list;
+
+ move_gap_both (coding->dst_pos + coding->produced_char,
+ coding->dst_pos_byte + coding->produced);
+ undo_list = current_buffer->undo_list;
+ current_buffer->undo_list = Qt;
+ del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
+ current_buffer->undo_list = undo_list;
+ pbeg = GPT_ADDR;
+ pend = pbeg + coding->produced;
- return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
+ for (p = pend - 1; p >= pbeg; p--)
+ if (*p == '\r')
+ {
+ safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
+ pend--;
+ }
+ coding->produced_char -= coding->produced - (pend - pbeg);
+ coding->produced = pend - pbeg;
+ insert_from_gap (coding->produced_char, coding->produced);
+ }
}
-/* Working buffer for code conversion. */
-struct conversion_buffer
+static void
+translate_chars (coding, table)
+ struct coding_system *coding;
+ Lisp_Object table;
{
- int size; /* size of data. */
- int on_stack; /* 1 if allocated by alloca. */
- unsigned char *data;
-};
-
-/* Don't use alloca for allocating memory space larger than this, lest
- we overflow their stack. */
-#define MAX_ALLOCA 16*1024
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ int c;
-/* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */
-#define allocate_conversion_buffer(buf, len) \
- do { \
- if (len < MAX_ALLOCA) \
- { \
- buf.data = (unsigned char *) alloca (len); \
- buf.on_stack = 1; \
- } \
- else \
- { \
- buf.data = (unsigned char *) xmalloc (len); \
- buf.on_stack = 0; \
- } \
- buf.size = len; \
- } while (0)
+ if (coding->chars_at_source)
+ return;
-/* Double the allocated memory for *BUF. */
-static void
-extend_conversion_buffer (buf)
- struct conversion_buffer *buf;
-{
- if (buf->on_stack)
- {
- unsigned char *save = buf->data;
- buf->data = (unsigned char *) xmalloc (buf->size * 2);
- bcopy (save, buf->data, buf->size);
- buf->on_stack = 0;
- }
- else
+ while (charbuf < charbuf_end)
{
- buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
+ c = *charbuf;
+ if (c < 0)
+ charbuf += c;
+ else
+ *charbuf++ = translate_char (table, c);
}
- buf->size *= 2;
-}
-
-/* Free the allocated memory for BUF if it is not on stack. */
-static void
-free_conversion_buffer (buf)
- struct conversion_buffer *buf;
-{
- if (!buf->on_stack)
- xfree (buf->data);
}
-int
-ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
+static int
+produce_chars (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes, encodep;
{
- struct ccl_program *ccl
- = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
- unsigned char *dst = destination;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int produced;
+ int produced_chars = 0;
- ccl->suppress_error = coding->suppress_error;
- ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
- if (encodep)
- {
- /* On encoding, EOL format is converted within ccl_driver. For
- that, setup proper information in the structure CCL. */
- ccl->eol_type = coding->eol_type;
- if (ccl->eol_type ==CODING_EOL_UNDECIDED)
- ccl->eol_type = CODING_EOL_LF;
- ccl->cr_consumed = coding->spec.ccl.cr_carryover;
- ccl->eight_bit_control = coding->dst_multibyte;
- }
- else
- ccl->eight_bit_control = 1;
- ccl->multibyte = coding->src_multibyte;
- if (coding->spec.ccl.eight_bit_carryover[0] != 0)
+ if (! coding->chars_at_source)
{
- /* Move carryover bytes to DESTINATION. */
- unsigned char *p = coding->spec.ccl.eight_bit_carryover;
- while (*p)
- *dst++ = *p++;
- coding->spec.ccl.eight_bit_carryover[0] = 0;
- if (dst_bytes)
- dst_bytes -= dst - destination;
- }
+ /* Characters are in coding->charbuf. */
+ int *buf = coding->charbuf;
+ int *buf_end = buf + coding->charbuf_used;
+ unsigned char *adjusted_dst_end;
- coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
- &(coding->consumed))
- + dst - destination);
+ if (BUFFERP (coding->src_object)
+ && EQ (coding->src_object, coding->dst_object))
+ dst_end = ((unsigned char *) coding->source) + coding->consumed;
+ adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
- if (encodep)
- {
- coding->produced_char = coding->produced;
- coding->spec.ccl.cr_carryover = ccl->cr_consumed;
- }
- else if (!ccl->eight_bit_control)
- {
- /* The produced bytes forms a valid multibyte sequence. */
- coding->produced_char
- = multibyte_chars_in_text (destination, coding->produced);
- coding->spec.ccl.eight_bit_carryover[0] = 0;
- }
- else
- {
- /* On decoding, the destination should always multibyte. But,
- CCL program might have been generated an invalid multibyte
- sequence. Here we make such a sequence valid as
- multibyte. */
- int bytes
- = dst_bytes ? dst_bytes : source + coding->consumed - destination;
-
- if ((coding->consumed < src_bytes
- || !ccl->last_block)
- && coding->produced >= 1
- && destination[coding->produced - 1] >= 0x80)
+ while (buf < buf_end)
{
- /* We should not convert the tailing 8-bit codes to
- multibyte form even if they doesn't form a valid
- multibyte sequence. They may form a valid sequence in
- the next call. */
- int carryover = 0;
-
- if (destination[coding->produced - 1] < 0xA0)
- carryover = 1;
- else if (coding->produced >= 2)
+ int c = *buf++;
+
+ if (dst >= adjusted_dst_end)
{
- if (destination[coding->produced - 2] >= 0x80)
- {
- if (destination[coding->produced - 2] < 0xA0)
- carryover = 2;
- else if (coding->produced >= 3
- && destination[coding->produced - 3] >= 0x80
- && destination[coding->produced - 3] < 0xA0)
- carryover = 3;
- }
+ dst = alloc_destination (coding,
+ buf_end - buf + MAX_MULTIBYTE_LENGTH,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
+ adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
}
- if (carryover > 0)
+ if (c >= 0)
{
- BCOPY_SHORT (destination + coding->produced - carryover,
- coding->spec.ccl.eight_bit_carryover,
- carryover);
- coding->spec.ccl.eight_bit_carryover[carryover] = 0;
- coding->produced -= carryover;
+ if (coding->dst_multibyte
+ || ! CHAR_BYTE8_P (c))
+ CHAR_STRING_ADVANCE (c, dst);
+ else
+ *dst++ = CHAR_TO_BYTE8 (c);
+ produced_chars++;
}
+ else
+ /* This is an annotation datum. (-C) is the length of
+ it. */
+ buf += -c - 1;
}
- coding->produced = str_as_multibyte (destination, bytes,
- coding->produced,
- &(coding->produced_char));
}
-
- switch (ccl->status)
+ else
{
- case CCL_STAT_SUSPEND_BY_SRC:
- coding->result = CODING_FINISH_INSUFFICIENT_SRC;
- break;
- case CCL_STAT_SUSPEND_BY_DST:
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- break;
- case CCL_STAT_QUIT:
- case CCL_STAT_INVALID_CMD:
- coding->result = CODING_FINISH_INTERRUPT;
- break;
- default:
- coding->result = CODING_FINISH_NORMAL;
- break;
- }
- return coding->result;
-}
+ const unsigned char *src = coding->source;
+ const unsigned char *src_end = src + coding->src_bytes;
+ Lisp_Object eol_type;
-/* Decode EOL format of the text at PTR of BYTES length destructively
- according to CODING->eol_type. This is called after the CCL
- program produced a decoded text at PTR. If we do CRLF->LF
- conversion, update CODING->produced and CODING->produced_char. */
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
-static void
-decode_eol_post_ccl (coding, ptr, bytes)
- struct coding_system *coding;
- unsigned char *ptr;
- int bytes;
-{
- Lisp_Object val, saved_coding_symbol;
- unsigned char *pend = ptr + bytes;
- int dummy;
-
- /* Remember the current coding system symbol. We set it back when
- an inconsistent EOL is found so that `last-coding-system-used' is
- set to the coding system that doesn't specify EOL conversion. */
- saved_coding_symbol = coding->symbol;
-
- coding->spec.ccl.cr_carryover = 0;
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- {
- /* Here, to avoid the call of setup_coding_system, we directly
- call detect_eol_type. */
- coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
- if (coding->eol_type == CODING_EOL_INCONSISTENT)
- coding->eol_type = CODING_EOL_LF;
- if (coding->eol_type != CODING_EOL_UNDECIDED)
- {
- val = Fget (coding->symbol, Qeol_type);
- if (VECTORP (val) && XVECTOR (val)->size == 3)
- coding->symbol = XVECTOR (val)->contents[coding->eol_type];
- }
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
-
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- {
- /* We have nothing to do. */
- ptr = pend;
- }
- else if (coding->eol_type == CODING_EOL_CRLF)
- {
- unsigned char *pstart = ptr, *p = ptr;
-
- if (! (coding->mode & CODING_MODE_LAST_BLOCK)
- && *(pend - 1) == '\r')
- {
- /* If the last character is CR, we can't handle it here
- because LF will be in the not-yet-decoded source text.
- Record that the CR is not yet processed. */
- coding->spec.ccl.cr_carryover = 1;
- coding->produced--;
- coding->produced_char--;
- pend--;
- }
- while (ptr < pend)
+ if (coding->src_multibyte != coding->dst_multibyte)
{
- if (*ptr == '\r')
+ if (coding->src_multibyte)
{
- if (ptr + 1 < pend && *(ptr + 1) == '\n')
- {
- *p++ = '\n';
- ptr += 2;
- }
- else
+ int multibytep = 1;
+ int consumed_chars;
+
+ while (1)
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- goto undo_eol_conversion;
- *p++ = *ptr++;
+ const unsigned char *src_base = src;
+ int c;
+
+ ONE_MORE_BYTE (c);
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src == src_end)
+ {
+ coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ goto no_more_source;
+ }
+ if (*src == '\n')
+ c = *src++;
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ if (dst == dst_end)
+ {
+ coding->consumed = src - coding->source;
+
+ if (EQ (coding->src_object, coding->dst_object))
+ dst_end = (unsigned char *) src;
+ if (dst == dst_end)
+ {
+ dst = alloc_destination (coding, src_end - src + 1,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
+ coding_set_source (coding);
+ src = coding->source + coding->consumed;
+ src_end = coding->source + coding->src_bytes;
+ }
+ }
+ *dst++ = c;
+ produced_chars++;
}
+ no_more_source:
+ ;
}
- else if (*ptr == '\n'
- && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- goto undo_eol_conversion;
else
- *p++ = *ptr++;
- continue;
+ while (src < src_end)
+ {
+ int multibytep = 1;
+ int c = *src++;
- undo_eol_conversion:
- /* We have faced with inconsistent EOL format at PTR.
- Convert all LFs before PTR back to CRLFs. */
- for (p--, ptr--; p >= pstart; p--)
- {
- if (*p == '\n')
- *ptr-- = '\n', *ptr-- = '\r';
- else
- *ptr-- = *p;
- }
- /* If carryover is recorded, cancel it because we don't
- convert CRLF anymore. */
- if (coding->spec.ccl.cr_carryover)
- {
- coding->spec.ccl.cr_carryover = 0;
- coding->produced++;
- coding->produced_char++;
- pend++;
- }
- p = ptr = pend;
- coding->eol_type = CODING_EOL_LF;
- coding->symbol = saved_coding_symbol;
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src < src_end
+ && *src == '\n')
+ c = *src++;
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ if (dst >= dst_end - 1)
+ {
+ coding->consumed = src - coding->source;
+
+ if (EQ (coding->src_object, coding->dst_object))
+ dst_end = (unsigned char *) src;
+ if (dst >= dst_end - 1)
+ {
+ dst = alloc_destination (coding, src_end - src + 2,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
+ coding_set_source (coding);
+ src = coding->source + coding->consumed;
+ src_end = coding->source + coding->src_bytes;
+ }
+ }
+ EMIT_ONE_BYTE (c);
+ }
}
- if (p < pend)
+ else
{
- /* As each two-byte sequence CRLF was converted to LF, (PEND
- - P) is the number of deleted characters. */
- coding->produced -= pend - p;
- coding->produced_char -= pend - p;
- }
- }
- else /* i.e. coding->eol_type == CODING_EOL_CR */
- {
- unsigned char *p = ptr;
+ if (!EQ (coding->src_object, coding->dst_object))
+ {
+ int require = coding->src_bytes - coding->dst_bytes;
- for (; ptr < pend; ptr++)
- {
- if (*ptr == '\r')
- *ptr = '\n';
- else if (*ptr == '\n'
- && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ if (require > 0)
+ {
+ EMACS_INT offset = src - coding->source;
+
+ dst = alloc_destination (coding, require, dst);
+ coding_set_source (coding);
+ src = coding->source + offset;
+ src_end = coding->source + coding->src_bytes;
+ }
+ }
+ produced_chars = coding->src_chars;
+ while (src < src_end)
{
- for (; p < ptr; p++)
+ int c = *src++;
+
+ if (c == '\r')
{
- if (*p == '\n')
- *p = '\r';
+ if (EQ (eol_type, Qdos))
+ {
+ if (src < src_end
+ && *src == '\n')
+ c = *src++;
+ produced_chars--;
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
}
- ptr = pend;
- coding->eol_type = CODING_EOL_LF;
- coding->symbol = saved_coding_symbol;
+ *dst++ = c;
}
}
+ coding->consumed = coding->src_bytes;
+ coding->consumed_char = coding->src_chars;
}
+
+ produced = dst - (coding->destination + coding->produced);
+ if (BUFFERP (coding->dst_object))
+ insert_from_gap (produced_chars, produced);
+ coding->produced += produced;
+ coding->produced_char += produced_chars;
+ return produced_chars;
}
-/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
- decoding, it may detect coding system and format of end-of-line if
- those are not yet decided. The source should be unibyte, the
- result is multibyte if CODING->dst_multibyte is nonzero, else
- unibyte. */
+/* Compose text in CODING->object according to the annotation data at
+ CHARBUF. CHARBUF is an array:
+ [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
+ */
-int
-decode_coding (coding, source, destination, src_bytes, dst_bytes)
+static INLINE void
+produce_composition (coding, charbuf)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
+ int *charbuf;
{
- int extra = 0;
-
- if (coding->type == coding_type_undecided)
- detect_coding (coding, source, src_bytes);
-
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && coding->type != coding_type_ccl)
- {
- detect_eol (coding, source, src_bytes);
- /* We had better recover the original eol format if we
- encounter an inconsistent eol format while decoding. */
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
-
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- coding->errors = 0;
- coding->result = CODING_FINISH_NORMAL;
-
- switch (coding->type)
- {
- case coding_type_sjis:
- decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 1);
- break;
-
- case coding_type_iso2022:
- decode_coding_iso2022 (coding, source, destination,
- src_bytes, dst_bytes);
- break;
-
- case coding_type_big5:
- decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 0);
- break;
-
- case coding_type_emacs_mule:
- decode_coding_emacs_mule (coding, source, destination,
- src_bytes, dst_bytes);
- break;
-
- case coding_type_ccl:
- if (coding->spec.ccl.cr_carryover)
- {
- /* Put the CR which was not processed by the previous call
- of decode_eol_post_ccl in DESTINATION. It will be
- decoded together with the following LF by the call to
- decode_eol_post_ccl below. */
- *destination = '\r';
- coding->produced++;
- coding->produced_char++;
- dst_bytes--;
- extra = coding->spec.ccl.cr_carryover;
- }
- ccl_coding_driver (coding, source, destination + extra,
- src_bytes, dst_bytes, 0);
- if (coding->eol_type != CODING_EOL_LF)
- {
- coding->produced += extra;
- coding->produced_char += extra;
- decode_eol_post_ccl (coding, destination, coding->produced);
- }
- break;
-
- default:
- decode_eol (coding, source, destination, src_bytes, dst_bytes);
- }
+ int len;
+ EMACS_INT from, to;
+ enum composition_method method;
+ Lisp_Object components;
- if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->mode & CODING_MODE_LAST_BLOCK
- && coding->consumed == src_bytes)
- coding->result = CODING_FINISH_NORMAL;
+ len = -charbuf[0];
+ from = coding->dst_pos + charbuf[2];
+ to = coding->dst_pos + charbuf[3];
+ method = (enum composition_method) (charbuf[4]);
- if (coding->mode & CODING_MODE_LAST_BLOCK
- && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
+ if (method == COMPOSITION_RELATIVE)
+ components = Qnil;
+ else
{
- const unsigned char *src = source + coding->consumed;
- unsigned char *dst = destination + coding->produced;
-
- src_bytes -= coding->consumed;
- coding->errors++;
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
- while (src_bytes--)
- {
- int c = *src++;
- dst += CHAR_STRING (c, dst);
- coding->produced_char++;
- }
- coding->consumed = coding->consumed_char = src - source;
- coding->produced = dst - destination;
- coding->result = CODING_FINISH_NORMAL;
- }
+ Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
+ int i;
- if (!coding->dst_multibyte)
- {
- coding->produced = str_as_unibyte (destination, coding->produced);
- coding->produced_char = coding->produced;
+ len -= 5;
+ charbuf += 5;
+ for (i = 0; i < len; i++)
+ args[i] = make_number (charbuf[i]);
+ components = (method == COMPOSITION_WITH_ALTCHARS
+ ? Fstring (len, args) : Fvector (len, args));
}
-
- return coding->result;
+ compose_text (from, to, components, Qnil, coding->dst_object);
}
-/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
- multibyteness of the source is CODING->src_multibyte, the
- multibyteness of the result is always unibyte. */
-int
-encode_coding (coding, source, destination, src_bytes, dst_bytes)
+/* Put `charset' property on text in CODING->object according to
+ the annotation data at CHARBUF. CHARBUF is an array:
+ [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
+ */
+
+static INLINE void
+produce_charset (coding, charbuf)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
+ int *charbuf;
{
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- coding->errors = 0;
- coding->result = CODING_FINISH_NORMAL;
-
- switch (coding->type)
- {
- case coding_type_sjis:
- encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 1);
- break;
-
- case coding_type_iso2022:
- encode_coding_iso2022 (coding, source, destination,
- src_bytes, dst_bytes);
- break;
-
- case coding_type_big5:
- encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 0);
- break;
+ EMACS_INT from = coding->dst_pos + charbuf[2];
+ EMACS_INT to = coding->dst_pos + charbuf[3];
+ struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
- case coding_type_emacs_mule:
- encode_coding_emacs_mule (coding, source, destination,
- src_bytes, dst_bytes);
- break;
-
- case coding_type_ccl:
- ccl_coding_driver (coding, source, destination,
- src_bytes, dst_bytes, 1);
- break;
-
- default:
- encode_eol (coding, source, destination, src_bytes, dst_bytes);
- }
-
- if (coding->mode & CODING_MODE_LAST_BLOCK
- && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
- {
- const unsigned char *src = source + coding->consumed;
- unsigned char *dst = destination + coding->produced;
-
- if (coding->type == coding_type_iso2022)
- ENCODE_RESET_PLANE_AND_REGISTER;
- if (COMPOSING_P (coding))
- *dst++ = ISO_CODE_ESC, *dst++ = '1';
- if (coding->consumed < src_bytes)
- {
- int len = src_bytes - coding->consumed;
-
- BCOPY_SHORT (src, dst, len);
- if (coding->src_multibyte)
- len = str_as_unibyte (dst, len);
- dst += len;
- coding->consumed = src_bytes;
- }
- coding->produced = coding->produced_char = dst - destination;
- coding->result = CODING_FINISH_NORMAL;
- }
+ Fput_text_property (make_number (from), make_number (to),
+ Qcharset, CHARSET_NAME (charset),
+ coding->dst_object);
+}
- if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == src_bytes)
- coding->result = CODING_FINISH_NORMAL;
- return coding->result;
-}
+#define CHARBUF_SIZE 0x4000
-/* Scan text in the region between *BEG and *END (byte positions),
- skip characters which we don't have to decode by coding system
- CODING at the head and tail, then set *BEG and *END to the region
- of the text we actually have to convert. The caller should move
- the gap out of the region in advance if the region is from a
- buffer.
+#define ALLOC_CONVERSION_WORK_AREA(coding) \
+ do { \
+ int size = CHARBUF_SIZE;; \
+ \
+ coding->charbuf = NULL; \
+ while (size > 1024) \
+ { \
+ coding->charbuf = (int *) alloca (sizeof (int) * size); \
+ if (coding->charbuf) \
+ break; \
+ size >>= 1; \
+ } \
+ if (! coding->charbuf) \
+ { \
+ coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
+ return coding->result; \
+ } \
+ coding->charbuf_size = size; \
+ } while (0)
- If STR is not NULL, *BEG and *END are indices into STR. */
static void
-shrink_decoding_region (beg, end, coding, str)
- int *beg, *end;
+produce_annotation (coding)
struct coding_system *coding;
- unsigned char *str;
{
- unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
- int eol_conversion;
- Lisp_Object translation_table;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
- if (coding->type == coding_type_ccl
- || coding->type == coding_type_undecided
- || coding->eol_type != CODING_EOL_LF
- || !NILP (coding->post_read_conversion)
- || coding->composing != COMPOSITION_DISABLED)
- {
- /* We can't skip any data. */
- return;
- }
- if (coding->type == coding_type_no_conversion
- || coding->type == coding_type_raw_text
- || coding->type == coding_type_emacs_mule)
- {
- /* We need no conversion, but don't have to skip any data here.
- Decoding routine handles them effectively anyway. */
- return;
- }
-
- translation_table = coding->translation_table_for_decode;
- if (NILP (translation_table) && !NILP (Venable_character_translation))
- translation_table = Vstandard_translation_table_for_decode;
- if (CHAR_TABLE_P (translation_table))
- {
- int i;
- for (i = 0; i < 128; i++)
- if (!NILP (CHAR_TABLE_REF (translation_table, i)))
- break;
- if (i < 128)
- /* Some ASCII character should be translated. We give up
- shrinking. */
- return;
- }
-
- if (coding->heading_ascii >= 0)
- /* Detection routine has already found how much we can skip at the
- head. */
- *beg += coding->heading_ascii;
-
- if (str)
- {
- begp_orig = begp = str + *beg;
- endp_orig = endp = str + *end;
- }
- else
- {
- begp_orig = begp = BYTE_POS_ADDR (*beg);
- endp_orig = endp = begp + *end - *beg;
- }
-
- eol_conversion = (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF);
+ if (NILP (coding->dst_object))
+ return;
- switch (coding->type)
+ while (charbuf < charbuf_end)
{
- case coding_type_sjis:
- case coding_type_big5:
- /* We can skip all ASCII characters at the head. */
- if (coding->heading_ascii < 0)
- {
- if (eol_conversion)
- while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
- else
- while (begp < endp && *begp < 0x80) begp++;
- }
- /* We can skip all ASCII characters at the tail except for the
- second byte of SJIS or BIG5 code. */
- if (eol_conversion)
- while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
+ if (*charbuf >= 0)
+ charbuf++;
else
- while (begp < endp && endp[-1] < 0x80) endp--;
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
- endp++;
- break;
-
- case coding_type_iso2022:
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
- /* We can't skip any data. */
- break;
- if (coding->heading_ascii < 0)
{
- /* We can skip all ASCII characters at the head except for a
- few control codes. */
- while (begp < endp && (c = *begp) < 0x80
- && c != ISO_CODE_CR && c != ISO_CODE_SO
- && c != ISO_CODE_SI && c != ISO_CODE_ESC
- && (!eol_conversion || c != ISO_CODE_LF))
- begp++;
+ int len = -*charbuf;
+ switch (charbuf[1])
+ {
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ produce_composition (coding, charbuf);
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ produce_charset (coding, charbuf);
+ break;
+ default:
+ abort ();
+ }
+ charbuf += len;
}
- switch (coding->category_idx)
- {
- case CODING_CATEGORY_IDX_ISO_8_1:
- case CODING_CATEGORY_IDX_ISO_8_2:
- /* We can skip all ASCII characters at the tail. */
- if (eol_conversion)
- while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
- else
- while (begp < endp && endp[-1] < 0x80) endp--;
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- break;
+ }
+}
- case CODING_CATEGORY_IDX_ISO_7:
- case CODING_CATEGORY_IDX_ISO_7_TIGHT:
- {
- /* We can skip all characters at the tail except for 8-bit
- codes and ESC and the following 2-byte at the tail. */
- unsigned char *eight_bit = NULL;
+/* Decode the data at CODING->src_object into CODING->dst_object.
+ CODING->src_object is a buffer, a string, or nil.
+ CODING->dst_object is a buffer.
- if (eol_conversion)
- while (begp < endp
- && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
- {
- if (!eight_bit && c & 0x80) eight_bit = endp;
- endp--;
- }
- else
- while (begp < endp
- && (c = endp[-1]) != ISO_CODE_ESC)
- {
- if (!eight_bit && c & 0x80) eight_bit = endp;
- endp--;
- }
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig
- && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- if (begp < endp && endp[-1] == ISO_CODE_ESC)
- {
- if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
- /* This is an ASCII designation sequence. We can
- surely skip the tail. But, if we have
- encountered an 8-bit code, skip only the codes
- after that. */
- endp = eight_bit ? eight_bit : endp + 2;
- else
- /* Hmmm, we can't skip the tail. */
- endp = endp_orig;
- }
- else if (eight_bit)
- endp = eight_bit;
- }
- }
- break;
+ If CODING->src_object is a buffer, it must be the current buffer.
+ In this case, if CODING->src_pos is positive, it is a position of
+ the source text in the buffer, otherwise, the source text is in the
+ gap area of the buffer, and CODING->src_pos specifies the offset of
+ the text from GPT (which must be the same as PT). If this is the
+ same buffer as CODING->dst_object, CODING->src_pos must be
+ negative.
- default:
- abort ();
- }
- *beg += begp - begp_orig;
- *end += endp - endp_orig;
- return;
-}
+ If CODING->src_object is a string, CODING->src_pos in an index to
+ that string.
-/* Like shrink_decoding_region but for encoding. */
+ If CODING->src_object is nil, CODING->source must already point to
+ the non-relocatable memory area. In this case, CODING->src_pos is
+ an offset from CODING->source.
-static void
-shrink_encoding_region (beg, end, coding, str)
- int *beg, *end;
+ The decoded data is inserted at the current point of the buffer
+ CODING->dst_object.
+*/
+
+static int
+decode_coding (coding)
struct coding_system *coding;
- unsigned char *str;
{
- unsigned char *begp_orig, *begp, *endp_orig, *endp;
- int eol_conversion;
- Lisp_Object translation_table;
+ Lisp_Object attrs;
- if (coding->type == coding_type_ccl
- || coding->eol_type == CODING_EOL_CRLF
- || coding->eol_type == CODING_EOL_CR
- || (coding->cmp_data && coding->cmp_data->used > 0))
- {
- /* We can't skip any data. */
- return;
- }
- if (coding->type == coding_type_no_conversion
- || coding->type == coding_type_raw_text
- || coding->type == coding_type_emacs_mule
- || coding->type == coding_type_undecided)
- {
- /* We need no conversion, but don't have to skip any data here.
- Encoding routine handles them effectively anyway. */
- return;
- }
+ if (BUFFERP (coding->src_object)
+ && coding->src_pos > 0
+ && coding->src_pos < GPT
+ && coding->src_pos + coding->src_chars > GPT)
+ move_gap_both (coding->src_pos, coding->src_pos_byte);
- translation_table = coding->translation_table_for_encode;
- if (NILP (translation_table) && !NILP (Venable_character_translation))
- translation_table = Vstandard_translation_table_for_encode;
- if (CHAR_TABLE_P (translation_table))
+ if (BUFFERP (coding->dst_object))
{
- int i;
- for (i = 0; i < 128; i++)
- if (!NILP (CHAR_TABLE_REF (translation_table, i)))
- break;
- if (i < 128)
- /* Some ASCII character should be translated. We give up
- shrinking. */
- return;
+ if (current_buffer != XBUFFER (coding->dst_object))
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ if (GPT != PT)
+ move_gap_both (PT, PT_BYTE);
}
- if (str)
- {
- begp_orig = begp = str + *beg;
- endp_orig = endp = str + *end;
- }
- else
- {
- begp_orig = begp = BYTE_POS_ADDR (*beg);
- endp_orig = endp = begp + *end - *beg;
- }
+ coding->consumed = coding->consumed_char = 0;
+ coding->produced = coding->produced_char = 0;
+ coding->chars_at_source = 0;
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->errors = 0;
- eol_conversion = (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF);
+ ALLOC_CONVERSION_WORK_AREA (coding);
- /* Here, we don't have to check coding->pre_write_conversion because
- the caller is expected to have handled it already. */
- switch (coding->type)
+ attrs = CODING_ID_ATTRS (coding->id);
+
+ do
{
- case coding_type_iso2022:
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
- /* We can't skip any data. */
- break;
- if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+ coding_set_source (coding);
+ coding->annotated = 0;
+ (*(coding->decoder)) (coding);
+ if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
+ translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
+ else if (!NILP (Vstandard_translation_table_for_decode))
+ translate_chars (coding, Vstandard_translation_table_for_decode);
+ coding_set_destination (coding);
+ produce_chars (coding);
+ if (coding->annotated)
+ produce_annotation (coding);
+ }
+ while (coding->consumed < coding->src_bytes
+ && ! coding->result);
+
+ if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
+ && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
+ && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
+ decode_eol (coding);
+
+ coding->carryover_bytes = 0;
+ if (coding->consumed < coding->src_bytes)
+ {
+ int nbytes = coding->src_bytes - coding->consumed;
+ const unsigned char *src;
+
+ coding_set_source (coding);
+ coding_set_destination (coding);
+ src = coding->source + coding->consumed;
+
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
{
- unsigned char *bol = begp;
- while (begp < endp && *begp < 0x80)
+ /* Flush out unprocessed data as binary chars. We are sure
+ that the number of data is less than the size of
+ coding->charbuf. */
+ while (nbytes-- > 0)
{
- begp++;
- if (begp[-1] == '\n')
- bol = begp;
+ int c = *src++;
+
+ coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
}
- begp = bol;
- goto label_skip_tail;
+ produce_chars (coding);
}
- /* fall down ... */
-
- case coding_type_sjis:
- case coding_type_big5:
- /* We can skip all ASCII characters at the head and tail. */
- if (eol_conversion)
- while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
- else
- while (begp < endp && *begp < 0x80) begp++;
- label_skip_tail:
- if (eol_conversion)
- while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
else
- while (begp < endp && *(endp - 1) < 0x80) endp--;
- break;
-
- default:
- abort ();
+ {
+ /* Record unprocessed bytes in coding->carryover. We are
+ sure that the number of data is less than the size of
+ coding->carryover. */
+ unsigned char *p = coding->carryover;
+
+ coding->carryover_bytes = nbytes;
+ while (nbytes-- > 0)
+ *p++ = *src++;
+ }
+ coding->consumed = coding->src_bytes;
}
- *beg += begp - begp_orig;
- *end += endp - endp_orig;
- return;
+ return coding->result;
}
-/* As shrinking conversion region requires some overhead, we don't try
- shrinking if the length of conversion region is less than this
- value. */
-static int shrink_conversion_region_threshhold = 1024;
-#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
- do { \
- if (*(end) - *(beg) > shrink_conversion_region_threshhold) \
- { \
- if (encodep) shrink_encoding_region (beg, end, coding, str); \
- else shrink_decoding_region (beg, end, coding, str); \
- } \
- } while (0)
+/* Extract an annotation datum from a composition starting at POS and
+ ending before LIMIT of CODING->src_object (buffer or string), store
+ the data in BUF, set *STOP to a starting position of the next
+ composition (if any) or to LIMIT, and return the address of the
+ next element of BUF.
-static Lisp_Object
-code_convert_region_unwind (arg)
- Lisp_Object arg;
-{
- inhibit_pre_post_conversion = 0;
- Vlast_coding_system_used = arg;
- return Qnil;
-}
+ If such an annotation is not found, set *STOP to a starting
+ position of a composition after POS (if any) or to LIMIT, and
+ return BUF. */
-/* Store information about all compositions in the range FROM and TO
- of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
- buffer or a string, defaults to the current buffer. */
-
-void
-coding_save_composition (coding, from, to, obj)
+static INLINE int *
+handle_composition_annotation (pos, limit, coding, buf, stop)
+ EMACS_INT pos, limit;
struct coding_system *coding;
- int from, to;
- Lisp_Object obj;
+ int *buf;
+ EMACS_INT *stop;
{
+ EMACS_INT start, end;
Lisp_Object prop;
- int start, end;
- if (coding->composing == COMPOSITION_DISABLED)
- return;
- if (!coding->cmp_data)
- coding_allocate_composition_data (coding, from);
- if (!find_composition (from, to, &start, &end, &prop, obj)
- || end > to)
- return;
- if (start < from
- && (!find_composition (end, to, &start, &end, &prop, obj)
- || end > to))
- return;
- coding->composing = COMPOSITION_NO;
- do
+ if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
+ || end > limit)
+ *stop = limit;
+ else if (start > pos)
+ *stop = start;
+ else
{
- if (COMPOSITION_VALID_P (start, end, prop))
+ if (start == pos)
{
+ /* We found a composition. Store the corresponding
+ annotation data in BUF. */
+ int *head = buf;
enum composition_method method = COMPOSITION_METHOD (prop);
- if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
- >= COMPOSITION_DATA_SIZE)
- coding_allocate_composition_data (coding, from);
- /* For relative composition, we remember start and end
- positions, for the other compositions, we also remember
- components. */
- CODING_ADD_COMPOSITION_START (coding, start - from, method);
+ int nchars = COMPOSITION_LENGTH (prop);
+
+ ADD_COMPOSITION_DATA (buf, 0, nchars, method);
if (method != COMPOSITION_RELATIVE)
{
- /* We must store a*/
- Lisp_Object val, ch;
+ Lisp_Object components;
+ int len, i, i_byte;
- val = COMPOSITION_COMPONENTS (prop);
- if (CONSP (val))
- while (CONSP (val))
- {
- ch = XCAR (val), val = XCDR (val);
- CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
- }
- else if (VECTORP (val) || STRINGP (val))
+ components = COMPOSITION_COMPONENTS (prop);
+ if (VECTORP (components))
{
- int len = (VECTORP (val)
- ? XVECTOR (val)->size : SCHARS (val));
- int i;
+ len = XVECTOR (components)->size;
for (i = 0; i < len; i++)
+ *buf++ = XINT (AREF (components, i));
+ }
+ else if (STRINGP (components))
+ {
+ len = SCHARS (components);
+ i = i_byte = 0;
+ while (i < len)
{
- ch = (STRINGP (val)
- ? Faref (val, make_number (i))
- : XVECTOR (val)->contents[i]);
- CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
+ FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
+ buf++;
}
}
- else /* INTEGERP (val) */
- CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
+ else if (INTEGERP (components))
+ {
+ len = 1;
+ *buf++ = XINT (components);
+ }
+ else if (CONSP (components))
+ {
+ for (len = 0; CONSP (components);
+ len++, components = XCDR (components))
+ *buf++ = XINT (XCAR (components));
+ }
+ else
+ abort ();
+ *head -= len;
}
- CODING_ADD_COMPOSITION_END (coding, end - from);
}
- start = end;
+
+ if (find_composition (end, limit, &start, &end, &prop,
+ coding->src_object)
+ && end <= limit)
+ *stop = start;
+ else
+ *stop = limit;
}
- while (start < to
- && find_composition (start, to, &start, &end, &prop, obj)
- && end <= to);
+ return buf;
+}
+
+
+/* Extract an annotation datum from a text property `charset' at POS of
+ CODING->src_object (buffer of string), store the data in BUF, set
+ *STOP to the position where the value of `charset' property changes
+ (limiting by LIMIT), and return the address of the next element of
+ BUF.
+
+ If the property value is nil, set *STOP to the position where the
+ property value is non-nil (limiting by LIMIT), and return BUF. */
- /* Make coding->cmp_data point to the first memory block. */
- while (coding->cmp_data->prev)
- coding->cmp_data = coding->cmp_data->prev;
- coding->cmp_data_start = 0;
+static INLINE int *
+handle_charset_annotation (pos, limit, coding, buf, stop)
+ EMACS_INT pos, limit;
+ struct coding_system *coding;
+ int *buf;
+ EMACS_INT *stop;
+{
+ Lisp_Object val, next;
+ int id;
+
+ val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
+ if (! NILP (val) && CHARSETP (val))
+ id = XINT (CHARSET_SYMBOL_ID (val));
+ else
+ id = -1;
+ ADD_CHARSET_DATA (buf, 0, 0, id);
+ next = Fnext_single_property_change (make_number (pos), Qcharset,
+ coding->src_object,
+ make_number (limit));
+ *stop = XINT (next);
+ return buf;
}
-/* Reflect the saved information about compositions to OBJ.
- CODING->cmp_data points to a memory block for the information. OBJ
- is a buffer or a string, defaults to the current buffer. */
-void
-coding_restore_composition (coding, obj)
+static void
+consume_chars (coding)
struct coding_system *coding;
- Lisp_Object obj;
{
- struct composition_data *cmp_data = coding->cmp_data;
+ int *buf = coding->charbuf;
+ int *buf_end = coding->charbuf + coding->charbuf_size;
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ EMACS_INT pos = coding->src_pos + coding->consumed_char;
+ EMACS_INT end_pos = coding->src_pos + coding->src_chars;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object eol_type;
+ int c;
+ EMACS_INT stop, stop_composition, stop_charset;
- if (!cmp_data)
- return;
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (VECTORP (eol_type))
+ eol_type = Qunix;
+
+ /* Note: composition handling is not yet implemented. */
+ coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
- while (cmp_data->prev)
- cmp_data = cmp_data->prev;
+ if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
+ stop = stop_composition = pos;
+ else
+ stop = stop_composition = end_pos;
+ if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
+ stop = stop_charset = pos;
+ else
+ stop_charset = end_pos;
- while (cmp_data)
+ /* Compensate for CRLF and annotation. */
+ buf_end -= 1 + MAX_ANNOTATION_LENGTH;
+ while (buf < buf_end)
{
- int i;
+ if (pos == stop)
+ {
+ if (pos == end_pos)
+ break;
+ if (pos == stop_composition)
+ buf = handle_composition_annotation (pos, end_pos, coding,
+ buf, &stop_composition);
+ if (pos == stop_charset)
+ buf = handle_charset_annotation (pos, end_pos, coding,
+ buf, &stop_charset);
+ stop = (stop_composition < stop_charset
+ ? stop_composition : stop_charset);
+ }
- for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
- i += cmp_data->data[i])
+ if (! multibytep)
{
- int *data = cmp_data->data + i;
- enum composition_method method = (enum composition_method) data[3];
- Lisp_Object components;
+ EMACS_INT bytes;
- if (method == COMPOSITION_RELATIVE)
- components = Qnil;
+ if (! CODING_FOR_UNIBYTE (coding)
+ && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
+ c = STRING_CHAR_ADVANCE (src), pos += bytes;
else
+ c = *src++, pos++;
+ }
+ else
+ c = STRING_CHAR_ADVANCE (src), pos++;
+ if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ c = '\n';
+ if (! EQ (eol_type, Qunix))
+ {
+ if (c == '\n')
{
- int len = data[0] - 4, j;
- Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
-
- if (method == COMPOSITION_WITH_RULE_ALTCHARS
- && len % 2 == 0)
- len --;
- for (j = 0; j < len; j++)
- args[j] = make_number (data[4 + j]);
- components = (method == COMPOSITION_WITH_ALTCHARS
- ? Fstring (len, args) : Fvector (len, args));
+ if (EQ (eol_type, Qdos))
+ *buf++ = '\r';
+ else
+ c = '\r';
}
- compose_text (data[1], data[2], components, Qnil, obj);
}
- cmp_data = cmp_data->next;
+ *buf++ = c;
}
+
+ coding->consumed = src - coding->source;
+ coding->consumed_char = pos - coding->src_pos;
+ coding->charbuf_used = buf - coding->charbuf;
+ coding->chars_at_source = 0;
}
-/* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
- text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
- coding system CODING, and return the status code of code conversion
- (currently, this value has no meaning).
- How many characters (and bytes) are converted to how many
- characters (and bytes) are recorded in members of the structure
- CODING.
+/* Encode the text at CODING->src_object into CODING->dst_object.
+ CODING->src_object is a buffer or a string.
+ CODING->dst_object is a buffer or nil.
- If REPLACE is nonzero, we do various things as if the original text
- is deleted and a new text is inserted. See the comments in
- replace_range (insdel.c) to know what we are doing.
+ If CODING->src_object is a buffer, it must be the current buffer.
+ In this case, if CODING->src_pos is positive, it is a position of
+ the source text in the buffer, otherwise. the source text is in the
+ gap area of the buffer, and coding->src_pos specifies the offset of
+ the text from GPT (which must be the same as PT). If this is the
+ same buffer as CODING->dst_object, CODING->src_pos must be
+ negative and CODING should not have `pre-write-conversion'.
- If REPLACE is zero, it is assumed that the source text is unibyte.
- Otherwise, it is assumed that the source text is multibyte. */
+ If CODING->src_object is a string, CODING should not have
+ `pre-write-conversion'.
-int
-code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
- int from, from_byte, to, to_byte, encodep, replace;
+ If CODING->dst_object is a buffer, the encoded data is inserted at
+ the current point of that buffer.
+
+ If CODING->dst_object is nil, the encoded data is placed at the
+ memory area specified by CODING->destination. */
+
+static int
+encode_coding (coding)
struct coding_system *coding;
{
- int len = to - from, len_byte = to_byte - from_byte;
- int nchars_del = 0, nbytes_del = 0;
- int require, inserted, inserted_byte;
- int head_skip, tail_skip, total_skip = 0;
- Lisp_Object saved_coding_symbol;
- int first = 1;
- unsigned char *src, *dst;
- Lisp_Object deletion;
- int orig_point = PT, orig_len = len;
- int prev_Z;
- int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
+ Lisp_Object attrs;
- deletion = Qnil;
- saved_coding_symbol = coding->symbol;
+ attrs = CODING_ID_ATTRS (coding->id);
- if (from < PT && PT < to)
+ if (BUFFERP (coding->dst_object))
{
- TEMP_SET_PT_BOTH (from, from_byte);
- orig_point = from;
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ coding->dst_multibyte
+ = ! NILP (current_buffer->enable_multibyte_characters);
}
- if (replace)
- {
- int saved_from = from;
- int saved_inhibit_modification_hooks;
+ coding->consumed = coding->consumed_char = 0;
+ coding->produced = coding->produced_char = 0;
+ coding->result = CODING_RESULT_SUCCESS;
+ coding->errors = 0;
- prepare_to_modify_buffer (from, to, &from);
- if (saved_from != from)
- {
- to = from + len;
- from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
- len_byte = to_byte - from_byte;
- }
+ ALLOC_CONVERSION_WORK_AREA (coding);
- /* The code conversion routine can not preserve text properties
- for now. So, we must remove all text properties in the
- region. Here, we must suppress all modification hooks. */
- saved_inhibit_modification_hooks = inhibit_modification_hooks;
- inhibit_modification_hooks = 1;
- Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
- inhibit_modification_hooks = saved_inhibit_modification_hooks;
- }
+ do {
+ coding_set_source (coding);
+ consume_chars (coding);
- if (! encodep && CODING_REQUIRE_DETECTION (coding))
- {
- /* We must detect encoding of text and eol format. */
+ if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
+ translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
+ else if (!NILP (Vstandard_translation_table_for_encode))
+ translate_chars (coding, Vstandard_translation_table_for_encode);
- if (from < GPT && to > GPT)
- move_gap_both (from, from_byte);
- if (coding->type == coding_type_undecided)
- {
- detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
- if (coding->type == coding_type_undecided)
- {
- /* It seems that the text contains only ASCII, but we
- should not leave it undecided because the deeper
- decoding routine (decode_coding) tries to detect the
- encodings again in vain. */
- coding->type = coding_type_emacs_mule;
- coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
- /* As emacs-mule decoder will handle composition, we
- need this setting to allocate coding->cmp_data
- later. */
- coding->composing = COMPOSITION_NO;
- }
- }
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && coding->type != coding_type_ccl)
- {
- detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = CODING_EOL_LF;
- /* We had better recover the original eol format if we
- encounter an inconsistent eol format while decoding. */
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
- }
+ coding_set_destination (coding);
+ (*(coding->encoder)) (coding);
+ } while (coding->consumed_char < coding->src_chars);
- /* Now we convert the text. */
+ if (BUFFERP (coding->dst_object))
+ insert_from_gap (coding->produced_char, coding->produced);
- /* For encoding, we must process pre-write-conversion in advance. */
- if (! inhibit_pre_post_conversion
- && encodep
- && SYMBOLP (coding->pre_write_conversion)
- && ! NILP (Ffboundp (coding->pre_write_conversion)))
- {
- /* The function in pre-write-conversion may put a new text in a
- new buffer. */
- struct buffer *prev = current_buffer;
- Lisp_Object new;
+ return (coding->result);
+}
- record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
- /* We should not call any more pre-write/post-read-conversion
- functions while this pre-write-conversion is running. */
- inhibit_pre_post_conversion = 1;
- call2 (coding->pre_write_conversion,
- make_number (from), make_number (to));
- inhibit_pre_post_conversion = 0;
- /* Discard the unwind protect. */
- specpdl_ptr--;
- if (current_buffer != prev)
- {
- len = ZV - BEGV;
- new = Fcurrent_buffer ();
- set_buffer_internal_1 (prev);
- del_range_2 (from, from_byte, to, to_byte, 0);
- TEMP_SET_PT_BOTH (from, from_byte);
- insert_from_buffer (XBUFFER (new), 1, len, 0);
- Fkill_buffer (new);
- if (orig_point >= to)
- orig_point += len - orig_len;
- else if (orig_point > from)
- orig_point = from;
- orig_len = len;
- to = from + len;
- from_byte = CHAR_TO_BYTE (from);
- to_byte = CHAR_TO_BYTE (to);
- len_byte = to_byte - from_byte;
- TEMP_SET_PT_BOTH (from, from_byte);
- }
- }
+/* Stack of working buffers used in code conversion. An nil element
+ means that the code conversion of that level is not using a working
+ buffer. */
+Lisp_Object Vcode_conversion_work_buf_list;
+
+/* A working buffer used by the top level conversion. */
+Lisp_Object Vcode_conversion_reused_work_buf;
+
+
+/* Return a working buffer that can be freely used by the following
+ code conversion. MULTIBYTEP specifies the multibyteness of the
+ buffer. */
+
+Lisp_Object
+make_conversion_work_buffer (multibytep, depth)
+ int multibytep, depth;
+{
+ struct buffer *current = current_buffer;
+ Lisp_Object buf, name;
- if (replace)
+ if (depth == 0)
+ {
+ if (NILP (Vcode_conversion_reused_work_buf))
+ Vcode_conversion_reused_work_buf
+ = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
+ buf = Vcode_conversion_reused_work_buf;
+ }
+ else
{
- if (! EQ (current_buffer->undo_list, Qt))
- deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
+ if (depth < 0)
+ {
+ name = build_string (" *code-converting-work*");
+ name = Fgenerate_new_buffer_name (name, Qnil);
+ }
else
{
- nchars_del = to - from;
- nbytes_del = to_byte - from_byte;
+ char str[128];
+
+ sprintf (str, " *code-converting-work*<%d>", depth);
+ name = build_string (str);
}
+ buf = Fget_buffer_create (name);
}
+ set_buffer_internal (XBUFFER (buf));
+ current_buffer->undo_list = Qt;
+ Ferase_buffer ();
+ Fset_buffer_multibyte (multibytep ? Qt : Qnil);
+ set_buffer_internal (current);
+ return buf;
+}
+
+static Lisp_Object
+code_conversion_restore (buffer)
+ Lisp_Object buffer;
+{
+ Lisp_Object workbuf;
+
+ workbuf = XCAR (Vcode_conversion_work_buf_list);
+ if (! NILP (workbuf)
+ && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
+ && ! NILP (Fbuffer_live_p (workbuf)))
+ Fkill_buffer (workbuf);
+ Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
+ set_buffer_internal (XBUFFER (buffer));
+ return Qnil;
+}
+
+static Lisp_Object
+code_conversion_save (buffer, with_work_buf, multibyte)
+ Lisp_Object buffer;
+ int with_work_buf, multibyte;
+{
+ Lisp_Object workbuf;
- if (coding->composing != COMPOSITION_DISABLED)
+ if (with_work_buf)
{
- if (encodep)
- coding_save_composition (coding, from, to, Fcurrent_buffer ());
- else
- coding_allocate_composition_data (coding, from);
+ int depth = XINT (Flength (Vcode_conversion_work_buf_list));
+
+ workbuf = make_conversion_work_buffer (multibyte, depth);
}
+ else
+ workbuf = Qnil;
+ Vcode_conversion_work_buf_list
+ = Fcons (workbuf, Vcode_conversion_work_buf_list);
+ record_unwind_protect (code_conversion_restore, buffer);
+ return workbuf;
+}
- /* Try to skip the heading and tailing ASCIIs. */
- if (coding->type != coding_type_ccl)
- {
- int from_byte_orig = from_byte, to_byte_orig = to_byte;
+int
+decode_coding_gap (coding, chars, bytes)
+ struct coding_system *coding;
+ EMACS_INT chars, bytes;
+{
+ int count = specpdl_ptr - specpdl;
+ Lisp_Object attrs;
+ Lisp_Object buffer;
- if (from < GPT && GPT < to)
- move_gap_both (from, from_byte);
- SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
- if (from_byte == to_byte
- && (encodep || NILP (coding->post_read_conversion))
- && ! CODING_REQUIRE_FLUSHING (coding))
- {
- coding->produced = len_byte;
- coding->produced_char = len;
- if (!replace)
- /* We must record and adjust for this new text now. */
- adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
- return 0;
- }
+ buffer = Fcurrent_buffer ();
+ code_conversion_save (buffer, 0, 0);
+
+ coding->src_object = buffer;
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_pos = -chars;
+ coding->src_pos_byte = -bytes;
+ coding->src_multibyte = chars < bytes;
+ coding->dst_object = buffer;
+ coding->dst_pos = PT;
+ coding->dst_pos_byte = PT_BYTE;
+ coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ coding->mode |= CODING_MODE_LAST_BLOCK;
+
+ if (CODING_REQUIRE_DETECTION (coding))
+ detect_coding (coding);
- head_skip = from_byte - from_byte_orig;
- tail_skip = to_byte_orig - to_byte;
- total_skip = head_skip + tail_skip;
- from += head_skip;
- to -= tail_skip;
- len -= total_skip; len_byte -= total_skip;
+ decode_coding (coding);
+
+ attrs = CODING_ID_ATTRS (coding->id);
+ if (! NILP (CODING_ATTR_POST_READ (attrs)))
+ {
+ EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
+ Lisp_Object val;
+
+ TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
+ val = call1 (CODING_ATTR_POST_READ (attrs),
+ make_number (coding->produced_char));
+ CHECK_NATNUM (val);
+ coding->produced_char += Z - prev_Z;
+ coding->produced += Z_BYTE - prev_Z_BYTE;
}
- /* For conversion, we must put the gap before the text in addition to
- making the gap larger for efficient decoding. The required gap
- size starts from 2000 which is the magic number used in make_gap.
- But, after one batch of conversion, it will be incremented if we
- find that it is not enough . */
- require = 2000;
+ unbind_to (count, Qnil);
+ return coding->result;
+}
- if (GAP_SIZE < require)
- make_gap (require - GAP_SIZE);
- move_gap_both (from, from_byte);
+int
+encode_coding_gap (coding, chars, bytes)
+ struct coding_system *coding;
+ EMACS_INT chars, bytes;
+{
+ int count = specpdl_ptr - specpdl;
+ Lisp_Object buffer;
- inserted = inserted_byte = 0;
+ buffer = Fcurrent_buffer ();
+ code_conversion_save (buffer, 0, 0);
- GAP_SIZE += len_byte;
- ZV -= len;
- Z -= len;
- ZV_BYTE -= len_byte;
- Z_BYTE -= len_byte;
+ coding->src_object = buffer;
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_pos = -chars;
+ coding->src_pos_byte = -bytes;
+ coding->src_multibyte = chars < bytes;
+ coding->dst_object = coding->src_object;
+ coding->dst_pos = PT;
+ coding->dst_pos_byte = PT_BYTE;
- if (GPT - BEG < BEG_UNCHANGED)
- BEG_UNCHANGED = GPT - BEG;
- if (Z - GPT < END_UNCHANGED)
- END_UNCHANGED = Z - GPT;
+ encode_coding (coding);
- if (!encodep && coding->src_multibyte)
- {
- /* Decoding routines expects that the source text is unibyte.
- We must convert 8-bit characters of multibyte form to
- unibyte. */
- int len_byte_orig = len_byte;
- len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
- if (len_byte < len_byte_orig)
- safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
- len_byte);
- coding->src_multibyte = 0;
- }
+ unbind_to (count, Qnil);
+ return coding->result;
+}
- for (;;)
- {
- int result;
- /* The buffer memory is now:
- +--------+converted-text+---------+-------original-text-------+---+
- |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
- |<---------------------- GAP ----------------------->| */
- src = GAP_END_ADDR - len_byte;
- dst = GPT_ADDR + inserted_byte;
+/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
+ SRC_OBJECT into DST_OBJECT by coding context CODING.
- if (encodep)
- result = encode_coding (coding, src, dst, len_byte, 0);
- else
- {
- if (coding->composing != COMPOSITION_DISABLED)
- coding->cmp_data->char_offset = from + inserted;
- result = decode_coding (coding, src, dst, len_byte, 0);
- }
+ SRC_OBJECT is a buffer, a string, or Qnil.
- /* The buffer memory is now:
- +--------+-------converted-text----+--+------original-text----+---+
- |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
- |<---------------------- GAP ----------------------->| */
+ If it is a buffer, the text is at point of the buffer. FROM and TO
+ are positions in the buffer.
- inserted += coding->produced_char;
- inserted_byte += coding->produced;
- len_byte -= coding->consumed;
+ If it is a string, the text is at the beginning of the string.
+ FROM and TO are indices to the string.
- if (result == CODING_FINISH_INSUFFICIENT_CMP)
- {
- coding_allocate_composition_data (coding, from + inserted);
- continue;
- }
+ If it is nil, the text is at coding->source. FROM and TO are
+ indices to coding->source.
- src += coding->consumed;
- dst += coding->produced;
+ DST_OBJECT is a buffer, Qt, or Qnil.
- if (result == CODING_FINISH_NORMAL)
- {
- src += len_byte;
- break;
- }
- if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
- {
- unsigned char *pend = dst, *p = pend - inserted_byte;
- Lisp_Object eol_type;
+ If it is a buffer, the decoded text is inserted at point of the
+ buffer. If the buffer is the same as SRC_OBJECT, the source text
+ is deleted.
- /* Encode LFs back to the original eol format (CR or CRLF). */
- if (coding->eol_type == CODING_EOL_CR)
- {
- while (p < pend) if (*p++ == '\n') p[-1] = '\r';
- }
- else
- {
- int count = 0;
+ If it is Qt, a string is made from the decoded text, and
+ set in CODING->dst_object.
- while (p < pend) if (*p++ == '\n') count++;
- if (src - dst < count)
- {
- /* We don't have sufficient room for encoding LFs
- back to CRLF. We must record converted and
- not-yet-converted text back to the buffer
- content, enlarge the gap, then record them out of
- the buffer contents again. */
- int add = len_byte + inserted_byte;
-
- GAP_SIZE -= add;
- ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
- GPT += inserted_byte; GPT_BYTE += inserted_byte;
- make_gap (count - GAP_SIZE);
- GAP_SIZE += add;
- ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
- GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
- /* Don't forget to update SRC, DST, and PEND. */
- src = GAP_END_ADDR - len_byte;
- dst = GPT_ADDR + inserted_byte;
- pend = dst;
- }
- inserted += count;
- inserted_byte += count;
- coding->produced += count;
- p = dst = pend + count;
- while (count)
- {
- *--p = *--pend;
- if (*p == '\n') count--, *--p = '\r';
- }
- }
+ If it is Qnil, the decoded text is stored at CODING->destination.
+ The caller must allocate CODING->dst_bytes bytes at
+ CODING->destination by xmalloc. If the decoded text is longer than
+ CODING->dst_bytes, CODING->destination is relocated by xrealloc.
+ */
- /* Suppress eol-format conversion in the further conversion. */
- coding->eol_type = CODING_EOL_LF;
+void
+decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
+ dst_object)
+ struct coding_system *coding;
+ Lisp_Object src_object;
+ EMACS_INT from, from_byte, to, to_byte;
+ Lisp_Object dst_object;
+{
+ int count = specpdl_ptr - specpdl;
+ unsigned char *destination;
+ EMACS_INT dst_bytes;
+ EMACS_INT chars = to - from;
+ EMACS_INT bytes = to_byte - from_byte;
+ Lisp_Object attrs;
+ Lisp_Object buffer;
+ int saved_pt = -1, saved_pt_byte;
- /* Set the coding system symbol to that for Unix-like EOL. */
- eol_type = Fget (saved_coding_symbol, Qeol_type);
- if (VECTORP (eol_type)
- && XVECTOR (eol_type)->size == 3
- && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
- coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
- else
- coding->symbol = saved_coding_symbol;
+ buffer = Fcurrent_buffer ();
- continue;
- }
- if (len_byte <= 0)
- {
- if (coding->type != coding_type_ccl
- || coding->mode & CODING_MODE_LAST_BLOCK)
- break;
- coding->mode |= CODING_MODE_LAST_BLOCK;
- continue;
- }
- if (result == CODING_FINISH_INSUFFICIENT_SRC)
- {
- /* The source text ends in invalid codes. Let's just
- make them valid buffer contents, and finish conversion. */
- if (multibyte_p)
- {
- unsigned char *start = dst;
+ if (NILP (dst_object))
+ {
+ destination = coding->destination;
+ dst_bytes = coding->dst_bytes;
+ }
- inserted += len_byte;
- while (len_byte--)
- {
- int c = *src++;
- dst += CHAR_STRING (c, dst);
- }
+ coding->src_object = src_object;
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_multibyte = chars < bytes;
- inserted_byte += dst - start;
- }
- else
- {
- inserted += len_byte;
- inserted_byte += len_byte;
- while (len_byte--)
- *dst++ = *src++;
- }
- break;
- }
- if (result == CODING_FINISH_INTERRUPT)
- {
- /* The conversion procedure was interrupted by a user. */
- break;
- }
- /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
- if (coding->consumed < 1)
- {
- /* It's quite strange to require more memory without
- consuming any bytes. Perhaps CCL program bug. */
- break;
- }
- if (first)
+ if (STRINGP (src_object))
+ {
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
+ }
+ else if (BUFFERP (src_object))
+ {
+ set_buffer_internal (XBUFFER (src_object));
+ if (from != GPT)
+ move_gap_both (from, from_byte);
+ if (EQ (src_object, dst_object))
{
- /* We have just done the first batch of conversion which was
- stopped because of insufficient gap. Let's reconsider the
- required gap size (i.e. SRT - DST) now.
-
- We have converted ORIG bytes (== coding->consumed) into
- NEW bytes (coding->produced). To convert the remaining
- LEN bytes, we may need REQUIRE bytes of gap, where:
- REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
- REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
- Here, we are sure that NEW >= ORIG. */
- float ratio;
-
- if (coding->produced <= coding->consumed)
- {
- /* This happens because of CCL-based coding system with
- eol-type CRLF. */
- require = 0;
- }
- else
- {
- ratio = (coding->produced - coding->consumed) / coding->consumed;
- require = len_byte * ratio;
- }
- first = 0;
+ saved_pt = PT, saved_pt_byte = PT_BYTE;
+ TEMP_SET_PT_BOTH (from, from_byte);
+ del_range_both (from, from_byte, to, to_byte, 1);
+ coding->src_pos = -chars;
+ coding->src_pos_byte = -bytes;
}
- if ((src - dst) < (require + 2000))
+ else
{
- /* See the comment above the previous call of make_gap. */
- int add = len_byte + inserted_byte;
-
- GAP_SIZE -= add;
- ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
- GPT += inserted_byte; GPT_BYTE += inserted_byte;
- make_gap (require + 2000);
- GAP_SIZE += add;
- ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
- GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
}
}
- if (src - dst > 0) *dst = 0; /* Put an anchor. */
- if (encodep && coding->dst_multibyte)
+ if (CODING_REQUIRE_DETECTION (coding))
+ detect_coding (coding);
+ attrs = CODING_ID_ATTRS (coding->id);
+
+ if (EQ (dst_object, Qt)
+ || (! NILP (CODING_ATTR_POST_READ (attrs))
+ && NILP (dst_object)))
{
- /* The output is unibyte. We must convert 8-bit characters to
- multibyte form. */
- if (inserted_byte * 2 > GAP_SIZE)
- {
- GAP_SIZE -= inserted_byte;
- ZV += inserted_byte; Z += inserted_byte;
- ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
- GPT += inserted_byte; GPT_BYTE += inserted_byte;
- make_gap (inserted_byte - GAP_SIZE);
- GAP_SIZE += inserted_byte;
- ZV -= inserted_byte; Z -= inserted_byte;
- ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
- GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
- }
- inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
+ coding->dst_object = code_conversion_save (buffer, 1, 1);
+ coding->dst_pos = BEG;
+ coding->dst_pos_byte = BEG_BYTE;
+ coding->dst_multibyte = 1;
}
-
- /* If we shrank the conversion area, adjust it now. */
- if (total_skip > 0)
+ else if (BUFFERP (dst_object))
{
- if (tail_skip > 0)
- safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
- inserted += total_skip; inserted_byte += total_skip;
- GAP_SIZE += total_skip;
- GPT -= head_skip; GPT_BYTE -= head_skip;
- ZV -= total_skip; ZV_BYTE -= total_skip;
- Z -= total_skip; Z_BYTE -= total_skip;
- from -= head_skip; from_byte -= head_skip;
- to += tail_skip; to_byte += tail_skip;
+ code_conversion_save (buffer, 0, 0);
+ coding->dst_object = dst_object;
+ coding->dst_pos = BUF_PT (XBUFFER (dst_object));
+ coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
+ coding->dst_multibyte
+ = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
}
-
- prev_Z = Z;
- if (! EQ (current_buffer->undo_list, Qt))
- adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
else
- adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
- inserted, inserted_byte);
- inserted = Z - prev_Z;
+ {
+ code_conversion_save (buffer, 0, 0);
+ coding->dst_object = Qnil;
+ coding->dst_multibyte = 1;
+ }
- if (!encodep && coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, Fcurrent_buffer ());
- coding_free_composition_data (coding);
+ decode_coding (coding);
- if (! inhibit_pre_post_conversion
- && ! encodep && ! NILP (coding->post_read_conversion))
+ if (BUFFERP (coding->dst_object))
+ set_buffer_internal (XBUFFER (coding->dst_object));
+
+ if (! NILP (CODING_ATTR_POST_READ (attrs)))
{
+ struct gcpro gcpro1, gcpro2;
+ EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
Lisp_Object val;
- Lisp_Object saved_coding_system;
- if (from != PT)
- TEMP_SET_PT_BOTH (from, from_byte);
- prev_Z = Z;
- record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
- saved_coding_system = Vlast_coding_system_used;
- Vlast_coding_system_used = coding->symbol;
- /* We should not call any more pre-write/post-read-conversion
- functions while this post-read-conversion is running. */
- inhibit_pre_post_conversion = 1;
- val = call1 (coding->post_read_conversion, make_number (inserted));
- inhibit_pre_post_conversion = 0;
- coding->symbol = Vlast_coding_system_used;
- Vlast_coding_system_used = saved_coding_system;
- /* Discard the unwind protect. */
- specpdl_ptr--;
- CHECK_NUMBER (val);
- inserted += Z - prev_Z;
- }
-
- if (orig_point >= from)
- {
- if (orig_point >= from + orig_len)
- orig_point += inserted - orig_len;
- else
- orig_point = from;
- TEMP_SET_PT (orig_point);
+ TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
+ GCPRO2 (coding->src_object, coding->dst_object);
+ val = call1 (CODING_ATTR_POST_READ (attrs),
+ make_number (coding->produced_char));
+ UNGCPRO;
+ CHECK_NATNUM (val);
+ coding->produced_char += Z - prev_Z;
+ coding->produced += Z_BYTE - prev_Z_BYTE;
}
- if (replace)
+ if (EQ (dst_object, Qt))
{
- signal_after_change (from, to - from, inserted);
- update_compositions (from, from + inserted, CHECK_BORDER);
+ coding->dst_object = Fbuffer_string ();
+ }
+ else if (NILP (dst_object) && BUFFERP (coding->dst_object))
+ {
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ if (dst_bytes < coding->produced)
+ {
+ destination
+ = (unsigned char *) xrealloc (destination, coding->produced);
+ if (! destination)
+ {
+ coding->result = CODING_RESULT_INSUFFICIENT_DST;
+ unbind_to (count, Qnil);
+ return;
+ }
+ if (BEGV < GPT && GPT < BEGV + coding->produced_char)
+ move_gap_both (BEGV, BEGV_BYTE);
+ bcopy (BEGV_ADDR, destination, coding->produced);
+ coding->destination = destination;
+ }
}
- {
- coding->consumed = to_byte - from_byte;
- coding->consumed_char = to - from;
- coding->produced = inserted_byte;
- coding->produced_char = inserted;
- }
+ if (saved_pt >= 0)
+ {
+ /* This is the case of:
+ (BUFFERP (src_object) && EQ (src_object, dst_object))
+ As we have moved PT while replacing the original buffer
+ contents, we must recover it now. */
+ set_buffer_internal (XBUFFER (src_object));
+ if (saved_pt < from)
+ TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
+ else if (saved_pt < from + chars)
+ TEMP_SET_PT_BOTH (from, from_byte);
+ else if (! NILP (current_buffer->enable_multibyte_characters))
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
+ saved_pt_byte + (coding->produced - bytes));
+ else
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
+ saved_pt_byte + (coding->produced - bytes));
+ }
- return 0;
+ unbind_to (count, Qnil);
}
-Lisp_Object
-run_pre_post_conversion_on_str (str, coding, encodep)
- Lisp_Object str;
+
+void
+encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
+ dst_object)
struct coding_system *coding;
- int encodep;
+ Lisp_Object src_object;
+ EMACS_INT from, from_byte, to, to_byte;
+ Lisp_Object dst_object;
{
- int count = SPECPDL_INDEX ();
- struct gcpro gcpro1, gcpro2;
- int multibyte = STRING_MULTIBYTE (str);
+ int count = specpdl_ptr - specpdl;
+ EMACS_INT chars = to - from;
+ EMACS_INT bytes = to_byte - from_byte;
+ Lisp_Object attrs;
Lisp_Object buffer;
- struct buffer *buf;
- Lisp_Object old_deactivate_mark;
-
- record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
- /* It is not crucial to specbind this. */
- old_deactivate_mark = Vdeactivate_mark;
- GCPRO2 (str, old_deactivate_mark);
-
- buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
- buf = XBUFFER (buffer);
-
- delete_all_overlays (buf);
- buf->directory = current_buffer->directory;
- buf->read_only = Qnil;
- buf->filename = Qnil;
- buf->undo_list = Qt;
- eassert (buf->overlays_before == NULL);
- eassert (buf->overlays_after == NULL);
-
- set_buffer_internal (buf);
- /* We must insert the contents of STR as is without
- unibyte<->multibyte conversion. For that, we adjust the
- multibyteness of the working buffer to that of STR. */
- Ferase_buffer ();
- buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ int saved_pt = -1, saved_pt_byte;
- insert_from_string (str, 0, 0,
- SCHARS (str), SBYTES (str), 0);
- UNGCPRO;
- inhibit_pre_post_conversion = 1;
- if (encodep)
- call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
- else
- {
- Vlast_coding_system_used = coding->symbol;
- TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
- call1 (coding->post_read_conversion, make_number (Z - BEG));
- coding->symbol = Vlast_coding_system_used;
- }
- inhibit_pre_post_conversion = 0;
- Vdeactivate_mark = old_deactivate_mark;
- str = make_buffer_string (BEG, Z, 1);
- return unbind_to (count, str);
-}
+ buffer = Fcurrent_buffer ();
-Lisp_Object
-decode_coding_string (str, coding, nocopy)
- Lisp_Object str;
- struct coding_system *coding;
- int nocopy;
-{
- int len;
- struct conversion_buffer buf;
- int from, to_byte;
- Lisp_Object saved_coding_symbol;
- int result;
- int require_decoding;
- int shrinked_bytes = 0;
- Lisp_Object newstr;
- int consumed, consumed_char, produced, produced_char;
-
- from = 0;
- to_byte = SBYTES (str);
-
- saved_coding_symbol = coding->symbol;
- coding->src_multibyte = STRING_MULTIBYTE (str);
- coding->dst_multibyte = 1;
- if (CODING_REQUIRE_DETECTION (coding))
+ coding->src_object = src_object;
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_multibyte = chars < bytes;
+
+ attrs = CODING_ID_ATTRS (coding->id);
+
+ if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
{
- /* See the comments in code_convert_region. */
- if (coding->type == coding_type_undecided)
- {
- detect_coding (coding, SDATA (str), to_byte);
- if (coding->type == coding_type_undecided)
- {
- coding->type = coding_type_emacs_mule;
- coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
- /* As emacs-mule decoder will handle composition, we
- need this setting to allocate coding->cmp_data
- later. */
- coding->composing = COMPOSITION_NO;
- }
- }
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && coding->type != coding_type_ccl)
+ coding->src_object = code_conversion_save (buffer, 1,
+ coding->src_multibyte);
+ set_buffer_internal (XBUFFER (coding->src_object));
+ if (STRINGP (src_object))
+ insert_from_string (src_object, from, from_byte, chars, bytes, 0);
+ else if (BUFFERP (src_object))
+ insert_from_buffer (XBUFFER (src_object), from, chars, 0);
+ else
+ insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
+
+ if (EQ (src_object, dst_object))
{
- saved_coding_symbol = coding->symbol;
- detect_eol (coding, SDATA (str), to_byte);
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = CODING_EOL_LF;
- /* We had better recover the original eol format if we
- encounter an inconsistent eol format while decoding. */
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
+ set_buffer_internal (XBUFFER (src_object));
+ saved_pt = PT, saved_pt_byte = PT_BYTE;
+ del_range_both (from, from_byte, to, to_byte, 1);
+ set_buffer_internal (XBUFFER (coding->src_object));
}
- }
-
- if (coding->type == coding_type_no_conversion
- || coding->type == coding_type_raw_text)
- coding->dst_multibyte = 0;
-
- require_decoding = CODING_REQUIRE_DECODING (coding);
- if (STRING_MULTIBYTE (str))
- {
- /* Decoding routines expect the source text to be unibyte. */
- str = Fstring_as_unibyte (str);
- to_byte = SBYTES (str);
- nocopy = 1;
- coding->src_multibyte = 0;
+ call2 (CODING_ATTR_PRE_WRITE (attrs),
+ make_number (BEG), make_number (Z));
+ coding->src_object = Fcurrent_buffer ();
+ if (BEG != GPT)
+ move_gap_both (BEG, BEG_BYTE);
+ coding->src_chars = Z - BEG;
+ coding->src_bytes = Z_BYTE - BEG_BYTE;
+ coding->src_pos = BEG;
+ coding->src_pos_byte = BEG_BYTE;
+ coding->src_multibyte = Z < Z_BYTE;
}
-
- /* Try to skip the heading and tailing ASCIIs. */
- if (require_decoding && coding->type != coding_type_ccl)
+ else if (STRINGP (src_object))
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
- 0);
- if (from == to_byte)
- require_decoding = 0;
- shrinked_bytes = from + (SBYTES (str) - to_byte);
+ code_conversion_save (buffer, 0, 0);
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
}
-
- if (!require_decoding
- && !(SYMBOLP (coding->post_read_conversion)
- && !NILP (Ffboundp (coding->post_read_conversion))))
+ else if (BUFFERP (src_object))
{
- coding->consumed = SBYTES (str);
- coding->consumed_char = SCHARS (str);
- if (coding->dst_multibyte)
+ code_conversion_save (buffer, 0, 0);
+ set_buffer_internal (XBUFFER (src_object));
+ if (EQ (src_object, dst_object))
{
- str = Fstring_as_multibyte (str);
- nocopy = 1;
+ saved_pt = PT, saved_pt_byte = PT_BYTE;
+ coding->src_object = del_range_1 (from, to, 1, 1);
+ coding->src_pos = 0;
+ coding->src_pos_byte = 0;
}
- coding->produced = SBYTES (str);
- coding->produced_char = SCHARS (str);
- return (nocopy ? str : Fcopy_sequence (str));
- }
-
- if (coding->composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (coding, from);
- len = decoding_buffer_size (coding, to_byte - from);
- allocate_conversion_buffer (buf, len);
-
- consumed = consumed_char = produced = produced_char = 0;
- while (1)
- {
- result = decode_coding (coding, SDATA (str) + from + consumed,
- buf.data + produced, to_byte - from - consumed,
- buf.size - produced);
- consumed += coding->consumed;
- consumed_char += coding->consumed_char;
- produced += coding->produced;
- produced_char += coding->produced_char;
- if (result == CODING_FINISH_NORMAL
- || (result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == 0))
- break;
- if (result == CODING_FINISH_INSUFFICIENT_CMP)
- coding_allocate_composition_data (coding, from + produced_char);
- else if (result == CODING_FINISH_INSUFFICIENT_DST)
- extend_conversion_buffer (&buf);
- else if (result == CODING_FINISH_INCONSISTENT_EOL)
+ else
{
- Lisp_Object eol_type;
-
- /* Recover the original EOL format. */
- if (coding->eol_type == CODING_EOL_CR)
- {
- unsigned char *p;
- for (p = buf.data; p < buf.data + produced; p++)
- if (*p == '\n') *p = '\r';
- }
- else if (coding->eol_type == CODING_EOL_CRLF)
- {
- int num_eol = 0;
- unsigned char *p0, *p1;
- for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
- if (*p0 == '\n') num_eol++;
- if (produced + num_eol >= buf.size)
- extend_conversion_buffer (&buf);
- for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
- {
- *--p1 = *--p0;
- if (*p0 == '\n') *--p1 = '\r';
- }
- produced += num_eol;
- produced_char += num_eol;
- }
- /* Suppress eol-format conversion in the further conversion. */
- coding->eol_type = CODING_EOL_LF;
-
- /* Set the coding system symbol to that for Unix-like EOL. */
- eol_type = Fget (saved_coding_symbol, Qeol_type);
- if (VECTORP (eol_type)
- && XVECTOR (eol_type)->size == 3
- && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
- coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
- else
- coding->symbol = saved_coding_symbol;
-
-
+ if (from < GPT && to >= GPT)
+ move_gap_both (from, from_byte);
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
}
}
-
- coding->consumed = consumed;
- coding->consumed_char = consumed_char;
- coding->produced = produced;
- coding->produced_char = produced_char;
-
- if (coding->dst_multibyte)
- newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
- produced + shrinked_bytes);
else
- newstr = make_uninit_string (produced + shrinked_bytes);
- if (from > 0)
- STRING_COPYIN (newstr, 0, SDATA (str), from);
- STRING_COPYIN (newstr, from, buf.data, produced);
- if (shrinked_bytes > from)
- STRING_COPYIN (newstr, from + produced,
- SDATA (str) + to_byte,
- shrinked_bytes - from);
- free_conversion_buffer (&buf);
-
- if (coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, newstr);
- coding_free_composition_data (coding);
-
- if (SYMBOLP (coding->post_read_conversion)
- && !NILP (Ffboundp (coding->post_read_conversion)))
- newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
-
- return newstr;
-}
+ code_conversion_save (buffer, 0, 0);
-Lisp_Object
-encode_coding_string (str, coding, nocopy)
- Lisp_Object str;
- struct coding_system *coding;
- int nocopy;
-{
- int len;
- struct conversion_buffer buf;
- int from, to, to_byte;
- int result;
- int shrinked_bytes = 0;
- Lisp_Object newstr;
- int consumed, consumed_char, produced, produced_char;
-
- if (SYMBOLP (coding->pre_write_conversion)
- && !NILP (Ffboundp (coding->pre_write_conversion)))
- str = run_pre_post_conversion_on_str (str, coding, 1);
-
- from = 0;
- to = SCHARS (str);
- to_byte = SBYTES (str);
-
- /* Encoding routines determine the multibyteness of the source text
- by coding->src_multibyte. */
- coding->src_multibyte = STRING_MULTIBYTE (str);
- coding->dst_multibyte = 0;
- if (! CODING_REQUIRE_ENCODING (coding))
- {
- coding->consumed = SBYTES (str);
- coding->consumed_char = SCHARS (str);
- if (STRING_MULTIBYTE (str))
+ if (BUFFERP (dst_object))
+ {
+ coding->dst_object = dst_object;
+ if (EQ (src_object, dst_object))
+ {
+ coding->dst_pos = from;
+ coding->dst_pos_byte = from_byte;
+ }
+ else
{
- str = Fstring_as_unibyte (str);
- nocopy = 1;
+ coding->dst_pos = BUF_PT (XBUFFER (dst_object));
+ coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
}
- coding->produced = SBYTES (str);
- coding->produced_char = SCHARS (str);
- return (nocopy ? str : Fcopy_sequence (str));
+ coding->dst_multibyte
+ = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
+ }
+ else if (EQ (dst_object, Qt))
+ {
+ coding->dst_object = Qnil;
+ coding->dst_bytes = coding->src_chars;
+ if (coding->dst_bytes == 0)
+ coding->dst_bytes = 1;
+ coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
+ coding->dst_multibyte = 0;
+ }
+ else
+ {
+ coding->dst_object = Qnil;
+ coding->dst_multibyte = 0;
}
- if (coding->composing != COMPOSITION_DISABLED)
- coding_save_composition (coding, from, to, str);
+ encode_coding (coding);
- /* Try to skip the heading and tailing ASCIIs. */
- if (coding->type != coding_type_ccl)
+ if (EQ (dst_object, Qt))
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
- 1);
- if (from == to_byte)
- return (nocopy ? str : Fcopy_sequence (str));
- shrinked_bytes = from + (SBYTES (str) - to_byte);
+ if (BUFFERP (coding->dst_object))
+ coding->dst_object = Fbuffer_string ();
+ else
+ {
+ coding->dst_object
+ = make_unibyte_string ((char *) coding->destination,
+ coding->produced);
+ xfree (coding->destination);
+ }
}
- len = encoding_buffer_size (coding, to_byte - from);
- allocate_conversion_buffer (buf, len);
-
- consumed = consumed_char = produced = produced_char = 0;
- while (1)
+ if (saved_pt >= 0)
{
- result = encode_coding (coding, SDATA (str) + from + consumed,
- buf.data + produced, to_byte - from - consumed,
- buf.size - produced);
- consumed += coding->consumed;
- consumed_char += coding->consumed_char;
- produced += coding->produced;
- produced_char += coding->produced_char;
- if (result == CODING_FINISH_NORMAL
- || (result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == 0))
- break;
- /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
- extend_conversion_buffer (&buf);
+ /* This is the case of:
+ (BUFFERP (src_object) && EQ (src_object, dst_object))
+ As we have moved PT while replacing the original buffer
+ contents, we must recover it now. */
+ set_buffer_internal (XBUFFER (src_object));
+ if (saved_pt < from)
+ TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
+ else if (saved_pt < from + chars)
+ TEMP_SET_PT_BOTH (from, from_byte);
+ else if (! NILP (current_buffer->enable_multibyte_characters))
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
+ saved_pt_byte + (coding->produced - bytes));
+ else
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
+ saved_pt_byte + (coding->produced - bytes));
}
- coding->consumed = consumed;
- coding->consumed_char = consumed_char;
- coding->produced = produced;
- coding->produced_char = produced_char;
+ unbind_to (count, Qnil);
+}
- newstr = make_uninit_string (produced + shrinked_bytes);
- if (from > 0)
- STRING_COPYIN (newstr, 0, SDATA (str), from);
- STRING_COPYIN (newstr, from, buf.data, produced);
- if (shrinked_bytes > from)
- STRING_COPYIN (newstr, from + produced,
- SDATA (str) + to_byte,
- shrinked_bytes - from);
- free_conversion_buffer (&buf);
- coding_free_composition_data (coding);
+Lisp_Object
+preferred_coding_system ()
+{
+ int id = coding_categories[coding_priorities[0]].id;
- return newstr;
+ return CODING_ID_NAME (id);
}
@@ -6320,19 +6660,12 @@ encode_coding_string (str, coding, nocopy)
DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
doc: /* Return t if OBJECT is nil or a coding-system.
-See the documentation of `make-coding-system' for information
+See the documentation of `define-coding-system' for information
about coding-system objects. */)
(obj)
Lisp_Object obj;
{
- if (NILP (obj))
- return Qt;
- if (!SYMBOLP (obj))
- return Qnil;
- /* Get coding-spec vector for OBJ. */
- obj = Fget (obj, Qcoding_system);
- return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
- ? Qt : Qnil);
+ return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
}
DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
@@ -6359,7 +6692,7 @@ If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */
{
Lisp_Object val;
if (SYMBOLP (default_coding_system))
- default_coding_system = SYMBOL_NAME (default_coding_system);
+ XSETSTRING (default_coding_system, SYMBOL_NAME (default_coding_system));
val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
Qt, Qnil, Qcoding_system_history,
default_coding_system, Qnil);
@@ -6369,10 +6702,8 @@ If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
1, 1, 0,
doc: /* Check validity of CODING-SYSTEM.
-If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
-It is valid if it is a symbol with a non-nil `coding-system' property.
-The value of property should be a vector of length 5. */)
- (coding_system)
+If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
+ (coding_system)
Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
@@ -6381,77 +6712,234 @@ The value of property should be a vector of length 5. */)
while (1)
Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
}
+
+/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
+ HIGHEST is nonzero, return the coding system of the highest
+ priority among the detected coding systems. Otherwize return a
+ list of detected coding systems sorted by their priorities. If
+ MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
+ multibyte form but contains only ASCII and eight-bit chars.
+ Otherwise, the bytes are raw bytes.
+
+ CODING-SYSTEM controls the detection as below:
+
+ If it is nil, detect both text-format and eol-format. If the
+ text-format part of CODING-SYSTEM is already specified
+ (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
+ part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
+ detect only text-format. */
+
Lisp_Object
-detect_coding_system (src, src_bytes, highest, multibytep)
+detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
const unsigned char *src;
int src_bytes, highest;
int multibytep;
+ Lisp_Object coding_system;
{
- int coding_mask, eol_type;
- Lisp_Object val, tmp;
- int dummy;
+ const unsigned char *src_end = src + src_bytes;
+ Lisp_Object attrs, eol_type;
+ Lisp_Object val;
+ struct coding_system coding;
+ int id;
+ struct coding_detection_info detect_info;
- coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
- eol_type = detect_eol_type (src, src_bytes, &dummy);
- if (eol_type == CODING_EOL_INCONSISTENT)
- eol_type = CODING_EOL_UNDECIDED;
+ if (NILP (coding_system))
+ coding_system = Qundecided;
+ setup_coding_system (coding_system, &coding);
+ attrs = CODING_ID_ATTRS (coding.id);
+ eol_type = CODING_ID_EOL_TYPE (coding.id);
+ coding_system = CODING_ATTR_BASE_NAME (attrs);
+
+ coding.source = src;
+ coding.src_bytes = src_bytes;
+ coding.src_multibyte = multibytep;
+ coding.consumed = 0;
+ coding.mode |= CODING_MODE_LAST_BLOCK;
- if (!coding_mask)
- {
- val = Qundecided;
- if (eol_type != CODING_EOL_UNDECIDED)
- {
- Lisp_Object val2;
- val2 = Fget (Qundecided, Qeol_type);
- if (VECTORP (val2))
- val = XVECTOR (val2)->contents[eol_type];
- }
- return (highest ? val : Fcons (val, Qnil));
- }
+ detect_info.checked = detect_info.found = detect_info.rejected = 0;
- /* At first, gather possible coding systems in VAL. */
- val = Qnil;
- for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
+ /* At first, detect text-format if necessary. */
+ if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
{
- Lisp_Object category_val, category_index;
+ enum coding_category category;
+ struct coding_system *this;
+ int c, i;
- category_index = Fget (XCAR (tmp), Qcoding_category_index);
- category_val = Fsymbol_value (XCAR (tmp));
- if (!NILP (category_val)
- && NATNUMP (category_index)
- && (coding_mask & (1 << XFASTINT (category_index))))
+ for (; src < src_end; src++)
{
- val = Fcons (category_val, val);
- if (highest)
+ c = *src;
+ if (c & 0x80
+ || (c < 0x20 && (c == ISO_CODE_ESC
+ || c == ISO_CODE_SI
+ || c == ISO_CODE_SO)))
break;
}
- }
- if (!highest)
- val = Fnreverse (val);
+ coding.head_ascii = src - coding.source;
- /* Then, replace the elements with subsidiary coding systems. */
- for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
- {
- if (eol_type != CODING_EOL_UNDECIDED
- && eol_type != CODING_EOL_INCONSISTENT)
+ if (src < src_end)
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+
+ if (this->id < 0)
+ {
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (highest
+ && (detect_info.found & (1 << category)))
+ break;
+ }
+ else
+ {
+ if ((*(this->detector)) (&coding, &detect_info)
+ && highest
+ && (detect_info.found & (1 << category)))
+ break;
+ }
+ }
+
+
+ if (detect_info.rejected == CATEGORY_MASK_ANY)
+ {
+ detect_info.found = CATEGORY_MASK_RAW_TEXT;
+ id = coding_categories[coding_category_raw_text].id;
+ val = Fcons (make_number (id), Qnil);
+ }
+ else if (! detect_info.rejected && ! detect_info.found)
{
- Lisp_Object eol;
- eol = Fget (XCAR (tmp), Qeol_type);
- if (VECTORP (eol))
- XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
+ detect_info.found = CATEGORY_MASK_ANY;
+ id = coding_categories[coding_category_undecided].id;
+ val = Fcons (make_number (id), Qnil);
}
+ else if (highest)
+ {
+ if (detect_info.found)
+ {
+ detect_info.found = 1 << category;
+ val = Fcons (make_number (this->id), Qnil);
+ }
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
+ if (! (detect_info.rejected & (1 << coding_priorities[i])))
+ {
+ detect_info.found = 1 << coding_priorities[i];
+ id = coding_categories[coding_priorities[i]].id;
+ val = Fcons (make_number (id), Qnil);
+ break;
+ }
+ }
+ else
+ {
+ int mask = detect_info.rejected | detect_info.found;
+ int found = 0;
+ val = Qnil;
+
+ for (i = coding_category_raw_text - 1; i >= 0; i--)
+ {
+ category = coding_priorities[i];
+ if (! (mask & (1 << category)))
+ {
+ found |= 1 << category;
+ id = coding_categories[category].id;
+ val = Fcons (make_number (id), val);
+ }
+ }
+ for (i = coding_category_raw_text - 1; i >= 0; i--)
+ {
+ category = coding_priorities[i];
+ if (detect_info.found & (1 << category))
+ {
+ id = coding_categories[category].id;
+ val = Fcons (make_number (id), val);
+ }
+ }
+ detect_info.found |= found;
+ }
+ }
+ else
+ {
+ detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
+ val = Fcons (make_number (coding.id), Qnil);
}
+
+ /* Then, detect eol-format if necessary. */
+ {
+ int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
+ Lisp_Object tail;
+
+ if (VECTORP (eol_type))
+ {
+ if (detect_info.found & ~CATEGORY_MASK_UTF_16)
+ normal_eol = detect_eol (coding.source, src_bytes,
+ coding_category_raw_text);
+ if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_BE_NOSIG))
+ utf_16_be_eol = detect_eol (coding.source, src_bytes,
+ coding_category_utf_16_be);
+ if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_LE_NOSIG))
+ utf_16_le_eol = detect_eol (coding.source, src_bytes,
+ coding_category_utf_16_le);
+ }
+ else
+ {
+ if (EQ (eol_type, Qunix))
+ normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
+ else if (EQ (eol_type, Qdos))
+ normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
+ else
+ normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
+ }
+
+ for (tail = val; CONSP (tail); tail = XCDR (tail))
+ {
+ enum coding_category category;
+ int this_eol;
+
+ id = XINT (XCAR (tail));
+ attrs = CODING_ID_ATTRS (id);
+ category = XINT (CODING_ATTR_CATEGORY (attrs));
+ eol_type = CODING_ID_EOL_TYPE (id);
+ if (VECTORP (eol_type))
+ {
+ if (category == coding_category_utf_16_be
+ || category == coding_category_utf_16_be_nosig)
+ this_eol = utf_16_be_eol;
+ else if (category == coding_category_utf_16_le
+ || category == coding_category_utf_16_le_nosig)
+ this_eol = utf_16_le_eol;
+ else
+ this_eol = normal_eol;
+
+ if (this_eol == EOL_SEEN_LF)
+ XSETCAR (tail, AREF (eol_type, 0));
+ else if (this_eol == EOL_SEEN_CRLF)
+ XSETCAR (tail, AREF (eol_type, 1));
+ else if (this_eol == EOL_SEEN_CR)
+ XSETCAR (tail, AREF (eol_type, 2));
+ else
+ XSETCAR (tail, CODING_ID_NAME (id));
+ }
+ else
+ XSETCAR (tail, CODING_ID_NAME (id));
+ }
+ }
+
return (highest ? XCAR (val) : val);
}
+
DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 3, 0,
- doc: /* Detect how the byte sequence in the region is encoded.
-Return a list of possible coding systems used on decoding a byte
-sequence containing the bytes in the region between START and END when
-the coding system `undecided' is specified. The list is ordered by
-priority decided in the current language environment.
+ doc: /* Detect coding system of the text in the region between START and END.
+Return a list of possible coding systems ordered by priority.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
@@ -6464,7 +6952,6 @@ highest priority. */)
{
int from, to;
int from_byte, to_byte;
- int include_anchor_byte = 0;
CHECK_NUMBER_COERCE_MARKER (start);
CHECK_NUMBER_COERCE_MARKER (end);
@@ -6476,29 +6963,19 @@ highest priority. */)
if (from < GPT && to >= GPT)
move_gap_both (to, to_byte);
- /* If we an anchor byte `\0' follows the region, we include it in
- the detecting source. Then code detectors can handle the tailing
- byte sequence more accurately.
- Fix me: This is not a perfect solution. It is better that we
- add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
- */
- if (to == Z || (to == GPT && GAP_SIZE > 0))
- include_anchor_byte = 1;
return detect_coding_system (BYTE_POS_ADDR (from_byte),
- to_byte - from_byte + include_anchor_byte,
+ to_byte - from_byte,
!NILP (highest),
!NILP (current_buffer
- ->enable_multibyte_characters));
+ ->enable_multibyte_characters),
+ Qnil);
}
DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
1, 2, 0,
- doc: /* Detect how the byte sequence in STRING is encoded.
-Return a list of possible coding systems used on decoding a byte
-sequence containing the bytes in STRING when the coding system
-`undecided' is specified. The list is ordered by priority decided in
-the current language environment.
+ doc: /* Detect coding system of the text in STRING.
+Return a list of possible coding systems ordered by priority.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
@@ -6511,271 +6988,149 @@ highest priority. */)
{
CHECK_STRING (string);
- return detect_coding_system (SDATA (string),
- /* "+ 1" is to include the anchor byte
- `\0'. With this, code detectors can
- handle the tailing bytes more
- accurately. */
- SBYTES (string) + 1,
- !NILP (highest),
- STRING_MULTIBYTE (string));
+ return detect_coding_system (SDATA (string), SBYTES (string),
+ !NILP (highest), STRING_MULTIBYTE (string),
+ Qnil);
}
-/* Subroutine for Fsafe_coding_systems_region_internal.
-
- Return a list of coding systems that safely encode the multibyte
- text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
- possible coding systems. If it is nil, it means that we have not
- yet found any coding systems.
-
- WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
- element of WORK_TABLE is set to t once the element is looked up.
- If a non-ASCII single byte char is found, set
- *single_byte_char_found to 1. */
-
-static Lisp_Object
-find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
- unsigned char *p, *pend;
- Lisp_Object safe_codings, work_table;
- int *single_byte_char_found;
+static INLINE int
+char_encodable_p (c, attrs)
+ int c;
+ Lisp_Object attrs;
{
- int c, len;
- Lisp_Object val, ch;
- Lisp_Object prev, tail;
+ Lisp_Object tail;
+ struct charset *charset;
- while (p < pend)
+ for (tail = CODING_ATTR_CHARSET_LIST (attrs);
+ CONSP (tail); tail = XCDR (tail))
{
- c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
- p += len;
- if (ASCII_BYTE_P (c))
- /* We can ignore ASCII characters here. */
- continue;
- if (SINGLE_BYTE_CHAR_P (c))
- *single_byte_char_found = 1;
- if (NILP (safe_codings))
- /* Already all coding systems are excluded. But, we can't
- terminate the loop here because non-ASCII single-byte char
- must be found. */
- continue;
- /* Check the safe coding systems for C. */
- ch = make_number (c);
- val = Faref (work_table, ch);
- if (EQ (val, Qt))
- /* This element was already checked. Ignore it. */
- continue;
- /* Remember that we checked this element. */
- Faset (work_table, ch, Qt);
-
- for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
- {
- Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
- int encodable;
-
- elt = XCAR (tail);
- if (CONSP (XCDR (elt)))
- {
- /* This entry has this format now:
- ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
- ACCEPT-LATIN-EXTRA ) */
- val = XCDR (elt);
- encodable = ! NILP (Faref (XCAR (val), ch));
- if (! encodable)
- {
- val = XCDR (val);
- translation_table = XCAR (val);
- hash_table = XCAR (XCDR (val));
- accept_latin_extra = XCAR (XCDR (XCDR (val)));
- }
- }
- else
- {
- /* This entry has this format now: ( CODING . SAFE-CHARS) */
- encodable = ! NILP (Faref (XCDR (elt), ch));
- if (! encodable)
- {
- /* Transform the format to:
- ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
- ACCEPT-LATIN-EXTRA ) */
- val = Fget (XCAR (elt), Qcoding_system);
- translation_table
- = Fplist_get (AREF (val, 3),
- Qtranslation_table_for_encode);
- if (SYMBOLP (translation_table))
- translation_table = Fget (translation_table,
- Qtranslation_table);
- hash_table
- = (CHAR_TABLE_P (translation_table)
- ? XCHAR_TABLE (translation_table)->extras[1]
- : Qnil);
- accept_latin_extra
- = ((EQ (AREF (val, 0), make_number (2))
- && VECTORP (AREF (val, 4)))
- ? AREF (AREF (val, 4), 16)
- : Qnil);
- XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
- translation_table, hash_table,
- accept_latin_extra));
- }
- }
-
- if (! encodable
- && ((CHAR_TABLE_P (translation_table)
- && ! NILP (Faref (translation_table, ch)))
- || (HASH_TABLE_P (hash_table)
- && ! NILP (Fgethash (ch, hash_table, Qnil)))
- || (SINGLE_BYTE_CHAR_P (c)
- && ! NILP (accept_latin_extra)
- && VECTORP (Vlatin_extra_code_table)
- && ! NILP (AREF (Vlatin_extra_code_table, c)))))
- encodable = 1;
- if (encodable)
- prev = tail;
- else
- {
- /* Exclude this coding system from SAFE_CODINGS. */
- if (EQ (tail, safe_codings))
- safe_codings = XCDR (safe_codings);
- else
- XSETCDR (prev, XCDR (tail));
- }
- }
+ charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
+ if (CHAR_CHARSET_P (c, charset))
+ break;
}
- return safe_codings;
+ return (! NILP (tail));
}
+
+/* Return a list of coding systems that safely encode the text between
+ START and END. If EXCLUDE is non-nil, it is a list of coding
+ systems not to check. The returned list doesn't contain any such
+ coding systems. In any case, if the text contains only ASCII or is
+ unibyte, return t. */
+
DEFUN ("find-coding-systems-region-internal",
Ffind_coding_systems_region_internal,
- Sfind_coding_systems_region_internal, 2, 2, 0,
+ Sfind_coding_systems_region_internal, 2, 3, 0,
doc: /* Internal use only. */)
- (start, end)
- Lisp_Object start, end;
+ (start, end, exclude)
+ Lisp_Object start, end, exclude;
{
- Lisp_Object work_table, safe_codings;
- int non_ascii_p = 0;
- int single_byte_char_found = 0;
- const unsigned char *p1, *p1end, *p2, *p2end, *p;
+ Lisp_Object coding_attrs_list, safe_codings;
+ EMACS_INT start_byte, end_byte;
+ const unsigned char *p, *pbeg, *pend;
+ int c;
+ Lisp_Object tail, elt;
if (STRINGP (start))
{
- if (!STRING_MULTIBYTE (start))
+ if (!STRING_MULTIBYTE (start)
+ || SCHARS (start) == SBYTES (start))
return Qt;
- p1 = SDATA (start), p1end = p1 + SBYTES (start);
- p2 = p2end = p1end;
- if (SCHARS (start) != SBYTES (start))
- non_ascii_p = 1;
+ start_byte = 0;
+ end_byte = SBYTES (start);
}
else
{
- int from, to, stop;
-
CHECK_NUMBER_COERCE_MARKER (start);
CHECK_NUMBER_COERCE_MARKER (end);
if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
args_out_of_range (start, end);
if (NILP (current_buffer->enable_multibyte_characters))
return Qt;
- from = CHAR_TO_BYTE (XINT (start));
- to = CHAR_TO_BYTE (XINT (end));
- stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
- p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
- if (stop == to)
- p2 = p2end = p1end;
- else
- p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
- if (XINT (end) - XINT (start) != to - from)
- non_ascii_p = 1;
- }
+ start_byte = CHAR_TO_BYTE (XINT (start));
+ end_byte = CHAR_TO_BYTE (XINT (end));
+ if (XINT (end) - XINT (start) == end_byte - start_byte)
+ return Qt;
- if (!non_ascii_p)
- {
- /* We are sure that the text contains no multibyte character.
- Check if it contains eight-bit-graphic. */
- p = p1;
- for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
- if (p == p1end)
+ if (XINT (start) < GPT && XINT (end) > GPT)
{
- for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
- if (p == p2end)
- return Qt;
+ if ((GPT - XINT (start)) < (XINT (end) - GPT))
+ move_gap_both (XINT (start), start_byte);
+ else
+ move_gap_both (XINT (end), end_byte);
}
}
- /* The text contains non-ASCII characters. */
+ coding_attrs_list = Qnil;
+ for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
+ if (NILP (exclude)
+ || NILP (Fmemq (XCAR (tail), exclude)))
+ {
+ Lisp_Object attrs;
- work_table = Fmake_char_table (Qchar_coding_system, Qnil);
- safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
+ attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
+ if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
+ && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+ coding_attrs_list = Fcons (attrs, coding_attrs_list);
+ }
- safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
- &single_byte_char_found);
- if (p2 < p2end)
- safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
- &single_byte_char_found);
- if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
- safe_codings = Qt;
+ if (STRINGP (start))
+ p = pbeg = SDATA (start);
else
- {
- /* Turn safe_codings to a list of coding systems... */
- Lisp_Object val;
+ p = pbeg = BYTE_POS_ADDR (start_byte);
+ pend = p + (end_byte - start_byte);
- if (single_byte_char_found)
- /* ... and append these for eight-bit chars. */
- val = Fcons (Qraw_text,
- Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
- else
- /* ... and append generic coding systems. */
- val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
+ while (p < pend && ASCII_BYTE_P (*p)) p++;
+ while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
- for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
- val = Fcons (XCAR (XCAR (safe_codings)), val);
- safe_codings = val;
- }
-
- return safe_codings;
-}
-
-
-/* Search from position POS for such characters that are unencodable
- accoding to SAFE_CHARS, and return a list of their positions. P
- points where in the memory the character at POS exists. Limit the
- search at PEND or when Nth unencodable characters are found.
-
- If SAFE_CHARS is a char table, an element for an unencodable
- character is nil.
-
- If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
-
- Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
- eight-bit-graphic characters are unencodable. */
-
-static Lisp_Object
-unencodable_char_position (safe_chars, pos, p, pend, n)
- Lisp_Object safe_chars;
- int pos;
- unsigned char *p, *pend;
- int n;
-{
- Lisp_Object pos_list;
-
- pos_list = Qnil;
while (p < pend)
{
- int len;
- int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
-
- if (c >= 128
- && (CHAR_TABLE_P (safe_chars)
- ? NILP (CHAR_TABLE_REF (safe_chars, c))
- : (NILP (safe_chars) || c < 256)))
+ if (ASCII_BYTE_P (*p))
+ p++;
+ else
{
- pos_list = Fcons (make_number (pos), pos_list);
- if (--n <= 0)
- break;
+ c = STRING_CHAR_ADVANCE (p);
+
+ charset_map_loaded = 0;
+ for (tail = coding_attrs_list; CONSP (tail);)
+ {
+ elt = XCAR (tail);
+ if (NILP (elt))
+ tail = XCDR (tail);
+ else if (char_encodable_p (c, elt))
+ tail = XCDR (tail);
+ else if (CONSP (XCDR (tail)))
+ {
+ XSETCAR (tail, XCAR (XCDR (tail)));
+ XSETCDR (tail, XCDR (XCDR (tail)));
+ }
+ else
+ {
+ XSETCAR (tail, Qnil);
+ tail = XCDR (tail);
+ }
+ }
+ if (charset_map_loaded)
+ {
+ EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
+
+ if (STRINGP (start))
+ pbeg = SDATA (start);
+ else
+ pbeg = BYTE_POS_ADDR (start_byte);
+ p = pbeg + p_offset;
+ pend = pbeg + pend_offset;
+ }
}
- pos++;
- p += len;
}
- return Fnreverse (pos_list);
+
+ safe_codings = Qnil;
+ for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
+ if (! NILP (XCAR (tail)))
+ safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
+
+ return safe_codings;
}
@@ -6797,24 +7152,35 @@ to the string. */)
Lisp_Object start, end, coding_system, count, string;
{
int n;
- Lisp_Object safe_chars;
struct coding_system coding;
+ Lisp_Object attrs, charset_list;
Lisp_Object positions;
int from, to;
- unsigned char *p, *pend;
+ const unsigned char *p, *stop, *pend;
+ int ascii_compatible;
+
+ setup_coding_system (Fcheck_coding_system (coding_system), &coding);
+ attrs = CODING_ID_ATTRS (coding.id);
+ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
+ return Qnil;
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
if (NILP (string))
{
validate_region (&start, &end);
from = XINT (start);
to = XINT (end);
- if (NILP (current_buffer->enable_multibyte_characters))
+ if (NILP (current_buffer->enable_multibyte_characters)
+ || (ascii_compatible
+ && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
return Qnil;
p = CHAR_POS_ADDR (from);
- if (to == GPT)
- pend = GPT_ADDR;
+ pend = CHAR_POS_ADDR (to);
+ if (from < GPT && to >= GPT)
+ stop = GPT_ADDR;
else
- pend = CHAR_POS_ADDR (to);
+ stop = pend;
}
else
{
@@ -6829,11 +7195,11 @@ to the string. */)
if (! STRING_MULTIBYTE (string))
return Qnil;
p = SDATA (string) + string_char_to_byte (string, from);
- pend = SDATA (string) + string_char_to_byte (string, to);
+ stop = pend = SDATA (string) + string_char_to_byte (string, to);
+ if (ascii_compatible && (to - from) == (pend - p))
+ return Qnil;
}
- setup_coding_system (Fcheck_coding_system (coding_system), &coding);
-
if (NILP (count))
n = 1;
else
@@ -6842,151 +7208,299 @@ to the string. */)
n = XINT (count);
}
- if (coding.type == coding_type_no_conversion
- || coding.type == coding_type_raw_text)
- return Qnil;
+ positions = Qnil;
+ while (1)
+ {
+ int c;
+
+ if (ascii_compatible)
+ while (p < stop && ASCII_BYTE_P (*p))
+ p++, from++;
+ if (p >= stop)
+ {
+ if (p >= pend)
+ break;
+ stop = pend;
+ p = GAP_END_ADDR;
+ }
+
+ c = STRING_CHAR_ADVANCE (p);
+ if (! (ASCII_CHAR_P (c) && ascii_compatible)
+ && ! char_charset (c, charset_list, NULL))
+ {
+ positions = Fcons (make_number (from), positions);
+ n--;
+ if (n == 0)
+ break;
+ }
+
+ from++;
+ }
+
+ return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
+}
- if (coding.type == coding_type_undecided)
- safe_chars = Qnil;
- else
- safe_chars = coding_safe_chars (coding_system);
- if (STRINGP (string)
- || from >= GPT || to <= GPT)
- positions = unencodable_char_position (safe_chars, from, p, pend, n);
+DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
+ Scheck_coding_systems_region, 3, 3, 0,
+ doc: /* Check if the region is encodable by coding systems.
+
+START and END are buffer positions specifying the region.
+CODING-SYSTEM-LIST is a list of coding systems to check.
+
+The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
+CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
+whole region, POS0, POS1, ... are buffer positions where non-encodable
+characters are found.
+
+If all coding systems in CODING-SYSTEM-LIST can encode the region, the
+value is nil.
+
+START may be a string. In that case, check if the string is
+encodable, and the value contains indices to the string instead of
+buffer positions. END is ignored. */)
+ (start, end, coding_system_list)
+ Lisp_Object start, end, coding_system_list;
+{
+ Lisp_Object list;
+ EMACS_INT start_byte, end_byte;
+ int pos;
+ const unsigned char *p, *pbeg, *pend;
+ int c;
+ Lisp_Object tail, elt;
+
+ if (STRINGP (start))
+ {
+ if (!STRING_MULTIBYTE (start)
+ && SCHARS (start) != SBYTES (start))
+ return Qnil;
+ start_byte = 0;
+ end_byte = SBYTES (start);
+ pos = 0;
+ }
else
{
- Lisp_Object args[2];
+ CHECK_NUMBER_COERCE_MARKER (start);
+ CHECK_NUMBER_COERCE_MARKER (end);
+ if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
+ args_out_of_range (start, end);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return Qnil;
+ start_byte = CHAR_TO_BYTE (XINT (start));
+ end_byte = CHAR_TO_BYTE (XINT (end));
+ if (XINT (end) - XINT (start) == end_byte - start_byte)
+ return Qt;
+
+ if (XINT (start) < GPT && XINT (end) > GPT)
+ {
+ if ((GPT - XINT (start)) < (XINT (end) - GPT))
+ move_gap_both (XINT (start), start_byte);
+ else
+ move_gap_both (XINT (end), end_byte);
+ }
+ pos = XINT (start);
+ }
+
+ list = Qnil;
+ for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
+ Qnil)),
+ list);
+ }
+
+ if (STRINGP (start))
+ p = pbeg = SDATA (start);
+ else
+ p = pbeg = BYTE_POS_ADDR (start_byte);
+ pend = p + (end_byte - start_byte);
+
+ while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
+ while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
- args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
- n -= XINT (Flength (args[0]));
- if (n <= 0)
- positions = args[0];
+ while (p < pend)
+ {
+ if (ASCII_BYTE_P (*p))
+ p++;
else
{
- args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
- pend, n);
- positions = Fappend (2, args);
+ c = STRING_CHAR_ADVANCE (p);
+
+ charset_map_loaded = 0;
+ for (tail = list; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCDR (XCAR (tail));
+ if (! char_encodable_p (c, XCAR (elt)))
+ XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
+ }
+ if (charset_map_loaded)
+ {
+ EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
+
+ if (STRINGP (start))
+ pbeg = SDATA (start);
+ else
+ pbeg = BYTE_POS_ADDR (start_byte);
+ p = pbeg + p_offset;
+ pend = pbeg + pend_offset;
+ }
}
+ pos++;
}
- return (NILP (count) ? Fcar (positions) : positions);
+ tail = list;
+ list = Qnil;
+ for (; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ if (CONSP (XCDR (XCDR (elt))))
+ list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
+ list);
+ }
+
+ return list;
}
+
Lisp_Object
-code_convert_region1 (start, end, coding_system, encodep)
- Lisp_Object start, end, coding_system;
- int encodep;
+code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
+ Lisp_Object start, end, coding_system, dst_object;
+ int encodep, norecord;
{
struct coding_system coding;
- int from, to;
+ EMACS_INT from, from_byte, to, to_byte;
+ Lisp_Object src_object;
CHECK_NUMBER_COERCE_MARKER (start);
CHECK_NUMBER_COERCE_MARKER (end);
- CHECK_SYMBOL (coding_system);
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
+ src_object = Fcurrent_buffer ();
+ if (NILP (dst_object))
+ dst_object = src_object;
+ else if (! EQ (dst_object, Qt))
+ CHECK_BUFFER (dst_object);
validate_region (&start, &end);
from = XFASTINT (start);
+ from_byte = CHAR_TO_BYTE (from);
to = XFASTINT (end);
+ to_byte = CHAR_TO_BYTE (to);
- if (NILP (coding_system))
- return make_number (to - from);
+ setup_coding_system (coding_system, &coding);
+ coding.mode |= CODING_MODE_LAST_BLOCK;
- if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
+ if (encodep)
+ encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
+ dst_object);
+ else
+ decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
+ dst_object);
+ if (! norecord)
+ Vlast_coding_system_used = CODING_ID_NAME (coding.id);
- coding.mode |= CODING_MODE_LAST_BLOCK;
- coding.src_multibyte = coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
- &coding, encodep, 1);
- Vlast_coding_system_used = coding.symbol;
- return make_number (coding.produced_char);
+ if (coding.result != CODING_RESULT_SUCCESS)
+ error ("Code conversion error: %d", coding.result);
+
+ return (BUFFERP (dst_object)
+ ? make_number (coding.produced_char)
+ : coding.dst_object);
}
+
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
- 3, 3, "r\nzCoding system: ",
+ 3, 4, "r\nzCoding system: ",
doc: /* Decode the current region from the specified coding system.
-When called from a program, takes three arguments:
-START, END, and CODING-SYSTEM. START and END are buffer positions.
+When called from a program, takes four arguments:
+ START, END, CODING-SYSTEM, and DESTINATION.
+START and END are buffer positions.
+
+Optional 4th arguments DESTINATION specifies where the decoded text goes.
+If nil, the region between START and END is replace by the decoded text.
+If buffer, the decoded text is inserted in the buffer.
+If t, the decoded text is returned.
+
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)
It returns the length of the decoded text. */)
- (start, end, coding_system)
- Lisp_Object start, end, coding_system;
+ (start, end, coding_system, destination)
+ Lisp_Object start, end, coding_system, destination;
{
- return code_convert_region1 (start, end, coding_system, 0);
+ return code_convert_region (start, end, coding_system, destination, 0, 0);
}
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
- 3, 3, "r\nzCoding system: ",
- doc: /* Encode the current region into the specified coding system.
+ 3, 4, "r\nzCoding system: ",
+ doc: /* Encode the current region by specified coding system.
When called from a program, takes three arguments:
START, END, and CODING-SYSTEM. START and END are buffer positions.
+
+Optional 4th arguments DESTINATION specifies where the encoded text goes.
+If nil, the region between START and END is replace by the encoded text.
+If buffer, the encoded text is inserted in the buffer.
+If t, the encoded text is returned.
+
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)
It returns the length of the encoded text. */)
- (start, end, coding_system)
- Lisp_Object start, end, coding_system;
+ (start, end, coding_system, destination)
+ Lisp_Object start, end, coding_system, destination;
{
- return code_convert_region1 (start, end, coding_system, 1);
+ return code_convert_region (start, end, coding_system, destination, 1, 0);
}
Lisp_Object
-code_convert_string1 (string, coding_system, nocopy, encodep)
- Lisp_Object string, coding_system, nocopy;
- int encodep;
+code_convert_string (string, coding_system, dst_object,
+ encodep, nocopy, norecord)
+ Lisp_Object string, coding_system, dst_object;
+ int encodep, nocopy, norecord;
{
struct coding_system coding;
+ EMACS_INT chars, bytes;
CHECK_STRING (string);
- CHECK_SYMBOL (coding_system);
-
if (NILP (coding_system))
- return (NILP (nocopy) ? Fcopy_sequence (string) : string);
+ {
+ if (! norecord)
+ Vlast_coding_system_used = Qno_conversion;
+ if (NILP (dst_object))
+ return (nocopy ? Fcopy_sequence (string) : string);
+ }
- if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
+ if (NILP (dst_object))
+ dst_object = Qt;
+ else if (! EQ (dst_object, Qt))
+ CHECK_BUFFER (dst_object);
+ setup_coding_system (coding_system, &coding);
coding.mode |= CODING_MODE_LAST_BLOCK;
- string = (encodep
- ? encode_coding_string (string, &coding, !NILP (nocopy))
- : decode_coding_string (string, &coding, !NILP (nocopy)));
- Vlast_coding_system_used = coding.symbol;
+ chars = SCHARS (string);
+ bytes = SBYTES (string);
+ if (encodep)
+ encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
+ else
+ decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
+ if (! norecord)
+ Vlast_coding_system_used = CODING_ID_NAME (coding.id);
- return string;
-}
+ if (coding.result != CODING_RESULT_SUCCESS)
+ error ("Code conversion error: %d", coding.result);
-DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
- 2, 3, 0,
- doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
-Optional arg NOCOPY non-nil means it is OK to return STRING itself
-if the decoding operation is trivial.
-This function sets `last-coding-system-used' to the precise coding system
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.) */)
- (string, coding_system, nocopy)
- Lisp_Object string, coding_system, nocopy;
-{
- return code_convert_string1 (string, coding_system, nocopy, 0);
+ return (BUFFERP (dst_object)
+ ? make_number (coding.produced_char)
+ : coding.dst_object);
}
-DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
- 2, 3, 0,
- doc: /* Encode STRING to CODING-SYSTEM, and return the result.
-Optional arg NOCOPY non-nil means it is OK to return STRING itself
-if the encoding operation is trivial.
-This function sets `last-coding-system-used' to the precise coding system
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.) */)
- (string, coding_system, nocopy)
- Lisp_Object string, coding_system, nocopy;
-{
- return code_convert_string1 (string, coding_system, nocopy, 1);
-}
/* Encode or decode STRING according to CODING_SYSTEM.
Do not set Vlast_coding_system_used.
@@ -6999,23 +7513,52 @@ code_convert_string_norecord (string, coding_system, encodep)
Lisp_Object string, coding_system;
int encodep;
{
- struct coding_system coding;
+ return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
+}
- CHECK_STRING (string);
- CHECK_SYMBOL (coding_system);
- if (NILP (coding_system))
- return string;
+DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
+ 2, 4, 0,
+ doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
- if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
+Optional third arg NOCOPY non-nil means it is OK to return STRING itself
+if the decoding operation is trivial.
- coding.composing = COMPOSITION_DISABLED;
- coding.mode |= CODING_MODE_LAST_BLOCK;
- return (encodep
- ? encode_coding_string (string, &coding, 1)
- : decode_coding_string (string, &coding, 1));
+Optional fourth arg BUFFER non-nil meant that the decoded text is
+inserted in BUFFER instead of returned as a string. In this case,
+the return value is BUFFER.
+
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified. */)
+ (string, coding_system, nocopy, buffer)
+ Lisp_Object string, coding_system, nocopy, buffer;
+{
+ return code_convert_string (string, coding_system, buffer,
+ 0, ! NILP (nocopy), 0);
}
+
+DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
+ 2, 4, 0,
+ doc: /* Encode STRING to CODING-SYSTEM, and return the result.
+
+Optional third arg NOCOPY non-nil means it is OK to return STRING
+itself if the encoding operation is trivial.
+
+Optional fourth arg BUFFER non-nil meant that the encoded text is
+inserted in BUFFER instead of returned as a string. In this case,
+the return value is BUFFER.
+
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified.) */)
+ (string, coding_system, nocopy, buffer)
+ Lisp_Object string, coding_system, nocopy, buffer;
+{
+ return code_convert_string (string, coding_system, buffer,
+ 1, ! NILP (nocopy), 1);
+}
+
DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
@@ -7023,60 +7566,75 @@ Return the corresponding character. */)
(code)
Lisp_Object code;
{
- unsigned char c1, c2, s1, s2;
- Lisp_Object val;
+ Lisp_Object spec, attrs, val;
+ struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
+ int c;
+
+ CHECK_NATNUM (code);
+ c = XFASTINT (code);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
+ attrs = AREF (spec, 0);
+
+ if (ASCII_BYTE_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return code;
- CHECK_NUMBER (code);
- s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
- if (s1 == 0)
+ val = CODING_ATTR_CHARSET_LIST (attrs);
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
+
+ if (c <= 0x7F)
+ charset = charset_roman;
+ else if (c >= 0xA0 && c < 0xDF)
{
- if (s2 < 0x80)
- XSETFASTINT (val, s2);
- else if (s2 >= 0xA0 || s2 <= 0xDF)
- XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
- else
- error ("Invalid Shift JIS code: %x", XFASTINT (code));
+ charset = charset_kana;
+ c -= 0x80;
}
else
{
- if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
- || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
- error ("Invalid Shift JIS code: %x", XFASTINT (code));
- DECODE_SJIS (s1, s2, c1, c2);
- XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
+ int s1 = c >> 8, s2 = c & 0xFF;
+
+ if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
+ || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
+ error ("Invalid code: %d", code);
+ SJIS_TO_JIS (c);
+ charset = charset_kanji;
}
- return val;
+ c = DECODE_CHAR (charset, c);
+ if (c < 0)
+ error ("Invalid code: %d", code);
+ return make_number (c);
}
+
DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
doc: /* Encode a Japanese character CHAR to shift_jis encoding.
Return the corresponding code in SJIS. */)
(ch)
- Lisp_Object ch;
+ Lisp_Object ch;
{
- int charset, c1, c2, s1, s2;
- Lisp_Object val;
+ Lisp_Object spec, attrs, charset_list;
+ int c;
+ struct charset *charset;
+ unsigned code;
- CHECK_NUMBER (ch);
- SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- if (charset == CHARSET_ASCII)
- {
- val = ch;
- }
- else if (charset == charset_jisx0208
- && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
- {
- ENCODE_SJIS (c1, c2, s1, s2);
- XSETFASTINT (val, (s1 << 8) | s2);
- }
- else if (charset == charset_katakana_jisx0201
- && c1 > 0x20 && c2 < 0xE0)
- {
- XSETFASTINT (val, c1 | 0x80);
- }
- else
- error ("Can't encode to shift_jis: %d", XFASTINT (ch));
- return val;
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
+ attrs = AREF (spec, 0);
+
+ if (ASCII_CHAR_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return ch;
+
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ charset = char_charset (c, charset_list, &code);
+ if (code == CHARSET_INVALID_CODE (charset))
+ error ("Can't encode by shift_jis encoding: %d", c);
+ JIS_TO_SJIS (code);
+
+ return make_number (code);
}
DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
@@ -7085,27 +7643,37 @@ Return the corresponding character. */)
(code)
Lisp_Object code;
{
- int charset;
- unsigned char b1, b2, c1, c2;
- Lisp_Object val;
+ Lisp_Object spec, attrs, val;
+ struct charset *charset_roman, *charset_big5, *charset;
+ int c;
- CHECK_NUMBER (code);
- b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
- if (b1 == 0)
- {
- if (b2 >= 0x80)
- error ("Invalid BIG5 code: %x", XFASTINT (code));
- val = code;
- }
+ CHECK_NATNUM (code);
+ c = XFASTINT (code);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
+ attrs = AREF (spec, 0);
+
+ if (ASCII_BYTE_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return code;
+
+ val = CODING_ATTR_CHARSET_LIST (attrs);
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
+
+ if (c <= 0x7F)
+ charset = charset_roman;
else
{
- if ((b1 < 0xA1 || b1 > 0xFE)
- || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
- error ("Invalid BIG5 code: %x", XFASTINT (code));
- DECODE_BIG5 (b1, b2, charset, c1, c2);
- XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
+ int b1 = c >> 8, b2 = c & 0x7F;
+ if (b1 < 0xA1 || b1 > 0xFE
+ || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
+ error ("Invalid code: %d", code);
+ charset = charset_big5;
}
- return val;
+ c = DECODE_CHAR (charset, (unsigned )c);
+ if (c < 0)
+ error ("Invalid code: %d", code);
+ return make_number (c);
}
DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
@@ -7114,48 +7682,50 @@ Return the corresponding character code in Big5. */)
(ch)
Lisp_Object ch;
{
- int charset, c1, c2, b1, b2;
- Lisp_Object val;
-
- CHECK_NUMBER (ch);
- SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- if (charset == CHARSET_ASCII)
- {
- val = ch;
- }
- else if ((charset == charset_big5_1
- && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
- || (charset == charset_big5_2
- && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
- {
- ENCODE_BIG5 (charset, c1, c2, b1, b2);
- XSETFASTINT (val, (b1 << 8) | b2);
- }
- else
- error ("Can't encode to Big5: %d", XFASTINT (ch));
- return val;
+ Lisp_Object spec, attrs, charset_list;
+ struct charset *charset;
+ int c;
+ unsigned code;
+
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
+ attrs = AREF (spec, 0);
+ if (ASCII_CHAR_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return ch;
+
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ charset = char_charset (c, charset_list, &code);
+ if (code == CHARSET_INVALID_CODE (charset))
+ error ("Can't encode by Big5 encoding: %d", c);
+
+ return make_number (code);
}
+
-DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
+DEFUN ("set-terminal-coding-system-internal",
+ Fset_terminal_coding_system_internal,
Sset_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
+ setup_coding_system (Fcheck_coding_system (coding_system),
+ &terminal_coding);
+
/* We had better not send unsafe characters to terminal. */
- terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
- /* Character composition should be disabled. */
- terminal_coding.composing = COMPOSITION_DISABLED;
- /* Error notification should be suppressed. */
- terminal_coding.suppress_error = 1;
+ terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
+ /* Characer composition should be disabled. */
+ terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
terminal_coding.src_multibyte = 1;
terminal_coding.dst_multibyte = 0;
return Qnil;
}
-DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
+DEFUN ("set-safe-terminal-coding-system-internal",
+ Fset_safe_terminal_coding_system_internal,
Sset_safe_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
@@ -7164,42 +7734,42 @@ DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_sys
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system),
&safe_terminal_coding);
- /* Character composition should be disabled. */
- safe_terminal_coding.composing = COMPOSITION_DISABLED;
- /* Error notification should be suppressed. */
- terminal_coding.suppress_error = 1;
+ /* Characer composition should be disabled. */
+ safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
safe_terminal_coding.src_multibyte = 1;
safe_terminal_coding.dst_multibyte = 0;
return Qnil;
}
-DEFUN ("terminal-coding-system", Fterminal_coding_system,
- Sterminal_coding_system, 0, 0, 0,
+DEFUN ("terminal-coding-system",
+ Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
doc: /* Return coding system specified for terminal output. */)
()
{
- return terminal_coding.symbol;
+ return CODING_ID_NAME (terminal_coding.id);
}
-DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
+DEFUN ("set-keyboard-coding-system-internal",
+ Fset_keyboard_coding_system_internal,
Sset_keyboard_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
- /* Character composition should be disabled. */
- keyboard_coding.composing = COMPOSITION_DISABLED;
+ setup_coding_system (Fcheck_coding_system (coding_system),
+ &keyboard_coding);
+ /* Characer composition should be disabled. */
+ keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
return Qnil;
}
-DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
- Skeyboard_coding_system, 0, 0, 0,
+DEFUN ("keyboard-coding-system",
+ Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
doc: /* Return coding system specified for decoding keyboard input. */)
()
{
- return keyboard_coding.symbol;
+ return CODING_ID_NAME (keyboard_coding.id);
}
@@ -7247,21 +7817,14 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
operation = args[0];
if (!SYMBOLP (operation)
|| !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
- error ("Invalid first argument");
+ error ("Invalid first arguement");
if (nargs < 1 + XINT (target_idx))
error ("Too few arguments for operation: %s",
SDATA (SYMBOL_NAME (operation)));
- /* For write-region, if the 6th argument (i.e. VISIT, the 5th
- argument to write-region) is string, it must be treated as a
- target file name. */
- if (EQ (operation, Qwrite_region)
- && nargs > 5
- && STRINGP (args[5]))
- target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
- error ("Invalid argument %d", XINT (target_idx) + 1);
+ error ("Invalid %dth argument", XINT (target_idx) + 1);
chain = ((EQ (operation, Qinsert_file_contents)
|| EQ (operation, Qwrite_region))
@@ -7275,8 +7838,8 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
for (; CONSP (chain); chain = XCDR (chain))
{
Lisp_Object elt;
- elt = XCAR (chain);
+ elt = XCAR (chain);
if (CONSP (elt)
&& ((STRINGP (target)
&& STRINGP (XCAR (elt))
@@ -7306,99 +7869,734 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
return Qnil;
}
-DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
- Supdate_coding_systems_internal, 0, 0, 0,
- doc: /* Update internal database for ISO2022 and CCL based coding systems.
-When values of any coding categories are changed, you must
-call this function. */)
- ()
+DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
+ Sset_coding_system_priority, 0, MANY, 0,
+ doc: /* Assign higher priority to the coding systems given as arguments.
+If multiple coding systems belongs to the same category,
+all but the first one are ignored. */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
{
- int i;
+ int i, j;
+ int changed[coding_category_max];
+ enum coding_category priorities[coding_category_max];
+
+ bzero (changed, sizeof changed);
- for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
+ for (i = j = 0; i < nargs; i++)
{
- Lisp_Object val;
+ enum coding_category category;
+ Lisp_Object spec, attrs;
- val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
- if (!NILP (val))
- {
- if (! coding_system_table[i])
- coding_system_table[i] = ((struct coding_system *)
- xmalloc (sizeof (struct coding_system)));
- setup_coding_system (val, coding_system_table[i]);
- }
- else if (coding_system_table[i])
- {
- xfree (coding_system_table[i]);
- coding_system_table[i] = NULL;
- }
+ CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
+ attrs = AREF (spec, 0);
+ category = XINT (CODING_ATTR_CATEGORY (attrs));
+ if (changed[category])
+ /* Ignore this coding system because a coding system of the
+ same category already had a higher priority. */
+ continue;
+ changed[category] = 1;
+ priorities[j++] = category;
+ if (coding_categories[category].id >= 0
+ && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
+ setup_coding_system (args[i], &coding_categories[category]);
+ Fset (AREF (Vcoding_category_table, category), args[i]);
}
+ /* Now we have decided top J priorities. Reflect the order of the
+ original priorities to the remaining priorities. */
+
+ for (i = j, j = 0; i < coding_category_max; i++, j++)
+ {
+ while (j < coding_category_max
+ && changed[coding_priorities[j]])
+ j++;
+ if (j == coding_category_max)
+ abort ();
+ priorities[i] = coding_priorities[j];
+ }
+
+ bcopy (priorities, coding_priorities, sizeof priorities);
+
+ /* Update `coding-category-list'. */
+ Vcoding_category_list = Qnil;
+ for (i = coding_category_max - 1; i >= 0; i--)
+ Vcoding_category_list
+ = Fcons (AREF (Vcoding_category_table, priorities[i]),
+ Vcoding_category_list);
+
return Qnil;
}
-DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
- Sset_coding_priority_internal, 0, 0, 0,
- doc: /* Update internal database for the current value of `coding-category-list'.
-This function is internal use only. */)
- ()
+DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
+ Scoding_system_priority_list, 0, 1, 0,
+ doc: /* Return a list of coding systems ordered by their priorities.
+HIGHESTP non-nil means just return the highest priority one. */)
+ (highestp)
+ Lisp_Object highestp;
{
- int i = 0, idx;
+ int i;
Lisp_Object val;
- val = Vcoding_category_list;
-
- while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
+ for (i = 0, val = Qnil; i < coding_category_max; i++)
{
- if (! SYMBOLP (XCAR (val)))
- break;
- idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
- if (idx >= CODING_CATEGORY_IDX_MAX)
- break;
- coding_priorities[i++] = (1 << idx);
- val = XCDR (val);
+ enum coding_category category = coding_priorities[i];
+ int id = coding_categories[category].id;
+ Lisp_Object attrs;
+
+ if (id < 0)
+ continue;
+ attrs = CODING_ID_ATTRS (id);
+ if (! NILP (highestp))
+ return CODING_ATTR_BASE_NAME (attrs);
+ val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
}
- /* If coding-category-list is valid and contains all coding
- categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
- the following code saves Emacs from crashing. */
- while (i < CODING_CATEGORY_IDX_MAX)
- coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
+ return Fnreverse (val);
+}
- return Qnil;
+static char *suffixes[] = { "-unix", "-dos", "-mac" };
+
+static Lisp_Object
+make_subsidiaries (base)
+ Lisp_Object base;
+{
+ Lisp_Object subsidiaries;
+ int base_name_len = SBYTES (SYMBOL_NAME (base));
+ char *buf = (char *) alloca (base_name_len + 6);
+ int i;
+
+ bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
+ subsidiaries = Fmake_vector (make_number (3), Qnil);
+ for (i = 0; i < 3; i++)
+ {
+ bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
+ ASET (subsidiaries, i, intern (buf));
+ }
+ return subsidiaries;
}
+
DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
- Sdefine_coding_system_internal, 1, 1, 0,
- doc: /* Register CODING-SYSTEM as a base coding system.
-This function is internal use only. */)
- (coding_system)
- Lisp_Object coding_system;
+ Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
+ doc: /* For internal use only.
+usage: (define-coding-system-internal ...) */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
{
- Lisp_Object safe_chars, slot;
+ Lisp_Object name;
+ Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
+ Lisp_Object attrs; /* Vector of attributes. */
+ Lisp_Object eol_type;
+ Lisp_Object aliases;
+ Lisp_Object coding_type, charset_list, safe_charsets;
+ enum coding_category category;
+ Lisp_Object tail, val;
+ int max_charset_id = 0;
+ int i;
- if (NILP (Fcheck_coding_system (coding_system)))
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
- safe_chars = coding_safe_chars (coding_system);
- if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
- error ("No valid safe-chars property for %s",
- SDATA (SYMBOL_NAME (coding_system)));
- if (EQ (safe_chars, Qt))
+ if (nargs < coding_arg_max)
+ goto short_args;
+
+ attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
+
+ name = args[coding_arg_name];
+ CHECK_SYMBOL (name);
+ CODING_ATTR_BASE_NAME (attrs) = name;
+
+ val = args[coding_arg_mnemonic];
+ if (! STRINGP (val))
+ CHECK_CHARACTER (val);
+ CODING_ATTR_MNEMONIC (attrs) = val;
+
+ coding_type = args[coding_arg_coding_type];
+ CHECK_SYMBOL (coding_type);
+ CODING_ATTR_TYPE (attrs) = coding_type;
+
+ charset_list = args[coding_arg_charset_list];
+ if (SYMBOLP (charset_list))
{
- if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
- XSETCAR (Vcoding_system_safe_chars,
- Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
+ if (EQ (charset_list, Qiso_2022))
+ {
+ if (! EQ (coding_type, Qiso_2022))
+ error ("Invalid charset-list");
+ charset_list = Viso_2022_charset_list;
+ }
+ else if (EQ (charset_list, Qemacs_mule))
+ {
+ if (! EQ (coding_type, Qemacs_mule))
+ error ("Invalid charset-list");
+ charset_list = Vemacs_mule_charset_list;
+ }
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ if (max_charset_id < XFASTINT (XCAR (tail)))
+ max_charset_id = XFASTINT (XCAR (tail));
}
else
{
- slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
- if (NILP (slot))
- XSETCDR (Vcoding_system_safe_chars,
- nconc2 (XCDR (Vcoding_system_safe_chars),
- Fcons (Fcons (coding_system, safe_chars), Qnil)));
+ charset_list = Fcopy_sequence (charset_list);
+ for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
+ {
+ struct charset *charset;
+
+ val = Fcar (tail);
+ CHECK_CHARSET_GET_CHARSET (val, charset);
+ if (EQ (coding_type, Qiso_2022)
+ ? CHARSET_ISO_FINAL (charset) < 0
+ : EQ (coding_type, Qemacs_mule)
+ ? CHARSET_EMACS_MULE_ID (charset) < 0
+ : 0)
+ error ("Can't handle charset `%s'",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+
+ XSETCAR (tail, make_number (charset->id));
+ if (max_charset_id < charset->id)
+ max_charset_id = charset->id;
+ }
+ }
+ CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
+
+ safe_charsets = Fmake_string (make_number (max_charset_id + 1),
+ make_number (255));
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
+ CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
+
+ CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
+
+ val = args[coding_arg_decode_translation_table];
+ if (! NILP (val))
+ CHECK_CHAR_TABLE (val);
+ CODING_ATTR_DECODE_TBL (attrs) = val;
+
+ val = args[coding_arg_encode_translation_table];
+ if (! NILP (val))
+ CHECK_CHAR_TABLE (val);
+ CODING_ATTR_ENCODE_TBL (attrs) = val;
+
+ val = args[coding_arg_post_read_conversion];
+ CHECK_SYMBOL (val);
+ CODING_ATTR_POST_READ (attrs) = val;
+
+ val = args[coding_arg_pre_write_conversion];
+ CHECK_SYMBOL (val);
+ CODING_ATTR_PRE_WRITE (attrs) = val;
+
+ val = args[coding_arg_default_char];
+ if (NILP (val))
+ CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
+ else
+ {
+ CHECK_CHARACTER (val);
+ CODING_ATTR_DEFAULT_CHAR (attrs) = val;
+ }
+
+ val = args[coding_arg_for_unibyte];
+ CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
+
+ val = args[coding_arg_plist];
+ CHECK_LIST (val);
+ CODING_ATTR_PLIST (attrs) = val;
+
+ if (EQ (coding_type, Qcharset))
+ {
+ Lisp_Object list;
+ /* Generate a lisp vector of 256 elements. Each element is nil,
+ integer, or a list of charset IDs.
+
+ If Nth element is nil, the byte code N is invalid in this
+ coding system.
+
+ If Nth element is a number NUM, N is the first byte of a
+ charset whose ID is NUM.
+
+ If Nth element is a list of charset IDs, N is the first byte
+ of one of them. The list is sorted by dimensions of the
+ charsets. A charset of smaller dimension comes firtst.
+ */
+ for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
+
+ if (charset->method == CHARSET_METHOD_SUPERSET)
+ {
+ val = CHARSET_SUPERSET (charset);
+ for (; CONSP (val); val = XCDR (val))
+ list = Fcons (XCAR (XCAR (val)), list);
+ }
+ else
+ list = Fcons (XCAR (tail), list);
+ }
+
+ val = Fmake_vector (make_number (256), Qnil);
+
+ for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
+ int dim = CHARSET_DIMENSION (charset);
+ int idx = (dim - 1) * 4;
+
+ if (CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+ for (i = charset->code_space[idx];
+ i <= charset->code_space[idx + 1]; i++)
+ {
+ Lisp_Object tmp, tmp2;
+ int dim2;
+
+ tmp = AREF (val, i);
+ if (NILP (tmp))
+ tmp = XCAR (tail);
+ else if (NUMBERP (tmp))
+ {
+ dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
+ if (dim < dim2)
+ tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ else
+ tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+ }
+ else
+ {
+ for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
+ {
+ dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
+ if (dim < dim2)
+ break;
+ }
+ if (NILP (tmp2))
+ tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+ else
+ {
+ XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
+ XSETCAR (tmp2, XCAR (tail));
+ }
+ }
+ ASET (val, i, tmp);
+ }
+ }
+ ASET (attrs, coding_attr_charset_valids, val);
+ category = coding_category_charset;
+ }
+ else if (EQ (coding_type, Qccl))
+ {
+ Lisp_Object valids;
+
+ if (nargs < coding_arg_ccl_max)
+ goto short_args;
+
+ val = args[coding_arg_ccl_decoder];
+ CHECK_CCL_PROGRAM (val);
+ if (VECTORP (val))
+ val = Fcopy_sequence (val);
+ ASET (attrs, coding_attr_ccl_decoder, val);
+
+ val = args[coding_arg_ccl_encoder];
+ CHECK_CCL_PROGRAM (val);
+ if (VECTORP (val))
+ val = Fcopy_sequence (val);
+ ASET (attrs, coding_attr_ccl_encoder, val);
+
+ val = args[coding_arg_ccl_valids];
+ valids = Fmake_string (make_number (256), make_number (0));
+ for (tail = val; !NILP (tail); tail = Fcdr (tail))
+ {
+ int from, to;
+
+ val = Fcar (tail);
+ if (INTEGERP (val))
+ {
+ from = to = XINT (val);
+ if (from < 0 || from > 255)
+ args_out_of_range_3 (val, make_number (0), make_number (255));
+ }
+ else
+ {
+ CHECK_CONS (val);
+ CHECK_NATNUM_CAR (val);
+ CHECK_NATNUM_CDR (val);
+ from = XINT (XCAR (val));
+ if (from > 255)
+ args_out_of_range_3 (XCAR (val),
+ make_number (0), make_number (255));
+ to = XINT (XCDR (val));
+ if (to < from || to > 255)
+ args_out_of_range_3 (XCDR (val),
+ XCAR (val), make_number (255));
+ }
+ for (i = from; i <= to; i++)
+ SSET (valids, i, 1);
+ }
+ ASET (attrs, coding_attr_ccl_valids, valids);
+
+ category = coding_category_ccl;
+ }
+ else if (EQ (coding_type, Qutf_16))
+ {
+ Lisp_Object bom, endian;
+
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
+
+ if (nargs < coding_arg_utf16_max)
+ goto short_args;
+
+ bom = args[coding_arg_utf16_bom];
+ if (! NILP (bom) && ! EQ (bom, Qt))
+ {
+ CHECK_CONS (bom);
+ val = XCAR (bom);
+ CHECK_CODING_SYSTEM (val);
+ val = XCDR (bom);
+ CHECK_CODING_SYSTEM (val);
+ }
+ ASET (attrs, coding_attr_utf_16_bom, bom);
+
+ endian = args[coding_arg_utf16_endian];
+ CHECK_SYMBOL (endian);
+ if (NILP (endian))
+ endian = Qbig;
+ else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
+ error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
+ ASET (attrs, coding_attr_utf_16_endian, endian);
+
+ category = (CONSP (bom)
+ ? coding_category_utf_16_auto
+ : NILP (bom)
+ ? (EQ (endian, Qbig)
+ ? coding_category_utf_16_be_nosig
+ : coding_category_utf_16_le_nosig)
+ : (EQ (endian, Qbig)
+ ? coding_category_utf_16_be
+ : coding_category_utf_16_le));
+ }
+ else if (EQ (coding_type, Qiso_2022))
+ {
+ Lisp_Object initial, reg_usage, request, flags;
+ int i;
+
+ if (nargs < coding_arg_iso2022_max)
+ goto short_args;
+
+ initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
+ CHECK_VECTOR (initial);
+ for (i = 0; i < 4; i++)
+ {
+ val = Faref (initial, make_number (i));
+ if (! NILP (val))
+ {
+ struct charset *charset;
+
+ CHECK_CHARSET_GET_CHARSET (val, charset);
+ ASET (initial, i, make_number (CHARSET_ID (charset)));
+ if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ }
+ else
+ ASET (initial, i, make_number (-1));
+ }
+
+ reg_usage = args[coding_arg_iso2022_reg_usage];
+ CHECK_CONS (reg_usage);
+ CHECK_NUMBER_CAR (reg_usage);
+ CHECK_NUMBER_CDR (reg_usage);
+
+ request = Fcopy_sequence (args[coding_arg_iso2022_request]);
+ for (tail = request; ! NILP (tail); tail = Fcdr (tail))
+ {
+ int id;
+ Lisp_Object tmp;
+
+ val = Fcar (tail);
+ CHECK_CONS (val);
+ tmp = XCAR (val);
+ CHECK_CHARSET_GET_ID (tmp, id);
+ CHECK_NATNUM_CDR (val);
+ if (XINT (XCDR (val)) >= 4)
+ error ("Invalid graphic register number: %d", XINT (XCDR (val)));
+ XSETCAR (val, make_number (id));
+ }
+
+ flags = args[coding_arg_iso2022_flags];
+ CHECK_NATNUM (flags);
+ i = XINT (flags);
+ if (EQ (args[coding_arg_charset_list], Qiso_2022))
+ flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
+
+ ASET (attrs, coding_attr_iso_initial, initial);
+ ASET (attrs, coding_attr_iso_usage, reg_usage);
+ ASET (attrs, coding_attr_iso_request, request);
+ ASET (attrs, coding_attr_iso_flags, flags);
+ setup_iso_safe_charsets (attrs);
+
+ if (i & CODING_ISO_FLAG_SEVEN_BITS)
+ category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
+ | CODING_ISO_FLAG_SINGLE_SHIFT))
+ ? coding_category_iso_7_else
+ : EQ (args[coding_arg_charset_list], Qiso_2022)
+ ? coding_category_iso_7
+ : coding_category_iso_7_tight);
else
- XSETCDR (slot, safe_chars);
+ {
+ int id = XINT (AREF (initial, 1));
+
+ category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || EQ (args[coding_arg_charset_list], Qiso_2022)
+ || id < 0)
+ ? coding_category_iso_8_else
+ : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
+ ? coding_category_iso_8_1
+ : coding_category_iso_8_2);
+ }
+ if (category != coding_category_iso_8_1
+ && category != coding_category_iso_8_2)
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
}
+ else if (EQ (coding_type, Qemacs_mule))
+ {
+ if (EQ (args[coding_arg_charset_list], Qemacs_mule))
+ ASET (attrs, coding_attr_emacs_mule_full, Qt);
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ category = coding_category_emacs_mule;
+ }
+ else if (EQ (coding_type, Qshift_jis))
+ {
+
+ struct charset *charset;
+
+ if (XINT (Flength (charset_list)) != 3)
+ error ("There should be just three charsets");
+
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 1)
+ error ("Dimension of charset %s is not one",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ if (CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+ charset_list = XCDR (charset_list);
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 1)
+ error ("Dimension of charset %s is not one",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+
+ charset_list = XCDR (charset_list);
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 2)
+ error ("Dimension of charset %s is not two",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+
+ category = coding_category_sjis;
+ Vsjis_coding_system = name;
+ }
+ else if (EQ (coding_type, Qbig5))
+ {
+ struct charset *charset;
+
+ if (XINT (Flength (charset_list)) != 2)
+ error ("There should be just two charsets");
+
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 1)
+ error ("Dimension of charset %s is not one",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ if (CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+ charset_list = XCDR (charset_list);
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 2)
+ error ("Dimension of charset %s is not two",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+
+ category = coding_category_big5;
+ Vbig5_coding_system = name;
+ }
+ else if (EQ (coding_type, Qraw_text))
+ {
+ category = coding_category_raw_text;
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ }
+ else if (EQ (coding_type, Qutf_8))
+ {
+ category = coding_category_utf_8;
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ }
+ else if (EQ (coding_type, Qundecided))
+ category = coding_category_undecided;
+ else
+ error ("Invalid coding system type: %s",
+ SDATA (SYMBOL_NAME (coding_type)));
+
+ CODING_ATTR_CATEGORY (attrs) = make_number (category);
+ CODING_ATTR_PLIST (attrs)
+ = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
+ CODING_ATTR_PLIST (attrs)));
+
+ eol_type = args[coding_arg_eol_type];
+ if (! NILP (eol_type)
+ && ! EQ (eol_type, Qunix)
+ && ! EQ (eol_type, Qdos)
+ && ! EQ (eol_type, Qmac))
+ error ("Invalid eol-type");
+
+ aliases = Fcons (name, Qnil);
+
+ if (NILP (eol_type))
+ {
+ eol_type = make_subsidiaries (name);
+ for (i = 0; i < 3; i++)
+ {
+ Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
+
+ this_name = AREF (eol_type, i);
+ this_aliases = Fcons (this_name, Qnil);
+ this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
+ this_spec = Fmake_vector (make_number (3), attrs);
+ ASET (this_spec, 1, this_aliases);
+ ASET (this_spec, 2, this_eol_type);
+ Fputhash (this_name, this_spec, Vcoding_system_hash_table);
+ Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
+ Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
+ Vcoding_system_alist);
+ }
+ }
+
+ spec_vec = Fmake_vector (make_number (3), attrs);
+ ASET (spec_vec, 1, aliases);
+ ASET (spec_vec, 2, eol_type);
+
+ Fputhash (name, spec_vec, Vcoding_system_hash_table);
+ Vcoding_system_list = Fcons (name, Vcoding_system_list);
+ Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
+ Vcoding_system_alist);
+
+ {
+ int id = coding_categories[category].id;
+
+ if (id < 0 || EQ (name, CODING_ID_NAME (id)))
+ setup_coding_system (name, &coding_categories[category]);
+ }
+
return Qnil;
+
+ short_args:
+ return Fsignal (Qwrong_number_of_arguments,
+ Fcons (intern ("define-coding-system-internal"),
+ make_number (nargs)));
+}
+
+/* Fixme: should this record the alias relationships for
+ diagnostics? Should it update coding-system-list? */
+DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
+ Sdefine_coding_system_alias, 2, 2, 0,
+ doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
+ (alias, coding_system)
+ Lisp_Object alias, coding_system;
+{
+ Lisp_Object spec, aliases, eol_type;
+
+ CHECK_SYMBOL (alias);
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ aliases = AREF (spec, 1);
+ while (!NILP (XCDR (aliases)))
+ aliases = XCDR (aliases);
+ XSETCDR (aliases, Fcons (alias, Qnil));
+
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
+ {
+ Lisp_Object subsidiaries;
+ int i;
+
+ subsidiaries = make_subsidiaries (alias);
+ for (i = 0; i < 3; i++)
+ Fdefine_coding_system_alias (AREF (subsidiaries, i),
+ AREF (eol_type, i));
+
+ ASET (spec, 2, subsidiaries);
+ }
+
+ Fputhash (alias, spec, Vcoding_system_hash_table);
+ Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
+ Vcoding_system_alist);
+
+ return Qnil;
+}
+
+DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
+ 1, 1, 0,
+ doc: /* Return the base of CODING-SYSTEM.
+Any alias or subsidiary coding system is not a base coding system. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec, attrs;
+
+ if (NILP (coding_system))
+ return (Qno_conversion);
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ attrs = AREF (spec, 0);
+ return CODING_ATTR_BASE_NAME (attrs);
+}
+
+DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
+ 1, 1, 0,
+ doc: "Return the property list of CODING-SYSTEM.")
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec, attrs;
+
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ attrs = AREF (spec, 0);
+ return CODING_ATTR_PLIST (attrs);
+}
+
+
+DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
+ 1, 1, 0,
+ doc: /* Return the list of aliases of CODING-SYSTEM. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec;
+
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ return AREF (spec, 1);
+}
+
+DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
+ Scoding_system_eol_type, 1, 1, 0,
+ doc: /* Return eol-type of CODING-SYSTEM.
+An eol-type is integer 0, 1, 2, or a vector of coding systems.
+
+Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
+and CR respectively.
+
+A vector value indicates that a format of end-of-line should be
+detected automatically. Nth element of the vector is the subsidiary
+coding system whose eol-type is N. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec, eol_type;
+ int n;
+
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ if (! CODING_SYSTEM_P (coding_system))
+ return Qnil;
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
+ return Fcopy_sequence (eol_type);
+ n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
+ return make_number (n);
}
#endif /* emacs */
@@ -7411,20 +8609,11 @@ init_coding_once ()
{
int i;
- /* Emacs' internal format specific initialize routine. */
- for (i = 0; i <= 0x20; i++)
- emacs_code_class[i] = EMACS_control_code;
- emacs_code_class[0x0A] = EMACS_linefeed_code;
- emacs_code_class[0x0D] = EMACS_carriage_return_code;
- for (i = 0x21 ; i < 0x7F; i++)
- emacs_code_class[i] = EMACS_ascii_code;
- emacs_code_class[0x7F] = EMACS_control_code;
- for (i = 0x80; i < 0xFF; i++)
- emacs_code_class[i] = EMACS_invalid_code;
- emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
- emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
- emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
- emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
+ for (i = 0; i < coding_category_max; i++)
+ {
+ coding_categories[i].id = -1;
+ coding_priorities[i] = i;
+ }
/* ISO2022 specific initialize routine. */
for (i = 0; i < 0x20; i++)
@@ -7446,24 +8635,16 @@ init_coding_once ()
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
- setup_coding_system (Qnil, &keyboard_coding);
- setup_coding_system (Qnil, &terminal_coding);
- setup_coding_system (Qnil, &safe_terminal_coding);
- setup_coding_system (Qnil, &default_buffer_file_coding);
-
- bzero (coding_system_table, sizeof coding_system_table);
-
- bzero (ascii_skip_code, sizeof ascii_skip_code);
- for (i = 0; i < 128; i++)
- ascii_skip_code[i] = 1;
-
-#if defined (MSDOS) || defined (WINDOWSNT)
- system_eol_type = CODING_EOL_CRLF;
-#else
- system_eol_type = CODING_EOL_LF;
-#endif
-
inhibit_pre_post_conversion = 0;
+
+ for (i = 0; i < 256; i++)
+ {
+ emacs_mule_bytes[i] = 1;
+ }
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
}
#ifdef emacs
@@ -7471,11 +8652,29 @@ init_coding_once ()
void
syms_of_coding ()
{
- Qtarget_idx = intern ("target-idx");
- staticpro (&Qtarget_idx);
+ staticpro (&Vcoding_system_hash_table);
+ {
+ Lisp_Object args[2];
+ args[0] = QCtest;
+ args[1] = Qeq;
+ Vcoding_system_hash_table = Fmake_hash_table (2, args);
+ }
+
+ staticpro (&Vsjis_coding_system);
+ Vsjis_coding_system = Qnil;
+
+ staticpro (&Vbig5_coding_system);
+ Vbig5_coding_system = Qnil;
+
+ staticpro (&Vcode_conversion_work_buf_list);
+ Vcode_conversion_work_buf_list = Qnil;
+
+ staticpro (&Vcode_conversion_reused_work_buf);
+ Vcode_conversion_reused_work_buf = Qnil;
- Qcoding_system_history = intern ("coding-system-history");
- staticpro (&Qcoding_system_history);
+ DEFSYM (Qcharset, "charset");
+ DEFSYM (Qtarget_idx, "target-idx");
+ DEFSYM (Qcoding_system_history, "coding-system-history");
Fset (Qcoding_system_history, Qnil);
/* Target FILENAME is the first argument. */
@@ -7483,117 +8682,117 @@ syms_of_coding ()
/* Target FILENAME is the third argument. */
Fput (Qwrite_region, Qtarget_idx, make_number (2));
- Qcall_process = intern ("call-process");
- staticpro (&Qcall_process);
+ DEFSYM (Qcall_process, "call-process");
/* Target PROGRAM is the first argument. */
Fput (Qcall_process, Qtarget_idx, make_number (0));
- Qcall_process_region = intern ("call-process-region");
- staticpro (&Qcall_process_region);
+ DEFSYM (Qcall_process_region, "call-process-region");
/* Target PROGRAM is the third argument. */
Fput (Qcall_process_region, Qtarget_idx, make_number (2));
- Qstart_process = intern ("start-process");
- staticpro (&Qstart_process);
+ DEFSYM (Qstart_process, "start-process");
/* Target PROGRAM is the third argument. */
Fput (Qstart_process, Qtarget_idx, make_number (2));
- Qopen_network_stream = intern ("open-network-stream");
- staticpro (&Qopen_network_stream);
+ DEFSYM (Qopen_network_stream, "open-network-stream");
/* Target SERVICE is the fourth argument. */
Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
- Qcoding_system = intern ("coding-system");
- staticpro (&Qcoding_system);
+ DEFSYM (Qcoding_system, "coding-system");
+ DEFSYM (Qcoding_aliases, "coding-aliases");
- Qeol_type = intern ("eol-type");
- staticpro (&Qeol_type);
+ DEFSYM (Qeol_type, "eol-type");
+ DEFSYM (Qunix, "unix");
+ DEFSYM (Qdos, "dos");
- Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
- staticpro (&Qbuffer_file_coding_system);
+ DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
+ DEFSYM (Qpost_read_conversion, "post-read-conversion");
+ DEFSYM (Qpre_write_conversion, "pre-write-conversion");
+ DEFSYM (Qdefault_char, "default-char");
+ DEFSYM (Qundecided, "undecided");
+ DEFSYM (Qno_conversion, "no-conversion");
+ DEFSYM (Qraw_text, "raw-text");
- Qpost_read_conversion = intern ("post-read-conversion");
- staticpro (&Qpost_read_conversion);
+ DEFSYM (Qiso_2022, "iso-2022");
- Qpre_write_conversion = intern ("pre-write-conversion");
- staticpro (&Qpre_write_conversion);
+ DEFSYM (Qutf_8, "utf-8");
+ DEFSYM (Qutf_8_emacs, "utf-8-emacs");
- Qno_conversion = intern ("no-conversion");
- staticpro (&Qno_conversion);
+ DEFSYM (Qutf_16, "utf-16");
+ DEFSYM (Qbig, "big");
+ DEFSYM (Qlittle, "little");
- Qundecided = intern ("undecided");
- staticpro (&Qundecided);
+ DEFSYM (Qshift_jis, "shift-jis");
+ DEFSYM (Qbig5, "big5");
- Qcoding_system_p = intern ("coding-system-p");
- staticpro (&Qcoding_system_p);
-
- Qcoding_system_error = intern ("coding-system-error");
- staticpro (&Qcoding_system_error);
+ DEFSYM (Qcoding_system_p, "coding-system-p");
+ DEFSYM (Qcoding_system_error, "coding-system-error");
Fput (Qcoding_system_error, Qerror_conditions,
Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
Fput (Qcoding_system_error, Qerror_message,
build_string ("Invalid coding system"));
- Qcoding_category = intern ("coding-category");
- staticpro (&Qcoding_category);
- Qcoding_category_index = intern ("coding-category-index");
- staticpro (&Qcoding_category_index);
-
- Vcoding_category_table
- = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
- staticpro (&Vcoding_category_table);
- {
- int i;
- for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
- {
- XVECTOR (Vcoding_category_table)->contents[i]
- = intern (coding_category_name[i]);
- Fput (XVECTOR (Vcoding_category_table)->contents[i],
- Qcoding_category_index, make_number (i));
- }
- }
-
- Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
- staticpro (&Vcoding_system_safe_chars);
-
- Qtranslation_table = intern ("translation-table");
- staticpro (&Qtranslation_table);
- Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
-
- Qtranslation_table_id = intern ("translation-table-id");
- staticpro (&Qtranslation_table_id);
-
- Qtranslation_table_for_decode = intern ("translation-table-for-decode");
- staticpro (&Qtranslation_table_for_decode);
-
- Qtranslation_table_for_encode = intern ("translation-table-for-encode");
- staticpro (&Qtranslation_table_for_encode);
-
- Qsafe_chars = intern ("safe-chars");
- staticpro (&Qsafe_chars);
-
- Qchar_coding_system = intern ("char-coding-system");
- staticpro (&Qchar_coding_system);
-
/* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
Qchar_table_extra_slots = intern ("char-table-extra-slots");
- Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
- Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
- Qvalid_codes = intern ("valid-codes");
- staticpro (&Qvalid_codes);
+ DEFSYM (Qtranslation_table, "translation-table");
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+ DEFSYM (Qtranslation_table_id, "translation-table-id");
+ DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
+ DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
- Qemacs_mule = intern ("emacs-mule");
- staticpro (&Qemacs_mule);
+ DEFSYM (Qvalid_codes, "valid-codes");
- Qraw_text = intern ("raw-text");
- staticpro (&Qraw_text);
+ DEFSYM (Qemacs_mule, "emacs-mule");
- Qutf_8 = intern ("utf-8");
- staticpro (&Qutf_8);
+ DEFSYM (QCcategory, ":category");
+
+ Vcoding_category_table
+ = Fmake_vector (make_number (coding_category_max), Qnil);
+ staticpro (&Vcoding_category_table);
+ /* Followings are target of code detection. */
+ ASET (Vcoding_category_table, coding_category_iso_7,
+ intern ("coding-category-iso-7"));
+ ASET (Vcoding_category_table, coding_category_iso_7_tight,
+ intern ("coding-category-iso-7-tight"));
+ ASET (Vcoding_category_table, coding_category_iso_8_1,
+ intern ("coding-category-iso-8-1"));
+ ASET (Vcoding_category_table, coding_category_iso_8_2,
+ intern ("coding-category-iso-8-2"));
+ ASET (Vcoding_category_table, coding_category_iso_7_else,
+ intern ("coding-category-iso-7-else"));
+ ASET (Vcoding_category_table, coding_category_iso_8_else,
+ intern ("coding-category-iso-8-else"));
+ ASET (Vcoding_category_table, coding_category_utf_8,
+ intern ("coding-category-utf-8"));
+ ASET (Vcoding_category_table, coding_category_utf_16_be,
+ intern ("coding-category-utf-16-be"));
+ ASET (Vcoding_category_table, coding_category_utf_16_auto,
+ intern ("coding-category-utf-16-auto"));
+ ASET (Vcoding_category_table, coding_category_utf_16_le,
+ intern ("coding-category-utf-16-le"));
+ ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
+ intern ("coding-category-utf-16-be-nosig"));
+ ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
+ intern ("coding-category-utf-16-le-nosig"));
+ ASET (Vcoding_category_table, coding_category_charset,
+ intern ("coding-category-charset"));
+ ASET (Vcoding_category_table, coding_category_sjis,
+ intern ("coding-category-sjis"));
+ ASET (Vcoding_category_table, coding_category_big5,
+ intern ("coding-category-big5"));
+ ASET (Vcoding_category_table, coding_category_ccl,
+ intern ("coding-category-ccl"));
+ ASET (Vcoding_category_table, coding_category_emacs_mule,
+ intern ("coding-category-emacs-mule"));
+ /* Followings are NOT target of code detection. */
+ ASET (Vcoding_category_table, coding_category_raw_text,
+ intern ("coding-category-raw-text"));
+ ASET (Vcoding_category_table, coding_category_undecided,
+ intern ("coding-category-undecided"));
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
@@ -7603,6 +8802,7 @@ syms_of_coding ()
defsubr (&Sdetect_coding_string);
defsubr (&Sfind_coding_systems_region_internal);
defsubr (&Sunencodable_char_position);
+ defsubr (&Scheck_coding_systems_region);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
@@ -7617,15 +8817,20 @@ syms_of_coding ()
defsubr (&Sset_keyboard_coding_system_internal);
defsubr (&Skeyboard_coding_system);
defsubr (&Sfind_operation_coding_system);
- defsubr (&Supdate_coding_systems_internal);
- defsubr (&Sset_coding_priority_internal);
+ defsubr (&Sset_coding_system_priority);
defsubr (&Sdefine_coding_system_internal);
+ defsubr (&Sdefine_coding_system_alias);
+ defsubr (&Scoding_system_base);
+ defsubr (&Scoding_system_plist);
+ defsubr (&Scoding_system_aliases);
+ defsubr (&Scoding_system_eol_type);
+ defsubr (&Scoding_system_priority_list);
DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
doc: /* List of coding systems.
Do not alter the value of this variable manually. This variable should be
-updated by the functions `make-coding-system' and
+updated by the functions `define-coding-system' and
`define-coding-system-alias'. */);
Vcoding_system_list = Qnil;
@@ -7650,7 +8855,7 @@ system bound to the corresponding coding-category is selected. */);
int i;
Vcoding_category_list = Qnil;
- for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
+ for (i = coding_category_max - 1; i >= 0; i--)
Vcoding_category_list
= Fcons (XVECTOR (Vcoding_category_table)->contents[i],
Vcoding_category_list);
@@ -7680,25 +8885,27 @@ the value of `buffer-file-coding-system' is used. */);
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
- doc: /* Coding system used in the latest file or process I/O.
-Also set by `encode-coding-region', `decode-coding-region',
-`encode-coding-string' and `decode-coding-string'. */);
+ doc: /*
+Coding system used in the latest file or process I/O. */);
Vlast_coding_system_used = Qnil;
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
- doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
+ doc: /*
+*Non-nil means always inhibit code conversion of end-of-line format.
See info node `Coding Systems' and info node `Text and Binary' concerning
such conversion. */);
inhibit_eol_conversion = 0;
DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
- doc: /* Non-nil means process buffer inherits coding system of process output.
+ doc: /*
+Non-nil means process buffer inherits coding system of process output.
Bind it to t if the process output is to be treated as if it were a file
read from some filesystem. */);
inherit_process_coding_system = 0;
DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
- doc: /* Alist to decide a coding system to use for a file I/O operation.
+ doc: /*
+Alist to decide a coding system to use for a file I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a file name,
VAL is a coding system, a cons of coding systems, or a function symbol.
@@ -7708,14 +8915,15 @@ If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
or a cons of coding systems which are used as above. The function gets
-the arguments with which `find-operation-coding-system' was called.
+the arguments with which `find-operation-coding-systems' was called.
See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);
Vfile_coding_system_alist = Qnil;
DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
- doc: /* Alist to decide a coding system to use for a process I/O operation.
+ doc: /*
+Alist to decide a coding system to use for a process I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a program name,
VAL is a coding system, a cons of coding systems, or a function symbol.
@@ -7730,7 +8938,8 @@ See also the function `find-operation-coding-system'. */);
Vprocess_coding_system_alist = Qnil;
DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
- doc: /* Alist to decide a coding system to use for a network I/O operation.
+ doc: /*
+Alist to decide a coding system to use for a network I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a network service name
or is a port number to connect to,
@@ -7752,23 +8961,28 @@ Also used for decoding keyboard input on X Window system. */);
/* The eol mnemonics are reset in startup.el system-dependently. */
DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
- doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
+ doc: /*
+*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
eol_mnemonic_unix = build_string (":");
DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
- doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
+ doc: /*
+*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
eol_mnemonic_dos = build_string ("\\");
DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
- doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format. */);
+ doc: /*
+*String displayed in mode line for MAC-like (CR) end-of-line format. */);
eol_mnemonic_mac = build_string ("/");
DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
- doc: /* *String displayed in mode line when end-of-line format is not yet determined. */);
+ doc: /*
+*String displayed in mode line when end-of-line format is not yet determined. */);
eol_mnemonic_undecided = build_string (":");
DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
- doc: /* *Non-nil enables character translation while encoding and decoding. */);
+ doc: /*
+*Non-nil enables character translation while encoding and decoding. */);
Venable_character_translation = Qt;
DEFVAR_LISP ("standard-translation-table-for-decode",
@@ -7781,11 +8995,12 @@ Also used for decoding keyboard input on X Window system. */);
doc: /* Table for translating characters while encoding. */);
Vstandard_translation_table_for_encode = Qnil;
- DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
+ DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
doc: /* Alist of charsets vs revision numbers.
While encoding, if a charset (car part of an element) is found,
-designate it with the escape sequence identifying revision (cdr part of the element). */);
- Vcharset_revision_alist = Qnil;
+designate it with the escape sequence identifying revision (cdr part
+of the element). */);
+ Vcharset_revision_table = Qnil;
DEFVAR_LISP ("default-process-coding-system",
&Vdefault_process_coding_system,
@@ -7795,7 +9010,8 @@ the cdr part is used for encoding a text to be sent to a process. */);
Vdefault_process_coding_system = Qnil;
DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
- doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
+ doc: /*
+Table of extra Latin codes in the range 128..159 (inclusive).
This is a vector of length 256.
If Nth element is non-nil, the existence of code N in a file
\(or output of subprocess) doesn't prevent it to be detected as
@@ -7807,7 +9023,8 @@ Only 128th through 159th elements has a meaning. */);
DEFVAR_LISP ("select-safe-coding-system-function",
&Vselect_safe_coding_system_function,
- doc: /* Function to call to select safe coding system for encoding a text.
+ doc: /*
+Function to call to select safe coding system for encoding a text.
If set, this function is called to force a user to select a proper
coding system which can encode the text in the case that a default
@@ -7827,7 +9044,8 @@ called even if `coding-system-for-write' is non-nil. The command
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
- doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
+ doc: /*
+If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
By default, on reading a file, Emacs tries to detect how the text is
encoded. This code detection is sensitive to escape sequences. If
@@ -7857,6 +9075,49 @@ escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argumen
This is applied to the result of input methods, not their input. See also
`keyboard-translate-table'. */);
Vtranslation_table_for_input = Qnil;
+
+ {
+ Lisp_Object args[coding_arg_max];
+ Lisp_Object plist[16];
+ int i;
+
+ for (i = 0; i < coding_arg_max; i++)
+ args[i] = Qnil;
+
+ plist[0] = intern (":name");
+ plist[1] = args[coding_arg_name] = Qno_conversion;
+ plist[2] = intern (":mnemonic");
+ plist[3] = args[coding_arg_mnemonic] = make_number ('=');
+ plist[4] = intern (":coding-type");
+ plist[5] = args[coding_arg_coding_type] = Qraw_text;
+ plist[6] = intern (":ascii-compatible-p");
+ plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
+ plist[8] = intern (":default-char");
+ plist[9] = args[coding_arg_default_char] = make_number (0);
+ plist[10] = intern (":for-unibyte");
+ plist[11] = args[coding_arg_for_unibyte] = Qt;
+ plist[12] = intern (":docstring");
+ plist[13] = build_string ("Do no conversion.\n\
+\n\
+When you visit a file with this coding, the file is read into a\n\
+unibyte buffer as is, thus each byte of a file is treated as a\n\
+character.");
+ plist[14] = intern (":eol-type");
+ plist[15] = args[coding_arg_eol_type] = Qunix;
+ args[coding_arg_plist] = Flist (16, plist);
+ Fdefine_coding_system_internal (coding_arg_max, args);
+ }
+
+ setup_coding_system (Qno_conversion, &keyboard_coding);
+ setup_coding_system (Qno_conversion, &terminal_coding);
+ setup_coding_system (Qno_conversion, &safe_terminal_coding);
+
+ {
+ int i;
+
+ for (i = 0; i < coding_category_max; i++)
+ Fset (AREF (Vcoding_category_table, i), Qno_conversion);
+ }
}
char *
@@ -7880,4 +9141,3 @@ emacs_strerror (error_number)
}
#endif /* emacs */
-
diff --git a/src/coding.h b/src/coding.h
index 4d020d31521..78a7d4aac04 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -1,6 +1,9 @@
/* Header for coding system handler.
Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -22,306 +25,205 @@ Boston, MA 02111-1307, USA. */
#ifndef EMACS_CODING_H
#define EMACS_CODING_H
-#include "ccl.h"
+/* Index to arguments of Fdefine_coding_system_internal. */
-/*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
+enum define_coding_system_arg_index
+ {
+ coding_arg_name,
+ coding_arg_mnemonic,
+ coding_arg_coding_type,
+ coding_arg_charset_list,
+ coding_arg_ascii_compatible_p,
+ coding_arg_decode_translation_table,
+ coding_arg_encode_translation_table,
+ coding_arg_post_read_conversion,
+ coding_arg_pre_write_conversion,
+ coding_arg_default_char,
+ coding_arg_for_unibyte,
+ coding_arg_plist,
+ coding_arg_eol_type,
+ coding_arg_max
+ };
-/* All code (1-byte) of Emacs' internal format is classified into one
- of the followings. See also `charset.h'. */
-enum emacs_code_class_type
+enum define_coding_iso2022_arg_index
{
- EMACS_control_code, /* Control codes in the range
- 0x00..0x1F and 0x7F except for the
- following two codes. */
- EMACS_linefeed_code, /* 0x0A (linefeed) to denote
- end-of-line. */
- EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
- in selective display mode. */
- EMACS_ascii_code, /* ASCII characters. */
- EMACS_leading_code_2, /* Base leading code of official
- TYPE9N character. */
- EMACS_leading_code_3, /* Base leading code of private TYPE9N
- or official TYPE9Nx9N character. */
- EMACS_leading_code_4, /* Base leading code of private
- TYPE9Nx9N character. */
- EMACS_invalid_code /* Invalid code, i.e. a base leading
- code not yet assigned to any
- charset, or a code of the range
- 0xA0..0xFF. */
+ coding_arg_iso2022_initial = coding_arg_max,
+ coding_arg_iso2022_reg_usage,
+ coding_arg_iso2022_request,
+ coding_arg_iso2022_flags,
+ coding_arg_iso2022_max
};
-extern enum emacs_code_class_type emacs_code_class[256];
-
-/*** ISO2022 section ***/
-
-/* Macros to define code of control characters for ISO2022's functions. */
- /* code */ /* function */
-#define ISO_CODE_LF 0x0A /* line-feed */
-#define ISO_CODE_CR 0x0D /* carriage-return */
-#define ISO_CODE_SO 0x0E /* shift-out */
-#define ISO_CODE_SI 0x0F /* shift-in */
-#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
-#define ISO_CODE_ESC 0x1B /* escape */
-#define ISO_CODE_SS2 0x8E /* single-shift-2 */
-#define ISO_CODE_SS3 0x8F /* single-shift-3 */
-#define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
-
-/* All code (1-byte) of ISO2022 is classified into one of the
- followings. */
-enum iso_code_class_type
+enum define_coding_utf16_arg_index
{
- ISO_control_0, /* Control codes in the range
- 0x00..0x1F and 0x7F, except for the
- following 5 codes. */
- ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
- ISO_shift_out, /* ISO_CODE_SO (0x0E) */
- ISO_shift_in, /* ISO_CODE_SI (0x0F) */
- ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
- ISO_escape, /* ISO_CODE_SO (0x1B) */
- ISO_control_1, /* Control codes in the range
- 0x80..0x9F, except for the
- following 3 codes. */
- ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
- ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
- ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
- ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
- ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
- ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
- ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
+ coding_arg_utf16_bom = coding_arg_max,
+ coding_arg_utf16_endian,
+ coding_arg_utf16_max
};
-/** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
- element in the structure `coding_system'. This information is used
- while encoding a text to ISO2022. **/
+enum define_coding_ccl_arg_index
+ {
+ coding_arg_ccl_decoder = coding_arg_max,
+ coding_arg_ccl_encoder,
+ coding_arg_ccl_valids,
+ coding_arg_ccl_max
+ };
-/* If set, produce short-form designation sequence (e.g. ESC $ A)
- instead of long-form sequence (e.g. ESC $ ( A). */
-#define CODING_FLAG_ISO_SHORT_FORM 0x0001
+extern Lisp_Object Vcoding_system_hash_table;
-/* If set, reset graphic planes and registers at end-of-line to the
- initial state. */
-#define CODING_FLAG_ISO_RESET_AT_EOL 0x0002
+/* Enumeration of coding system type. */
-/* If set, reset graphic planes and registers before any control
- characters to the initial state. */
-#define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004
+enum coding_system_type
+ {
+ coding_type_charset,
+ coding_type_utf_8,
+ coding_type_utf_16,
+ coding_type_iso_2022,
+ coding_type_emacs_mule,
+ coding_type_sjis,
+ coding_type_ccl,
+ coding_type_raw_text,
+ coding_type_undecided,
+ coding_type_max
+ };
-/* If set, encode by 7-bit environment. */
-#define CODING_FLAG_ISO_SEVEN_BITS 0x0008
-/* If set, use locking-shift function. */
-#define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010
+/* Enumeration of end-of-line format type. */
-/* If set, use single-shift function. Overwrite
- CODING_FLAG_ISO_LOCKING_SHIFT. */
-#define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020
+enum end_of_line_type
+ {
+ eol_lf, /* Line-feed only, same as Emacs' internal
+ format. */
+ eol_crlf, /* Sequence of carriage-return and
+ line-feed. */
+ eol_cr, /* Carriage-return only. */
+ eol_any, /* Accept any of above. Produce line-feed
+ only. */
+ eol_undecided, /* This value is used to denote that the
+ eol-type is not yet undecided. */
+ eol_type_max
+ };
-/* If set, designate JISX0201-Roman instead of ASCII. */
-#define CODING_FLAG_ISO_USE_ROMAN 0x0040
+/* Enumeration of index to an attribute vector of a coding system. */
-/* If set, designate JISX0208-1978 instead of JISX0208-1983. */
-#define CODING_FLAG_ISO_USE_OLDJIS 0x0080
+enum coding_attr_index
+ {
+ coding_attr_base_name,
+ coding_attr_docstring,
+ coding_attr_mnemonic,
+ coding_attr_type,
+ coding_attr_charset_list,
+ coding_attr_ascii_compat,
+ coding_attr_decode_tbl,
+ coding_attr_encode_tbl,
+ coding_attr_post_read,
+ coding_attr_pre_write,
+ coding_attr_default_char,
+ coding_attr_for_unibyte,
+ coding_attr_plist,
+
+ coding_attr_category,
+ coding_attr_safe_charsets,
+
+ /* The followings are extra attributes for each type. */
+ coding_attr_charset_valids,
+
+ coding_attr_ccl_decoder,
+ coding_attr_ccl_encoder,
+ coding_attr_ccl_valids,
+
+ coding_attr_iso_initial,
+ coding_attr_iso_usage,
+ coding_attr_iso_request,
+ coding_attr_iso_flags,
+
+ coding_attr_utf_16_bom,
+ coding_attr_utf_16_endian,
+
+ coding_attr_emacs_mule_full,
+
+ coding_attr_last_index
+ };
-/* If set, do not produce ISO6429's direction specifying sequence. */
-#define CODING_FLAG_ISO_NO_DIRECTION 0x0100
-/* If set, assume designation states are reset at beginning of line on
- output. */
-#define CODING_FLAG_ISO_INIT_AT_BOL 0x0200
+#define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name)
+#define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type)
+#define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
+#define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic)
+#define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring)
+#define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
+#define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl)
+#define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl)
+#define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read)
+#define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write)
+#define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
+#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte)
+#define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing)
+#define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist)
+#define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category)
+#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
-/* If set, designation sequence should be placed at beginning of line
- on output. */
-#define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
-/* If set, do not encode unsafe characters on output. */
-#define CODING_FLAG_ISO_SAFE 0x0800
+#define CODING_ID_ATTRS(id) \
+ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
-/* If set, extra latin codes (128..159) are accepted as a valid code
- on input. */
-#define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
+#define CODING_ID_ALIASES(id) \
+ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
-/* If set, use designation escape sequence. */
-#define CODING_FLAG_ISO_DESIGNATION 0x10000
+#define CODING_ID_EOL_TYPE(id) \
+ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
-/* A character to be produced on output if encoding of the original
- character is inhibitted by CODING_MODE_INHIBIT_UNENCODABLE_CHAR.
- It must be an ASCII character. */
-#define CODING_REPLACEMENT_CHARACTER '?'
+#define CODING_ID_NAME(id) \
+ (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
-/* Structure of the field `spec.iso2022' in the structure `coding_system'. */
-struct iso2022_spec
-{
- /* The current graphic register invoked to each graphic plane. */
- int current_invocation[2];
+#define CODING_SYSTEM_SPEC(coding_system_symbol) \
+ (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
- /* The current charset designated to each graphic register. */
- int current_designation[4];
+#define CODING_SYSTEM_ID(coding_system_symbol) \
+ hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
+ coding_system_symbol, NULL)
- /* A charset initially designated to each graphic register. */
- int initial_designation[4];
+#define CODING_SYSTEM_P(coding_system_symbol) \
+ (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol)))
- /* If not -1, it is a graphic register specified in an invalid
- designation sequence. */
- int last_invalid_designation_register;
+#define CHECK_CODING_SYSTEM(x) \
+ do { \
+ if (!CODING_SYSTEM_P (x)) \
+ wrong_type_argument (Qcoding_system_p, (x)); \
+ } while (0)
- /* A graphic register to which each charset should be designated. */
- unsigned char requested_designation[MAX_CHARSET + 1];
- /* A revision number to be specified for each charset on encoding.
- The value 255 means no revision number for the corresponding
- charset. */
- unsigned char charset_revision_number[MAX_CHARSET + 1];
+#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
+ do { \
+ spec = CODING_SYSTEM_SPEC (x); \
+ if (NILP (spec)) \
+ x = wrong_type_argument (Qcoding_system_p, (x)); \
+ } while (0)
- /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
- by single-shift while encoding. */
- int single_shifting;
- /* Set to 1 temporarily only when processing at beginning of line. */
- int bol;
-};
+#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
+ do \
+ { \
+ id = CODING_SYSTEM_ID (x); \
+ if (id < 0) \
+ x = wrong_type_argument (Qcoding_system_p, (x)); \
+ } while (0)
-/* Macros to access each field in the structure `spec.iso2022'. */
-#define CODING_SPEC_ISO_INVOCATION(coding, plane) \
- (coding)->spec.iso2022.current_invocation[plane]
-#define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
- (coding)->spec.iso2022.current_designation[reg]
-#define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
- (coding)->spec.iso2022.initial_designation[reg]
-#define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
- (coding)->spec.iso2022.requested_designation[charset]
-#define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
- (coding)->spec.iso2022.charset_revision_number[charset]
-#define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
- (coding)->spec.iso2022.single_shifting
-#define CODING_SPEC_ISO_BOL(coding) \
- (coding)->spec.iso2022.bol
-
-/* A value which may appear in
- coding->spec.iso2022.requested_designation indicating that the
- corresponding charset does not request any graphic register to be
- designated. */
-#define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
-
-/* Return a charset which is currently designated to the graphic plane
- PLANE in the coding-system CODING. */
-#define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
- ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
- ? -1 \
- : CODING_SPEC_ISO_DESIGNATION (coding, \
- CODING_SPEC_ISO_INVOCATION (coding, plane)))
-
-/*** BIG5 section ***/
-
-/* Macros to denote each type of BIG5 coding system. */
-#define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
- BIG5 developed by Hong Kong
- University. */
-#define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
- of BIG5 developed by the
- company ETen in Taiwan. */
/*** GENERAL section ***/
-/* Types of coding system. */
-enum coding_type
+/* Enumeration of result code of code conversion. */
+enum coding_result_code
{
- coding_type_no_conversion, /* A coding system which requires no
- conversion for reading and writing
- including end-of-line format. */
- coding_type_emacs_mule, /* A coding system used in Emacs'
- buffer and string. Requires no
- conversion for reading and writing
- except for end-of-line format. */
- coding_type_undecided, /* A coding system which requires
- automatic detection of a real
- coding system. */
- coding_type_sjis, /* SJIS coding system for Japanese. */
- coding_type_iso2022, /* Any coding system of ISO2022
- variants. */
- coding_type_big5, /* BIG5 coding system for Chinese. */
- coding_type_ccl, /* The coding system of which decoder
- and encoder are written in CCL. */
- coding_type_raw_text /* A coding system for a text
- containing random 8-bit code which
- does not require code conversion
- except for end-of-line format. */
+ CODING_RESULT_SUCCESS,
+ CODING_RESULT_INSUFFICIENT_SRC,
+ CODING_RESULT_INSUFFICIENT_DST,
+ CODING_RESULT_INCONSISTENT_EOL,
+ CODING_RESULT_INSUFFICIENT_CMP,
+ CODING_RESULT_INTERRUPT,
+ CODING_RESULT_INSUFFICIENT_MEM
};
-/* Formats of end-of-line. */
-#define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
- internal format. */
-#define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
- line-feed. */
-#define CODING_EOL_CR 2 /* Carriage-return only. */
-#define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
- eol-type is not yet decided. */
-#define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
- eol-type is not consistent
- through the file. */
-
-/* 1 iff composing. */
-#define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO)
-
-#define COMPOSITION_DATA_SIZE 4080
-#define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
-
-/* Data structure to hold information about compositions of text that
- is being decoded or encode. ISO 2022 base code conversion routines
- handle special ESC sequences for composition specification. But,
- they can't get/put such information directly from/to a buffer in
- the deepest place. So, they store or retrieve the information
- through this structure.
-
- The encoder stores the information in this structure when it meets
- ESC sequences for composition while encoding codes, then, after all
- text codes are encoded, puts `composition' properties on the text
- by referring to the structure.
-
- The decoder at first stores the information of a text to be
- decoded, then, while decoding codes, generates ESC sequences for
- composition at proper places by referring to the structure. */
-
-struct composition_data
-{
- /* The character position of the first character to be encoded or
- decoded. START and END (see below) are relative to this
- position. */
- int char_offset;
-
- /* The composition data. These elements are repeated for each
- composition:
- LENGTH START END METHOD [ COMPONENT ... ]
- where,
- LENGTH is the number of elements for this composition.
-
- START and END are starting and ending character positions of
- the composition relative to `char_offset'.
-
- METHOD is one of `enum composing_status' specifying the way of
- composition.
-
- COMPONENT is a character or an encoded composition rule. */
- int data[COMPOSITION_DATA_SIZE];
-
- /* The number of elements in `data' currently used. */
- int used;
-
- /* Pointers to the previous and next structures. When `data' is
- filled up, another structure is allocated and linked in `next'.
- The new structure has backward link to this structure in `prev'.
- The number of chained structures depends on how many compositions
- the text being encoded or decoded contains. */
- struct composition_data *prev, *next;
-};
-
-/* Macros used for the member `result' of the struct
- coding_system. */
-#define CODING_FINISH_NORMAL 0
-#define CODING_FINISH_INSUFFICIENT_SRC 1
-#define CODING_FINISH_INSUFFICIENT_DST 2
-#define CODING_FINISH_INCONSISTENT_EOL 3
-#define CODING_FINISH_INSUFFICIENT_CMP 4
-#define CODING_FINISH_INTERRUPT 5
/* Macros used for the member `mode' of the struct coding_system. */
@@ -330,7 +232,7 @@ struct composition_data
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
/* If set, the decoding/encoding routines treat the current data as
- the last block of the whole text to be converted, and do
+ the last block of the whole text to be converted, and do the
appropriate finishing job. */
#define CODING_MODE_LAST_BLOCK 0x02
@@ -338,65 +240,104 @@ struct composition_data
enables selective display. */
#define CODING_MODE_SELECTIVE_DISPLAY 0x04
-/* If set, replace unencodabae characters by `?' on encoding. */
-#define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08
-
/* This flag is used by the decoding/encoding routines on the fly. If
set, it means that right-to-left text is being processed. */
-#define CODING_MODE_DIRECTION 0x10
+#define CODING_MODE_DIRECTION 0x08
-struct coding_system
+#define CODING_MODE_FIXED_DESTINATION 0x10
+
+#define CODING_MODE_SAFE_ENCODING 0x20
+
+/* Structure of the field `spec.iso_2022' in the structure
+ `coding_system'. */
+struct iso_2022_spec
{
- /* Type of the coding system. */
- enum coding_type type;
+ /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
+ unsigned flags;
- /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
- int eol_type;
+ /* The current graphic register invoked to each graphic plane. */
+ int current_invocation[2];
- /* Flag bits of the coding system. The meaning of each bit is common
- to all types of coding systems. */
- unsigned int common_flags;
+ /* The current charset designated to each graphic register. The
+ value -1 means that not charset is designated, -2 means that
+ there was an invalid designation previously. */
+ int current_designation[4];
- /* Flag bits of the coding system. The meaning of each bit depends
- on the type of the coding system. */
- unsigned int flags;
+ /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
+ by single-shift while encoding. */
+ int single_shifting;
- /* Mode bits of the coding system. See the comments of the macros
- CODING_MODE_XXX. */
- unsigned int mode;
+ /* Set to 1 temporarily only when processing at beginning of line. */
+ int bol;
+};
- /* The current status of composition handling. */
- int composing;
+struct ccl_spec;
- /* 1 iff the next character is a composition rule. */
- int composition_rule_follows;
+enum utf_16_bom_type
+ {
+ utf_16_detect_bom,
+ utf_16_without_bom,
+ utf_16_with_bom
+ };
+
+enum utf_16_endian_type
+ {
+ utf_16_big_endian,
+ utf_16_little_endian
+ };
+
+struct utf_16_spec
+{
+ enum utf_16_bom_type bom;
+ enum utf_16_endian_type endian;
+ int surrogate;
+};
- /* Information of compositions are stored here on decoding and set
- in advance on encoding. */
- struct composition_data *cmp_data;
+struct coding_detection_info
+{
+ /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs. */
+ /* Which categories are already checked. */
+ int checked;
+ /* Which categories are strongly found. */
+ int found;
+ /* Which categories are rejected. */
+ int rejected;
+};
- /* Index to cmp_data->data for the first element for the current
- composition. */
- int cmp_data_start;
- /* Index to cmp_data->data for the current element for the current
- composition. */
- int cmp_data_index;
+struct coding_system
+{
+ /* ID number of the coding system. This is an index to
+ Vcoding_system_hash_table. This value is set by
+ setup_coding_system. At the early stage of building time, this
+ value is -1 in the array coding_categories to indicate that no
+ coding-system of that category is yet defined. */
+ int id;
+
+ /* Flag bits of the coding system. The meaning of each bit is common
+ to all types of coding systems. */
+ int common_flags;
+
+ /* Mode bits of the coding system. See the comments of the macros
+ CODING_MODE_XXX. */
+ unsigned int mode;
/* Detailed information specific to each type of coding system. */
- union spec
+ union
{
- struct iso2022_spec iso2022;
- struct ccl_spec ccl; /* Defined in ccl.h. */
+ struct iso_2022_spec iso_2022;
+ struct ccl_spec *ccl; /* Defined in ccl.h. */
+ struct utf_16_spec utf_16;
+ int emacs_mule_full_support;
} spec;
- /* Index number of coding category of the coding system. */
- int category_idx;
+ int max_charset_id;
+ char *safe_charsets;
- /* The following two members specify how characters 128..159 are
- represented in source and destination text respectively. 1 means
- they are represented by 2-byte sequence, 0 means they are
- represented by 1-byte as is (see the comment in charset.h). */
+ /* The following two members specify how binary 8-bit code 128..255
+ are represented in source and destination text respectively. 1
+ means they are represented by 2-byte sequence, 0 means they are
+ represented by 1-byte as is (see the comment in character.h). */
unsigned src_multibyte : 1;
unsigned dst_multibyte : 1;
@@ -404,170 +345,154 @@ struct coding_system
-1 in setup_coding_system, and updated by detect_coding. So,
when this is equal to the byte length of the text being
converted, we can skip the actual conversion process. */
- int heading_ascii;
+ int head_ascii;
/* The following members are set by encoding/decoding routine. */
- int produced, produced_char, consumed, consumed_char;
+ EMACS_INT produced, produced_char, consumed, consumed_char;
/* Number of error source data found in a decoding routine. */
int errors;
- /* Finish status of code conversion. It should be one of macros
- CODING_FINISH_XXXX. */
- int result;
+ /* Store the positions of error source data. */
+ EMACS_INT *error_positions;
- /* If nonzero, suppress error notification. */
- int suppress_error;
+ /* Finish status of code conversion. */
+ enum coding_result_code result;
/* The following members are all Lisp symbols. We don't have to
protect them from GC because the current garbage collection
doesn't relocate Lisp symbols. But, when it is changed, we must
find a way to protect them. */
- /* Backward pointer to the Lisp symbol of the coding system. */
- Lisp_Object symbol;
+ EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
+ Lisp_Object src_object;
+ const unsigned char *source;
- /* Lisp function (symbol) to be called after decoding to do
- additional conversion, or nil. */
- Lisp_Object post_read_conversion;
+ EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
+ Lisp_Object dst_object;
+ unsigned char *destination;
- /* Lisp function (symbol) to be called before encoding to do
- additional conversion, or nil. */
- Lisp_Object pre_write_conversion;
+ int chars_at_source;
- /* Character translation tables to look up, or nil. */
- Lisp_Object translation_table_for_decode;
- Lisp_Object translation_table_for_encode;
-};
+ /* If an element is non-negative, it is a character code.
+
+ If it is in the range -128..-1, it is a 8-bit character code
+ minus 256.
+
+ If it is less than -128, it specifies the start of an annotation
+ chunk. The length of the chunk is -128 minus the value of the
+ element. The following elements are OFFSET, ANNOTATION-TYPE, and
+ a sequence of actual data for the annotation. OFFSET is a
+ character position offset from dst_pos or src_pos,
+ ANNOTATION-TYPE specfies the meaning of the annotation and how to
+ handle the following data.. */
+ int *charbuf;
+ int charbuf_size, charbuf_used;
+
+ /* Set to 1 if charbuf contains an annotation. */
+ int annotated;
-#define CODING_REQUIRE_FLUSHING_MASK 1
-#define CODING_REQUIRE_DECODING_MASK 2
-#define CODING_REQUIRE_ENCODING_MASK 4
-#define CODING_REQUIRE_DETECTION_MASK 8
+ unsigned char carryover[64];
+ int carryover_bytes;
-/* Return 1 if the coding system CODING requires specific code to be
+ int default_char;
+
+ int (*detector) P_ ((struct coding_system *,
+ struct coding_detection_info *));
+ void (*decoder) P_ ((struct coding_system *));
+ int (*encoder) P_ ((struct coding_system *));
+};
+
+/* Meanings of bits in the member `common_flags' of the structure
+ coding_system. The lowest 8 bits are reserved for various kind of
+ annotations (currently two of them are used). */
+#define CODING_ANNOTATION_MASK 0x00FF
+#define CODING_ANNOTATE_COMPOSITION_MASK 0x0001
+#define CODING_ANNOTATE_DIRECTION_MASK 0x0002
+#define CODING_ANNOTATE_CHARSET_MASK 0x0003
+#define CODING_FOR_UNIBYTE_MASK 0x0100
+#define CODING_REQUIRE_FLUSHING_MASK 0x0200
+#define CODING_REQUIRE_DECODING_MASK 0x0400
+#define CODING_REQUIRE_ENCODING_MASK 0x0800
+#define CODING_REQUIRE_DETECTION_MASK 0x1000
+#define CODING_RESET_AT_BOL_MASK 0x2000
+
+/* Return 1 if the coding context CODING requires annotaion
+ handling. */
+#define CODING_REQUIRE_ANNOTATION(coding) \
+ ((coding)->common_flags & CODING_ANNOTATION_MASK)
+
+/* Return 1 if the coding context CODING prefers decoding into unibyte. */
+#define CODING_FOR_UNIBYTE(coding) \
+ ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
+
+/* Return 1 if the coding context CODING requires specific code to be
attached at the tail of converted text. */
#define CODING_REQUIRE_FLUSHING(coding) \
((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
-/* Return 1 if the coding system CODING requires code conversion on
+/* Return 1 if the coding context CODING requires code conversion on
decoding. */
#define CODING_REQUIRE_DECODING(coding) \
((coding)->dst_multibyte \
|| (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
-/* Return 1 if the coding system CODING requires code conversion on
+
+/* Return 1 if the coding context CODING requires code conversion on
encoding. */
-#define CODING_REQUIRE_ENCODING(coding) \
- ((coding)->src_multibyte \
- || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
+#define CODING_REQUIRE_ENCODING(coding) \
+ ((coding)->src_multibyte \
+ || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \
+ || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
+
-/* Return 1 if the coding system CODING requires some kind of code
+/* Return 1 if the coding context CODING requires some kind of code
detection. */
#define CODING_REQUIRE_DETECTION(coding) \
((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
-/* Return 1 if the coding system CODING requires code conversion on
+/* Return 1 if the coding context CODING requires code conversion on
decoding or some kind of code detection. */
#define CODING_MAY_REQUIRE_DECODING(coding) \
(CODING_REQUIRE_DECODING (coding) \
|| CODING_REQUIRE_DETECTION (coding))
-/* Index for each coding category in `coding_category_table' */
-#define CODING_CATEGORY_IDX_EMACS_MULE 0
-#define CODING_CATEGORY_IDX_SJIS 1
-#define CODING_CATEGORY_IDX_ISO_7 2
-#define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
-#define CODING_CATEGORY_IDX_ISO_8_1 4
-#define CODING_CATEGORY_IDX_ISO_8_2 5
-#define CODING_CATEGORY_IDX_ISO_7_ELSE 6
-#define CODING_CATEGORY_IDX_ISO_8_ELSE 7
-#define CODING_CATEGORY_IDX_CCL 8
-#define CODING_CATEGORY_IDX_BIG5 9
-#define CODING_CATEGORY_IDX_UTF_8 10
-#define CODING_CATEGORY_IDX_UTF_16_BE 11
-#define CODING_CATEGORY_IDX_UTF_16_LE 12
-#define CODING_CATEGORY_IDX_RAW_TEXT 13
-#define CODING_CATEGORY_IDX_BINARY 14
-#define CODING_CATEGORY_IDX_MAX 15
-
-/* Definitions of flag bits returned by the function
- detect_coding_mask (). */
-#define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
-#define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
-#define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
-#define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
-#define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
-#define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
-#define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
-#define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
-#define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL)
-#define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
-#define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8)
-#define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE)
-#define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE)
-#define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
-#define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
-
-/* This value is returned if detect_coding_mask () find nothing other
- than ASCII characters. */
-#define CODING_CATEGORY_MASK_ANY \
- ( CODING_CATEGORY_MASK_EMACS_MULE \
- | CODING_CATEGORY_MASK_SJIS \
- | CODING_CATEGORY_MASK_ISO_7 \
- | CODING_CATEGORY_MASK_ISO_7_TIGHT \
- | CODING_CATEGORY_MASK_ISO_8_1 \
- | CODING_CATEGORY_MASK_ISO_8_2 \
- | CODING_CATEGORY_MASK_ISO_7_ELSE \
- | CODING_CATEGORY_MASK_ISO_8_ELSE \
- | CODING_CATEGORY_MASK_CCL \
- | CODING_CATEGORY_MASK_BIG5 \
- | CODING_CATEGORY_MASK_UTF_8 \
- | CODING_CATEGORY_MASK_UTF_16_BE \
- | CODING_CATEGORY_MASK_UTF_16_LE)
-
-#define CODING_CATEGORY_MASK_ISO_7BIT \
- (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
-
-#define CODING_CATEGORY_MASK_ISO_8BIT \
- (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
-
-#define CODING_CATEGORY_MASK_ISO_SHIFT \
- (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
-
-#define CODING_CATEGORY_MASK_ISO \
- ( CODING_CATEGORY_MASK_ISO_7BIT \
- | CODING_CATEGORY_MASK_ISO_SHIFT \
- | CODING_CATEGORY_MASK_ISO_8BIT)
-
-#define CODING_CATEGORY_MASK_UTF_16_BE_LE \
- (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
-
/* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
system. C1 and C2 are the 1st and 2nd position codes of Emacs'
internal format. */
-#define DECODE_SJIS(s1, s2, c1, c2) \
- do { \
- if (s2 >= 0x9F) \
- c1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
- c2 = s2 - 0x7E; \
- else \
- c1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
- c2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F); \
+#define SJIS_TO_JIS(code) \
+ do { \
+ int s1, s2, j1, j2; \
+ \
+ s1 = (code) >> 8, s2 = (code) & 0xFF; \
+ \
+ if (s2 >= 0x9F) \
+ (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
+ j2 = s2 - 0x7E); \
+ else \
+ (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
+ j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
+ (code) = (j1 << 8) | j2; \
} while (0)
-#define ENCODE_SJIS(c1, c2, s1, s2) \
+
+#define JIS_TO_SJIS(code) \
do { \
- if (c1 & 1) \
- s1 = c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1), \
- s2 = c2 + ((c2 >= 0x60) ? 0x20 : 0x1F); \
+ int s1, s2, j1, j2; \
+ \
+ j1 = (code) >> 8, j2 = (code) & 0xFF; \
+ if (j1 & 1) \
+ (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \
+ s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \
else \
- s1 = c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0), \
- s2 = c2 + 0x7E; \
+ (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
+ s2 = j2 + 0x7E); \
+ (code) = (s1 << 8) | s2; \
} while (0)
+
/* Encode the file name NAME using the specified coding system
for file names, if any. */
#define ENCODE_FILE(name) \
@@ -579,6 +504,7 @@ struct coding_system
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
: name))
+
/* Decode the file name NAME using the specified coding system
for file names, if any. */
#define DECODE_FILE(name) \
@@ -590,21 +516,22 @@ struct coding_system
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
: name))
+
#ifdef WINDOWSNT
/* Encode the string STR using the specified coding system
for w32 system functions, if any. */
-#define ENCODE_SYSTEM(str) \
- (! NILP (Vlocale_coding_system) \
- && XFASTINT (Vlocale_coding_system) != 0 \
- ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
+#define ENCODE_SYSTEM(str) \
+ (! NILP (Vlocale_coding_system) \
+ && XFASTINT (Vlocale_coding_system) != 0 \
+ ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
: str)
/* Decode the string STR using the specified coding system
for w32 system functions, if any. */
-#define DECODE_SYSTEM(name) \
- (! NILP (Vlocale_coding_system) \
- && XFASTINT (Vlocale_coding_system) != 0 \
- ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
+#define DECODE_SYSTEM(name) \
+ (! NILP (Vlocale_coding_system) \
+ && XFASTINT (Vlocale_coding_system) != 0 \
+ ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
: str)
#else /* WINDOWSNT */
@@ -614,50 +541,76 @@ struct coding_system
#endif /* !WINDOWSNT */
-#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
-
/* Extern declarations. */
-extern int decode_coding P_ ((struct coding_system *, const unsigned char *,
- unsigned char *, int, int));
-extern int encode_coding P_ ((struct coding_system *, const unsigned char *,
- unsigned char *, int, int));
-extern void coding_save_composition P_ ((struct coding_system *, int, int,
- Lisp_Object));
-extern void coding_free_composition_data P_ ((struct coding_system *));
-extern void coding_adjust_composition_offset P_ ((struct coding_system *,
- int));
-extern void coding_allocate_composition_data P_ ((struct coding_system *,
- int));
-extern void coding_restore_composition P_ ((struct coding_system *,
- Lisp_Object));
-extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
- int, int));
-extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object,
- struct coding_system *,
- int));
+extern Lisp_Object make_conversion_work_buffer P_ ((int, int));
extern int decoding_buffer_size P_ ((struct coding_system *, int));
extern int encoding_buffer_size P_ ((struct coding_system *, int));
-extern void detect_coding P_ ((struct coding_system *, const unsigned char *,
- int));
-extern void detect_eol P_ ((struct coding_system *, const unsigned char *,
- int));
-extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
-extern Lisp_Object code_convert_string P_ ((Lisp_Object,
- struct coding_system *, int, int));
-extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object,
- Lisp_Object, int));
+extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
+extern void detect_coding P_ ((struct coding_system *));
+extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object, Lisp_Object,
+ int, int));
+extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object, int, int, int));
extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
int));
-extern void setup_raw_text_coding_system P_ ((struct coding_system *));
-extern Lisp_Object encode_coding_string P_ ((Lisp_Object,
- struct coding_system *, int));
-extern Lisp_Object decode_coding_string P_ ((Lisp_Object,
- struct coding_system *, int));
+extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
+extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));
+
+extern int decode_coding_gap P_ ((struct coding_system *,
+ EMACS_INT, EMACS_INT));
+extern int encode_coding_gap P_ ((struct coding_system *,
+ EMACS_INT, EMACS_INT));
+extern void decode_coding_object P_ ((struct coding_system *,
+ Lisp_Object, EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT, Lisp_Object));
+extern void encode_coding_object P_ ((struct coding_system *,
+ Lisp_Object, EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT, Lisp_Object));
+
+#define decode_coding_region(coding, from, to) \
+ decode_coding_object (coding, Fcurrent_buffer (), \
+ from, CHAR_TO_BYTE (from), \
+ to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
+
+
+#define encode_coding_region(coding, from, to) \
+ encode_coding_object (coding, Fcurrent_buffer (), \
+ from, CHAR_TO_BYTE (from), \
+ to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
+
+
+#define decode_coding_string(coding, string, nocopy) \
+ decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \
+ STRING_BYTES (XSTRING (string)), Qt)
+
+#define encode_coding_string(coding, string, nocopy) \
+ (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \
+ STRING_BYTES (XSTRING (string)), Qt), \
+ (coding)->dst_object)
+
+
+#define decode_coding_c_string(coding, src, bytes, dst_object) \
+ do { \
+ (coding)->source = (src); \
+ (coding)->src_chars = (coding)->src_bytes = (bytes); \
+ decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
+ (dst_object)); \
+ } while (0)
+
+
+extern Lisp_Object preferred_coding_system P_ (());
+
+
+extern Lisp_Object Qutf_8, Qutf_8_emacs;
+
extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
-extern Lisp_Object Qraw_text, Qemacs_mule;
+extern Lisp_Object Qcoding_system_p;
+extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
+extern Lisp_Object Qiso_2022;
extern Lisp_Object Qbuffer_file_coding_system;
-extern Lisp_Object Vcoding_category_list;
-extern Lisp_Object Qutf_8;
+
+extern Lisp_Object Qunix, Qdos, Qmac;
extern Lisp_Object Qtranslation_table;
extern Lisp_Object Qtranslation_table_id;
@@ -702,13 +655,10 @@ extern struct coding_system safe_terminal_coding;
function `set-keyboard-coding-system'. */
extern struct coding_system keyboard_coding;
-/* Default coding system to be used to write a file. */
-extern struct coding_system default_buffer_file_coding;
-
/* Default coding systems used for process I/O. */
extern Lisp_Object Vdefault_process_coding_system;
-/* Function to call to force a user to force select a proper coding
+/* Function to call to force a user to force select a propert coding
system. */
extern Lisp_Object Vselect_safe_coding_system_function;
@@ -728,4 +678,7 @@ extern Lisp_Object Vdefault_file_name_coding_system;
/* Error signaled when there's a problem with detecting coding system */
extern Lisp_Object Qcoding_system_error;
+extern char emacs_mule_bytes[256];
+extern int emacs_mule_string_char P_ ((unsigned char *));
+
#endif /* EMACS_CODING_H */
diff --git a/src/composite.c b/src/composite.c
index cc05a869126..43da8887176 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -1,7 +1,10 @@
/* Composite sequence support.
Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -23,7 +26,7 @@ Boston, MA 02111-1307, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "intervals.h"
/* Emacs uses special text property `composition' to support character
@@ -145,19 +148,13 @@ Lisp_Object composition_hash_table;
/* Function to call to adjust composition. */
Lisp_Object Vcompose_chars_after_function;
-/* Char-table of patterns and functions to make a composition. */
-Lisp_Object Vcomposition_function_table;
-Lisp_Object Qcomposition_function_table;
+Lisp_Object Qauto_composed;
+Lisp_Object Vauto_composition_function;
+Lisp_Object Qauto_composition_function;
/* Temporary variable used in macros COMPOSITION_XXX. */
Lisp_Object composition_temp;
-/* Return how many columns C will occupy on the screen. It always
- returns 1 for control characters and 8-bit characters because those
- are just ignored in a composition. */
-#define CHAR_WIDTH(c) \
- (SINGLE_BYTE_CHAR_P (c) ? 1 : CHARSET_WIDTH (CHAR_CHARSET (c)))
-
/* Return COMPOSITION-ID of a composition at buffer position
CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
the sequence is PROP. STRING, if non-nil, is a string that
@@ -464,15 +461,16 @@ run_composition_function (from, to, prop)
to = end;
if (!NILP (Ffboundp (func)))
call2 (func, make_number (from), make_number (to));
- else if (!NILP (Ffboundp (Vcompose_chars_after_function)))
- call3 (Vcompose_chars_after_function,
- make_number (from), make_number (to), Qnil);
}
/* Make invalid compositions adjacent to or inside FROM and TO valid.
CHECK_MASK is bitwise `or' of mask bits defined by macros
CHECK_XXX (see the comment in composite.h).
+ It also resets the text-property `auto-composed' to a proper region
+ so that automatic character composition works correctly later while
+ displaying the region.
+
This function is called when a buffer text is changed. If the
change is deletion, FROM == TO. Otherwise, FROM < TO. */
@@ -482,6 +480,9 @@ update_compositions (from, to, check_mask)
{
Lisp_Object prop;
int start, end;
+ /* The beginning and end of the region to set the property
+ `auto-composed' to nil. */
+ int min_pos = from, max_pos = to;
if (inhibit_modification_hooks)
return;
@@ -500,6 +501,9 @@ update_compositions (from, to, check_mask)
if (from > BEGV
&& find_composition (from - 1, -1, &start, &end, &prop, Qnil))
{
+ min_pos = start;
+ if (end > to)
+ max_pos = end;
if (from < end)
Fput_text_property (make_number (from), make_number (end),
Qcomposition,
@@ -509,7 +513,11 @@ update_compositions (from, to, check_mask)
}
else if (from < ZV
&& find_composition (from, -1, &start, &from, &prop, Qnil))
- run_composition_function (start, from, prop);
+ {
+ if (from > to)
+ max_pos = from;
+ run_composition_function (start, from, prop);
+ }
}
if (check_mask & CHECK_INSIDE)
@@ -534,15 +542,25 @@ update_compositions (from, to, check_mask)
To avoid it, in such a case, we change the property of
the former to the copy of it. */
if (to < end)
- Fput_text_property (make_number (start), make_number (to),
- Qcomposition,
- Fcons (XCAR (prop), XCDR (prop)), Qnil);
+ {
+ Fput_text_property (make_number (start), make_number (to),
+ Qcomposition,
+ Fcons (XCAR (prop), XCDR (prop)), Qnil);
+ max_pos = end;
+ }
run_composition_function (start, end, prop);
}
else if (to < ZV
&& find_composition (to, -1, &start, &end, &prop, Qnil))
- run_composition_function (start, end, prop);
+ {
+ run_composition_function (start, end, prop);
+ max_pos = end;
+ }
}
+ if (min_pos < max_pos)
+ Fremove_list_of_text_properties (make_number (min_pos),
+ make_number (max_pos),
+ Fcons (Qauto_composed, Qnil), Qnil);
}
@@ -588,124 +606,6 @@ compose_text (start, end, components, modification_func, string)
Fput_text_property (make_number (start), make_number (end),
Qcomposition, prop, string);
}
-
-/* Compose sequences of characters in the region between START and END
- by functions registered in Vcomposition_function_table. If STRING
- is non-nil, operate on characters contained between indices START
- and END in STRING. */
-
-void
-compose_chars_in_text (start, end, string)
- int start, end;
- Lisp_Object string;
-{
- int count = 0;
- struct gcpro gcpro1;
- Lisp_Object tail, elt, val, to;
- /* Set to nonzero if we don't have to compose ASCII characters. */
- int skip_ascii;
- int i, len, stop, c;
- const unsigned char *ptr, *pend;
-
- if (! CHAR_TABLE_P (Vcomposition_function_table))
- return;
-
- if (STRINGP (string))
- {
- count = SPECPDL_INDEX ();
- GCPRO1 (string);
- stop = end;
- ptr = SDATA (string) + string_char_to_byte (string, start);
- pend = ptr + SBYTES (string);
- }
- else
- {
- record_unwind_protect (save_excursion_restore, save_excursion_save ());
- TEMP_SET_PT (start);
- stop = (start < GPT && GPT < end ? GPT : end);
- ptr = CHAR_POS_ADDR (start);
- pend = CHAR_POS_ADDR (end);
- }
-
- /* Preserve the match data. */
- record_unwind_protect (Fset_match_data, Fmatch_data (Qnil, Qnil));
-
- /* If none of ASCII characters have composition functions, we can
- skip them quickly. */
- for (i = 0; i < 128; i++)
- if (!NILP (CHAR_TABLE_REF (Vcomposition_function_table, i)))
- break;
- skip_ascii = (i == 128);
-
-
- while (1)
- {
- if (skip_ascii)
- while (start < stop && ASCII_BYTE_P (*ptr))
- start++, ptr++;
-
- if (start >= stop)
- {
- if (stop == end || start >= end)
- break;
- stop = end;
- if (STRINGP (string))
- ptr = SDATA (string) + string_char_to_byte (string, start);
- else
- ptr = CHAR_POS_ADDR (start);
- }
-
- c = STRING_CHAR_AND_LENGTH (ptr, pend - ptr, len);
- tail = CHAR_TABLE_REF (Vcomposition_function_table, c);
- while (CONSP (tail))
- {
- elt = XCAR (tail);
- if (CONSP (elt)
- && STRINGP (XCAR (elt))
- && !NILP (Ffboundp (XCDR (elt))))
- {
- if (STRINGP (string))
- val = Fstring_match (XCAR (elt), string, make_number (start));
- else
- {
- val = Flooking_at (XCAR (elt));
- if (!NILP (val))
- val = make_number (start);
- }
- if (INTEGERP (val) && XFASTINT (val) == start)
- {
- to = Fmatch_end (make_number (0));
- val = call4 (XCDR (elt), val, to, XCAR (elt), string);
- if (INTEGERP (val) && XINT (val) > 1)
- {
- start += XINT (val);
- if (STRINGP (string))
- ptr = SDATA (string) + string_char_to_byte (string, start);
- else
- ptr = CHAR_POS_ADDR (start);
- }
- else
- {
- start++;
- ptr += len;
- }
- break;
- }
- }
- tail = XCDR (tail);
- }
- if (!CONSP (tail))
- {
- /* No composition done. Try the next character. */
- start++;
- ptr += len;
- }
- }
-
- unbind_to (count, Qnil);
- if (STRINGP (string))
- UNGCPRO;
-}
/* Emacs Lisp APIs. */
@@ -868,29 +768,24 @@ valid.
The default value is the function `compose-chars-after'. */);
Vcompose_chars_after_function = intern ("compose-chars-after");
- Qcomposition_function_table = intern ("composition-function-table");
- staticpro (&Qcomposition_function_table);
-
- /* Intern this now in case it isn't already done.
- Setting this variable twice is harmless.
- But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qauto_composed = intern ("auto-composed");
+ staticpro (&Qauto_composed);
- Fput (Qcomposition_function_table, Qchar_table_extra_slots, make_number (0));
+ Qauto_composition_function = intern ("auto-composition-function");
+ staticpro (&Qauto_composition_function);
- DEFVAR_LISP ("composition-function-table", &Vcomposition_function_table,
- doc: /* Char table of patterns and functions to make a composition.
+ DEFVAR_LISP ("auto-composition-function", &Vauto_composition_function,
+ doc: /* Function to call to compose characters automatically.
+The function is called from the display routine with two arguments,
+POS and STRING.
-Each element is nil or an alist of PATTERNs vs FUNCs, where PATTERNs
-are regular expressions and FUNCs are functions. FUNC is responsible
-for composing text matching the corresponding PATTERN. FUNC is called
-with three arguments FROM, TO, and PATTERN. See the function
-`compose-chars-after' for more detail.
+If STRING is nil, the function must compose characters following POS
+in the current buffer.
-This table is looked up by the first character of a composition when
-the composition gets invalid after a change in a buffer. */);
- Vcomposition_function_table
- = Fmake_char_table (Qcomposition_function_table, Qnil);
+Otherwise, STRING is a string, and POS is an index to the string. In
+this case, the function must compose characters following POS in
+the string. */);
+ Vauto_composition_function = Qnil;
defsubr (&Scompose_region_internal);
defsubr (&Scompose_string_internal);
diff --git a/src/composite.h b/src/composite.h
index 620d5d4ce28..d061b484ea7 100644
--- a/src/composite.h
+++ b/src/composite.h
@@ -1,7 +1,10 @@
/* Header for composite sequence handler.
Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -25,20 +28,18 @@ Boston, MA 02111-1307, USA. */
/* Methods to display a sequence of components a composition. */
enum composition_method {
- /* The first two are actually not methods, but used in code
- conversion to specify the current composing status. */
- COMPOSITION_DISABLED, /* Never handle composition data */
- COMPOSITION_NO, /* Not processing composition data */
/* Compose relatively without alternate characters. */
COMPOSITION_RELATIVE,
- /* Compose by specified composition rule. This is not used in Emacs
- 21 but we need it to decode files saved in the older versions of
- Emacs. */
+ /* Compose by specified composition rules. This is not used in
+ Emacs 21 but we need it to decode files saved in the older
+ versions of Emacs. */
COMPOSITION_WITH_RULE,
/* Compose relatively with alternate characters. */
COMPOSITION_WITH_ALTCHARS,
- /* Compose by specified composition rule with alternate characters. */
- COMPOSITION_WITH_RULE_ALTCHARS
+ /* Compose by specified composition rules with alternate characters. */
+ COMPOSITION_WITH_RULE_ALTCHARS,
+ /* This is not a method. */
+ COMPOSITION_NO
};
/* Maximum number of compoments a single composition can have. */
@@ -198,6 +199,9 @@ extern int n_compositions;
extern Lisp_Object Qcomposition;
extern Lisp_Object composition_hash_table;
+extern Lisp_Object Qauto_composed;
+extern Lisp_Object Vauto_composition_function;
+extern Lisp_Object Qauto_composition_function;
extern int get_composition_id P_ ((int, int, int, Lisp_Object, Lisp_Object));
extern int find_composition P_ ((int, int, int *, int *, Lisp_Object *,
diff --git a/src/data.c b/src/data.c
index d18cb187f62..8ee564db821 100644
--- a/src/data.c
+++ b/src/data.c
@@ -25,7 +25,7 @@ Boston, MA 02111-1307, USA. */
#include <stdio.h>
#include "lisp.h"
#include "puresize.h"
-#include "charset.h"
+#include "character.h"
#include "buffer.h"
#include "keyboard.h"
#include "frame.h"
@@ -447,7 +447,7 @@ DEFUN ("char-or-string-p", Fchar_or_string_p, Schar_or_string_p, 1, 1, 0,
(object)
register Lisp_Object object;
{
- if (INTEGERP (object) || STRINGP (object))
+ if (CHARACTERP (object) || STRINGP (object))
return Qt;
return Qnil;
}
@@ -1855,77 +1855,8 @@ or a byte-code object. IDX starts at 0. */)
}
else if (CHAR_TABLE_P (array))
{
- Lisp_Object val;
-
- val = Qnil;
-
- if (idxval < 0)
- args_out_of_range (array, idx);
- if (idxval < CHAR_TABLE_ORDINARY_SLOTS)
- {
- /* For ASCII and 8-bit European characters, the element is
- stored in the top table. */
- val = XCHAR_TABLE (array)->contents[idxval];
- if (NILP (val))
- val = XCHAR_TABLE (array)->defalt;
- while (NILP (val)) /* Follow parents until we find some value. */
- {
- array = XCHAR_TABLE (array)->parent;
- if (NILP (array))
- return Qnil;
- val = XCHAR_TABLE (array)->contents[idxval];
- if (NILP (val))
- val = XCHAR_TABLE (array)->defalt;
- }
- return val;
- }
- else
- {
- int code[4], i;
- Lisp_Object sub_table;
-
- SPLIT_CHAR (idxval, code[0], code[1], code[2]);
- if (code[1] < 32) code[1] = -1;
- else if (code[2] < 32) code[2] = -1;
-
- /* Here, the possible range of CODE[0] (== charset ID) is
- 128..MAX_CHARSET. Since the top level char table contains
- data for multibyte characters after 256th element, we must
- increment CODE[0] by 128 to get a correct index. */
- code[0] += 128;
- code[3] = -1; /* anchor */
-
- try_parent_char_table:
- sub_table = array;
- for (i = 0; code[i] >= 0; i++)
- {
- val = XCHAR_TABLE (sub_table)->contents[code[i]];
- if (SUB_CHAR_TABLE_P (val))
- sub_table = val;
- else
- {
- if (NILP (val))
- val = XCHAR_TABLE (sub_table)->defalt;
- if (NILP (val))
- {
- array = XCHAR_TABLE (array)->parent;
- if (!NILP (array))
- goto try_parent_char_table;
- }
- return val;
- }
- }
- /* Here, VAL is a sub char table. We try the default value
- and parent. */
- val = XCHAR_TABLE (val)->defalt;
- if (NILP (val))
- {
- array = XCHAR_TABLE (array)->parent;
- if (!NILP (array))
- goto try_parent_char_table;
- }
- return val;
- }
+ CHECK_CHARACTER (idx);
+ return CHAR_TABLE_REF (array, idxval);
}
else
{
@@ -1988,44 +1919,8 @@ bool-vector. IDX starts at 0. */)
}
else if (CHAR_TABLE_P (array))
{
- if (idxval < 0)
- args_out_of_range (array, idx);
- if (idxval < CHAR_TABLE_ORDINARY_SLOTS)
- XCHAR_TABLE (array)->contents[idxval] = newelt;
- else
- {
- int code[4], i;
- Lisp_Object val;
-
- SPLIT_CHAR (idxval, code[0], code[1], code[2]);
- if (code[1] < 32) code[1] = -1;
- else if (code[2] < 32) code[2] = -1;
-
- /* See the comment of the corresponding part in Faref. */
- code[0] += 128;
- code[3] = -1; /* anchor */
- for (i = 0; code[i + 1] >= 0; i++)
- {
- val = XCHAR_TABLE (array)->contents[code[i]];
- if (SUB_CHAR_TABLE_P (val))
- array = val;
- else
- {
- Lisp_Object temp;
-
- /* VAL is a leaf. Create a sub char table with the
- default value VAL or XCHAR_TABLE (array)->defalt
- and look into it. */
-
- temp = make_sub_char_table (NILP (val)
- ? XCHAR_TABLE (array)->defalt
- : val);
- XCHAR_TABLE (array)->contents[code[i]] = temp;
- array = temp;
- }
- }
- XCHAR_TABLE (array)->contents[code[i]] = newelt;
- }
+ CHECK_CHARACTER (idx);
+ CHAR_TABLE_SET (array, idxval, newelt);
}
else if (STRING_MULTIBYTE (array))
{
@@ -2071,7 +1966,7 @@ bool-vector. IDX starts at 0. */)
args_out_of_range (array, idx);
CHECK_NUMBER (newelt);
- if (XINT (newelt) < 0 || SINGLE_BYTE_CHAR_P (XINT (newelt)))
+ if (XINT (newelt) < 0 || ASCII_CHAR_P (XINT (newelt)))
SSET (array, idxval, XINT (newelt));
else
{
diff --git a/src/dired.c b/src/dired.c
index bf2a0e4e020..bcb0be12f86 100644
--- a/src/dired.c
+++ b/src/dired.c
@@ -88,6 +88,7 @@ extern struct direct *readdir ();
#include "lisp.h"
#include "buffer.h"
#include "commands.h"
+#include "character.h"
#include "charset.h"
#include "coding.h"
#include "regex.h"
diff --git a/src/dispextern.h b/src/dispextern.h
index 3fc33f55a67..7e917908e19 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1280,6 +1280,7 @@ enum lface_attribute_index
LFACE_FONT_INDEX,
LFACE_INHERIT_INDEX,
LFACE_AVGWIDTH_INDEX,
+ LFACE_FONTSET_INDEX,
LFACE_VECTOR_SIZE
};
@@ -1364,10 +1365,8 @@ struct face
reallocated. */
int font_info_id;
- /* Fontset ID if this face uses a fontset, or -1. This is only >= 0
- if the face was realized for a composition sequence.
- Otherwise, a specific font is loaded from the set of fonts
- specified by the fontset given by the family attribute of the face. */
+ /* Fontset ID if for this face's fontset. Non-ASCII faces derived
+ from the same ASCII face have the same fontset. */
int fontset;
/* Pixmap width and height. */
@@ -1399,13 +1398,6 @@ struct face
/* The hash value of this face. */
unsigned hash;
- /* The charset for which this face was realized if it was realized
- for use in multibyte text. If fontset >= 0, this is the charset
- of the first character of the composition sequence. A value of
- charset < 0 means the face was realized for use in unibyte text
- where the idea of Emacs charsets isn't applicable. */
- int charset;
-
/* Non-zero if text in this face should be underlined, overlined,
strike-through or have a box drawn around it. */
unsigned underline_p : 1;
@@ -1451,8 +1443,9 @@ struct face
/* Next and previous face in hash collision list of face cache. */
struct face *next, *prev;
- /* If this face is for ASCII characters, this points this face
- itself. Otherwise, this points a face for ASCII characters. */
+ /* If this face is an ASCII face, this points to this face itself.
+ Otherwise, this points to an ASCII face that has the same
+ attributes except the font. */
struct face *ascii_face;
};
@@ -1539,7 +1532,7 @@ struct face_cache
/* Non-zero if FACE is suitable for displaying character CHAR. */
#define FACE_SUITABLE_FOR_CHAR_P(FACE, CHAR) \
- (SINGLE_BYTE_CHAR_P (CHAR) \
+ (ASCII_CHAR_P (CHAR) \
? (FACE) == (FACE)->ascii_face \
: face_suitable_for_char_p ((FACE), (CHAR)))
@@ -1548,7 +1541,7 @@ struct face_cache
This macro is only meaningful for multibyte character CHAR. */
#define FACE_FOR_CHAR(F, FACE, CHAR) \
- (SINGLE_BYTE_CHAR_P (CHAR) \
+ (ASCII_CHAR_P (CHAR) \
? (FACE)->ascii_face->id \
: face_for_char ((F), (FACE), (CHAR)))
@@ -1690,6 +1683,7 @@ enum display_element_type
enum prop_idx
{
+ AUTO_COMPOSED_PROP_IDX,
FONTIFIED_PROP_IDX,
FACE_PROP_IDX,
INVISIBLE_PROP_IDX,
@@ -2136,7 +2130,9 @@ struct redisplay_interface
the two-byte form of C. Encoding is returned in *CHAR2B. If
TWO_BYTE_P is non-null, return non-zero there if font is two-byte. */
int (*encode_char) P_ ((int c, XChar2b *char2b,
- struct font_info *font_into, int *two_byte_p));
+ struct font_info *font_into,
+ struct charset *charset,
+ int *two_byte_p));
/* Compute left and right overhang of glyph string S.
A NULL pointer if platform does not support this. */
@@ -2566,14 +2562,17 @@ void clear_face_cache P_ ((int));
unsigned long load_color P_ ((struct frame *, struct face *, Lisp_Object,
enum lface_attribute_index));
void unload_color P_ ((struct frame *, unsigned long));
+char *choose_face_font P_ ((struct frame *, Lisp_Object *, Lisp_Object,
+ int *));
int ascii_face_of_lisp_face P_ ((struct frame *, int));
void prepare_face_for_display P_ ((struct frame *, struct face *));
int xstricmp P_ ((const unsigned char *, const unsigned char *));
-int lookup_face P_ ((struct frame *, Lisp_Object *, int, struct face *));
-int lookup_named_face P_ ((struct frame *, Lisp_Object, int));
+int lookup_face P_ ((struct frame *, Lisp_Object *));
+int lookup_non_ascii_face P_ ((struct frame *, int, struct face *));
+int lookup_named_face P_ ((struct frame *, Lisp_Object));
int smaller_face P_ ((struct frame *, int, int));
int face_with_height P_ ((struct frame *, int, int));
-int lookup_derived_face P_ ((struct frame *, Lisp_Object, int, int));
+int lookup_derived_face P_ ((struct frame *, Lisp_Object, int));
void init_frame_faces P_ ((struct frame *));
void free_frame_faces P_ ((struct frame *));
void recompute_basic_faces P_ ((struct frame *));
@@ -2583,9 +2582,11 @@ int face_at_string_position P_ ((struct window *, Lisp_Object, int, int, int,
int, int *, enum face_id, int));
int compute_char_face P_ ((struct frame *, int, Lisp_Object));
void free_all_realized_faces P_ ((Lisp_Object));
+void free_realized_face P_ ((struct frame *, struct face *));
extern Lisp_Object Qforeground_color, Qbackground_color;
extern char unspecified_fg[], unspecified_bg[];
-void free_realized_multibyte_face P_ ((struct frame *, int));
+extern Lisp_Object split_font_name_into_vector P_ ((Lisp_Object));
+extern Lisp_Object build_font_name_from_vector P_ ((Lisp_Object));
/* Defined in xfns.c */
diff --git a/src/dispnew.c b/src/dispnew.c
index 88f6a452236..544859eb89c 100644
--- a/src/dispnew.c
+++ b/src/dispnew.c
@@ -36,7 +36,7 @@ Boston, MA 02111-1307, USA. */
#include "dispextern.h"
#include "cm.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "keyboard.h"
#include "frame.h"
#include "window.h"
diff --git a/src/disptab.h b/src/disptab.h
index 526b1c4a9d3..9b387ba6887 100644
--- a/src/disptab.h
+++ b/src/disptab.h
@@ -35,8 +35,14 @@ Boston, MA 02111-1307, USA. */
extern Lisp_Object disp_char_vector P_ ((struct Lisp_Char_Table *, int));
-#define DISP_CHAR_VECTOR(dp, c) \
- (SINGLE_BYTE_CHAR_P(c) ? (dp)->contents[c] : disp_char_vector ((dp), (c)))
+#define DISP_CHAR_VECTOR(dp, c) \
+ (ASCII_CHAR_P(c) \
+ ? (NILP ((dp)->ascii) \
+ ? (dp)->defalt \
+ : (SUB_CHAR_TABLE_P ((dp)->ascii) \
+ ? XSUB_CHAR_TABLE ((dp)->ascii)->contents[c] \
+ : (dp)->ascii)) \
+ : disp_char_vector ((dp), (c)))
/* Defined in window.c. */
extern struct Lisp_Char_Table *window_display_table P_ ((struct window *));
diff --git a/src/doc.c b/src/doc.c
index c7b6d874013..fa87916c85b 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -39,7 +39,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "buffer.h"
#include "keyboard.h"
-#include "charset.h"
+#include "character.h"
#include "keymap.h"
#ifdef HAVE_INDEX
diff --git a/src/doprnt.c b/src/doprnt.c
index 72c0dd490e8..3bf8248c353 100644
--- a/src/doprnt.c
+++ b/src/doprnt.c
@@ -46,7 +46,7 @@ Boston, MA 02111-1307, USA. */
/* Since we use the macro CHAR_HEAD_P, we have to include this, but
don't have to include others because CHAR_HEAD_P does not contains
another macro. */
-#include "charset.h"
+#include "character.h"
static int doprnt1 ();
diff --git a/src/dosfns.c b/src/dosfns.c
index b2200ee507b..f220a442dc8 100644
--- a/src/dosfns.c
+++ b/src/dosfns.c
@@ -38,7 +38,7 @@ Boston, MA 02111-1307, USA. */
#include "dosfns.h"
#include "msdos.h"
#include "dispextern.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include <dpmi.h>
#include <go32.h>
diff --git a/src/editfns.c b/src/editfns.c
index 97a939ce43b..e58cf8a5dd9 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -44,7 +44,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "frame.h"
#include "window.h"
@@ -181,9 +181,7 @@ usage: (char-to-string CHAR) */)
CHECK_NUMBER (character);
- len = (SINGLE_BYTE_CHAR_P (XFASTINT (character))
- ? (*str = (unsigned char)(XFASTINT (character)), 1)
- : char_to_string (XFASTINT (character), str));
+ len = CHAR_STRING (XFASTINT (character), str);
return make_string_from_bytes (str, 1, len);
}
@@ -2016,7 +2014,7 @@ general_insert_function (insert_func, insert_from_string_func,
len = CHAR_STRING (XFASTINT (val), str);
else
{
- str[0] = (SINGLE_BYTE_CHAR_P (XINT (val))
+ str[0] = (ASCII_CHAR_P (XINT (val))
? XINT (val)
: multibyte_char_to_unibyte (XINT (val), Qnil));
len = 1;
@@ -2187,6 +2185,29 @@ from adjoining text, if those properties are sticky. */)
return Qnil;
}
+DEFUN ("insert-byte", Finsert_byte, Sinsert_byte, 2, 3, 0,
+ doc: /* Insert COUNT (second arg) copies of BYTE (first arg).
+Both arguments are required.
+BYTE is a number of the range 0..255.
+
+If BYTE is 128..255 and the current buffer is multibyte, the
+corresponding eight-bit character is inserted.
+
+Point, and before-insertion markers, are relocated as in the function `insert'.
+The optional third arg INHERIT, if non-nil, says to inherit text properties
+from adjoining text, if those properties are sticky. */)
+ (byte, count, inherit)
+ Lisp_Object byte, count, inherit;
+{
+ CHECK_NUMBER (byte);
+ if (XINT (byte) < 0 || XINT (byte) > 255)
+ args_out_of_range_3 (byte, make_number (0), make_number (255));
+ if (XINT (byte) >= 128
+ && ! NILP (current_buffer->enable_multibyte_characters))
+ XSETFASTINT (byte, BYTE8_TO_CHAR (XINT (byte)));
+ return Finsert_char (byte, count, inherit);
+}
+
/* Making strings from buffer contents. */
@@ -3399,7 +3420,7 @@ usage: (format STRING &rest OBJECTS) */)
thissize = 30;
if (*format == 'c')
{
- if (! SINGLE_BYTE_CHAR_P (XINT (args[n]))
+ if (! ASCII_CHAR_P (XINT (args[n]))
/* Note: No one can remeber why we have to treat
the character 0 as a multibyte character here.
But, until it causes a real problem, let's
@@ -3784,8 +3805,20 @@ Case is ignored if `case-fold-search' is non-nil in the current buffer. */)
/* Do these in separate statements,
then compare the variables.
because of the way DOWNCASE uses temp variables. */
- i1 = DOWNCASE (XFASTINT (c1));
- i2 = DOWNCASE (XFASTINT (c2));
+ i1 = XFASTINT (c1);
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! ASCII_CHAR_P (i1))
+ {
+ MAKE_CHAR_MULTIBYTE (i1);
+ }
+ i2 = XFASTINT (c2);
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! ASCII_CHAR_P (i2))
+ {
+ MAKE_CHAR_MULTIBYTE (i2);
+ }
+ i1 = DOWNCASE (i1);
+ i2 = DOWNCASE (i2);
return (i1 == i2 ? Qt : Qnil);
}
@@ -4271,6 +4304,7 @@ functions if all the text being accessed has this property. */);
defsubr (&Sinsert_and_inherit);
defsubr (&Sinsert_and_inherit_before_markers);
defsubr (&Sinsert_char);
+ defsubr (&Sinsert_byte);
defsubr (&Suser_login_name);
defsubr (&Suser_real_login_name);
diff --git a/src/emacs.c b/src/emacs.c
index 05897e9bb44..eca2930419e 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -1191,6 +1191,7 @@ main (argc, argv
init_alloc_once ();
init_obarray ();
init_eval_once ();
+ init_character_once ();
init_charset_once ();
init_coding_once ();
init_syntax_once (); /* Create standard syntax table. */
@@ -1302,12 +1303,15 @@ main (argc, argv
Lisp_Object buffer;
buffer = Fcdr (XCAR (tail));
- /* Verify that all buffers are empty now, as they
- ought to be. */
- if (BUF_Z (XBUFFER (buffer)) > BUF_BEG (XBUFFER (buffer)))
- abort ();
- /* It is safe to do this crudely in an empty buffer. */
- XBUFFER (buffer)->enable_multibyte_characters = Qnil;
+ /* Make a multibyte buffer unibyte. */
+ if (BUF_Z_BYTE (XBUFFER (buffer)) > BUF_Z (XBUFFER (buffer)))
+ {
+ struct buffer *current = current_buffer;
+
+ set_buffer_temp (XBUFFER (buffer));
+ Fset_buffer_multibyte (Qnil);
+ set_buffer_temp (current);
+ }
}
}
}
@@ -1422,6 +1426,7 @@ main (argc, argv
init_callproc (); /* Must follow init_cmdargs but not init_sys_modes. */
init_lread ();
+ init_charset ();
/* Intern the names of all standard functions and variables;
define standard keys. */
@@ -1436,6 +1441,7 @@ main (argc, argv
syms_of_data ();
#endif
syms_of_alloc ();
+ syms_of_chartab ();
syms_of_lread ();
syms_of_print ();
syms_of_eval ();
@@ -1454,6 +1460,7 @@ main (argc, argv
/* Called before init_window_once for Mac OS Classic. */
syms_of_ccl ();
#endif
+ syms_of_character ();
syms_of_charset ();
syms_of_cmds ();
#ifndef NO_DIR_LIBRARY
diff --git a/src/fileio.c b/src/fileio.c
index a44552010c7..1103a51bd65 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -75,7 +75,7 @@ extern int errno;
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "window.h"
@@ -295,6 +295,17 @@ restore_point_unwind (location)
Fset_marker (location, Qnil, Qnil);
return Qnil;
}
+
+/* Kill the working buffer for code conversion. */
+
+static Lisp_Object
+kill_workbuf_unwind (workbuf)
+ Lisp_Object workbuf;
+{
+ if (! NILP (workbuf) && ! NILP (Fbuffer_live_p (workbuf)))
+ Fkill_buffer (workbuf);
+ return Qnil;
+}
Lisp_Object Qexpand_file_name;
Lisp_Object Qsubstitute_in_file_name;
@@ -3632,7 +3643,7 @@ actually used. */)
unsigned char buffer[1 << 14];
int replace_handled = 0;
int set_coding_system = 0;
- int coding_system_decided = 0;
+ Lisp_Object coding_system;
int read_quit = 0;
if (current_buffer->base_buffer && ! NILP (visit))
@@ -3650,6 +3661,10 @@ actually used. */)
CHECK_STRING (filename);
filename = Fexpand_file_name (filename, Qnil);
+ /* The value Qnil means that the coding system is not yet
+ decided. */
+ coding_system = Qnil;
+
/* If the file name has special constructs in it,
call the corresponding file handler. */
handler = Ffind_file_name_handler (filename, Qinsert_file_contents);
@@ -3773,32 +3788,18 @@ actually used. */)
}
if (EQ (Vcoding_system_for_read, Qauto_save_coding))
- {
- /* We use emacs-mule for auto saving... */
- setup_coding_system (Qemacs_mule, &coding);
- /* ... but with the special flag to indicate to read in a
- multibyte sequence for eight-bit-control char as is. */
- coding.flags = 1;
- coding.src_multibyte = 0;
- coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- coding.eol_type = CODING_EOL_LF;
- coding_system_decided = 1;
- }
+ coding_system = Qutf_8_emacs;
else if (BEG < Z)
{
/* Decide the coding system to use for reading the file now
because we can't use an optimized method for handling
`coding:' tag if the current buffer is not empty. */
- Lisp_Object val;
- val = Qnil;
-
if (!NILP (Vcoding_system_for_read))
- val = Vcoding_system_for_read;
+ coding_system = Vcoding_system_for_read;
else if (! NILP (replace))
/* In REPLACE mode, we can use the same coding system
that was used to visit the file. */
- val = current_buffer->buffer_file_coding_system;
+ coding_system = current_buffer->buffer_file_coding_system;
else
{
/* Don't try looking inside a file for a coding system
@@ -3854,8 +3855,8 @@ actually used. */)
insert_1_both (read_buf, nread, nread, 0, 0, 0);
TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
- val = call2 (Vset_auto_coding_function,
- filename, make_number (nread));
+ coding_system = call2 (Vset_auto_coding_function,
+ filename, make_number (nread));
set_buffer_internal (prev);
/* Discard the unwind protect for recovering the
@@ -3869,34 +3870,33 @@ actually used. */)
}
}
- if (NILP (val))
+ if (NILP (coding_system))
{
/* If we have not yet decided a coding system, check
file-coding-system-alist. */
- Lisp_Object args[6], coding_systems;
+ Lisp_Object args[6];
args[0] = Qinsert_file_contents, args[1] = orig_filename;
args[2] = visit, args[3] = beg, args[4] = end, args[5] = replace;
- coding_systems = Ffind_operation_coding_system (6, args);
- if (CONSP (coding_systems))
- val = XCAR (coding_systems);
+ coding_system = Ffind_operation_coding_system (6, args);
+ if (CONSP (coding_system))
+ coding_system = XCAR (coding_system);
}
}
- setup_coding_system (Fcheck_coding_system (val), &coding);
- /* Ensure we set Vlast_coding_system_used. */
- set_coding_system = 1;
+ if (NILP (coding_system))
+ coding_system = Qundecided;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
- if (NILP (current_buffer->enable_multibyte_characters)
- && ! NILP (val))
+ if (NILP (current_buffer->enable_multibyte_characters))
/* We must suppress all character code conversion except for
end-of-line conversion. */
- setup_raw_text_coding_system (&coding);
+ coding_system = raw_text_coding_system (coding_system);
- coding.src_multibyte = 0;
- coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- coding_system_decided = 1;
+ setup_coding_system (coding_system, &coding);
+ /* Ensure we set Vlast_coding_system_used. */
+ set_coding_system = 1;
}
/* If requested, replace the accessible part of the buffer
@@ -3915,7 +3915,8 @@ actually used. */)
and let the following if-statement handle the replace job. */
if (!NILP (replace)
&& BEGV < ZV
- && !(coding.common_flags & CODING_REQUIRE_DECODING_MASK))
+ && (NILP (coding_system)
+ || ! CODING_REQUIRE_DECODING (&coding)))
{
/* same_at_start and same_at_end count bytes,
because file access counts bytes
@@ -3950,21 +3951,15 @@ actually used. */)
else if (nread == 0)
break;
- if (coding.type == coding_type_undecided)
- detect_coding (&coding, buffer, nread);
- if (coding.common_flags & CODING_REQUIRE_DECODING_MASK)
- /* We found that the file should be decoded somehow.
- Let's give up here. */
+ if (CODING_REQUIRE_DETECTION (&coding))
{
- giveup_match_end = 1;
- break;
+ coding_system = detect_coding_system (buffer, nread, 1, 0,
+ coding_system);
+ setup_coding_system (coding_system, &coding);
}
- if (coding.eol_type == CODING_EOL_UNDECIDED)
- detect_eol (&coding, buffer, nread);
- if (coding.eol_type != CODING_EOL_UNDECIDED
- && coding.eol_type != CODING_EOL_LF)
- /* We found that the format of eol should be decoded.
+ if (CODING_REQUIRE_DECODING (&coding))
+ /* We found that the file should be decoded somehow.
Let's give up here. */
{
giveup_match_end = 1;
@@ -4109,95 +4104,72 @@ actually used. */)
{
int same_at_start = BEGV_BYTE;
int same_at_end = ZV_BYTE;
+ int same_at_start_charpos;
+ int inserted_chars;
int overlap;
int bufpos;
- /* Make sure that the gap is large enough. */
- int bufsize = 2 * st.st_size;
- unsigned char *conversion_buffer = (unsigned char *) xmalloc (bufsize);
+ unsigned char *decoded;
int temp;
+ int this_count = SPECPDL_INDEX ();
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ Lisp_Object conversion_buffer
+ = make_conversion_work_buffer (-1, multibyte);
+ struct gcpro1;
+
+ record_unwind_protect (kill_workbuf_unwind, conversion_buffer);
/* First read the whole file, performing code conversion into
CONVERSION_BUFFER. */
if (lseek (fd, XINT (beg), 0) < 0)
- {
- xfree (conversion_buffer);
- report_file_error ("Setting file position",
- Fcons (orig_filename, Qnil));
- }
+ report_file_error ("Setting file position",
+ Fcons (orig_filename, Qnil));
total = st.st_size; /* Total bytes in the file. */
how_much = 0; /* Bytes read from file so far. */
inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */
unprocessed = 0; /* Bytes not processed in previous loop. */
+ GCPRO1 (conversion_buffer);
while (how_much < total)
{
+ /* We read one bunch by one (READ_BUF_SIZE bytes) to allow
+ quitting while reading a huge while. */
/* try is reserved in some compilers (Microsoft C) */
int trytry = min (total - how_much, READ_BUF_SIZE - unprocessed);
- unsigned char *destination = read_buf + unprocessed;
int this;
/* Allow quitting out of the actual I/O. */
immediate_quit = 1;
QUIT;
- this = emacs_read (fd, destination, trytry);
+ this = emacs_read (fd, read_buf + unprocessed, trytry);
immediate_quit = 0;
- if (this < 0 || this + unprocessed == 0)
+ if (this <= 0)
{
- how_much = this;
+ if (this < 0)
+ how_much = this;
break;
}
how_much += this;
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- int require, result;
-
- this += unprocessed;
-
- /* If we are using more space than estimated,
- make CONVERSION_BUFFER bigger. */
- require = decoding_buffer_size (&coding, this);
- if (inserted + require + 2 * (total - how_much) > bufsize)
- {
- bufsize = inserted + require + 2 * (total - how_much);
- conversion_buffer = (unsigned char *) xrealloc (conversion_buffer, bufsize);
- }
-
- /* Convert this batch with results in CONVERSION_BUFFER. */
- if (how_much >= total) /* This is the last block. */
- coding.mode |= CODING_MODE_LAST_BLOCK;
- if (coding.composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (&coding, BEGV);
- result = decode_coding (&coding, read_buf,
- conversion_buffer + inserted,
- this, bufsize - inserted);
-
- /* Save for next iteration whatever we didn't convert. */
- unprocessed = this - coding.consumed;
- bcopy (read_buf + coding.consumed, read_buf, unprocessed);
- if (!NILP (current_buffer->enable_multibyte_characters))
- this = coding.produced;
- else
- this = str_as_unibyte (conversion_buffer + inserted,
- coding.produced);
- }
-
- inserted += this;
+ BUF_SET_PT (XBUFFER (conversion_buffer),
+ BUF_Z (XBUFFER (conversion_buffer)));
+ decode_coding_c_string (&coding, read_buf, unprocessed + this,
+ conversion_buffer);
+ unprocessed = coding.carryover_bytes;
+ if (coding.carryover_bytes > 0)
+ bcopy (coding.carryover, read_buf, unprocessed);
}
+ UNGCPRO;
+ emacs_close (fd);
- /* At this point, INSERTED is how many characters (i.e. bytes)
- are present in CONVERSION_BUFFER.
- HOW_MUCH should equal TOTAL,
- or should be <= 0 if we couldn't read the file. */
+ /* At this point, HOW_MUCH should equal TOTAL, or should be <= 0
+ if we couldn't read the file. */
if (how_much < 0)
{
- xfree (conversion_buffer);
-
if (how_much == -1)
error ("IO error reading %s: %s",
SDATA (orig_filename), emacs_strerror (errno));
@@ -4205,21 +4177,31 @@ actually used. */)
error ("maximum buffer size exceeded");
}
- /* Compare the beginning of the converted file
- with the buffer text. */
+ if (unprocessed > 0)
+ {
+ coding.mode |= CODING_MODE_LAST_BLOCK;
+ decode_coding_c_string (&coding, read_buf, unprocessed,
+ conversion_buffer);
+ coding.mode &= ~CODING_MODE_LAST_BLOCK;
+ }
+
+ decoded = BUF_BEG_ADDR (XBUFFER (conversion_buffer));
+ inserted = (BUF_Z_BYTE (XBUFFER (conversion_buffer))
+ - BUF_BEG_BYTE (XBUFFER (conversion_buffer)));
+
+ /* Compare the beginning of the converted string with the buffer
+ text. */
bufpos = 0;
while (bufpos < inserted && same_at_start < same_at_end
- && FETCH_BYTE (same_at_start) == conversion_buffer[bufpos])
+ && FETCH_BYTE (same_at_start) == decoded[bufpos])
same_at_start++, bufpos++;
- /* If the file matches the buffer completely,
+ /* If the file matches the head of buffer completely,
there's no need to replace anything. */
if (bufpos == inserted)
{
- xfree (conversion_buffer);
- emacs_close (fd);
specpdl_ptr--;
/* Truncate the buffer to the size of the file. */
del_range_byte (same_at_start, same_at_end, 0);
@@ -4227,8 +4209,8 @@ actually used. */)
goto handled;
}
- /* Extend the start of non-matching text area to multibyte
- character boundary. */
+ /* Extend the start of non-matching text area to the previous
+ multibyte character boundary. */
if (! NILP (current_buffer->enable_multibyte_characters))
while (same_at_start > BEGV_BYTE
&& ! CHAR_HEAD_P (FETCH_BYTE (same_at_start)))
@@ -4241,11 +4223,11 @@ actually used. */)
/* Compare with same_at_start to avoid counting some buffer text
as matching both at the file's beginning and at the end. */
while (bufpos > 0 && same_at_end > same_at_start
- && FETCH_BYTE (same_at_end - 1) == conversion_buffer[bufpos - 1])
+ && FETCH_BYTE (same_at_end - 1) == decoded[bufpos - 1])
same_at_end--, bufpos--;
- /* Extend the end of non-matching text area to multibyte
- character boundary. */
+ /* Extend the end of non-matching text area to the next
+ multibyte character boundary. */
if (! NILP (current_buffer->enable_multibyte_characters))
while (same_at_end < ZV_BYTE
&& ! CHAR_HEAD_P (FETCH_BYTE (same_at_end)))
@@ -4263,7 +4245,7 @@ actually used. */)
/* Replace the chars that we need to replace,
and update INSERTED to equal the number of bytes
- we are taking from the file. */
+ we are taking from the decoded string. */
inserted -= (Z_BYTE - same_at_end) + (same_at_start - BEG_BYTE);
if (same_at_end != same_at_start)
@@ -4278,18 +4260,19 @@ actually used. */)
}
/* Insert from the file at the proper position. */
SET_PT_BOTH (temp, same_at_start);
- insert_1 (conversion_buffer + same_at_start - BEG_BYTE, inserted,
- 0, 0, 0);
- if (coding.cmp_data && coding.cmp_data->used)
- coding_restore_composition (&coding, Fcurrent_buffer ());
- coding_free_composition_data (&coding);
-
+ same_at_start_charpos
+ = buf_bytepos_to_charpos (XBUFFER (conversion_buffer),
+ same_at_start);
+ inserted_chars
+ = (buf_bytepos_to_charpos (XBUFFER (conversion_buffer),
+ same_at_start + inserted)
+ - same_at_start_charpos);
+ insert_from_buffer (XBUFFER (conversion_buffer),
+ same_at_start_charpos, inserted_chars, 0);
/* Set `inserted' to the number of inserted characters. */
inserted = PT - temp;
- xfree (conversion_buffer);
- emacs_close (fd);
- specpdl_ptr--;
+ unbind_to (this_count, Qnil);
goto handled;
}
@@ -4333,7 +4316,7 @@ actually used. */)
inserted = 0;
/* Here, we don't do code conversion in the loop. It is done by
- code_convert_region after all data are read into the buffer. */
+ decode_coding_gap after all data are read into the buffer. */
{
int gap_size = GAP_SIZE;
@@ -4424,18 +4407,16 @@ actually used. */)
notfound:
- if (! coding_system_decided)
+ if (NILP (coding_system))
{
/* The coding system is not yet decided. Decide it by an
optimized method for handling `coding:' tag.
Note that we can get here only if the buffer was empty
before the insertion. */
- Lisp_Object val;
- val = Qnil;
if (!NILP (Vcoding_system_for_read))
- val = Vcoding_system_for_read;
+ coding_system = Vcoding_system_for_read;
else
{
/* Since we are sure that the current buffer was empty
@@ -4456,73 +4437,78 @@ actually used. */)
if (inserted > 0 && ! NILP (Vset_auto_coding_function))
{
- val = call2 (Vset_auto_coding_function,
- filename, make_number (inserted));
+ coding_system = call2 (Vset_auto_coding_function,
+ filename, make_number (inserted));
}
- if (NILP (val))
+ if (NILP (coding_system))
{
/* If the coding system is not yet decided, check
file-coding-system-alist. */
- Lisp_Object args[6], coding_systems;
+ Lisp_Object args[6];
args[0] = Qinsert_file_contents, args[1] = orig_filename;
args[2] = visit, args[3] = beg, args[4] = end, args[5] = Qnil;
- coding_systems = Ffind_operation_coding_system (6, args);
- if (CONSP (coding_systems))
- val = XCAR (coding_systems);
+ coding_system = Ffind_operation_coding_system (6, args);
+ if (CONSP (coding_system))
+ coding_system = XCAR (coding_system);
}
unbind_to (count, Qnil);
inserted = Z_BYTE - BEG_BYTE;
}
- /* The following kludgy code is to avoid some compiler bug.
- We can't simply do
- setup_coding_system (val, &coding);
- on some system. */
- {
- struct coding_system temp_coding;
- setup_coding_system (val, &temp_coding);
- bcopy (&temp_coding, &coding, sizeof coding);
- }
- /* Ensure we set Vlast_coding_system_used. */
- set_coding_system = 1;
+ if (NILP (coding_system))
+ coding_system = Qundecided;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
- if (NILP (current_buffer->enable_multibyte_characters)
- && ! NILP (val))
+ if (NILP (current_buffer->enable_multibyte_characters))
/* We must suppress all character code conversion except for
end-of-line conversion. */
- setup_raw_text_coding_system (&coding);
- coding.src_multibyte = 0;
- coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
+ coding_system = raw_text_coding_system (coding_system);
+ setup_coding_system (coding_system, &coding);
+ /* Ensure we set Vlast_coding_system_used. */
+ set_coding_system = 1;
}
- if (!NILP (visit)
- /* Can't do this if part of the buffer might be preserved. */
- && NILP (replace)
- && (coding.type == coding_type_no_conversion
- || coding.type == coding_type_raw_text))
+ if (!NILP (visit))
{
- /* Visiting a file with these coding system makes the buffer
- unibyte. */
- current_buffer->enable_multibyte_characters = Qnil;
- coding.dst_multibyte = 0;
+ /* When we visit a file by raw-text, we change the buffer to
+ unibyte. If we have not yet decided how to decode a text,
+ decide it at first by detecting the file's encoding. */
+ if (CODING_REQUIRE_DETECTION (&coding))
+ {
+ coding_system = detect_coding_system (PT_ADDR, inserted, 1, 0,
+ coding_system);
+ setup_coding_system (coding_system, &coding);
+ }
+
+ if (CODING_FOR_UNIBYTE (&coding)
+ /* Can't do this if part of the buffer might be preserved. */
+ && NILP (replace))
+ /* Visiting a file with these coding system makes the buffer
+ unibyte. */
+ current_buffer->enable_multibyte_characters = Qnil;
}
- if (inserted > 0 || coding.type == coding_type_ccl)
+ coding.dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ if ((CODING_REQUIRE_DETECTION (&coding)
+ || CODING_REQUIRE_DECODING (&coding))
+ && (inserted > 0 || CODING_REQUIRE_FLUSHING (&coding)))
{
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- code_convert_region (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
- &coding, 0, 0);
- inserted = coding.produced_char;
- }
- else
- adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
- inserted);
+ move_gap_both (PT, PT_BYTE);
+ GAP_SIZE += inserted;
+ ZV_BYTE -= inserted;
+ Z_BYTE -= inserted;
+ ZV -= inserted;
+ Z -= inserted;
+ decode_coding_gap (&coding, inserted, inserted);
+ inserted = coding.produced_char;
}
+ else if (inserted > 0)
+ adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
+ inserted);
/* Now INSERTED is measured in characters. */
@@ -4530,8 +4516,8 @@ actually used. */)
/* Use the conversion type to determine buffer-file-type
(find-buffer-file-type is now used to help determine the
conversion). */
- if ((coding.eol_type == CODING_EOL_UNDECIDED
- || coding.eol_type == CODING_EOL_LF)
+ if ((coding.eol_type == eol_type_undecided
+ || coding.eol_type == eol_type_lf)
&& ! CODING_REQUIRE_DECODING (&coding))
current_buffer->buffer_file_type = Qt;
else
@@ -4572,7 +4558,7 @@ actually used. */)
}
if (set_coding_system)
- Vlast_coding_system_used = coding.symbol;
+ Vlast_coding_system_used = coding_system;
if (! NILP (Ffboundp (Qafter_insert_file_set_coding)))
{
@@ -4650,8 +4636,6 @@ actually used. */)
}
static Lisp_Object build_annotations P_ ((Lisp_Object, Lisp_Object));
-static Lisp_Object build_annotations_2 P_ ((Lisp_Object, Lisp_Object,
- Lisp_Object, Lisp_Object));
/* If build_annotations switched buffers, switch back to BUF.
Kill the temporary buffer that was selected in the meantime.
@@ -4676,7 +4660,7 @@ build_annotations_unwind (buf)
/* Decide the coding-system to encode the data with. */
-void
+static Lisp_Object
choose_write_coding_system (start, end, filename,
append, visit, lockname, coding)
Lisp_Object start, end, filename, append, visit, lockname;
@@ -4685,14 +4669,7 @@ choose_write_coding_system (start, end, filename,
Lisp_Object val;
if (auto_saving)
- {
- /* We use emacs-mule for auto saving... */
- setup_coding_system (Qemacs_mule, coding);
- /* ... but with the special flag to indicate not to strip off
- leading code of eight-bit-control chars. */
- coding->flags = 1;
- goto done_setup_coding;
- }
+ val = Qutf_8_emacs;
else if (!NILP (Vcoding_system_for_write))
{
val = Vcoding_system_for_write;
@@ -4739,8 +4716,7 @@ choose_write_coding_system (start, end, filename,
val = XCDR (coding_systems);
}
- if (NILP (val)
- && !NILP (current_buffer->buffer_file_coding_system))
+ if (NILP (val))
{
/* If we still have not decided a coding system, use the
default value of buffer-file-coding-system. */
@@ -4748,43 +4724,44 @@ choose_write_coding_system (start, end, filename,
using_default_coding = 1;
}
+ if (! NILP (val) && ! force_raw_text)
+ {
+ Lisp_Object spec, attrs;
+
+ CHECK_CODING_SYSTEM_GET_SPEC (val, spec);
+ attrs = AREF (spec, 0);
+ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
+ force_raw_text = 1;
+ }
+
if (!force_raw_text
&& !NILP (Ffboundp (Vselect_safe_coding_system_function)))
/* Confirm that VAL can surely encode the current region. */
val = call5 (Vselect_safe_coding_system_function,
start, end, val, Qnil, filename);
- setup_coding_system (Fcheck_coding_system (val), coding);
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && !using_default_coding)
- {
- if (! EQ (default_buffer_file_coding.symbol,
- buffer_defaults.buffer_file_coding_system))
- setup_coding_system (buffer_defaults.buffer_file_coding_system,
- &default_buffer_file_coding);
- if (default_buffer_file_coding.eol_type != CODING_EOL_UNDECIDED)
- {
- Lisp_Object subsidiaries;
-
- coding->eol_type = default_buffer_file_coding.eol_type;
- subsidiaries = Fget (coding->symbol, Qeol_type);
- if (VECTORP (subsidiaries)
- && XVECTOR (subsidiaries)->size == 3)
- coding->symbol
- = XVECTOR (subsidiaries)->contents[coding->eol_type];
- }
- }
+ /* If the decided coding-system doesn't specify end-of-line
+ format, we use that of
+ `default-buffer-file-coding-system'. */
+ if (! using_default_coding
+ && ! NILP (buffer_defaults.buffer_file_coding_system))
+ val = (coding_inherit_eol_type
+ (val, buffer_defaults.buffer_file_coding_system));
+ /* If we decide not to encode text, use `raw-text' or one of its
+ subsidiaries. */
if (force_raw_text)
- setup_raw_text_coding_system (coding);
- goto done_setup_coding;
+ val = raw_text_coding_system (val);
}
- setup_coding_system (Fcheck_coding_system (val), coding);
+ setup_coding_system (val, coding);
+ if (! NILP (val)
+ && VECTORP (CODING_ID_EOL_TYPE (coding->id)))
+ val = AREF (CODING_ID_EOL_TYPE (coding->id), 0);
- done_setup_coding:
if (!STRINGP (start) && !NILP (current_buffer->selective_display))
coding->mode |= CODING_MODE_SELECTIVE_DISPLAY;
+ return val;
}
DEFUN ("write-region", Fwrite_region, Swrite_region, 3, 7,
@@ -4928,21 +4905,9 @@ This does code conversion according to the value of
We used to make this choice before calling build_annotations, but that
leads to problems when a write-annotate-function takes care of
unsavable chars (as was the case with X-Symbol). */
- choose_write_coding_system (start, end, filename,
- append, visit, lockname, &coding);
- Vlast_coding_system_used = coding.symbol;
-
- given_buffer = current_buffer;
- if (! STRINGP (start))
- {
- annotations = build_annotations_2 (start, end,
- coding.pre_write_conversion, annotations);
- if (current_buffer != given_buffer)
- {
- XSETFASTINT (start, BEGV);
- XSETFASTINT (end, ZV);
- }
- }
+ Vlast_coding_system_used
+ = choose_write_coding_system (start, end, filename,
+ append, visit, lockname, &coding);
#ifdef CLASH_DETECTION
if (!auto_saving)
@@ -5080,6 +5045,9 @@ This does code conversion according to the value of
if (GPT > BEG && GPT_ADDR[-1] != '\n')
move_gap (find_next_newline (GPT, 1));
#else
+#if 0
+ /* The new encoding routine doesn't require the following. */
+
/* Whether VMS or not, we must move the gap to the next of newline
when we must put designation sequences at beginning of line. */
if (INTEGERP (start)
@@ -5093,6 +5061,7 @@ This does code conversion according to the value of
SET_PT_BOTH (opoint, opoint_byte);
}
#endif
+#endif
failure = 0;
immediate_quit = 1;
@@ -5105,23 +5074,10 @@ This does code conversion according to the value of
}
else if (XINT (start) != XINT (end))
{
- tem = CHAR_TO_BYTE (XINT (start));
-
- if (XINT (start) < GPT)
- {
- failure = 0 > a_write (desc, Qnil, XINT (start),
- min (GPT, XINT (end)) - XINT (start),
- &annotations, &coding);
- save_errno = errno;
- }
-
- if (XINT (end) > GPT && !failure)
- {
- tem = max (XINT (start), GPT);
- failure = 0 > a_write (desc, Qnil, tem , XINT (end) - tem,
- &annotations, &coding);
- save_errno = errno;
- }
+ failure = 0 > a_write (desc, Qnil,
+ XINT (start), XINT (end) - XINT (start),
+ &annotations, &coding);
+ save_errno = errno;
}
else
{
@@ -5137,7 +5093,7 @@ This does code conversion according to the value of
{
/* We have to flush out a data. */
coding.mode |= CODING_MODE_LAST_BLOCK;
- failure = 0 > e_write (desc, Qnil, 0, 0, &coding);
+ failure = 0 > e_write (desc, Qnil, 1, 1, &coding);
save_errno = errno;
}
@@ -5327,30 +5283,6 @@ build_annotations (start, end)
return annotations;
}
-static Lisp_Object
-build_annotations_2 (start, end, pre_write_conversion, annotations)
- Lisp_Object start, end, pre_write_conversion, annotations;
-{
- struct gcpro gcpro1;
- Lisp_Object res;
-
- GCPRO1 (annotations);
- /* At last, do the same for the function PRE_WRITE_CONVERSION
- implied by the current coding-system. */
- if (!NILP (pre_write_conversion))
- {
- struct buffer *given_buffer = current_buffer;
- Vwrite_region_annotations_so_far = annotations;
- res = call2 (pre_write_conversion, start, end);
- Flength (res);
- annotations = (current_buffer != given_buffer
- ? res
- : merge (annotations, res, Qcar_less_than_car));
- }
-
- UNGCPRO;
- return annotations;
-}
/* Write to descriptor DESC the NCHARS chars starting at POS of STRING.
If STRING is nil, POS is the character position in the current buffer.
@@ -5422,76 +5354,41 @@ e_write (desc, string, start, end, coding)
int start, end;
struct coding_system *coding;
{
- register char *addr;
- register int nbytes;
- char buf[WRITE_BUF_SIZE];
int return_val = 0;
- if (start >= end)
- coding->composing = COMPOSITION_DISABLED;
- if (coding->composing != COMPOSITION_DISABLED)
- coding_save_composition (coding, start, end, string);
-
if (STRINGP (string))
{
- addr = SDATA (string);
- nbytes = SBYTES (string);
- coding->src_multibyte = STRING_MULTIBYTE (string);
- }
- else if (start < end)
- {
- /* It is assured that the gap is not in the range START and END-1. */
- addr = CHAR_POS_ADDR (start);
- nbytes = CHAR_TO_BYTE (end) - CHAR_TO_BYTE (start);
- coding->src_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- }
- else
- {
- addr = "";
- nbytes = 0;
- coding->src_multibyte = 1;
+ start = 0;
+ end = SCHARS (string);
}
/* We used to have a code for handling selective display here. But,
now it is handled within encode_coding. */
- while (1)
+ do
{
- int result;
+ if (STRINGP (string))
+ encode_coding_object (coding, string,
+ start, string_char_to_byte (string, start),
+ end, string_char_to_byte (string, end), Qt);
+ else
+ encode_coding_object (coding, Fcurrent_buffer (),
+ start, CHAR_TO_BYTE (start),
+ end, CHAR_TO_BYTE (end), Qt);
- result = encode_coding (coding, addr, buf, nbytes, WRITE_BUF_SIZE);
if (coding->produced > 0)
{
- coding->produced -= emacs_write (desc, buf, coding->produced);
+ coding->produced -= emacs_write (desc, SDATA (coding->dst_object),
+ coding->produced);
+
if (coding->produced)
{
return_val = -1;
break;
}
}
- nbytes -= coding->consumed;
- addr += coding->consumed;
- if (result == CODING_FINISH_INSUFFICIENT_SRC
- && nbytes > 0)
- {
- /* The source text ends by an incomplete multibyte form.
- There's no way other than write it out as is. */
- nbytes -= emacs_write (desc, addr, nbytes);
- if (nbytes)
- {
- return_val = -1;
- break;
- }
- }
- if (nbytes <= 0)
- break;
start += coding->consumed_char;
- if (coding->cmp_data)
- coding_adjust_composition_offset (coding, start);
}
-
- if (coding->cmp_data)
- coding_free_composition_data (coding);
+ while (start < end);
return return_val;
}
diff --git a/src/filelock.c b/src/filelock.c
index bcad75199cd..f6108942ba3 100644
--- a/src/filelock.c
+++ b/src/filelock.c
@@ -55,7 +55,7 @@ extern int errno;
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "systime.h"
diff --git a/src/fns.c b/src/fns.c
index 9ee15ff4994..0fdca30084e 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -38,7 +38,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "commands.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "buffer.h"
#include "keyboard.h"
@@ -142,8 +142,6 @@ To get the number of bytes, use `string-bytes'. */)
XSETFASTINT (val, SCHARS (sequence));
else if (VECTORP (sequence))
XSETFASTINT (val, XVECTOR (sequence)->size);
- else if (SUB_CHAR_TABLE_P (sequence))
- XSETFASTINT (val, SUB_CHAR_TABLE_ORDINARY_SLOTS);
else if (CHAR_TABLE_P (sequence))
XSETFASTINT (val, MAX_CHAR);
else if (BOOL_VECTOR_P (sequence))
@@ -452,27 +450,6 @@ usage: (vconcat &rest SEQUENCES) */)
return concat (nargs, args, Lisp_Vectorlike, 0);
}
-/* Return a copy of a sub char table ARG. The elements except for a
- nested sub char table are not copied. */
-static Lisp_Object
-copy_sub_char_table (arg)
- Lisp_Object arg;
-{
- Lisp_Object copy = make_sub_char_table (XCHAR_TABLE (arg)->defalt);
- int i;
-
- /* Copy all the contents. */
- bcopy (XCHAR_TABLE (arg)->contents, XCHAR_TABLE (copy)->contents,
- SUB_CHAR_TABLE_ORDINARY_SLOTS * sizeof (Lisp_Object));
- /* Recursively copy any sub char-tables in the ordinary slots. */
- for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++)
- if (SUB_CHAR_TABLE_P (XCHAR_TABLE (arg)->contents[i]))
- XCHAR_TABLE (copy)->contents[i]
- = copy_sub_char_table (XCHAR_TABLE (copy)->contents[i]);
-
- return copy;
-}
-
DEFUN ("copy-sequence", Fcopy_sequence, Scopy_sequence, 1, 1, 0,
doc: /* Return a copy of a list, vector, string or char-table.
@@ -485,24 +462,7 @@ with the original. */)
if (CHAR_TABLE_P (arg))
{
- int i;
- Lisp_Object copy;
-
- copy = Fmake_char_table (XCHAR_TABLE (arg)->purpose, Qnil);
- /* Copy all the slots, including the extra ones. */
- bcopy (XVECTOR (arg)->contents, XVECTOR (copy)->contents,
- ((XCHAR_TABLE (arg)->size & PSEUDOVECTOR_SIZE_MASK)
- * sizeof (Lisp_Object)));
-
- /* Recursively copy any sub char tables in the ordinary slots
- for multibyte characters. */
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS;
- i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- if (SUB_CHAR_TABLE_P (XCHAR_TABLE (arg)->contents[i]))
- XCHAR_TABLE (copy)->contents[i]
- = copy_sub_char_table (XCHAR_TABLE (copy)->contents[i]);
-
- return copy;
+ return copy_char_table (arg);
}
if (BOOL_VECTOR_P (arg))
@@ -522,6 +482,7 @@ with the original. */)
return concat (1, &arg, CONSP (arg) ? Lisp_Cons : XTYPE (arg), 0);
}
+#if 0 /* unused */
/* In string STR of length LEN, see if bytes before STR[I] combine
with bytes after STR[I] to form a single character. If so, return
the number of bytes after STR[I] which combine in this way.
@@ -542,6 +503,7 @@ count_combining (str, len, i)
PARSE_MULTIBYTE_SEQ (str + j, len - j, bytes);
return (bytes <= i - j ? 0 : bytes - (i - j));
}
+#endif
/* This structure holds information of an argument of `concat' that is
a string and has text properties to be copied. */
@@ -625,11 +587,11 @@ concat (nargs, args, target_type, last_special)
for (i = 0; i < len; i++)
{
ch = XVECTOR (this)->contents[i];
- if (! INTEGERP (ch))
- wrong_type_argument (Qintegerp, ch);
+ if (! CHARACTERP (ch))
+ wrong_type_argument (Qcharacterp, ch);
this_len_byte = CHAR_BYTES (XINT (ch));
result_len_byte += this_len_byte;
- if (!SINGLE_BYTE_CHAR_P (XINT (ch)))
+ if (! ASCII_CHAR_P (XINT (ch)))
some_multibyte = 1;
}
else if (BOOL_VECTOR_P (this) && XBOOL_VECTOR (this)->size > 0)
@@ -638,11 +600,11 @@ concat (nargs, args, target_type, last_special)
for (; CONSP (this); this = XCDR (this))
{
ch = XCAR (this);
- if (! INTEGERP (ch))
- wrong_type_argument (Qintegerp, ch);
+ if (! CHARACTERP (ch))
+ wrong_type_argument (Qcharacterp, ch);
this_len_byte = CHAR_BYTES (XINT (ch));
result_len_byte += this_len_byte;
- if (!SINGLE_BYTE_CHAR_P (XINT (ch)))
+ if (! ASCII_CHAR_P (XINT (ch)))
some_multibyte = 1;
}
else if (STRINGP (this))
@@ -705,25 +667,17 @@ concat (nargs, args, target_type, last_special)
&& STRING_MULTIBYTE (this) == some_multibyte)
{
int thislen_byte = SBYTES (this);
- int combined;
bcopy (SDATA (this), SDATA (val) + toindex_byte,
SBYTES (this));
- combined = (some_multibyte && toindex_byte > 0
- ? count_combining (SDATA (val),
- toindex_byte + thislen_byte,
- toindex_byte)
- : 0);
if (! NULL_INTERVAL_P (STRING_INTERVALS (this)))
{
textprops[num_textprops].argnum = argnum;
- /* We ignore text properties on characters being combined. */
- textprops[num_textprops].from = combined;
+ textprops[num_textprops].from = 0;
textprops[num_textprops++].to = toindex;
}
toindex_byte += thislen_byte;
- toindex += thisleni - combined;
- STRING_SET_CHARS (val, SCHARS (val) - combined);
+ toindex += thisleni;
}
/* Copy a single-byte string to a multibyte string. */
else if (STRINGP (this) && STRINGP (val))
@@ -766,9 +720,7 @@ concat (nargs, args, target_type, last_special)
{
XSETFASTINT (elt, SREF (this, thisindex++));
if (some_multibyte
- && (XINT (elt) >= 0240
- || (XINT (elt) >= 0200
- && ! NILP (Vnonascii_translation_table)))
+ && XINT (elt) >= 0200
&& XINT (elt) < 0400)
{
c = unibyte_char_to_multibyte (XINT (elt));
@@ -801,34 +753,12 @@ concat (nargs, args, target_type, last_special)
else
{
CHECK_NUMBER (elt);
- if (SINGLE_BYTE_CHAR_P (XINT (elt)))
- {
- if (some_multibyte)
- toindex_byte
- += CHAR_STRING (XINT (elt),
- SDATA (val) + toindex_byte);
- else
- SSET (val, toindex_byte++, XINT (elt));
- if (some_multibyte
- && toindex_byte > 0
- && count_combining (SDATA (val),
- toindex_byte, toindex_byte - 1))
- STRING_SET_CHARS (val, SCHARS (val) - 1);
- else
- toindex++;
- }
+ if (some_multibyte)
+ toindex_byte += CHAR_STRING (XINT (elt),
+ SDATA (val) + toindex_byte);
else
- /* If we have any multibyte characters,
- we already decided to make a multibyte string. */
- {
- int c = XINT (elt);
- /* P exists as a variable
- to avoid a bug on the Masscomp C compiler. */
- unsigned char *p = SDATA (val) + toindex_byte;
-
- toindex_byte += CHAR_STRING (c, p);
- toindex++;
- }
+ SSET (val, toindex_byte++, XINT (elt));
+ toindex++;
}
}
}
@@ -876,7 +806,7 @@ string_char_to_byte (string, char_index)
Lisp_Object string;
int char_index;
{
- int i, i_byte;
+ int i_byte;
int best_below, best_below_byte;
int best_above, best_above_byte;
@@ -903,40 +833,30 @@ string_char_to_byte (string, char_index)
if (char_index - best_below < best_above - char_index)
{
+ unsigned char *p = SDATA (string) + best_below_byte;
+
while (best_below < char_index)
{
- int c;
- FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string,
- best_below, best_below_byte);
+ p += BYTES_BY_CHAR_HEAD (*p);
+ best_below++;
}
- i = best_below;
- i_byte = best_below_byte;
+ i_byte = p - SDATA (string);
}
else
{
+ unsigned char *p = SDATA (string) + best_above_byte;
+
while (best_above > char_index)
{
- unsigned char *pend = SDATA (string) + best_above_byte;
- unsigned char *pbeg = pend - best_above_byte;
- unsigned char *p = pend - 1;
- int bytes;
-
- while (p > pbeg && !CHAR_HEAD_P (*p)) p--;
- PARSE_MULTIBYTE_SEQ (p, pend - p, bytes);
- if (bytes == pend - p)
- best_above_byte -= bytes;
- else if (bytes > pend - p)
- best_above_byte -= (pend - p);
- else
- best_above_byte--;
+ p--;
+ while (!CHAR_HEAD_P (*p)) p--;
best_above--;
}
- i = best_above;
- i_byte = best_above_byte;
+ i_byte = p - SDATA (string);
}
string_char_byte_cache_bytepos = i_byte;
- string_char_byte_cache_charpos = i;
+ string_char_byte_cache_charpos = char_index;
string_char_byte_cache_string = string;
return i_byte;
@@ -976,36 +896,30 @@ string_byte_to_char (string, byte_index)
if (byte_index - best_below_byte < best_above_byte - byte_index)
{
- while (best_below_byte < byte_index)
+ unsigned char *p = SDATA (string) + best_below_byte;
+ unsigned char *pend = SDATA (string) + byte_index;
+
+ while (p < pend)
{
- int c;
- FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string,
- best_below, best_below_byte);
+ p += BYTES_BY_CHAR_HEAD (*p);
+ best_below++;
}
i = best_below;
- i_byte = best_below_byte;
+ i_byte = p - SDATA (string);
}
else
{
- while (best_above_byte > byte_index)
+ unsigned char *p = SDATA (string) + best_above_byte;
+ unsigned char *pbeg = SDATA (string) + byte_index;
+
+ while (p > pbeg)
{
- unsigned char *pend = SDATA (string) + best_above_byte;
- unsigned char *pbeg = pend - best_above_byte;
- unsigned char *p = pend - 1;
- int bytes;
-
- while (p > pbeg && !CHAR_HEAD_P (*p)) p--;
- PARSE_MULTIBYTE_SEQ (p, pend - p, bytes);
- if (bytes == pend - p)
- best_above_byte -= bytes;
- else if (bytes > pend - p)
- best_above_byte -= (pend - p);
- else
- best_above_byte--;
+ p--;
+ while (!CHAR_HEAD_P (*p)) p--;
best_above--;
}
i = best_above;
- i_byte = best_above_byte;
+ i_byte = p - SDATA (string);
}
string_char_byte_cache_bytepos = i_byte;
@@ -1015,9 +929,7 @@ string_byte_to_char (string, byte_index)
return i;
}
-/* Convert STRING to a multibyte string.
- Single-byte characters 0240 through 0377 are converted
- by adding nonascii_insert_offset to each. */
+/* Convert STRING to a multibyte string. */
Lisp_Object
string_make_multibyte (string)
@@ -1044,10 +956,9 @@ string_make_multibyte (string)
}
-/* Convert STRING to a multibyte string without changing each
- character codes. Thus, characters 0200 trough 0237 are converted
- to eight-bit-control characters, and characters 0240 through 0377
- are converted eight-bit-graphic characters. */
+/* Convert STRING (if unibyte) to a multibyte string without changing
+ the number of characters. Characters 0200 trough 0237 are
+ converted to eight-bit characters. */
Lisp_Object
string_to_multibyte (string)
@@ -1060,8 +971,8 @@ string_to_multibyte (string)
return string;
nbytes = parse_str_to_multibyte (SDATA (string), SBYTES (string));
- /* If all the chars are ASCII or eight-bit-graphic, they won't need
- any more bytes once converted. */
+ /* If all the chars are ASCII, they won't need any more bytes once
+ converted. */
if (nbytes == SBYTES (string))
return make_multibyte_string (SDATA (string), nbytes, nbytes);
@@ -1151,9 +1062,11 @@ DEFUN ("string-as-multibyte", Fstring_as_multibyte, Sstring_as_multibyte,
doc: /* Return a multibyte string with the same individual bytes as STRING.
If STRING is multibyte, the result is STRING itself.
Otherwise it is a newly created string, with no text properties.
+
If STRING is unibyte and contains an individual 8-bit byte (i.e. not
-part of a multibyte form), it is converted to the corresponding
-multibyte character of charset `eight-bit-control' or `eight-bit-graphic'. */)
+part of a correct utf-8 sequence), it is converted to the corresponding
+multibyte character of charset `eight-bit'.
+See also `string-to-multibyte'. */)
(string)
Lisp_Object string;
{
@@ -1184,10 +1097,13 @@ DEFUN ("string-to-multibyte", Fstring_to_multibyte, Sstring_to_multibyte,
doc: /* Return a multibyte string with the same individual chars as STRING.
If STRING is multibyte, the result is STRING itself.
Otherwise it is a newly created string, with no text properties.
-Characters 0200 through 0237 are converted to eight-bit-control
-characters of the same character code. Characters 0240 through 0377
-are converted to eight-bit-control characters of the same character
-codes. */)
+
+If STRING is unibyte and contains an 8-bit byte, it is converted to
+the corresponding multibyte character of charset `eight-bit'.
+
+This differs from `string-as-multibyte' by converting each byte of a correct
+utf-8 sequence to an eight-bit character, not just bytes that don't form a
+correct sequence. */)
(string)
Lisp_Object string;
{
@@ -2219,7 +2135,8 @@ internal_equal (o1, o2, depth)
functions are sensible to compare, so eliminate the others now. */
if (size & PSEUDOVECTOR_FLAG)
{
- if (!(size & (PVEC_COMPILED | PVEC_CHAR_TABLE)))
+ if (!(size & (PVEC_COMPILED
+ | PVEC_CHAR_TABLE | PVEC_SUB_CHAR_TABLE)))
return 0;
size &= PSEUDOVECTOR_SIZE_MASK;
}
@@ -2273,11 +2190,11 @@ ARRAY is a vector, string, char-table, or bool-vector. */)
}
else if (CHAR_TABLE_P (array))
{
- register Lisp_Object *p = XCHAR_TABLE (array)->contents;
- size = CHAR_TABLE_ORDINARY_SLOTS;
- for (index = 0; index < size; index++)
- p[index] = item;
- XCHAR_TABLE (array)->defalt = Qnil;
+ int i;
+
+ for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
+ XCHAR_TABLE (array)->contents[i] = item;
+ XCHAR_TABLE (array)->defalt = item;
}
else if (STRINGP (array))
{
@@ -2340,458 +2257,6 @@ This makes STRING unibyte and may change its length. */)
return Qnil;
}
-DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
- 1, 1, 0,
- doc: /* Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
- (char_table)
- Lisp_Object char_table;
-{
- CHECK_CHAR_TABLE (char_table);
-
- return XCHAR_TABLE (char_table)->purpose;
-}
-
-DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
- 1, 1, 0,
- doc: /* Return the parent char-table of CHAR-TABLE.
-The value is either nil or another char-table.
-If CHAR-TABLE holds nil for a given character,
-then the actual applicable value is inherited from the parent char-table
-\(or from its parents, if necessary). */)
- (char_table)
- Lisp_Object char_table;
-{
- CHECK_CHAR_TABLE (char_table);
-
- return XCHAR_TABLE (char_table)->parent;
-}
-
-DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
- 2, 2, 0,
- doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
-PARENT must be either nil or another char-table. */)
- (char_table, parent)
- Lisp_Object char_table, parent;
-{
- Lisp_Object temp;
-
- CHECK_CHAR_TABLE (char_table);
-
- if (!NILP (parent))
- {
- CHECK_CHAR_TABLE (parent);
-
- for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
- if (EQ (temp, char_table))
- error ("Attempt to make a chartable be its own parent");
- }
-
- XCHAR_TABLE (char_table)->parent = parent;
-
- return parent;
-}
-
-DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
- 2, 2, 0,
- doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
- (char_table, n)
- Lisp_Object char_table, n;
-{
- CHECK_CHAR_TABLE (char_table);
- CHECK_NUMBER (n);
- if (XINT (n) < 0
- || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
- args_out_of_range (char_table, n);
-
- return XCHAR_TABLE (char_table)->extras[XINT (n)];
-}
-
-DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
- Sset_char_table_extra_slot,
- 3, 3, 0,
- doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
- (char_table, n, value)
- Lisp_Object char_table, n, value;
-{
- CHECK_CHAR_TABLE (char_table);
- CHECK_NUMBER (n);
- if (XINT (n) < 0
- || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
- args_out_of_range (char_table, n);
-
- return XCHAR_TABLE (char_table)->extras[XINT (n)] = value;
-}
-
-DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
- 2, 2, 0,
- doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
-RANGE should be nil (for the default value)
-a vector which identifies a character set or a row of a character set,
-a character set name, or a character code. */)
- (char_table, range)
- Lisp_Object char_table, range;
-{
- CHECK_CHAR_TABLE (char_table);
-
- if (EQ (range, Qnil))
- return XCHAR_TABLE (char_table)->defalt;
- else if (INTEGERP (range))
- return Faref (char_table, range);
- else if (SYMBOLP (range))
- {
- Lisp_Object charset_info;
-
- charset_info = Fget (range, Qcharset);
- CHECK_VECTOR (charset_info);
-
- return Faref (char_table,
- make_number (XINT (XVECTOR (charset_info)->contents[0])
- + 128));
- }
- else if (VECTORP (range))
- {
- if (XVECTOR (range)->size == 1)
- return Faref (char_table,
- make_number (XINT (XVECTOR (range)->contents[0]) + 128));
- else
- {
- int size = XVECTOR (range)->size;
- Lisp_Object *val = XVECTOR (range)->contents;
- Lisp_Object ch = Fmake_char_internal (size <= 0 ? Qnil : val[0],
- size <= 1 ? Qnil : val[1],
- size <= 2 ? Qnil : val[2]);
- return Faref (char_table, ch);
- }
- }
- else
- error ("Invalid RANGE argument to `char-table-range'");
- return Qt;
-}
-
-DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
- 3, 3, 0,
- doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE.
-RANGE should be t (for all characters), nil (for the default value)
-a vector which identifies a character set or a row of a character set,
-a coding system, or a character code. */)
- (char_table, range, value)
- Lisp_Object char_table, range, value;
-{
- int i;
-
- CHECK_CHAR_TABLE (char_table);
-
- if (EQ (range, Qt))
- for (i = 0; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- XCHAR_TABLE (char_table)->contents[i] = value;
- else if (EQ (range, Qnil))
- XCHAR_TABLE (char_table)->defalt = value;
- else if (SYMBOLP (range))
- {
- Lisp_Object charset_info;
-
- charset_info = Fget (range, Qcharset);
- CHECK_VECTOR (charset_info);
-
- return Faset (char_table,
- make_number (XINT (XVECTOR (charset_info)->contents[0])
- + 128),
- value);
- }
- else if (INTEGERP (range))
- Faset (char_table, range, value);
- else if (VECTORP (range))
- {
- if (XVECTOR (range)->size == 1)
- return Faset (char_table,
- make_number (XINT (XVECTOR (range)->contents[0]) + 128),
- value);
- else
- {
- int size = XVECTOR (range)->size;
- Lisp_Object *val = XVECTOR (range)->contents;
- Lisp_Object ch = Fmake_char_internal (size <= 0 ? Qnil : val[0],
- size <= 1 ? Qnil : val[1],
- size <= 2 ? Qnil : val[2]);
- return Faset (char_table, ch, value);
- }
- }
- else
- error ("Invalid RANGE argument to `set-char-table-range'");
-
- return value;
-}
-
-DEFUN ("set-char-table-default", Fset_char_table_default,
- Sset_char_table_default, 3, 3, 0,
- doc: /* Set the default value in CHAR-TABLE for generic character CH to VALUE.
-The generic character specifies the group of characters.
-See also the documentation of `make-char'. */)
- (char_table, ch, value)
- Lisp_Object char_table, ch, value;
-{
- int c, charset, code1, code2;
- Lisp_Object temp;
-
- CHECK_CHAR_TABLE (char_table);
- CHECK_NUMBER (ch);
-
- c = XINT (ch);
- SPLIT_CHAR (c, charset, code1, code2);
-
- /* Since we may want to set the default value for a character set
- not yet defined, we check only if the character set is in the
- valid range or not, instead of it is already defined or not. */
- if (! CHARSET_VALID_P (charset))
- invalid_character (c);
-
- if (charset == CHARSET_ASCII)
- return (XCHAR_TABLE (char_table)->defalt = value);
-
- /* Even if C is not a generic char, we had better behave as if a
- generic char is specified. */
- if (!CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1)
- code1 = 0;
- temp = XCHAR_TABLE (char_table)->contents[charset + 128];
- if (!code1)
- {
- if (SUB_CHAR_TABLE_P (temp))
- XCHAR_TABLE (temp)->defalt = value;
- else
- XCHAR_TABLE (char_table)->contents[charset + 128] = value;
- return value;
- }
- if (SUB_CHAR_TABLE_P (temp))
- char_table = temp;
- else
- char_table = (XCHAR_TABLE (char_table)->contents[charset + 128]
- = make_sub_char_table (temp));
- temp = XCHAR_TABLE (char_table)->contents[code1];
- if (SUB_CHAR_TABLE_P (temp))
- XCHAR_TABLE (temp)->defalt = value;
- else
- XCHAR_TABLE (char_table)->contents[code1] = value;
- return value;
-}
-
-/* Look up the element in TABLE at index CH,
- and return it as an integer.
- If the element is nil, return CH itself.
- (Actually we do that for any non-integer.) */
-
-int
-char_table_translate (table, ch)
- Lisp_Object table;
- int ch;
-{
- Lisp_Object value;
- value = Faref (table, make_number (ch));
- if (! INTEGERP (value))
- return ch;
- return XINT (value);
-}
-
-static void
-optimize_sub_char_table (table, chars)
- Lisp_Object *table;
- int chars;
-{
- Lisp_Object elt;
- int from, to;
-
- if (chars == 94)
- from = 33, to = 127;
- else
- from = 32, to = 128;
-
- if (!SUB_CHAR_TABLE_P (*table))
- return;
- elt = XCHAR_TABLE (*table)->contents[from++];
- for (; from < to; from++)
- if (NILP (Fequal (elt, XCHAR_TABLE (*table)->contents[from])))
- return;
- *table = elt;
-}
-
-DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
- 1, 1, 0, doc: /* Optimize char table TABLE. */)
- (table)
- Lisp_Object table;
-{
- Lisp_Object elt;
- int dim;
- int i, j;
-
- CHECK_CHAR_TABLE (table);
-
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- {
- elt = XCHAR_TABLE (table)->contents[i];
- if (!SUB_CHAR_TABLE_P (elt))
- continue;
- dim = CHARSET_DIMENSION (i - 128);
- if (dim == 2)
- for (j = 32; j < SUB_CHAR_TABLE_ORDINARY_SLOTS; j++)
- optimize_sub_char_table (XCHAR_TABLE (elt)->contents + j, dim);
- optimize_sub_char_table (XCHAR_TABLE (table)->contents + i, dim);
- }
- return Qnil;
-}
-
-
-/* Map C_FUNCTION or FUNCTION over SUBTABLE, calling it for each
- character or group of characters that share a value.
- DEPTH is the current depth in the originally specified
- chartable, and INDICES contains the vector indices
- for the levels our callers have descended.
-
- ARG is passed to C_FUNCTION when that is called. */
-
-void
-map_char_table (c_function, function, table, subtable, arg, depth, indices)
- void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
- Lisp_Object function, table, subtable, arg, *indices;
- int depth;
-{
- int i, to;
-
- if (depth == 0)
- {
- /* At first, handle ASCII and 8-bit European characters. */
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- {
- Lisp_Object elt= XCHAR_TABLE (subtable)->contents[i];
- if (NILP (elt))
- elt = XCHAR_TABLE (subtable)->defalt;
- if (NILP (elt))
- elt = Faref (subtable, make_number (i));
- if (c_function)
- (*c_function) (arg, make_number (i), elt);
- else
- call2 (function, make_number (i), elt);
- }
-#if 0 /* If the char table has entries for higher characters,
- we should report them. */
- if (NILP (current_buffer->enable_multibyte_characters))
- return;
-#endif
- to = CHAR_TABLE_ORDINARY_SLOTS;
- }
- else
- {
- int charset = XFASTINT (indices[0]) - 128;
-
- i = 32;
- to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
- if (CHARSET_CHARS (charset) == 94)
- i++, to--;
- }
-
- for (; i < to; i++)
- {
- Lisp_Object elt;
- int charset;
-
- elt = XCHAR_TABLE (subtable)->contents[i];
- XSETFASTINT (indices[depth], i);
- charset = XFASTINT (indices[0]) - 128;
- if (depth == 0
- && (!CHARSET_DEFINED_P (charset)
- || charset == CHARSET_8_BIT_CONTROL
- || charset == CHARSET_8_BIT_GRAPHIC))
- continue;
-
- if (SUB_CHAR_TABLE_P (elt))
- {
- if (depth >= 3)
- error ("Too deep char table");
- map_char_table (c_function, function, table, elt, arg, depth + 1, indices);
- }
- else
- {
- int c1, c2, c;
-
- c1 = depth >= 1 ? XFASTINT (indices[1]) : 0;
- c2 = depth >= 2 ? XFASTINT (indices[2]) : 0;
- c = MAKE_CHAR (charset, c1, c2);
-
- if (NILP (elt))
- elt = XCHAR_TABLE (subtable)->defalt;
- if (NILP (elt))
- elt = Faref (table, make_number (c));
-
- if (c_function)
- (*c_function) (arg, make_number (c), elt);
- else
- call2 (function, make_number (c), elt);
- }
- }
-}
-
-static void void_call2 P_ ((Lisp_Object a, Lisp_Object b, Lisp_Object c));
-static void
-void_call2 (a, b, c)
- Lisp_Object a, b, c;
-{
- call2 (a, b, c);
-}
-
-DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
- 2, 2, 0,
- doc: /* Call FUNCTION for each (normal and generic) characters in CHAR-TABLE.
-FUNCTION is called with two arguments--a key and a value.
-The key is always a possible IDX argument to `aref'. */)
- (function, char_table)
- Lisp_Object function, char_table;
-{
- /* The depth of char table is at most 3. */
- Lisp_Object indices[3];
-
- CHECK_CHAR_TABLE (char_table);
-
- /* When Lisp_Object is represented as a union, `call2' cannot directly
- be passed to map_char_table because it returns a Lisp_Object rather
- than returning nothing.
- Casting leads to crashes on some architectures. -stef */
- map_char_table (void_call2, Qnil, char_table, char_table, function, 0, indices);
- return Qnil;
-}
-
-/* Return a value for character C in char-table TABLE. Store the
- actual index for that value in *IDX. Ignore the default value of
- TABLE. */
-
-Lisp_Object
-char_table_ref_and_index (table, c, idx)
- Lisp_Object table;
- int c, *idx;
-{
- int charset, c1, c2;
- Lisp_Object elt;
-
- if (SINGLE_BYTE_CHAR_P (c))
- {
- *idx = c;
- return XCHAR_TABLE (table)->contents[c];
- }
- SPLIT_CHAR (c, charset, c1, c2);
- elt = XCHAR_TABLE (table)->contents[charset + 128];
- *idx = MAKE_CHAR (charset, 0, 0);
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- if (c1 < 32 || NILP (XCHAR_TABLE (elt)->contents[c1]))
- return XCHAR_TABLE (elt)->defalt;
- elt = XCHAR_TABLE (elt)->contents[c1];
- *idx = MAKE_CHAR (charset, c1, 0);
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- if (c2 < 32 || NILP (XCHAR_TABLE (elt)->contents[c2]))
- return XCHAR_TABLE (elt)->defalt;
- *idx = c;
- return XCHAR_TABLE (elt)->contents[c2];
-}
-
-
/* ARGSUSED */
Lisp_Object
nconc2 (s1, s2)
@@ -3792,7 +3257,9 @@ base64_encode_1 (from, to, length, line_break, multibyte)
if (multibyte)
{
c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
- if (c >= 256)
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (c >= 256)
return -1;
i += bytes;
}
@@ -3830,8 +3297,9 @@ base64_encode_1 (from, to, length, line_break, multibyte)
if (multibyte)
{
c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
- if (c >= 256)
- return -1;
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (c >= 256)
i += bytes;
}
else
@@ -3852,7 +3320,9 @@ base64_encode_1 (from, to, length, line_break, multibyte)
if (multibyte)
{
c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
- if (c >= 256)
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (c >= 256)
return -1;
i += bytes;
}
@@ -4008,8 +3478,8 @@ base64_decode_1 (from, to, length, multibyte, nchars_return)
value |= base64_char_to_value[c] << 12;
c = (unsigned char) (value >> 16);
- if (multibyte)
- e += CHAR_STRING (c, e);
+ if (multibyte && c >= 128)
+ e += BYTE8_STRING (c, e);
else
*e++ = c;
nchars++;
@@ -4032,8 +3502,8 @@ base64_decode_1 (from, to, length, multibyte, nchars_return)
value |= base64_char_to_value[c] << 6;
c = (unsigned char) (0xff & value >> 8);
- if (multibyte)
- e += CHAR_STRING (c, e);
+ if (multibyte && c >= 128)
+ e += BYTE8_STRING (c, e);
else
*e++ = c;
nchars++;
@@ -4050,8 +3520,8 @@ base64_decode_1 (from, to, length, multibyte, nchars_return)
value |= base64_char_to_value[c];
c = (unsigned char) (0xff & value);
- if (multibyte)
- e += CHAR_STRING (c, e);
+ if (multibyte && c >= 128)
+ e += BYTE8_STRING (c, e);
else
*e++ = c;
nchars++;
@@ -5321,7 +4791,6 @@ including negative integers. */)
************************************************************************/
#include "md5.h"
-#include "coding.h"
DEFUN ("md5", Fmd5, Smd5, 1, 5, 0,
doc: /* Return MD5 message digest of OBJECT, a buffer or string.
@@ -5372,7 +4841,7 @@ guesswork fails. Normally, an error is signaled in such case. */)
if (STRING_MULTIBYTE (object))
/* use default, we can't guess correct value */
- coding_system = SYMBOL_VALUE (XCAR (Vcoding_category_list));
+ coding_system = preferred_coding_system ();
else
coding_system = Qraw_text;
}
@@ -5389,7 +4858,7 @@ guesswork fails. Normally, an error is signaled in such case. */)
}
if (STRING_MULTIBYTE (object))
- object = code_convert_string1 (object, coding_system, Qnil, 1);
+ object = code_convert_string (object, coding_system, Qnil, 1, 0, 1);
size = SCHARS (object);
size_byte = SBYTES (object);
@@ -5521,7 +4990,7 @@ guesswork fails. Normally, an error is signaled in such case. */)
object = make_buffer_string (b, e, 0);
if (STRING_MULTIBYTE (object))
- object = code_convert_string1 (object, coding_system, Qnil, 1);
+ object = code_convert_string (object, coding_system, Qnil, 1, 0, 0);
}
md5_buffer (SDATA (object) + start_byte,
@@ -5674,16 +5143,6 @@ invoked by mouse clicks and mouse menu items. */);
defsubr (&Sequal);
defsubr (&Sfillarray);
defsubr (&Sclear_string);
- defsubr (&Schar_table_subtype);
- defsubr (&Schar_table_parent);
- defsubr (&Sset_char_table_parent);
- defsubr (&Schar_table_extra_slot);
- defsubr (&Sset_char_table_extra_slot);
- defsubr (&Schar_table_range);
- defsubr (&Sset_char_table_range);
- defsubr (&Sset_char_table_default);
- defsubr (&Soptimize_char_table);
- defsubr (&Smap_char_table);
defsubr (&Snconc);
defsubr (&Smapcar);
defsubr (&Smapc);
diff --git a/src/fontset.c b/src/fontset.c
index a23a146c76d..e9232f2a25e 100644
--- a/src/fontset.c
+++ b/src/fontset.c
@@ -1,6 +1,9 @@
/* Fontset handler.
Copyright (C) 1995, 1997, 2000 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -28,7 +31,9 @@ Boston, MA 02111-1307, USA. */
#endif
#include "lisp.h"
+#include "blockinput.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "ccl.h"
#include "keyboard.h"
@@ -37,71 +42,123 @@ Boston, MA 02111-1307, USA. */
#include "fontset.h"
#include "window.h"
-#ifdef FONTSET_DEBUG
#undef xassert
+#ifdef FONTSET_DEBUG
#define xassert(X) do {if (!(X)) abort ();} while (0)
#undef INLINE
#define INLINE
-#endif
+#else /* not FONTSET_DEBUG */
+#define xassert(X) (void) 0
+#endif /* not FONTSET_DEBUG */
+EXFUN (Fclear_face_cache, 1);
/* FONTSET
A fontset is a collection of font related information to give
- similar appearance (style, size, etc) of characters. There are two
- kinds of fontsets; base and realized. A base fontset is created by
- new-fontset from Emacs Lisp explicitly. A realized fontset is
- created implicitly when a face is realized for ASCII characters. A
- face is also realized for multibyte characters based on an ASCII
- face. All of the multibyte faces based on the same ASCII face
- share the same realized fontset.
+ similar appearance (style, etc) of characters. A fontset has two
+ roles. One is to use for the frame parameter `font' as if it is an
+ ASCII font. In that case, Emacs uses the font specified for
+ `ascii' script for the frame's default font.
+
+ Another role, the more important one, is to provide information
+ about which font to use for each non-ASCII character.
+
+ There are two kinds of fontsets; base and realized. A base fontset
+ is created by `new-fontset' from Emacs Lisp explicitly. A realized
+ fontset is created implicitly when a face is realized for ASCII
+ characters. A face is also realized for non-ASCII characters based
+ on an ASCII face. All of non-ASCII faces based on the same ASCII
+ face share the same realized fontset.
+
+ A fontset object is implemented by a char-table whose default value
+ and parent are always nil.
+
+ An element of a base fontset is a vector of FONT-DEFs which itself
+ is a vector [ FONT-SPEC ENCODING REPERTORY ].
+
+ FONT-SPEC is:
+ [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ]
+ or
+ FONT-NAME
+ where FAMILY, WEIGHT, SLANT, SWIDTH, ADSTYLE, REGISTRY, and
+ FONT-NAME are strings.
+
+ ENCODING is a charset ID or a char-table that can convert
+ characters to glyph codes of the corresponding font.
+
+ REPERTORY is a charset ID or nil. If REPERTORY is a charset ID,
+ the repertory of the charset exactly matches with that of the font.
+ If REPERTORY is nil, we consult with the font itself to get the
+ repertory.
+
+ ENCODING and REPERTORY are extracted from the variable
+ Vfont_encoding_alist by using a font name generated form FONT-SPEC
+ (if it is a vector) or FONT-NAME as a key.
+
+
+ An element of a realized fontset is nil or t, or has this form:
- A fontset object is implemented by a char-table.
+ ( CHARSET-PRIORITY-LIST-TICK . FONT-VECTOR )
- An element of a base fontset is:
- (INDEX . FONTNAME) or
- (INDEX . (FOUNDRY . REGISTRY ))
- FONTNAME is a font name pattern for the corresponding character.
- FOUNDRY and REGISTRY are respectively foundry and registry fields of
- a font name for the corresponding character. INDEX specifies for
- which character (or generic character) the element is defined. It
- may be different from an index to access this element. For
- instance, if a fontset defines some font for all characters of
- charset `japanese-jisx0208', INDEX is the generic character of this
- charset. REGISTRY is the
+ FONT-VECTOR is a vector whose elements have this form:
- An element of a realized fontset is FACE-ID which is a face to use
- for displaying the corresponding character.
+ [ FACE-ID FONT-INDEX FONT-DEF ]
- All single byte characters (ASCII and 8bit-unibyte) share the same
- element in a fontset. The element is stored in the first element
- of the fontset.
+ FONT-VECTOR is automatically reordered by the current charset
+ priority list.
- To access or set each element, use macros FONTSET_REF and
- FONTSET_SET respectively for efficiency.
+ The value nil means that we have not yet generated FONT-VECTOR from
+ the base of the fontset.
- A fontset has 3 extra slots.
+ The value t means that no font is available for the corresponding
+ range of characters.
- The 1st slot is an ID number of the fontset.
- The 2nd slot is a name of the fontset. This is nil for a realized
- face.
+ A fontset has 8 extra slots.
- The 3rd slot is a frame that the fontset belongs to. This is nil
- for a default face.
+ The 1st slot: the ID number of the fontset
- A parent of a base fontset is nil. A parent of a realized fontset
- is a base fontset.
+ The 2nd slot:
+ base: the name of the fontset
+ realized: nil
- All fontsets are recorded in Vfontset_table.
+ The 3rd slot:
+ base: nil
+ realized: the base fontset
+
+ The 4th slot:
+ base: nil
+ realized: the frame that the fontset belongs to
+
+ The 5th slot:
+ base: the font name for ASCII characters
+ realized: nil
+
+ The 6th slot:
+ base: nil
+ realized: the ID number of a face to use for characters that
+ has no font in a realized fontset.
+
+ The 7th slot:
+ base: nil
+ realized: Alist of font index vs the corresponding repertory
+ char-table.
+
+ The 8th slot:
+ base: nil
+ realized: If the base is not the default fontset, a fontset
+ realized from the default fontset, else nil.
+
+ All fontsets are recorded in the vector Vfontset_table.
DEFAULT FONTSET
- There's a special fontset named `default fontset' which defines a
- default fontname pattern. When a base fontset doesn't specify a
- font for a specific character, the corresponding value in the
- default fontset is used. The format is the same as a base fontset.
+ There's a special base fontset named `default fontset' which
+ defines the default font specifications. When a base fontset
+ doesn't specify a font for a specific character, the corresponding
+ value in the default fontset is used.
The parent of a realized fontset created for such a face that has
no fontset is the default fontset.
@@ -109,16 +166,18 @@ Boston, MA 02111-1307, USA. */
These structures are hidden from the other codes than this file.
The other codes handle fontsets only by their ID numbers. They
- usually use variable name `fontset' for IDs. But, in this file, we
- always use variable name `id' for IDs, and name `fontset' for the
- actual fontset objects.
+ usually use the variable name `fontset' for IDs. But, in this
+ file, we always use varialbe name `id' for IDs, and name `fontset'
+ for an actual fontset object, i.e., char-table.
*/
/********** VARIABLES and FUNCTION PROTOTYPES **********/
extern Lisp_Object Qfont;
-Lisp_Object Qfontset;
+static Lisp_Object Qfontset;
+static Lisp_Object Qfontset_info;
+static Lisp_Object Qprepend, Qappend;
/* Vector containing all fontsets. */
static Lisp_Object Vfontset_table;
@@ -128,7 +187,7 @@ static Lisp_Object Vfontset_table;
static int next_fontset_id;
/* The default fontset. This gives default FAMILY and REGISTRY of
- font for each characters. */
+ font for each character. */
static Lisp_Object Vdefault_fontset;
Lisp_Object Vfont_encoding_alist;
@@ -169,17 +228,35 @@ void (*set_frame_fontset_func) P_ ((FRAME_PTR f, Lisp_Object arg,
This function set the member `encoder' of the structure. */
void (*find_ccl_program_func) P_ ((struct font_info *));
+Lisp_Object (*get_font_repertory_func) P_ ((struct frame *,
+ struct font_info *));
+
/* Check if any window system is used now. */
void (*check_window_system_func) P_ ((void));
/* Prototype declarations for static functions. */
-static Lisp_Object fontset_ref P_ ((Lisp_Object, int));
-static void fontset_set P_ ((Lisp_Object, int, Lisp_Object));
+static Lisp_Object fontset_add P_ ((Lisp_Object, Lisp_Object, Lisp_Object,
+ Lisp_Object));
static Lisp_Object make_fontset P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
-static int fontset_id_valid_p P_ ((int));
static Lisp_Object fontset_pattern_regexp P_ ((Lisp_Object));
-static Lisp_Object font_family_registry P_ ((Lisp_Object, int));
+static void accumulate_script_ranges P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object));
+static Lisp_Object find_font_encoding P_ ((char *));
+
+#ifdef FONTSET_DEBUG
+
+/* Return 1 if ID is a valid fontset id, else return 0. */
+
+static int
+fontset_id_valid_p (id)
+ int id;
+{
+ return (id >= 0 && id < ASIZE (Vfontset_table) - 1);
+}
+
+#endif
+
/********** MACROS AND FUNCTIONS TO HANDLE FONTSET **********/
@@ -189,122 +266,381 @@ static Lisp_Object font_family_registry P_ ((Lisp_Object, int));
/* Macros to access special values of FONTSET. */
#define FONTSET_ID(fontset) XCHAR_TABLE (fontset)->extras[0]
+
+/* Macros to access special values of (base) FONTSET. */
#define FONTSET_NAME(fontset) XCHAR_TABLE (fontset)->extras[1]
-#define FONTSET_FRAME(fontset) XCHAR_TABLE (fontset)->extras[2]
-#define FONTSET_ASCII(fontset) XCHAR_TABLE (fontset)->contents[0]
-#define FONTSET_BASE(fontset) XCHAR_TABLE (fontset)->parent
+#define FONTSET_ASCII(fontset) XCHAR_TABLE (fontset)->extras[4]
+
+/* Macros to access special values of (realized) FONTSET. */
+#define FONTSET_BASE(fontset) XCHAR_TABLE (fontset)->extras[2]
+#define FONTSET_FRAME(fontset) XCHAR_TABLE (fontset)->extras[3]
+#define FONTSET_NOFONT_FACE(fontset) XCHAR_TABLE (fontset)->extras[5]
+#define FONTSET_REPERTORY(fontset) XCHAR_TABLE (fontset)->extras[6]
+#define FONTSET_FALLBACK(fontset) XCHAR_TABLE (fontset)->extras[7]
-#define BASE_FONTSET_P(fontset) NILP (FONTSET_BASE(fontset))
+#define BASE_FONTSET_P(fontset) (NILP (FONTSET_BASE (fontset)))
-/* Return the element of FONTSET (char-table) at index C (character). */
+/* Return the element of FONTSET for the character C. If FONTSET is a
+ base fontset other then the default fontset and FONTSET doesn't
+ contain information for C, return the information in the default
+ fontset. */
-#define FONTSET_REF(fontset, c) fontset_ref (fontset, c)
+#define FONTSET_REF(fontset, c) \
+ (EQ (fontset, Vdefault_fontset) \
+ ? CHAR_TABLE_REF (fontset, c) \
+ : fontset_ref ((fontset), (c)))
static Lisp_Object
fontset_ref (fontset, c)
Lisp_Object fontset;
int c;
{
- int charset, c1, c2;
- Lisp_Object elt, defalt;
-
- if (SINGLE_BYTE_CHAR_P (c))
- return FONTSET_ASCII (fontset);
-
- SPLIT_CHAR (c, charset, c1, c2);
- elt = XCHAR_TABLE (fontset)->contents[charset + 128];
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- defalt = XCHAR_TABLE (elt)->defalt;
- if (c1 < 32
- || (elt = XCHAR_TABLE (elt)->contents[c1],
- NILP (elt)))
- return defalt;
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- defalt = XCHAR_TABLE (elt)->defalt;
- if (c2 < 32
- || (elt = XCHAR_TABLE (elt)->contents[c2],
- NILP (elt)))
- return defalt;
+ Lisp_Object elt;
+
+ elt = CHAR_TABLE_REF (fontset, c);
+ if (NILP (elt) && ! EQ (fontset, Vdefault_fontset)
+ /* Don't check Vdefault_fontset for a realized fontset. */
+ && NILP (FONTSET_BASE (fontset)))
+ elt = CHAR_TABLE_REF (Vdefault_fontset, c);
return elt;
}
-#define FONTSET_REF_VIA_BASE(fontset, c) fontset_ref_via_base (fontset, &c)
+/* Return the element of FONTSET for the character C, set FROM and TO
+ to the range of characters around C that have the same value as C.
+ If FONTSET is a base fontset other then the default fontset and
+ FONTSET doesn't contain information for C, return the information
+ in the default fontset. */
+
+#define FONTSET_REF_AND_RANGE(fontset, c, form, to) \
+ (EQ (fontset, Vdefault_fontset) \
+ ? char_table_ref_and_range (fontset, c, &from, &to) \
+ : fontset_ref_and_range (fontset, c, &from, &to))
static Lisp_Object
-fontset_ref_via_base (fontset, c)
+fontset_ref_and_range (fontset, c, from, to)
Lisp_Object fontset;
- int *c;
+ int c;
+ int *from, *to;
{
- int charset, c1, c2;
Lisp_Object elt;
- if (SINGLE_BYTE_CHAR_P (*c))
- return FONTSET_ASCII (fontset);
-
- elt = FONTSET_REF (FONTSET_BASE (fontset), *c);
- if (NILP (elt) && ! EQ (fontset, Vdefault_fontset))
- elt = FONTSET_REF (Vdefault_fontset, *c);
- if (NILP (elt))
- return Qnil;
+ elt = char_table_ref_and_range (fontset, c, from, to);
+ if (NILP (elt) && ! EQ (fontset, Vdefault_fontset)
+ /* Don't check Vdefault_fontset for a realized fontset. */
+ && NILP (FONTSET_BASE (fontset)))
+ {
+ int from1, to1;
- *c = XINT (XCAR (elt));
- SPLIT_CHAR (*c, charset, c1, c2);
- elt = XCHAR_TABLE (fontset)->contents[charset + 128];
- if (c1 < 32)
- return (SUB_CHAR_TABLE_P (elt) ? XCHAR_TABLE (elt)->defalt : elt);
- if (!SUB_CHAR_TABLE_P (elt))
- return Qnil;
- elt = XCHAR_TABLE (elt)->contents[c1];
- if (c2 < 32)
- return (SUB_CHAR_TABLE_P (elt) ? XCHAR_TABLE (elt)->defalt : elt);
- if (!SUB_CHAR_TABLE_P (elt))
- return Qnil;
- elt = XCHAR_TABLE (elt)->contents[c2];
+ elt = char_table_ref_and_range (Vdefault_fontset, c, &from1, &to1);
+ if (*from < from1)
+ *from = from1;
+ if (*to > to1)
+ *to = to1;
+ }
return elt;
}
-/* Store into the element of FONTSET at index C the value NEWELT. */
-#define FONTSET_SET(fontset, c, newelt) fontset_set(fontset, c, newelt)
+/* Set elements of FONTSET for characters in RANGE to the value ELT.
+ RANGE is a cons (FROM . TO), where FROM and TO are character codes
+ specifying a range. */
+
+#define FONTSET_SET(fontset, range, elt) \
+ Fset_char_table_range ((fontset), (range), (elt))
+
+
+/* Modify the elements of FONTSET for characters in RANGE by replacing
+ with ELT or adding ETL. RANGE is a cons (FROM . TO), where FROM
+ and TO are character codes specifying a range. If ADD is nil,
+ replace with ELT, if ADD is `prepend', prepend ELT, otherwise,
+ append ELT. */
+
+#define FONTSET_ADD(fontset, range, elt, add) \
+ (NILP (add) \
+ ? Fset_char_table_range ((fontset), (range), \
+ Fmake_vector (make_number (1), (elt))) \
+ : fontset_add ((fontset), (range), (elt), (add)))
+
+static Lisp_Object
+fontset_add (fontset, range, elt, add)
+ Lisp_Object fontset, range, elt, add;
+{
+ int from, to, from1, to1;
+ Lisp_Object elt1;
+
+ from = XINT (XCAR (range));
+ to = XINT (XCDR (range));
+ do {
+ elt1 = char_table_ref_and_range (fontset, from, &from1, &to1);
+ if (to < to1)
+ to1 = to;
+ if (NILP (elt1))
+ elt1 = Fmake_vector (make_number (1), elt);
+ else
+ {
+ int i, i0 = 1, i1 = ASIZE (elt1) + 1;
+ Lisp_Object new;
+
+ new = Fmake_vector (make_number (i1), elt);
+ if (EQ (add, Qappend))
+ i0--, i1--;
+ for (i = 0; i0 < i1; i++, i0++)
+ ASET (new, i0, AREF (elt1, i));
+ elt1 = new;
+ }
+ char_table_set_range (fontset, from, to1, elt1);
+ from = to1 + 1;
+ } while (from < to);
+ return Qnil;
+}
+
+
+/* Update FONTSET_ELEMENT which has this form:
+ ( CHARSET-PRIORITY-LIST-TICK . FONT-VECTOR).
+ Reorder FONT-VECTOR according to the current order of charset
+ (Vcharset_ordered_list), and update CHARSET-PRIORITY-LIST-TICK to
+ the latest value. */
static void
-fontset_set (fontset, c, newelt)
+reorder_font_vector (fontset_element)
+ Lisp_Object fontset_element;
+{
+ Lisp_Object vec, list, *new_vec;
+ int size;
+ int *charset_id_table;
+ int i, idx;
+
+ XSETCAR (fontset_element, make_number (charset_ordered_list_tick));
+ vec = XCDR (fontset_element);
+ size = ASIZE (vec);
+ if (size < 2)
+ /* No need of reordering VEC. */
+ return;
+ charset_id_table = (int *) alloca (sizeof (int) * size);
+ new_vec = (Lisp_Object *) alloca (sizeof (Lisp_Object) * size);
+ /* At first, extract ENCODING (a chaset ID) from VEC. VEC has this
+ form:
+ [[FACE-ID FONT-INDEX [ FONT-SPEC ENCODING REPERTORY ]] ...] */
+ for (i = 0; i < size; i++)
+ charset_id_table[i] = XINT (AREF (AREF (AREF (vec, i), 2), 1));
+
+ /* Then, store the elements of VEC in NEW_VEC in the correct
+ order. */
+ idx = 0;
+ for (list = Vcharset_ordered_list; CONSP (list); list = XCDR (list))
+ {
+ for (i = 0; i < size; i++)
+ if (charset_id_table[i] == XINT (XCAR (list)))
+ new_vec[idx++] = AREF (vec, i);
+ if (idx == size)
+ break;
+ }
+
+ /* At last, update VEC. */
+ for (i = 0; i < size; i++)
+ ASET (vec, i, new_vec[i]);
+}
+
+
+/* Load a font matching the font related attributes in FACE->lface and
+ font pattern in FONT_DEF of FONTSET, and return an index of the
+ font. FONT_DEF has this form:
+ [ FONT-SPEC ENCODING REPERTORY ]
+ If REPERTORY is nil, generate a char-table representing the font
+ repertory by looking into the font itself. */
+
+static int
+load_font_get_repertory (f, face, font_def, fontset)
+ FRAME_PTR f;
+ struct face *face;
+ Lisp_Object font_def;
+ Lisp_Object fontset;
+{
+ char *font_name;
+ struct font_info *font_info;
+
+ font_name = choose_face_font (f, face->lface, AREF (font_def, 0), NULL);
+ if (! (font_info = fs_load_font (f, font_name, XINT (AREF (font_def, 1)))))
+ return -1;
+
+ if (NILP (AREF (font_def, 2))
+ && NILP (Fassq (make_number (font_info->font_idx),
+ FONTSET_REPERTORY (fontset))))
+ {
+ /* We must look into the font to get the correct repertory as a
+ char-table. */
+ Lisp_Object repertory;
+
+ repertory = (*get_font_repertory_func) (f, font_info);
+ FONTSET_REPERTORY (fontset)
+ = Fcons (Fcons (make_number (font_info->font_idx), repertory),
+ FONTSET_REPERTORY (fontset));
+ }
+
+ return font_info->font_idx;
+}
+
+
+/* Return a face ID registerd in the realized fontset FONTSET for the
+ character C. If FACE is NULL, return -1 if a face is not yet
+ set. Otherwise, realize a proper face from FACE and return it. */
+
+static int
+fontset_face (fontset, c, face)
Lisp_Object fontset;
int c;
- Lisp_Object newelt;
+ struct face *face;
{
- int charset, code[3];
- Lisp_Object *elt;
- int i;
+ Lisp_Object base_fontset, elt, vec;
+ int i, from, to;
+ int font_idx;
+ FRAME_PTR f = XFRAME (FONTSET_FRAME (fontset));
+
+ base_fontset = FONTSET_BASE (fontset);
+ elt = CHAR_TABLE_REF (fontset, c);
+
+ if (EQ (elt, Qt))
+ goto try_default;
+
+ if (NILP (elt))
+ {
+ /* We have not yet decided a face for C. */
+ Lisp_Object range;
+
+ if (! face)
+ return -1;
+ elt = FONTSET_REF_AND_RANGE (base_fontset, c, from, to);
+ range = Fcons (make_number (from), make_number (to));
+ if (NILP (elt))
+ {
+ /* Record that we have no font for characters of this
+ range. */
+ FONTSET_SET (fontset, range, Qt);
+ goto try_default;
+ }
+ elt = Fcopy_sequence (elt);
+ /* Now ELT is a vector of FONT-DEFs. We at first change it to
+ FONT-VECTOR, a vector of [ nil nil FONT-DEF ]. */
+ for (i = 0; i < ASIZE (elt); i++)
+ {
+ Lisp_Object tmp;
+
+ tmp = Fmake_vector (make_number (3), Qnil);
+ ASET (tmp, 2, AREF (elt, i));
+ ASET (elt, i, tmp);
+ }
+ /* Then store (-1 . FONT-VECTOR) in the fontset. -1 is to force
+ reordering of FONT-VECTOR. */
+ elt = Fcons (make_number (-1), elt);
+ FONTSET_SET (fontset, range, elt);
+ }
- if (SINGLE_BYTE_CHAR_P (c))
+ if (XINT (XCAR (elt)) != charset_ordered_list_tick)
+ /* The priority of charsets is changed after we selected a face
+ for C last time. */
+ reorder_font_vector (elt);
+
+ vec = XCDR (elt);
+ /* Find the first available font in the font vector VEC. */
+ for (i = 0; i < ASIZE (vec); i++)
{
- FONTSET_ASCII (fontset) = newelt;
- return;
+ Lisp_Object font_def;
+
+ elt = AREF (vec, i);
+ /* ELT == [ FACE-ID FONT-INDEX [ FONT-SPEC ENCODING REPERTORY ] ] */
+ font_def = AREF (elt, 2);
+ if (INTEGERP (AREF (elt, 1)) && XINT (AREF (elt, 1)) < 0)
+ /* We couldn't open this font last time. */
+ continue;
+
+ if (!face && (NILP (AREF (elt, 1)) || NILP (AREF (elt, 0))))
+ /* We have not yet opened the font, or we have not yet made a
+ realized face for the font. */
+ return -1;
+
+ if (INTEGERP (AREF (font_def, 2)))
+ {
+ /* The repertory is specified by charset ID. */
+ struct charset *charset
+ = CHARSET_FROM_ID (XINT (AREF (font_def, 2)));
+
+ if (! CHAR_CHARSET_P (c, charset))
+ /* This font can't display C. */
+ continue;
+ }
+ else
+ {
+ Lisp_Object slot;
+
+ if (! INTEGERP (AREF (elt, 1)))
+ {
+ /* We have not yet opened a font matching this spec.
+ Open the best matching font now and register the
+ repertory. */
+ font_idx = load_font_get_repertory (f, face, font_def, fontset);
+ ASET (elt, 1, make_number (font_idx));
+ if (font_idx < 0)
+ /* This means that we couldn't find a font matching
+ FONT_DEF. */
+ continue;
+ }
+
+ slot = Fassq (AREF (elt, 1), FONTSET_REPERTORY (fontset));
+ if (! CONSP (slot))
+ abort ();
+ if (NILP (CHAR_TABLE_REF (XCDR (slot), c)))
+ /* This fond can't display C. */
+ continue;
+ }
+
+ /* Now we have decided to use this font spec to display C. */
+ if (INTEGERP (AREF (elt, 1)))
+ font_idx = XINT (AREF (elt, 1));
+ else
+ {
+ /* But not yet opened the best matching font. */
+ font_idx = load_font_get_repertory (f, face, font_def, fontset);
+ ASET (elt, 1, make_number (font_idx));
+ if (font_idx < 0)
+ continue;
+ }
+
+ /* Now we have the opened font. */
+ if (NILP (AREF (elt, 0)))
+ {
+ /* But not yet made a realized face that uses this font. */
+ int face_id = lookup_non_ascii_face (f, font_idx, face);
+
+ ASET (elt, 0, make_number (face_id));
+ }
+
+ /* Ok, this face can display C. */
+ return XINT (AREF (elt, 0));
}
- SPLIT_CHAR (c, charset, code[0], code[1]);
- code[2] = 0; /* anchor */
- elt = &XCHAR_TABLE (fontset)->contents[charset + 128];
- for (i = 0; code[i] > 0; i++)
+ try_default:
+ if (! EQ (base_fontset, Vdefault_fontset))
+ return fontset_face (FONTSET_FALLBACK (fontset), c, face);
+
+ /* We have tried all the fonts for C, but none of them can be opened
+ nor can display C. */
+ if (NILP (FONTSET_NOFONT_FACE (fontset)))
{
- if (!SUB_CHAR_TABLE_P (*elt))
- *elt = make_sub_char_table (*elt);
- elt = &XCHAR_TABLE (*elt)->contents[code[i]];
+ int face_id;
+
+ if (! face)
+ return -1;
+ face_id = lookup_non_ascii_face (f, -1, face);
+ FONTSET_NOFONT_FACE (fontset) = make_number (face_id);
}
- if (SUB_CHAR_TABLE_P (*elt))
- XCHAR_TABLE (*elt)->defalt = newelt;
- else
- *elt = newelt;
+ return XINT (FONTSET_NOFONT_FACE (fontset));
}
/* Return a newly created fontset with NAME. If BASE is nil, make a
- base fontset. Otherwise make a realized fontset whose parent is
+ base fontset. Otherwise make a realized fontset whose base is
BASE. */
static Lisp_Object
@@ -324,10 +660,11 @@ make_fontset (frame, name, base)
if (id + 1 == size)
{
+ /* We must grow Vfontset_table. */
Lisp_Object tem;
int i;
- tem = Fmake_vector (make_number (size + 8), Qnil);
+ tem = Fmake_vector (make_number (size + 32), Qnil);
for (i = 0; i < size; i++)
AREF (tem, i) = AREF (Vfontset_table, i);
Vfontset_table = tem;
@@ -336,130 +673,90 @@ make_fontset (frame, name, base)
fontset = Fmake_char_table (Qfontset, Qnil);
FONTSET_ID (fontset) = make_number (id);
- FONTSET_NAME (fontset) = name;
- FONTSET_FRAME (fontset) = frame;
- FONTSET_BASE (fontset) = base;
+ if (NILP (base))
+ {
+ FONTSET_NAME (fontset) = name;
+ }
+ else
+ {
+ FONTSET_NAME (fontset) = Qnil;
+ FONTSET_FRAME (fontset) = frame;
+ FONTSET_BASE (fontset) = base;
+ }
- AREF (Vfontset_table, id) = fontset;
+ ASET (Vfontset_table, id, fontset);
next_fontset_id = id + 1;
+ if (! NILP (base) && ! EQ (base, Vdefault_fontset))
+ FONTSET_FALLBACK (fontset) = make_fontset (frame, Qnil, Vdefault_fontset);
return fontset;
}
-/* Return 1 if ID is a valid fontset id, else return 0. */
-
-static INLINE int
-fontset_id_valid_p (id)
- int id;
-{
- return (id >= 0 && id < ASIZE (Vfontset_table) - 1);
-}
-
-
-/* Extract `family' and `registry' string from FONTNAME and a cons of
- them. Actually, `family' may also contain `foundry', `registry'
- may also contain `encoding' of FONTNAME. But, if FONTNAME doesn't
- conform to XLFD nor explicitely specifies the other fields
- (i.e. not using wildcard `*'), return FONTNAME. If FORCE is
- nonzero, specifications of the other fields are ignored, and return
- a cons as far as FONTNAME conform to XLFD. */
-
-static Lisp_Object
-font_family_registry (fontname, force)
- Lisp_Object fontname;
- int force;
-{
- Lisp_Object family, registry;
- const char *p = SDATA (fontname);
- const char *sep[15];
- int i = 0;
-
- while (*p && i < 15)
- if (*p++ == '-')
- {
- if (!force && i >= 2 && i <= 11 && *p != '*' && p[1] != '-')
- return fontname;
- sep[i++] = p;
- }
- if (i != 14)
- return fontname;
-
- family = make_unibyte_string (sep[0], sep[2] - 1 - sep[0]);
- registry = make_unibyte_string (sep[12], p - sep[12]);
- return Fcons (family, registry);
-}
-
-/********** INTERFACES TO xfaces.c and dispextern.h **********/
+/********** INTERFACES TO xfaces.c, xfns.c, and dispextern.h **********/
-/* Return name of the fontset with ID. */
+/* Return the name of the fontset who has ID. */
Lisp_Object
fontset_name (id)
int id;
{
Lisp_Object fontset;
+
fontset = FONTSET_FROM_ID (id);
return FONTSET_NAME (fontset);
}
-/* Return ASCII font name of the fontset with ID. */
+/* Return the ASCII font name of the fontset who has ID. */
Lisp_Object
fontset_ascii (id)
int id;
{
Lisp_Object fontset, elt;
+
fontset= FONTSET_FROM_ID (id);
elt = FONTSET_ASCII (fontset);
- return XCDR (elt);
+ /* It is assured that ELT is always a string (i.e. fontname
+ pattern). */
+ return elt;
}
-/* Free fontset of FACE. Called from free_realized_face. */
+/* Free fontset of FACE defined on frame F. Called from
+ free_realized_face. */
void
free_face_fontset (f, face)
FRAME_PTR f;
struct face *face;
{
- if (fontset_id_valid_p (face->fontset))
- {
- AREF (Vfontset_table, face->fontset) = Qnil;
- if (face->fontset < next_fontset_id)
- next_fontset_id = face->fontset;
- }
+ ASET (Vfontset_table, face->fontset, Qnil);
+ if (face->fontset < next_fontset_id)
+ next_fontset_id = face->fontset;
}
/* Return 1 iff FACE is suitable for displaying character C.
Otherwise return 0. Called from the macro FACE_SUITABLE_FOR_CHAR_P
- when C is not a single byte character.. */
+ when C is not an ASCII character. */
int
face_suitable_for_char_p (face, c)
struct face *face;
int c;
{
- Lisp_Object fontset, elt;
-
- if (SINGLE_BYTE_CHAR_P (c))
- return (face == face->ascii_face);
+ Lisp_Object fontset;
- xassert (fontset_id_valid_p (face->fontset));
fontset = FONTSET_FROM_ID (face->fontset);
- xassert (!BASE_FONTSET_P (fontset));
-
- elt = FONTSET_REF_VIA_BASE (fontset, c);
- return (!NILP (elt) && face->id == XFASTINT (elt));
+ return (face->id == fontset_face (fontset, c, NULL));
}
/* Return ID of face suitable for displaying character C on frame F.
- The selection of face is done based on the fontset of FACE. FACE
- should already have been realized for ASCII characters. Called
- from the macro FACE_FOR_CHAR when C is not a single byte character. */
+ FACE must be reazlied for ASCII characters in advance. Called from
+ the macro FACE_FOR_CHAR. */
int
face_for_char (f, face, c)
@@ -467,25 +764,15 @@ face_for_char (f, face, c)
struct face *face;
int c;
{
- Lisp_Object fontset, elt;
- int face_id;
+ Lisp_Object fontset;
+
+ if (ASCII_CHAR_P (c))
+ return face->ascii_face->id;
xassert (fontset_id_valid_p (face->fontset));
fontset = FONTSET_FROM_ID (face->fontset);
xassert (!BASE_FONTSET_P (fontset));
-
- elt = FONTSET_REF_VIA_BASE (fontset, c);
- if (!NILP (elt))
- return XINT (elt);
-
- /* No face is recorded for C in the fontset of FACE. Make a new
- realized face for C that has the same fontset. */
- face_id = lookup_face (f, face->lface, c, face);
-
- /* Record the face ID in FONTSET at the same index as the
- information in the base fontset. */
- FONTSET_SET (fontset, c, make_number (face_id));
- return face_id;
+ return fontset_face (fontset, c, face);
}
@@ -495,9 +782,10 @@ face_for_char (f, face, c)
Called from realize_x_face. */
int
-make_fontset_for_ascii_face (f, base_fontset_id)
+make_fontset_for_ascii_face (f, base_fontset_id, face)
FRAME_PTR f;
int base_fontset_id;
+ struct face *face;
{
Lisp_Object base_fontset, fontset, frame;
@@ -508,190 +796,79 @@ make_fontset_for_ascii_face (f, base_fontset_id)
if (!BASE_FONTSET_P (base_fontset))
base_fontset = FONTSET_BASE (base_fontset);
xassert (BASE_FONTSET_P (base_fontset));
+ if (! BASE_FONTSET_P (base_fontset))
+ abort ();
}
else
base_fontset = Vdefault_fontset;
fontset = make_fontset (frame, Qnil, base_fontset);
+ {
+ Lisp_Object elt;
+
+ elt = FONTSET_REF (base_fontset, 0);
+ elt = Fmake_vector (make_number (3), AREF (elt, 0));
+ ASET (elt, 0, make_number (face->id));
+ ASET (elt, 1, make_number (face->font_info_id));
+ elt = Fcons (make_number (charset_ordered_list_tick),
+ Fmake_vector (make_number (1), elt));
+ char_table_set_range (fontset, 0, 127, elt);
+ }
return XINT (FONTSET_ID (fontset));
}
-/* Return the font name pattern for C that is recorded in the fontset
- with ID. If a font name pattern is specified (instead of a cons of
- family and registry), check if a font can be opened by that pattern
- to get the fullname. If a font is opened, return that name.
- Otherwise, return nil. If ID is -1, or the fontset doesn't contain
- information about C, get the registry and encoding of C from the
- default fontset. Called from choose_face_font. */
-
-Lisp_Object
-fontset_font_pattern (f, id, c)
- FRAME_PTR f;
- int id, c;
-{
- Lisp_Object fontset, elt;
- struct font_info *fontp;
-
- elt = Qnil;
- if (fontset_id_valid_p (id))
- {
- fontset = FONTSET_FROM_ID (id);
- xassert (!BASE_FONTSET_P (fontset));
- fontset = FONTSET_BASE (fontset);
- elt = FONTSET_REF (fontset, c);
- }
- if (NILP (elt))
- elt = FONTSET_REF (Vdefault_fontset, c);
-
- if (!CONSP (elt))
- return Qnil;
- if (CONSP (XCDR (elt)))
- return XCDR (elt);
-
- /* The fontset specifies only a font name pattern (not cons of
- family and registry). If a font can be opened by that pattern,
- return the name of opened font. Otherwise return nil. The
- exception is a font for single byte characters. In that case, we
- return a cons of FAMILY and REGISTRY extracted from the opened
- font name. */
- elt = XCDR (elt);
- xassert (STRINGP (elt));
- fontp = FS_LOAD_FONT (f, c, SDATA (elt), -1);
- if (!fontp)
- return Qnil;
-
- return font_family_registry (build_string (fontp->full_name),
- SINGLE_BYTE_CHAR_P (c));
-}
-
-
#if defined(WINDOWSNT) && defined (_MSC_VER)
#pragma optimize("", off)
#endif
-/* Load a font named FONTNAME to display character C on frame F.
- Return a pointer to the struct font_info of the loaded font. If
- loading fails, return NULL. If FACE is non-zero and a fontset is
- assigned to it, record FACE->id in the fontset for C. If FONTNAME
- is NULL, the name is taken from the fontset of FACE or what
- specified by ID. */
+/* Load a font named FONTNAME on frame F. Return a pointer to the
+ struct font_info of the loaded font. If loading fails, return
+ NULL. CHARSET is an ID of charset to encode characters for this
+ font. If it is -1, find one from Vfont_encoding_alist. */
struct font_info *
-fs_load_font (f, c, fontname, id, face)
+fs_load_font (f, fontname, charset)
FRAME_PTR f;
- int c;
char *fontname;
- int id;
- struct face *face;
+ int charset;
{
- Lisp_Object fontset;
- Lisp_Object list, elt;
- int size = 0;
struct font_info *fontp;
- int charset = CHAR_CHARSET (c);
-
- if (face)
- id = face->fontset;
- if (id < 0)
- fontset = Qnil;
- else
- fontset = FONTSET_FROM_ID (id);
-
- if (!NILP (fontset)
- && !BASE_FONTSET_P (fontset))
- {
- elt = FONTSET_REF_VIA_BASE (fontset, c);
- if (!NILP (elt))
- {
- /* A suitable face for C is already recorded, which means
- that a proper font is already loaded. */
- int face_id = XINT (elt);
-
- xassert (face_id == face->id);
- face = FACE_FROM_ID (f, face_id);
- return (*get_font_info_func) (f, face->font_info_id);
- }
-
- if (!fontname && charset == CHARSET_ASCII)
- {
- elt = FONTSET_ASCII (fontset);
- fontname = SDATA (XCDR (elt));
- }
- }
if (!fontname)
/* No way to get fontname. */
- return 0;
+ return NULL;
- fontp = (*load_font_func) (f, fontname, size);
- if (!fontp)
- return 0;
+ fontp = (*load_font_func) (f, fontname, 0);
+ if (! fontp || fontp->charset >= 0)
+ return fontp;
- /* Fill in members (charset, vertical_centering, encoding, etc) of
- font_info structure that are not set by (*load_font_func). */
- fontp->charset = charset;
+ fontname = fontp->full_name;
- fontp->vertical_centering
- = (STRINGP (Vvertical_centering_font_regexp)
- && (fast_c_string_match_ignore_case
- (Vvertical_centering_font_regexp, fontp->full_name) >= 0));
-
- if (fontp->encoding[1] != FONT_ENCODING_NOT_DECIDED)
+ if (charset < 0)
{
- /* The font itself tells which code points to be used. Use this
- encoding for all other charsets. */
- int i;
+ Lisp_Object charset_symbol;
- fontp->encoding[0] = fontp->encoding[1];
- for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; i <= MAX_CHARSET; i++)
- fontp->encoding[i] = fontp->encoding[1];
+ charset_symbol = find_font_encoding (fontname);
+ if (CONSP (charset_symbol))
+ charset_symbol = XCAR (charset_symbol);
+ charset = XINT (CHARSET_SYMBOL_ID (charset_symbol));
}
- else
+ fontp->charset = charset;
+ fontp->vertical_centering = 0;
+ fontp->font_encoder = NULL;
+
+ if (charset != charset_ascii)
{
- /* The font itself doesn't have information about encoding. */
- int i;
+ fontp->vertical_centering
+ = (STRINGP (Vvertical_centering_font_regexp)
+ && (fast_c_string_match_ignore_case
+ (Vvertical_centering_font_regexp, fontname) >= 0));
- fontname = fontp->full_name;
- /* By default, encoding of ASCII chars is 0 (i.e. 0x00..0x7F),
- others is 1 (i.e. 0x80..0xFF). */
- fontp->encoding[0] = 0;
- for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; i <= MAX_CHARSET; i++)
- fontp->encoding[i] = 1;
- /* Then override them by a specification in Vfont_encoding_alist. */
- for (list = Vfont_encoding_alist; CONSP (list); list = XCDR (list))
- {
- elt = XCAR (list);
- if (CONSP (elt)
- && STRINGP (XCAR (elt)) && CONSP (XCDR (elt))
- && (fast_c_string_match_ignore_case (XCAR (elt), fontname)
- >= 0))
- {
- Lisp_Object tmp;
-
- for (tmp = XCDR (elt); CONSP (tmp); tmp = XCDR (tmp))
- if (CONSP (XCAR (tmp))
- && ((i = get_charset_id (XCAR (XCAR (tmp))))
- >= 0)
- && INTEGERP (XCDR (XCAR (tmp)))
- && XFASTINT (XCDR (XCAR (tmp))) < 4)
- fontp->encoding[i]
- = XFASTINT (XCDR (XCAR (tmp)));
- }
- }
+ if (find_ccl_program_func)
+ (*find_ccl_program_func) (fontp);
}
- fontp->font_encoder = (struct ccl_program *) 0;
-
- if (find_ccl_program_func)
- (*find_ccl_program_func) (fontp);
-
- /* If we loaded a font for a face that has fontset, record the face
- ID in the fontset for C. */
- if (face
- && !NILP (fontset)
- && !BASE_FONTSET_P (fontset))
- FONTSET_SET (fontset, c, make_number (face->id));
return fontp;
}
@@ -700,6 +877,34 @@ fs_load_font (f, c, fontname, id, face)
#endif
+/* Return ENCODING or a cons of ENCODING and REPERTORY of the font
+ FONTNAME. ENCODING is a charset symbol that specifies the encoding
+ of the font. REPERTORY is a charset symbol or nil. */
+
+
+static Lisp_Object
+find_font_encoding (fontname)
+ char *fontname;
+{
+ Lisp_Object tail, elt;
+
+ for (tail = Vfont_encoding_alist; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ if (CONSP (elt)
+ && STRINGP (XCAR (elt))
+ && fast_c_string_match_ignore_case (XCAR (elt), fontname) >= 0
+ && (SYMBOLP (XCDR (elt))
+ ? CHARSETP (XCDR (elt))
+ : CONSP (XCDR (elt)) && CHARSETP (XCAR (XCDR (elt)))))
+ return (XCDR (elt));
+ }
+ /* We don't know the encoding of this font. Let's assume Unicode
+ encoding. */
+ return Qunicode;
+}
+
+
/* Cache data used by fontset_pattern_regexp. The car part is a
pattern string containing at least one wild card, the cdr part is
the corresponding regular expression. */
@@ -782,7 +987,7 @@ fs_query_fontset (name, regexpp)
for (i = 0; i < ASIZE (Vfontset_table); i++)
{
Lisp_Object fontset;
- const unsigned char *this_name;
+ unsigned char *this_name;
fontset = FONTSET_FROM_ID (i);
if (NILP (fontset)
@@ -826,9 +1031,7 @@ If REGEXPP is non-nil, PATTERN is a regular expression. */)
return FONTSET_NAME (fontset);
}
-/* Return a list of base fontset names matching PATTERN on frame F.
- If SIZE is not 0, it is the size (maximum bound width) of fontsets
- to be listed. */
+/* Return a list of base fontset names matching PATTERN on frame F. */
Lisp_Object
list_fontsets (f, pattern, size)
@@ -847,7 +1050,7 @@ list_fontsets (f, pattern, size)
for (id = 0; id < ASIZE (Vfontset_table); id++)
{
Lisp_Object fontset;
- const unsigned char *name;
+ unsigned char *name;
fontset = FONTSET_FROM_ID (id);
if (NILP (fontset)
@@ -856,106 +1059,62 @@ list_fontsets (f, pattern, size)
continue;
name = SDATA (FONTSET_NAME (fontset));
- if (!NILP (regexp)
+ if (STRINGP (regexp)
? (fast_c_string_match_ignore_case (regexp, name) < 0)
: strcmp (SDATA (pattern), name))
continue;
- if (size)
- {
- struct font_info *fontp;
- fontp = FS_LOAD_FONT (f, 0, NULL, id);
- if (!fontp || size != fontp->size)
- continue;
- }
val = Fcons (Fcopy_sequence (FONTSET_NAME (fontset)), val);
}
return val;
}
-DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0,
- doc: /* Create a new fontset NAME that contains font information in FONTLIST.
-FONTLIST is an alist of charsets vs corresponding font name patterns. */)
- (name, fontlist)
- Lisp_Object name, fontlist;
-{
- Lisp_Object fontset, elements, ascii_font;
- Lisp_Object tem, tail, elt;
- (*check_window_system_func) ();
+/* Free all realized fontsets whose base fontset is BASE. */
- CHECK_STRING (name);
- CHECK_LIST (fontlist);
+static void
+free_realized_fontsets (base)
+ Lisp_Object base;
+{
+#if 0
+ int id;
- name = Fdowncase (name);
- tem = Fquery_fontset (name, Qnil);
- if (!NILP (tem))
- error ("Fontset `%s' matches the existing fontset `%s'",
- SDATA (name), SDATA (tem));
-
- /* Check the validity of FONTLIST while creating a template for
- fontset elements. */
- elements = ascii_font = Qnil;
- for (tail = fontlist; CONSP (tail); tail = XCDR (tail))
+ /* For the moment, this doesn't work because free_realized_face
+ doesn't remove FACE from a cache. Until we find a solution, we
+ suppress this code, and simply use Fclear_face_cache even though
+ that is not efficient. */
+ BLOCK_INPUT;
+ for (id = 0; id < ASIZE (Vfontset_table); id++)
{
- int c, charset;
-
- tem = XCAR (tail);
- if (!CONSP (tem)
- || (charset = get_charset_id (XCAR (tem))) < 0
- || (!STRINGP (XCDR (tem)) && !CONSP (XCDR (tem))))
- error ("Elements of fontlist must be a cons of charset and font name pattern");
+ Lisp_Object this = AREF (Vfontset_table, id);
- tem = XCDR (tem);
- if (STRINGP (tem))
- tem = Fdowncase (tem);
- else
- tem = Fcons (Fdowncase (Fcar (tem)), Fdowncase (Fcdr (tem)));
- if (charset == CHARSET_ASCII)
- ascii_font = tem;
- else
+ if (EQ (FONTSET_BASE (this), base))
{
- c = MAKE_CHAR (charset, 0, 0);
- elements = Fcons (Fcons (make_number (c), tem), elements);
- }
- }
+ Lisp_Object tail;
- if (NILP (ascii_font))
- error ("No ASCII font in the fontlist");
+ for (tail = FONTSET_FACE_ALIST (this); CONSP (tail);
+ tail = XCDR (tail))
+ {
+ FRAME_PTR f = XFRAME (FONTSET_FRAME (this));
+ int face_id = XINT (XCDR (XCAR (tail)));
+ struct face *face = FACE_FROM_ID (f, face_id);
- fontset = make_fontset (Qnil, name, Qnil);
- FONTSET_ASCII (fontset) = Fcons (make_number (0), ascii_font);
- for (; CONSP (elements); elements = XCDR (elements))
- {
- elt = XCAR (elements);
- tem = XCDR (elt);
- if (STRINGP (tem))
- tem = font_family_registry (tem, 0);
- tem = Fcons (XCAR (elt), tem);
- FONTSET_SET (fontset, XINT (XCAR (elt)), tem);
+ /* Face THIS itself is also freed by the following call. */
+ free_realized_face (f, face);
+ }
+ }
}
-
- return Qnil;
-}
-
-
-/* Clear all elements of FONTSET for multibyte characters. */
-
-static void
-clear_fontset_elements (fontset)
- Lisp_Object fontset;
-{
- int i;
-
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- XCHAR_TABLE (fontset)->contents[i] = Qnil;
+ UNBLOCK_INPUT;
+#else /* not 0 */
+ Fclear_face_cache (Qt);
+#endif /* not 0 */
}
/* Check validity of NAME as a fontset name and return the
corresponding fontset. If not valid, signal an error.
- If NAME is nil, return Vdefault_fontset. */
+ If NAME is t, return Vdefault_fontset. */
static Lisp_Object
check_fontset_name (name)
@@ -963,7 +1122,7 @@ check_fontset_name (name)
{
int id;
- if (EQ (name, Qnil))
+ if (EQ (name, Qt))
return Vdefault_fontset;
CHECK_STRING (name);
@@ -973,121 +1132,318 @@ check_fontset_name (name)
return FONTSET_FROM_ID (id);
}
-DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 4, 0,
- doc: /* Modify fontset NAME to use FONTNAME for CHARACTER.
+static void
+accumulate_script_ranges (arg, range, val)
+ Lisp_Object arg, range, val;
+{
+ if (EQ (XCAR (arg), val))
+ {
+ if (CONSP (range))
+ XSETCDR (arg, Fcons (Fcons (XCAR (range), XCDR (range)), XCDR (arg)));
+ else
+ XSETCDR (arg, Fcons (Fcons (range, range), XCDR (arg)));
+ }
+}
+
+
+/* Return an ASCII font name generated from fontset name NAME and
+ ASCII font specification ASCII_SPEC. NAME is a string conforming
+ to XLFD. ASCII_SPEC is a vector:
+ [FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY]. */
+
+static INLINE Lisp_Object
+generate_ascii_font_name (name, ascii_spec)
+ Lisp_Object name, ascii_spec;
+{
+ Lisp_Object vec;
+ int i;
+
+ vec = split_font_name_into_vector (name);
+ for (i = FONT_SPEC_FAMILY_INDEX; i <= FONT_SPEC_ADSTYLE_INDEX; i++)
+ if (! NILP (AREF (ascii_spec, i)))
+ ASET (vec, 1 + i, AREF (ascii_spec, i));
+ if (! NILP (AREF (ascii_spec, FONT_SPEC_REGISTRY_INDEX)))
+ ASET (vec, 12, AREF (ascii_spec, FONT_SPEC_REGISTRY_INDEX));
+ return build_font_name_from_vector (vec);
+}
+
+
+DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 5, 0,
+ doc: /*
+Modify fontset NAME to use FONT-SPEC for CHARACTER.
-If NAME is nil, modify the default fontset.
CHARACTER may be a cons; (FROM . TO), where FROM and TO are
-non-generic characters. In that case, use FONTNAME
-for all characters in the range FROM and TO (inclusive).
-CHARACTER may be a charset. In that case, use FONTNAME
-for all character in the charsets.
-
-FONTNAME may be a cons; (FAMILY . REGISTRY), where FAMILY is a family
-name of a font, REGISTRY is a registry name of a font. */)
- (name, character, fontname, frame)
- Lisp_Object name, character, fontname, frame;
+characters. In that case, use FONT-SPEC for all characters in the
+range FROM and TO (inclusive).
+
+CHARACTER may be a script name symbol. In that case, use FONT-SPEC
+for all characters that belong to the script.
+
+CHARACTER may be a charset which has a :code-offset attribute and the
+attribute value is greater than the maximum Unicode character
+\(#x10FFFF). In that case, use FONT-SPEC for all characters in the
+charset.
+
+FONT-SPEC may be:
+ * A vector [ FAMILY WEIGHT SLANT WIDTH ADSTYLE REGISTRY ].
+ See the documentation of `set-face-attribute' for the detail of
+ these vector elements;
+ * A cons (FAMILY . REGISTRY), where FAMILY is a font family name and
+ REGISTRY is a font registry name;
+ * A font name string.
+
+Optional 4th argument FRAME, if non-nil, is a frame. This argument is
+kept for backward compatibility and has no meaning.
+
+Optional 5th argument ADD, if non-nil, specifies how to add FONT-SPEC
+to the font specifications for RANGE previously set. If it is
+`prepend', FONT-SPEC is prepended. If it is `append', FONT-SPEC is
+appended. By default, FONT-SPEC overrides the previous settings. */)
+ (name, character, font_spec, frame, add)
+ Lisp_Object name, character, font_spec, frame, add;
{
- Lisp_Object fontset, elt;
- Lisp_Object realized;
- int from, to;
- int id;
- Lisp_Object family, registry;
+ Lisp_Object fontset;
+ Lisp_Object font_def, registry;
+ Lisp_Object encoding, repertory;
+ Lisp_Object range_list;
fontset = check_fontset_name (name);
- if (CONSP (character))
+ /* The arg FRAME is kept for backward compatibility. We only check
+ the validity. */
+ if (!NILP (frame))
+ CHECK_LIVE_FRAME (frame);
+
+ if (VECTORP (font_spec))
{
- /* CH should be (FROM . TO) where FROM and TO are non-generic
- characters. */
- CHECK_NUMBER_CAR (character);
- CHECK_NUMBER_CDR (character);
- from = XINT (XCAR (character));
- to = XINT (XCDR (character));
- if (!char_valid_p (from, 0) || !char_valid_p (to, 0))
- error ("Character range should be by non-generic characters.");
- if (!NILP (name)
- && (SINGLE_BYTE_CHAR_P (from) || SINGLE_BYTE_CHAR_P (to)))
- error ("Can't change font for a single byte character");
+ int j;
+
+ if (ASIZE (font_spec) != FONT_SPEC_MAX_INDEX)
+ args_out_of_range (make_number (FONT_SPEC_MAX_INDEX),
+ make_number (ASIZE (font_spec)));
+
+ font_spec = Fcopy_sequence (font_spec);
+ for (j = 0; j < FONT_SPEC_MAX_INDEX - 1; j++)
+ if (! NILP (AREF (font_spec, j)))
+ {
+ CHECK_STRING (AREF (font_spec, j));
+ ASET (font_spec, j, Fdowncase (AREF (font_spec, j)));
+ }
+ /* REGISTRY should not be omitted. */
+ CHECK_STRING (AREF (font_spec, FONT_SPEC_REGISTRY_INDEX));
+ registry = Fdowncase (AREF (font_spec, FONT_SPEC_REGISTRY_INDEX));
+ ASET (font_spec, FONT_SPEC_REGISTRY_INDEX, registry);
+
}
- else if (SYMBOLP (character))
+ else if (CONSP (font_spec))
{
- elt = Fget (character, Qcharset);
- if (!VECTORP (elt) || ASIZE (elt) < 1 || !NATNUMP (AREF (elt, 0)))
- error ("Invalid charset: %s", SDATA (SYMBOL_NAME (character)));
- from = MAKE_CHAR (XINT (AREF (elt, 0)), 0, 0);
- to = from;
+ Lisp_Object family;
+
+ family = XCAR (font_spec);
+ registry = XCDR (font_spec);
+
+ if (! NILP (family))
+ {
+ CHECK_STRING (family);
+ family = Fdowncase (family);
+ }
+ CHECK_STRING (registry);
+ registry = Fdowncase (registry);
+ font_spec = Fmake_vector (make_number (FONT_SPEC_MAX_INDEX), Qnil);
+ ASET (font_spec, FONT_SPEC_FAMILY_INDEX, family);
+ ASET (font_spec, FONT_SPEC_REGISTRY_INDEX, registry);
}
else
{
- CHECK_NUMBER (character);
- from = XINT (character);
- to = from;
+ CHECK_STRING (font_spec);
+ font_spec = Fdowncase (font_spec);
+ registry = split_font_name_into_vector (font_spec);
+ if (NILP (registry))
+ error ("No XLFD: %s", SDATA (font_spec));
+ if (NILP (AREF (registry, 12))
+ || NILP (AREF (registry, 13)))
+ error ("Registry must be specified");
+ registry = concat2 (concat2 (AREF (registry, 12), build_string ("-")),
+ AREF (registry, 13));
}
- if (!char_valid_p (from, 1))
- invalid_character (from);
- if (SINGLE_BYTE_CHAR_P (from))
- error ("Can't change font for a single byte character");
- if (from < to)
+
+ if (STRINGP (font_spec))
+ encoding = find_font_encoding ((char *) SDATA (font_spec));
+ else
+ encoding = find_font_encoding ((char *) SDATA (registry));
+ if (SYMBOLP (encoding))
+ encoding = repertory = CHARSET_SYMBOL_ID (encoding);
+ else
{
- if (!char_valid_p (to, 1))
- invalid_character (to);
- if (SINGLE_BYTE_CHAR_P (to))
- error ("Can't change font for a single byte character");
+ repertory = XCDR (encoding);
+ encoding = CHARSET_SYMBOL_ID (XCAR (encoding));
}
+ font_def = Fmake_vector (make_number (3), font_spec);
+ ASET (font_def, 1, encoding);
+ ASET (font_def, 2, repertory);
- if (STRINGP (fontname))
+ if (CHARACTERP (character))
+ range_list = Fcons (Fcons (character, character), Qnil);
+ else if (CONSP (character))
{
- fontname = Fdowncase (fontname);
- elt = Fcons (make_number (from), font_family_registry (fontname, 0));
+ Lisp_Object from, to;
+
+ from = Fcar (character);
+ to = Fcdr (character);
+ CHECK_CHARACTER (from);
+ CHECK_CHARACTER (to);
+ range_list = Fcons (character, Qnil);
}
else
{
- CHECK_CONS (fontname);
- family = XCAR (fontname);
- registry = XCDR (fontname);
- if (!NILP (family))
+ Lisp_Object script_list;
+ Lisp_Object val;
+
+ CHECK_SYMBOL (character);
+ range_list = Qnil;
+ script_list = XCHAR_TABLE (Vchar_script_table)->extras[0];
+ if (! NILP (Fmemq (character, script_list)))
{
- CHECK_STRING (family);
- family = Fdowncase (family);
+ val = Fcons (character, Qnil);
+ map_char_table (accumulate_script_ranges, Qnil, Vchar_script_table,
+ val);
+ range_list = XCDR (val);
}
- if (!NILP (registry))
+ else if (CHARSETP (character))
{
- CHECK_STRING (registry);
- registry = Fdowncase (registry);
+ struct charset *charset;
+
+ CHECK_CHARSET_GET_CHARSET (character, charset);
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
+ range_list
+ = Fcons (Fcons (make_number (CHARSET_MIN_CHAR (charset)),
+ make_number (CHARSET_MAX_CHAR (charset))),
+ range_list);
+ if (EQ (character, Qascii))
+ {
+ if (VECTORP (font_spec))
+ font_spec = generate_ascii_font_name (FONTSET_NAME (fontset),
+ font_spec);
+ FONTSET_ASCII (fontset) = font_spec;
+ }
}
- elt = Fcons (make_number (from), Fcons (family, registry));
+
+ if (NILP (range_list))
+ error ("Invalid script or charset name: %s",
+ SDATA (SYMBOL_NAME (character)));
}
- /* The arg FRAME is kept for backward compatibility. We only check
- the validity. */
- if (!NILP (frame))
- CHECK_LIVE_FRAME (frame);
+ for (; CONSP (range_list); range_list = XCDR (range_list))
+ FONTSET_ADD (fontset, XCAR (range_list), font_def, add);
- for (; from <= to; from++)
- FONTSET_SET (fontset, from, elt);
- Foptimize_char_table (fontset);
+ /* Free all realized fontsets whose base is FONTSET. This way, the
+ specified character(s) are surely redisplayed by a correct
+ font. */
+ free_realized_fontsets (fontset);
- /* If there's a realized fontset REALIZED whose parent is FONTSET,
- clear all the elements of REALIZED and free all multibyte faces
- whose fontset is REALIZED. This way, the specified character(s)
- are surely redisplayed by a correct font. */
- for (id = 0; id < ASIZE (Vfontset_table); id++)
+ return Qnil;
+}
+
+
+DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0,
+ doc: /* Create a new fontset NAME from font information in FONTLIST.
+
+FONTLIST is an alist of scripts vs the corresponding font specification list.
+Each element of FONTLIST has the form (SCRIPT FONT-SPEC ...), where a
+character of SCRIPT is displayed by a font that matches one of
+FONT-SPEC.
+
+SCRIPT is a symbol that appears in the first extra slot of the
+char-table `char-script-table'.
+
+FONT-SPEC is a vector, a cons, or a string. See the documentation of
+`set-fontset-font' for the meaning. */)
+ (name, fontlist)
+ Lisp_Object name, fontlist;
+{
+ Lisp_Object fontset;
+ Lisp_Object val;
+ int id;
+
+ CHECK_STRING (name);
+ CHECK_LIST (fontlist);
+
+ id = fs_query_fontset (name, 0);
+ if (id < 0)
{
- realized = AREF (Vfontset_table, id);
- if (!NILP (realized)
- && !BASE_FONTSET_P (realized)
- && EQ (FONTSET_BASE (realized), fontset))
- {
- FRAME_PTR f = XFRAME (FONTSET_FRAME (realized));
- clear_fontset_elements (realized);
- free_realized_multibyte_face (f, id);
- }
+ name = Fdowncase (name);
+ val = split_font_name_into_vector (name);
+ if (NILP (val))
+ error ("Fontset name must be in XLFD format");
+ if (strcmp (SDATA (AREF (val, 12)), "fontset"))
+ error ("Registry field of fontset name must be \"fontset\"");
+ Vfontset_alias_alist
+ = Fcons (Fcons (name,
+ concat2 (concat2 (AREF (val, 12), build_string ("-")),
+ AREF (val, 13))),
+ Vfontset_alias_alist);
+ ASET (val, 12, build_string ("iso8859-1"));
+ fontset = make_fontset (Qnil, name, Qnil);
+ FONTSET_ASCII (fontset) = build_font_name_from_vector (val);
+ }
+ else
+ {
+ fontset = FONTSET_FROM_ID (id);;
+ free_realized_fontsets (fontset);
+ Fset_char_table_range (fontset, Qt, Qnil);
}
- return Qnil;
+ for (; ! NILP (fontlist); fontlist = Fcdr (fontlist))
+ {
+ Lisp_Object elt, script;
+
+ elt = Fcar (fontlist);
+ script = Fcar (elt);
+ for (elt = Fcdr (elt); ! NILP (elt); elt = Fcdr (elt))
+ Fset_fontset_font (name, script, Fcar (elt), Qnil, Qappend);
+ }
+ return name;
+}
+
+
+/* Number of fontsets created from a fontname automatically. */
+static int n_auto_fontsets;
+
+int
+new_fontset_from_font_name (Lisp_Object fontname)
+{
+ Lisp_Object name;
+ Lisp_Object vec;
+
+ fontname = Fdowncase (fontname);
+ vec = split_font_name_into_vector (fontname);
+ if ( NILP (vec))
+ vec = Fmake_vector (make_number (14), build_string (""));
+ ASET (vec, 12, build_string ("fontset"));
+ if (n_auto_fontsets == 0)
+ {
+ ASET (vec, 13, build_string ("startup"));
+ name = build_font_name_from_vector (vec);
+ n_auto_fontsets++;
+ }
+ else
+ {
+ char temp[20];
+
+ do {
+ sprintf (temp, "auto%d", n_auto_fontsets);
+ ASET (vec, 13, build_string (temp));
+ name = build_font_name_from_vector (vec);
+ n_auto_fontsets++;
+ } while (fs_query_fontset (name, 0) >= 0);
+ }
+ name = Fnew_fontset (name,
+ Fcons (Fcons (Qascii, Fcons (fontname, Qnil)), Qnil));
+ Vfontset_alias_alist = Fcons (Fcons (name, fontname), Vfontset_alias_alist);
+ return fs_query_fontset (name, 0);
}
+
DEFUN ("font-info", Ffont_info, Sfont_info, 1, 2, 0,
doc: /* Return information about a font named NAME on frame FRAME.
If FRAME is omitted or nil, use the selected frame.
@@ -1174,8 +1530,6 @@ DEFUN ("internal-char-font", Finternal_char_font, Sinternal_char_font, 1, 1, 0,
args_out_of_range_3 (position, make_number (BEGV), make_number (ZV));
pos_byte = CHAR_TO_BYTE (pos);
c = FETCH_CHAR (pos_byte);
- if (! CHAR_VALID_P (c, 0))
- return Qnil;
window = Fget_buffer_window (Fcurrent_buffer (), Qnil);
if (NILP (window))
return Qnil;
@@ -1190,188 +1544,138 @@ DEFUN ("internal-char-font", Finternal_char_font, Sinternal_char_font, 1, 1, 0,
}
-/* Called from Ffontset_info via map_char_table on each leaf of
- fontset. ARG is a copy of the default fontset. The current leaf
- is indexed by CHARACTER and has value ELT. This function override
- the copy by ELT if ELT is not nil. */
-
-static void
-override_font_info (fontset, character, elt)
- Lisp_Object fontset, character, elt;
-{
- if (! NILP (elt))
- Faset (fontset, character, elt);
-}
+DEFUN ("fontset-info", Ffontset_info, Sfontset_info, 1, 2, 0,
+ doc: /* Return information about a fontset FONTSET on frame FRAME.
+The value is a char-table of which elements has this form.
-/* Called from Ffontset_info via map_char_table on each leaf of
- fontset. ARG is a list (LAST FONT-INFO ...), where LAST is `(last
- ARG)' and FONT-INFOs have this form:
- (CHAR FONT-SPEC) or ((FROM . TO) FONT-SPEC)
- The current leaf is indexed by CHARACTER and has value ELT. This
- function add the information of the current leaf to ARG by
- appending a new element or modifying the last element. */
+ ((FONT-PATTERN OPENED-FONT ...) ...)
-static void
-accumulate_font_info (arg, character, elt)
- Lisp_Object arg, character, elt;
-{
- Lisp_Object last, last_char, last_elt;
+FONT-PATTERN is a vector:
- if (!CONSP (elt) && !SINGLE_BYTE_CHAR_P (XINT (character)))
- elt = FONTSET_REF (Vdefault_fontset, XINT (character));
- if (!CONSP (elt))
- return;
- last = XCAR (arg);
- last_char = XCAR (XCAR (last));
- last_elt = XCAR (XCDR (XCAR (last)));
- elt = XCDR (elt);
- if (!NILP (Fequal (elt, last_elt)))
- {
- int this_charset = CHAR_CHARSET (XINT (character));
+ [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ]
- if (CONSP (last_char)) /* LAST_CHAR == (FROM . TO) */
- {
- if (this_charset == CHAR_CHARSET (XINT (XCAR (last_char))))
- {
- XSETCDR (last_char, character);
- return;
- }
- }
- else if (XINT (last_char) == XINT (character))
- return;
- else if (this_charset == CHAR_CHARSET (XINT (last_char)))
- {
- XSETCAR (XCAR (last), Fcons (last_char, character));
- return;
- }
- }
- XSETCDR (last, Fcons (Fcons (character, Fcons (elt, Qnil)), Qnil));
- XSETCAR (arg, XCDR (last));
-}
+or a string of font name pattern.
+OPENED-FONT is a name of a font actually opened.
-DEFUN ("fontset-info", Ffontset_info, Sfontset_info, 1, 2, 0,
- doc: /* Return information about a fontset named NAME on frame FRAME.
-If NAME is nil, return information about the default fontset.
-The value is a vector:
- [ SIZE HEIGHT ((CHARSET-OR-RANGE FONT-SPEC OPENED ...) ...) ],
-where,
- SIZE is the maximum bound width of ASCII font in the fontset,
- HEIGHT is the maximum bound height of ASCII font in the fontset,
- CHARSET-OR-RANGE is a charset, a character (may be a generic character)
- or a cons of two characters specifying the range of characters.
- FONT-SPEC is a fontname pattern string or a cons (FAMILY . REGISTRY),
- where FAMILY is a `FAMILY' field of a XLFD font name,
- REGISTRY is a `CHARSET_REGISTRY' field of a XLFD font name.
- FAMILY may contain a `FOUNDRY' field at the head.
- REGISTRY may contain a `CHARSET_ENCODING' field at the tail.
- OPENEDs are names of fonts actually opened.
-If the ASCII font is not yet opened, SIZE and HEIGHT are 0.
-If FRAME is omitted, it defaults to the currently selected frame. */)
- (name, frame)
- Lisp_Object name, frame;
+The char-table has one extra slot. The value is a char-table
+containing the information about the derived fonts from the default
+fontset. The format is the same as abobe. */)
+ (fontset, frame)
+ Lisp_Object fontset, frame;
{
- Lisp_Object fontset;
FRAME_PTR f;
- Lisp_Object indices[3];
- Lisp_Object val, tail, elt;
+ Lisp_Object table, val, elt;
Lisp_Object *realized;
- struct font_info *fontp = NULL;
int n_realized = 0;
- int i;
+ int fallback;
+ int c, i, j;
(*check_window_system_func) ();
- fontset = check_fontset_name (name);
+ fontset = check_fontset_name (fontset);
if (NILP (frame))
frame = selected_frame;
CHECK_LIVE_FRAME (frame);
f = XFRAME (frame);
- /* Recode realized fontsets whose base is FONTSET in the table
- `realized'. */
+ /* Recode fontsets realized on FRAME from the base fontset FONTSET
+ in the table `realized'. */
realized = (Lisp_Object *) alloca (sizeof (Lisp_Object)
* ASIZE (Vfontset_table));
for (i = 0; i < ASIZE (Vfontset_table); i++)
{
elt = FONTSET_FROM_ID (i);
if (!NILP (elt)
- && EQ (FONTSET_BASE (elt), fontset))
+ && EQ (FONTSET_BASE (elt), fontset)
+ && EQ (FONTSET_FRAME (elt), frame))
realized[n_realized++] = elt;
}
- if (! EQ (fontset, Vdefault_fontset))
- {
- /* Merge FONTSET onto the default fontset. */
- val = Fcopy_sequence (Vdefault_fontset);
- map_char_table (override_font_info, Qnil, fontset, fontset, val, 0, indices);
- fontset = val;
- }
- /* Accumulate information of the fontset in VAL. The format is
- (LAST FONT-INFO FONT-INFO ...), where FONT-INFO is (CHAR-OR-RANGE
- FONT-SPEC). See the comment for accumulate_font_info for the
- detail. */
- val = Fcons (Fcons (make_number (0),
- Fcons (XCDR (FONTSET_ASCII (fontset)), Qnil)),
- Qnil);
- val = Fcons (val, val);
- map_char_table (accumulate_font_info, Qnil, fontset, fontset, val, 0, indices);
- val = XCDR (val);
-
- /* For each FONT-INFO, if CHAR_OR_RANGE (car part) is a generic
- character for a charset, replace it with the charset symbol. If
- fonts are opened for FONT-SPEC, append the names of the fonts to
- FONT-SPEC. */
- for (tail = val; CONSP (tail); tail = XCDR (tail))
+ table = Fmake_char_table (Qfontset_info, Qnil);
+ XCHAR_TABLE (table)->extras[0] = Fmake_char_table (Qnil, Qnil);
+ /* Accumulate information of the fontset in TABLE. The format of
+ each element is ((FONT-SPEC OPENED-FONT ...) ...). */
+ for (fallback = 0; fallback <= 1; fallback++)
{
- int c;
- elt = XCAR (tail);
- if (INTEGERP (XCAR (elt)))
+ Lisp_Object this_fontset, this_table;
+
+ if (! fallback)
{
- int charset, c1, c2;
- c = XINT (XCAR (elt));
- SPLIT_CHAR (c, charset, c1, c2);
- if (c1 == 0)
- XSETCAR (elt, CHARSET_SYMBOL (charset));
+ this_fontset = fontset;
+ this_table = table;
}
else
- c = XINT (XCAR (XCAR (elt)));
- for (i = 0; i < n_realized; i++)
{
- Lisp_Object face_id, font;
- struct face *face;
+ this_fontset = Vdefault_fontset;
+ this_table = XCHAR_TABLE (table)->extras[0];
+#if 0
+ for (i = 0; i < n_realized; i++)
+ realized[i] = FONTSET_FALLBACK (realized[i]);
+#endif
+ }
+ for (c = 0; c <= MAX_5_BYTE_CHAR; )
+ {
+ int from, to;
- face_id = FONTSET_REF_VIA_BASE (realized[i], c);
- if (INTEGERP (face_id))
+ val = char_table_ref_and_range (this_fontset, c, &from, &to);
+ if (VECTORP (val))
{
- face = FACE_FROM_ID (f, XINT (face_id));
- if (face && face->font && face->font_name)
+ Lisp_Object alist;
+
+ /* At first, set ALIST to ((FONT-SPEC) ...). */
+ for (alist = Qnil, i = 0; i < ASIZE (val); i++)
+ alist = Fcons (Fcons (AREF (AREF (val, i), 0), Qnil), alist);
+ alist = Fnreverse (alist);
+
+ /* Then store opend font names to cdr of each elements. */
+ for (i = 0; i < n_realized; i++)
{
- font = build_string (face->font_name);
- if (NILP (Fmember (font, XCDR (XCDR (elt)))))
- XSETCDR (XCDR (elt), Fcons (font, XCDR (XCDR (elt))));
+ if (NILP (realized[i]))
+ continue;
+ val = FONTSET_REF (realized[i], c);
+ if (NILP (val))
+ continue;
+ val = XCDR (val);
+ /* Now VAL is [[FACE-ID FONT-INDEX FONT-DEF] ...].
+ If a font of an element is already opened,
+ FONT-INDEX of the element is integer. */
+ for (j = 0; j < ASIZE (val); j++)
+ if (INTEGERP (AREF (AREF (val, j), 0)))
+ {
+ Lisp_Object font_idx;
+
+ font_idx = AREF (AREF (val, j), 1);
+ elt = Fassq (AREF (AREF (AREF (val, j), 2), 0), alist);
+ if (CONSP (elt)
+ && NILP (Fmemq (font_idx, XCDR(elt))))
+ nconc2 (elt, Fcons (font_idx, Qnil));
+ }
}
+ for (val = alist; CONSP (val); val = XCDR (val))
+ for (elt = XCDR (XCAR (val)); CONSP (elt); elt = XCDR (elt))
+ {
+ struct font_info *font_info
+ = (*get_font_info_func) (f, XINT (XCAR (elt)));
+ XSETCAR (elt, build_string (font_info->full_name));
+ }
+
+ /* Store ALIST in TBL for characters C..TO. */
+ char_table_set_range (this_table, c, to, alist);
}
+ c = to + 1;
}
}
- elt = Fcdr (Fcdr (Fassq (CHARSET_SYMBOL (CHARSET_ASCII), val)));
- if (CONSP (elt))
- {
- elt = XCAR (elt);
- fontp = (*query_font_func) (f, SDATA (elt));
- }
- val = Fmake_vector (make_number (3), val);
- AREF (val, 0) = fontp ? make_number (fontp->size) : make_number (0);
- AREF (val, 1) = fontp ? make_number (fontp->height) : make_number (0);
- return val;
+ return table;
}
+
DEFUN ("fontset-font", Ffontset_font, Sfontset_font, 2, 2, 0,
doc: /* Return a font name pattern for character CH in fontset NAME.
-If NAME is nil, find a font name pattern in the default fontset. */)
+If NAME is t, find a font name pattern in the default fontset. */)
(name, ch)
Lisp_Object name, ch;
{
@@ -1380,16 +1684,10 @@ If NAME is nil, find a font name pattern in the default fontset. */)
fontset = check_fontset_name (name);
- CHECK_NUMBER (ch);
+ CHECK_CHARACTER (ch);
c = XINT (ch);
- if (!char_valid_p (c, 1))
- invalid_character (c);
-
elt = FONTSET_REF (fontset, c);
- if (CONSP (elt))
- elt = XCDR (elt);
-
- return elt;
+ return Fcopy_sequence (elt);
}
DEFUN ("fontset-list", Ffontset_list, Sfontset_list, 0, 0, 0,
@@ -1418,9 +1716,13 @@ syms_of_fontset ()
/* Window system initializer should have set proper functions. */
abort ();
- Qfontset = intern ("fontset");
- staticpro (&Qfontset);
- Fput (Qfontset, Qchar_table_extra_slots, make_number (3));
+ DEFSYM (Qfontset, "fontset");
+ Fput (Qfontset, Qchar_table_extra_slots, make_number (8));
+ DEFSYM (Qfontset_info, "fontset-info");
+ Fput (Qfontset_info, Qchar_table_extra_slots, make_number (1));
+
+ DEFSYM (Qprepend, "prepend");
+ DEFSYM (Qappend, "append");
Vcached_fontset_data = Qnil;
staticpro (&Vcached_fontset_data);
@@ -1433,45 +1735,46 @@ syms_of_fontset ()
FONTSET_ID (Vdefault_fontset) = make_number (0);
FONTSET_NAME (Vdefault_fontset)
= build_string ("-*-*-*-*-*-*-*-*-*-*-*-*-fontset-default");
-#if defined (MAC_OS)
- FONTSET_ASCII (Vdefault_fontset)
- = Fcons (make_number (0),
- build_string ("-apple-monaco-medium-r-*--*-120-*-*-*-*-mac-roman"));
+ {
+ Lisp_Object default_ascii_font;
+
+#if defined (macintosh)
+ default_ascii_font
+ = build_string ("-apple-monaco-medium-r-*--*-120-*-*-*-*-mac-roman");
#elif defined (WINDOWSNT)
- FONTSET_ASCII (Vdefault_fontset)
- = Fcons (make_number (0),
- build_string ("-*-courier new-normal-r-*-*-*-100-*-*-*-*-iso8859-1"));
+ default_ascii_font
+ = build_string ("-*-courier new-normal-r-*-*-*-100-*-*-*-*-iso8859-1");
#else
- FONTSET_ASCII (Vdefault_fontset)
- = Fcons (make_number (0),
- build_string ("-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1"));
+ default_ascii_font
+ = build_string ("-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1");
#endif
+ FONTSET_ASCII (Vdefault_fontset) = default_ascii_font;
+ }
AREF (Vfontset_table, 0) = Vdefault_fontset;
next_fontset_id = 1;
DEFVAR_LISP ("font-encoding-alist", &Vfont_encoding_alist,
- doc: /* Alist of fontname patterns vs corresponding encoding info.
-Each element looks like (REGEXP . ENCODING-INFO),
- where ENCODING-INFO is an alist of CHARSET vs ENCODING.
-ENCODING is one of the following integer values:
- 0: code points 0x20..0x7F or 0x2020..0x7F7F are used,
- 1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used,
- 2: code points 0x20A0..0x7FFF are used,
- 3: code points 0xA020..0xFF7F are used. */);
+ doc: /*
+Alist of fontname patterns vs the corresponding encoding and repertory info.
+Each element looks like (REGEXP . (ENCODING . REPERTORY)),
+where ENCODING is a charset or a char-table,
+and REPERTORY is a charset, a char-table, or nil.
+
+ENCODING is for converting a character to a glyph code of the font.
+If ENCODING is a charset, encoding a character by the charset gives
+the corresponding glyph code. If ENCODING is a char-table, looking up
+the table by a character gives the corresponding glyph code.
+
+REPERTORY specifies a repertory of characters supported by the font.
+If REPERTORY is a charset, all characters beloging to the charset are
+supported. If REPERTORY is a char-table, all characters who have a
+non-nil value in the table are supported. It REPERTORY is nil, Emacs
+gets the repertory information by an opened font and ENCODING. */);
Vfont_encoding_alist = Qnil;
- Vfont_encoding_alist
- = Fcons (Fcons (build_string ("JISX0201"),
- Fcons (Fcons (intern ("latin-jisx0201"), make_number (0)),
- Qnil)),
- Vfont_encoding_alist);
- Vfont_encoding_alist
- = Fcons (Fcons (build_string ("ISO8859-1"),
- Fcons (Fcons (intern ("ascii"), make_number (0)),
- Qnil)),
- Vfont_encoding_alist);
DEFVAR_LISP ("use-default-ascent", &Vuse_default_ascent,
- doc: /* Char table of characters whose ascent values should be ignored.
+ doc: /*
+Char table of characters whose ascent values should be ignored.
If an entry for a character is non-nil, the ascent value of the glyph
is assumed to be what specified by _MULE_DEFAULT_ASCENT property of a font.
@@ -1480,7 +1783,8 @@ such a character is displayed on screen. */);
Vuse_default_ascent = Qnil;
DEFVAR_LISP ("ignore-relative-composition", &Vignore_relative_composition,
- doc: /* Char table of characters which is not composed relatively.
+ doc: /*
+Char table of characters which is not composed relatively.
If an entry for a character is non-nil, a composition sequence
which contains that character is displayed so that
the glyph of that character is put without considering
@@ -1515,3 +1819,26 @@ at the vertical center of lines. */);
defsubr (&Sfontset_font);
defsubr (&Sfontset_list);
}
+
+Lisp_Object
+dump_fontset (fontset)
+ Lisp_Object fontset;
+{
+ Lisp_Object val;
+
+ if (NILP (FONTSET_FALLBACK (fontset)))
+ val = Fcons (Fcons (intern ("fallback-id"), Qnil), Qnil);
+ else
+ val = Fcons (Fcons (intern ("fallback-id"),
+ FONTSET_ID (FONTSET_FALLBACK (fontset))),
+ Qnil);
+ if (NILP (FONTSET_BASE (fontset)))
+ val = Fcons (Fcons (intern ("base"), Qnil), val);
+ else
+ val = Fcons (Fcons (intern ("base"),
+ FONTSET_NAME (FONTSET_BASE (fontset))),
+ val);
+ val = Fcons (Fcons (intern ("name"), FONTSET_NAME (fontset)), val);
+ val = Fcons (Fcons (intern ("id"), FONTSET_ID (fontset)), val);
+ return val;
+}
diff --git a/src/fontset.h b/src/fontset.h
index 634711110bc..5c70eddecdc 100644
--- a/src/fontset.h
+++ b/src/fontset.h
@@ -1,6 +1,9 @@
/* Header for fontset handler.
Copyright (C) 1995, 1997, 2000 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -61,25 +64,15 @@ struct font_info
of lines. */
int vertical_centering;
- /* Encodings of the font indexed by CHARSET. The value is one of
+ /* Encoding type of the font. The value is one of
0, 1, 2, or 3:
0: code points 0x20..0x7F or 0x2020..0x7F7F are used
1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used
2: code points 0x20A0..0x7FFF are used
3: code points 0xA020..0xFF7F are used
- For instance, ASCII and Latin-1 characters may use the same font
- but different code points (ASCII uses 0x20..0x7F and Latin-1 uses
- 0xA0..0xFF).
-
- If the value can't be decided from information of the font, we
- consult `font-encoding-alist' to get of the corresponding charset
- whose default value is defined in lisp/fontset.el. Since there's
- no charset whose id is 1, we use encoding[1] to store the
- encoding information decided by the font itself.
-
If the member `font_encoder' is not NULL, this member is ignored.
*/
- unsigned char encoding[MAX_CHARSET + 1];
+ unsigned char encoding_type;
/* The baseline position of a font is normally `ascent' value of the
font. However, there exists many fonts which don't set `ascent'
@@ -139,6 +132,17 @@ struct font_info
to be used. */
#define FONT_ENCODING_NOT_DECIDED 255
+enum FONT_SPEC_INDEX
+ {
+ FONT_SPEC_FAMILY_INDEX,
+ FONT_SPEC_WEIGHT_INDEX,
+ FONT_SPEC_SLANT_INDEX,
+ FONT_SPEC_SWIDTH_INDEX,
+ FONT_SPEC_ADSTYLE_INDEX,
+ FONT_SPEC_REGISTRY_INDEX,
+ FONT_SPEC_MAX_INDEX
+ };
+
/* Forward declaration for prototypes. */
struct frame;
@@ -178,42 +182,38 @@ extern void (*set_frame_fontset_func) P_ ((struct frame *f, Lisp_Object arg,
This function set the memer `encoder' of the structure. */
extern void (*find_ccl_program_func) P_ ((struct font_info *));
+extern Lisp_Object (*get_font_repertory_func) P_ ((struct frame *,
+ struct font_info *));
+
/* Check if any window system is used now. */
extern void (*check_window_system_func) P_ ((void));
struct face;
extern void free_face_fontset P_ ((FRAME_PTR, struct face *));
-extern Lisp_Object fontset_font_pattern P_ ((FRAME_PTR, int, int));
+extern Lisp_Object fontset_font_pattern P_ ((FRAME_PTR, struct face *, int));
extern int face_suitable_for_char_p P_ ((struct face *, int));
extern int face_for_char P_ ((FRAME_PTR, struct face *, int));
-extern int make_fontset_for_ascii_face P_ ((FRAME_PTR, int));
-extern struct font_info *fs_load_font P_ ((struct frame *, int, char *, int,
- struct face *));
+extern int make_fontset_for_ascii_face P_ ((FRAME_PTR, int, struct face *));
+extern int new_fontset_from_font_name P_ ((Lisp_Object));
+extern struct font_info *fs_load_font P_ ((struct frame *, char *, int));
extern int fs_query_fontset P_ ((Lisp_Object, int));
EXFUN (Fquery_fontset, 2);
extern Lisp_Object list_fontsets P_ ((struct frame *, Lisp_Object, int));
-extern Lisp_Object Qfontset;
extern Lisp_Object Vuse_default_ascent;
extern Lisp_Object Vignore_relative_composition;
extern Lisp_Object Valternate_fontname_alist;
extern Lisp_Object Vfontset_alias_alist;
extern Lisp_Object Vvertical_centering_font_regexp;
-/* Load a font named FONTNAME for displaying character C. All fonts
- for frame F is stored in a table pointed by FONT_TABLE. Return a
- pointer to the struct font_info of the loaded font. If loading
- fails, return 0; If FONTNAME is NULL, the name is taken from the
- information of FONTSET. If FONTSET is given, try to load a font
- whose size matches that of FONTSET, and, the font index is stored
- in the table for FONTSET. */
+/* Load a font named FONTNAME on frame F. All fonts for frame F is
+ stored in a table pointed by FONT_TABLE. Return a pointer to the
+ struct font_info of the loaded font. If loading fails, return
+ NULL. */
-#define FS_LOAD_FONT(f, c, fontname, fontset) \
- fs_load_font (f, c, fontname, fontset, NULL)
+#define FS_LOAD_FONT(f, fontname) fs_load_font (f, fontname, -1)
-#define FS_LOAD_FACE_FONT(f, c, fontname, face) \
- fs_load_font (f, c, fontname, -1, face)
/* Return an immutable id for font_info FONT_INFO on frame F. The
reason for this macro is hat one cannot hold pointers to font_info
diff --git a/src/frame.c b/src/frame.c
index 98b3e7067bd..a92176730b5 100644
--- a/src/frame.c
+++ b/src/frame.c
@@ -23,7 +23,7 @@ Boston, MA 02111-1307, USA. */
#include <stdio.h>
#include "lisp.h"
-#include "charset.h"
+#include "character.h"
#ifdef HAVE_X_WINDOWS
#include "xterm.h"
#endif
@@ -3063,14 +3063,14 @@ x_set_font (f, arg, oldval)
BLOCK_INPUT;
result = (STRINGP (fontset_name)
- ? x_new_fontset (f, SDATA (fontset_name))
- : x_new_font (f, SDATA (arg)));
+ ? x_new_fontset (f, fontset_name)
+ : x_new_fontset (f, arg));
UNBLOCK_INPUT;
if (EQ (result, Qnil))
error ("Font `%s' is not defined", SDATA (arg));
else if (EQ (result, Qt))
- error ("The characters of the given font have varying widths");
+ error ("The default fontset can't be used for a frame font");
else if (STRINGP (result))
{
if (STRINGP (fontset_name))
@@ -3080,10 +3080,10 @@ x_set_font (f, arg, oldval)
if (old_fontset == FRAME_FONTSET (f))
return;
}
- else if (!NILP (Fequal (result, oldval)))
+ store_frame_param (f, Qfont, result);
+ if (!NILP (Fequal (result, oldval)))
return;
- store_frame_param (f, Qfont, result);
recompute_basic_faces (f);
}
else
diff --git a/src/frame.h b/src/frame.h
index a2ca24a014d..b4ddf5136d5 100644
--- a/src/frame.h
+++ b/src/frame.h
@@ -1034,7 +1034,7 @@ extern void x_set_offset P_ ((struct frame *, int, int, int));
extern void x_wm_set_icon_position P_ ((struct frame *, int, int));
extern Lisp_Object x_new_font P_ ((struct frame *, char *));
-extern Lisp_Object x_new_fontset P_ ((struct frame *, char *));
+extern Lisp_Object x_new_fontset P_ ((struct frame *, Lisp_Object));
/* These are in frame.c */
diff --git a/src/indent.c b/src/indent.c
index e21c9a2b867..86984d4902d 100644
--- a/src/indent.c
+++ b/src/indent.c
@@ -22,7 +22,7 @@ Boston, MA 02111-1307, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "category.h"
#include "indent.h"
#include "keyboard.h"
@@ -323,7 +323,7 @@ check_composition (pos, pos_byte, point, len, len_byte, width)
if (dp != 0 && VECTORP (DISP_CHAR_VECTOR (dp, c))) \
width = XVECTOR (DISP_CHAR_VECTOR (dp, c))->size; \
else \
- width = WIDTH_BY_CHAR_HEAD (*p); \
+ width = CHAR_WIDTH (c); \
if (width > 1) \
wide_column = width; \
} \
diff --git a/src/insdel.c b/src/insdel.c
index 5becd5d9163..2d9befb677d 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "window.h"
#include "blockinput.h"
#include "region-cache.h"
@@ -655,22 +655,11 @@ copy_text (from_addr, to_addr, nbytes,
int bytes_left = nbytes;
Lisp_Object tbl = Qnil;
- /* We set the variable tbl to the reverse table of
- Vnonascii_translation_table in advance. */
- if (CHAR_TABLE_P (Vnonascii_translation_table))
- {
- tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
- make_number (0));
- if (!CHAR_TABLE_P (tbl))
- tbl = Qnil;
- }
-
- /* Convert multibyte to single byte. */
while (bytes_left > 0)
{
int thislen, c;
c = STRING_CHAR_AND_LENGTH (from_addr, bytes_left, thislen);
- if (!SINGLE_BYTE_CHAR_P (c))
+ if (!ASCII_CHAR_P (c))
c = multibyte_char_to_unibyte (c, tbl);
*to_addr++ = c;
from_addr += thislen;
@@ -1164,6 +1153,47 @@ insert_from_string_1 (string, pos, pos_byte, nchars, nbytes,
current_buffer, inherit);
adjust_point (nchars, outgoing_nbytes);
+
+ CHECK_MARKERS ();
+}
+
+/* Insert a sequence of NCHARS chars which occupy NBYTES bytes
+ starting at GPT_ADDR. */
+
+void
+insert_from_gap (nchars, nbytes)
+ register int nchars, nbytes;
+{
+ if (NILP (current_buffer->enable_multibyte_characters))
+ nchars = nbytes;
+
+ record_insert (GPT, nchars);
+ MODIFF++;
+
+ GAP_SIZE -= nbytes;
+ GPT += nchars;
+ ZV += nchars;
+ Z += nchars;
+ GPT_BYTE += nbytes;
+ ZV_BYTE += nbytes;
+ Z_BYTE += nbytes;
+ if (GAP_SIZE > 0) *(GPT_ADDR) = 0; /* Put an anchor. */
+
+ if (GPT_BYTE < GPT)
+ abort ();
+
+ adjust_overlays_for_insert (GPT, nchars);
+ adjust_markers_for_insert (GPT, GPT_BYTE,
+ GPT + nchars, GPT_BYTE + nbytes,
+ 0);
+
+ if (BUF_INTERVALS (current_buffer) != 0)
+ offset_intervals (current_buffer, GPT, nchars);
+
+ if (GPT - nchars < PT)
+ adjust_point (nchars, nbytes);
+
+ CHECK_MARKERS ();
}
/* Insert text from BUF, NCHARS characters starting at CHARPOS, into the
diff --git a/src/keyboard.c b/src/keyboard.c
index ba137f67cb6..449c8a1b453 100644
--- a/src/keyboard.c
+++ b/src/keyboard.c
@@ -32,7 +32,7 @@ Boston, MA 02111-1307, USA. */
#include "window.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "disptab.h"
#include "dispextern.h"
#include "syntax.h"
@@ -1633,7 +1633,7 @@ command_loop_1 ()
: (lose >= 0x20 && lose < 0x7f))
/* To extract the case of continuation on
wide-column characters. */
- && (WIDTH_BY_CHAR_HEAD (FETCH_BYTE (PT_BYTE)) == 1)
+ && ASCII_BYTE_P (lose)
&& (XFASTINT (XWINDOW (selected_window)->last_modified)
>= MODIFF)
&& (XFASTINT (XWINDOW (selected_window)->last_overlay_modified)
@@ -1690,7 +1690,7 @@ command_loop_1 ()
{
unsigned int c
= translate_char (Vtranslation_table_for_input,
- XFASTINT (last_command_char), 0, 0, 0);
+ XFASTINT (last_command_char));
int value;
if (NILP (Vexecuting_macro)
&& !EQ (minibuf_window, selected_window))
@@ -2920,8 +2920,7 @@ read_char (commandflag, nmaps, maps, prev_event, used_mouse_menu)
&& SCHARS (Vkeyboard_translate_table) > (unsigned) XFASTINT (c))
|| (VECTORP (Vkeyboard_translate_table)
&& XVECTOR (Vkeyboard_translate_table)->size > (unsigned) XFASTINT (c))
- || (CHAR_TABLE_P (Vkeyboard_translate_table)
- && CHAR_VALID_P (XINT (c), 0)))
+ || CHAR_TABLE_P (Vkeyboard_translate_table))
{
Lisp_Object d;
d = Faref (Vkeyboard_translate_table, c);
@@ -6327,6 +6326,8 @@ modify_event_symbol (symbol_num, modifiers, symbol_kind, name_alist_or_stem,
else if (sizeof (long) == sizeof (EMACS_INT))
sprintf (buf, "%s-%ld", SDATA (name_alist_or_stem),
XINT (symbol_int) + 1);
+ else
+ abort ();
value = intern (buf);
}
else if (name_table != 0 && name_table[symbol_num])
@@ -9376,9 +9377,8 @@ read_key_sequence (keybuf, bufsize, prompt, dont_downcase_last,
if (first_binding >= nmaps
&& fkey.start >= t && keytran.start >= t
&& INTEGERP (key)
- && ((((XINT (key) & 0x3ffff)
- < XCHAR_TABLE (current_buffer->downcase_table)->size)
- && UPPERCASEP (XINT (key) & 0x3ffff))
+ && ((CHARACTERP (make_number (XINT (key) & ~CHAR_MODIFIER_MASK))
+ && UPPERCASEP (XINT (key) & ~CHAR_MODIFIER_MASK))
|| (XINT (key) & shift_modifier)))
{
Lisp_Object new_key;
@@ -9389,8 +9389,8 @@ read_key_sequence (keybuf, bufsize, prompt, dont_downcase_last,
if (XINT (key) & shift_modifier)
XSETINT (new_key, XINT (key) & ~shift_modifier);
else
- XSETINT (new_key, (DOWNCASE (XINT (key) & 0x3ffff)
- | (XINT (key) & ~0x3ffff)));
+ XSETINT (new_key, (DOWNCASE (XINT (key) & ~CHAR_MODIFIER_MASK)
+ | (XINT (key) & ~CHAR_MODIFIER_MASK)));
/* We have to do this unconditionally, regardless of whether
the lower-case char is defined in the keymaps, because they
diff --git a/src/keymap.c b/src/keymap.c
index 64f849f7845..c274183de49 100644
--- a/src/keymap.c
+++ b/src/keymap.c
@@ -25,6 +25,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "keyboard.h"
#include "termhooks.h"
@@ -413,8 +414,7 @@ PARENT should be nil or another keymap. */)
Lisp_Object indices[3];
map_char_table (fix_submap_inheritance, Qnil,
- XCAR (list), XCAR (list),
- keymap, 0, indices);
+ XCAR (list), keymap);
}
}
@@ -550,9 +550,7 @@ access_keymap (map, idx, t_ok, noinherit, autoload)
GCPRO4 (map, tail, idx, t_binding);
- /* If `t_ok' is 2, both `t' and generic-char bindings are accepted.
- If it is 1, only generic-char bindings are accepted.
- Otherwise, neither are. */
+ /* If `t_ok' is 2, both `t' is accepted. */
t_ok = t_ok ? 2 : 0;
for (tail = XCDR (map);
@@ -576,24 +574,6 @@ access_keymap (map, idx, t_ok, noinherit, autoload)
if (EQ (key, idx))
val = XCDR (binding);
- else if (t_ok
- && INTEGERP (idx)
- && (XINT (idx) & CHAR_MODIFIER_MASK) == 0
- && INTEGERP (key)
- && (XINT (key) & CHAR_MODIFIER_MASK) == 0
- && !SINGLE_BYTE_CHAR_P (XINT (idx))
- && !SINGLE_BYTE_CHAR_P (XINT (key))
- && CHAR_VALID_P (XINT (key), 1)
- && !CHAR_VALID_P (XINT (key), 0)
- && (CHAR_CHARSET (XINT (key))
- == CHAR_CHARSET (XINT (idx))))
- {
- /* KEY is the generic character of the charset of IDX.
- Use KEY's binding if there isn't a binding for IDX
- itself. */
- t_binding = XCDR (binding);
- t_ok = 0;
- }
else if (t_ok > 1 && EQ (key, Qt))
{
t_binding = XCDR (binding);
@@ -687,7 +667,7 @@ map_keymap (map, fun, args, data, autoload)
tail = XCDR (tail))
{
Lisp_Object binding = XCAR (tail);
-
+
if (CONSP (binding))
map_keymap_item (fun, args, XCAR (binding), XCDR (binding), data);
else if (VECTORP (binding))
@@ -706,11 +686,10 @@ map_keymap (map, fun, args, data, autoload)
else if (CHAR_TABLE_P (binding))
{
Lisp_Object indices[3];
- map_char_table (map_keymap_char_table_item, Qnil, binding, binding,
+ map_char_table (map_keymap_char_table_item, Qnil, binding,
Fcons (make_save_value (fun, 0),
Fcons (make_save_value (data, 0),
- args)),
- 0, indices);
+ args)));
}
}
UNGCPRO;
@@ -906,6 +885,11 @@ store_in_keymap (keymap, idx, def)
NILP (def) ? Qt : def);
return def;
}
+ else if (CONSP (idx) && CHARACTERP (XCAR (idx)))
+ {
+ Fset_char_table_range (elt, idx, NILP (def) ? Qt : def);
+ return def;
+ }
insertion_point = tail;
}
else if (CONSP (elt))
@@ -1016,7 +1000,7 @@ static void
copy_keymap_1 (chartable, idx, elt)
Lisp_Object chartable, idx, elt;
{
- Faset (chartable, idx, copy_keymap_item (elt));
+ Fset_char_table_range (chartable, idx, copy_keymap_item (elt));
}
DEFUN ("copy-keymap", Fcopy_keymap, Scopy_keymap, 1, 1, 0,
@@ -1041,7 +1025,7 @@ is not copied. */)
{
Lisp_Object indices[3];
elt = Fcopy_sequence (elt);
- map_char_table (copy_keymap_1, Qnil, elt, elt, elt, 0, indices);
+ map_char_table (copy_keymap_1, Qnil, elt, elt);
}
else if (VECTORP (elt))
{
@@ -1122,8 +1106,15 @@ binding KEY to DEF is added at the front of KEYMAP. */)
{
c = Faref (key, make_number (idx));
- if (CONSP (c) && lucid_event_type_list_p (c))
- c = Fevent_convert_list (c);
+ if (CONSP (c))
+ {
+ /* C may be a cons (FROM . TO) specifying a range of
+ characters. */
+ if (CHARACTERP (XCAR (c)))
+ CHECK_CHARACTER_CDR (c);
+ else if (lucid_event_type_list_p (c))
+ c = Fevent_convert_list (c);
+ }
if (SYMBOLP (c))
silly_event_symbol_error (c);
@@ -1144,7 +1135,10 @@ binding KEY to DEF is added at the front of KEYMAP. */)
idx++;
}
- if (!INTEGERP (c) && !SYMBOLP (c) && !CONSP (c))
+ if (!INTEGERP (c) && !SYMBOLP (c)
+ && (!CONSP (c)
+ /* If C is a range, it must be a leaf. */
+ || (INTEGERP (XCAR (c)) && idx != length)))
error ("Key sequence contains invalid event");
if (idx == length)
@@ -1792,9 +1786,9 @@ accessible_keymaps_1 (key, cmd, maps, tail, thisseq, is_metized)
int meta_bit = meta_modifier;
Lisp_Object last = make_number (XINT (Flength (thisseq)) - 1);
tem = Fcopy_sequence (thisseq);
-
+
Faset (tem, last, make_number (XINT (key) | meta_bit));
-
+
/* This new sequence is the same length as
thisseq, so stick it in the list right
after this one. */
@@ -1915,10 +1909,9 @@ then the value includes only maps for prefixes that start with PREFIX. */)
{
Lisp_Object indices[3];
- map_char_table (accessible_keymaps_char_table, Qnil, elt,
+ map_char_table (accessible_keymaps_char_table, Qnil,
elt, Fcons (Fcons (maps, make_number (is_metized)),
- Fcons (tail, thisseq)),
- 0, indices);
+ Fcons (tail, thisseq)));
}
else if (VECTORP (elt))
{
@@ -2115,30 +2108,24 @@ push_key_description (c, p, force_multibyte)
{
*p++ = c;
}
+ else if (CHARACTERP (make_number (c)))
+ {
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! force_multibyte)
+ *p++ = multibyte_char_to_unibyte (c, Qnil);
+ else
+ p += CHAR_STRING (c, (unsigned char *) p);
+ }
else
{
- int valid_p = SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, 0);
-
- if (force_multibyte && valid_p)
+ int bit_offset;
+ *p++ = '\\';
+ /* The biggest character code uses 22 bits. */
+ for (bit_offset = 21; bit_offset >= 0; bit_offset -= 3)
{
- if (SINGLE_BYTE_CHAR_P (c))
- c = unibyte_char_to_multibyte (c);
- p += CHAR_STRING (c, p);
- }
- else if (NILP (current_buffer->enable_multibyte_characters)
- || valid_p)
- {
- int bit_offset;
- *p++ = '\\';
- /* The biggest character code uses 19 bits. */
- for (bit_offset = 18; bit_offset >= 0; bit_offset -= 3)
- {
- if (c >= (1 << bit_offset))
- *p++ = ((c & (7 << bit_offset)) >> bit_offset) + '0';
- }
+ if (c >= (1 << bit_offset))
+ *p++ = ((c & (7 << bit_offset)) >> bit_offset) + '0';
}
- else
- p += CHAR_STRING (c, p);
}
return p;
@@ -2162,43 +2149,10 @@ around function keys and event symbols. */)
if (INTEGERP (key)) /* Normal character */
{
- unsigned int charset, c1, c2;
- int without_bits = XINT (key) & ~((-1) << CHARACTERBITS);
-
- if (SINGLE_BYTE_CHAR_P (without_bits))
- charset = 0;
- else
- SPLIT_CHAR (without_bits, charset, c1, c2);
+ char tem[KEY_DESCRIPTION_SIZE];
- if (charset
- && CHARSET_DEFINED_P (charset)
- && ((c1 >= 0 && c1 < 32)
- || (c2 >= 0 && c2 < 32)))
- {
- /* Handle a generic character. */
- Lisp_Object name;
- name = CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX);
- CHECK_STRING (name);
- return concat2 (build_string ("Character set "), name);
- }
- else
- {
- char tem[KEY_DESCRIPTION_SIZE], *end;
- int nbytes, nchars;
- Lisp_Object string;
-
- end = push_key_description (XUINT (key), tem, 1);
- nbytes = end - tem;
- nchars = multibyte_chars_in_text (tem, nbytes);
- if (nchars == nbytes)
- {
- *end = '\0';
- string = build_string (tem);
- }
- else
- string = make_multibyte_string (tem, nchars, nbytes);
- return string;
- }
+ *push_key_description (XUINT (key), tem, 1) = 0;
+ return build_string (tem);
}
else if (SYMBOLP (key)) /* Function key or event-symbol */
{
@@ -2260,7 +2214,7 @@ Control characters turn into "^char", etc. */)
CHECK_NUMBER (character);
c = XINT (character);
- if (!SINGLE_BYTE_CHAR_P (c))
+ if (!ASCII_CHAR_P (c))
{
int len = CHAR_STRING (c, str);
@@ -2432,8 +2386,7 @@ where_is_internal (definition, keymaps, firstonly, noindirect, no_remap)
Fcons (Fcons (this, last),
Fcons (make_number (nomenus),
make_number (last_is_meta))));
- map_char_table (where_is_internal_2, Qnil, elt, elt, args,
- 0, indices);
+ map_char_table (where_is_internal_2, Qnil, elt, args);
sequences = XCDR (XCAR (args));
}
else if (CONSP (elt))
@@ -3246,11 +3199,10 @@ This is text showing the elements of vector matched against indices. */)
If the definition in effect in the whole map does not match
the one in this vector, we ignore this one.
- When describing a sub-char-table, INDICES is a list of
- indices at higher levels in this char-table,
- and CHAR_TABLE_DEPTH says how many levels down we have gone.
+ ARGS is simply passed as the second argument to ELT_DESCRIBER.
- ARGS is simply passed as the second argument to ELT_DESCRIBER. */
+ INDICES and CHAR_TABLE_DEPTH are ignored. They will be removed in
+ the near future. */
void
describe_vector (vector, elt_prefix, args, elt_describer,
@@ -3267,24 +3219,18 @@ describe_vector (vector, elt_prefix, args, elt_describer,
{
Lisp_Object definition;
Lisp_Object tem2;
- register int i;
+ int i;
Lisp_Object suppress;
Lisp_Object kludge;
- int first = 1;
struct gcpro gcpro1, gcpro2, gcpro3;
/* Range of elements to be handled. */
int from, to;
- /* A flag to tell if a leaf in this level of char-table is not a
- generic character (i.e. a complete multibyte character). */
- int complete_char;
- int character;
+ Lisp_Object character;
int starting_i;
+ int first = 1;
suppress = Qnil;
- if (indices == 0)
- indices = (int *) alloca (3 * sizeof (int));
-
definition = Qnil;
/* This vector gets used to present single keys to Flookup_key. Since
@@ -3296,60 +3242,23 @@ describe_vector (vector, elt_prefix, args, elt_describer,
if (partial)
suppress = intern ("suppress-keymap");
- if (CHAR_TABLE_P (vector))
- {
- if (char_table_depth == 0)
- {
- /* VECTOR is a top level char-table. */
- complete_char = 1;
- from = 0;
- to = CHAR_TABLE_ORDINARY_SLOTS;
- }
- else
- {
- /* VECTOR is a sub char-table. */
- if (char_table_depth >= 3)
- /* A char-table is never that deep. */
- error ("Too deep char table");
-
- complete_char
- = (CHARSET_VALID_P (indices[0])
- && ((CHARSET_DIMENSION (indices[0]) == 1
- && char_table_depth == 1)
- || char_table_depth == 2));
-
- /* Meaningful elements are from 32th to 127th. */
- from = 32;
- to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
- }
- }
- else
- {
- /* This does the right thing for ordinary vectors. */
-
- complete_char = 1;
- from = 0;
- to = XVECTOR (vector)->size;
- }
+ from = 0;
+ to = CHAR_TABLE_P (vector) ? MAX_CHAR + 1 : XVECTOR (vector)->size;
for (i = from; i < to; i++)
{
- QUIT;
+ int range_beg, range_end;
+ Lisp_Object val;
- if (CHAR_TABLE_P (vector))
- {
- if (char_table_depth == 0 && i >= CHAR_TABLE_SINGLE_BYTE_SLOTS)
- complete_char = 0;
+ QUIT;
- if (i >= CHAR_TABLE_SINGLE_BYTE_SLOTS
- && !CHARSET_DEFINED_P (i - 128))
- continue;
+ starting_i = i;
- definition
- = get_keyelt (XCHAR_TABLE (vector)->contents[i], 0);
- }
+ if (CHAR_TABLE_P (vector))
+ val = char_table_ref_and_range (vector, i, &range_beg, &i);
else
- definition = get_keyelt (AREF (vector, i), 0);
+ val = AREF (vector, i);
+ definition = get_keyelt (val, 0);
if (NILP (definition)) continue;
@@ -3363,33 +3272,14 @@ describe_vector (vector, elt_prefix, args, elt_describer,
if (!NILP (tem)) continue;
}
- /* Set CHARACTER to the character this entry describes, if any.
- Also update *INDICES. */
- if (CHAR_TABLE_P (vector))
- {
- indices[char_table_depth] = i;
-
- if (char_table_depth == 0)
- {
- character = i;
- indices[0] = i - 128;
- }
- else if (complete_char)
- {
- character = MAKE_CHAR (indices[0], indices[1], indices[2]);
- }
- else
- character = 0;
- }
- else
- character = i;
+ character = make_number (starting_i);
/* If this binding is shadowed by some other map, ignore it. */
- if (!NILP (shadow) && complete_char)
+ if (!NILP (shadow))
{
Lisp_Object tem;
- ASET (kludge, 0, make_number (character));
+ ASET (kludge, 0, character);
tem = shadow_lookup (shadow, kludge, Qt);
if (!NILP (tem)) continue;
@@ -3397,11 +3287,11 @@ describe_vector (vector, elt_prefix, args, elt_describer,
/* Ignore this definition if it is shadowed by an earlier
one in the same keymap. */
- if (!NILP (entire_map) && complete_char)
+ if (!NILP (entire_map))
{
Lisp_Object tem;
- ASET (kludge, 0, make_number (character));
+ ASET (kludge, 0, character);
tem = Flookup_key (entire_map, kludge, Qt);
if (!EQ (tem, definition))
@@ -3410,88 +3300,28 @@ describe_vector (vector, elt_prefix, args, elt_describer,
if (first)
{
- if (char_table_depth == 0)
- insert ("\n", 1);
+ insert ("\n", 1);
first = 0;
}
- /* For a sub char-table, show the depth by indentation.
- CHAR_TABLE_DEPTH can be greater than 0 only for a char-table. */
- if (char_table_depth > 0)
- insert (" ", char_table_depth * 2); /* depth is 1 or 2. */
-
/* Output the prefix that applies to every entry in this map. */
if (!NILP (elt_prefix))
insert1 (elt_prefix);
- /* Insert or describe the character this slot is for,
- or a description of what it is for. */
- if (SUB_CHAR_TABLE_P (vector))
- {
- if (complete_char)
- insert_char (character);
- else
- {
- /* We need an octal representation for this block of
- characters. */
- char work[16];
- sprintf (work, "(row %d)", i);
- insert (work, strlen (work));
- }
- }
- else if (CHAR_TABLE_P (vector))
- {
- if (complete_char)
- insert1 (Fsingle_key_description (make_number (character), Qnil));
- else
- {
- /* Print the information for this character set. */
- insert_string ("<");
- tem2 = CHARSET_TABLE_INFO (i - 128, CHARSET_SHORT_NAME_IDX);
- if (STRINGP (tem2))
- insert_from_string (tem2, 0, 0, SCHARS (tem2),
- SBYTES (tem2), 0);
- else
- insert ("?", 1);
- insert (">", 1);
- }
- }
- else
- {
- insert1 (Fsingle_key_description (make_number (character), Qnil));
- }
-
- /* If we find a sub char-table within a char-table,
- scan it recursively; it defines the details for
- a character set or a portion of a character set. */
- if (CHAR_TABLE_P (vector) && SUB_CHAR_TABLE_P (definition))
- {
- insert ("\n", 1);
- describe_vector (definition, elt_prefix, args, elt_describer,
- partial, shadow, entire_map,
- indices, char_table_depth + 1);
- continue;
- }
-
- starting_i = i;
+ insert1 (Fsingle_key_description (character, Qnil));
/* Find all consecutive characters or rows that have the same
definition. But, for elements of a top level char table, if
they are for charsets, we had better describe one by one even
if they have the same definition. */
if (CHAR_TABLE_P (vector))
- {
- int limit = to;
-
- if (char_table_depth == 0)
- limit = CHAR_TABLE_SINGLE_BYTE_SLOTS;
-
- while (i + 1 < limit
- && (tem2 = get_keyelt (XCHAR_TABLE (vector)->contents[i + 1], 0),
- !NILP (tem2))
- && !NILP (Fequal (tem2, definition)))
- i++;
- }
+ while (i + 1 < to
+ && (val = char_table_ref_and_range (vector, i + 1,
+ &range_beg, &range_end),
+ tem2 = get_keyelt (val, 0),
+ !NILP (tem2))
+ && !NILP (Fequal (tem2, definition)))
+ i = range_end;
else
while (i + 1 < to
&& (tem2 = get_keyelt (AREF (vector, i + 1), 0),
@@ -3499,7 +3329,6 @@ describe_vector (vector, elt_prefix, args, elt_describer,
&& !NILP (Fequal (tem2, definition)))
i++;
-
/* If we have a range of more than one character,
print where the range reaches to. */
@@ -3510,31 +3339,7 @@ describe_vector (vector, elt_prefix, args, elt_describer,
if (!NILP (elt_prefix))
insert1 (elt_prefix);
- if (CHAR_TABLE_P (vector))
- {
- if (char_table_depth == 0)
- {
- insert1 (Fsingle_key_description (make_number (i), Qnil));
- }
- else if (complete_char)
- {
- indices[char_table_depth] = i;
- character = MAKE_CHAR (indices[0], indices[1], indices[2]);
- insert_char (character);
- }
- else
- {
- /* We need an octal representation for this block of
- characters. */
- char work[16];
- sprintf (work, "(row %d)", i);
- insert (work, strlen (work));
- }
- }
- else
- {
- insert1 (Fsingle_key_description (make_number (i), Qnil));
- }
+ insert1 (Fsingle_key_description (make_number (i), Qnil));
}
/* Print a description of the definition of this character.
@@ -3543,14 +3348,6 @@ describe_vector (vector, elt_prefix, args, elt_describer,
(*elt_describer) (definition, args);
}
- /* For (sub) char-table, print `defalt' slot at last. */
- if (CHAR_TABLE_P (vector) && !NILP (XCHAR_TABLE (vector)->defalt))
- {
- insert (" ", char_table_depth * 2);
- insert_string ("<<default>>");
- (*elt_describer) (XCHAR_TABLE (vector)->defalt, args);
- }
-
UNGCPRO;
}
diff --git a/src/lisp.h b/src/lisp.h
index 9a80cb774fc..717db80e558 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -297,7 +297,8 @@ enum pvec_type
PVEC_BOOL_VECTOR = 0x10000,
PVEC_BUFFER = 0x20000,
PVEC_HASH_TABLE = 0x40000,
- PVEC_TYPE_MASK = 0x7fe00
+ PVEC_SUB_CHAR_TABLE = 0x80000,
+ PVEC_TYPE_MASK = 0x0ffe00
#if 0 /* This is used to make the value of PSEUDOVECTOR_FLAG available to
GDB. It doesn't work on OS Alpha. Moved to a variable in
@@ -496,6 +497,7 @@ extern size_t pure_size;
#define XSUBR(a) (eassert (GC_SUBRP(a)),(struct Lisp_Subr *) XPNTR(a))
#define XBUFFER(a) (eassert (GC_BUFFERP(a)),(struct buffer *) XPNTR(a))
#define XCHAR_TABLE(a) ((struct Lisp_Char_Table *) XPNTR(a))
+#define XSUB_CHAR_TABLE(a) ((struct Lisp_Sub_Char_Table *) XPNTR(a))
#define XBOOL_VECTOR(a) ((struct Lisp_Bool_Vector *) XPNTR(a))
/* Construct a Lisp_Object from a value or address. */
@@ -525,6 +527,7 @@ extern size_t pure_size;
#define XSETBUFFER(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BUFFER))
#define XSETCHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_CHAR_TABLE))
#define XSETBOOL_VECTOR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BOOL_VECTOR))
+#define XSETSUB_CHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_SUB_CHAR_TABLE))
/* Convenience macros for dealing with Lisp arrays. */
@@ -665,41 +668,20 @@ struct Lisp_Vector
Lisp_Object contents[1];
};
-/* A char table is a kind of vectorlike, with contents are like a
+/* A char-table is a kind of vectorlike, with contents are like a
vector but with a few other slots. For some purposes, it makes
- sense to handle a chartable with type struct Lisp_Vector. An
+ sense to handle a char-table with type struct Lisp_Vector. An
element of a char table can be any Lisp objects, but if it is a sub
char-table, we treat it a table that contains information of a
- group of characters of the same charsets or a specific character of
- a charset. A sub char-table has the same structure as a char table
- except for that the former omits several slots at the tail. A sub
- char table appears only in an element of a char table, and there's
- no way to access it directly from Emacs Lisp program. */
-
-/* This is the number of slots that apply to characters or character
- sets. The first 128 are for ASCII, the next 128 are for 8-bit
- European characters, and the last 128 are for multibyte characters.
- The first 256 are indexed by the code itself, but the last 128 are
- indexed by (charset-id + 128). */
-#define CHAR_TABLE_ORDINARY_SLOTS 384
-
-/* This is the number of slots that apply to characters of ASCII and
- 8-bit Europeans only. */
-#define CHAR_TABLE_SINGLE_BYTE_SLOTS 256
+ specific range of characters. A sub char-table has the same
+ structure as a vector. A sub char table appears only in an element
+ of a char-table, and there's no way to access it directly from
+ Emacs Lisp program. */
/* This is the number of slots that every char table must have. This
counts the ordinary slots and the top, defalt, parent, and purpose
slots. */
-#define CHAR_TABLE_STANDARD_SLOTS (CHAR_TABLE_ORDINARY_SLOTS + 4)
-
-/* This is the number of slots that apply to position-code-1 and
- position-code-2 of a multibyte character at the 2nd and 3rd level
- sub char tables respectively. */
-#define SUB_CHAR_TABLE_ORDINARY_SLOTS 128
-
-/* This is the number of slots that every sub char table must have.
- This counts the ordinary slots and the top and defalt slot. */
-#define SUB_CHAR_TABLE_STANDARD_SLOTS (SUB_CHAR_TABLE_ORDINARY_SLOTS + 2)
+#define CHAR_TABLE_STANDARD_SLOTS (VECSIZE (struct Lisp_Char_Table) - 1)
/* Return the number of "extra" slots in the char table CT. */
@@ -707,14 +689,13 @@ struct Lisp_Vector
(((CT)->size & PSEUDOVECTOR_SIZE_MASK) - CHAR_TABLE_STANDARD_SLOTS)
/* Almost equivalent to Faref (CT, IDX) with optimization for ASCII
- and 8-bit Europeans characters. For these characters, do not check
- validity of CT. Do not follow parent. */
-#define CHAR_TABLE_REF(CT, IDX) \
- ((IDX) >= 0 && (IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
- ? (!NILP (XCHAR_TABLE (CT)->contents[IDX]) \
- ? XCHAR_TABLE (CT)->contents[IDX] \
- : XCHAR_TABLE (CT)->defalt) \
- : Faref (CT, make_number (IDX)))
+ characters. Do not check validity of CT. */
+#define CHAR_TABLE_REF(CT, IDX) \
+ ((ASCII_CHAR_P (IDX) \
+ && SUB_CHAR_TABLE_P (XCHAR_TABLE (CT)->ascii) \
+ && !NILP (XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX])) \
+ ? XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX] \
+ : char_table_ref ((CT), (IDX)))
/* Almost equivalent to Faref (CT, IDX) with optimization for ASCII
and 8-bit Europeans characters. However, if the result is nil,
@@ -722,55 +703,79 @@ struct Lisp_Vector
For these characters, do not check validity of CT
and do not follow parent. */
-#define CHAR_TABLE_TRANSLATE(CT, IDX) \
- ((IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
- ? (!NILP (XCHAR_TABLE (CT)->contents[IDX]) \
- ? XINT (XCHAR_TABLE (CT)->contents[IDX]) \
- : IDX) \
- : char_table_translate (CT, IDX))
+#define CHAR_TABLE_TRANSLATE(CT, IDX) \
+ char_table_translate (CT, IDX)
/* Equivalent to Faset (CT, IDX, VAL) with optimization for ASCII and
- 8-bit Europeans characters. Do not check validity of CT. */
-#define CHAR_TABLE_SET(CT, IDX, VAL) \
- do { \
- if (XFASTINT (IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS) \
- XCHAR_TABLE (CT)->contents[XFASTINT (IDX)] = VAL; \
- else \
- Faset (CT, IDX, VAL); \
- } while (0)
+ 8-bit European characters. Do not check validity of CT. */
+#define CHAR_TABLE_SET(CT, IDX, VAL) \
+ (((IDX) >= 0 && ASCII_CHAR_P (IDX) \
+ && SUB_CHAR_TABLE_P (XCHAR_TABLE (CT)->ascii)) \
+ ? XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX] = VAL \
+ : char_table_set (CT, IDX, VAL))
+
+#define CHARTAB_SIZE_BITS_0 6
+#define CHARTAB_SIZE_BITS_1 4
+#define CHARTAB_SIZE_BITS_2 5
+#define CHARTAB_SIZE_BITS_3 7
+
+extern const int chartab_size[4];
+
+struct Lisp_Sub_Char_Table;
struct Lisp_Char_Table
{
/* This is the vector's size field, which also holds the
- pseudovector type information. It holds the size, too.
- The size counts the top, defalt, purpose, and parent slots.
- The last three are not counted if this is a sub char table. */
+ pseudovector type information. It holds the size, too. The size
+ counts the defalt, parent, purpose, ascii, contents, and extras
+ slots. */
EMACS_INT size;
struct Lisp_Vector *next;
- /* This holds a flag to tell if this is a top level char table (t)
- or a sub char table (nil). */
- Lisp_Object top;
+
/* This holds a default value,
which is used whenever the value for a specific character is nil. */
Lisp_Object defalt;
- /* This holds an actual value of each element. A sub char table
- has only SUB_CHAR_TABLE_ORDINARY_SLOTS number of elements. */
- Lisp_Object contents[CHAR_TABLE_ORDINARY_SLOTS];
-
- /* A sub char table doesn't has the following slots. */
- /* This points to another char table, which we inherit from
- when the value for a specific character is nil.
- The `defalt' slot takes precedence over this. */
+ /* This points to another char table, which we inherit from when the
+ value for a specific character is nil. The `defalt' slot takes
+ precedence over this. */
Lisp_Object parent;
- /* This should be a symbol which says what kind of use
- this char-table is meant for.
- Typically now the values can be `syntax-table' and `display-table'. */
+
+ /* This is a symbol which says what kind of use this char-table is
+ meant for. */
Lisp_Object purpose;
- /* These hold additional data. */
+
+ /* The bottom sub char-table for characters of the range 0..127. It
+ is nil if none of ASCII character has a specific value. */
+ Lisp_Object ascii;
+
+ Lisp_Object contents[(1 << CHARTAB_SIZE_BITS_0)];
+
+ /* These hold additional data. It is a vector. */
Lisp_Object extras[1];
};
+struct Lisp_Sub_Char_Table
+ {
+ /* This is the vector's size field, which also holds the
+ pseudovector type information. It holds the size, too. */
+ EMACS_INT size;
+ struct Lisp_Vector *next;
+
+ /* Depth of this sub char-table. It should be 1, 2, or 3. A sub
+ char-table of depth 1 contains 16 elments, and each element
+ covers 4096 (128*32) characters. A sub char-table of depth 2
+ contains 32 elements, and each element covers 128 characters. A
+ sub char-table of depth 3 contains 128 elements, and each element
+ is for one character. */
+ Lisp_Object depth;
+
+ /* Minimum character covered by the sub char-table. */
+ Lisp_Object min_char;
+
+ Lisp_Object contents[1];
+ };
+
/* A boolvector is a kind of vectorlike, with contents are like a string. */
struct Lisp_Bool_Vector
{
@@ -1264,9 +1269,9 @@ typedef unsigned char UCHAR;
(CHAR_ALT | CHAR_SUPER | CHAR_HYPER | CHAR_SHIFT | CHAR_CTL | CHAR_META)
-/* Actually, the current Emacs uses 19 bits for the character value
+/* Actually, the current Emacs uses 22 bits for the character value
itself. */
-#define CHARACTERBITS 19
+#define CHARACTERBITS 2
/* The maximum byte size consumed by push_key_description.
All callers should assure that at least this size of memory is
@@ -1322,9 +1327,9 @@ typedef unsigned char UCHAR;
#define GLYPH int
/* Mask bits for face. */
-#define GLYPH_MASK_FACE 0x7FF80000
+#define GLYPH_MASK_FACE 0x7FC00000
/* Mask bits for character code. */
-#define GLYPH_MASK_CHAR 0x0007FFFF /* The lowest 19 bits */
+#define GLYPH_MASK_CHAR 0x003FFFFF /* The lowest 22 bits */
/* The FAST macros assume that we already know we're in an X window. */
@@ -1422,13 +1427,14 @@ typedef unsigned char UCHAR;
#define BUFFERP(x) PSEUDOVECTORP (x, PVEC_BUFFER)
#define GC_BUFFERP(x) GC_PSEUDOVECTORP (x, PVEC_BUFFER)
#define CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_CHAR_TABLE)
+#define SUB_CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_SUB_CHAR_TABLE)
+#define GC_SUB_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_SUB_CHAR_TABLE)
#define GC_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_CHAR_TABLE)
#define BOOL_VECTOR_P(x) PSEUDOVECTORP (x, PVEC_BOOL_VECTOR)
#define GC_BOOL_VECTOR_P(x) GC_PSEUDOVECTORP (x, PVEC_BOOL_VECTOR)
#define FRAMEP(x) PSEUDOVECTORP (x, PVEC_FRAME)
#define GC_FRAMEP(x) GC_PSEUDOVECTORP (x, PVEC_FRAME)
-#define SUB_CHAR_TABLE_P(x) (CHAR_TABLE_P (x) && NILP (XCHAR_TABLE (x)->top))
#define EQ(x, y) (XFASTINT (x) == XFASTINT (y))
#define GC_EQ(x, y) (XGCTYPE (x) == XGCTYPE (y) && XPNTR (x) == XPNTR (y))
@@ -1530,6 +1536,20 @@ typedef unsigned char UCHAR;
XSETCDR ((x), tmp); \
} while (0)
+#define CHECK_NATNUM_CAR(x) \
+ do { \
+ Lisp_Object tmp = XCAR (x); \
+ CHECK_NATNUM (tmp); \
+ XSETCAR ((x), tmp); \
+ } while (0)
+
+#define CHECK_NATNUM_CDR(x) \
+ do { \
+ Lisp_Object tmp = XCDR (x); \
+ CHECK_NATNUM (tmp); \
+ XSETCDR ((x), tmp); \
+ } while (0)
+
/* Cast pointers to this type to compare them. Some machines want int. */
#ifndef PNTR_COMPARISON_TYPE
#define PNTR_COMPARISON_TYPE EMACS_UINT
@@ -2132,29 +2152,34 @@ EXFUN (Fread_coding_system, 2);
EXFUN (Fread_non_nil_coding_system, 1);
EXFUN (Ffind_operation_coding_system, MANY);
EXFUN (Fupdate_coding_systems_internal, 0);
-EXFUN (Fencode_coding_string, 3);
-EXFUN (Fdecode_coding_string, 3);
+EXFUN (Fencode_coding_string, 4);
+EXFUN (Fdecode_coding_string, 4);
extern Lisp_Object detect_coding_system P_ ((const unsigned char *, int, int,
- int));
+ int, Lisp_Object));
extern void init_coding P_ ((void));
extern void init_coding_once P_ ((void));
extern void syms_of_coding P_ ((void));
-extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
- int));
+
+/* Defined in character.c */
+extern void init_character_once P_ ((void));
+extern void syms_of_character P_ ((void));
/* Defined in charset.c */
-extern EMACS_INT nonascii_insert_offset;
-extern Lisp_Object Vnonascii_translation_table;
EXFUN (Fchar_bytes, 1);
EXFUN (Fchar_width, 1);
EXFUN (Fstring, MANY);
extern int chars_in_text P_ ((const unsigned char *, int));
extern int multibyte_chars_in_text P_ ((const unsigned char *, int));
-extern int unibyte_char_to_multibyte P_ ((int));
extern int multibyte_char_to_unibyte P_ ((int, Lisp_Object));
extern Lisp_Object Qcharset;
+extern void init_charset P_ ((void));
extern void init_charset_once P_ ((void));
extern void syms_of_charset P_ ((void));
+/* Structure forward declarations. */
+struct charset;
+
+/* Defined in composite.c */
+extern void syms_of_composite P_ ((void));
/* Defined in syntax.c */
EXFUN (Fforward_word, 1);
@@ -2172,9 +2197,8 @@ extern int next_almost_prime P_ ((int));
extern Lisp_Object larger_vector P_ ((Lisp_Object, int, Lisp_Object));
extern void sweep_weak_hash_tables P_ ((void));
extern Lisp_Object Qstring_lessp;
-EXFUN (Foptimize_char_table, 1);
extern Lisp_Object Vfeatures;
-extern Lisp_Object QCtest, QCweakness, Qequal;
+extern Lisp_Object QCtest, QCweakness, Qequal, Qeq;
unsigned sxhash P_ ((Lisp_Object, int));
Lisp_Object make_hash_table P_ ((Lisp_Object, Lisp_Object, Lisp_Object,
Lisp_Object, Lisp_Object, Lisp_Object,
@@ -2189,6 +2213,7 @@ void remove_hash_entry P_ ((struct Lisp_Hash_Table *, int));
extern void init_fns P_ ((void));
EXFUN (Fsxhash, 1);
EXFUN (Fmake_hash_table, MANY);
+EXFUN (Fmakehash, 1);
EXFUN (Fcopy_hash_table, 1);
EXFUN (Fhash_table_count, 1);
EXFUN (Fhash_table_rehash_size, 1);
@@ -2255,18 +2280,10 @@ EXFUN (Fcopy_alist, 1);
EXFUN (Fplist_get, 2);
EXFUN (Fplist_put, 3);
EXFUN (Fplist_member, 2);
-EXFUN (Fset_char_table_parent, 2);
-EXFUN (Fchar_table_extra_slot, 2);
-EXFUN (Fset_char_table_extra_slot, 3);
EXFUN (Frassoc, 2);
EXFUN (Fstring_equal, 2);
EXFUN (Fcompare_strings, 7);
EXFUN (Fstring_lessp, 2);
-extern int char_table_translate P_ ((Lisp_Object, int));
-extern void map_char_table P_ ((void (*) (Lisp_Object, Lisp_Object, Lisp_Object),
- Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, int,
- Lisp_Object *));
-extern Lisp_Object char_table_ref_and_index P_ ((Lisp_Object, int, int *));
extern void syms_of_fns P_ ((void));
/* Defined in floatfns.c */
@@ -2289,6 +2306,7 @@ extern void insert P_ ((const unsigned char *, int));
extern void insert_and_inherit P_ ((const unsigned char *, int));
extern void insert_1 P_ ((const unsigned char *, int, int, int, int));
extern void insert_1_both P_ ((const unsigned char *, int, int, int, int, int));
+extern void insert_from_gap P_ ((int, int));
extern void insert_from_string P_ ((Lisp_Object, int, int, int, int, int));
extern void insert_from_buffer P_ ((struct buffer *, int, int, int));
extern void insert_char P_ ((int));
@@ -2411,8 +2429,6 @@ extern Lisp_Object make_pure_vector P_ ((EMACS_INT));
EXFUN (Fgarbage_collect, 0);
EXFUN (Fmake_byte_code, MANY);
EXFUN (Fmake_bool_vector, 2);
-EXFUN (Fmake_char_table, 2);
-extern Lisp_Object make_sub_char_table P_ ((Lisp_Object));
extern Lisp_Object Qchar_table_extra_slots;
extern struct Lisp_Vector *allocate_vector P_ ((EMACS_INT));
extern struct Lisp_Vector *allocate_other_vector P_ ((EMACS_INT));
@@ -2433,6 +2449,31 @@ extern void init_alloc P_ ((void));
extern void syms_of_alloc P_ ((void));
extern struct buffer * allocate_buffer P_ ((void));
+/* Defined in chartab.c */
+EXFUN (Fmake_char_table, 2);
+EXFUN (Fchar_table_parent, 1);
+EXFUN (Fset_char_table_parent, 2);
+EXFUN (Fchar_table_extra_slot, 2);
+EXFUN (Fset_char_table_extra_slot, 3);
+EXFUN (Fchar_table_range, 2);
+EXFUN (Fset_char_table_range, 3);
+EXFUN (Fset_char_table_default, 3);
+EXFUN (Foptimize_char_table, 1);
+EXFUN (Fmap_char_table, 2);
+extern Lisp_Object copy_char_table P_ ((Lisp_Object));
+extern Lisp_Object sub_char_table_ref P_ ((Lisp_Object, int));
+extern Lisp_Object char_table_ref P_ ((Lisp_Object, int));
+extern Lisp_Object char_table_ref_and_range P_ ((Lisp_Object, int,
+ int *, int *));
+extern Lisp_Object char_table_set P_ ((Lisp_Object, int, Lisp_Object));
+extern Lisp_Object char_table_set_range P_ ((Lisp_Object, int, int,
+ Lisp_Object));
+extern int char_table_translate P_ ((Lisp_Object, int));
+extern void map_char_table P_ ((void (*) (Lisp_Object, Lisp_Object,
+ Lisp_Object),
+ Lisp_Object, Lisp_Object, Lisp_Object));
+extern void syms_of_chartab P_ ((void));
+
/* Defined in print.c */
extern Lisp_Object Vprin1_to_string_buffer;
extern void debug_print P_ ((Lisp_Object));
@@ -2639,6 +2680,7 @@ extern int overlay_touches_p P_ ((int));
extern Lisp_Object Vbuffer_alist, Vinhibit_read_only;
EXFUN (Fget_buffer, 1);
EXFUN (Fget_buffer_create, 1);
+EXFUN (Fgenerate_new_buffer_name, 2);
EXFUN (Fset_buffer, 1);
EXFUN (set_buffer_if_live, 1);
EXFUN (Fbarf_if_buffer_read_only, 0);
@@ -2963,6 +3005,7 @@ extern Lisp_Object Qinsert_in_front_hooks, Qinsert_behind_hooks;
EXFUN (Fnext_single_property_change, 4);
EXFUN (Fnext_single_char_property_change, 4);
EXFUN (Fprevious_single_property_change, 4);
+EXFUN (Fget_text_property, 3);
EXFUN (Fput_text_property, 5);
EXFUN (Fprevious_char_property_change, 2);
EXFUN (Fnext_char_property_change, 2);
@@ -3019,6 +3062,7 @@ extern void init_sound P_ ((void));
/* Defined in category.c */
extern void init_category_once P_ ((void));
+extern Lisp_Object char_category_set P_ ((int));
extern void syms_of_category P_ ((void));
/* Defined in ccl.c */
@@ -3035,7 +3079,8 @@ extern void fatal () NO_RETURN;
#ifdef HAVE_X_WINDOWS
/* Defined in fontset.c */
extern void syms_of_fontset P_ ((void));
-EXFUN (Fset_fontset_font, 4);
+EXFUN (Fset_fontset_font, 5);
+EXFUN (Fnew_fontset, 2);
#endif
/* Defined in xfaces.c */
diff --git a/src/lread.c b/src/lread.c
index 0c9bc140b73..256df2776a5 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -29,7 +29,9 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
+#include "coding.h"
#include <epaths.h>
#include "commands.h"
#include "keyboard.h"
@@ -86,6 +88,12 @@ Lisp_Object Qascii_character, Qload, Qload_file_name;
Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
Lisp_Object Qinhibit_file_name_operation;
+/* Used instead of Qget_file_char while loading *.elc files compiled
+ by Emacs 21 or older. */
+static Lisp_Object Qget_emacs_mule_file_char;
+
+static Lisp_Object Qload_force_doc_strings;
+
extern Lisp_Object Qevent_symbol_element_mask;
extern Lisp_Object Qfile_exists_p;
@@ -129,6 +137,11 @@ static int load_force_doc_strings;
/* Nonzero means read should convert strings to unibyte. */
static int load_convert_to_unibyte;
+/* Nonzero means READCHAR should read bytes one by one (not character)
+ when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char.
+ This is set to 1 by read1 temporarily while handling #@NUMBER. */
+static int load_each_byte;
+
/* Function to use for loading an Emacs lisp source file (not
compiled) instead of readevalloop. */
Lisp_Object Vload_source_file_function;
@@ -157,9 +170,6 @@ static int read_from_string_index;
static int read_from_string_index_byte;
static int read_from_string_limit;
-/* Number of bytes left to read in the buffer character
- that `readchar' has already advanced over. */
-static int readchar_backlog;
/* Number of characters read in the current call to Fread or
Fread_from_string. */
static int readchar_count;
@@ -203,7 +213,9 @@ int load_dangerous_libraries;
static Lisp_Object Vbytecomp_version_regexp;
-static void to_multibyte P_ ((char **, char **, int *));
+static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object),
+ Lisp_Object));
+
static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object,
Lisp_Object (*) (), int,
Lisp_Object, Lisp_Object));
@@ -211,29 +223,41 @@ static Lisp_Object load_unwind P_ ((Lisp_Object));
static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object));
+/* Functions that read one byte from the current source READCHARFUN
+ or unreads one byte. If the integer argument C is -1, it returns
+ one read byte, or -1 when there's no more byte in the source. If C
+ is 0 or positive, it unreads C, and the return value is not
+ interesting. */
+
+static int readbyte_for_lambda P_ ((int, Lisp_Object));
+static int readbyte_from_file P_ ((int, Lisp_Object));
+static int readbyte_from_string P_ ((int, Lisp_Object));
+
/* Handle unreading and rereading of characters.
Write READCHAR to read a character,
UNREAD(c) to unread c to be read again.
- The READCHAR and UNREAD macros are meant for reading/unreading a
- byte code; they do not handle multibyte characters. The caller
- should manage them if necessary.
-
- [ Actually that seems to be a lie; READCHAR will definitely read
- multibyte characters from buffer sources, at least. Is the
- comment just out of date?
- -- Colin Walters <walters@gnu.org>, 22 May 2002 16:36:50 -0400 ]
- */
+ These macros correctly read/unread multibyte characters. */
#define READCHAR readchar (readcharfun)
#define UNREAD(c) unreadchar (readcharfun, c)
+/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
+ Qlambda, or a cons, we use this to keep an unread character because
+ a file stream can't handle multibyte-char unreading. The value -1
+ means that there's no unread character. */
+static int unread_char;
+
static int
readchar (readcharfun)
Lisp_Object readcharfun;
{
Lisp_Object tem;
register int c;
+ int (*readbyte) P_ ((int, Lisp_Object));
+ unsigned char buf[MAX_MULTIBYTE_LENGTH];
+ int i, len;
+ int emacs_mule_encoding = 0;
readchar_count++;
@@ -242,21 +266,10 @@ readchar (readcharfun)
register struct buffer *inbuffer = XBUFFER (readcharfun);
int pt_byte = BUF_PT_BYTE (inbuffer);
- int orig_pt_byte = pt_byte;
-
- if (readchar_backlog > 0)
- /* We get the address of the byte just passed,
- which is the last byte of the character.
- The other bytes in this character are consecutive with it,
- because the gap can't be in the middle of a character. */
- return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
- - --readchar_backlog);
if (pt_byte >= BUF_ZV_BYTE (inbuffer))
return -1;
- readchar_backlog = -1;
-
if (! NILP (inbuffer->enable_multibyte_characters))
{
/* Fetch the character code from the buffer. */
@@ -267,6 +280,8 @@ readchar (readcharfun)
else
{
c = BUF_FETCH_BYTE (inbuffer, pt_byte);
+ if (! ASCII_BYTE_P (c))
+ c = BYTE8_TO_CHAR (c);
pt_byte++;
}
SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
@@ -278,21 +293,10 @@ readchar (readcharfun)
register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
int bytepos = marker_byte_position (readcharfun);
- int orig_bytepos = bytepos;
-
- if (readchar_backlog > 0)
- /* We get the address of the byte just passed,
- which is the last byte of the character.
- The other bytes in this character are consecutive with it,
- because the gap can't be in the middle of a character. */
- return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
- - --readchar_backlog);
if (bytepos >= BUF_ZV_BYTE (inbuffer))
return -1;
- readchar_backlog = -1;
-
if (! NILP (inbuffer->enable_multibyte_characters))
{
/* Fetch the character code from the buffer. */
@@ -303,6 +307,8 @@ readchar (readcharfun)
else
{
c = BUF_FETCH_BYTE (inbuffer, bytepos);
+ if (! ASCII_BYTE_P (c))
+ c = BYTE8_TO_CHAR (c);
bytepos++;
}
@@ -313,20 +319,15 @@ readchar (readcharfun)
}
if (EQ (readcharfun, Qlambda))
- return read_bytecode_char (0);
+ {
+ readbyte = readbyte_for_lambda;
+ goto read_multibyte;
+ }
if (EQ (readcharfun, Qget_file_char))
{
- c = getc (instream);
-#ifdef EINTR
- /* Interrupted reads have been observed while reading over the network */
- while (c == EOF && ferror (instream) && errno == EINTR)
- {
- clearerr (instream);
- c = getc (instream);
- }
-#endif
- return c;
+ readbyte = readbyte_from_file;
+ goto read_multibyte;
}
if (STRINGP (readcharfun))
@@ -341,11 +342,59 @@ readchar (readcharfun)
return c;
}
+ if (CONSP (readcharfun))
+ {
+ /* This is the case that read_vector is reading from a unibyte
+ string that contains a byte sequence previously skipped
+ because of #@NUMBER. The car part of readcharfun is that
+ string, and the cdr part is a value of readcharfun given to
+ read_vector. */
+ readbyte = readbyte_from_string;
+ if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
+ emacs_mule_encoding = 1;
+ goto read_multibyte;
+ }
+
+ if (EQ (readcharfun, Qget_emacs_mule_file_char))
+ {
+ readbyte = readbyte_from_file;
+ emacs_mule_encoding = 1;
+ goto read_multibyte;
+ }
+
tem = call0 (readcharfun);
if (NILP (tem))
return -1;
return XINT (tem);
+
+ read_multibyte:
+ if (unread_char >= 0)
+ {
+ c = unread_char;
+ unread_char = -1;
+ return c;
+ }
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0 || ASCII_BYTE_P (c) || load_each_byte)
+ return c;
+ if (emacs_mule_encoding)
+ return read_emacs_mule_char (c, readbyte, readcharfun);
+ i = 0;
+ buf[i++] = c;
+ len = BYTES_BY_CHAR_HEAD (c);
+ while (i < len)
+ {
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0 || ! TRAILING_CODE_P (c))
+ {
+ while (--i > 1)
+ (*readbyte) (buf[i], readcharfun);
+ return BYTE8_TO_CHAR (buf[0]);
+ }
+ buf[i++] = c;
+ }
+ return STRING_CHAR (buf, i);
}
/* Unread the character C in the way appropriate for the stream READCHARFUN.
@@ -366,36 +415,26 @@ unreadchar (readcharfun, c)
struct buffer *b = XBUFFER (readcharfun);
int bytepos = BUF_PT_BYTE (b);
- if (readchar_backlog >= 0)
- readchar_backlog++;
+ BUF_PT (b)--;
+ if (! NILP (b->enable_multibyte_characters))
+ BUF_DEC_POS (b, bytepos);
else
- {
- BUF_PT (b)--;
- if (! NILP (b->enable_multibyte_characters))
- BUF_DEC_POS (b, bytepos);
- else
- bytepos--;
+ bytepos--;
- BUF_PT_BYTE (b) = bytepos;
- }
+ BUF_PT_BYTE (b) = bytepos;
}
else if (MARKERP (readcharfun))
{
struct buffer *b = XMARKER (readcharfun)->buffer;
int bytepos = XMARKER (readcharfun)->bytepos;
- if (readchar_backlog >= 0)
- readchar_backlog++;
+ XMARKER (readcharfun)->charpos--;
+ if (! NILP (b->enable_multibyte_characters))
+ BUF_DEC_POS (b, bytepos);
else
- {
- XMARKER (readcharfun)->charpos--;
- if (! NILP (b->enable_multibyte_characters))
- BUF_DEC_POS (b, bytepos);
- else
- bytepos--;
+ bytepos--;
- XMARKER (readcharfun)->bytepos = bytepos;
- }
+ XMARKER (readcharfun)->bytepos = bytepos;
}
else if (STRINGP (readcharfun))
{
@@ -403,14 +442,151 @@ unreadchar (readcharfun, c)
read_from_string_index_byte
= string_char_to_byte (readcharfun, read_from_string_index);
}
+ else if (CONSP (readcharfun))
+ {
+ unread_char = c;
+ }
else if (EQ (readcharfun, Qlambda))
- read_bytecode_char (1);
- else if (EQ (readcharfun, Qget_file_char))
- ungetc (c, instream);
+ {
+ unread_char = c;
+ }
+ else if (EQ (readcharfun, Qget_file_char)
+ || EQ (readcharfun, Qget_emacs_mule_file_char))
+ {
+ if (load_each_byte)
+ ungetc (c, instream);
+ else
+ unread_char = c;
+ }
else
call1 (readcharfun, make_number (c));
}
+static int
+readbyte_for_lambda (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ return read_bytecode_char (c >= 0);
+}
+
+
+static int
+readbyte_from_file (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ if (c >= 0)
+ {
+ ungetc (c, instream);
+ return 0;
+ }
+
+ c = getc (instream);
+#ifdef EINTR
+ /* Interrupted reads have been observed while reading over the network */
+ while (c == EOF && ferror (instream) && errno == EINTR)
+ {
+ clearerr (instream);
+ c = getc (instream);
+ }
+#endif
+ return (c == EOF ? -1 : c);
+}
+
+static int
+readbyte_from_string (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ Lisp_Object string = XCAR (readcharfun);
+
+ if (c >= 0)
+ {
+ read_from_string_index--;
+ read_from_string_index_byte
+ = string_char_to_byte (string, read_from_string_index);
+ }
+
+ if (read_from_string_index >= read_from_string_limit)
+ c = -1;
+ else
+ FETCH_STRING_CHAR_ADVANCE (c, string,
+ read_from_string_index,
+ read_from_string_index_byte);
+ return c;
+}
+
+
+/* Read one non-ASCII character from INSTREAM. The character is
+ encoded in `emacs-mule' and the first byte is already read in
+ C. */
+
+extern char emacs_mule_bytes[256];
+
+static int
+read_emacs_mule_char (c, readbyte, readcharfun)
+ int c;
+ int (*readbyte) P_ ((int, Lisp_Object));
+ Lisp_Object readcharfun;
+{
+ /* Emacs-mule coding uses at most 4-byte for one character. */
+ unsigned char buf[4];
+ int len = emacs_mule_bytes[c];
+ struct charset *charset;
+ int i;
+ unsigned code;
+
+ if (len == 1)
+ /* C is not a valid leading-code of `emacs-mule'. */
+ return BYTE8_TO_CHAR (c);
+
+ i = 0;
+ buf[i++] = c;
+ while (i < len)
+ {
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0xA0)
+ {
+ while (--i > 1)
+ (*readbyte) (buf[i], readcharfun);
+ return BYTE8_TO_CHAR (buf[0]);
+ }
+ buf[i++] = c;
+ }
+
+ if (len == 2)
+ {
+ charset = emacs_mule_charset[buf[0]];
+ code = buf[1] & 0x7F;
+ }
+ else if (len == 3)
+ {
+ if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
+ || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ {
+ charset = emacs_mule_charset[buf[1]];
+ code = buf[2] & 0x7F;
+ }
+ else
+ {
+ charset = emacs_mule_charset[buf[0]];
+ code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
+ }
+ }
+ else
+ {
+ charset = emacs_mule_charset[buf[1]];
+ code = ((buf[2] << 8) | buf[3]) & 0x7F7F;
+ }
+ c = DECODE_CHAR (charset, code);
+ if (c < 0)
+ Fsignal (Qinvalid_read_syntax,
+ Fcons (build_string ("invalid multibyte form"), Qnil));
+ return c;
+}
+
+
static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object,
Lisp_Object));
static Lisp_Object read0 P_ ((Lisp_Object));
@@ -418,7 +594,6 @@ static Lisp_Object read1 P_ ((Lisp_Object, int *, int));
static Lisp_Object read_list P_ ((int, Lisp_Object));
static Lisp_Object read_vector P_ ((Lisp_Object, int));
-static int read_multibyte P_ ((int, Lisp_Object));
static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object,
Lisp_Object));
@@ -593,11 +768,11 @@ DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0,
-/* Value is non-zero if the file asswociated with file descriptor FD
- is a compiled Lisp file that's safe to load. Only files compiled
- with Emacs are safe to load. Files compiled with XEmacs can lead
- to a crash in Fbyte_code because of an incompatible change in the
- byte compiler. */
+/* Value is a version number of byte compiled code if the file
+ asswociated with file descriptor FD is a compiled Lisp file that's
+ safe to load. Only files compiled with Emacs are safe to load.
+ Files compiled with XEmacs can lead to a crash in Fbyte_code
+ because of an incompatible change in the byte compiler. */
static int
safe_to_load_p (fd)
@@ -606,6 +781,7 @@ safe_to_load_p (fd)
char buf[512];
int nbytes, i;
int safe_p = 1;
+ int version = 1;
/* Read the first few bytes from the file, and look for a line
specifying the byte compiler version used. */
@@ -615,15 +791,18 @@ safe_to_load_p (fd)
buf[nbytes] = '\0';
/* Skip to the next newline, skipping over the initial `ELC'
- with NUL bytes following it. */
+ with NUL bytes following it, but note the version. */
for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
- ;
+ if (i == 4)
+ version = buf[i];
- if (i < nbytes
- && fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
+ if (i == nbytes
+ || fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
buf + i) < 0)
safe_p = 0;
}
+ if (safe_p)
+ safe_p = version;
lseek (fd, 0, SEEK_SET);
return safe_p;
@@ -683,6 +862,8 @@ Return t if file exists. */)
Lisp_Object handler;
int safe_p = 1;
char *fmode = "r";
+ int version;
+
#ifdef DOS_NT
fmode = "rt";
#endif /* DOS_NT */
@@ -798,8 +979,10 @@ Return t if file exists. */)
Vloads_in_progress = Fcons (found, Vloads_in_progress);
}
+ version = -1;
if (!bcmp (SDATA (found) + SBYTES (found) - 4,
- ".elc", 4))
+ ".elc", 4)
+ || (version = safe_to_load_p (fd)) > 0)
/* Load .elc files directly, but not when they are
remote and have no handler! */
{
@@ -808,7 +991,8 @@ Return t if file exists. */)
struct stat s1, s2;
int result;
- if (!safe_to_load_p (fd))
+ if (version < 0
+ && ! (version = safe_to_load_p (fd)))
{
safe_p = 0;
if (!load_dangerous_libraries)
@@ -911,7 +1095,16 @@ Return t if file exists. */)
load_descriptor_list
= Fcons (make_number (fileno (stream)), load_descriptor_list);
load_in_progress++;
- readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+ if (! version || version >= 22)
+ readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+ else
+ {
+ /* We can't handle a file which was compiled with
+ byte-compile-dynamic by older version of Emacs. */
+ specbind (Qload_force_doc_strings, Qt);
+ readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, 0,
+ Qnil, Qnil);
+ }
unbind_to (count, Qnil);
/* Run any load-hooks for this file. */
@@ -1317,8 +1510,6 @@ readevalloop (readcharfun, stream, sourcename, evalfun, printflag, unibyte, read
record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil);
load_convert_to_unibyte = !NILP (unibyte);
- readchar_backlog = -1;
-
GCPRO1 (sourcename);
LOADHIST_ATTACH (sourcename);
@@ -1526,7 +1717,6 @@ read_internal_start (stream, start, end)
{
Lisp_Object retval;
- readchar_backlog = -1;
readchar_count = 0;
new_backquote_flag = 0;
read_objects = Qnil;
@@ -1534,17 +1724,25 @@ read_internal_start (stream, start, end)
|| EQ (Vread_with_symbol_positions, stream))
Vread_symbol_positions_list = Qnil;
- if (STRINGP (stream))
+ if (STRINGP (stream)
+ || ((CONSP (stream) && STRINGP (XCAR (stream)))))
{
int startval, endval;
+ Lisp_Object string;
+
+ if (STRINGP (stream))
+ string = stream;
+ else
+ string = XCAR (stream);
+
if (NILP (end))
- endval = SCHARS (stream);
+ endval = SCHARS (string);
else
{
CHECK_NUMBER (end);
endval = XINT (end);
- if (endval < 0 || endval > SCHARS (stream))
- args_out_of_range (stream, end);
+ if (endval < 0 || endval > SCHARS (string))
+ args_out_of_range (string, end);
}
if (NILP (start))
@@ -1554,10 +1752,10 @@ read_internal_start (stream, start, end)
CHECK_NUMBER (start);
startval = XINT (start);
if (startval < 0 || startval > endval)
- args_out_of_range (stream, start);
+ args_out_of_range (string, start);
}
read_from_string_index = startval;
- read_from_string_index_byte = string_char_to_byte (stream, startval);
+ read_from_string_index_byte = string_char_to_byte (string, startval);
read_from_string_limit = endval;
}
@@ -1590,56 +1788,16 @@ read0 (readcharfun)
static int read_buffer_size;
static char *read_buffer;
-/* Read multibyte form and return it as a character. C is a first
- byte of multibyte form, and rest of them are read from
- READCHARFUN. */
-
-static int
-read_multibyte (c, readcharfun)
- register int c;
- Lisp_Object readcharfun;
-{
- /* We need the actual character code of this multibyte
- characters. */
- unsigned char str[MAX_MULTIBYTE_LENGTH];
- int len = 0;
- int bytes;
-
- if (c < 0)
- return c;
-
- str[len++] = c;
- while ((c = READCHAR) >= 0xA0
- && len < MAX_MULTIBYTE_LENGTH)
- {
- str[len++] = c;
- readchar_count--;
- }
- UNREAD (c);
- if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
- return STRING_CHAR (str, len);
- /* The byte sequence is not valid as multibyte. Unread all bytes
- but the first one, and return the first byte. */
- while (--len > 0)
- UNREAD (str[len]);
- return str[0];
-}
-
/* Read a \-escape sequence, assuming we already read the `\'.
- If the escape sequence forces unibyte, store 1 into *BYTEREP.
- If the escape sequence forces multibyte, store 2 into *BYTEREP.
- Otherwise store 0 into *BYTEREP. */
+ If the escape sequence forces unibyte, return eight-bit char. */
static int
-read_escape (readcharfun, stringp, byterep)
+read_escape (readcharfun, stringp)
Lisp_Object readcharfun;
int stringp;
- int *byterep;
{
register int c = READCHAR;
- *byterep = 0;
-
switch (c)
{
case -1:
@@ -1676,7 +1834,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | meta_modifier;
case 'S':
@@ -1685,7 +1843,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | shift_modifier;
case 'H':
@@ -1694,7 +1852,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | hyper_modifier;
case 'A':
@@ -1703,7 +1861,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | alt_modifier;
case 's':
@@ -1716,7 +1874,7 @@ read_escape (readcharfun, stringp, byterep)
}
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | super_modifier;
case 'C':
@@ -1726,7 +1884,7 @@ read_escape (readcharfun, stringp, byterep)
case '^':
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
if ((c & ~CHAR_MODIFIER_MASK) == '?')
return 0177 | (c & CHAR_MODIFIER_MASK);
else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -1766,7 +1924,8 @@ read_escape (readcharfun, stringp, byterep)
}
}
- *byterep = 1;
+ if (i >= 0x80 && i < 0x100)
+ i = BYTE8_TO_CHAR (i);
return i;
}
@@ -1774,6 +1933,7 @@ read_escape (readcharfun, stringp, byterep)
/* A hex escape, as in ANSI C. */
{
int i = 0;
+ int count = 0;
while (1)
{
c = READCHAR;
@@ -1796,15 +1956,15 @@ read_escape (readcharfun, stringp, byterep)
UNREAD (c);
break;
}
+ count++;
}
- *byterep = 2;
+ if (count < 3 && i >= 0x80)
+ return BYTE8_TO_CHAR (i);
return i;
}
default:
- if (BASE_LEADING_CODE_P (c))
- c = read_multibyte (c, readcharfun);
return c;
}
}
@@ -1876,43 +2036,6 @@ read_integer (readcharfun, radix)
}
-/* Convert unibyte text in read_buffer to multibyte.
-
- Initially, *P is a pointer after the end of the unibyte text, and
- the pointer *END points after the end of read_buffer.
-
- If read_buffer doesn't have enough room to hold the result
- of the conversion, reallocate it and adjust *P and *END.
-
- At the end, make *P point after the result of the conversion, and
- return in *NCHARS the number of characters in the converted
- text. */
-
-static void
-to_multibyte (p, end, nchars)
- char **p, **end;
- int *nchars;
-{
- int nbytes;
-
- parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
- if (read_buffer_size < 2 * nbytes)
- {
- int offset = *p - read_buffer;
- read_buffer_size = 2 * max (read_buffer_size, nbytes);
- read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
- *p = read_buffer + offset;
- *end = read_buffer + read_buffer_size;
- }
-
- if (nbytes != *nchars)
- nbytes = str_as_multibyte (read_buffer, read_buffer_size,
- *p - read_buffer, nchars);
-
- *p = read_buffer + nbytes;
-}
-
-
/* If the next token is ')' or ']' or '.', we store that character
in *PCH and the return value is not interesting. Else, we store
zero in *PCH and we read and return one lisp object.
@@ -1929,6 +2052,7 @@ read1 (readcharfun, pch, first_in_list)
int uninterned_symbol = 0;
*pch = 0;
+ load_each_byte = 0;
retry:
@@ -1960,11 +2084,9 @@ read1 (readcharfun, pch, first_in_list)
{
Lisp_Object tmp;
tmp = read_vector (readcharfun, 0);
- if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS
- || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
+ if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
error ("Invalid size char-table");
XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
- XCHAR_TABLE (tmp)->top = Qt;
return tmp;
}
else if (c == '^')
@@ -1973,11 +2095,18 @@ read1 (readcharfun, pch, first_in_list)
if (c == '[')
{
Lisp_Object tmp;
+ int depth, size;
+
tmp = read_vector (readcharfun, 0);
- if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS)
+ if (!INTEGERP (AREF (tmp, 0)))
+ error ("Invalid depth in char-table");
+ depth = XINT (AREF (tmp, 0));
+ if (depth < 1 || depth > 3)
+ error ("Invalid depth in char-table");
+ size = XVECTOR (tmp)->size + 2;
+ if (chartab_size [depth] != size)
error ("Invalid size char-table");
- XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
- XCHAR_TABLE (tmp)->top = Qnil;
+ XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
return tmp;
}
Fsignal (Qinvalid_read_syntax,
@@ -1998,12 +2127,14 @@ read1 (readcharfun, pch, first_in_list)
UNREAD (c);
tmp = read1 (readcharfun, pch, first_in_list);
- if (size_in_chars != SCHARS (tmp)
- /* We used to print 1 char too many
- when the number of bits was a multiple of 8.
- Accept such input in case it came from an old version. */
- && ! (XFASTINT (length)
- == (SCHARS (tmp) - 1) * BITS_PER_CHAR))
+ if (STRING_MULTIBYTE (tmp)
+ || (size_in_chars != SCHARS (tmp)
+ /* We used to print 1 char too many
+ when the number of bits was a multiple of 8.
+ Accept such input in case it came from an old
+ version. */
+ && ! (XFASTINT (length)
+ == (SCHARS (tmp) - 1) * BITS_PER_CHAR)))
Fsignal (Qinvalid_read_syntax,
Fcons (make_string ("#&...", 5), Qnil));
@@ -2069,6 +2200,7 @@ read1 (readcharfun, pch, first_in_list)
{
int i, nskip = 0;
+ load_each_byte = 1;
/* Read a decimal integer. */
while ((c = READCHAR) >= 0
&& c >= '0' && c <= '9')
@@ -2079,7 +2211,9 @@ read1 (readcharfun, pch, first_in_list)
if (c >= 0)
UNREAD (c);
- if (load_force_doc_strings && EQ (readcharfun, Qget_file_char))
+ if (load_force_doc_strings
+ && (EQ (readcharfun, Qget_file_char)
+ || EQ (readcharfun, Qget_emacs_mule_file_char)))
{
/* If we are supposed to force doc strings into core right now,
record the last string that we skipped,
@@ -2131,6 +2265,7 @@ read1 (readcharfun, pch, first_in_list)
c = READCHAR;
}
+ load_each_byte = 0;
goto retry;
}
if (c == '!')
@@ -2260,7 +2395,7 @@ read1 (readcharfun, pch, first_in_list)
case '?':
{
- int discard;
+ int modifiers;
int next_char;
int ok;
@@ -2276,9 +2411,12 @@ read1 (readcharfun, pch, first_in_list)
return make_number (c);
if (c == '\\')
- c = read_escape (readcharfun, 0, &discard);
- else if (BASE_LEADING_CODE_P (c))
- c = read_multibyte (c, readcharfun);
+ c = read_escape (readcharfun, 0);
+ modifiers = c & CHAR_MODIFIER_MASK;
+ c &= ~CHAR_MODIFIER_MASK;
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ c |= modifiers;
next_char = READCHAR;
if (next_char == '.')
@@ -2313,14 +2451,12 @@ read1 (readcharfun, pch, first_in_list)
char *p = read_buffer;
char *end = read_buffer + read_buffer_size;
register int c;
- /* 1 if we saw an escape sequence specifying
- a multibyte character, or a multibyte character. */
+ /* Nonzero if we saw an escape sequence specifying
+ a multibyte character. */
int force_multibyte = 0;
- /* 1 if we saw an escape sequence specifying
+ /* Nonzero if we saw an escape sequence specifying
a single-byte character. */
int force_singlebyte = 0;
- /* 1 if read_buffer contains multibyte text now. */
- int is_multibyte = 0;
int cancel = 0;
int nchars = 0;
@@ -2338,9 +2474,9 @@ read1 (readcharfun, pch, first_in_list)
if (c == '\\')
{
- int byterep;
+ int modifiers;
- c = read_escape (readcharfun, 1, &byterep);
+ c = read_escape (readcharfun, 1);
/* C is -1 if \ newline has just been seen */
if (c == -1)
@@ -2350,50 +2486,55 @@ read1 (readcharfun, pch, first_in_list)
continue;
}
- if (byterep == 1)
+ modifiers = c & CHAR_MODIFIER_MASK;
+ c = c & ~CHAR_MODIFIER_MASK;
+
+ if (CHAR_BYTE8_P (c))
force_singlebyte = 1;
- else if (byterep == 2)
+ else if (! ASCII_CHAR_P (c))
force_multibyte = 1;
- }
-
- /* A character that must be multibyte forces multibyte. */
- if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
- force_multibyte = 1;
+ else /* i.e. ASCII_CHAR_P (c) */
+ {
+ /* Allow `\C- ' and `\C-?'. */
+ if (modifiers == CHAR_CTL)
+ {
+ if (c == ' ')
+ c = 0, modifiers = 0;
+ else if (c == '?')
+ c = 127, modifiers = 0;
+ }
+ if (modifiers & CHAR_SHIFT)
+ {
+ /* Shift modifier is valid only with [A-Za-z]. */
+ if (c >= 'A' && c <= 'Z')
+ modifiers &= ~CHAR_SHIFT;
+ else if (c >= 'a' && c <= 'z')
+ c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
+ }
+
+ if (modifiers & CHAR_META)
+ {
+ /* Move the meta bit to the right place for a
+ string. */
+ modifiers &= ~CHAR_META;
+ c = BYTE8_TO_CHAR (c | 0x80);
+ force_singlebyte = 1;
+ }
+ }
- /* If we just discovered the need to be multibyte,
- convert the text accumulated thus far. */
- if (force_multibyte && ! is_multibyte)
- {
- is_multibyte = 1;
- to_multibyte (&p, &end, &nchars);
+ /* Any modifiers remaining are invalid. */
+ if (modifiers)
+ error ("Invalid modifier in string");
+ p += CHAR_STRING (c, (unsigned char *) p);
}
-
- /* Allow `\C- ' and `\C-?'. */
- if (c == (CHAR_CTL | ' '))
- c = 0;
- else if (c == (CHAR_CTL | '?'))
- c = 127;
-
- if (c & CHAR_SHIFT)
+ else
{
- /* Shift modifier is valid only with [A-Za-z]. */
- if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
- c &= ~CHAR_SHIFT;
- else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
- c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ p += CHAR_STRING (c, (unsigned char *) p);
+ if (CHAR_BYTE8_P (c))
+ force_singlebyte = 1;
+ else if (! ASCII_CHAR_P (c))
+ force_multibyte = 1;
}
-
- if (c & CHAR_META)
- /* Move the meta bit to the right place for a string. */
- c = (c & ~CHAR_META) | 0x80;
- if (c & CHAR_MODIFIER_MASK)
- error ("Invalid modifier in string");
-
- if (is_multibyte)
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
-
nchars++;
}
@@ -2406,37 +2547,16 @@ read1 (readcharfun, pch, first_in_list)
if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
return make_number (0);
- if (is_multibyte || force_singlebyte)
+ if (force_multibyte)
+ /* READ_BUFFER already contains valid multibyte forms. */
;
- else if (load_convert_to_unibyte)
- {
- Lisp_Object string;
- to_multibyte (&p, &end, &nchars);
- if (p - read_buffer != nchars)
- {
- string = make_multibyte_string (read_buffer, nchars,
- p - read_buffer);
- return Fstring_make_unibyte (string);
- }
- /* We can make a unibyte string directly. */
- is_multibyte = 0;
- }
- else if (EQ (readcharfun, Qget_file_char)
- || EQ (readcharfun, Qlambda))
+ else if (force_singlebyte)
{
- /* Nowadays, reading directly from a file is used only for
- compiled Emacs Lisp files, and those always use the
- Emacs internal encoding. Meanwhile, Qlambda is used
- for reading dynamic byte code (compiled with
- byte-compile-dynamic = t). So make the string multibyte
- if the string contains any multibyte sequences.
- (to_multibyte is a no-op if not.) */
- to_multibyte (&p, &end, &nchars);
- is_multibyte = (p - read_buffer) != nchars;
+ nchars = str_as_unibyte (read_buffer, p - read_buffer);
+ p = read_buffer + nchars;
}
else
- /* In all other cases, if we read these bytes as
- separate characters, treat them as separate characters now. */
+ /* Otherwise, READ_BUFFER contains only ASCII. */
;
/* We want readchar_count to be the number of characters, not
@@ -2446,9 +2566,11 @@ read1 (readcharfun, pch, first_in_list)
/* readchar_count -= (p - read_buffer) - nchars; */
if (read_pure)
return make_pure_string (read_buffer, nchars, p - read_buffer,
- is_multibyte);
+ (force_multibyte
+ || (p - read_buffer != nchars)));
return make_specified_string (read_buffer, nchars, p - read_buffer,
- is_multibyte);
+ (force_multibyte
+ || (p - read_buffer != nchars)));
}
case '.':
@@ -2503,11 +2625,7 @@ read1 (readcharfun, pch, first_in_list)
quoted = 1;
}
- if (! SINGLE_BYTE_CHAR_P (c))
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
-
+ p += CHAR_STRING (c, p);
c = READCHAR;
}
@@ -2541,6 +2659,8 @@ read1 (readcharfun, pch, first_in_list)
{
if (p1[-1] == '.')
p1[-1] = '\0';
+ /* Fixme: if we have strtol, use that, and check
+ for overflow. */
if (sizeof (int) == sizeof (EMACS_INT))
XSETINT (val, atoi (read_buffer));
else if (sizeof (long) == sizeof (EMACS_INT))
@@ -2844,7 +2964,7 @@ read_vector (readcharfun, bytecodeflag)
STRING_SET_CHARS (bytestr, SBYTES (bytestr));
STRING_SET_UNIBYTE (bytestr);
- item = Fread (bytestr);
+ item = Fread (Fcons (bytestr, readcharfun));
if (!CONSP (item))
error ("invalid byte code");
@@ -2857,6 +2977,15 @@ read_vector (readcharfun, bytecodeflag)
/* Now handle the bytecode slot. */
ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr;
}
+ else if (i == COMPILED_DOC_STRING
+ && STRINGP (item)
+ && ! STRING_MULTIBYTE (item))
+ {
+ if (EQ (readcharfun, Qget_emacs_mule_file_char))
+ item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil);
+ else
+ item = Fstring_as_multibyte (item);
+ }
}
ptr[i] = read_pure ? Fpurecopy (item) : item;
otem = XCONS (tem);
@@ -2954,7 +3083,15 @@ read_list (flag, readcharfun)
if (doc_reference == 2)
{
/* Get a doc string from the file we are loading.
- If it's in saved_doc_string, get it from there. */
+ If it's in saved_doc_string, get it from there.
+
+ Here, we don't know if the string is a
+ bytecode string or a doc string. As a
+ bytecode string must be unibyte, we always
+ return a unibyte string. If it is actually a
+ doc string, caller must make it
+ multibyte. */
+
int pos = XINT (XCDR (val));
/* Position is negative for user variables. */
if (pos < 0) pos = -pos;
@@ -2986,8 +3123,8 @@ read_list (flag, readcharfun)
saved_doc_string[to++] = c;
}
- return make_string (saved_doc_string + start,
- to - start);
+ return make_unibyte_string (saved_doc_string + start,
+ to - start);
}
/* Look in prev_saved_doc_string the same way. */
else if (pos >= prev_saved_doc_string_position
@@ -3018,11 +3155,12 @@ read_list (flag, readcharfun)
prev_saved_doc_string[to++] = c;
}
- return make_string (prev_saved_doc_string + start,
- to - start);
+ return make_unibyte_string (prev_saved_doc_string
+ + start,
+ to - start);
}
else
- return get_doc_string (val, 0, 0);
+ return get_doc_string (val, 1, 0);
}
return val;
@@ -3937,6 +4075,12 @@ to load. See also `load-dangerous-libraries'. */);
Qget_file_char = intern ("get-file-char");
staticpro (&Qget_file_char);
+ Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char");
+ staticpro (&Qget_emacs_mule_file_char);
+
+ Qload_force_doc_strings = intern ("load-force-doc-strings");
+ staticpro (&Qload_force_doc_strings);
+
Qbackquote = intern ("`");
staticpro (&Qbackquote);
Qcomma = intern (",");
diff --git a/src/marker.c b/src/marker.c
index abdc123c876..c20ca8d2e84 100644
--- a/src/marker.c
+++ b/src/marker.c
@@ -22,7 +22,7 @@ Boston, MA 02111-1307, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
/* Record one cached position found recently by
buf_charpos_to_bytepos or buf_bytepos_to_charpos. */
diff --git a/src/minibuf.c b/src/minibuf.c
index d265e8063f1..50de309d21b 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -26,7 +26,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "dispextern.h"
#include "keyboard.h"
#include "frame.h"
@@ -2123,23 +2123,14 @@ Return nil if there is no valid completion, else t. */)
/* Now find first word-break in the stuff found by completion.
i gets index in string of where to stop completing. */
- {
- int len, c;
- int bytes = SBYTES (completion);
- completion_string = SDATA (completion);
- for (; i_byte < SBYTES (completion); i_byte += len, i++)
- {
- c = STRING_CHAR_AND_LENGTH (completion_string + i_byte,
- bytes - i_byte,
- len);
- if (SYNTAX (c) != Sword)
- {
- i_byte += len;
- i++;
- break;
- }
- }
- }
+ while (i_byte < SBYTES (completion))
+ {
+ int c;
+
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, completion, i, i_byte);
+ if (SYNTAX (c) != Sword)
+ break;
+ }
/* If got no characters, print help for user. */
@@ -2379,7 +2370,7 @@ DEFUN ("self-insert-and-exit", Fself_insert_and_exit, Sself_insert_and_exit, 0,
doc: /* Terminate minibuffer input. */)
()
{
- if (INTEGERP (last_command_char))
+ if (CHARACTERP (last_command_char))
internal_self_insert (XINT (last_command_char), 0);
else
bitch_at_user ();
diff --git a/src/msdos.c b/src/msdos.c
index 5f2f488b20f..5bf608dc6a7 100644
--- a/src/msdos.c
+++ b/src/msdos.c
@@ -55,7 +55,7 @@ Boston, MA 02111-1307, USA. */
#include "dispextern.h"
#include "dosfns.h"
#include "termopts.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "disptab.h"
#include "frame.h"
@@ -3796,15 +3796,15 @@ XMenuActivate (Display *foo, XMenu *menu, int *pane, int *selidx,
screensize = screen_size * 2;
faces[0]
= lookup_derived_face (sf, intern ("msdos-menu-passive-face"),
- 0, DEFAULT_FACE_ID);
+ DEFAULT_FACE_ID);
faces[1]
= lookup_derived_face (sf, intern ("msdos-menu-active-face"),
- 0, DEFAULT_FACE_ID);
+ DEFAULT_FACE_ID);
selectface = intern ("msdos-menu-select-face");
faces[2] = lookup_derived_face (sf, selectface,
- 0, faces[0]);
+ faces[0]);
faces[3] = lookup_derived_face (sf, selectface,
- 0, faces[1]);
+ faces[1]);
/* Make sure the menu title is always displayed with
`msdos-menu-active-face', no matter where the mouse pointer is. */
diff --git a/src/print.c b/src/print.c
index e06b8a0052d..c8e66095110 100644
--- a/src/print.c
+++ b/src/print.c
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA. */
#include <stdio.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "keyboard.h"
#include "frame.h"
#include "window.h"
@@ -463,11 +463,15 @@ print_string (string, printcharfun)
{
int chars;
+ if (print_escape_nonascii)
+ string = string_escape_byte8 (string);
+
if (STRING_MULTIBYTE (string))
chars = SCHARS (string);
- else if (EQ (printcharfun, Qt)
- ? ! NILP (buffer_defaults.enable_multibyte_characters)
- : ! NILP (current_buffer->enable_multibyte_characters))
+ else if (! print_escape_nonascii
+ && (EQ (printcharfun, Qt)
+ ? ! NILP (buffer_defaults.enable_multibyte_characters)
+ : ! NILP (current_buffer->enable_multibyte_characters)))
{
/* If unibyte string STRING contains 8-bit codes, we must
convert STRING to a multibyte string containing the same
@@ -513,11 +517,6 @@ print_string (string, printcharfun)
int len;
int ch = STRING_CHAR_AND_LENGTH (SDATA (string) + i,
size_byte - i, len);
- if (!CHAR_VALID_P (ch, 0))
- {
- ch = SREF (string, i);
- len = 1;
- }
PRINTCHAR (ch);
i += len;
}
@@ -1435,10 +1434,7 @@ print_object (obj, printcharfun, escapeflag)
{
c = STRING_CHAR_AND_LENGTH (str + i_byte,
size_byte - i_byte, len);
- if (CHAR_VALID_P (c, 0))
- i_byte += len;
- else
- c = str[i_byte++];
+ i_byte += len;
}
else
c = str[i_byte++];
@@ -1456,8 +1452,7 @@ print_object (obj, printcharfun, escapeflag)
PRINTCHAR ('f');
}
else if (multibyte
- && ! ASCII_BYTE_P (c)
- && (SINGLE_BYTE_CHAR_P (c) || print_escape_multibyte))
+ && (CHAR_BYTE8_P (c) || print_escape_multibyte))
{
/* When multibyte is disabled,
print multibyte string chars using hex escapes.
@@ -1465,9 +1460,15 @@ print_object (obj, printcharfun, escapeflag)
when found in a multibyte string, always use a hex escape
so it reads back as multibyte. */
unsigned char outbuf[50];
- sprintf (outbuf, "\\x%x", c);
+
+ if (CHAR_BYTE8_P (c))
+ sprintf (outbuf, "\\%03o", CHAR_TO_BYTE8 (c));
+ else
+ {
+ sprintf (outbuf, "\\x%04x", c);
+ need_nonhex = 1;
+ }
strout (outbuf, -1, -1, printcharfun, 0);
- need_nonhex = 1;
}
else if (! multibyte
&& SINGLE_BYTE_CHAR_P (c) && ! ASCII_BYTE_P (c)
@@ -1732,7 +1733,12 @@ print_object (obj, printcharfun, escapeflag)
PRINTCHAR ('#');
PRINTCHAR ('&');
- sprintf (buf, "%d", XBOOL_VECTOR (obj)->size);
+ if (sizeof (int) == sizeof (EMACS_INT))
+ sprintf (buf, "%d", XBOOL_VECTOR (obj)->size);
+ else if (sizeof (long) == sizeof (EMACS_INT))
+ sprintf (buf, "%ld", XBOOL_VECTOR (obj)->size);
+ else
+ abort ();
strout (buf, -1, -1, printcharfun, 0);
PRINTCHAR ('\"');
@@ -1747,7 +1753,12 @@ print_object (obj, printcharfun, escapeflag)
{
QUIT;
c = XBOOL_VECTOR (obj)->data[i];
- if (c == '\n' && print_escape_newlines)
+ if (! ASCII_BYTE_P (c))
+ {
+ sprintf (buf, "\\%03o", c);
+ strout (buf, -1, -1, printcharfun, 0);
+ }
+ else if (c == '\n' && print_escape_newlines)
{
PRINTCHAR ('\\');
PRINTCHAR ('n');
@@ -1841,7 +1852,7 @@ print_object (obj, printcharfun, escapeflag)
PRINTCHAR ('#');
size &= PSEUDOVECTOR_SIZE_MASK;
}
- if (CHAR_TABLE_P (obj))
+ if (CHAR_TABLE_P (obj) || SUB_CHAR_TABLE_P (obj))
{
/* We print a char-table as if it were a vector,
lumping the parent and default slots in with the
diff --git a/src/process.c b/src/process.c
index c4ba96a9e5c..3051b81f7cd 100644
--- a/src/process.c
+++ b/src/process.c
@@ -112,7 +112,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "window.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "process.h"
#include "termhooks.h"
@@ -600,6 +600,7 @@ setup_process_coding_systems (process)
struct Lisp_Process *p = XPROCESS (process);
int inch = XINT (p->infd);
int outch = XINT (p->outfd);
+ Lisp_Object coding_system;
if (inch < 0 || outch < 0)
return;
@@ -607,18 +608,18 @@ setup_process_coding_systems (process)
if (!proc_decode_coding_system[inch])
proc_decode_coding_system[inch]
= (struct coding_system *) xmalloc (sizeof (struct coding_system));
- setup_coding_system (p->decode_coding_system,
- proc_decode_coding_system[inch]);
+ coding_system = p->decode_coding_system;
if (! NILP (p->filter))
{
if (NILP (p->filter_multibyte))
- setup_raw_text_coding_system (proc_decode_coding_system[inch]);
+ coding_system = raw_text_coding_system (coding_system);
}
else if (BUFFERP (p->buffer))
{
if (NILP (XBUFFER (p->buffer)->enable_multibyte_characters))
- setup_raw_text_coding_system (proc_decode_coding_system[inch]);
+ coding_system = raw_text_coding_system (coding_system);
}
+ setup_coding_system (coding_system, proc_decode_coding_system[inch]);
if (!proc_encode_coding_system[outch])
proc_encode_coding_system[outch]
@@ -4400,7 +4401,7 @@ read_process_output (proc, channel)
Lisp_Object proc;
register int channel;
{
- register int nchars, nbytes;
+ register int nbytes;
char *chars;
register Lisp_Object outstream;
register struct buffer *old = current_buffer;
@@ -4534,13 +4535,13 @@ read_process_output (proc, channel)
save the match data in a special nonrecursive fashion. */
running_asynch_code = 1;
- text = decode_coding_string (make_unibyte_string (chars, nbytes),
- coding, 0);
- Vlast_coding_system_used = coding->symbol;
+ decode_coding_c_string (coding, chars, nbytes, Qt);
+ text = coding->dst_object;
+ Vlast_coding_system_used = CODING_ID_NAME (coding->id);
/* A new coding system might be found. */
- if (!EQ (p->decode_coding_system, coding->symbol))
+ if (!EQ (p->decode_coding_system, Vlast_coding_system_used))
{
- p->decode_coding_system = coding->symbol;
+ p->decode_coding_system = Vlast_coding_system_used;
/* Don't call setup_coding_system for
proc_decode_coding_system[channel] here. It is done in
@@ -4556,16 +4557,18 @@ read_process_output (proc, channel)
if (NILP (p->encode_coding_system)
&& proc_encode_coding_system[XINT (p->outfd)])
{
- p->encode_coding_system = coding->symbol;
- setup_coding_system (coding->symbol,
+ p->encode_coding_system = Vlast_coding_system_used;
+ setup_coding_system (p->encode_coding_system,
proc_encode_coding_system[XINT (p->outfd)]);
}
}
- carryover = nbytes - coding->consumed;
- bcopy (chars + coding->consumed, SDATA (p->decoding_buf),
- carryover);
- XSETINT (p->decoding_carryover, carryover);
+ if (coding->carryover_bytes > 0)
+ {
+ bcopy (coding->carryover, SDATA (p->decoding_buf),
+ coding->carryover_bytes);
+ XSETINT (p->decoding_carryover, coding->carryover_bytes);
+ }
/* Adjust the multibyteness of TEXT to that of the filter. */
if (NILP (p->filter_multibyte) != ! STRING_MULTIBYTE (text))
text = (STRING_MULTIBYTE (text)
@@ -4650,26 +4653,28 @@ read_process_output (proc, channel)
if (! (BEGV <= PT && PT <= ZV))
Fwiden ();
- text = decode_coding_string (make_unibyte_string (chars, nbytes),
- coding, 0);
- Vlast_coding_system_used = coding->symbol;
+ decode_coding_c_string (coding, chars, nbytes, Qt);
+ text = coding->dst_object;
+ Vlast_coding_system_used = CODING_ID_NAME (coding->id);
/* A new coding system might be found. See the comment in the
similar code in the previous `if' block. */
- if (!EQ (p->decode_coding_system, coding->symbol))
+ if (!EQ (p->decode_coding_system, Vlast_coding_system_used))
{
- p->decode_coding_system = coding->symbol;
+ p->decode_coding_system = Vlast_coding_system_used;
if (NILP (p->encode_coding_system)
&& proc_encode_coding_system[XINT (p->outfd)])
{
- p->encode_coding_system = coding->symbol;
- setup_coding_system (coding->symbol,
+ p->encode_coding_system = Vlast_coding_system_used;
+ setup_coding_system (p->encode_coding_system,
proc_encode_coding_system[XINT (p->outfd)]);
}
}
- carryover = nbytes - coding->consumed;
- bcopy (chars + coding->consumed, SDATA (p->decoding_buf),
- carryover);
- XSETINT (p->decoding_carryover, carryover);
+ if (coding->carryover_bytes > 0)
+ {
+ bcopy (coding->carryover, SDATA (p->decoding_buf),
+ coding->carryover_bytes);
+ XSETINT (p->decoding_carryover, coding->carryover_bytes);
+ }
/* Adjust the multibyteness of TEXT to that of the buffer. */
if (NILP (current_buffer->enable_multibyte_characters)
!= ! STRING_MULTIBYTE (text))
@@ -4790,86 +4795,77 @@ send_process (proc, buf, len, object)
SDATA (XPROCESS (proc)->name));
coding = proc_encode_coding_system[XINT (XPROCESS (proc)->outfd)];
- Vlast_coding_system_used = coding->symbol;
+ Vlast_coding_system_used = CODING_ID_NAME (coding->id);
if ((STRINGP (object) && STRING_MULTIBYTE (object))
|| (BUFFERP (object)
&& !NILP (XBUFFER (object)->enable_multibyte_characters))
|| EQ (object, Qt))
{
- if (!EQ (coding->symbol, XPROCESS (proc)->encode_coding_system))
+ if (!EQ (Vlast_coding_system_used,
+ XPROCESS (proc)->encode_coding_system))
/* The coding system for encoding was changed to raw-text
because we sent a unibyte text previously. Now we are
sending a multibyte text, thus we must encode it by the
original coding system specified for the current
process. */
setup_coding_system (XPROCESS (proc)->encode_coding_system, coding);
- /* src_multibyte should be set to 1 _after_ a call to
- setup_coding_system, since it resets src_multibyte to
- zero. */
- coding->src_multibyte = 1;
}
else
{
/* For sending a unibyte text, character code conversion should
not take place but EOL conversion should. So, setup raw-text
or one of the subsidiary if we have not yet done it. */
- if (coding->type != coding_type_raw_text)
+ if (CODING_REQUIRE_ENCODING (coding))
{
if (CODING_REQUIRE_FLUSHING (coding))
{
/* But, before changing the coding, we must flush out data. */
coding->mode |= CODING_MODE_LAST_BLOCK;
send_process (proc, "", 0, Qt);
+ coding->mode &= CODING_MODE_LAST_BLOCK;
}
coding->src_multibyte = 0;
- setup_raw_text_coding_system (coding);
+ setup_coding_system (raw_text_coding_system
+ (Vlast_coding_system_used),
+ coding);
}
}
coding->dst_multibyte = 0;
if (CODING_REQUIRE_ENCODING (coding))
{
- int require = encoding_buffer_size (coding, len);
- int from_byte = -1, from = -1, to = -1;
- unsigned char *temp_buf = NULL;
-
+ coding->dst_object = Qt;
if (BUFFERP (object))
{
- from_byte = BUF_PTR_BYTE_POS (XBUFFER (object), buf);
- from = buf_bytepos_to_charpos (XBUFFER (object), from_byte);
- to = buf_bytepos_to_charpos (XBUFFER (object), from_byte + len);
+ int from_byte, from, to;
+ int save_pt, save_pt_byte;
+ struct buffer *cur = current_buffer;
+
+ set_buffer_internal (XBUFFER (object));
+ save_pt = PT, save_pt_byte = PT_BYTE;
+
+ from_byte = PTR_BYTE_POS (buf);
+ from = BYTE_TO_CHAR (from_byte);
+ to = BYTE_TO_CHAR (from_byte + len);
+ TEMP_SET_PT_BOTH (from, from_byte);
+ encode_coding_object (coding, object, from, from_byte,
+ to, from_byte + len, Qt);
+ TEMP_SET_PT_BOTH (save_pt, save_pt_byte);
+ set_buffer_internal (cur);
}
else if (STRINGP (object))
{
- from_byte = buf - SDATA (object);
- from = string_byte_to_char (object, from_byte);
- to = string_byte_to_char (object, from_byte + len);
+ encode_coding_string (coding, object, 1);
}
-
- if (coding->composing != COMPOSITION_DISABLED)
+ else
{
- if (from_byte >= 0)
- coding_save_composition (coding, from, to, object);
- else
- coding->composing = COMPOSITION_DISABLED;
+ coding->dst_object = make_unibyte_string (buf, len);
+ coding->produced = len;
}
- if (SBYTES (XPROCESS (proc)->encoding_buf) < require)
- XPROCESS (proc)->encoding_buf = make_uninit_string (require);
-
- if (from_byte >= 0)
- buf = (BUFFERP (object)
- ? BUF_BYTE_ADDRESS (XBUFFER (object), from_byte)
- : SDATA (object) + from_byte);
-
- object = XPROCESS (proc)->encoding_buf;
- encode_coding (coding, (char *) buf, SDATA (object),
- len, SBYTES (object));
len = coding->produced;
- buf = SDATA (object);
- if (temp_buf)
- xfree (temp_buf);
+ buf = SDATA (coding->dst_object);
}
#ifdef VMS
@@ -6499,7 +6495,7 @@ The value takes effect when `start-process' is called. */);
#include "lisp.h"
#include "systime.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "termopts.h"
#include "sysselect.h"
diff --git a/src/puresize.h b/src/puresize.h
index 67c8aede134..cc91da7d9f7 100644
--- a/src/puresize.h
+++ b/src/puresize.h
@@ -42,7 +42,7 @@ Boston, MA 02111-1307, USA. */
#endif
#ifndef BASE_PURESIZE
-#define BASE_PURESIZE (920000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA)
+#define BASE_PURESIZE (1100000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA)
#endif
/* Increase BASE_PURESIZE by a ratio depending on the machine's word size. */
diff --git a/src/regex.c b/src/regex.c
index f55cc5aeb61..453ca3d85d5 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -122,7 +122,7 @@
# define SYNTAX_ENTRY_VIA_PROPERTY
# include "syntax.h"
-# include "charset.h"
+# include "character.h"
# include "category.h"
# ifdef malloc
@@ -143,28 +143,44 @@
# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
+# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
# define RE_STRING_CHAR(p, s) \
(multibyte ? (STRING_CHAR (p, s)) : (*(p)))
# define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
(multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
-/* Set C a (possibly multibyte) character before P. P points into a
- string which is the virtual concatenation of STR1 (which ends at
- END1) or STR2 (which ends at END2). */
-# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
- do { \
- if (multibyte) \
- { \
- re_char *dtemp = (p) == (str2) ? (end1) : (p); \
- re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
- re_char *d0 = dtemp; \
- PREV_CHAR_BOUNDARY (d0, dlimit); \
- c = STRING_CHAR (d0, dtemp - d0); \
- } \
- else \
- (c = ((p) == (str2) ? (end1) : (p))[-1]); \
+/* Set C a (possibly converted to multibyte) character before P. P
+ points into a string which is the virtual concatenation of STR1
+ (which ends at END1) or STR2 (which ends at END2). */
+# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+ do { \
+ if (multibyte) \
+ { \
+ re_char *dtemp = (p) == (str2) ? (end1) : (p); \
+ re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
+ while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
+ c = STRING_CHAR (dtemp, (p) - dtemp); \
+ } \
+ else \
+ { \
+ (c = ((p) == (str2) ? (end1) : (p))[-1]); \
+ MAKE_CHAR_MULTIBYTE (c); \
+ } \
} while (0)
+/* Set C a (possibly converted to multibyte) character at P, and set
+ LEN to the byte length of that character. */
+# define GET_CHAR_AFTER(c, p, len) \
+ do { \
+ if (multibyte) \
+ c = STRING_CHAR_AND_LENGTH (p, 0, len); \
+ else \
+ { \
+ c = *p; \
+ len = 1; \
+ MAKE_CHAR_MULTIBYTE (c); \
+ } \
+ } while (0)
#else /* not emacs */
@@ -231,6 +247,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
# define CHARSET_LEADING_CODE_BASE(c) 0
# define MAX_MULTIBYTE_LENGTH 1
# define RE_MULTIBYTE_P(x) 0
+# define RE_TARGET_MULTIBYTE_P(x) 0
# define WORD_BOUNDARY_P(c1, c2) (0)
# define CHAR_HEAD_P(p) (1)
# define SINGLE_BYTE_CHAR_P(c) (1)
@@ -244,7 +261,15 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
# define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
(c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+# define GET_CHAR_AFTER(c, p, len) \
+ (c = *p, len = 1)
# define MAKE_CHAR(charset, c1, c2) (c1)
+# define BYTE8_TO_CHAR(c) (c)
+# define CHAR_BYTE8_P(c) (0)
+# define MAKE_CHAR_MULTIBYTE(c) (c)
+# define MAKE_CHAR_UNIBYTE(c) (c)
+# define CHAR_LEADING_CODE(c) (c)
+
#endif /* not emacs */
#ifndef RE_TRANSLATE
@@ -450,7 +475,7 @@ init_syntax_once ()
# ifdef __GNUC__
# define alloca __builtin_alloca
# else /* not __GNUC__ */
-# if HAVE_ALLOCA_H
+# ifdef HAVE_ALLOCA_H
# include <alloca.h>
# endif /* HAVE_ALLOCA_H */
# endif /* not __GNUC__ */
@@ -1871,10 +1896,10 @@ struct range_table_work_area
#define EXTEND_RANGE_TABLE(work_area, n) \
do { \
- if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \
+ if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
{ \
- extend_range_table_work_area (work_area); \
- if ((work_area)->table == 0) \
+ extend_range_table_work_area (&work_area); \
+ if ((work_area).table == 0) \
return (REG_ESPACE); \
} \
} while (0)
@@ -1891,15 +1916,12 @@ struct range_table_work_area
#define BIT_UPPER 0x10
#define BIT_MULTIBYTE 0x20
-/* Set a range START..END to WORK_AREA.
- The range is passed through TRANSLATE, so START and END
- should be untranslated. */
-#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
do { \
- int tem; \
- tem = set_image_of_range (&work_area, start, end, translate); \
- if (tem > 0) \
- FREE_STACK_RETURN (tem); \
+ EXTEND_RANGE_TABLE ((work_area), 2); \
+ (work_area).table[(work_area).used++] = (range_start); \
+ (work_area).table[(work_area).used++] = (range_end); \
} while (0)
/* Free allocated memory for WORK_AREA. */
@@ -1919,6 +1941,38 @@ struct range_table_work_area
#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+#ifdef emacs
+
+/* Store characters in the rage range C0 to C1 in WORK_AREA while
+ translating them and paying attention to the continuity of
+ translated characters.
+
+ Implementation note: It is better to implement this fairly big
+ macro by a function, but it's not that easy because macros called
+ in this macro assume various local variables already declared. */
+
+#define SETUP_MULTIBYTE_RANGE(work_area, c0, c1) \
+ do { \
+ re_wchar_t c, t, t_last; \
+ int n; \
+ \
+ c = (c0); \
+ t_last = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \
+ for (c++, n = 1; c <= (c1); c++, n++) \
+ { \
+ t = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \
+ if (t_last + n == t) \
+ continue; \
+ SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \
+ t_last = t; \
+ n = 0; \
+ } \
+ if (n > 0) \
+ SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \
+ } while (0)
+
+#endif /* emacs */
+
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
do { if (p != pend) \
@@ -2074,6 +2128,7 @@ extend_range_table_work_area (work_area)
= (int *) malloc (work_area->allocated);
}
+#if 0
#ifdef emacs
/* Carefully find the ranges of codes that are equivalent
@@ -2306,6 +2361,7 @@ set_image_of_range (work_area, start, end, translate)
return -1;
}
+#endif /* 0 */
#ifndef MATCH_MAY_ALLOCATE
@@ -2449,6 +2505,9 @@ regex_compile (pattern, size, syntax, bufp)
/* If the object matched can contain multibyte characters. */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ /* If a target of matching can contain multibyte characters. */
+ const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
+
#ifdef DEBUG
debug++;
DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -2768,10 +2827,6 @@ regex_compile (pattern, size, syntax, bufp)
break;
}
- /* What should we do for the character which is
- greater than 0x7F, but not BASE_LEADING_CODE_P?
- XXX */
-
/* See if we're at the beginning of a possible character
class. */
@@ -2810,6 +2865,7 @@ regex_compile (pattern, size, syntax, bufp)
{
re_wchar_t ch;
re_wctype_t cc;
+ int limit;
cc = re_wctype (str);
@@ -2829,15 +2885,31 @@ regex_compile (pattern, size, syntax, bufp)
don't need to handle them for multibyte.
They are distinguished by a negative wctype. */
- if (multibyte)
- SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
- re_wctype_to_bit (cc));
+ for (ch = 0; ch < 128; ++ch)
+ if (re_iswctype (btowc (ch), cc))
+ {
+ c = TRANSLATE (ch);
+ SET_LIST_BIT (c);
+ }
- for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ if (target_multibyte)
+ {
+ SET_RANGE_TABLE_WORK_AREA_BIT
+ (range_table_work, re_wctype_to_bit (cc));
+ }
+ else
{
- int translated = TRANSLATE (ch);
- if (re_iswctype (btowc (ch), cc))
- SET_LIST_BIT (translated);
+ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
+ {
+ c = ch;
+ MAKE_CHAR_MULTIBYTE (c);
+ if (re_iswctype (btowc (c), cc))
+ {
+ c = TRANSLATE (c);
+ MAKE_CHAR_UNIBYTE (c);
+ SET_LIST_BIT (c);
+ }
+ }
}
/* Repeat the loop. */
@@ -2864,57 +2936,51 @@ regex_compile (pattern, size, syntax, bufp)
/* Fetch the character which ends the range. */
PATFETCH (c1);
-
- if (SINGLE_BYTE_CHAR_P (c))
+ if (c > c1)
{
- if (! SINGLE_BYTE_CHAR_P (c1))
- {
- /* Handle a range starting with a
- character of less than 256, and ending
- with a character of not less than 256.
- Split that into two ranges, the low one
- ending at 0377, and the high one
- starting at the smallest character in
- the charset of C1 and ending at C1. */
- int charset = CHAR_CHARSET (c1);
- re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
-
- SET_RANGE_TABLE_WORK_AREA (range_table_work,
- c2, c1);
- c1 = 0377;
- }
+ if (syntax & RE_NO_EMPTY_RANGES)
+ FREE_STACK_RETURN (REG_ERANGE);
+ /* Else, repeat the loop. */
}
- else if (!SAME_CHARSET_P (c, c1))
- FREE_STACK_RETURN (REG_ERANGE);
}
else
/* Range from C to C. */
c1 = c;
- /* Set the range ... */
- if (SINGLE_BYTE_CHAR_P (c))
- /* ... into bitmap. */
+#ifndef emacs
+ c = TRANSLATE (c);
+ c1 = TRANSLATE (c1);
+ /* Set the range into bitmap */
+ for (; c <= c1; c++)
+ SET_LIST_BIT (TRANSLATE (c));
+#else /* not emacs */
+ if (target_multibyte)
{
- re_wchar_t this_char;
- re_wchar_t range_start = c, range_end = c1;
-
- /* If the start is after the end, the range is empty. */
- if (range_start > range_end)
+ if (c1 >= 128)
{
- if (syntax & RE_NO_EMPTY_RANGES)
- FREE_STACK_RETURN (REG_ERANGE);
- /* Else, repeat the loop. */
+ re_wchar_t c0 = MAX (c, 128);
+
+ SETUP_MULTIBYTE_RANGE (range_table_work, c0, c1);
+ c1 = 127;
}
- else
+ for (; c <= c1; c++)
+ SET_LIST_BIT (TRANSLATE (c));
+ }
+ else
+ {
+ re_wchar_t c0;
+
+ for (; c <= c1; c++)
{
- for (this_char = range_start; this_char <= range_end;
- this_char++)
- SET_LIST_BIT (TRANSLATE (this_char));
+ c0 = c;
+ if (! multibyte)
+ MAKE_CHAR_MULTIBYTE (c0);
+ c0 = TRANSLATE (c0);
+ MAKE_CHAR_UNIBYTE (c0);
+ SET_LIST_BIT (c0);
}
}
- else
- /* ... into range table. */
- SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+#endif /* not emacs */
}
/* Discard any (non)matching list bytes that are all 0 at the
@@ -3488,12 +3554,20 @@ regex_compile (pattern, size, syntax, bufp)
{
int len;
+ if (! multibyte)
+ MAKE_CHAR_MULTIBYTE (c);
c = TRANSLATE (c);
- if (multibyte)
- len = CHAR_STRING (c, b);
+ if (target_multibyte)
+ {
+ len = CHAR_STRING (c, b);
+ b += len;
+ }
else
- *b = c, len = 1;
- b += len;
+ {
+ MAKE_CHAR_UNIBYTE (c);
+ *b++ = c;
+ len = 1;
+ }
(*pending_exact) += len;
}
@@ -3519,6 +3593,11 @@ regex_compile (pattern, size, syntax, bufp)
/* We have succeeded; set the length of the buffer. */
bufp->used = b - bufp->buffer;
+#ifdef emacs
+ /* Now the buffer is adjusted for the multibyteness of a target. */
+ bufp->multibyte = bufp->target_multibyte;
+#endif
+
#ifdef DEBUG
if (debug > 0)
{
@@ -3764,14 +3843,11 @@ analyse_first (p, pend, fastmap, multibyte)
case exactn:
if (fastmap)
- {
- int c = RE_STRING_CHAR (p + 1, pend - p);
-
- if (SINGLE_BYTE_CHAR_P (c))
- fastmap[c] = 1;
- else
- fastmap[p[1]] = 1;
- }
+ /* If multibyte is nonzero, the first byte of each
+ character is an ASCII or a leading code. Otherwise,
+ each byte is a character. Thus, this works in both
+ cases. */
+ fastmap[p[1]] = 1;
break;
@@ -3783,14 +3859,18 @@ analyse_first (p, pend, fastmap, multibyte)
case charset_not:
- /* Chars beyond end of bitmap are possible matches.
- All the single-byte codes can occur in multibyte buffers.
- So any that are not listed in the charset
- are possible matches, even in multibyte buffers. */
if (!fastmap) break;
- for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
- j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
+ {
+ /* Chars beyond end of bitmap are possible matches. */
+ /* In a multibyte case, the bitmap is used only for ASCII
+ characters. */
+ int limit = multibyte ? 128 : (1 << BYTEWIDTH);
+
+ for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+ j < limit; j++)
+ fastmap[j] = 1;
+ }
+
/* Fallthrough */
case charset:
if (!fastmap) break;
@@ -3801,19 +3881,17 @@ analyse_first (p, pend, fastmap, multibyte)
fastmap[j] = 1;
if ((not && multibyte)
- /* Any character set can possibly contain a character
+ /* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
|| (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
&& CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
/* If we can match a character class, we can match
- any character set. */
+ any multibyte characters. */
{
- set_fastmap_for_multibyte_characters:
if (match_any_multibyte_characters == false)
{
- for (j = 0x80; j < 0xA0; j++) /* XXX */
- if (BASE_LEADING_CODE_P (j))
- fastmap[j] = 1;
+ for (j = 0x80; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
match_any_multibyte_characters = true;
}
}
@@ -3821,9 +3899,10 @@ analyse_first (p, pend, fastmap, multibyte)
else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
&& match_any_multibyte_characters == false)
{
- /* Set fastmap[I] 1 where I is a base leading code of each
- multibyte character in the range table. */
+ /* Set fastmap[I] to 1 where I is a leading code of each
+ multibyte characer in the range table. */
int c, count;
+ unsigned char lc1, lc2;
/* Make P points the range table. `+ 2' is to skip flag
bits for a character class. */
@@ -3833,10 +3912,14 @@ analyse_first (p, pend, fastmap, multibyte)
EXTRACT_NUMBER_AND_INCR (count, p);
for (; count > 0; count--, p += 2 * 3) /* XXX */
{
- /* Extract the start of each range. */
+ /* Extract the start and end of each range. */
+ EXTRACT_CHARACTER (c, p);
+ lc1 = CHAR_LEADING_CODE (c);
+ p += 3;
EXTRACT_CHARACTER (c, p);
- j = CHAR_CHARSET (c);
- fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+ lc2 = CHAR_LEADING_CODE (c);
+ for (j = lc1; j <= lc2; j++)
+ fastmap[j] = 1;
}
}
break;
@@ -3861,14 +3944,21 @@ analyse_first (p, pend, fastmap, multibyte)
if (!fastmap) break;
not = (re_opcode_t)p[-1] == notcategoryspec;
k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
+ for (j = (multibyte ? 127 : (1 << BYTEWIDTH)); j >= 0; j--)
if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
fastmap[j] = 1;
if (multibyte)
- /* Any character set can possibly contain a character
- whose category is K (or not). */
- goto set_fastmap_for_multibyte_characters;
+ {
+ /* Any character set can possibly contain a character
+ whose category is K (or not). */
+ if (match_any_multibyte_characters == false)
+ {
+ for (j = 0x80; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+ match_any_multibyte_characters = true;
+ }
+ }
break;
/* All cases after this match the empty string. These end with
@@ -4116,8 +4206,8 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
int total_size = size1 + size2;
int endpos = startpos + range;
boolean anchored_start;
-
- /* Nonzero if we have to concern multibyte character. */
+ /* Nonzero if BUFP is setup for multibyte characters. We are sure
+ that it is the same as RE_TARGET_MULTIBYTE_P (bufp). */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
/* Check for out-of-range STARTPOS. */
@@ -4214,30 +4304,47 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
buf_charlen);
-
buf_ch = RE_TRANSLATE (translate, buf_ch);
- if (buf_ch >= 0400
- || fastmap[buf_ch])
+ if (fastmap[CHAR_LEADING_CODE (buf_ch)])
break;
range -= buf_charlen;
d += buf_charlen;
}
else
- while (range > lim
- && !fastmap[RE_TRANSLATE (translate, *d)])
+ while (range > lim)
{
+ buf_ch = *d;
+ MAKE_CHAR_MULTIBYTE (buf_ch);
+ buf_ch = RE_TRANSLATE (translate, buf_ch);
+ MAKE_CHAR_UNIBYTE (buf_ch);
+ if (fastmap[buf_ch])
+ break;
d++;
range--;
}
}
else
- while (range > lim && !fastmap[*d])
- {
- d++;
- range--;
- }
+ {
+ if (multibyte)
+ while (range > lim)
+ {
+ int buf_charlen;
+ buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+ buf_charlen);
+ if (fastmap[CHAR_LEADING_CODE (buf_ch)])
+ break;
+ range -= buf_charlen;
+ d += buf_charlen;
+ }
+ else
+ while (range > lim && !fastmap[*d])
+ {
+ d++;
+ range--;
+ }
+ }
startpos += irange - range;
}
else /* Searching backwards. */
@@ -4245,12 +4352,18 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
int room = (startpos >= size1
? size2 + size1 - startpos
: size1 - startpos);
- buf_ch = RE_STRING_CHAR (d, room);
- buf_ch = TRANSLATE (buf_ch);
-
- if (! (buf_ch >= 0400
- || fastmap[buf_ch]))
- goto advance;
+ if (multibyte)
+ {
+ buf_ch = STRING_CHAR (d, room);
+ buf_ch = TRANSLATE (buf_ch);
+ if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
+ goto advance;
+ }
+ else
+ {
+ if (! fastmap[TRANSLATE (*d)])
+ goto advance;
+ }
}
}
@@ -4547,7 +4660,7 @@ mutually_exclusive_p (bufp, p1, p2)
/* Test if C is listed in charset (or charset_not)
at `p1'. */
- if (SINGLE_BYTE_CHAR_P (c))
+ if (! multibyte || IS_REAL_ASCII (c))
{
if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
&& p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
@@ -4590,9 +4703,10 @@ mutually_exclusive_p (bufp, p1, p2)
size of bitmap table of P1 is extracted by
using macro `CHARSET_BITMAP_SIZE'.
- Since we know that all the character listed in
- P2 is ASCII, it is enough to test only bitmap
- table of P1. */
+ In a multibyte case, we know that all the character
+ listed in P2 is ASCII. In a unibyte case, P1 has only a
+ bitmap table. So, in both cases, it is enough to test
+ only the bitmap table of P1. */
if ((re_opcode_t) *p1 == charset)
{
@@ -4750,6 +4864,24 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
WEAK_ALIAS (__re_match_2, re_match_2)
+#ifdef emacs
+#define TRANSLATE_VIA_MULTIBYTE(c) \
+ do { \
+ if (multibyte) \
+ (c) = TRANSLATE (c); \
+ else \
+ { \
+ MAKE_CHAR_MULTIBYTE (c); \
+ (c) = TRANSLATE (c); \
+ MAKE_CHAR_UNIBYTE (c); \
+ } \
+ } while (0)
+
+#else
+#define TRANSLATE_VIA_MULTIBYTE(c) ((c) = TRANSLATE (c))
+#endif
+
+
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static int
@@ -4789,7 +4921,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* We use this to map every character in the string. */
RE_TRANSLATE_TYPE translate = bufp->translate;
- /* Nonzero if we have to concern multibyte character. */
+ /* Nonzero if BUFP is setup for multibyte characters. We are sure
+ that it is the same as RE_TARGET_MULTIBYTE_P (bufp). */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
/* Failure point stack. Each place that can handle a failure further
@@ -5143,58 +5276,71 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Remember the start point to rollback upon failure. */
dfail = d;
+#ifndef emacs
/* This is written out as an if-else so we don't waste time
testing `translate' inside the loop. */
if (RE_TRANSLATE_P (translate))
- {
- if (multibyte)
- do
+ do
+ {
+ PREFETCH ();
+ if (RE_TRANSLATE (translate, *d) != *p++)
{
- int pat_charlen, buf_charlen;
- unsigned int pat_ch, buf_ch;
-
- PREFETCH ();
- pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
- buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+ d = dfail;
+ goto fail;
+ }
+ d++;
+ }
+ while (--mcnt);
+ else
+ do
+ {
+ PREFETCH ();
+ if (*d++ != *p++)
+ {
+ d = dfail;
+ goto fail;
+ }
+ }
+ while (--mcnt);
+#else /* emacs */
+ /* The cost of testing `translate' is comparatively small. */
+ if (multibyte)
+ do
+ {
+ int pat_charlen, buf_charlen;
+ unsigned int pat_ch, buf_ch;
- if (RE_TRANSLATE (translate, buf_ch)
- != pat_ch)
- {
- d = dfail;
- goto fail;
- }
+ PREFETCH ();
+ pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
- p += pat_charlen;
- d += buf_charlen;
- mcnt -= pat_charlen;
- }
- while (mcnt > 0);
- else
- do
+ if (TRANSLATE (buf_ch) != pat_ch)
{
- PREFETCH ();
- if (RE_TRANSLATE (translate, *d) != *p++)
- {
- d = dfail;
- goto fail;
- }
- d++;
+ d = dfail;
+ goto fail;
}
- while (--mcnt);
- }
+
+ p += pat_charlen;
+ d += buf_charlen;
+ mcnt -= pat_charlen;
+ }
+ while (mcnt > 0);
else
- {
- do
- {
- PREFETCH ();
- if (*d++ != *p++)
- {
- d = dfail;
- goto fail;
- }
- }
- while (--mcnt);
- }
+ do
+ {
+ unsigned int buf_ch;
+
+ PREFETCH ();
+ buf_ch = *d++;
+ TRANSLATE_VIA_MULTIBYTE (buf_ch);
+ if (buf_ch != *p++)
+ {
+ d = dfail;
+ goto fail;
+ }
+ }
+ while (--mcnt);
+#endif
break;
@@ -5252,9 +5398,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
PREFETCH ();
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
- c = TRANSLATE (c); /* The character to match. */
+ TRANSLATE_VIA_MULTIBYTE (c); /* The character to match. */
- if (SINGLE_BYTE_CHAR_P (c))
+ if (! multibyte || IS_REAL_ASCII (c))
{ /* Lookup bitmap. */
/* Cast to `unsigned' instead of `unsigned char' in
case the bit list is a full 32 bytes long. */
@@ -5417,7 +5563,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
}
else
{
- unsigned char c;
+ unsigned c;
GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
if (c == '\n')
break;
@@ -5686,6 +5832,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
is the character at D, and S2 is the syntax of C2. */
re_wchar_t c1, c2;
int s1, s2;
+ int dummy;
#ifdef emacs
int offset = PTR_TO_OFFSET (d - 1);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5697,7 +5844,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
PREFETCH_NOLIMIT ();
- c2 = RE_STRING_CHAR (d, dend - d);
+ GET_CHAR_AFTER (c2, d, dummy);
s2 = SYNTAX (c2);
if (/* Case 2: Only one of S1 and S2 is Sword. */
@@ -5726,13 +5873,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
is the character at D, and S2 is the syntax of C2. */
re_wchar_t c1, c2;
int s1, s2;
+ int dummy;
#ifdef emacs
int offset = PTR_TO_OFFSET (d);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
- c2 = RE_STRING_CHAR (d, dend - d);
+ GET_CHAR_AFTER (c2, d, dummy);
s2 = SYNTAX (c2);
/* Case 2: S2 is not Sword. */
@@ -5770,6 +5918,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
is the character at D, and S2 is the syntax of C2. */
re_wchar_t c1, c2;
int s1, s2;
+ int dummy;
#ifdef emacs
int offset = PTR_TO_OFFSET (d) - 1;
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5786,7 +5935,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
if (!AT_STRINGS_END (d))
{
PREFETCH_NOLIMIT ();
- c2 = RE_STRING_CHAR (d, dend - d);
+ GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
#endif
@@ -5817,8 +5966,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
int len;
re_wchar_t c;
- c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-
+ GET_CHAR_AFTER (c, d, len);
if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
goto fail;
d += len;
@@ -5854,8 +6002,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
int len;
re_wchar_t c;
- c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-
+ GET_CHAR_AFTER (c, d, len);
if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
goto fail;
d += len;
@@ -5947,8 +6094,8 @@ bcmp_translate (s1, s2, len, translate, multibyte)
int p1_charlen, p2_charlen;
re_wchar_t p1_ch, p2_ch;
- p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
- p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+ GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
+ GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
if (RE_TRANSLATE (translate, p1_ch)
!= RE_TRANSLATE (translate, p2_ch))
diff --git a/src/regex.h b/src/regex.h
index 1cfd4363ea7..b23c8855eff 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -391,9 +391,15 @@ struct re_pattern_buffer
unsigned not_eol : 1;
#ifdef emacs
- /* If true, multi-byte form in the `buffer' should be recognized as a
- multibyte character. */
+ /* If true, multi-byte form in the regexp pattern should be
+ recognized as a multibyte character. When the pattern is
+ compiled, this is set to the same value as target_multibyte
+ below. */
unsigned multibyte : 1;
+
+ /* If true, multi-byte form in the target of match should be
+ recognized as a multibyte character. */
+ unsigned target_multibyte : 1;
#endif
/* [[[end pattern_buffer]]] */
diff --git a/src/search.c b/src/search.c
index 89a4a5ad68e..96ea41e8f8e 100644
--- a/src/search.c
+++ b/src/search.c
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA. */
#include "syntax.h"
#include "category.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "region-cache.h"
#include "commands.h"
#include "blockinput.h"
@@ -104,9 +104,8 @@ matcher_overflow ()
subexpression bounds.
POSIX is nonzero if we want full backtracking (POSIX style)
for this pattern. 0 means backtrack only enough to get a valid match.
- MULTIBYTE is nonzero if we want to handle multibyte characters in
- PATTERN. 0 means all multibyte characters are recognized just as
- sequences of binary data. */
+ MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
+ string. */
static void
compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
@@ -117,51 +116,19 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
int posix;
int multibyte;
{
- unsigned char *raw_pattern;
- int raw_pattern_size;
char *val;
reg_syntax_t old;
- /* MULTIBYTE says whether the text to be searched is multibyte.
- We must convert PATTERN to match that, or we will not really
- find things right. */
-
- if (multibyte == STRING_MULTIBYTE (pattern))
- {
- raw_pattern = (unsigned char *) SDATA (pattern);
- raw_pattern_size = SBYTES (pattern);
- }
- else if (multibyte)
- {
- raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
- SCHARS (pattern));
- raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
- copy_text (SDATA (pattern), raw_pattern,
- SCHARS (pattern), 0, 1);
- }
- else
- {
- /* Converting multibyte to single-byte.
-
- ??? Perhaps this conversion should be done in a special way
- by subtracting nonascii-insert-offset from each non-ASCII char,
- so that only the multibyte chars which really correspond to
- the chosen single-byte character set can possibly match. */
- raw_pattern_size = SCHARS (pattern);
- raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
- copy_text (SDATA (pattern), raw_pattern,
- SBYTES (pattern), 1, 0);
- }
-
cp->regexp = Qnil;
cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
cp->posix = posix;
- cp->buf.multibyte = multibyte;
+ cp->buf.multibyte = STRING_MULTIBYTE (pattern);
+ cp->buf.target_multibyte = multibyte;
BLOCK_INPUT;
old = re_set_syntax (RE_SYNTAX_EMACS
| (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
- val = (char *) re_compile_pattern ((char *)raw_pattern,
- raw_pattern_size, &cp->buf);
+ val = (char *) re_compile_pattern ((char *) SDATA (pattern),
+ SBYTES (pattern), &cp->buf);
re_set_syntax (old);
UNBLOCK_INPUT;
if (val)
@@ -222,7 +189,7 @@ compile_pattern (pattern, regp, translate, posix, multibyte)
&& !NILP (Fstring_equal (cp->regexp, pattern))
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
&& cp->posix == posix
- && cp->buf.multibyte == multibyte)
+ && cp->buf.target_multibyte == multibyte)
break;
/* If we're at the end of the cache, compile into the nil cell
@@ -1140,7 +1107,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
unsigned char *patbuf;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
unsigned char *base_pat = SDATA (string);
- int charset_base = -1;
+ /* High bits of char; 0 for ASCII characters, (CHAR & ~0x3F)
+ otherwise. Characters of the same high bits have the same
+ sequence of bytes but last. To do the BM search, all
+ characters in STRING must have the same high bits (including
+ their case translations). */
+ int char_high_bits = -1;
int boyer_moore_ok = 1;
/* MULTIBYTE says whether the text to be searched is multibyte.
@@ -1181,16 +1153,15 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
/* Copy and optionally translate the pattern. */
len = raw_pattern_size;
len_byte = raw_pattern_size_byte;
- patbuf = (unsigned char *) alloca (len_byte);
+ patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
pat = patbuf;
base_pat = raw_pattern;
if (multibyte)
{
while (--len >= 0)
{
- unsigned char str[MAX_MULTIBYTE_LENGTH];
int c, translated, inverse;
- int in_charlen, charlen;
+ int in_charlen;
/* If we got here and the RE flag is set, it's because we're
dealing with a regexp known to be trivial, so the backslash
@@ -1206,23 +1177,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
/* Translate the character, if requested. */
TRANSLATE (translated, trt, c);
- /* If translation changed the byte-length, go back
- to the original character. */
- charlen = CHAR_STRING (translated, str);
- if (in_charlen != charlen)
- {
- translated = c;
- charlen = CHAR_STRING (c, str);
- }
-
- /* If we are searching for something strange,
- an invalid multibyte code, don't use boyer-moore. */
- if (! ASCII_BYTE_P (translated)
- && (charlen == 1 /* 8bit code */
- || charlen != in_charlen /* invalid multibyte code */
- ))
- boyer_moore_ok = 0;
-
TRANSLATE (inverse, inverse_trt, c);
/* Did this char actually get translated?
@@ -1231,22 +1185,22 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
{
/* Keep track of which character set row
contains the characters that need translation. */
- int charset_base_code = c & ~CHAR_FIELD3_MASK;
- int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
+ int this_high_bit = ASCII_CHAR_P (c) ? 0 : (c & ~0x3F);
+ int c1 = inverse != c ? inverse : translated;
+ int trt_high_bit = ASCII_CHAR_P (c1) ? 0 : (c1 & ~0x3F);
- if (charset_base_code != inverse_charset_base)
+ if (this_high_bit != trt_high_bit)
boyer_moore_ok = 0;
- else if (charset_base == -1)
- charset_base = charset_base_code;
- else if (charset_base != charset_base_code)
+ else if (char_high_bits == -1)
+ char_high_bits = this_high_bit;
+ else if (char_high_bits != this_high_bit)
/* If two different rows appear, needing translation,
then we cannot use boyer_moore search. */
boyer_moore_ok = 0;
}
/* Store this character into the translated pattern. */
- bcopy (str, pat, charlen);
- pat += charlen;
+ CHAR_STRING_ADVANCE (translated, pat);
base_pat += in_charlen;
len_byte -= in_charlen;
}
@@ -1254,7 +1208,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
else
{
/* Unibyte buffer. */
- charset_base = 0;
+ char_high_bits = 0;
while (--len >= 0)
{
int c, translated;
@@ -1280,7 +1234,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
if (boyer_moore_ok)
return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
pos, pos_byte, lim, lim_byte,
- charset_base);
+ char_high_bits);
else
return simple_search (n, pat, len, len_byte, trt,
pos, pos_byte, lim, lim_byte);
@@ -1513,7 +1467,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
static int
boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
- pos, pos_byte, lim, lim_byte, charset_base)
+ pos, pos_byte, lim, lim_byte, char_high_bits)
int n;
unsigned char *base_pat;
int len, len_byte;
@@ -1521,7 +1475,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
Lisp_Object inverse_trt;
int pos, pos_byte;
int lim, lim_byte;
- int charset_base;
+ int char_high_bits;
{
int direction = ((n > 0) ? 1 : -1);
register int dirlen;
@@ -1622,7 +1576,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
while (! CHAR_HEAD_P (*charstart))
charstart--;
untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
- if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
+ if (char_high_bits
+ == (ASCII_CHAR_P (untranslated) ? 0 : untranslated & ~0x3F))
{
TRANSLATE (ch, trt, untranslated);
if (! CHAR_HEAD_P (*ptr))
@@ -1646,8 +1601,9 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
this_translated = 0;
}
- if (ch > 0400)
- j = ((unsigned char) ch) | 0200;
+ if (this_translated
+ && ch >= 0200)
+ j = (ch & 0x3F) | 0200;
else
j = (unsigned char) ch;
@@ -1664,8 +1620,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
while (1)
{
TRANSLATE (ch, inverse_trt, ch);
- if (ch > 0400)
- j = ((unsigned char) ch) | 0200;
+ if (ch > 0200)
+ j = (ch & 0x3F) | 0200;
else
j = (unsigned char) ch;
@@ -1958,7 +1914,7 @@ wordify (string)
{
int c;
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
if (SYNTAX (c) != Sword)
{
@@ -1993,7 +1949,7 @@ wordify (string)
int c;
int i_byte_orig = i_byte;
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
if (SYNTAX (c) == Sword)
{
@@ -2277,11 +2233,11 @@ since only regular expressions have distinguished subexpressions. */)
{
if (NILP (string))
{
- c = FETCH_CHAR (pos_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
INC_BOTH (pos, pos_byte);
}
else
- FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
if (LOWERCASEP (c))
{
@@ -2445,10 +2401,7 @@ since only regular expressions have distinguished subexpressions. */)
Lisp_Object rev_tbl;
int really_changed = 0;
- rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
- ? Fchar_table_extra_slot (Vnonascii_translation_table,
- make_number (0))
- : Qnil);
+ rev_tbl = Qnil;
substed_alloc_size = length * 2 + 100;
substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
@@ -2491,7 +2444,7 @@ since only regular expressions have distinguished subexpressions. */)
{
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
pos, pos_byte);
- if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
+ if (!buf_multibyte && !ASCII_CHAR_P (c))
c = multibyte_char_to_unibyte (c, rev_tbl);
}
else
diff --git a/src/syntax.c b/src/syntax.c
index 706706a53a1..5b25371fcbc 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "keymap.h"
/* Make syntax table lookup grant data in gl_state. */
@@ -97,7 +97,8 @@ static int find_start_modiff;
static int find_defun_start P_ ((int, int));
static int back_comment P_ ((int, int, int, int, int, int *, int *));
static int char_quoted P_ ((int, int));
-static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
+static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object));
+static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
static Lisp_Object scan_lists P_ ((int, int, int, int));
static void scan_sexps_forward P_ ((struct lisp_parse_state *,
int, int, int, int,
@@ -293,7 +294,7 @@ char_quoted (charpos, bytepos)
while (bytepos >= beg)
{
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
- code = SYNTAX (FETCH_CHAR (bytepos));
+ code = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (bytepos));
if (! (code == Scharquote || code == Sescape))
break;
@@ -382,10 +383,10 @@ find_defun_start (pos, pos_byte)
{
/* Open-paren at start of line means we may have found our
defun-start. */
- if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
+ if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
{
SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
- if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
+ if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
break;
/* Now fallback to the default value. */
gl_state.current_syntax_table = current_buffer->syntax_table;
@@ -505,7 +506,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
UPDATE_SYNTAX_TABLE_BACKWARD (from);
prev_syntax = syntax;
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
syntax = SYNTAX_WITH_FLAGS (c);
code = SYNTAX (c);
@@ -534,7 +535,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
int next = from, next_byte = from_byte, next_c, next_syntax;
DEC_BOTH (next, next_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (next);
- next_c = FETCH_CHAR (next_byte);
+ next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
next_syntax = SYNTAX_WITH_FLAGS (next_c);
if (((com2start || comnested)
&& SYNTAX_FLAGS_COMEND_SECOND (syntax)
@@ -838,29 +839,6 @@ char syntax_code_spec[16] =
static Lisp_Object Vsyntax_code_object;
-/* Look up the value for CHARACTER in syntax table TABLE's parent
- and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
- for CHARACTER. It's actually used only when not compiled with GCC. */
-
-Lisp_Object
-syntax_parent_lookup (table, character)
- Lisp_Object table;
- int character;
-{
- Lisp_Object value;
-
- while (1)
- {
- table = XCHAR_TABLE (table)->parent;
- if (NILP (table))
- return Qnil;
-
- value = XCHAR_TABLE (table)->contents[character];
- if (!NILP (value))
- return value;
- }
-}
-
DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
doc: /* Return the syntax code of CHARACTER, described by a character.
For example, if CHARACTER is a word constituent,
@@ -979,6 +957,8 @@ DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
doc: /* Set syntax for character CHAR according to string NEWENTRY.
The syntax is changed only for table SYNTAX_TABLE, which defaults to
the current buffer's syntax table.
+CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
+in the range MIN and MAX are changed.
The first character of NEWENTRY should be one of the following:
Space or - whitespace syntax. w word constituent.
_ symbol constituent. . punctuation.
@@ -1015,14 +995,24 @@ usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
(c, newentry, syntax_table)
Lisp_Object c, newentry, syntax_table;
{
- CHECK_NUMBER (c);
+ if (CONSP (c))
+ {
+ CHECK_CHARACTER_CAR (c);
+ CHECK_CHARACTER_CDR (c);
+ }
+ else
+ CHECK_CHARACTER (c);
if (NILP (syntax_table))
syntax_table = current_buffer->syntax_table;
else
check_syntax_table (syntax_table);
- SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
+ newentry = Fstring_to_syntax (newentry);
+ if (CONSP (c))
+ SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
+ else
+ SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
return Qnil;
}
@@ -1176,6 +1166,10 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
int parse_sexp_ignore_comments;
+/* Char-table of functions that find the next or previous word
+ boundary. */
+Lisp_Object Vfind_word_boundary_function_table;
+
/* Return the position across COUNT words from FROM.
If that many words cannot be found before the end of the buffer, return 0.
COUNT negative means scan backward and stop at word beginning. */
@@ -1189,6 +1183,7 @@ scan_words (from, count)
register int from_byte = CHAR_TO_BYTE (from);
register enum syntaxcode code;
int ch0, ch1;
+ Lisp_Object func, script, pos;
immediate_quit = 1;
QUIT;
@@ -1205,7 +1200,7 @@ scan_words (from, count)
return 0;
}
UPDATE_SYNTAX_TABLE_FORWARD (from);
- ch0 = FETCH_CHAR (from_byte);
+ ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch0);
INC_BOTH (from, from_byte);
if (words_include_escapes
@@ -1216,18 +1211,33 @@ scan_words (from, count)
}
/* Now CH0 is a character which begins a word and FROM is the
position of the next character. */
- while (1)
+ func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
+ if (! NILP (Ffboundp (func)))
{
- if (from == end) break;
- UPDATE_SYNTAX_TABLE_FORWARD (from);
- ch1 = FETCH_CHAR (from_byte);
- code = SYNTAX (ch1);
- if (!(words_include_escapes
- && (code == Sescape || code == Scharquote)))
- if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
- break;
- INC_BOTH (from, from_byte);
- ch0 = ch1;
+ pos = call2 (func, make_number (from - 1), make_number (end));
+ if (INTEGERP (pos) && XINT (pos) > from)
+ {
+ from = XINT (pos);
+ from_byte = CHAR_TO_BYTE (from);
+ }
+ }
+ else
+ {
+ script = CHAR_TABLE_REF (Vchar_script_table, ch0);
+ while (1)
+ {
+ if (from == end) break;
+ UPDATE_SYNTAX_TABLE_FORWARD (from);
+ ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
+ code = SYNTAX (ch1);
+ if ((code != Sword
+ && (! words_include_escapes
+ || (code != Sescape && code != Scharquote)))
+ || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
+ break;
+ INC_BOTH (from, from_byte);
+ ch0 = ch1;
+ }
}
count--;
}
@@ -1242,7 +1252,7 @@ scan_words (from, count)
}
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- ch1 = FETCH_CHAR (from_byte);
+ ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch1);
if (words_include_escapes
&& (code == Sescape || code == Scharquote))
@@ -1252,22 +1262,37 @@ scan_words (from, count)
}
/* Now CH1 is a character which ends a word and FROM is the
position of it. */
- while (1)
+ func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
+ if (! NILP (Ffboundp (func)))
+ {
+ pos = call2 (func, make_number (from), make_number (beg));
+ if (INTEGERP (pos) && XINT (pos) < from)
+ {
+ from = XINT (pos);
+ from_byte = CHAR_TO_BYTE (from);
+ }
+ }
+ else
{
- int temp_byte;
+ script = CHAR_TABLE_REF (Vchar_script_table, ch1);
+ while (1)
+ {
+ int temp_byte;
- if (from == beg)
- break;
- temp_byte = dec_bytepos (from_byte);
- UPDATE_SYNTAX_TABLE_BACKWARD (from);
- ch0 = FETCH_CHAR (temp_byte);
- code = SYNTAX (ch0);
- if (!(words_include_escapes
- && (code == Sescape || code == Scharquote)))
- if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
- break;
- DEC_BOTH (from, from_byte);
- ch1 = ch0;
+ if (from == beg)
+ break;
+ temp_byte = dec_bytepos (from_byte);
+ UPDATE_SYNTAX_TABLE_BACKWARD (from);
+ ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
+ code = SYNTAX (ch0);
+ if ((code != Sword
+ && (! words_include_escapes
+ || (code != Sescape && code != Scharquote)))
+ || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
+ break;
+ DEC_BOTH (from, from_byte);
+ ch1 = ch0;
+ }
}
count++;
}
@@ -1316,7 +1341,7 @@ they will be treated as literals. */)
(string, lim)
Lisp_Object string, lim;
{
- return skip_chars (1, 0, string, lim);
+ return skip_chars (1, string, lim);
}
DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1326,7 +1351,7 @@ Returns the distance traveled, either zero or negative. */)
(string, lim)
Lisp_Object string, lim;
{
- return skip_chars (0, 0, string, lim);
+ return skip_chars (0, string, lim);
}
DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1338,7 +1363,7 @@ This function returns the distance traveled, either zero or positive. */)
(syntax, lim)
Lisp_Object syntax, lim;
{
- return skip_chars (1, 1, syntax, lim);
+ return skip_syntaxes (1, syntax, lim);
}
DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
@@ -1350,54 +1375,32 @@ This function returns the distance traveled, either zero or negative. */)
(syntax, lim)
Lisp_Object syntax, lim;
{
- return skip_chars (0, 1, syntax, lim);
+ return skip_syntaxes (0, syntax, lim);
}
static Lisp_Object
-skip_chars (forwardp, syntaxp, string, lim)
- int forwardp, syntaxp;
+skip_chars (forwardp, string, lim)
+ int forwardp;
Lisp_Object string, lim;
{
register unsigned int c;
unsigned char fastmap[0400];
- /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
- of which codes don't fit in FASTMAP. In that case, set the
- ranges of characters in CHAR_RANGES. */
+ /* Store the ranges of non-ASCII characters. */
int *char_ranges;
int n_char_ranges = 0;
int negate = 0;
register int i, i_byte;
- int multibyte = !NILP (current_buffer->enable_multibyte_characters);
+ /* Set to 1 if the current buffer is multibyte and the region
+ contains non-ASCII chars. */
+ int multibyte;
+ /* Set to 1 if STRING is multibyte and it contains non-ASCII
+ chars. */
int string_multibyte;
int size_byte;
const unsigned char *str;
int len;
CHECK_STRING (string);
- char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
- string_multibyte = STRING_MULTIBYTE (string);
- str = SDATA (string);
- size_byte = SBYTES (string);
-
- /* Adjust the multibyteness of the string to that of the buffer. */
- if (multibyte != string_multibyte)
- {
- int nbytes;
-
- if (multibyte)
- nbytes = count_size_as_multibyte (SDATA (string),
- SCHARS (string));
- else
- nbytes = SCHARS (string);
- if (nbytes != size_byte)
- {
- unsigned char *tmp = (unsigned char *) alloca (nbytes);
- copy_text (SDATA (string), tmp, size_byte,
- string_multibyte, multibyte);
- size_byte = nbytes;
- str = tmp;
- }
- }
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
@@ -1410,10 +1413,18 @@ skip_chars (forwardp, syntaxp, string, lim)
if (XINT (lim) < BEGV)
XSETFASTINT (lim, BEGV);
+ multibyte = (!NILP (current_buffer->enable_multibyte_characters)
+ && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
+ string_multibyte = SBYTES (string) > SCHARS (string);
+
bzero (fastmap, sizeof fastmap);
+ if (multibyte)
+ char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
- i_byte = 0;
+ str = SDATA (string);
+ size_byte = SBYTES (string);
+ i_byte = 0;
if (i_byte < size_byte
&& SREF (string, 0) == '^')
{
@@ -1421,23 +1432,110 @@ skip_chars (forwardp, syntaxp, string, lim)
}
/* Find the characters specified and set their elements of fastmap.
- If syntaxp, each character counts as itself.
- Otherwise, handle backslashes and ranges specially. */
+ Handle backslashes and ranges specially.
- while (i_byte < size_byte)
+ If STRING contains non-ASCII characters, setup char_ranges for
+ them and use fastmap only for their leading codes. */
+
+ if (! string_multibyte)
{
- c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
- i_byte += len;
+ int string_has_eight_bit = 0;
- if (syntaxp)
- fastmap[syntax_spec_code[c & 0377]] = 1;
- else
+ /* At first setup fastmap. */
+ while (i_byte < size_byte)
+ {
+ c = str[i_byte++];
+
+ if (c == '\\')
+ {
+ if (i_byte == size_byte)
+ break;
+
+ c = str[i_byte++];
+ }
+ if (i_byte < size_byte
+ && str[i_byte] == '-')
+ {
+ unsigned int c2;
+
+ /* Skip over the dash. */
+ i_byte++;
+
+ if (i_byte == size_byte)
+ break;
+
+ /* Get the end of the range. */
+ c2 = str[i_byte++];
+ if (c2 == '\\'
+ && i_byte < size_byte)
+ c2 = str[i_byte++];
+
+ if (c <= c2)
+ {
+ while (c <= c2)
+ fastmap[c++] = 1;
+ if (! ASCII_CHAR_P (c2))
+ string_has_eight_bit = 1;
+ }
+ }
+ else
+ {
+ fastmap[c] = 1;
+ if (! ASCII_CHAR_P (c))
+ string_has_eight_bit = 1;
+ }
+ }
+
+ /* If the current range is multibyte and STRING contains
+ eight-bit chars, arrange fastmap and setup char_ranges for
+ the corresponding multibyte chars. */
+ if (multibyte && string_has_eight_bit)
+ {
+ unsigned char fastmap2[0400];
+ int range_start_byte, range_start_char;
+
+ bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
+ bzero (fastmap + 0200, 0200);
+ /* We are sure that this loop stops. */
+ for (i = 0200; ! fastmap2[i]; i++);
+ c = unibyte_char_to_multibyte (i);
+ fastmap[CHAR_LEADING_CODE (c)] = 1;
+ range_start_byte = i;
+ range_start_char = c;
+ for (i = 129; i < 0400; i++)
+ {
+ c = unibyte_char_to_multibyte (i);
+ fastmap[CHAR_LEADING_CODE (c)] = 1;
+ if (i - range_start_byte != c - range_start_char)
+ {
+ char_ranges[n_char_ranges++] = range_start_char;
+ char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+ + range_start_char);
+ range_start_byte = i;
+ range_start_char = c;
+ }
+ }
+ char_ranges[n_char_ranges++] = range_start_char;
+ char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+ + range_start_char);
+ }
+ }
+ else
+ {
+ while (i_byte < size_byte)
{
+ unsigned char leading_code;
+
+ leading_code = str[i_byte];
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+ i_byte += len;
+
if (c == '\\')
{
if (i_byte == size_byte)
break;
+ leading_code = str[i_byte];
c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
i_byte += len;
}
@@ -1445,6 +1543,7 @@ skip_chars (forwardp, syntaxp, string, lim)
&& str[i_byte] == '-')
{
unsigned int c2;
+ unsigned char leading_code2;
/* Skip over the dash. */
i_byte++;
@@ -1453,55 +1552,81 @@ skip_chars (forwardp, syntaxp, string, lim)
break;
/* Get the end of the range. */
- c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
+ leading_code2 = str[i_byte];
+ c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
i_byte += len;
- if (SINGLE_BYTE_CHAR_P (c))
+ if (c2 == '\\'
+ && i_byte < size_byte)
+ {
+ leading_code2 = str[i_byte];
+ c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+ i_byte += len;
+ }
+
+ if (ASCII_CHAR_P (c))
{
- if (! SINGLE_BYTE_CHAR_P (c2))
+ while (c <= c2 && c < 0x80)
+ fastmap[c++] = 1;
+ leading_code = CHAR_LEADING_CODE (c);
+ }
+ if (! ASCII_CHAR_P (c))
+ {
+ while (leading_code <= leading_code2)
+ fastmap[leading_code++] = 1;
+ if (c <= c2)
{
- /* Handle a range starting with a character of
- less than 256, and ending with a character of
- not less than 256. Split that into two
- ranges, the low one ending at 0377, and the
- high one starting at the smallest character
- in the charset of C2 and ending at C2. */
- int charset = CHAR_CHARSET (c2);
- int c1 = MAKE_CHAR (charset, 0, 0);
-
- char_ranges[n_char_ranges++] = c1;
+ char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c2;
- c2 = 0377;
- }
- while (c <= c2)
- {
- fastmap[c] = 1;
- c++;
}
}
- else if (c <= c2) /* Both C and C2 are multibyte char. */
- {
- char_ranges[n_char_ranges++] = c;
- char_ranges[n_char_ranges++] = c2;
- }
}
else
{
- if (SINGLE_BYTE_CHAR_P (c))
+ if (ASCII_CHAR_P (c))
fastmap[c] = 1;
else
{
+ fastmap[leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c;
}
}
}
+
+ /* If the current range is unibyte and STRING contains non-ASCII
+ chars, arrange fastmap for the corresponding unibyte
+ chars. */
+
+ if (! multibyte && n_char_ranges > 0)
+ {
+ bzero (fastmap + 0200, 0200);
+ for (i = 0; i < n_char_ranges; i += 2)
+ {
+ int c1 = char_ranges[i];
+ int c2 = char_ranges[i + 1];
+
+ for (; c1 <= c2; c1++)
+ fastmap[CHAR_TO_BYTE8 (c1)] = 1;
+ }
+ }
}
/* If ^ was the first character, complement the fastmap. */
if (negate)
- for (i = 0; i < sizeof fastmap; i++)
- fastmap[i] ^= 1;
+ {
+ if (! multibyte)
+ for (i = 0; i < sizeof fastmap; i++)
+ fastmap[i] ^= 1;
+ else
+ {
+ for (i = 0; i < 0200; i++)
+ fastmap[i] ^= 1;
+ /* All non-ASCII chars possibly match. */
+ for (; i < sizeof fastmap; i++)
+ fastmap[i] = 1;
+ }
+ }
{
int start_point = PT;
@@ -1511,224 +1636,283 @@ skip_chars (forwardp, syntaxp, string, lim)
if (forwardp)
{
- endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
- stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
+ endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
+ stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
}
else
{
- endp = CHAR_POS_ADDR (XINT (lim));
- stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
+ endp = CHAR_POS_ADDR (XINT (lim));
+ stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
}
immediate_quit = 1;
- if (syntaxp)
+ if (forwardp)
{
- SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
- if (forwardp)
- {
- if (multibyte)
- while (1)
- {
- int nbytes;
+ if (multibyte)
+ while (1)
+ {
+ int nbytes;
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (! fastmap[(int) SYNTAX (c)])
- break;
- p += nbytes, pos++, pos_byte += nbytes;
- UPDATE_SYNTAX_TABLE_FORWARD (pos);
- }
- else
- while (1)
+ if (p >= stop)
{
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- if (! fastmap[(int) SYNTAX (*p)])
+ if (p >= endp)
break;
- p++, pos++;
- UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ p = GAP_END_ADDR;
+ stop = endp;
}
- }
- else
- {
- if (multibyte)
- while (1)
+ if (! fastmap[*p])
+ break;
+ c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ if (! ASCII_CHAR_P (c))
{
- unsigned char *prev_p;
- int nbytes;
-
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- prev_p = p;
- while (--p >= stop && ! CHAR_HEAD_P (*p));
- PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (prev_p - p > nbytes)
- p = prev_p - 1, c = *p, nbytes = 1;
- else
- c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
- pos--, pos_byte -= nbytes;
- UPDATE_SYNTAX_TABLE_BACKWARD (pos);
- if (! fastmap[(int) SYNTAX (c)])
- {
- pos++;
- pos_byte += nbytes;
+ /* As we are looking at a multibyte character, we
+ must look up the character in the table
+ CHAR_RANGES. If there's no data in the table,
+ that character is not what we want to skip. */
+
+ /* The following code do the right thing even if
+ n_char_ranges is zero (i.e. no data in
+ CHAR_RANGES). */
+ for (i = 0; i < n_char_ranges; i += 2)
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
break;
- }
+ if (!(negate ^ (i < n_char_ranges)))
+ break;
}
- else
- while (1)
+ p += nbytes, pos++, pos_byte += nbytes;
+ }
+ else
+ while (1)
+ {
+ if (p >= stop)
{
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- if (! fastmap[(int) SYNTAX (p[-1])])
+ if (p >= endp)
break;
- p--, pos--;
- UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
+ p = GAP_END_ADDR;
+ stop = endp;
}
- }
+ if (!fastmap[*p])
+ break;
+ p++, pos++, pos_byte++;
+ }
}
else
{
- if (forwardp)
- {
- if (multibyte)
- while (1)
- {
- int nbytes;
+ if (multibyte)
+ while (1)
+ {
+ unsigned char *prev_p;
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (SINGLE_BYTE_CHAR_P (c))
- {
- if (!fastmap[c])
- break;
- }
- else
- {
- /* If we are looking at a multibyte character,
- we must look up the character in the table
- CHAR_RANGES. If there's no data in the
- table, that character is not what we want to
- skip. */
-
- /* The following code do the right thing even if
- n_char_ranges is zero (i.e. no data in
- CHAR_RANGES). */
- for (i = 0; i < n_char_ranges; i += 2)
- if (c >= char_ranges[i] && c <= char_ranges[i + 1])
- break;
- if (!(negate ^ (i < n_char_ranges)))
- break;
- }
- p += nbytes, pos++, pos_byte += nbytes;
- }
- else
- while (1)
+ if (p <= stop)
{
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- if (!fastmap[*p])
+ if (p <= endp)
break;
- p++, pos++;
+ p = GPT_ADDR;
+ stop = endp;
}
- }
- else
- {
- if (multibyte)
- while (1)
+ prev_p = p;
+ while (--p >= stop && ! CHAR_HEAD_P (*p));
+ if (! fastmap[*p])
+ break;
+ c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+ if (! ASCII_CHAR_P (c))
{
- unsigned char *prev_p;
- int nbytes;
-
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- prev_p = p;
- while (--p >= stop && ! CHAR_HEAD_P (*p));
- PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (prev_p - p > nbytes)
- p = prev_p - 1, c = *p, nbytes = 1;
- else
- c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
- if (SINGLE_BYTE_CHAR_P (c))
- {
- if (!fastmap[c])
- break;
- }
- else
- {
- /* See the comment in the previous similar code. */
- for (i = 0; i < n_char_ranges; i += 2)
- if (c >= char_ranges[i] && c <= char_ranges[i + 1])
- break;
- if (!(negate ^ (i < n_char_ranges)))
- break;
- }
- pos--, pos_byte -= nbytes;
+ /* See the comment in the previous similar code. */
+ for (i = 0; i < n_char_ranges; i += 2)
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
+ break;
+ if (!(negate ^ (i < n_char_ranges)))
+ break;
}
- else
- while (1)
+ pos--, pos_byte -= prev_p - p;
+ }
+ else
+ while (1)
+ {
+ if (p <= stop)
{
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- if (!fastmap[p[-1]])
+ if (p <= endp)
break;
- p--, pos--;
+ p = GPT_ADDR;
+ stop = endp;
}
- }
+ if (!fastmap[p[-1]])
+ break;
+ p--, pos--, pos_byte--;
+ }
}
-#if 0 /* Not needed now that a position in mid-character
- cannot be specified in Lisp. */
- if (multibyte
- /* INC_POS or DEC_POS might have moved POS over LIM. */
- && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
- pos = XINT (lim);
-#endif
+ SET_PT_BOTH (pos, pos_byte);
+ immediate_quit = 0;
+
+ return make_number (PT - start_point);
+ }
+}
- if (! multibyte)
- pos_byte = pos;
+
+static Lisp_Object
+skip_syntaxes (forwardp, string, lim)
+ int forwardp;
+ Lisp_Object string, lim;
+{
+ register unsigned int c;
+ unsigned char fastmap[0400];
+ int negate = 0;
+ register int i, i_byte;
+ int multibyte;
+ int size_byte;
+ unsigned char *str;
+
+ CHECK_STRING (string);
+
+ if (NILP (lim))
+ XSETINT (lim, forwardp ? ZV : BEGV);
+ else
+ CHECK_NUMBER_COERCE_MARKER (lim);
+
+ /* In any case, don't allow scan outside bounds of buffer. */
+ if (XINT (lim) > ZV)
+ XSETFASTINT (lim, ZV);
+ if (XINT (lim) < BEGV)
+ XSETFASTINT (lim, BEGV);
+
+ if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
+ return Qnil;
+
+ multibyte = (!NILP (current_buffer->enable_multibyte_characters)
+ && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
+
+ bzero (fastmap, sizeof fastmap);
+
+ if (SBYTES (string) > SCHARS (string))
+ /* As this is very rare case (syntax spec is ASCII only), don't
+ consider efficiency. */
+ string = string_make_unibyte (string);
+
+ str = SDATA (string);
+ size_byte = SBYTES (string);
+
+ i_byte = 0;
+ if (i_byte < size_byte
+ && SREF (string, 0) == '^')
+ {
+ negate = 1; i_byte++;
+ }
+
+ /* Find the syntaxes specified and set their elements of fastmap. */
+
+ while (i_byte < size_byte)
+ {
+ c = str[i_byte++];
+ fastmap[syntax_spec_code[c]] = 1;
+ }
+
+ /* If ^ was the first character, complement the fastmap. */
+ if (negate)
+ for (i = 0; i < sizeof fastmap; i++)
+ fastmap[i] ^= 1;
+
+ {
+ int start_point = PT;
+ int pos = PT;
+ int pos_byte = PT_BYTE;
+ unsigned char *p = PT_ADDR, *endp, *stop;
+
+ if (forwardp)
+ {
+ endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
+ stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
+ }
+ else
+ {
+ endp = CHAR_POS_ADDR (XINT (lim));
+ stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
+ }
+
+ immediate_quit = 1;
+ SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
+ if (forwardp)
+ {
+ if (multibyte)
+ {
+ while (1)
+ {
+ int nbytes;
+
+ if (p >= stop)
+ {
+ if (p >= endp)
+ break;
+ p = GAP_END_ADDR;
+ stop = endp;
+ }
+ c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ if (! fastmap[(int) SYNTAX (c)])
+ break;
+ p += nbytes, pos++, pos_byte += nbytes;
+ UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ }
+ }
+ else
+ {
+ while (1)
+ {
+ if (p >= stop)
+ {
+ if (p >= endp)
+ break;
+ p = GAP_END_ADDR;
+ stop = endp;
+ }
+ if (! fastmap[(int) SYNTAX (*p)])
+ break;
+ p++, pos++, pos_byte++;
+ UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ }
+ }
+ }
+ else
+ {
+ if (multibyte)
+ {
+ while (1)
+ {
+ unsigned char *prev_p;
+
+ if (p <= stop)
+ {
+ if (p <= endp)
+ break;
+ p = GPT_ADDR;
+ stop = endp;
+ }
+ prev_p = p;
+ while (--p >= stop && ! CHAR_HEAD_P (*p));
+ c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+ if (! fastmap[(int) SYNTAX (c)])
+ break;
+ pos--, pos_byte -= prev_p - p;
+ UPDATE_SYNTAX_TABLE_BACKWARD (pos);
+ }
+ }
+ else
+ {
+ while (1)
+ {
+ if (p <= stop)
+ {
+ if (p <= endp)
+ break;
+ p = GPT_ADDR;
+ stop = endp;
+ }
+ if (! fastmap[(int) SYNTAX (p[-1])])
+ break;
+ p--, pos--, pos_byte--;
+ UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
+ }
+ }
+ }
SET_PT_BOTH (pos, pos_byte);
immediate_quit = 0;
@@ -1788,7 +1972,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
*bytepos_ptr = from_byte;
return 0;
}
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
syntax = SYNTAX_WITH_FLAGS (c);
code = syntax & 0xff;
if (code == Sendcomment
@@ -1818,7 +2002,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
forw_incomment:
if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
&& SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
- && (c1 = FETCH_CHAR (from_byte),
+ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMEND_SECOND (c1))
&& ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
@@ -1837,7 +2021,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
if (nesting > 0
&& from < stop
&& SYNTAX_FLAGS_COMSTART_FIRST (syntax)
- && (c1 = FETCH_CHAR (from_byte),
+ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMMENT_STYLE (c1) == style
&& SYNTAX_COMSTART_SECOND (c1))
&& (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
@@ -1901,7 +2085,7 @@ between them, return t; otherwise return nil. */)
immediate_quit = 0;
return Qnil;
}
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
@@ -1909,7 +2093,7 @@ between them, return t; otherwise return nil. */)
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
if (from < stop && comstart_first
- && (c1 = FETCH_CHAR (from_byte),
+ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMSTART_SECOND (c1)))
{
/* We have encountered a comment start sequence and we
@@ -1967,7 +2151,7 @@ between them, return t; otherwise return nil. */)
DEC_BOTH (from, from_byte);
/* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
quoted = char_quoted (from, from_byte);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (c);
comstyle = 0;
comnested = SYNTAX_COMMENT_NESTED (c);
@@ -1984,7 +2168,7 @@ between them, return t; otherwise return nil. */)
code = Sendcomment;
/* Calling char_quoted, above, set up global syntax position
at the new value of FROM. */
- c1 = FETCH_CHAR (from_byte);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
comstyle = SYNTAX_COMMENT_STYLE (c1);
comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
}
@@ -2000,7 +2184,7 @@ between them, return t; otherwise return nil. */)
if (from == stop)
break;
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
if (SYNTAX (c) == Scomment_fence
&& !char_quoted (from, from_byte))
{
@@ -2061,11 +2245,11 @@ between them, return t; otherwise return nil. */)
return Qt;
}
-/* Return syntax code of character C if C is a single byte character
+/* Return syntax code of character C if C is an ASCII character
or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
-#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
- ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
+#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
+ ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
? SYNTAX (c) : Ssymbol)
static Lisp_Object
@@ -2108,7 +2292,7 @@ scan_lists (from, count, depth, sexpflag)
{
int comstart_first, prefix;
UPDATE_SYNTAX_TABLE_FORWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
@@ -2119,7 +2303,7 @@ scan_lists (from, count, depth, sexpflag)
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
if (from < stop && comstart_first
- && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
+ && SYNTAX_COMSTART_SECOND (FETCH_CHAR_AS_MULTIBYTE (from_byte))
&& parse_sexp_ignore_comments)
{
/* we have encountered a comment start sequence and we
@@ -2128,7 +2312,7 @@ scan_lists (from, count, depth, sexpflag)
only a comment end of the same style actually ends
the comment section */
code = Scomment;
- c1 = FETCH_CHAR (from_byte);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
comstyle = SYNTAX_COMMENT_STYLE (c1);
comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
INC_BOTH (from, from_byte);
@@ -2154,7 +2338,7 @@ scan_lists (from, count, depth, sexpflag)
UPDATE_SYNTAX_TABLE_FORWARD (from);
/* Some compilers can't handle this inside the switch. */
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
switch (temp)
{
@@ -2197,7 +2381,7 @@ scan_lists (from, count, depth, sexpflag)
case Smath:
if (!sexpflag)
break;
- if (from != stop && c == FETCH_CHAR (from_byte))
+ if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
{
INC_BOTH (from, from_byte);
}
@@ -2225,12 +2409,12 @@ scan_lists (from, count, depth, sexpflag)
case Sstring:
case Sstring_fence:
temp_pos = dec_bytepos (from_byte);
- stringterm = FETCH_CHAR (temp_pos);
+ stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
while (1)
{
if (from >= stop) goto lose;
UPDATE_SYNTAX_TABLE_FORWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
if (code == Sstring
? (c == stringterm
&& SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
@@ -2273,7 +2457,7 @@ scan_lists (from, count, depth, sexpflag)
{
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
if (depth == min_depth)
last_good = from;
@@ -2291,7 +2475,7 @@ scan_lists (from, count, depth, sexpflag)
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
code = Sendcomment;
- c1 = FETCH_CHAR (from_byte);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
comstyle = SYNTAX_COMMENT_STYLE (c1);
comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
}
@@ -2324,7 +2508,7 @@ scan_lists (from, count, depth, sexpflag)
else
temp_pos--;
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
- c1 = FETCH_CHAR (temp_pos);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
/* Don't allow comment-end to be quoted. */
if (temp_code == Sendcomment)
@@ -2336,7 +2520,7 @@ scan_lists (from, count, depth, sexpflag)
temp_pos = dec_bytepos (temp_pos);
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
}
- c1 = FETCH_CHAR (temp_pos);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
if (! (quoted || temp_code == Sword
|| temp_code == Ssymbol
@@ -2351,7 +2535,7 @@ scan_lists (from, count, depth, sexpflag)
break;
temp_pos = dec_bytepos (from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
- if (from != stop && c == FETCH_CHAR (temp_pos))
+ if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
DEC_BOTH (from, from_byte);
if (mathexit)
{
@@ -2397,7 +2581,7 @@ scan_lists (from, count, depth, sexpflag)
if (from == stop) goto lose;
UPDATE_SYNTAX_TABLE_BACKWARD (from);
if (!char_quoted (from, from_byte)
- && (c = FETCH_CHAR (from_byte),
+ && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
break;
}
@@ -2405,7 +2589,7 @@ scan_lists (from, count, depth, sexpflag)
break;
case Sstring:
- stringterm = FETCH_CHAR (from_byte);
+ stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
while (1)
{
if (from == stop) goto lose;
@@ -2416,7 +2600,7 @@ scan_lists (from, count, depth, sexpflag)
temp_pos--;
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
if (!char_quoted (from - 1, temp_pos)
- && stringterm == (c = FETCH_CHAR (temp_pos))
+ && stringterm == (c = FETCH_CHAR_AS_MULTIBYTE (temp_pos))
&& SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
break;
DEC_BOTH (from, from_byte);
@@ -2524,7 +2708,7 @@ This includes chars with "quote" or "prefix" syntax (' or p). */)
while (!char_quoted (pos, pos_byte)
/* Previous statement updates syntax table. */
- && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
+ && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
|| SYNTAX_PREFIX (c)))
{
opoint = pos;
@@ -2552,7 +2736,8 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
stopbefore, oldstate, commentstop)
struct lisp_parse_state *stateptr;
register int from;
- int end, targetdepth, stopbefore, from_byte;
+ int from_byte;
+ int end, targetdepth, stopbefore;
Lisp_Object oldstate;
int commentstop;
{
@@ -2590,7 +2775,7 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
do { prev_from = from; \
prev_from_byte = from_byte; \
prev_from_syntax \
- = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
+ = SYNTAX_WITH_FLAGS (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)); \
INC_BOTH (from, from_byte); \
if (from < end) \
UPDATE_SYNTAX_TABLE_FORWARD (from); \
@@ -2706,7 +2891,7 @@ do { prev_from = from; \
}
else if (from < end)
if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
- if (c1 = FETCH_CHAR (from_byte),
+ if (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMSTART_SECOND (c1))
/* Duplicate code to avoid a complex if-expression
which causes trouble for the SGI compiler. */
@@ -2744,7 +2929,7 @@ do { prev_from = from; \
while (from < end)
{
/* Some compilers can't handle this inside the switch. */
- temp = SYNTAX (FETCH_CHAR (from_byte));
+ temp = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (from_byte));
switch (temp)
{
case Scharquote:
@@ -2817,7 +3002,7 @@ do { prev_from = from; \
if (stopbefore) goto stop; /* this arg means stop at sexp start */
curlevel->last = prev_from;
state.instring = (code == Sstring
- ? (FETCH_CHAR (prev_from_byte))
+ ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
: ST_STRING_STYLE);
if (boundary_stop) goto done;
startinstring:
@@ -2829,7 +3014,7 @@ do { prev_from = from; \
int c;
if (from >= end) goto done;
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
/* Some compilers can't handle this inside the switch. */
temp = SYNTAX (c);
@@ -3041,8 +3226,7 @@ init_syntax_once ()
/* All multibyte characters have syntax `word' by default. */
temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
+ char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
}
void
@@ -3082,6 +3266,25 @@ See the info node `(elisp)Syntax Properties' for a description of the
doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
open_paren_in_column_0_is_defun_start = 1;
+
+ DEFVAR_LISP ("find-word-boundary-function-table",
+ &Vfind_word_boundary_function_table,
+ doc: /*
+Char table of functions to search for the word boundary.
+Each function is called with two arguments; POS and LIMIT.
+POS and LIMIT are character positions in the current buffer.
+
+If POS is less than LIMIT, POS is at the first character of a word,
+and the return value of a function is a position after the last
+character of that word.
+
+If POS is not less than LIMIT, POS is at the last character of a word,
+and the return value of a function is a position at the first
+character of that word.
+
+In both cases, LIMIT bounds the search. */);
+ Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
+
defsubr (&Ssyntax_table_p);
defsubr (&Ssyntax_table);
defsubr (&Sstandard_syntax_table);
diff --git a/src/syntax.h b/src/syntax.h
index 6d8f201baeb..bdf7ebb31bd 100644
--- a/src/syntax.h
+++ b/src/syntax.h
@@ -57,37 +57,14 @@ enum syntaxcode
/* Set the syntax entry VAL for char C in table TABLE. */
-#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
- ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
- ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \
- : Faset ((table), make_number (c), (val)))
+#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
+ CHAR_TABLE_SET ((table), c, (val))
-/* Fetch the syntax entry for char C in syntax table TABLE.
- This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS.
- Do inheritance. */
+/* Set the syntax entry VAL for char-range RANGE in table TABLE.
+ RANGE is a cons (FROM . TO) specifying the range of characters. */
-#ifdef __GNUC__
-#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
- ({ Lisp_Object tbl = table; \
- Lisp_Object temp = XCHAR_TABLE (tbl)->contents[(c)]; \
- while (NILP (temp)) \
- { \
- tbl = XCHAR_TABLE (tbl)->parent; \
- if (NILP (tbl)) \
- break; \
- temp = XCHAR_TABLE (tbl)->contents[(c)]; \
- } \
- temp; })
-#else
-extern Lisp_Object syntax_temp;
-extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
-
-#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
- (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
- (NILP (syntax_temp) \
- ? syntax_parent_lookup (table, (c)) \
- : syntax_temp))
-#endif
+#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
+ Fset_char_table_range ((table), (range), (val))
/* SYNTAX_ENTRY fetches the information from the entry for character C
in syntax table TABLE, or from globally kept data (gl_state).
@@ -105,12 +82,7 @@ extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif
-#define SYNTAX_ENTRY_INT(c) \
- ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
- ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
- (unsigned char) (c)) \
- : Faref (CURRENT_SYNTAX_TABLE, \
- make_number (c)))
+#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
/* Extract the information from the entry for character C
in the current syntax table. */
@@ -137,6 +109,7 @@ extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
? XCDR (temp) \
: Qnil); })
#else
+extern Lisp_Object syntax_temp;
#define SYNTAX(c) \
(syntax_temp = SYNTAX_ENTRY ((c)), \
(CONSP (syntax_temp) \
diff --git a/src/term.c b/src/term.c
index 829f2d88e6b..4bc460d0f78 100644
--- a/src/term.c
+++ b/src/term.c
@@ -29,6 +29,8 @@ Boston, MA 02111-1307, USA. */
#include "termchar.h"
#include "termopts.h"
#include "lisp.h"
+#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "coding.h"
#include "keyboard.h"
@@ -810,7 +812,6 @@ encode_terminal_code (src, dst, src_len, dst_len, consumed)
int len;
register int tlen = GLYPH_TABLE_LENGTH;
register Lisp_Object *tbase = GLYPH_TABLE_BASE;
- int result;
struct coding_system *coding;
/* If terminal_coding does any conversion, use it, otherwise use
@@ -829,7 +830,7 @@ encode_terminal_code (src, dst, src_len, dst_len, consumed)
if (g < 0 || g >= tlen)
{
- /* This glyph doesn't has an entry in Vglyph_table. */
+ /* This glyph doesn't have an entry in Vglyph_table. */
if (! CHAR_VALID_P (src->u.ch, 0))
{
len = 1;
@@ -868,12 +869,13 @@ encode_terminal_code (src, dst, src_len, dst_len, consumed)
}
}
- result = encode_coding (coding, buf, dst, len, dst_end - dst);
+ coding->source = buf;
+ coding->destination = dst;
+ coding->dst_bytes = dst_end - dst;
+ encode_coding_object (coding, Qnil, 0, 0, 1, len, Qnil);
len -= coding->consumed;
dst += coding->produced;
- if (result == CODING_FINISH_INSUFFICIENT_DST
- || (result == CODING_FINISH_INSUFFICIENT_SRC
- && len > dst_end - dst))
+ if (coding->result == CODING_RESULT_INSUFFICIENT_DST)
/* The remaining output buffer is too short. We must
break the loop here without increasing SRC so that the
next call of this function starts from the same glyph. */
@@ -977,8 +979,10 @@ write_glyphs (string, len)
if (CODING_REQUIRE_FLUSHING (&terminal_coding))
{
terminal_coding.mode |= CODING_MODE_LAST_BLOCK;
- encode_coding (&terminal_coding, "", conversion_buffer,
- 0, conversion_buffer_size);
+ terminal_coding.source = (unsigned char *) "";
+ terminal_coding.destination = conversion_buffer;
+ terminal_coding.dst_bytes = conversion_buffer_size;
+ encode_coding_object (&terminal_coding, Qnil, 0, 0, 0, 0, Qnil);
if (terminal_coding.produced > 0)
{
fwrite (conversion_buffer, 1, terminal_coding.produced, stdout);
@@ -1721,13 +1725,7 @@ produce_glyphs (it)
}
else
{
- /* A multi-byte character. The display width is fixed for all
- characters of the set. Some of the glyphs may have to be
- ignored because they are already displayed in a continued
- line. */
- int charset = CHAR_CHARSET (it->c);
-
- it->pixel_width = CHARSET_WIDTH (charset);
+ it->pixel_width = CHAR_WIDTH (it->c);
it->nglyphs = it->pixel_width;
if (it->glyph_row)
diff --git a/src/w16select.c b/src/w16select.c
index e655936e48b..199f4363a60 100644
--- a/src/w16select.c
+++ b/src/w16select.c
@@ -38,7 +38,7 @@ Boston, MA 02111-1307, USA. */
#include "frame.h" /* Need this to get the X window of selected_frame */
#include "blockinput.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "composite.h"
diff --git a/src/w32bdf.c b/src/w32bdf.c
index 73f40a22df6..254eeaeb71b 100644
--- a/src/w32bdf.c
+++ b/src/w32bdf.c
@@ -28,7 +28,7 @@ Boston, MA 02111-1307, USA. */
#endif
#include "lisp.h"
-#include "charset.h"
+#include "character.h"
#include "keyboard.h"
#include "frame.h"
#include "dispextern.h"
diff --git a/src/w32console.c b/src/w32console.c
index 0ad9c755e25..91601eb148a 100644
--- a/src/w32console.c
+++ b/src/w32console.c
@@ -31,7 +31,7 @@ Boston, MA 02111-1307, USA.
#include <string.h>
#include "lisp.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "disptab.h"
#include "termhooks.h"
diff --git a/src/w32term.c b/src/w32term.c
index 539df459402..35952e03347 100644
--- a/src/w32term.c
+++ b/src/w32term.c
@@ -996,7 +996,7 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
struct font_info *font_info;
int * two_byte_p;
{
- int charset = CHAR_CHARSET (c);
+ struct charset *charset = CHAR_CHARSET (c);
int codepage;
int unicode_p = 0;
int internal_two_byte_p = 0;
@@ -1015,18 +1015,18 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
if (CHARSET_DIMENSION (charset) == 1)
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = XCHAR2B_BYTE2 (char2b);
ccl->reg[2] = -1;
}
else
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = XCHAR2B_BYTE1 (char2b);
ccl->reg[2] = XCHAR2B_BYTE2 (char2b);
}
- ccl_driver (ccl, NULL, NULL, 0, 0, NULL);
+ ccl_driver (ccl, NULL, NULL, 0, 0, Qnil);
/* We assume that MSBs are appropriately set/reset by CCL
program. */
@@ -1050,11 +1050,10 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
STORE_XCHAR2B (char2b, XCHAR2B_BYTE1 (char2b), XCHAR2B_BYTE2 (char2b) | 0x80);
else if (enc == 4)
{
- int sjis1, sjis2;
+ int code = (int) char2b;
- ENCODE_SJIS (XCHAR2B_BYTE1 (char2b), XCHAR2B_BYTE2 (char2b),
- sjis1, sjis2);
- STORE_XCHAR2B (char2b, sjis1, sjis2);
+ JIS_TO_SJIS (code);
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
}
}
codepage = font_info->codepage;
@@ -1062,8 +1061,7 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
/* If charset is not ASCII or Latin-1, may need to move it into
Unicode space. */
if ( font && !font->bdf && w32_use_unicode_for_codepage (codepage)
- && charset != CHARSET_ASCII && charset != charset_latin_iso8859_1
- && charset != CHARSET_8_BIT_CONTROL && charset != CHARSET_8_BIT_GRAPHIC)
+ && c >= 0x100)
{
char temp[3];
temp[0] = XCHAR2B_BYTE1 (char2b);
@@ -5231,7 +5229,7 @@ x_new_font (f, fontname)
register char *fontname;
{
struct font_info *fontp
- = FS_LOAD_FONT (f, 0, fontname, -1);
+ = FS_LOAD_FONT (f, fontname);
if (!fontp)
return Qnil;
diff --git a/src/xdisp.c b/src/xdisp.c
index a61ead4bef2..762ed45b8a2 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -176,6 +176,7 @@ Boston, MA 02111-1307, USA. */
#include "termchar.h"
#include "dispextern.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "indent.h"
#include "commands.h"
@@ -674,11 +675,13 @@ static enum prop_handled handle_display_prop P_ ((struct it *));
static enum prop_handled handle_composition_prop P_ ((struct it *));
static enum prop_handled handle_overlay_change P_ ((struct it *));
static enum prop_handled handle_fontified_prop P_ ((struct it *));
+static enum prop_handled handle_auto_composed_prop P_ ((struct it *));
/* Properties handled by iterators. */
static struct props it_props[] =
{
+ {&Qauto_composed, AUTO_COMPOSED_PROP_IDX, handle_auto_composed_prop},
{&Qfontified, FONTIFIED_PROP_IDX, handle_fontified_prop},
/* Handle `face' before `display' because some sub-properties of
`display' need to know the face. */
@@ -3765,6 +3768,44 @@ string_buffer_position (w, string, around_charpos)
`composition' property
***********************************************************************/
+static enum prop_handled
+handle_auto_composed_prop (it)
+ struct it *it;
+{
+ enum prop_handled handled = HANDLED_NORMALLY;
+
+ if (! NILP (Vauto_composition_function))
+ {
+ Lisp_Object val;
+ int pos;
+
+ if (STRINGP (it->string))
+ pos = IT_STRING_CHARPOS (*it);
+ else
+ pos = IT_CHARPOS (*it);
+
+ val =Fget_char_property (make_number (pos), Qauto_composed, it->string);
+ if (NILP (val))
+ {
+ int count = SPECPDL_INDEX ();
+ Lisp_Object args[3];
+
+ args[0] = Vauto_composition_function;
+ specbind (Qauto_composition_function, Qnil);
+ args[1] = make_number (pos);
+ args[2] = it->string;
+ safe_call (3, args);
+ unbind_to (count, Qnil);
+
+ val = Fget_char_property (args[1], Qauto_composed, it->string);
+ if (! NILP (val))
+ handled = HANDLED_RECOMPUTE_PROPS;
+ }
+ }
+
+ return handled;
+}
+
/* Set up iterator IT from `composition' property at its current
position. Called from handle_stop. */
@@ -4725,12 +4766,9 @@ get_next_display_element (it)
else if ((it->c < ' '
&& (it->area != TEXT_AREA
|| (it->c != '\n' && it->c != '\t')))
- || (it->multibyte_p
- ? ((it->c >= 127
- && it->len == 1)
- || !CHAR_PRINTABLE_P (it->c))
- : (it->c >= 127
- && it->c == unibyte_char_to_multibyte (it->c))))
+ || (it->c != '\n' && it->c != '\t'
+ && (it->multibyte_p ? !CHAR_PRINTABLE_P (it->c)
+ : it->c == 127)))
{
/* IT->c is a control character which must be displayed
either as '\003' or as `^C' where the '\\' and '^'
@@ -4776,24 +4814,28 @@ get_next_display_element (it)
else
escape_glyph = FAST_MAKE_GLYPH ('\\', 0);
- if (SINGLE_BYTE_CHAR_P (it->c))
- str[0] = it->c, len = 1;
+ if (CHAR_BYTE8_P (it->c))
+ {
+ str[0] = CHAR_TO_BYTE8 (it->c);
+ len = 1;
+ }
+ else if (it->c < 256)
+ {
+ str[0] = it->c;
+ len = 1;
+ }
else
{
- len = CHAR_STRING_NO_SIGNAL (it->c, str);
- if (len < 0)
- {
- /* It's an invalid character, which
- shouldn't happen actually, but due to
- bugs it may happen. Let's print the char
- as is, there's not much meaningful we can
- do with it. */
- str[0] = it->c;
- str[1] = it->c >> 8;
- str[2] = it->c >> 16;
- str[3] = it->c >> 24;
- len = 4;
- }
+ /* It's an invalid character, which
+ shouldn't happen actually, but due to
+ bugs it may happen. Let's print the char
+ as is, there's not much meaningful we can
+ do with it. */
+ str[0] = it->c;
+ str[1] = it->c >> 8;
+ str[2] = it->c >> 16;
+ str[3] = it->c >> 24;
+ len = 4;
}
for (i = 0; i < len; i++)
@@ -6208,7 +6250,7 @@ message_dolog (m, nbytes, nlflag, multibyte)
for (i = 0; i < nbytes; i += char_bytes)
{
c = string_char_and_length (m + i, nbytes - i, &char_bytes);
- work[0] = (SINGLE_BYTE_CHAR_P (c)
+ work[0] = (ASCII_CHAR_P (c)
? c
: multibyte_char_to_unibyte (c, Qnil));
insert_1_both (work, 1, 1, 1, 0, 0);
@@ -7478,7 +7520,7 @@ set_message_1 (a1, a2, nbytes, multibyte_p)
for (i = 0; i < nbytes; i += n)
{
c = string_char_and_length (s + i, nbytes - i, &n);
- work[0] = (SINGLE_BYTE_CHAR_P (c)
+ work[0] = (ASCII_CHAR_P (c)
? c
: multibyte_char_to_unibyte (c, Qnil));
insert_1_both (work, 1, 1, 1, 0, 0);
@@ -10376,35 +10418,24 @@ disp_char_vector (dp, c)
struct Lisp_Char_Table *dp;
int c;
{
- int code[4], i;
Lisp_Object val;
- if (SINGLE_BYTE_CHAR_P (c))
- return (dp->contents[c]);
-
- SPLIT_CHAR (c, code[0], code[1], code[2]);
- if (code[1] < 32)
- code[1] = -1;
- else if (code[2] < 32)
- code[2] = -1;
-
- /* Here, the possible range of code[0] (== charset ID) is
- 128..max_charset. Since the top level char table contains data
- for multibyte characters after 256th element, we must increment
- code[0] by 128 to get a correct index. */
- code[0] += 128;
- code[3] = -1; /* anchor */
-
- for (i = 0; code[i] >= 0; i++, dp = XCHAR_TABLE (val))
+ if (ASCII_CHAR_P (c))
{
- val = dp->contents[code[i]];
- if (!SUB_CHAR_TABLE_P (val))
- return (NILP (val) ? dp->defalt : val);
+ val = dp->ascii;
+ if (SUB_CHAR_TABLE_P (val))
+ val = XSUB_CHAR_TABLE (val)->contents[c];
}
+ else
+ {
+ Lisp_Object table;
- /* Here, val is a sub char table. We return the default value of
- it. */
- return (dp->defalt);
+ XSETCHAR_TABLE (table, dp);
+ val = char_table_ref (table, c);
+ }
+ if (NILP (val))
+ val = dp->defalt;
+ return val;
}
@@ -14083,7 +14114,7 @@ extend_face_to_end_of_line (it)
ASCII face. This will be automatically undone the next time
get_next_display_element returns a multibyte character. Note
that the character will always be single byte in unibyte text. */
- if (!SINGLE_BYTE_CHAR_P (it->c))
+ if (!ASCII_CHAR_P (it->c))
{
it->face_id = FACE_FOR_CHAR (f, face, 0);
}
@@ -14191,7 +14222,7 @@ highlight_trailing_whitespace (f, row)
&& glyph->u.ch == ' '))
&& trailing_whitespace_p (glyph->charpos))
{
- int face_id = lookup_named_face (f, Qtrailing_whitespace, 0);
+ int face_id = lookup_named_face (f, Qtrailing_whitespace);
while (glyph >= start
&& BUFFERP (glyph->object)
@@ -15602,7 +15633,7 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
/* The EOL conversion we are using. */
Lisp_Object eoltype;
- val = Fget (coding_system, Qcoding_system);
+ val = CODING_SYSTEM_SPEC (coding_system);
eoltype = Qnil;
if (!VECTORP (val)) /* Not yet decided. */
@@ -15615,12 +15646,14 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
}
else
{
+ Lisp_Object attrs;
Lisp_Object eolvalue;
- eolvalue = Fget (coding_system, Qeol_type);
+ attrs = AREF (val, 0);
+ eolvalue = AREF (val, 2);
if (multibyte)
- *buf++ = XFASTINT (AREF (val, 1));
+ *buf++ = XFASTINT (CODING_ATTR_MNEMONIC (attrs));
if (eol_flag)
{
@@ -15630,10 +15663,10 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
eoltype = eol_mnemonic_undecided;
else if (VECTORP (eolvalue)) /* Not yet decided. */
eoltype = eol_mnemonic_undecided;
- else /* INTEGERP (eolvalue) -- 0:LF, 1:CRLF, 2:CR */
- eoltype = (XFASTINT (eolvalue) == 0
+ else /* eolvalue is Qunix, Qdos, or Qmac. */
+ eoltype = (EQ (eolvalue, Qunix)
? eol_mnemonic_unix
- : (XFASTINT (eolvalue) == 1
+ : (EQ (eolvalue, Qdos) == 1
? eol_mnemonic_dos : eol_mnemonic_mac));
}
}
@@ -15646,8 +15679,7 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
eol_str = SDATA (eoltype);
eol_str_len = SBYTES (eoltype);
}
- else if (INTEGERP (eoltype)
- && CHAR_VALID_P (XINT (eoltype), 0))
+ else if (CHARACTERP (eoltype))
{
unsigned char *tmp = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH);
eol_str_len = CHAR_STRING (XINT (eoltype), tmp);
@@ -15998,8 +16030,10 @@ decode_mode_spec (w, c, field_width, precision, multibyte)
{
/* No need to mention EOL here--the terminal never needs
to do EOL conversion. */
- p = decode_mode_spec_coding (keyboard_coding.symbol, p, 0);
- p = decode_mode_spec_coding (terminal_coding.symbol, p, 0);
+ p = decode_mode_spec_coding (CODING_ID_NAME (keyboard_coding.id),
+ p, 0);
+ p = decode_mode_spec_coding (CODING_ID_NAME (terminal_coding.id),
+ p, 0);
}
p = decode_mode_spec_coding (b->buffer_file_coding_system,
p, eol_flag);
@@ -16270,7 +16304,7 @@ display_string (string, lisp_string, face_string, face_string_pos,
}
break;
}
- else if (x + glyph->pixel_width > it->first_visible_x)
+ else if (x + glyph->pixel_width >= it->first_visible_x)
{
/* Glyph is at least partially visible. */
++it->hpos;
@@ -16573,24 +16607,25 @@ get_glyph_face_and_encoding (f, glyph, char2b, two_byte_p)
}
else
{
- int c1, c2, charset;
+ struct font_info *font_info
+ = FONT_INFO_FROM_ID (f, face->font_info_id);
+ if (font_info)
+ {
+ struct charset *charset = CHARSET_FROM_ID (font_info->charset);
+ unsigned code = ENCODE_CHAR (charset, glyph->u.ch);
- /* Split characters into bytes. If c2 is -1 afterwards, C is
- really a one-byte character so that byte1 is zero. */
- SPLIT_CHAR (glyph->u.ch, charset, c1, c2);
- if (c2 > 0)
- STORE_XCHAR2B (char2b, c1, c2);
- else
- STORE_XCHAR2B (char2b, 0, c1);
+ if (CHARSET_DIMENSION (charset) == 1)
+ STORE_XCHAR2B (char2b, 0, code);
+ else
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
- /* Maybe encode the character in *CHAR2B. */
- if (charset != CHARSET_ASCII)
- {
- struct font_info *font_info
- = FONT_INFO_FROM_ID (f, face->font_info_id);
- if (font_info)
- glyph->font_type
- = rif->encode_char (glyph->u.ch, char2b, font_info, two_byte_p);
+ /* Maybe encode the character in *CHAR2B. */
+ if (CHARSET_ID (charset) != charset_ascii)
+ {
+ glyph->font_type
+ = rif->encode_char (glyph->u.ch, char2b, font_info, charset,
+ two_byte_p);
+ }
}
}
@@ -16970,26 +17005,19 @@ get_char_face_and_encoding (f, c, face_id, char2b, multibyte_p, display_p)
/* Case of ASCII in a face known to fit ASCII. */
STORE_XCHAR2B (char2b, 0, c);
}
- else
+ else if (face->font != NULL)
{
- int c1, c2, charset;
+ struct font_info *font_info
+ = FONT_INFO_FROM_ID (f, face->font_info_id);
+ struct charset *charset = CHARSET_FROM_ID (font_info->charset);
+ unsigned code = ENCODE_CHAR (charset, c);
- /* Split characters into bytes. If c2 is -1 afterwards, C is
- really a one-byte character so that byte1 is zero. */
- SPLIT_CHAR (c, charset, c1, c2);
- if (c2 > 0)
- STORE_XCHAR2B (char2b, c1, c2);
+ if (CHARSET_DIMENSION (charset) == 1)
+ STORE_XCHAR2B (char2b, 0, code);
else
- STORE_XCHAR2B (char2b, 0, c1);
-
- /* Maybe encode the character in *CHAR2B. */
- if (face->font != NULL)
- {
- struct font_info *font_info
- = FONT_INFO_FROM_ID (f, face->font_info_id);
- if (font_info)
- rif->encode_char (c, char2b, font_info, 0);
- }
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
+ /* Maybe encode the character in *CHAR2B. */
+ rif->encode_char (c, char2b, font_info, charset, NULL);
}
/* Make sure X resources of the face are allocated. */
@@ -17831,20 +17859,13 @@ x_produce_glyphs (it)
/* Maybe translate single-byte characters to multibyte, or the
other way. */
it->char_to_display = it->c;
- if (!ASCII_BYTE_P (it->c))
+ if (!ASCII_BYTE_P (it->c)
+ && ! it->multibyte_p)
{
- if (unibyte_display_via_language_environment
- && SINGLE_BYTE_CHAR_P (it->c)
- && (it->c >= 0240
- || !NILP (Vnonascii_translation_table)))
- {
- it->char_to_display = unibyte_char_to_multibyte (it->c);
- it->multibyte_p = 1;
- it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display);
- face = FACE_FROM_ID (it->f, it->face_id);
- }
- else if (!SINGLE_BYTE_CHAR_P (it->c)
- && !it->multibyte_p)
+ if (SINGLE_BYTE_CHAR_P (it->c)
+ && unibyte_display_via_language_environment)
+ it->char_to_display = unibyte_char_to_multibyte (it->c);
+ if (! SINGLE_BYTE_CHAR_P (it->c))
{
it->multibyte_p = 1;
it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display);
@@ -18003,20 +18024,18 @@ x_produce_glyphs (it)
/* If we found a font, this font should give us the right
metrics. If we didn't find a font, use the frame's
- default font and calculate the width of the character
- from the charset width; this is what old redisplay code
- did. */
+ default font and calculate the width of the character by
+ multiplying the width of font by the width of the
+ character. */
pcm = rif->per_char_metric (font, &char2b,
FONT_TYPE_FOR_MULTIBYTE (font, it->c));
if (font_not_found_p || !pcm)
{
- int charset = CHAR_CHARSET (it->char_to_display);
-
it->glyph_not_available_p = 1;
it->pixel_width = (FRAME_COLUMN_WIDTH (it->f)
- * CHARSET_WIDTH (charset));
+ * CHAR_WIDTH (it->char_to_display));
it->phys_ascent = FONT_BASE (font) + boff;
it->phys_descent = FONT_DESCENT (font) - boff;
}
@@ -18079,10 +18098,7 @@ x_produce_glyphs (it)
/* Maybe translate single-byte characters to multibyte. */
it->char_to_display = it->c;
if (unibyte_display_via_language_environment
- && SINGLE_BYTE_CHAR_P (it->c)
- && (it->c >= 0240
- || (it->c >= 0200
- && !NILP (Vnonascii_translation_table))))
+ && it->c >= 0200)
{
it->char_to_display = unibyte_char_to_multibyte (it->c);
}
diff --git a/src/xfaces.c b/src/xfaces.c
index e296c52a2b8..91214977d8e 100644
--- a/src/xfaces.c
+++ b/src/xfaces.c
@@ -56,7 +56,7 @@ Boston, MA 02111-1307, USA. */
13. Whether or not a box should be drawn around characters, the box
type, and, for simple boxes, in what color.
- 14. Font or fontset pattern, or nil. This is a special attribute.
+ 14. Font pattern, or nil. This is a special attribute.
When this attribute is specified, the face uses a font opened by
that pattern as is. In addition, all the other font-related
attributes (1st thru 5th) are generated from the opened font name.
@@ -72,6 +72,8 @@ Boston, MA 02111-1307, USA. */
and is used to ensure that a font specified on the command line,
for example, can be matched exactly.
+ 17. A fontset name.
+
Faces are frame-local by nature because Emacs allows to define the
same named face (face names are symbols) differently for different
frames. Each frame has an alist of face definitions for all named
@@ -123,7 +125,7 @@ Boston, MA 02111-1307, USA. */
is realized, it inherits (thus shares) a fontset of an ASCII face
that has the same attributes other than font-related ones.
- Thus, all realized face have a realized fontset.
+ Thus, all realized faces have a realized fontset.
Unibyte text.
@@ -196,6 +198,7 @@ Boston, MA 02111-1307, USA. */
#include <sys/stat.h>
#include "lisp.h"
+#include "character.h"
#include "charset.h"
#include "keyboard.h"
#include "frame.h"
@@ -300,6 +303,7 @@ Lisp_Object QCinverse_video, QCforeground, QCbackground, QCstipple;
Lisp_Object QCwidth, QCfont, QCbold, QCitalic;
Lisp_Object QCreverse_video;
Lisp_Object QCoverline, QCstrike_through, QCbox, QCinherit;
+Lisp_Object QCfontset;
/* Symbols used for attribute values. */
@@ -476,7 +480,7 @@ static int load_pixmap P_ ((struct frame *, Lisp_Object, unsigned *, unsigned *)
static unsigned char *xstrlwr P_ ((unsigned char *));
static void signal_error P_ ((char *, Lisp_Object));
static struct frame *frame_or_selected_frame P_ ((Lisp_Object, int));
-static void load_face_font P_ ((struct frame *, struct face *, int));
+static void load_face_font P_ ((struct frame *, struct face *));
static void load_face_colors P_ ((struct frame *, struct face *, Lisp_Object *));
static void free_face_colors P_ ((struct frame *, struct face *));
static int face_color_gray_p P_ ((struct frame *, char *));
@@ -489,18 +493,17 @@ static int font_list_1 P_ ((struct frame *, Lisp_Object, Lisp_Object,
Lisp_Object, struct font_name **));
static int font_list P_ ((struct frame *, Lisp_Object, Lisp_Object,
Lisp_Object, struct font_name **));
-static int try_font_list P_ ((struct frame *, Lisp_Object *,
- Lisp_Object, Lisp_Object, struct font_name **,
- int));
+static int try_font_list P_ ((struct frame *, Lisp_Object,
+ Lisp_Object, Lisp_Object, struct font_name **));
static int try_alternative_families P_ ((struct frame *f, Lisp_Object,
Lisp_Object, struct font_name **));
static int cmp_font_names P_ ((const void *, const void *));
-static struct face *realize_face P_ ((struct face_cache *, Lisp_Object *, int,
- struct face *, int));
-static struct face *realize_x_face P_ ((struct face_cache *,
- Lisp_Object *, int, struct face *));
-static struct face *realize_tty_face P_ ((struct face_cache *,
- Lisp_Object *, int));
+static struct face *realize_face P_ ((struct face_cache *, Lisp_Object *,
+ int));
+static struct face *realize_non_ascii_face P_ ((struct frame *, int,
+ struct face *));
+static struct face *realize_x_face P_ ((struct face_cache *, Lisp_Object *));
+static struct face *realize_tty_face P_ ((struct face_cache *, Lisp_Object *));
static int realize_basic_faces P_ ((struct frame *));
static int realize_default_face P_ ((struct frame *));
static void realize_named_face P_ ((struct frame *, Lisp_Object, int));
@@ -510,14 +513,12 @@ static unsigned hash_string_case_insensitive P_ ((Lisp_Object));
static unsigned lface_hash P_ ((Lisp_Object *));
static int lface_same_font_attributes_p P_ ((Lisp_Object *, Lisp_Object *));
static struct face_cache *make_face_cache P_ ((struct frame *));
-static void free_realized_face P_ ((struct frame *, struct face *));
static void clear_face_gcs P_ ((struct face_cache *));
static void free_face_cache P_ ((struct face_cache *));
static int face_numeric_weight P_ ((Lisp_Object));
static int face_numeric_slant P_ ((Lisp_Object));
static int face_numeric_swidth P_ ((Lisp_Object));
static int face_fontset P_ ((Lisp_Object *));
-static char *choose_face_font P_ ((struct frame *, Lisp_Object *, int, int, int*));
static void merge_face_vectors P_ ((struct frame *, Lisp_Object *, Lisp_Object*, Lisp_Object));
static void merge_face_inheritance P_ ((struct frame *f, Lisp_Object,
Lisp_Object *, Lisp_Object));
@@ -527,7 +528,6 @@ static int set_lface_from_font_name P_ ((struct frame *, Lisp_Object,
Lisp_Object, int, int));
static Lisp_Object lface_from_face_name P_ ((struct frame *, Lisp_Object, int));
static struct face *make_realized_face P_ ((Lisp_Object *));
-static void free_realized_faces P_ ((struct face_cache *));
static char *best_matching_font P_ ((struct frame *, Lisp_Object *,
struct font_name *, int, int, int *));
static void cache_face P_ ((struct face_cache *, struct face *, unsigned));
@@ -1216,15 +1216,13 @@ load_pixmap (f, name, w_ptr, h_ptr)
#ifdef HAVE_WINDOW_SYSTEM
-/* Load font of face FACE which is used on frame F to display
- character C. The name of the font to load is determined by lface
- and fontset of FACE. */
+/* Load font of face FACE which is used on frame F to display ASCII
+ characters. The name of the font to load is determined by lface. */
static void
-load_face_font (f, face, c)
+load_face_font (f, face)
struct frame *f;
struct face *face;
- int c;
{
struct font_info *font_info = NULL;
char *font_name;
@@ -1232,14 +1230,14 @@ load_face_font (f, face, c)
face->font_info_id = -1;
face->font = NULL;
+ face->font_name = NULL;
- font_name = choose_face_font (f, face->lface, face->fontset, c,
- &needs_overstrike);
+ font_name = choose_face_font (f, face->lface, Qnil, &needs_overstrike);
if (!font_name)
return;
BLOCK_INPUT;
- font_info = FS_LOAD_FACE_FONT (f, c, font_name, face);
+ font_info = FS_LOAD_FONT (f, font_name);
UNBLOCK_INPUT;
if (font_info)
@@ -2151,6 +2149,62 @@ face_numeric_swidth (width)
return face_numeric_value (swidth_table, DIM (swidth_table), width);
}
+Lisp_Object
+split_font_name_into_vector (fontname)
+ Lisp_Object fontname;
+{
+ struct font_name font;
+ Lisp_Object vec;
+ int i;
+
+ font.name = LSTRDUPA (fontname);
+ if (! split_font_name (NULL, &font, 0))
+ return Qnil;
+ vec = Fmake_vector (make_number (XLFD_LAST), Qnil);
+ for (i = 0; i < XLFD_LAST; i++)
+ if (font.fields[i][0] != '*')
+ ASET (vec, i, build_string (font.fields[i]));
+ return vec;
+}
+
+Lisp_Object
+build_font_name_from_vector (vec)
+ Lisp_Object vec;
+{
+ struct font_name font;
+ Lisp_Object fontname;
+ char *p;
+ int i;
+
+ for (i = 0; i < XLFD_LAST; i++)
+ {
+ font.fields[i] = (NILP (AREF (vec, i))
+ ? "*" : (char *) SDATA (AREF (vec, i)));
+ if ((i == XLFD_FAMILY || i == XLFD_REGISTRY)
+ && (p = strchr (font.fields[i], '-')))
+ {
+ char *p1 = STRDUPA (font.fields[i]);
+
+ p1[p - font.fields[i]] = '\0';
+ if (i == XLFD_FAMILY)
+ {
+ font.fields[XLFD_FOUNDRY] = p1;
+ font.fields[XLFD_FAMILY] = p + 1;
+ }
+ else
+ {
+ font.fields[XLFD_REGISTRY] = p1;
+ font.fields[XLFD_ENCODING] = p + 1;
+ break;
+ }
+ }
+ }
+
+ p = build_font_name (&font);
+ fontname = build_string (p);
+ xfree (p);
+ return fontname;
+}
#ifdef HAVE_WINDOW_SYSTEM
@@ -2252,7 +2306,7 @@ static double
font_rescale_ratio (name)
char *name;
{
- Lisp_Object tail, elt;
+ Lisp_Object tail, elt;
for (tail = Vface_font_rescale_alist; CONSP (tail); tail = XCDR (tail))
{
@@ -2465,7 +2519,7 @@ x_face_list_fonts (f, pattern, pfonts, nfonts, try_alternatives_p)
if (nfonts < 0 && CONSP (lfonts))
num_fonts = XFASTINT (Flength (lfonts));
-
+
/* Make a copy of the font names we got from X, and
split them into fields. */
n = nignored = 0;
@@ -2638,12 +2692,12 @@ cmp_font_names (a, b)
}
-/* Get a sorted list of fonts of family FAMILY on frame F. If PATTERN
- is non-nil list fonts matching that pattern. Otherwise, if
- REGISTRY is non-nil return only fonts with that registry, otherwise
- return fonts of any registry. Set *FONTS to a vector of font_name
- structures allocated from the heap containing the fonts found.
- Value is the number of fonts found. */
+/* Get a sorted list of fonts matching PATTERN on frame F. If PATTERN
+ is nil, list fonts matching FAMILY and REGISTRY. FAMILY is a
+ family name string or nil. REGISTRY is a registry name string.
+ Set *FONTS to a vector of font_name structures allocated from the
+ heap containing the fonts found. Value is the number of fonts
+ found. */
static int
font_list_1 (f, pattern, family, registry, fonts)
@@ -2704,10 +2758,11 @@ concat_font_list (fonts1, nfonts1, fonts2, nfonts2)
/* Get a sorted list of fonts of family FAMILY on frame F.
- If PATTERN is non-nil list fonts matching that pattern.
+ If PATTERN is non-nil, list fonts matching that pattern.
- If REGISTRY is non-nil, return fonts with that registry and the
- alternative registries from Vface_alternative_font_registry_alist.
+ If REGISTRY is non-nil, it is a list of registry (and encoding)
+ names. Return fonts with those registries and the alternative
+ registries from Vface_alternative_font_registry_alist.
If REGISTRY is nil return fonts of any registry.
@@ -2721,35 +2776,37 @@ font_list (f, pattern, family, registry, fonts)
Lisp_Object pattern, family, registry;
struct font_name **fonts;
{
- int nfonts = font_list_1 (f, pattern, family, registry, fonts);
+ int nfonts;
+ int reg_prio;
+ int i;
+
+ if (NILP (registry))
+ return font_list_1 (f, pattern, family, registry, fonts);
- if (!NILP (registry)
- && CONSP (Vface_alternative_font_registry_alist))
+ for (reg_prio = 0, nfonts = 0; CONSP (registry); registry = XCDR (registry))
{
- Lisp_Object alter;
+ Lisp_Object elt, alter;
+ int nfonts2;
+ struct font_name *fonts2;
- alter = Fassoc (registry, Vface_alternative_font_registry_alist);
- if (CONSP (alter))
+ elt = XCAR (registry);
+ alter = Fassoc (elt, Vface_alternative_font_registry_alist);
+ if (NILP (alter))
+ alter = Fcons (elt, Qnil);
+ for (; CONSP (alter); alter = XCDR (alter), reg_prio++)
{
- int reg_prio, i;
-
- for (alter = XCDR (alter), reg_prio = 1;
- CONSP (alter);
- alter = XCDR (alter), reg_prio++)
- if (STRINGP (XCAR (alter)))
- {
- int nfonts2;
- struct font_name *fonts2;
-
- nfonts2 = font_list_1 (f, pattern, family, XCAR (alter),
- &fonts2);
+ nfonts2 = font_list_1 (f, pattern, family, XCAR (alter), &fonts2);
+ if (nfonts2 > 0)
+ {
+ if (reg_prio > 0)
for (i = 0; i < nfonts2; i++)
fonts2[i].registry_priority = reg_prio;
- *fonts = (nfonts > 0
- ? concat_font_list (*fonts, nfonts, fonts2, nfonts2)
- : fonts2);
- nfonts += nfonts2;
- }
+ if (nfonts > 0)
+ *fonts = concat_font_list (*fonts, nfonts, fonts2, nfonts2);
+ else
+ *fonts = fonts2;
+ nfonts += nfonts2;
+ }
}
}
@@ -2931,7 +2988,7 @@ the WIDTH times as wide as FACE on FRAME. */)
{
/* This is of limited utility since it works with character
widths. Keep it for compatibility. --gerd. */
- int face_id = lookup_named_face (f, face, 0);
+ int face_id = lookup_named_face (f, face);
struct face *face = (face_id < 0
? NULL
: FACE_FROM_ID (f, face_id));
@@ -2990,6 +3047,7 @@ the WIDTH times as wide as FACE on FRAME. */)
#define LFACE_FONT(LFACE) AREF ((LFACE), LFACE_FONT_INDEX)
#define LFACE_INHERIT(LFACE) AREF ((LFACE), LFACE_INHERIT_INDEX)
#define LFACE_AVGWIDTH(LFACE) AREF ((LFACE), LFACE_AVGWIDTH_INDEX)
+#define LFACE_FONTSET(LFACE) AREF ((LFACE), LFACE_FONTSET_INDEX)
/* Non-zero if LFACE is a Lisp face. A Lisp face is a vector of size
LFACE_VECTOR_SIZE which has the symbol `face' in slot 0. */
@@ -3053,6 +3111,8 @@ check_lface_attrs (attrs)
xassert (UNSPECIFIEDP (attrs[LFACE_FONT_INDEX])
|| NILP (attrs[LFACE_FONT_INDEX])
|| STRINGP (attrs[LFACE_FONT_INDEX]));
+ xassert (UNSPECIFIEDP (attrs[LFACE_FONTSET_INDEX])
+ || STRINGP (attrs[LFACE_FONTSET_INDEX]));
#endif
}
@@ -3179,7 +3239,7 @@ lface_fully_specified_p (attrs)
for (i = 1; i < LFACE_VECTOR_SIZE; ++i)
if (i != LFACE_FONT_INDEX && i != LFACE_INHERIT_INDEX
- && i != LFACE_AVGWIDTH_INDEX)
+ && i != LFACE_AVGWIDTH_INDEX && i != LFACE_FONTSET_INDEX)
if (UNSPECIFIEDP (attrs[i]))
break;
@@ -3217,8 +3277,14 @@ set_lface_from_font_name (f, lface, fontname, force_p, may_fail_p)
/* If FONTNAME is actually a fontset name, get ASCII font name of it. */
fontset = fs_query_fontset (fontname, 0);
- if (fontset >= 0)
+ if (fontset > 0)
font_name = SDATA (fontset_ascii (fontset));
+ else if (fontset == 0)
+ {
+ if (may_fail_p)
+ return 0;
+ abort ();
+ }
/* Check if FONT_NAME is surely available on the system. Usually
FONT_NAME is already cached for the frame F and FS_LOAD_FONT
@@ -3226,7 +3292,7 @@ set_lface_from_font_name (f, lface, fontname, force_p, may_fail_p)
caching it now is not futail because we anyway load the font
later. */
BLOCK_INPUT;
- font_info = FS_LOAD_FONT (f, 0, font_name, -1);
+ font_info = FS_LOAD_FONT (f, font_name);
UNBLOCK_INPUT;
if (!font_info)
@@ -3288,8 +3354,13 @@ set_lface_from_font_name (f, lface, fontname, force_p, may_fail_p)
LFACE_SLANT (lface)
= have_xlfd_p ? xlfd_symbolic_slant (&font) : Qnormal;
- LFACE_FONT (lface) = fontname;
-
+ if (fontset)
+ {
+ LFACE_FONT (lface) = build_string (font_info->full_name);
+ LFACE_FONTSET (lface) = fontset_name (fontset);
+ }
+ else
+ LFACE_FONT (lface) = fontname;
return 1;
}
@@ -4126,7 +4197,7 @@ FRAME 0 means change the face on all frames, and change the default
LFACE_SWIDTH (lface) = value;
font_related_attr_p = 1;
}
- else if (EQ (attr, QCfont))
+ else if (EQ (attr, QCfont) || EQ (attr, QCfontset))
{
#ifdef HAVE_WINDOW_SYSTEM
if (EQ (frame, Qt) || FRAME_WINDOW_P (XFRAME (frame)))
@@ -4150,9 +4221,16 @@ FRAME 0 means change the face on all frames, and change the default
tmp = Fquery_fontset (value, Qnil);
if (!NILP (tmp))
value = tmp;
+ else if (EQ (attr, QCfontset))
+ signal_error ("Invalid fontset name", value);
- if (!set_lface_from_font_name (f, lface, value, 1, 1))
- signal_error ("Invalid font or fontset name", value);
+ if (EQ (attr, QCfont))
+ {
+ if (!set_lface_from_font_name (f, lface, value, 1, 1))
+ signal_error ("Invalid font or fontset name", value);
+ }
+ else
+ LFACE_FONTSET (lface) = value;
}
font_attr_p = 1;
@@ -4203,6 +4281,7 @@ FRAME 0 means change the face on all frames, and change the default
init_iterator will then free realized faces. */
if (!EQ (frame, Qt)
&& (EQ (attr, QCfont)
+ || EQ (attr, QCfontset)
|| NILP (Fequal (old_value, value))))
{
++face_change_count;
@@ -4310,7 +4389,7 @@ FRAME 0 means change the face on all frames, and change the default
#ifdef HAVE_WINDOW_SYSTEM
/* Set the `font' frame parameter of FRAME determined from `default'
- face attributes LFACE. If a face or fontset name is explicitely
+ face attributes LFACE. If a font name is explicitely
specfied in LFACE, use it as is. Otherwise, determine a font name
from the other font-related atrributes of LFACE. In that case, if
there's no matching font, signals an error. */
@@ -4333,7 +4412,7 @@ set_font_frame_param (frame, lface)
/* Choose a font name that reflects LFACE's attributes and has
the registry and encoding pattern specified in the default
fontset (3rd arg: -1) for ASCII characters (4th arg: 0). */
- font = choose_face_font (f, XVECTOR (lface)->contents, -1, 0, 0);
+ font = choose_face_font (f, XVECTOR (lface)->contents, Qnil, NULL);
if (!font)
error ("No font matches the specified attribute");
font_name = build_string (font);
@@ -4683,6 +4762,8 @@ frames). If FRAME is omitted or nil, use the selected frame. */)
value = LFACE_INHERIT (lface);
else if (EQ (keyword, QCfont))
value = LFACE_FONT (lface);
+ else if (EQ (keyword, QCfontset))
+ value = LFACE_FONTSET (lface);
else
signal_error ("Invalid face attribute name", keyword);
@@ -4807,7 +4888,7 @@ If FRAME is omitted or nil, use the selected frame. */)
else
{
struct frame *f = frame_or_selected_frame (frame, 1);
- int face_id = lookup_named_face (f, face, 0);
+ int face_id = lookup_named_face (f, face);
struct face *face = FACE_FROM_ID (f, face_id);
return face ? build_string (face->font_name) : Qnil;
}
@@ -4969,8 +5050,8 @@ lface_hash (v)
/* Return non-zero if LFACE1 and LFACE2 specify the same font (without
considering charsets/registries). They do if they specify the same
- family, point size, weight, width, slant, and fontset. Both LFACE1
- and LFACE2 must be fully-specified. */
+ family, point size, weight, width, slant, font, and fontset. Both
+ LFACE1 and LFACE2 must be fully-specified. */
static INLINE int
lface_same_font_attributes_p (lface1, lface2)
@@ -4988,8 +5069,14 @@ lface_same_font_attributes_p (lface1, lface2)
&& (EQ (lface1[LFACE_FONT_INDEX], lface2[LFACE_FONT_INDEX])
|| (STRINGP (lface1[LFACE_FONT_INDEX])
&& STRINGP (lface2[LFACE_FONT_INDEX])
- && xstricmp (SDATA (lface1[LFACE_FONT_INDEX]),
- SDATA (lface2[LFACE_FONT_INDEX])))));
+ && ! xstricmp (SDATA (lface1[LFACE_FONT_INDEX]),
+ SDATA (lface2[LFACE_FONT_INDEX]))))
+ && (EQ (lface1[LFACE_FONTSET_INDEX], lface2[LFACE_FONTSET_INDEX])
+ || (STRINGP (lface1[LFACE_FONTSET_INDEX])
+ && STRINGP (lface2[LFACE_FONTSET_INDEX])
+ && ! xstricmp (SDATA (lface1[LFACE_FONTSET_INDEX]),
+ SDATA (lface2[LFACE_FONTSET_INDEX]))))
+ );
}
@@ -5016,7 +5103,7 @@ make_realized_face (attr)
/* Free realized face FACE, including its X resources. FACE may
be null. */
-static void
+void
free_realized_face (f, face)
struct frame *f;
struct face *face;
@@ -5437,11 +5524,10 @@ free_realized_faces (c)
}
-/* Free all faces realized for multibyte characters on frame F that
- has FONTSET. */
+/* Free all realized faces that are using FONTSET on frame F. */
void
-free_realized_multibyte_face (f, fontset)
+free_realized_faces_for_fontset (f, fontset)
struct frame *f;
int fontset;
{
@@ -5458,7 +5544,6 @@ free_realized_multibyte_face (f, fontset)
{
face = cache->faces_by_id[i];
if (face
- && face != face->ascii_face
&& face->fontset == fontset)
{
uncache_face (cache, face);
@@ -5516,10 +5601,11 @@ free_face_cache (c)
/* Cache realized face FACE in face cache C. HASH is the hash value
- of FACE. If FACE->fontset >= 0, add the new face to the end of the
- collision list of the face hash table of C. This is done because
- otherwise lookup_face would find FACE for every character, even if
- faces with the same attributes but for specific characters exist. */
+ of FACE. If FACE is for ASCII characters (i.e. FACE->ascii_face ==
+ FACE), insert the new face to the beginning of the collision list
+ of the face hash table of C. Otherwise, add the new face to the
+ end of the collision list. This way, lookup_face can quickly find
+ that a requested face is not cached. */
static void
cache_face (c, face, hash)
@@ -5531,7 +5617,7 @@ cache_face (c, face, hash)
face->hash = hash;
- if (face->fontset >= 0)
+ if (face->ascii_face != face)
{
struct face *last = c->buckets[i];
if (last)
@@ -5618,17 +5704,14 @@ uncache_face (c, face)
/* Look up a realized face with face attributes ATTR in the face cache
- of frame F. The face will be used to display character C. Value
- is the ID of the face found. If no suitable face is found, realize
- a new one. In that case, if C is a multibyte character, BASE_FACE
- is a face that has the same attributes. */
+ of frame F. The face will be used to display ASCII characters.
+ Value is the ID of the face found. If no suitable face is found,
+ realize a new one. */
INLINE int
-lookup_face (f, attr, c, base_face)
+lookup_face (f, attr)
struct frame *f;
Lisp_Object *attr;
- int c;
- struct face *base_face;
{
struct face_cache *cache = FRAME_FACE_CACHE (f);
unsigned hash;
@@ -5643,44 +5726,82 @@ lookup_face (f, attr, c, base_face)
i = hash % FACE_CACHE_BUCKETS_SIZE;
for (face = cache->buckets[i]; face; face = face->next)
- if (face->hash == hash
- && (!FRAME_WINDOW_P (f)
- || FACE_SUITABLE_FOR_CHAR_P (face, c))
- && lface_equal_p (face->lface, attr))
- break;
+ {
+ if (face->ascii_face != face)
+ {
+ /* There's no more ASCII face. */
+ face = NULL;
+ break;
+ }
+ if (face->hash == hash
+ && lface_equal_p (face->lface, attr))
+ break;
+ }
/* If not found, realize a new face. */
if (face == NULL)
- face = realize_face (cache, attr, c, base_face, -1);
+ face = realize_face (cache, attr, -1);
#if GLYPH_DEBUG
xassert (face == FACE_FROM_ID (f, face->id));
-
-/* When this function is called from face_for_char (in this case, C is
- a multibyte character), a fontset of a face returned by
- realize_face is not yet set, i.e. FACE_SUITABLE_FOR_CHAR_P (FACE,
- C) is not sutisfied. The fontset is set for this face by
- face_for_char later. */
-#if 0
- if (FRAME_WINDOW_P (f))
- xassert (FACE_SUITABLE_FOR_CHAR_P (face, c));
-#endif
#endif /* GLYPH_DEBUG */
return face->id;
}
+/* Look up a realized face that has the same attributes as BASE_FACE
+ except for the font in the face cache of frame F. If FONT_ID is
+ not negative, it is an ID number of an already opened font that is
+ used by the face. If FONT_ID is negative, the face has no font.
+ Value is the ID of the face found. If no suitable face is found,
+ realize a new one. */
+
+int
+lookup_non_ascii_face (f, font_id, base_face)
+ struct frame *f;
+ int font_id;
+ struct face *base_face;
+{
+ struct face_cache *cache = FRAME_FACE_CACHE (f);
+ unsigned hash;
+ int i;
+ struct face *face;
+
+ xassert (cache != NULL);
+ base_face = base_face->ascii_face;
+ hash = lface_hash (base_face->lface);
+ i = hash % FACE_CACHE_BUCKETS_SIZE;
+
+ for (face = cache->buckets[i]; face; face = face->next)
+ {
+ if (face->ascii_face == face)
+ continue;
+ if (face->ascii_face == base_face
+ && face->font_info_id == font_id)
+ break;
+ }
+
+ /* If not found, realize a new face. */
+ if (face == NULL)
+ face = realize_non_ascii_face (f, font_id, base_face);
+
+#if GLYPH_DEBUG
+ xassert (face == FACE_FROM_ID (f, face->id));
+#endif /* GLYPH_DEBUG */
+
+ return face->id;
+}
+
/* Return the face id of the realized face for named face SYMBOL on
- frame F suitable for displaying character C. Value is -1 if the
- face couldn't be determined, which might happen if the default face
- isn't realized and cannot be realized. */
+ frame F suitable for displaying ASCII characters. Value is -1 if
+ the face couldn't be determined, which might happen if the default
+ face isn't realized and cannot be realized. */
int
-lookup_named_face (f, symbol, c)
+lookup_named_face (f, symbol)
struct frame *f;
Lisp_Object symbol;
- int c;
{
Lisp_Object attrs[LFACE_VECTOR_SIZE];
Lisp_Object symbol_attrs[LFACE_VECTOR_SIZE];
@@ -5696,7 +5817,7 @@ lookup_named_face (f, symbol, c)
get_lface_attributes (f, symbol, symbol_attrs, 1);
bcopy (default_face->lface, attrs, sizeof attrs);
merge_face_vectors (f, symbol_attrs, attrs, Qnil);
- return lookup_face (f, attrs, c, NULL);
+ return lookup_face (f, attrs);
}
@@ -5713,7 +5834,7 @@ ascii_face_of_lisp_face (f, lface_id)
if (lface_id >= 0 && lface_id < lface_id_to_name_size)
{
Lisp_Object face_name = lface_id_to_name[lface_id];
- face_id = lookup_named_face (f, face_name, 0);
+ face_id = lookup_named_face (f, face_name);
}
else
face_id = -1;
@@ -5761,7 +5882,7 @@ smaller_face (f, face_id, steps)
/* Look up a face for a slightly smaller/larger font. */
pt += delta;
attrs[LFACE_HEIGHT_INDEX] = make_number (pt);
- new_face_id = lookup_face (f, attrs, 0, NULL);
+ new_face_id = lookup_face (f, attrs);
new_face = FACE_FROM_ID (f, new_face_id);
/* If height changes, count that as one step. */
@@ -5804,7 +5925,7 @@ face_with_height (f, face_id, height)
face = FACE_FROM_ID (f, face_id);
bcopy (face->lface, attrs, sizeof attrs);
attrs[LFACE_HEIGHT_INDEX] = make_number (height);
- face_id = lookup_face (f, attrs, 0, NULL);
+ face_id = lookup_face (f, attrs);
#endif /* HAVE_WINDOW_SYSTEM */
return face_id;
@@ -5812,17 +5933,16 @@ face_with_height (f, face_id, height)
/* Return the face id of the realized face for named face SYMBOL on
- frame F suitable for displaying character C, and use attributes of
- the face FACE_ID for attributes that aren't completely specified by
- SYMBOL. This is like lookup_named_face, except that the default
- attributes come from FACE_ID, not from the default face. FACE_ID
- is assumed to be already realized. */
+ frame F suitable for displaying ASCII characters, and use
+ attributes of the face FACE_ID for attributes that aren't
+ completely specified by SYMBOL. This is like lookup_named_face,
+ except that the default attributes come from FACE_ID, not from the
+ default face. FACE_ID is assumed to be already realized. */
int
-lookup_derived_face (f, symbol, c, face_id)
+lookup_derived_face (f, symbol, face_id)
struct frame *f;
Lisp_Object symbol;
- int c;
int face_id;
{
Lisp_Object attrs[LFACE_VECTOR_SIZE];
@@ -5835,7 +5955,7 @@ lookup_derived_face (f, symbol, c, face_id)
get_lface_attributes (f, symbol, symbol_attrs, 1);
bcopy (default_face->lface, attrs, sizeof attrs);
merge_face_vectors (f, symbol_attrs, attrs, Qnil);
- return lookup_face (f, attrs, c, default_face);
+ return lookup_face (f, attrs);
}
DEFUN ("face-attributes-as-vector", Fface_attributes_as_vector,
@@ -5859,7 +5979,7 @@ DEFUN ("face-attributes-as-vector", Fface_attributes_as_vector,
Font selection
***********************************************************************/
-DEFUN ("internal-set-font-selection-order",
+ DEFUN ("internal-set-font-selection-order",
Finternal_set_font_selection_order,
Sinternal_set_font_selection_order, 1, 1, 0,
doc: /* Set font selection order for face font selection to ORDER.
@@ -6363,69 +6483,53 @@ try_alternative_families (f, family, registry, fonts)
/* Get a list of matching fonts on frame F.
- FAMILY, if a string, specifies a font family derived from the fontset.
- It is only used if the face does not specify any family in ATTRS or
- if we cannot find any font of the face's family.
+ PATTERN, if a string, specifies a font name pattern to match while
+ ignoring FAMILY and REGISTRY.
- REGISTRY, if a string, specifies a font registry and encoding to
- match. A value of nil means include fonts of any registry and
- encoding.
+ FAMILY, if a list, specifies a list of font families to try.
- If PREFER_FACE_FAMILY is nonzero, perfer face's family to FAMILY.
- Otherwise, prefer FAMILY.
+ REGISTRY, if a list, specifies a list of font registries and
+ encodinging to try.
Return in *FONTS a pointer to a vector of font_name structures for
the fonts matched. Value is the number of fonts found. */
static int
-try_font_list (f, attrs, family, registry, fonts, prefer_face_family)
+try_font_list (f, pattern, family, registry, fonts)
struct frame *f;
- Lisp_Object *attrs;
- Lisp_Object family, registry;
+ Lisp_Object pattern, family, registry;
struct font_name **fonts;
- int prefer_face_family;
{
int nfonts = 0;
- Lisp_Object face_family = attrs[LFACE_FAMILY_INDEX];
- Lisp_Object try_family;
-
- try_family = (prefer_face_family || NILP (family)) ? face_family : family;
-
- if (STRINGP (try_family))
- nfonts = try_alternative_families (f, try_family, registry, fonts);
-#ifdef MAC_OS
- /* When realizing the default face and a font spec does not matched
- exactly, Emacs looks for ones with the same registry as the
- default font. On the Mac, this is mac-roman, which does not work
- if the family is -etl-fixed, e.g. The following widens the
- choices and fixes that problem. */
- if (nfonts == 0 && STRINGP (try_family) && STRINGP (registry)
- && xstricmp (SDATA (registry), "mac-roman") == 0)
- nfonts = try_alternative_families (f, try_family, Qnil, fonts);
-#endif
+ if (STRINGP (pattern))
+ nfonts = font_list (f, pattern, Qnil, Qnil, fonts);
+ else
+ {
+ Lisp_Object tail;
- if (EQ (try_family, family))
- family = face_family;
+ if (NILP (family))
+ nfonts = font_list (f, Qnil, Qnil, registry, fonts);
+ else
+ for (tail = family; ! nfonts && CONSP (tail); tail = XCDR (tail))
+ nfonts = try_alternative_families (f, XCAR (tail), registry, fonts);
- if (nfonts == 0 && STRINGP (family))
- nfonts = try_alternative_families (f, family, registry, fonts);
+ /* Try font family of the default face or "fixed". */
+ if (nfonts == 0 && !NILP (family))
+ {
+ struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
+ if (default_face)
+ family = default_face->lface[LFACE_FAMILY_INDEX];
+ else
+ family = build_string ("fixed");
+ nfonts = try_alternative_families (f, family, registry, fonts);
+ }
- /* Try font family of the default face or "fixed". */
- if (nfonts == 0)
- {
- struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
- if (default_face)
- family = default_face->lface[LFACE_FAMILY_INDEX];
- else
- family = build_string ("fixed");
- nfonts = font_list (f, Qnil, family, registry, fonts);
+ /* Try any family with the given registry. */
+ if (nfonts == 0 && !NILP (family))
+ nfonts = try_alternative_families (f, Qnil, registry, fonts);
}
- /* Try any family with the given registry. */
- if (nfonts == 0)
- nfonts = font_list (f, Qnil, Qnil, registry, fonts);
-
return nfonts;
}
@@ -6440,63 +6544,108 @@ face_fontset (attrs)
{
Lisp_Object name;
- name = attrs[LFACE_FONT_INDEX];
+ name = attrs[LFACE_FONTSET_INDEX];
if (!STRINGP (name))
return -1;
return fs_query_fontset (name, 0);
}
-/* Choose a name of font to use on frame F to display character C with
+/* Choose a name of font to use on frame F to display characters with
Lisp face attributes specified by ATTRS. The font name is
- determined by the font-related attributes in ATTRS and the name
- pattern for C in FONTSET. Value is the font name which is
- allocated from the heap and must be freed by the caller, or NULL if
- we can get no information about the font name of C. It is assured
- that we always get some information for a single byte
- character.
+ determined by the font-related attributes in ATTRS and FONT-SPEC
+ (if specified).
- If NEEDS_OVERSTRIKE is non-zero, a boolean is returned in it to
- indicate whether the resulting font should be drawn using overstrike
- to simulate bold-face. */
+ When we are choosing a font for ASCII characters, FONT-SPEC is
+ always nil. Otherwise FONT-SPEC is a list
+ [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ]
+ or a string specifying a font name pattern.
-static char *
-choose_face_font (f, attrs, fontset, c, needs_overstrike)
+ If NEEDS_OVERSTRIKE is not NULL, a boolean is returned in it to
+ indicate whether the resulting font should be drawn using
+ overstrike to simulate bold-face.
+
+ Value is the font name which is allocated from the heap and must be
+ freed by the caller. */
+
+char *
+choose_face_font (f, attrs, font_spec, needs_overstrike)
struct frame *f;
Lisp_Object *attrs;
- int fontset, c;
+ Lisp_Object font_spec;
int *needs_overstrike;
{
- Lisp_Object pattern;
+ Lisp_Object pattern, family, adstyle, registry;
char *font_name = NULL;
struct font_name *fonts;
- int nfonts, width_ratio;
+ int nfonts;
if (needs_overstrike)
*needs_overstrike = 0;
- /* Get (foundry and) family name and registry (and encoding) name of
- a font for C. */
- pattern = fontset_font_pattern (f, fontset, c);
- if (NILP (pattern))
+ /* If we are choosing an ASCII font and a font name is explicitly
+ specified in ATTRS, return it. */
+ if (NILP (font_spec) && STRINGP (attrs[LFACE_FONT_INDEX]))
+ return xstrdup (SDATA (attrs[LFACE_FONT_INDEX]));
+
+ if (NILP (attrs[LFACE_FAMILY_INDEX]))
+ family = Qnil;
+ else
+ family = Fcons (attrs[LFACE_FAMILY_INDEX], Qnil);
+
+ /* Decide FAMILY, ADSTYLE, and REGISTRY from FONT_SPEC. But,
+ ADSTYLE is not used in the font selector for the moment. */
+ if (VECTORP (font_spec))
{
- xassert (!SINGLE_BYTE_CHAR_P (c));
- return NULL;
+ pattern = Qnil;
+ if (STRINGP (AREF (font_spec, FONT_SPEC_FAMILY_INDEX)))
+ family = Fcons (AREF (font_spec, FONT_SPEC_FAMILY_INDEX), family);
+ adstyle = AREF (font_spec, FONT_SPEC_ADSTYLE_INDEX);
+ registry = Fcons (AREF (font_spec, FONT_SPEC_REGISTRY_INDEX), Qnil);
+ }
+ else if (STRINGP (font_spec))
+ {
+ pattern = font_spec;
+ family = Qnil;
+ adstyle = Qnil;
+ registry = Qnil;
+ }
+ else
+ {
+ /* We are choosing an ASCII font. By default, use the registry
+ name "iso8859-1". But, if the registry name of the ASCII
+ font specified in the fontset of ATTRS is not "iso8859-1"
+ (e.g "iso10646-1"), use also that name with higher
+ priority. */
+ int fontset = face_fontset (attrs);
+ Lisp_Object ascii;
+ int len;
+ struct font_name font;
+
+ pattern = Qnil;
+ adstyle = Qnil;
+ registry = Fcons (build_string ("iso8859-1"), Qnil);
+
+ ascii = fontset_ascii (fontset);
+ len = SBYTES (ascii);
+ if (len < 9
+ || strcmp (SDATA (ascii) + len - 9, "iso8859-1"))
+ {
+ font.name = LSTRDUPA (ascii);
+ /* Check if the name is in XLFD. */
+ if (split_font_name (f, &font, 0))
+ {
+ font.fields[XLFD_ENCODING][-1] = '-';
+ registry = Fcons (build_string (font.fields[XLFD_REGISTRY]),
+ registry);
+ }
+ }
}
-
- /* If what we got is a name pattern, return it. */
- if (STRINGP (pattern))
- return xstrdup (SDATA (pattern));
/* Get a list of fonts matching that pattern and choose the
best match for the specified face attributes from it. */
- nfonts = try_font_list (f, attrs, XCAR (pattern), XCDR (pattern), &fonts,
- (SINGLE_BYTE_CHAR_P (c)
- || CHAR_CHARSET (c) == charset_latin_iso8859_1));
- width_ratio = (SINGLE_BYTE_CHAR_P (c)
- ? 1
- : CHARSET_WIDTH (CHAR_CHARSET (c)));
- font_name = best_matching_font (f, attrs, fonts, nfonts, width_ratio,
+ nfonts = try_font_list (f, pattern, family, registry, &fonts);
+ font_name = best_matching_font (f, attrs, fonts, nfonts, NILP (font_spec),
needs_overstrike);
return font_name;
}
@@ -6658,7 +6807,7 @@ realize_default_face (f)
xassert (lface_fully_specified_p (XVECTOR (lface)->contents));
check_lface (lface);
bcopy (XVECTOR (lface)->contents, attrs, sizeof attrs);
- face = realize_face (c, attrs, 0, NULL, DEFAULT_FACE_ID);
+ face = realize_face (c, attrs, DEFAULT_FACE_ID);
return 1;
}
@@ -6697,23 +6846,19 @@ realize_named_face (f, symbol, id)
merge_face_vectors (f, symbol_attrs, attrs, Qnil);
/* Realize the face. */
- new_face = realize_face (c, attrs, 0, NULL, id);
+ new_face = realize_face (c, attrs, id);
}
/* Realize the fully-specified face with attributes ATTRS in face
- cache CACHE for character C. If C is a multibyte character,
- BASE_FACE is a face that has the same attributes. Otherwise,
- BASE_FACE is ignored. If FORMER_FACE_ID is non-negative, it is an
- ID of face to remove before caching the new face. Value is a
- pointer to the newly created realized face. */
+ cache CACHE for ASCII characters. If FORMER_FACE_ID is
+ non-negative, it is an ID of face to remove before caching the new
+ face. Value is a pointer to the newly created realized face. */
static struct face *
-realize_face (cache, attrs, c, base_face, former_face_id)
+realize_face (cache, attrs, former_face_id)
struct face_cache *cache;
Lisp_Object *attrs;
- int c;
- struct face *base_face;
int former_face_id;
{
struct face *face;
@@ -6731,37 +6876,73 @@ realize_face (cache, attrs, c, base_face, former_face_id)
}
if (FRAME_WINDOW_P (cache->f))
- face = realize_x_face (cache, attrs, c, base_face);
+ face = realize_x_face (cache, attrs);
else if (FRAME_TERMCAP_P (cache->f) || FRAME_MSDOS_P (cache->f))
- face = realize_tty_face (cache, attrs, c);
+ face = realize_tty_face (cache, attrs);
else
abort ();
/* Insert the new face. */
cache_face (cache, face, lface_hash (attrs));
-#ifdef HAVE_WINDOW_SYSTEM
- if (FRAME_WINDOW_P (cache->f) && face->font == NULL)
- load_face_font (cache->f, face, c);
-#endif /* HAVE_WINDOW_SYSTEM */
+ return face;
+}
+
+
+/* Realize the fully-specified face that has the same attributes as
+ BASE_FACE except for the font on frame F. If FONT_ID is not
+ negative, it is an ID number of an already opened font that should
+ be used by the face. If FONT_ID is negative, the face has no font,
+ i.e., characters are displayed by empty boxes. */
+
+static struct face *
+realize_non_ascii_face (f, font_id, base_face)
+ struct frame *f;
+ int font_id;
+ struct face *base_face;
+{
+ struct face_cache *cache = FRAME_FACE_CACHE (f);
+ struct face *face;
+ struct font_info *font_info;
+
+ face = (struct face *) xmalloc (sizeof *face);
+ *face = *base_face;
+ face->gc = 0;
+
+ /* Don't try to free the colors copied bitwise from BASE_FACE. */
+ face->colors_copied_bitwise_p = 1;
+
+ face->font_info_id = font_id;
+ if (font_id >= 0)
+ {
+ font_info = FONT_INFO_FROM_ID (f, font_id);
+ face->font = font_info->font;
+ face->font_name = font_info->full_name;
+ }
+ else
+ {
+ face->font = NULL;
+ face->font_name = NULL;
+ }
+
+ face->gc = 0;
+
+ cache_face (cache, face, face->hash);
+
return face;
}
/* Realize the fully-specified face with attributes ATTRS in face
- cache CACHE for character C. Do it for X frame CACHE->f. If C is
- a multibyte character, BASE_FACE is a face that has the same
- attributes. Otherwise, BASE_FACE is ignored. If the new face
- doesn't share font with the default face, a fontname is allocated
- from the heap and set in `font_name' of the new face, but it is not
- yet loaded here. Value is a pointer to the newly created realized
- face. */
+ cache CACHE for ASCII characters. Do it for X frame CACHE->f. If
+ the new face doesn't share font with the default face, a fontname
+ is allocated from the heap and set in `font_name' of the new face,
+ but it is not yet loaded here. Value is a pointer to the newly
+ created realized face. */
static struct face *
-realize_x_face (cache, attrs, c, base_face)
+realize_x_face (cache, attrs)
struct face_cache *cache;
Lisp_Object *attrs;
- int c;
- struct face *base_face;
{
#ifdef HAVE_WINDOW_SYSTEM
struct face *face, *default_face;
@@ -6769,50 +6950,24 @@ realize_x_face (cache, attrs, c, base_face)
Lisp_Object stipple, overline, strike_through, box;
xassert (FRAME_WINDOW_P (cache->f));
- xassert (SINGLE_BYTE_CHAR_P (c)
- || base_face);
/* Allocate a new realized face. */
face = make_realized_face (attrs);
+ face->ascii_face = face;
f = cache->f;
- /* If C is a multibyte character, we share all face attirbutes with
- BASE_FACE including the realized fontset. But, we must load a
- different font. */
- if (!SINGLE_BYTE_CHAR_P (c))
- {
- bcopy (base_face, face, sizeof *face);
- face->gc = 0;
-
- /* Don't try to free the colors copied bitwise from BASE_FACE. */
- face->colors_copied_bitwise_p = 1;
-
- /* to force realize_face to load font */
- face->font = NULL;
- return face;
- }
-
- /* Now we are realizing a face for ASCII (and unibyte) characters. */
-
/* Determine the font to use. Most of the time, the font will be
the same as the font of the default face, so try that first. */
default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
if (default_face
- && FACE_SUITABLE_FOR_CHAR_P (default_face, c)
&& lface_same_font_attributes_p (default_face->lface, attrs))
{
face->font = default_face->font;
- face->fontset = default_face->fontset;
face->font_info_id = default_face->font_info_id;
face->font_name = default_face->font_name;
- face->ascii_face = face;
-
- /* But, as we can't share the fontset, make a new realized
- fontset that has the same base fontset as of the default
- face. */
face->fontset
- = make_fontset_for_ascii_face (f, default_face->fontset);
+ = make_fontset_for_ascii_face (f, default_face->fontset, face);
}
else
{
@@ -6824,10 +6979,16 @@ realize_x_face (cache, attrs, c, base_face)
are constructed from ATTRS. */
int fontset = face_fontset (attrs);
- if ((fontset == -1) && default_face)
+ /* If we are realizing the default face, ATTRS should specify a
+ fontset. In other words, if FONTSET is -1, we are not
+ realizing the default face, thus the default face should have
+ already been realized. */
+ if (fontset == -1)
fontset = default_face->fontset;
- face->fontset = make_fontset_for_ascii_face (f, fontset);
- face->font = NULL; /* to force realize_face to load font */
+ if (fontset == -1)
+ abort ();
+ load_face_font (f, face);
+ face->fontset = make_fontset_for_ascii_face (f, fontset, face);
}
/* Load colors, and set remaining attributes. */
@@ -6959,7 +7120,6 @@ realize_x_face (cache, attrs, c, base_face)
if (!NILP (stipple))
face->stipple = load_pixmap (f, stipple, &face->pixmap_w, &face->pixmap_h);
- xassert (FACE_SUITABLE_FOR_CHAR_P (face, c));
return face;
#endif /* HAVE_WINDOW_SYSTEM */
}
@@ -7052,14 +7212,13 @@ map_tty_color (f, face, idx, defaulted)
/* Realize the fully-specified face with attributes ATTRS in face
- cache CACHE for character C. Do it for TTY frame CACHE->f. Value is a
- pointer to the newly created realized face. */
+ cache CACHE for ASCII characters. Do it for TTY frame CACHE->f.
+ Value is a pointer to the newly created realized face. */
static struct face *
-realize_tty_face (cache, attrs, c)
+realize_tty_face (cache, attrs)
struct face_cache *cache;
Lisp_Object *attrs;
- int c;
{
struct face *face;
int weight, slant;
@@ -7157,10 +7316,15 @@ compute_char_face (f, ch, prop)
else
{
Lisp_Object attrs[LFACE_VECTOR_SIZE];
- struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
- bcopy (default_face->lface, attrs, sizeof attrs);
+ struct face *face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
+ bcopy (face->lface, attrs, sizeof attrs);
merge_face_vector_with_property (f, attrs, prop);
- face_id = lookup_face (f, attrs, ch, NULL);
+ face_id = lookup_face (f, attrs);
+ if (! ASCII_CHAR_P (ch))
+ {
+ face = FACE_FROM_ID (f, face_id);
+ face_id = FACE_FOR_CHAR (f, face, ch);
+ }
}
return face_id;
@@ -7294,7 +7458,7 @@ face_at_buffer_position (w, pos, region_beg, region_end,
/* Look up a realized face with the given face attributes,
or realize a new one for ASCII characters. */
- return lookup_face (f, attrs, 0, NULL);
+ return lookup_face (f, attrs);
}
@@ -7393,7 +7557,7 @@ face_at_string_position (w, string, pos, bufpos, region_beg,
/* Look up a realized face with the given face attributes,
or realize a new one for ASCII characters. */
- return lookup_face (f, attrs, 0, NULL);
+ return lookup_face (f, attrs);
}
@@ -7432,7 +7596,6 @@ dump_realized_face (face)
face->underline_p,
SDATA (Fsymbol_name (face->lface[LFACE_UNDERLINE_INDEX])));
fprintf (stderr, "hash: %d\n", face->hash);
- fprintf (stderr, "charset: %d\n", face->charset);
}
@@ -7523,6 +7686,8 @@ syms_of_xfaces ()
staticpro (&QCwidth);
QCfont = intern (":font");
staticpro (&QCfont);
+ QCfontset = intern (":fontset");
+ staticpro (&QCfontset);
QCbold = intern (":bold");
staticpro (&QCbold);
QCitalic = intern (":italic");
diff --git a/src/xfns.c b/src/xfns.c
index af7ed7473e8..758a0fc8345 100644
--- a/src/xfns.c
+++ b/src/xfns.c
@@ -42,6 +42,7 @@ Boston, MA 02111-1307, USA. */
#include "keyboard.h"
#include "blockinput.h"
#include <epaths.h>
+#include "character.h"
#include "charset.h"
#include "coding.h"
#include "fontset.h"
@@ -1838,49 +1839,29 @@ x_encode_text (string, coding_system, selectionp, text_bytes, stringp)
int *text_bytes, *stringp;
int selectionp;
{
- unsigned char *str = SDATA (string);
- int chars = SCHARS (string);
- int bytes = SBYTES (string);
- int charset_info;
- int bufsize;
- unsigned char *buf;
+ int result = string_xstring_p (string);
struct coding_system coding;
extern Lisp_Object Qcompound_text_with_extensions;
- charset_info = find_charset_in_text (str, chars, bytes, NULL, Qnil);
- if (charset_info == 0)
+ if (result == 0)
{
/* No multibyte character in OBJ. We need not encode it. */
- *text_bytes = bytes;
+ *text_bytes = SBYTES (string);
*stringp = 1;
- return str;
+ return SDATA (string);
}
setup_coding_system (coding_system, &coding);
- if (selectionp
- && SYMBOLP (coding.pre_write_conversion)
- && !NILP (Ffboundp (coding.pre_write_conversion)))
- {
- string = run_pre_post_conversion_on_str (string, &coding, 1);
- str = SDATA (string);
- chars = SCHARS (string);
- bytes = SBYTES (string);
- }
- coding.src_multibyte = 1;
- coding.dst_multibyte = 0;
- coding.mode |= CODING_MODE_LAST_BLOCK;
- if (coding.type == coding_type_iso2022)
- coding.flags |= CODING_FLAG_ISO_SAFE;
+ coding.mode |= (CODING_MODE_SAFE_ENCODING | CODING_MODE_LAST_BLOCK);
/* We suppress producing escape sequences for composition. */
- coding.composing = COMPOSITION_DISABLED;
- bufsize = encoding_buffer_size (&coding, bytes);
- buf = (unsigned char *) xmalloc (bufsize);
- encode_coding (&coding, str, buf, bytes, bufsize);
+ coding.common_flags &= ~CODING_ANNOTATION_MASK;
+ coding.dst_bytes = SCHARS (string) * 2;
+ coding.destination = (unsigned char *) xmalloc (coding.dst_bytes);
+ encode_coding_object (&coding, string, 0, 0,
+ SCHARS (string), SBYTES (string), Qnil);
*text_bytes = coding.produced;
- *stringp = (charset_info == 1
- || (!EQ (coding_system, Qcompound_text)
- && !EQ (coding_system, Qcompound_text_with_extensions)));
- return buf;
+ *stringp = (result == 1 || !EQ (coding_system, Qcompound_text));
+ return coding.destination;
}
@@ -3322,35 +3303,39 @@ This function is an internal primitive--use `make-frame' instead. */)
font = x_get_arg (dpyinfo, parms, Qfont, "font", "Font", RES_TYPE_STRING);
- BLOCK_INPUT;
- /* First, try whatever font the caller has specified. */
- if (STRINGP (font))
- {
- tem = Fquery_fontset (font, Qnil);
- if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
- else
- font = x_new_font (f, SDATA (font));
- }
-
- /* Try out a font which we hope has bold and italic variations. */
- if (!STRINGP (font))
- font = x_new_font (f, "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1");
+ /* If the caller has specified no font, try out fonts which we
+ hope have bold and italic variations. */
if (!STRINGP (font))
- font = x_new_font (f, "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1");
- if (! STRINGP (font))
- font = x_new_font (f, "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1");
- if (! STRINGP (font))
- /* This was formerly the first thing tried, but it finds too many fonts
- and takes too long. */
- font = x_new_font (f, "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1");
- /* If those didn't work, look for something which will at least work. */
- if (! STRINGP (font))
- font = x_new_font (f, "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1");
- UNBLOCK_INPUT;
- if (! STRINGP (font))
- font = build_string ("fixed");
+ {
+ char *names[]
+ = { "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1",
+ "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1",
+ "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1",
+ /* This was formerly the first thing tried, but it finds
+ too many fonts and takes too long. */
+ "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1",
+ /* If those didn't work, look for something which will
+ at least work. */
+ "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1",
+ NULL };
+ int i;
+
+ BLOCK_INPUT;
+ for (i = 0; names[i]; i++)
+ {
+ Lisp_Object list;
+ list = x_list_fonts (f, build_string (names[i]), 0, 1);
+ if (CONSP (list))
+ {
+ font = XCAR (list);
+ break;
+ }
+ }
+ UNBLOCK_INPUT;
+ if (! STRINGP (font))
+ font = build_string ("fixed");
+ }
x_default_parameter (f, parms, Qfont, font,
"font", "Font", RES_TYPE_STRING);
}
@@ -9885,7 +9870,7 @@ x_create_tip_frame (dpyinfo, parms, text)
{
tem = Fquery_fontset (font, Qnil);
if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
+ font = x_new_fontset (f, tem);
else
font = x_new_font (f, SDATA (font));
}
@@ -10928,6 +10913,7 @@ meaning don't clear the cache. */);
find_ccl_program_func = x_find_ccl_program;
query_font_func = x_query_font;
set_frame_fontset_func = x_set_font;
+ get_font_repertory_func = x_get_font_repertory;
check_window_system_func = check_x;
/* Images. */
diff --git a/src/xterm.c b/src/xterm.c
index 5463ce8e192..e22e06a107e 100644
--- a/src/xterm.c
+++ b/src/xterm.c
@@ -68,6 +68,7 @@ Boston, MA 02111-1307, USA. */
/* #include <sys/param.h> */
#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "ccl.h"
#include "frame.h"
@@ -765,7 +766,8 @@ XTreset_terminal_modes ()
/* Function prototypes of this page. */
-static int x_encode_char P_ ((int, XChar2b *, struct font_info *, int *));
+static int x_encode_char P_ ((int, XChar2b *, struct font_info *,
+ struct charset *, int *));
/* Get metrics of character CHAR2B in FONT. Value is null if CHAR2B
@@ -844,13 +846,13 @@ x_per_char_metric (font, char2b, font_type)
the two-byte form of C. Encoding is returned in *CHAR2B. */
static int
-x_encode_char (c, char2b, font_info, two_byte_p)
+x_encode_char (c, char2b, font_info, charset, two_byte_p)
int c;
XChar2b *char2b;
struct font_info *font_info;
+ struct charset *charset;
int *two_byte_p;
{
- int charset = CHAR_CHARSET (c);
XFontStruct *font = font_info->font;
/* FONT_INFO may define a scheme by which to encode byte1 and byte2.
@@ -863,31 +865,31 @@ x_encode_char (c, char2b, font_info, two_byte_p)
if (CHARSET_DIMENSION (charset) == 1)
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = char2b->byte2;
ccl->reg[2] = -1;
}
else
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = char2b->byte1;
ccl->reg[2] = char2b->byte2;
}
- ccl_driver (ccl, NULL, NULL, 0, 0, NULL);
+ ccl_driver (ccl, NULL, NULL, 0, 0, Qnil);
/* We assume that MSBs are appropriately set/reset by CCL
program. */
if (font->max_byte1 == 0) /* 1-byte font */
- char2b->byte1 = 0, char2b->byte2 = ccl->reg[1];
+ STORE_XCHAR2B (char2b, 0, ccl->reg[1]);
else
- char2b->byte1 = ccl->reg[1], char2b->byte2 = ccl->reg[2];
+ STORE_XCHAR2B (char2b, ccl->reg[1], ccl->reg[2]);
}
- else if (font_info->encoding[charset])
+ else if (font_info->encoding_type)
{
/* Fixed encoding scheme. See fontset.h for the meaning of the
encoding numbers. */
- int enc = font_info->encoding[charset];
+ unsigned char enc = font_info->encoding_type;
if ((enc == 1 || enc == 2)
&& CHARSET_DIMENSION (charset) == 2)
@@ -6440,7 +6442,7 @@ handle_one_xevent (dpyinfo, eventp, bufp_r, numcharsp, finish)
/* The input is converted to events, thus we can't
handle composition. Anyway, there's no XIM that
gives us composition information. */
- coding.composing = COMPOSITION_DISABLED;
+ coding.common_flags &= ~CODING_ANNOTATION_MASK;
for (i = 0; i < nbytes; i++)
{
@@ -6452,22 +6454,22 @@ handle_one_xevent (dpyinfo, eventp, bufp_r, numcharsp, finish)
{
/* Decode the input data. */
- int require;
- unsigned char *p;
-
- require = decoding_buffer_size (&coding, nbytes);
- p = (unsigned char *) alloca (require);
- coding.mode |= CODING_MODE_LAST_BLOCK;
- /* We explicitly disable composition
- handling because key data should
- not contain any composition
- sequence. */
- coding.composing = COMPOSITION_DISABLED;
- decode_coding (&coding, copy_bufptr, p,
- nbytes, require);
- nbytes = coding.produced;
- nchars = coding.produced_char;
- copy_bufptr = p;
+ coding.destination = (unsigned char *) malloc (nbytes);
+ if (! coding.destination)
+ break;
+ coding.dst_bytes = nbytes;
+ coding.mode |= CODING_MODE_LAST_BLOCK;
+ decode_coding_c_string (&coding, copy_bufptr,
+ nbytes, Qnil);
+ nbytes = coding.produced;
+ nchars = coding.produced_char;
+ if (copy_bufsiz < nbytes)
+ {
+ copy_bufsiz = nbytes;
+ copy_bufptr = (char *) alloca (nbytes);
+ }
+ bcopy (coding.destination, copy_bufptr, nbytes);
+ free (coding.destination);
}
/* Convert the input data to a sequence of
@@ -6480,7 +6482,7 @@ handle_one_xevent (dpyinfo, eventp, bufp_r, numcharsp, finish)
c = STRING_CHAR_AND_LENGTH (copy_bufptr + i,
nbytes - i, len);
- bufp->kind = (SINGLE_BYTE_CHAR_P (c)
+ bufp->kind = (ASCII_CHAR_P (c)
? ASCII_KEYSTROKE_EVENT
: MULTIBYTE_CHAR_KEYSTROKE_EVENT);
bufp->code = c;
@@ -7876,11 +7878,16 @@ x_new_font (f, fontname)
register char *fontname;
{
struct font_info *fontp
- = FS_LOAD_FONT (f, 0, fontname, -1);
+ = FS_LOAD_FONT (f, fontname);
if (!fontp)
return Qnil;
+ if (FRAME_FONT (f) == (XFontStruct *) (fontp->font))
+ /* This font is already set in frame F. There's nothing more to
+ do. */
+ return build_string (fontp->full_name);
+
FRAME_FONT (f) = (XFontStruct *) (fontp->font);
FRAME_BASELINE_OFFSET (f) = fontp->baseline_offset;
FRAME_FONTSET (f) = -1;
@@ -7923,33 +7930,45 @@ x_new_font (f, fontname)
return build_string (fontp->full_name);
}
-/* Give frame F the fontset named FONTSETNAME as its default font, and
- return the full name of that fontset. FONTSETNAME may be a wildcard
- pattern; in that case, we choose some fontset that fits the pattern.
- The return value shows which fontset we chose. */
+/* Give frame F the fontset named FONTSETNAME as its default fontset,
+ and return the full name of that fontset. FONTSETNAME may be a
+ wildcard pattern; in that case, we choose some fontset that fits
+ the pattern. FONTSETNAME may be a font name for ASCII characters;
+ in that case, we create a fontset from that font name.
+
+ The return value shows which fontset we chose.
+ If FONTSETNAME specifies the default fontset, return Qt.
+ If an ASCII font in the specified fontset can't be loaded, return
+ Qnil. */
Lisp_Object
x_new_fontset (f, fontsetname)
struct frame *f;
- char *fontsetname;
+ Lisp_Object fontsetname;
{
- int fontset = fs_query_fontset (build_string (fontsetname), 0);
+ int fontset = fs_query_fontset (fontsetname, 0);
Lisp_Object result;
- if (fontset < 0)
- return Qnil;
-
- if (FRAME_FONTSET (f) == fontset)
+ if (fontset > 0 && f->output_data.x->fontset == fontset)
/* This fontset is already set in frame F. There's nothing more
to do. */
return fontset_name (fontset);
+ else if (fontset == 0)
+ /* The default fontset can't be the default font. */
+ return Qt;
- result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ if (fontset > 0)
+ result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ else
+ result = x_new_font (f, SDATA (fontsetname));
if (!STRINGP (result))
/* Can't load ASCII font. */
return Qnil;
+ if (fontset < 0)
+ fontset = new_fontset_from_font_name (result);
+
/* Since x_new_font doesn't update any fontset information, do it now. */
FRAME_FONTSET (f) = fontset;
@@ -7959,7 +7978,7 @@ x_new_fontset (f, fontsetname)
xic_set_xfontset (f, SDATA (fontset_ascii (fontset)));
#endif
- return build_string (fontsetname);
+ return fontset_name (fontset);
}
@@ -9868,6 +9887,7 @@ x_load_font (f, fontname, size)
BLOCK_INPUT;
fontp->font = font;
fontp->font_idx = i;
+ fontp->charset = -1; /* fs_load_font sets it. */
fontp->name = (char *) xmalloc (strlen (fontname) + 1);
bcopy (fontname, fontp->name, strlen (fontname) + 1);
@@ -9942,10 +9962,10 @@ x_load_font (f, fontname, size)
the font code-points (0:0x20..0x7F, 1:0xA0..0xFF), or
(0:0x2020..0x7F7F, 1:0xA0A0..0xFFFF, 3:0x20A0..0x7FFF,
2:0xA020..0xFF7F). For the moment, we don't know which charset
- uses this font. So, we set information in fontp->encoding[1]
+ uses this font. So, we set information in fontp->encoding_type
which is never used by any charset. If mapping can't be
decided, set FONT_ENCODING_NOT_DECIDED. */
- fontp->encoding[1]
+ fontp->encoding_type
= (font->max_byte1 == 0
/* 1-byte font */
? (font->min_char_or_byte2 < 0x80
@@ -10045,6 +10065,98 @@ x_find_ccl_program (fontp)
}
+/* Return a char-table whose elements are t if the font FONT_INFO
+ contains a glyph for the corresponding character, and nil if not.
+
+ Fixme: For the moment, this function works only for fonts whose
+ glyph encoding is the same as Unicode (e.g. ISO10646-1 fonts). */
+
+Lisp_Object
+x_get_font_repertory (f, font_info)
+ FRAME_PTR f;
+ struct font_info *font_info;
+{
+ XFontStruct *font = (XFontStruct *) font_info->font;
+ Lisp_Object table;
+ int min_byte1, max_byte1, min_byte2, max_byte2;
+
+ table = Fmake_char_table (Qnil, Qnil);
+
+ min_byte1 = font->min_byte1;
+ max_byte1 = font->max_byte1;
+ min_byte2 = font->min_char_or_byte2;
+ max_byte2 = font->max_char_or_byte2;
+ if (min_byte1 == 0 && max_byte1 == 0)
+ {
+ if (! font->per_char || font->all_chars_exist == True)
+ char_table_set_range (table, min_byte2, max_byte2, Qt);
+ else
+ {
+ XCharStruct *pcm = font->per_char;
+ int from = -1;
+ int i;
+
+ for (i = min_byte2; i <= max_byte2; i++, pcm++)
+ {
+ if (pcm->width == 0 && pcm->rbearing == pcm->lbearing)
+ {
+ if (from >= 0)
+ {
+ char_table_set_range (table, from, i - 1, Qt);
+ from = -1;
+ }
+ }
+ else if (from < 0)
+ from = i;
+ }
+ if (from >= 0)
+ char_table_set_range (table, from, i - 1, Qt);
+ }
+ }
+ else
+ {
+ if (! font->per_char || font->all_chars_exist == True)
+ {
+ int i;
+
+ for (i = min_byte1; i <= max_byte1; i++)
+ char_table_set_range (table,
+ (i << 8) | min_byte2, (i << 8) | max_byte2,
+ Qt);
+ }
+ else
+ {
+ XCharStruct *pcm = font->per_char;
+ int i;
+
+ for (i = min_byte1; i <= max_byte1; i++)
+ {
+ int from = -1;
+ int j;
+
+ for (j = min_byte2; j <= max_byte2; j++, pcm++)
+ {
+ if (pcm->width == 0 && pcm->rbearing == pcm->lbearing)
+ {
+ if (from >= 0)
+ {
+ char_table_set_range (table, (i << 8) | from,
+ (i << 8) | (j - 1), Qt);
+ from = -1;
+ }
+ }
+ else if (from < 0)
+ from = j;
+ }
+ if (from >= 0)
+ char_table_set_range (table, (i << 8) | from,
+ (i << 8) | (j - 1), Qt);
+ }
+ }
+ }
+
+ return table;
+}
/***********************************************************************
Initialization
@@ -10801,8 +10913,6 @@ syms_of_xterm ()
staticpro (&Qvendor_specific_keysyms);
Qvendor_specific_keysyms = intern ("vendor-specific-keysyms");
- staticpro (&Qutf_8);
- Qutf_8 = intern ("utf-8");
staticpro (&Qlatin_1);
Qlatin_1 = intern ("latin-1");
diff --git a/src/xterm.h b/src/xterm.h
index fd1e79b9082..86cda277476 100644
--- a/src/xterm.h
+++ b/src/xterm.h
@@ -400,6 +400,9 @@ extern struct font_info *x_get_font_info P_ ((struct frame *f, int));
extern struct font_info *x_load_font P_ ((struct frame *, char *, int));
extern struct font_info *x_query_font P_ ((struct frame *, char *));
extern void x_find_ccl_program P_ ((struct font_info *));
+extern Lisp_Object x_get_font_repertory P_ ((struct frame *,
+ struct font_info *));
+
/* Each X frame object points to its own struct x_output object
in the output_data.x field. The x_output structure contains