diff options
author | Jeffrey Stedfast <fejj@src.gnome.org> | 2002-03-20 01:16:50 +0000 |
---|---|---|
committer | Jeffrey Stedfast <fejj@src.gnome.org> | 2002-03-20 01:16:50 +0000 |
commit | d7826a7752e74e952acdd098a902ae8f442d5275 (patch) | |
tree | cf6f2ea418c0a55b9920f9389093292e98c0380c | |
parent | 9ce34cd38047cf5fd6145a42f6cf0eb6eb259875 (diff) | |
download | gmime-d7826a7752e74e952acdd098a902ae8f442d5275.tar.gz |
*** empty log message ***
-rw-r--r-- | ChangeLog | 26 | ||||
-rw-r--r-- | Makefile.am | 10 | ||||
-rw-r--r-- | aclocal.m4 | 565 | ||||
-rw-r--r-- | gmime-charset.c | 399 | ||||
-rw-r--r-- | gmime-charset.h | 3 | ||||
-rw-r--r-- | gmime-iconv-utils.c | 4 | ||||
-rw-r--r-- | gmime-param.c | 174 | ||||
-rw-r--r-- | gmime-utils.c | 175 | ||||
-rw-r--r-- | gmime.c | 56 | ||||
-rw-r--r-- | gmime.h.in | 5 | ||||
-rw-r--r-- | gmime/gmime-charset.c | 399 | ||||
-rw-r--r-- | gmime/gmime-charset.h | 3 | ||||
-rw-r--r-- | gmime/gmime-iconv-utils.c | 4 | ||||
-rw-r--r-- | gmime/gmime-param.c | 174 | ||||
-rw-r--r-- | gmime/gmime-utils.c | 175 | ||||
-rw-r--r-- | gmime/gmime.c | 56 | ||||
-rw-r--r-- | gmime/gmime.h.in | 5 | ||||
-rw-r--r-- | gmime/internet-address.c | 67 | ||||
-rw-r--r-- | internet-address.c | 67 | ||||
-rw-r--r-- | test-mime.c | 2 | ||||
-rw-r--r-- | test-parser.c | 2 | ||||
-rw-r--r-- | tests/test-mime.c | 2 | ||||
-rw-r--r-- | tests/test-parser.c | 2 |
23 files changed, 1876 insertions, 499 deletions
@@ -1,3 +1,29 @@ +2002-03-19 Jeffrey Stedfast <fejj@ximian.com> + + * internet-address.c (decode_mailbox): If gmime was initialized to + use UTF-8 interfaces and we encountered illegal 8bit text, attempt + to convert it to UTF-8 using the user's locale charset. + + * gmime-charset.c (g_mime_charset_name): Fixed the ISO parser. + (g_mime_charset_best): New function to compute the best charset + for a given text input. + + * gmime-param.c (rfc2184_decode): If gmime was initialized to use + UTF-8 interfaces, then convert the decoded text to UTF-8 here. + (decode_param): If gmime was initialized to use UTF-8 interfaces + and we encounter illegal 8bit text, attempt to convert it to + UTF-8. + (encode_param): If gmime was initialized to use UTF-8 interfaces, + convert the input buffer to the locale charset before encoding. + + * gmime-utils.c (rfc2047_decode_word): Renamed from + decoded_encoded_8bit_word. If gmime was initialized to use UTF-8 + interfaces, then convert the header to UTF-8 here. + (rfc2047_encode_word): Renamed from encode_8bit_word. If gmime was + initialized to use UTF-8 interfaces, convert input text to locale + charset before encoding. + (g_mime_utils_8bit_header_encode): Avoid excess mallocing. + 2002-03-17 Jeffrey Stedfast <fejj@ximian.com> * gmime-message.c (g_mime_message_write_to_stream): fixed doc diff --git a/Makefile.am b/Makefile.am index 4e950c7f..5299ef11 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,7 @@ libgmime_la_SOURCES = \ alloca.c \ strlib.c \ memchunk.c \ + gmime.c \ gmime-charset.c \ gmime-content-type.c \ gmime-data-wrapper.c \ @@ -82,10 +83,11 @@ gmimeinclude_HEADERS = \ gmime-utils.h \ internet-address.h -noinst_HEADERS = \ - strlib.h \ - memchunk.h \ - gmime-table-private.h \ +noinst_HEADERS = \ + strlib.h \ + memchunk.h \ + gmime-table-private.h \ + gmime-charset-map-private.h \ md5-utils.h DEPS = $(top_builddir)/libgmime.la @@ -1,6 +1,6 @@ -dnl aclocal.m4 generated automatically by aclocal 1.4-p5 +dnl aclocal.m4 generated automatically by aclocal 1.4-p4 -dnl Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. +dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. @@ -12,7 +12,7 @@ dnl PARTICULAR PURPOSE. # Like AC_CONFIG_HEADER, but automatically create stamp file. -AC_DEFUN([AM_CONFIG_HEADER], +AC_DEFUN(AM_CONFIG_HEADER, [AC_PREREQ([2.12]) AC_CONFIG_HEADER([$1]) dnl When config.status generates a header, we must update the stamp-h file. @@ -42,7 +42,7 @@ changequote([,]))]) dnl Usage: dnl AM_INIT_AUTOMAKE(package,version, [no-define]) -AC_DEFUN([AM_INIT_AUTOMAKE], +AC_DEFUN(AM_INIT_AUTOMAKE, [AC_REQUIRE([AC_PROG_INSTALL]) PACKAGE=[$1] AC_SUBST(PACKAGE) @@ -70,7 +70,7 @@ AC_REQUIRE([AC_PROG_MAKE_SET])]) # Check to make sure that the build environment is sane. # -AC_DEFUN([AM_SANITY_CHECK], +AC_DEFUN(AM_SANITY_CHECK, [AC_MSG_CHECKING([whether build environment is sane]) # Just in case sleep 1 @@ -111,7 +111,7 @@ AC_MSG_RESULT(yes)]) dnl AM_MISSING_PROG(NAME, PROGRAM, DIRECTORY) dnl The program must properly implement --version. -AC_DEFUN([AM_MISSING_PROG], +AC_DEFUN(AM_MISSING_PROG, [AC_MSG_CHECKING(for working $2) # Run test in a subshell; some versions of sh will print an error if # an executable is not found, even if stderr is redirected. @@ -161,7 +161,7 @@ AC_DEFUN([AC_ISC_POSIX], # program @code{ansi2knr}, which comes with Ghostscript. # @end defmac -AC_DEFUN([AM_PROG_CC_STDC], +AC_DEFUN(AM_PROG_CC_STDC, [AC_REQUIRE([AC_PROG_CC]) AC_BEFORE([$0], [AC_C_INLINE]) AC_BEFORE([$0], [AC_C_CONST]) @@ -234,6 +234,7 @@ esac # libtool.m4 - Configure libtool for the host system. -*-Shell-script-*- # serial 46 AC_PROG_LIBTOOL + AC_DEFUN([AC_PROG_LIBTOOL], [AC_REQUIRE([AC_LIBTOOL_SETUP])dnl @@ -359,9 +360,30 @@ _LT_AC_LTCONFIG_HACK ]) +# AC_LIBTOOL_HEADER_ASSERT +# ------------------------ +AC_DEFUN([AC_LIBTOOL_HEADER_ASSERT], +[AC_CACHE_CHECK([whether $CC supports assert without backlinking], + [lt_cv_func_assert_works], + [case $host in + *-*-solaris*) + if test "$GCC" = yes && test "$with_gnu_ld" != yes; then + case `$CC --version 2>/dev/null` in + [[12]].*) lt_cv_func_assert_works=no ;; + *) lt_cv_func_assert_works=yes ;; + esac + fi + ;; + esac]) + +if test "x$lt_cv_func_assert_works" = xyes; then + AC_CHECK_HEADERS(assert.h) +fi +])# AC_LIBTOOL_HEADER_ASSERT + # _LT_AC_CHECK_DLFCN # -------------------- -AC_DEFUN(_LT_AC_CHECK_DLFCN, +AC_DEFUN([_LT_AC_CHECK_DLFCN], [AC_CHECK_HEADERS(dlfcn.h) ])# _LT_AC_CHECK_DLFCN @@ -379,10 +401,10 @@ AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [dnl # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. -[symcode='[BCDEGRST]'] +symcode='[[BCDEGRST]]' # Regexp to match symbols that can be accessed directly from C. -[sympat='\([_A-Za-z][_A-Za-z0-9]*\)'] +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' # Transform the above into a raw symbol and a C symbol. symxfrm='\1 \2\3 \3' @@ -390,25 +412,29 @@ symxfrm='\1 \2\3 \3' # Transform an extracted symbol line into a proper C declaration lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'" +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + # Define system-specific variables. case $host_os in aix*) - [symcode='[BCDT]'] + symcode='[[BCDT]]' ;; cygwin* | mingw* | pw32*) - [symcode='[ABCDGISTW]'] + symcode='[[ABCDGISTW]]' ;; hpux*) # Its linker distinguishes data from code symbols lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" ;; -irix*) - [symcode='[BCDEGRST]'] +irix* | nonstopux*) + symcode='[[BCDEGRST]]' ;; solaris* | sysv5*) - [symcode='[BDT]'] + symcode='[[BDT]]' ;; sysv4) - [symcode='[DFNSTU]'] + symcode='[[DFNSTU]]' ;; esac @@ -422,14 +448,14 @@ esac # If we're using GNU nm, then use its standard symbol codes. if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then - [symcode='[ABCDGISTW]'] + symcode='[[ABCDGISTW]]' fi # Try without a prefix undercore, then with it. for ac_symprfx in "" "_"; do # Write the raw and C identifiers. -[lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"] +lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'" # Check to see that the pipe works correctly. pipe_works=no @@ -471,23 +497,23 @@ EOF cat <<EOF >> conftest.$ac_ext #if defined (__STDC__) && __STDC__ -# define lt_ptr_t void * +# define lt_ptr void * #else -# define lt_ptr_t char * +# define lt_ptr char * # define const #endif /* The mapping between symbol names and symbols. */ const struct { const char *name; - lt_ptr_t address; + lt_ptr address; } -[lt_preloaded_symbols[] =] +lt_preloaded_symbols[[]] = { EOF - sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" >> conftest.$ac_ext + sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr) \&\2},/" < "$nlist" >> conftest.$ac_ext cat <<\EOF >> conftest.$ac_ext - {0, (lt_ptr_t) 0} + {0, (lt_ptr) 0} }; #ifdef __cplusplus @@ -531,10 +557,13 @@ done global_symbol_pipe="$lt_cv_sys_global_symbol_pipe" if test -z "$lt_cv_sys_global_symbol_pipe"; then global_symbol_to_cdecl= + global_symbol_to_c_name_address= else global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl" + global_symbol_to_c_name_address="$lt_cv_global_symbol_to_c_name_address" fi -if test -z "$global_symbol_pipe$global_symbol_to_cdecl"; then +if test -z "$global_symbol_pipe$global_symbol_to_cdec$global_symbol_to_c_name_address"; +then AC_MSG_RESULT(failed) else AC_MSG_RESULT(ok) @@ -552,16 +581,17 @@ if test "X${PATH_SEPARATOR+set}" != Xset; then *-DOS) lt_cv_sys_path_separator=';' ;; *) lt_cv_sys_path_separator=':' ;; esac + PATH_SEPARATOR=$lt_cv_sys_path_separator fi ])# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR # _LT_AC_PROG_ECHO_BACKSLASH # -------------------------- # Add some code to the start of the generated configure script which -# will find an echo command which doesn;t interpret backslashes. +# will find an echo command which doesn't interpret backslashes. AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH], [ifdef([AC_DIVERSION_NOTICE], [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)], - [AC_DIVERT_PUSH(NOTICE)]) + [AC_DIVERT_PUSH(NOTICE)]) _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR # Check that we are running under the correct shell. @@ -627,7 +657,7 @@ else # # So, first we look for a working echo in the user's PATH. - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" + IFS="${IFS= }"; save_ifs="$IFS"; IFS=$PATH_SEPARATOR for dir in $PATH /usr/ucb; do if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && @@ -716,7 +746,7 @@ AC_DIVERT_POP # _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, # ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) # ------------------------------------------------------------------ -AC_DEFUN(_LT_AC_TRY_DLOPEN_SELF, +AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF], [if test "$cross_compiling" = yes; then : [$4] else @@ -803,7 +833,7 @@ rm -fr conftest* # AC_LIBTOOL_DLOPEN_SELF # ------------------- -AC_DEFUN(AC_LIBTOOL_DLOPEN_SELF, +AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [if test "x$enable_dlopen" != xyes; then enable_dlopen=unknown enable_dlopen_self=unknown @@ -825,16 +855,22 @@ else ;; *) - AC_CHECK_LIB(dl, dlopen, [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], - [AC_CHECK_FUNC(dlopen, lt_cv_dlopen="dlopen", - [AC_CHECK_FUNC(shl_load, lt_cv_dlopen="shl_load", - [AC_CHECK_LIB(svld, dlopen, - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], - [AC_CHECK_LIB(dld, shl_load, - [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"]) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"]) + ]) ]) - ]) - ]) + ]) + ]) ]) ;; esac @@ -897,10 +933,10 @@ AC_DEFUN([_LT_AC_LTCONFIG_HACK], # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. Xsed='sed -e s/^X//' -[sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'] +sed_quote_subst='s/\([[\\"\\`$\\\\]]\)/\\\1/g' # Same as above, but do not quote variable references. -[double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'] +double_quote_subst='s/\([[\\"\\`\\\\]]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. @@ -969,8 +1005,15 @@ old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="\$RANLIB -t \$oldlib~$old_postinstall_cmds" + ;; + *) + old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds" + ;; + esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" - old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds" fi # Allow CC to be a program name with arguments. @@ -990,7 +1033,7 @@ rmdir .libs 2>/dev/null AC_MSG_RESULT($objdir) -AC_ARG_WITH(pic, +AC_ARG_WITH(pic, [ --with-pic try to use only PIC/non-PIC objects [default=use both]], pic_mode="$withval", pic_mode=default) test -z "$pic_mode" && pic_mode=default @@ -1018,7 +1061,7 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic, # libC (AIX C++ library), which obviously doesn't included in libraries # list by gcc. This cause undefined symbols with -static flags. # This hack allows C programs to be linked with "-static -ldl", but - # we not sure about C++ programs. + # not sure about C++ programs. lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC" ;; amigaos*) @@ -1027,7 +1070,7 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic, # like `-m68040'. lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4' ;; - beos* | irix5* | irix6* | osf3* | osf4* | osf5*) + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; darwin* | rhapsody*) @@ -1053,13 +1096,13 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic, # PORTME Check for PIC flags for the system compiler. case $host_os in aix3* | aix4* | aix5*) + lt_cv_prog_cc_wl='-Wl,' # All AIX code is PIC. if test "$host_cpu" = ia64; then - # AIX 5 now supports IA64 processor - lt_cv_prog_cc_static='-Bstatic' - lt_cv_prog_cc_wl='-Wl,' + # AIX 5 now supports IA64 processor + lt_cv_prog_cc_static='-Bstatic' else - lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp' + lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp' fi ;; @@ -1070,7 +1113,7 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic, lt_cv_prog_cc_pic='+Z' ;; - irix5* | irix6*) + irix5* | irix6* | nonstopux*) lt_cv_prog_cc_wl='-Wl,' lt_cv_prog_cc_static='-non_shared' # PIC (with -KPIC) is the default. @@ -1115,9 +1158,9 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic, lt_cv_prog_cc_pic='-KPIC' lt_cv_prog_cc_static='-Bstatic' if test "x$host_vendor" = xsni; then - lt_cv_prog_cc_wl='-LD' + lt_cv_prog_cc_wl='-LD' else - lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_wl='-Wl,' fi ;; @@ -1184,7 +1227,7 @@ fi # Check for any special shared library compilation flags. if test -n "$lt_cv_prog_cc_shlib"; then AC_MSG_WARN([\`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries]) - if echo "$old_CC $old_CFLAGS " | [egrep -e "[ ]$lt_cv_prog_cc_shlib[ ]"] >/dev/null; then : + if echo "$old_CC $old_CFLAGS " | egrep -e "[[ ]]$lt_cv_prog_cc_shlib[[ ]]" >/dev/null; then : else AC_MSG_WARN([add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure]) lt_cv_prog_cc_can_build_shared=no @@ -1260,6 +1303,8 @@ if test x"$compiler_c_o" = x"yes"; then lt_cv_compiler_o_lo=no save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -c -o conftest.lo" + save_objext="$ac_objext" + ac_objext=lo AC_TRY_COMPILE([], [int some_variable = 0;], [dnl # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings @@ -1269,10 +1314,11 @@ if test x"$compiler_c_o" = x"yes"; then lt_cv_compiler_o_lo=yes fi ]) + ac_objext="$save_objext" CFLAGS="$save_CFLAGS" ]) compiler_o_lo=$lt_cv_compiler_o_lo - AC_MSG_RESULT([$compiler_c_lo]) + AC_MSG_RESULT([$compiler_o_lo]) else compiler_o_lo=no fi @@ -1365,7 +1411,7 @@ exclude_expsyms="_GLOBAL_OFFSET_TABLE_" extract_expsyms_cmds= case $host_os in -cygwin* | mingw* | pw32* ) +cygwin* | mingw* | pw32*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. @@ -1373,7 +1419,9 @@ cygwin* | mingw* | pw32* ) with_gnu_ld=no fi ;; - +openbsd*) + with_gnu_ld=no + ;; esac ld_shlibs=yes @@ -1460,7 +1508,7 @@ EOF # can override, but on older systems we have to supply one (in ltdll.c) if test "x$lt_cv_need_dllmain" = "xyes"; then ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext " - ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < [$]0 > $output_objdir/$soname-ltdll.c~ + ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $''0 > $output_objdir/$soname-ltdll.c~ test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~' else ltdll_obj= @@ -1473,24 +1521,24 @@ EOF # Be careful not to strip the DATA tag left be newer dlltools. export_symbols_cmds="$ltdll_cmds"' $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~ - [sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//"] < $output_objdir/$soname-def > $export_symbols' + sed -e "1,/EXPORTS/d" -e "s/ @ [[0-9]]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is. # If DATA tags from a recent dlltool are present, honour them! archive_expsym_cmds='if test "x`head -1 $export_symbols`" = xEXPORTS; then - cp $export_symbols $output_objdir/$soname-def; + cp $export_symbols $output_objdir/$soname-def; else - echo EXPORTS > $output_objdir/$soname-def; - _lt_hint=1; - cat $export_symbols | while read symbol; do - set dummy \$symbol; - case \[$]# in - 2) echo " \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;; - *) echo " \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;; - esac; - _lt_hint=`expr 1 + \$_lt_hint`; - done; + echo EXPORTS > $output_objdir/$soname-def; + _lt_hint=1; + cat $export_symbols | while read symbol; do + set dummy \$symbol; + case \[$]# in + 2) echo " \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;; + *) echo " \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;; + esac; + _lt_hint=`expr 1 + \$_lt_hint`; + done; fi~ '"$ltdll_cmds"' $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ @@ -1585,80 +1633,96 @@ else ;; aix4* | aix5*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + hardcode_direct=yes archive_cmds='' hardcode_libdir_separator=':' if test "$GCC" = yes; then - collect2name=`${CC} -print-prog-name=collect2` - if test -f "$collect2name" && \ - strings "$collect2name" | grep resolve_lib_name >/dev/null - then - # We have reworked collect2 - hardcode_direct=yes - else - # We have old collect2 - hardcode_direct=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. Setting hardcode_minus_L - # to unsupported forces relinking - hardcode_minus_L=yes - hardcode_libdir_flag_spec='-L$libdir' - hardcode_libdir_separator= - fi + case $host_os in aix4.[[012]]|aix4.[[012]].*) + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && \ + strings "$collect2name" | grep resolve_lib_name >/dev/null + then + # We have reworked collect2 + hardcode_direct=yes + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + esac + shared_flag='-shared' else + # not using gcc if test "$host_cpu" = ia64; then - shared_flag='-G' + shared_flag='${wl}-G' else - shared_flag='${wl}-bM:SRE' + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi fi - hardcode_direct=yes fi - if test "$host_cpu" = ia64; then - # On IA64, the linker does run time linking by default, so we don't - # have to do anything special. - aix_use_runtimelinking=no - exp_sym_flag='-Bexport' - no_entry_flag="" - else - # Test if we are trying to use run time linking, or normal AIX style linking. - # If -brtl is somewhere in LDFLAGS, we need to do run time linking. - aix_use_runtimelinking=no - for ld_flag in $LDFLAGS; do - if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl" ); then - aix_use_runtimelinking=yes - break - fi - done - exp_sym_flag='-bexport' - no_entry_flag='-bnoentry' - fi # It seems that -bexpall can do strange things, so it is better to # generate a list of symbols to export. always_export_symbols=yes if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='-berok' hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib' - allow_undefined_flag=' -Wl,-G' - archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" + archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then - hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' - allow_undefined_flag="-znodefs" - archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" + hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" else - hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib' - # Warning - without using the other run time loading flags, -berok will - # link without error, but may produce a broken library. - allow_undefined_flag='${wl}-berok" - # This is a bit strange, but is similar to how AIX traditionally builds - # it's shared libraries. - archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname' + hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib' + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='${wl}-berok' + # This is a bit strange, but is similar to how AIX traditionally builds + # it's shared libraries. + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname' fi fi ;; @@ -1690,11 +1754,19 @@ else ;; darwin* | rhapsody*) - allow_undefined_flag='-undefined suppress' + case "$host_os" in + rhapsody* | darwin1.[[012]]) + allow_undefined_flag='-undefined suppress' + ;; + *) # Darwin 1.3 on + allow_undefined_flag='-flat_namespace -undefined suppress' + ;; + esac # FIXME: Relying on posixy $() will cause problems for # cross-compilation, but unfortunately the echo tests do not - # yet detect zsh echo's removal of \ escapes. - archive_cmds='$CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib $libobjs $deplibs$linkopts -install_name $rpath/$soname $(test -n "$verstring" -a x$verstring != x0.0 && echo $verstring)' + # yet detect zsh echo's removal of \ escapes. Also zsh mangles + # `"' quotes if we put them in here... so don't! + archive_cmds='$nonopt $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib $libobjs $deplibs$linker_flags -install_name $rpath/$soname $verstring' # We need to add '_' to the symbols in $export_symbols first #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols' hardcode_direct=yes @@ -1746,7 +1818,7 @@ else export_dynamic_flag_spec='${wl}-E' ;; - irix5* | irix6*) + irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else @@ -1769,7 +1841,7 @@ else ;; newsos6) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: @@ -1777,10 +1849,24 @@ else ;; openbsd*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + else + case "$host_os" in + openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + ;; + *) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + ;; + esac + fi ;; os2*) @@ -1825,10 +1911,39 @@ else hardcode_shlibpath_var=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes + export_dynamic_flag_spec='${wl}-Bexport' ;; solaris*) + # gcc --version < 3.0 without binutils cannot create self contained + # shared libraries reliably, requiring libgcc.a to resolve some of + # the object symbols generated in some cases. Libraries that use + # assert need libgcc.a to resolve __eprintf, for example. Linking + # a copy of libgcc.a into every shared library to guarantee resolving + # such symbols causes other problems: According to Tim Van Holder + # <tim.van.holder@pandora.be>, C++ libraries end up with a separate + # (to the application) exception stack for one thing. no_undefined_flag=' -z defs' + if test "$GCC" = yes; then + case `$CC --version 2>/dev/null` in + [[12]].*) + cat <<EOF 1>&2 + +*** Warning: Releases of GCC earlier than version 3.0 cannot reliably +*** create self contained shared libraries on Solaris systems, without +*** introducing a dependency on libgcc.a. Therefore, libtool is disabling +*** -no-undefined support, which will at least allow you to build shared +*** libraries. However, you may find that when you link such libraries +*** into an application without using GCC, you have to manually add +*** \`gcc --print-libgcc-file-name\` to the link command. We urge you to +*** upgrade to a newer version of GCC. Another option is to rebuild your +*** current GCC to use the GNU linker from GNU binutils 2.9.1 or newer. + +EOF + no_undefined_flag= + ;; + esac + fi # $CC -shared without GNU ld will not create a library from C++ # object files and a static libstdc++, better avoid it by now archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' @@ -1837,7 +1952,7 @@ else hardcode_libdir_flag_spec='-R$libdir' hardcode_shlibpath_var=no case $host_os in - [solaris2.[0-5] | solaris2.[0-5].*]) ;; + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # Supported since Solaris 2.6 (maybe 2.5.1?) whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;; esac @@ -1860,7 +1975,7 @@ else sysv4) if test "x$host_vendor" = xsno; then - archive_cmds='$LD -G -Bsymbolic -h $soname -o $lib $libobjs $deplibs $linkopts' + archive_cmds='$LD -G -Bsymbolic -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes # is this really true??? else archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' @@ -2017,22 +2132,24 @@ aix4* | aix5*) # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in - [ aix4 | aix4.[01] | aix4.[01].*)] - if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' - echo ' yes ' - echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then - : - else - can_build_shared=no - fi - ;; + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; esac - # AIX (on Power*) has no versioning support, so currently we can not hardcode correct - # soname into executable. Probably we can add versioning support to - # collect2, so additional links can be useful in future. + # AIX (on Power*) has no versioning support, so currently we can + # not hardcode correct soname into executable. Probably we can + # add versioning support to collect2, so additional links can + # be useful in future. if test "$aix_use_runtimelinking" = yes; then - # If using run time linking (on AIX 4.2 or later) use lib<name>.so instead of - # lib<name>.a to let people know that these are not typical AIX shared libraries. + # If using run time linking (on AIX 4.2 or later) use lib<name>.so + # instead of lib<name>.a to let people know that these are not + # typical AIX shared libraries. library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' else # We preserve .a as extension for shared libraries through AIX4.2 @@ -2041,14 +2158,13 @@ aix4* | aix5*) soname_spec='${libname}${release}.so$major' fi shlibpath_var=LIBPATH - deplibs_check_method=pass_all fi ;; amigaos*) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. - finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | [$Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\'']`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' ;; beos*) @@ -2079,7 +2195,7 @@ cygwin* | mingw* | pw32*) case $GCC,$host_os in yes,cygwin*) library_names_spec='$libname.dll.a' - soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll' postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ @@ -2089,14 +2205,14 @@ cygwin* | mingw* | pw32*) $rm \$dlpath' ;; yes,mingw*) - library_names_spec='${libname}`echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll' + library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll' sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g"` ;; yes,pw32*) library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll' ;; *) - library_names_spec='${libname}`echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll $libname.lib' + library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll $libname.lib' ;; esac dynamic_linker='Win32 ld.exe' @@ -2173,14 +2289,17 @@ hpux9* | hpux10* | hpux11*) postinstall_cmds='chmod 555 $lib' ;; -irix5* | irix6*) - version_type=irix +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) version_type=irix ;; + esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}.so$major' library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so' case $host_os in - irix5*) + irix5* | nonstopux*) libsuff= shlibsuff= ;; *) @@ -2254,9 +2373,19 @@ newsos6) openbsd*) version_type=sunos - if test "$with_gnu_ld" = yes; then - need_lib_prefix=no - need_version=no + need_lib_prefix=no + need_version=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case "$host_os" in + openbsd2.[[89]] | openbsd2.[[89]].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes fi library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' @@ -2274,6 +2403,7 @@ os2*) osf3* | osf4* | osf5*) version_type=osf need_version=no + need_lib_prefix=no soname_spec='${libname}${release}.so' library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' shlibpath_var=LD_LIBRARY_PATH @@ -2367,6 +2497,33 @@ test "$dynamic_linker" = no && can_build_shared=no AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) +AC_MSG_CHECKING([whether to build shared libraries]) +test "$can_build_shared" = "no" && enable_shared=no + +# On AIX, shared libraries and static libraries use the same namespace, and +# are all built from PIC. +case "$host_os" in +aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + +aix4*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; +esac +AC_MSG_RESULT([$enable_shared]) + +AC_MSG_CHECKING([whether to build static libraries]) +# Make sure either enable_shared or enable_static is yes. +test "$enable_shared" = yes || enable_static=yes +AC_MSG_RESULT([$enable_static]) + if test "$hardcode_action" = relink; then # Fast installation is not supported enable_fast_install=no @@ -2460,6 +2617,7 @@ if test -f "$ltmain"; then old_striplib striplib file_magic_cmd export_symbols_cmds \ deplibs_check_method allow_undefined_flag no_undefined_flag \ finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \ + global_symbol_to_c_name_address \ hardcode_libdir_flag_spec hardcode_libdir_separator \ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do @@ -2526,12 +2684,12 @@ SHELL=$lt_SHELL # Whether or not to build shared libraries. build_libtool_libs=$enable_shared -# Whether or not to add -lc for building shared libraries. -build_libtool_need_lc=$need_lc - # Whether or not to build static libraries. build_old_libs=$enable_static +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$need_lc + # Whether or not to optimize for fast installation. fast_install=$enable_fast_install @@ -2697,6 +2855,9 @@ global_symbol_pipe=$lt_global_symbol_pipe # Transform the output of nm in a proper C declaration global_symbol_to_cdecl=$lt_global_symbol_to_cdecl +# Transform the output of nm in a C name address pair +global_symbol_to_c_name_address=$lt_global_symbol_to_c_name_address + # This is the shared library runtime path variable. runpath_var=$runpath_var @@ -2819,9 +2980,9 @@ EOF # return TRUE; # } # /* ltdll.c ends here */ - # This is a source program that is used to create import libraries - # on Windows for dlls which lack them. Don't remove nor modify the - # starting and closing comments + # This is a source program that is used to create import libraries + # on Windows for dlls which lack them. Don't remove nor modify the + # starting and closing comments # /* impgen.c starts here */ # /* Copyright (C) 1999-2000 Free Software Foundation, Inc. # @@ -3176,6 +3337,7 @@ test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no) AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. @@ -3189,8 +3351,8 @@ if test "$GCC" = yes; then esac case $ac_prog in # Accept absolute paths. - [[\\/]* | [A-Za-z]:[\\/]*)] - [re_direlt='/[^/][^/]*/\.\./'] + [[\\/]]* | [[A-Za-z]]:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' # Canonicalize the path of ld ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do @@ -3214,7 +3376,7 @@ else fi AC_CACHE_VAL(lt_cv_path_LD, [if test -z "$LD"; then - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then @@ -3278,7 +3440,7 @@ lt_cv_deplibs_check_method='unknown' # `unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. # 'test_compile' -- check by making test program. -# ['file_magic [regex]'] -- check by looking for files in library path +# 'file_magic [[regex]]' -- check by looking for files in library path # which responds to the $file_magic_cmd with a given egrep regex. # If you have `file' or equivalent on your system and you're not sure # whether `pass_all' will *always* work, you probably want this one. @@ -3293,7 +3455,7 @@ beos*) ;; bsdi4*) - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'] + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' lt_cv_file_magic_cmd='/usr/bin/file -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; @@ -3307,7 +3469,7 @@ darwin* | rhapsody*) lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library' lt_cv_file_magic_cmd='/usr/bin/file -L' case "$host_os" in - rhapsody* | darwin1.[012]) + rhapsody* | darwin1.[[012]]) lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1` ;; *) # Darwin 1.3 on @@ -3322,7 +3484,7 @@ freebsd*) i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. - [lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[3-9]86 (compact )?demand paged shared library'] + lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[[3-9]]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; @@ -3337,14 +3499,14 @@ gnu*) ;; hpux10.20*|hpux11*) - [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9].[0-9]) shared library'] + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; -irix5* | irix6*) +irix5* | irix6* | nonstopux*) case $host_os in - irix5*) + irix5* | nonstopux*) # this will be overridden with pass_all, but let us keep it just in case lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" ;; @@ -3356,7 +3518,7 @@ irix5* | irix6*) *) libmagic=never-match;; esac # this will be overridden with pass_all, but let us keep it just in case - [lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"] + lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[[1234]] dynamic lib MIPS - version 1" ;; esac lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*` @@ -3365,30 +3527,34 @@ irix5* | irix6*) # This must be Linux ELF. linux-gnu*) - case $host_cpu in - alpha* | i*86 | powerpc* | sparc* | ia64* ) - lt_cv_deplibs_check_method=pass_all ;; - *) - # glibc up to 2.1.1 does not perform some relocations on ARM - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;] - esac + lt_cv_deplibs_check_method=pass_all lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then - [lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so\.[0-9]+\.[0-9]+$'] + lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so\.[[0-9]]+\.[[0-9]]+$' else - [lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so$'] + lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so$' fi ;; newos6*) - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)'] + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; +openbsd*) + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB shared object' + else + lt_cv_deplibs_check_method='file_magic OpenBSD.* shared library' + fi + ;; + osf3* | osf4* | osf5*) # this will be overridden with pass_all, but let us keep it just in case lt_cv_deplibs_check_method='file_magic COFF format alpha shared library' @@ -3405,14 +3571,14 @@ solaris*) lt_cv_file_magic_test_file=/lib/libc.so ;; -[sysv5uw[78]* | sysv4*uw2*)] +sysv5uw[[78]]* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) case $host_vendor in motorola) - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'] + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) @@ -3420,11 +3586,11 @@ sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) ;; sequent) lt_cv_file_magic_cmd='/bin/file' - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )'] + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' - [lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"] + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; esac @@ -3438,13 +3604,14 @@ deplibs_check_method=$lt_cv_deplibs_check_method # AC_PROG_NM - find the path to a BSD-compatible name lister AC_DEFUN([AC_PROG_NM], -[AC_MSG_CHECKING([for BSD-compatible nm]) +[AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl +AC_MSG_CHECKING([for BSD-compatible nm]) AC_CACHE_VAL(lt_cv_path_NM, [if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM="$NM" else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do test -z "$ac_dir" && ac_dir=. tmp_nm=$ac_dir/${ac_tool_prefix}nm @@ -3491,12 +3658,12 @@ esac ]) # AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl convenience library and INCLTDL to the include flags for +# the libltdl convenience library and LTDLINCL to the include flags for # the libltdl header and adds --enable-ltdl-convenience to the -# configure arguments. Note that LIBLTDL and INCLTDL are not +# configure arguments. Note that LIBLTDL and LTDLINCL are not # AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not # provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed -# with '${top_builddir}/' and INCLTDL will be prefixed with +# with '${top_builddir}/' and LTDLINCL will be prefixed with # '${top_srcdir}/' (note the single quotes!). If your package is not # flat and you're not using automake, define top_builddir and # top_srcdir appropriately in the Makefiles. @@ -3508,16 +3675,18 @@ AC_DEFUN([AC_LIBLTDL_CONVENIENCE], ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;; esac LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la - INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) + LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) + # For backwards non-gettext consistent compatibility... + INCLTDL="$LTDLINCL" ]) # AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl installable library and INCLTDL to the include flags for +# the libltdl installable library and LTDLINCL to the include flags for # the libltdl header and adds --enable-ltdl-install to the configure -# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is +# arguments. Note that LIBLTDL and LTDLINCL are not AC_SUBSTed, nor is # AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed # libltdl is not found, it is assumed to be `libltdl'. LIBLTDL will -# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed +# be prefixed with '${top_builddir}/' and LTDLINCL will be prefixed # with '${top_srcdir}/' (note the single quotes!). If your package is # not flat and you're not using automake, define top_builddir and # top_srcdir appropriately in the Makefiles. @@ -3535,12 +3704,14 @@ AC_DEFUN([AC_LIBLTDL_INSTALLABLE], if test x"$enable_ltdl_install" = x"yes"; then ac_configure_args="$ac_configure_args --enable-ltdl-install" LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la - INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) + LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) else ac_configure_args="$ac_configure_args --enable-ltdl-install=no" LIBLTDL="-lltdl" - INCLTDL= + LTDLINCL= fi + # For backwards non-gettext consistent compatibility... + INCLTDL="$LTDLINCL" ]) # old names @@ -3560,7 +3731,7 @@ ifelse([AC_DISABLE_FAST_INSTALL]) # serial 1 -AC_DEFUN([AM_MAINTAINER_MODE], +AC_DEFUN(AM_MAINTAINER_MODE, [AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) dnl maintainer-mode is disabled by default AC_ARG_ENABLE(maintainer-mode, @@ -3577,7 +3748,7 @@ AC_DEFUN([AM_MAINTAINER_MODE], # Define a conditional. -AC_DEFUN([AM_CONDITIONAL], +AC_DEFUN(AM_CONDITIONAL, [AC_SUBST($1_TRUE) AC_SUBST($1_FALSE) if $2; then diff --git a/gmime-charset.c b/gmime-charset.c index 7e04fc40..815288b8 100644 --- a/gmime-charset.c +++ b/gmime-charset.c @@ -34,21 +34,58 @@ #include <alloca.h> #endif +#include "gmime-charset-map-private.h" #include "gmime-charset.h" #include "strlib.h" #if defined (__aix__) || defined (__irix__) || defined (__sun__) -#define CANONICAL_ISO_FORMAT "ISO%d-%d" +#define CANONICAL_ISO_D_FORMAT "ISO%d-%d" +#define CANONICAL_ISO_S_FORMAT "ISO%d-%s" +#elif defined (__hpux__) +#define CANONICAL_ISO_D_FORMAT "iso%d%d" +#define CANONICAL_ISO_S_FORMAT "iso%d%s" #else -#ifdef __hpux__ -#define CANONICAL_ISO_FORMAT "iso%d%d" -#else -#define CANONICAL_ISO_FORMAT "ISO-%d-%d" -#endif /* __hpux__ */ +#define CANONICAL_ISO_D_FORMAT "iso-%d-%d" +#define CANONICAL_ISO_S_FORMAT "iso-%d-%s" #endif /* __aix__, __irix__, __sun__ */ +static struct { + char *name; + unsigned int bit; /* assigned bit */ +} tables[] = { + /* These are the 8bit character sets (other than iso-8859-1, + * which is special-cased) which are supported by both other + * mailers and the GNOME environment. Note that the order + * they're listed in is the order they'll be tried in, so put + * the more-popular ones first. + */ + { "iso-8859-2", 0 }, /* Central/Eastern European */ + { "iso-8859-4", 0 }, /* Baltic */ + { "koi8-r", 0 }, /* Russian */ + { "koi8-u", 0 }, /* Ukranian */ + { "iso-8859-5", 0 }, /* Least-popular Russian encoding */ + { "iso-8859-7", 0 }, /* Greek */ + { "iso-8859-8", 0 }, /* Hebrew; Visual */ + { "iso-8859-9", 0 }, /* Turkish */ + { "iso-8859-13", 0 }, /* Baltic again */ + { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most + * programs that support this support UTF8 + */ + { "windows-1251", 0 }, /* Russian */ + { 0, 0 } +}; + +unsigned int encoding_map[256 * 256]; + +#if G_BYTE_ORDER == G_BIG_ENDIAN +#define UCS "UCS-4BE" +#else +#define UCS "UCS-4LE" +#endif + + struct { char *charset; char *iconv_name; @@ -195,7 +232,6 @@ const char * g_mime_charset_name (const char *charset) { char *name, *iconv_name, *buf; - int codepage; if (charset == NULL) return NULL; @@ -212,26 +248,37 @@ g_mime_charset_name (const char *charset) return iconv_name; if (!strncmp (name, "iso", 3)) { + int iso, codepage; + char *p; + buf = name + 3; if (*buf == '-' || *buf == '_') buf++; - g_assert (strncmp (buf, "8859", 4) == 0); + iso = strtoul (buf, &p, 10); - buf += 4; + g_assert (p > buf); + + buf = p; if (*buf == '-' || *buf == '_') buf++; - codepage = atoi (buf); - g_assert (codepage > 0); + codepage = strtoul (buf, &p, 10); + if (p > buf) { + /* codepage is numeric */ #ifdef __aix__ - if (codepage == 13) - iconv_name = g_strdup ("IBM-921"); - else + if (codepage == 13) + iconv_name = g_strdup ("IBM-921"); + else #endif /* __aix__ */ - iconv_name = g_strdup_printf (CANONICAL_ISO_FORMAT, - 8859, codepage); + iconv_name = g_strdup_printf (CANONICAL_ISO_D_FORMAT, + iso, codepage); + } else { + /* codepage is a string? */ + iconv_name = g_strdup_printf (CANONICAL_ISO_S_FORMAT, + iso, p); + } } else if (!strncmp (name, "windows-", 8)) { buf = name + 8; if (!strncmp (buf, "cp", 2)) @@ -253,3 +300,323 @@ g_mime_charset_name (const char *charset) return iconv_name; } + + + +/* unicode_* and unichar are stolen from glib2... */ +typedef guint32 unichar; + +static const char unicode_skip[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 +}; + +#define UTF8_COMPUTE(ch, mask, len) \ + if (ch < 128) \ + { \ + len = 1; \ + mask = 0x7f; \ + } \ + else if ((ch & 0xe0) == 0xc0) \ + { \ + len = 2; \ + mask = 0x1f; \ + } \ + else if ((ch & 0xf0) == 0xe0) \ + { \ + len = 3; \ + mask = 0x0f; \ + } \ + else if ((ch & 0xf8) == 0xf0) \ + { \ + len = 4; \ + mask = 0x07; \ + } \ + else if ((ch & 0xfc) == 0xf8) \ + { \ + len = 5; \ + mask = 0x03; \ + } \ + else if ((ch & 0xfe) == 0xfc) \ + { \ + len = 6; \ + mask = 0x01; \ + } \ + else \ + len = -1; + +#define UTF8_GET(result, chars, count, mask, len) \ + (result) = (chars)[0] & (mask); \ + for ((count) = 1; (count) < (len); ++(count)) \ + { \ + if (((chars)[(count)] & 0xc0) != 0x80) \ + { \ + (result) = -1; \ + break; \ + } \ + (result) <<= 6; \ + (result) |= ((chars)[(count)] & 0x3f); \ + } + +#define UNICODE_VALID(ch) \ + ((ch) < 0x110000 && \ + ((ch) < 0xD800 || (ch) >= 0xE000) && \ + (ch) != 0xFFFE && (ch) != 0xFFFF) + + + +#define unicode_next_char(p) (char *)((p) + unicode_skip[*(unsigned char *)(p)]) + +unichar +unicode_get_char (const char *p) +{ + unsigned char c = (unsigned char) *p; + int i, mask = 0, len; + unichar result; + + UTF8_COMPUTE (c, mask, len); + if (len == -1) + return (unichar) -1; + UTF8_GET (result, p, i, mask, len); + + return result; +} + +gboolean +unichar_validate (unichar ch) +{ + return UNICODE_VALID (ch); +} + + + +typedef struct _Charset { + unsigned int mask; + unsigned int level; +} Charset; + +static void +charset_init (Charset *charset) +{ + charset->mask = ~0; + charset->level = 0; +} + +static void +charset_step (Charset *charset, const char *in, size_t len) +{ + register const char *inptr = in; + const char *inend = in + len; + register unsigned int mask; + register int level; + + mask = charset->mask; + level = charset->level; + + while (inptr < inend) { + const char *newinptr; + unichar c; + + newinptr = unicode_next_char (inptr); + c = unicode_get_char (inptr); + if (newinptr == NULL || !unichar_validate (c)) { + inptr++; + continue; + } + + inptr = newinptr; + if (c <= 0xffff) { + mask &= charset_mask (c); + + if (c >= 128 && c < 256) + level = MAX (level, 1); + else if (c >= 256) + level = MAX (level, 2); + } else { + mask = 0; + level = MAX (level, 2); + } + } + + charset->mask = mask; + charset->level = level; +} + +static const char * +charset_best_mask (unsigned int mask) +{ + int i; + + for (i = 0; i < sizeof (charinfo) / sizeof (charinfo[0]); i++) { + if (charinfo[i].bit & mask) + return charinfo[i].name; + } + + return "UTF-8"; +} + +static const char * +charset_best_name (Charset *charset) +{ + if (charset->level == 1) + return "iso-8859-1"; + else if (charset->level == 2) + return charset_best_mask (charset->mask); + else + return NULL; +} + + +/** + * g_mime_charset_best: + * @in: a UTF-8 text buffer + * @inlen: length of @in + * + * Computes the best charset to use to encode this text buffer. + * + * Returns the charset name best suited for the input text or %NULL if + * it is US-ASCII safe. + **/ +const char * +g_mime_charset_best (const char *in, size_t inlen) +{ + Charset charset; + + charset_init (&charset); + charset_step (&charset, in, inlen); + return charset_best_name (&charset); +} + + +#ifdef BUILD_CHARSET_MAP + +#include <errno.h> +#include <iconv.h> + +/* the following functions are copied from glib2... */ + + +int main (int argc, char **argv) +{ + char *inptr, *outptr; + size_t inlen, outlen; + guint32 out[128]; + char in[128]; + int i, j, k; + int max, min; + int bit = 0x01; + int bytes; + iconv_t cd; + + /* dont count the terminator */ + bytes = ((sizeof (tables) / sizeof (tables[0])) + 7 - 1) / 8; + + for (i = 0; i < 128; i++) + in[i] = i + 128; + + for (j = 0; tables[j].name; j++) { + cd = iconv_open (UCS, tables[j].name); + inptr = in; + outptr = (char *)(out); + inlen = sizeof (in); + outlen = sizeof (out); + while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) { + if (errno == EILSEQ) { + inptr++; + inlen--; + } else { + g_warning ("%s\n", g_strerror (errno)); + exit (1); + } + } + iconv_close (cd); + + for (i = 0; i < 128 - outlen / 4; i++) { + encoding_map[i] |= bit; + encoding_map[out[i]] |= bit; + } + + tables[j].bit = bit; + bit <<= 1; + } + + printf ("/* This file is automatically generated: DO NOT EDIT */\n\n"); + + for (i = 0; i < 256; i++) { + /* first, do we need this block? */ + for (k = 0; k < bytes; k++) { + for (j = 0; j < 256; j++) { + if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0) + break; + } + if (j < 256) { + /* yes, dump it */ + printf ("static unsigned char m%02x%x[256] = {\n\t", i, k); + for (j = 0; j < 256; j++) { + printf ("0x%02x, ", (encoding_map[i * 256 + j] >> (k * 8)) & 0xff); + if (((j + 1) & 7) == 0 && j < 255) + printf ("\n\t"); + } + printf ("\n};\n\n"); + } + } + } + + printf ("struct {\n"); + for (k = 0; k < bytes; k++) { + printf ("\tunsigned char *bits%d;\n", k); + } + + printf ("} charmap[256] = {\n\t"); + for (i = 0; i < 256; i++) { + /* first, do we need this block? */ + printf ("{ "); + for (k = 0; k < bytes; k++) { + for (j = 0; j < 256; j++) { + if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0) + break; + } + + if (j < 256) { + printf ("m%02x%x, ", i, k); + } else { + printf ("0, "); + } + } + + printf ("}, "); + if (((i + 1) & 7) == 0 && i < 255) + printf ("\n\t"); + } + printf ("\n};\n\n"); + + printf ("struct {\n\tconst char *name;\n\tunsigned int bit;\n} charinfo[] = {\n"); + for (j = 0; tables[j].name; j++) { + printf ("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit); + } + printf ("};\n\n"); + + printf("#define charset_mask(x) \\\n"); + for (k = 0; k < bytes; k++) { + if (k != 0) + printf ("\t| "); + else + printf ("\t"); + + printf ("(charmap[(x) >> 8].bits%d ? charmap[(x) >> 8].bits%d[(x) & 0xff] << %d : 0)", + k, k, k * 8); + + if (k < bytes - 1) + printf ("\t\\\n"); + } + printf ("\n\n"); + + return 0; +} +#endif /* BUILD_CHARSET_MAP */ diff --git a/gmime-charset.h b/gmime-charset.h index 544815d8..83be12e5 100644 --- a/gmime-charset.h +++ b/gmime-charset.h @@ -30,6 +30,7 @@ extern "C" { #endif /* __cplusplus */ #include <glib.h> +#include <sys/types.h> void g_mime_charset_init (void); @@ -37,6 +38,8 @@ const char *g_mime_charset_locale_name (void); const char *g_mime_charset_name (const char *charset); +const char *g_mime_charset_best (const char *in, size_t inlen); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/gmime-iconv-utils.c b/gmime-iconv-utils.c index 48b6e102..3ba8eb06 100644 --- a/gmime-iconv-utils.c +++ b/gmime-iconv-utils.c @@ -25,8 +25,10 @@ #endif #include <glib.h> -#include <errno.h> +#include <stdio.h> #include <string.h> +#include <errno.h> + #include "gmime-iconv-utils.h" #include "gmime-charset.h" diff --git a/gmime-param.c b/gmime-param.c index 89de69ab..94fea84a 100644 --- a/gmime-param.c +++ b/gmime-param.c @@ -26,6 +26,7 @@ #include <string.h> #include <ctype.h> +#include <errno.h> #ifdef HAVE_ALLOCA_H #include <alloca.h> @@ -35,12 +36,16 @@ #include "gmime-table-private.h" #include "gmime-charset.h" #include "gmime-utils.h" +#include "gmime-iconv.h" +#include "gmime-iconv-utils.h" #include "strlib.h" -#define d(x) +#define d(x) x #define w(x) +extern int gmime_interfaces_utf8; + static unsigned char tohex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' @@ -73,18 +78,18 @@ g_mime_param_new (const char *name, const char *value) #define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10) -static char * -hex_decode (const char *in, unsigned int len) +static size_t +hex_decode (const unsigned char *in, size_t len, unsigned char *out) { - register unsigned char *inptr, *outptr; + register const unsigned char *inptr; + register unsigned char *outptr; const unsigned char *inend; - char *outbuf; - inend = (const unsigned char *) in + len; + inptr = in; + inend = in + len; - outptr = outbuf = g_malloc (len); + outptr = out; - inptr = (unsigned char *) in; while (inptr < inend) { if (*inptr == '%') { if (isxdigit (inptr[1]) && isxdigit (inptr[2])) { @@ -98,41 +103,72 @@ hex_decode (const char *in, unsigned int len) *outptr = '\0'; - return outbuf; + return outptr - out; } /* an rfc2184 encoded string looks something like: * us-ascii'en'This%20is%20even%20more%20 */ static char * -rfc2184_decode (const char *in, unsigned int len) +rfc2184_decode (const char *in, size_t len) { const char *inptr = in; const char *inend = in + len; - /*const char *charset;*/ + const char *charset; char *decoded = NULL; - /*char *encoding;*/ + char *charenc; /* skips to the end of the charset / beginning of the locale */ inptr = memchr (inptr, '\'', len); if (!inptr) return NULL; -#if 0 - /* someday we'll need to do something with the charset... */ - encoding = g_strndup (in, (unsigned) (inptr - in)); - charset = g_mime_iconv_charset_name (encoding); - g_free (encoding); -#endif + /* save the charset */ + len = inptr - in; + charenc = alloca (len + 1); + memcpy (charenc, in, len); + charenc[len] = '\0'; + charset = g_mime_charset_name (charenc); /* skip to the end of the locale */ - inptr = memchr (inptr + 1, '\'', (unsigned) (inend - inptr - 1)); + inptr = memchr (inptr + 1, '\'', (unsigned int) (inend - inptr - 1)); if (!inptr) return NULL; inptr++; - if (inptr < inend) - decoded = hex_decode (inptr, (unsigned) (inend - inptr)); + if (inptr < inend) { + len = inend - inptr; + if (gmime_interfaces_utf8 && strcasecmp (charset, "UTF-8") != 0) { + char *udecoded; + iconv_t cd; + + decoded = alloca (len + 1); + len = hex_decode (inptr, len, decoded); + + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) { + d(g_warning ("Cannot convert from %s to UTF-8, param display may " + "be corrupt: %s", charset, g_strerror (errno))); + charset = g_mime_charset_locale_name (); + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) + return NULL; + } + + udecoded = g_mime_iconv_strndup (cd, decoded, len); + g_mime_iconv_close (cd); + + if (!udecoded) { + d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be " + "corrupt: %s", len, decoded, g_strerror (errno))); + } + + decoded = udecoded; + } else { + decoded = g_malloc (len + 1); + hex_decode (inptr, len, decoded); + } + } return decoded; } @@ -202,11 +238,11 @@ decode_quoted_string (const char **in) if (*inptr == '"') { start++; - out = g_strndup (start, (unsigned) (inptr - start)); + out = g_strndup (start, (unsigned int) (inptr - start)); inptr++; } else { /* string wasn't properly quoted */ - out = g_strndup (start, (unsigned) (inptr - start)); + out = g_strndup (start, (unsigned int) (inptr - start)); } } @@ -228,7 +264,7 @@ decode_token (const char **in) inptr++; if (inptr > start) { *in = inptr; - return g_strndup (start, (unsigned) (inptr - start)); + return g_strndup (start, (unsigned int) (inptr - start)); } else { return NULL; } @@ -266,7 +302,7 @@ decode_param_token (const char **in) inptr++; if (inptr > start) { *in = inptr; - return g_strndup (start, (unsigned) (inptr - start)); + return g_strndup (start, (unsigned int) (inptr - start)); } else { return NULL; } @@ -324,6 +360,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218 { gboolean is_rfc2184_encoded = FALSE; gboolean is_rfc2184 = FALSE; + gboolean valid_utf8 = FALSE; const char *inptr = *in; char *param, *value = NULL; int rfc2184_part = -1; @@ -348,6 +385,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218 val = rfc2184_decode (value, strlen (value)); if (val) { + valid_utf8 = TRUE; g_free (value); value = val; } @@ -366,9 +404,29 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218 val = g_mime_utils_8bit_header_decode (value); if (val) { + valid_utf8 = TRUE; g_free (value); value = val; } + } else { + if (gmime_interfaces_utf8) + valid_utf8 = !g_mime_utils_text_is_8bit (value, strlen (value)); + } + } + + if (gmime_interfaces_utf8 && value && !valid_utf8) { + /* A (broken) mailer has sent us an unencoded 8bit value. + * Attempt to save it by assuming it's in the user's + * locale and converting to UTF-8 */ + char *buf; + + buf = g_mime_iconv_locale_to_utf8 (value); + if (buf) { + g_free (value); + value = buf; + } else { + d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s", + param, value, g_strerror (errno))); } } @@ -399,8 +457,12 @@ decode_param_list (const char **in) char *name, *value; /* invalid format? */ - if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) + if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) { + if (*inptr == ';') { + continue; + } break; + } if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) { /* rfc2184 allows a parameter to be broken into multiple parts @@ -571,15 +633,15 @@ g_mime_param_append_param (GMimeParam *params, GMimeParam *param) static char * encode_param (const unsigned char *in, gboolean *encoded) { - const unsigned char *inptr; - char *outstr, *charset; - int encoding; + register const unsigned char *inptr; + unsigned char *outbuf = NULL; + iconv_t cd = (iconv_t) -1; + const char *charset = NULL; + char *outstr; GString *out; *encoded = FALSE; - g_return_val_if_fail (in != NULL, NULL); - for (inptr = in; *inptr && inptr - in < GMIME_FOLD_LEN; inptr++) if (*inptr > 127) break; @@ -587,29 +649,53 @@ encode_param (const unsigned char *in, gboolean *encoded) if (*inptr == '\0') return g_strdup (in); + if (*inptr > 127) { + if (gmime_interfaces_utf8) + charset = g_mime_charset_best (in, strlen (in)); + else + charset = g_mime_charset_locale_name (); + } + + if (!charset) + charset = "iso-8859-1"; + + if (gmime_interfaces_utf8) { + if (strcasecmp (charset, "UTF-8") != 0) { + charset = g_mime_charset_name (charset); + cd = g_mime_iconv_open (charset, "UTF-8"); + } + + if (cd == (iconv_t) -1) + charset = "UTF-8"; + } + + if (cd != (iconv_t) -1) { + outbuf = g_mime_iconv_strdup (cd, in); + g_mime_iconv_close (cd); + inptr = outbuf; + } else { + inptr = in; + } + + /* FIXME: set the 'language' as well, assuming we can get that info...? */ out = g_string_new (""); - inptr = in; - encoding = 0; + g_string_sprintfa (out, "%s''", charset); + while (inptr && *inptr) { - unsigned int c = *inptr++ & 0xff; + unsigned char c = *inptr++; + + /* FIXME: make sure that '\'', '*', and ';' are also encoded */ - if (c > 127 && c < 256) { - encoding = MAX (encoding, 1); + if (c > 127) { g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]); } else if (is_lwsp (c) || !(gmime_special_table[c] & IS_ESAFE)) { g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]); } else { - g_string_append_c (out, (char) c); + g_string_append_c (out, c); } } - if (encoding) { - charset = g_strdup_printf ("%s''", g_mime_charset_locale_name ()); - g_string_prepend (out, charset); - g_free (charset); - } else { - g_string_prepend (out, "iso-8859-1''"); - } + g_free (outbuf); outstr = out->str; g_string_free (out, FALSE); diff --git a/gmime-utils.c b/gmime-utils.c index ed01055b..ae202aae 100644 --- a/gmime-utils.c +++ b/gmime-utils.c @@ -30,6 +30,7 @@ #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <errno.h> #ifdef HAVE_ALLOCA_H #include <alloca.h> @@ -40,8 +41,9 @@ #include "gmime-part.h" #include "gmime-charset.h" #include "gmime-iconv.h" +#include "gmime-iconv-utils.h" -#define d(x) +#define d(x) x #ifndef HAVE_ISBLANK #define isblank(c) (c == ' ' || c == '\t') @@ -67,6 +69,9 @@ #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6) #define DATE_TOKEN_HAS_SIGN (1 << 7) +/* from gmime.c */ +extern int gmime_interfaces_utf8; + static char *base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -994,19 +999,18 @@ quoted_decode (const unsigned char *in, size_t len, unsigned char *out) return -1; } -#define is_8bit_word_encoded(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2)) +#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2)) static unsigned char * -decode_encoded_8bit_word (const unsigned char *word) +rfc2047_decode_word (const unsigned char *in, size_t inlen) { const register unsigned char *inptr; const unsigned char *inend; - size_t len; - len = strlen (word); + inptr = in + 2; + inend = in + inlen - 2; - inptr = word + 2; - inend = word + len - 2; + d(fprintf (stderr, "decoding %.*s\n", inlen, in)); inptr = memchr (inptr, '?', inend - inptr); if (inptr && inptr[2] == '?') { @@ -1015,16 +1019,14 @@ decode_encoded_8bit_word (const unsigned char *word) int state = 0; int save = 0; - d(fprintf (stderr, "encoding is '%c'\n", inptr[0])); - inptr++; + switch (*inptr) { case 'B': case 'b': inptr += 2; decoded = alloca (inend - inptr); declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save); - return g_strndup (decoded, declen); break; case 'Q': case 'q': @@ -1036,13 +1038,63 @@ decode_encoded_8bit_word (const unsigned char *word) d(fprintf (stderr, "encountered broken 'Q' encoding\n")); return NULL; } - - return g_strndup (decoded, declen); break; default: d(fprintf (stderr, "unknown encoding\n")); return NULL; } + + if (gmime_interfaces_utf8) { + const char *charset; + unsigned char *buf; + char *charenc, *p; + size_t len; + iconv_t cd; + + len = (inptr - 3) - (in + 2); + charenc = alloca (len + 1); + memcpy (charenc, in + 2, len); + charenc[len] = '\0'; + + /* rfc2231 updates rfc2047 encoded words... + * The ABNF given in RFC 2047 for encoded-words is: + * encoded-word := "=?" charset "?" encoding "?" encoded-text "?=" + * This specification changes this ABNF to: + * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?=" + */ + + /* trim off the 'language' part if it's there... */ + p = strchr (charenc, '*'); + if (p) + *p = '\0'; + + /* slight optimization */ + if (!strcasecmp (charset, "UTF-8")) + return g_strndup (decoded, declen); + + charset = g_mime_charset_name (charenc); + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) { + d(g_warning ("Cannot convert from %s to UTF-8, header display may " + "be corrupt: %s", charset, g_strerror (errno))); + charset = g_mime_charset_locale_name (); + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) + return NULL; + } + + buf = g_mime_iconv_strndup (cd, decoded, declen); + g_mime_iconv_close (cd); + + if (!buf) { + d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be " + "corrupt: %s", declen, decoded, g_strerror (errno))); + } + + return buf; + } else { + return g_strndup (decoded, declen); + } } return NULL; @@ -1080,8 +1132,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in) const unsigned char *word; gboolean was_encoded; - if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len))) - word = dword = decode_encoded_8bit_word (atom->str); + if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len))) + word = dword = rfc2047_decode_word (atom->str, atom->len); else word = atom->str; @@ -1135,8 +1187,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in) const unsigned char *word; gboolean was_encoded; - if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len))) - word = dword = decode_encoded_8bit_word (atom->str); + if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len))) + word = dword = rfc2047_decode_word (atom->str, atom->len); else word = atom->str; @@ -1195,16 +1247,46 @@ quoted_encode (const unsigned char *in, size_t len, unsigned char *out, gushort } static char * -encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_was_encoded) +rfc2047_encode_word (const unsigned char *word, gushort safemask) { unsigned char *encoded, *ptr; + unsigned char *uword = NULL; + iconv_t cd = (iconv_t) -1; size_t enclen, pos, len; + const char *charset; int state = 0; int save = 0; char encoding; len = strlen (word); + if (gmime_interfaces_utf8) { + charset = g_mime_charset_best (word, len); + if (!charset) + charset = "iso-8859-1"; + } else { + charset = g_mime_charset_locale_name (); + } + + if (gmime_interfaces_utf8) { + if (strcasecmp (charset, "UTF-8") != 0) { + charset = g_mime_charset_name (charset); + cd = g_mime_iconv_open (charset, "UTF-8"); + } + + if (cd != (iconv_t) -1) { + uword = g_mime_iconv_strndup (cd, word, len); + g_mime_iconv_close (cd); + } + + if (uword) { + len = strlen (uword); + word = uword; + } else { + charset = "UTF-8"; + } + } + switch (g_mime_utils_best_encoding (word, len)) { case GMIME_PART_ENCODING_BASE64: enclen = BASE64_ENCODE_LEN (len); @@ -1232,16 +1314,16 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa break; default: - if (this_was_encoded) - *this_was_encoded = FALSE; - - return g_strdup (word); + g_assert_not_reached (); } - if (this_was_encoded) - *this_was_encoded = TRUE; + g_free (uword); + + uword = g_strdup_printf ("=?%s?%c?%s?=", charset, encoding, encoded); - return g_strdup_printf ("=?%s?%c?%s?=", g_mime_charset_locale_name (), encoding, encoded); + fprintf (stderr, "resultant encoded word: %s\n", uword); + + return uword; } @@ -1257,7 +1339,7 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa char * g_mime_utils_8bit_header_encode_phrase (const unsigned char *in) { - return encode_8bit_word (in, IS_PSAFE, NULL); + return rfc2047_encode_word (in, IS_PSAFE); } @@ -1276,29 +1358,29 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) register const unsigned char *inptr; GString *out, *word, *lwsp; unsigned char *encoded; - gboolean is8bit = FALSE; + gboolean encode_word = FALSE; gboolean last_was_encoded = FALSE; gboolean last_was_space = FALSE; out = g_string_new (""); word = g_string_new (""); lwsp = g_string_new (""); - inptr = (guchar *) in; + + inptr = in; while (inptr && *inptr) { unsigned char c = *inptr++; if (isspace (c) && !last_was_space) { - gboolean this_was_encoded = FALSE; char *eword; - if (is8bit) - eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded); + if (encode_word) + eword = rfc2047_encode_word (word->str, IS_ESAFE); else - eword = g_strdup (word->str); + eword = word->str; /* append any whitespace */ - if (last_was_encoded && this_was_encoded) { + if (last_was_encoded && encode_word) { /* we need to encode the whitespace */ unsigned char *elwsp; size_t len; @@ -1307,20 +1389,22 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE); elwsp[len] = '\0'; - g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp); + g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp); } else { g_string_append (out, lwsp->str); } - /* append the encoded word */ + /* append the (encoded) word */ g_string_append (out, eword); - g_free (eword); + + if (encode_word) + g_free (eword); g_string_truncate (lwsp, 0); g_string_truncate (word, 0); - last_was_encoded = this_was_encoded; - is8bit = FALSE; + last_was_encoded = encode_word; + encode_word = FALSE; } if (isspace (c)) { @@ -1328,7 +1412,7 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) last_was_space = TRUE; } else { if (c > 127) - is8bit = TRUE; + encode_word = TRUE; g_string_append_c (word, c); last_was_space = FALSE; @@ -1336,16 +1420,15 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) } if (word->len || lwsp->len) { - gboolean this_was_encoded = FALSE; char *eword; - if (is8bit) - eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded); + if (encode_word) + eword = rfc2047_encode_word (word->str, IS_ESAFE); else - eword = g_strdup (word->str); + eword = word->str; /* append any whitespace */ - if (last_was_encoded && this_was_encoded) { + if (last_was_encoded && encode_word) { /* we need to encode the whitespace */ unsigned char *elwsp; size_t len; @@ -1354,14 +1437,16 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE); elwsp[len] = '\0'; - g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp); + g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp); } else { g_string_append (out, lwsp->str); } /* append the encoded word */ g_string_append (out, eword); - g_free (eword); + + if (encode_word) + g_free (eword); } g_string_free (lwsp, TRUE); diff --git a/gmime.c b/gmime.c new file mode 100644 index 00000000..20c422c9 --- /dev/null +++ b/gmime.c @@ -0,0 +1,56 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Authors: Jeffrey Stedfast <fejj@ximian.com> + * + * Copyright 2002 Ximain, Inc. (www.ximian.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. + * + */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "gmime.h" + + +int gmime_interfaces_utf8 = FALSE; + + +/** + * g_mime_init: + * @flags: initialization flags + * + * Initailizes GMime. + **/ +void +g_mime_init (guint32 flags) +{ + static int initialized = FALSE; + + if (initialized) + return; + + initialized = TRUE; + + if (flags & GMIME_INIT_FLAG_UTF8) + gmime_interfaces_utf8 = TRUE; + + g_mime_charset_init (); + + g_mime_iconv_init (); +} @@ -51,6 +51,7 @@ #include "gmime-filter-crlf.h" #include "gmime-filter-from.h" #include "gmime-filter-html.h" +#include "gmime-filter-yenc.h" /* GMIME version */ static const guint gmime_major_version = @GMIME_MAJOR_VERSION@; @@ -65,4 +66,8 @@ static const guint gmime_binary_age = 0; (gmime_major_version == (major) && gmime_minor_version == (minor) && \ gmime_micro_version >= (micro))) +#define GMIME_INIT_FLAG_UTF8 (1 << 0) + +void g_mime_init (guint32 flags); + #endif /* __GMIME_H__ */ diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c index 7e04fc40..815288b8 100644 --- a/gmime/gmime-charset.c +++ b/gmime/gmime-charset.c @@ -34,21 +34,58 @@ #include <alloca.h> #endif +#include "gmime-charset-map-private.h" #include "gmime-charset.h" #include "strlib.h" #if defined (__aix__) || defined (__irix__) || defined (__sun__) -#define CANONICAL_ISO_FORMAT "ISO%d-%d" +#define CANONICAL_ISO_D_FORMAT "ISO%d-%d" +#define CANONICAL_ISO_S_FORMAT "ISO%d-%s" +#elif defined (__hpux__) +#define CANONICAL_ISO_D_FORMAT "iso%d%d" +#define CANONICAL_ISO_S_FORMAT "iso%d%s" #else -#ifdef __hpux__ -#define CANONICAL_ISO_FORMAT "iso%d%d" -#else -#define CANONICAL_ISO_FORMAT "ISO-%d-%d" -#endif /* __hpux__ */ +#define CANONICAL_ISO_D_FORMAT "iso-%d-%d" +#define CANONICAL_ISO_S_FORMAT "iso-%d-%s" #endif /* __aix__, __irix__, __sun__ */ +static struct { + char *name; + unsigned int bit; /* assigned bit */ +} tables[] = { + /* These are the 8bit character sets (other than iso-8859-1, + * which is special-cased) which are supported by both other + * mailers and the GNOME environment. Note that the order + * they're listed in is the order they'll be tried in, so put + * the more-popular ones first. + */ + { "iso-8859-2", 0 }, /* Central/Eastern European */ + { "iso-8859-4", 0 }, /* Baltic */ + { "koi8-r", 0 }, /* Russian */ + { "koi8-u", 0 }, /* Ukranian */ + { "iso-8859-5", 0 }, /* Least-popular Russian encoding */ + { "iso-8859-7", 0 }, /* Greek */ + { "iso-8859-8", 0 }, /* Hebrew; Visual */ + { "iso-8859-9", 0 }, /* Turkish */ + { "iso-8859-13", 0 }, /* Baltic again */ + { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most + * programs that support this support UTF8 + */ + { "windows-1251", 0 }, /* Russian */ + { 0, 0 } +}; + +unsigned int encoding_map[256 * 256]; + +#if G_BYTE_ORDER == G_BIG_ENDIAN +#define UCS "UCS-4BE" +#else +#define UCS "UCS-4LE" +#endif + + struct { char *charset; char *iconv_name; @@ -195,7 +232,6 @@ const char * g_mime_charset_name (const char *charset) { char *name, *iconv_name, *buf; - int codepage; if (charset == NULL) return NULL; @@ -212,26 +248,37 @@ g_mime_charset_name (const char *charset) return iconv_name; if (!strncmp (name, "iso", 3)) { + int iso, codepage; + char *p; + buf = name + 3; if (*buf == '-' || *buf == '_') buf++; - g_assert (strncmp (buf, "8859", 4) == 0); + iso = strtoul (buf, &p, 10); - buf += 4; + g_assert (p > buf); + + buf = p; if (*buf == '-' || *buf == '_') buf++; - codepage = atoi (buf); - g_assert (codepage > 0); + codepage = strtoul (buf, &p, 10); + if (p > buf) { + /* codepage is numeric */ #ifdef __aix__ - if (codepage == 13) - iconv_name = g_strdup ("IBM-921"); - else + if (codepage == 13) + iconv_name = g_strdup ("IBM-921"); + else #endif /* __aix__ */ - iconv_name = g_strdup_printf (CANONICAL_ISO_FORMAT, - 8859, codepage); + iconv_name = g_strdup_printf (CANONICAL_ISO_D_FORMAT, + iso, codepage); + } else { + /* codepage is a string? */ + iconv_name = g_strdup_printf (CANONICAL_ISO_S_FORMAT, + iso, p); + } } else if (!strncmp (name, "windows-", 8)) { buf = name + 8; if (!strncmp (buf, "cp", 2)) @@ -253,3 +300,323 @@ g_mime_charset_name (const char *charset) return iconv_name; } + + + +/* unicode_* and unichar are stolen from glib2... */ +typedef guint32 unichar; + +static const char unicode_skip[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 +}; + +#define UTF8_COMPUTE(ch, mask, len) \ + if (ch < 128) \ + { \ + len = 1; \ + mask = 0x7f; \ + } \ + else if ((ch & 0xe0) == 0xc0) \ + { \ + len = 2; \ + mask = 0x1f; \ + } \ + else if ((ch & 0xf0) == 0xe0) \ + { \ + len = 3; \ + mask = 0x0f; \ + } \ + else if ((ch & 0xf8) == 0xf0) \ + { \ + len = 4; \ + mask = 0x07; \ + } \ + else if ((ch & 0xfc) == 0xf8) \ + { \ + len = 5; \ + mask = 0x03; \ + } \ + else if ((ch & 0xfe) == 0xfc) \ + { \ + len = 6; \ + mask = 0x01; \ + } \ + else \ + len = -1; + +#define UTF8_GET(result, chars, count, mask, len) \ + (result) = (chars)[0] & (mask); \ + for ((count) = 1; (count) < (len); ++(count)) \ + { \ + if (((chars)[(count)] & 0xc0) != 0x80) \ + { \ + (result) = -1; \ + break; \ + } \ + (result) <<= 6; \ + (result) |= ((chars)[(count)] & 0x3f); \ + } + +#define UNICODE_VALID(ch) \ + ((ch) < 0x110000 && \ + ((ch) < 0xD800 || (ch) >= 0xE000) && \ + (ch) != 0xFFFE && (ch) != 0xFFFF) + + + +#define unicode_next_char(p) (char *)((p) + unicode_skip[*(unsigned char *)(p)]) + +unichar +unicode_get_char (const char *p) +{ + unsigned char c = (unsigned char) *p; + int i, mask = 0, len; + unichar result; + + UTF8_COMPUTE (c, mask, len); + if (len == -1) + return (unichar) -1; + UTF8_GET (result, p, i, mask, len); + + return result; +} + +gboolean +unichar_validate (unichar ch) +{ + return UNICODE_VALID (ch); +} + + + +typedef struct _Charset { + unsigned int mask; + unsigned int level; +} Charset; + +static void +charset_init (Charset *charset) +{ + charset->mask = ~0; + charset->level = 0; +} + +static void +charset_step (Charset *charset, const char *in, size_t len) +{ + register const char *inptr = in; + const char *inend = in + len; + register unsigned int mask; + register int level; + + mask = charset->mask; + level = charset->level; + + while (inptr < inend) { + const char *newinptr; + unichar c; + + newinptr = unicode_next_char (inptr); + c = unicode_get_char (inptr); + if (newinptr == NULL || !unichar_validate (c)) { + inptr++; + continue; + } + + inptr = newinptr; + if (c <= 0xffff) { + mask &= charset_mask (c); + + if (c >= 128 && c < 256) + level = MAX (level, 1); + else if (c >= 256) + level = MAX (level, 2); + } else { + mask = 0; + level = MAX (level, 2); + } + } + + charset->mask = mask; + charset->level = level; +} + +static const char * +charset_best_mask (unsigned int mask) +{ + int i; + + for (i = 0; i < sizeof (charinfo) / sizeof (charinfo[0]); i++) { + if (charinfo[i].bit & mask) + return charinfo[i].name; + } + + return "UTF-8"; +} + +static const char * +charset_best_name (Charset *charset) +{ + if (charset->level == 1) + return "iso-8859-1"; + else if (charset->level == 2) + return charset_best_mask (charset->mask); + else + return NULL; +} + + +/** + * g_mime_charset_best: + * @in: a UTF-8 text buffer + * @inlen: length of @in + * + * Computes the best charset to use to encode this text buffer. + * + * Returns the charset name best suited for the input text or %NULL if + * it is US-ASCII safe. + **/ +const char * +g_mime_charset_best (const char *in, size_t inlen) +{ + Charset charset; + + charset_init (&charset); + charset_step (&charset, in, inlen); + return charset_best_name (&charset); +} + + +#ifdef BUILD_CHARSET_MAP + +#include <errno.h> +#include <iconv.h> + +/* the following functions are copied from glib2... */ + + +int main (int argc, char **argv) +{ + char *inptr, *outptr; + size_t inlen, outlen; + guint32 out[128]; + char in[128]; + int i, j, k; + int max, min; + int bit = 0x01; + int bytes; + iconv_t cd; + + /* dont count the terminator */ + bytes = ((sizeof (tables) / sizeof (tables[0])) + 7 - 1) / 8; + + for (i = 0; i < 128; i++) + in[i] = i + 128; + + for (j = 0; tables[j].name; j++) { + cd = iconv_open (UCS, tables[j].name); + inptr = in; + outptr = (char *)(out); + inlen = sizeof (in); + outlen = sizeof (out); + while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) { + if (errno == EILSEQ) { + inptr++; + inlen--; + } else { + g_warning ("%s\n", g_strerror (errno)); + exit (1); + } + } + iconv_close (cd); + + for (i = 0; i < 128 - outlen / 4; i++) { + encoding_map[i] |= bit; + encoding_map[out[i]] |= bit; + } + + tables[j].bit = bit; + bit <<= 1; + } + + printf ("/* This file is automatically generated: DO NOT EDIT */\n\n"); + + for (i = 0; i < 256; i++) { + /* first, do we need this block? */ + for (k = 0; k < bytes; k++) { + for (j = 0; j < 256; j++) { + if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0) + break; + } + if (j < 256) { + /* yes, dump it */ + printf ("static unsigned char m%02x%x[256] = {\n\t", i, k); + for (j = 0; j < 256; j++) { + printf ("0x%02x, ", (encoding_map[i * 256 + j] >> (k * 8)) & 0xff); + if (((j + 1) & 7) == 0 && j < 255) + printf ("\n\t"); + } + printf ("\n};\n\n"); + } + } + } + + printf ("struct {\n"); + for (k = 0; k < bytes; k++) { + printf ("\tunsigned char *bits%d;\n", k); + } + + printf ("} charmap[256] = {\n\t"); + for (i = 0; i < 256; i++) { + /* first, do we need this block? */ + printf ("{ "); + for (k = 0; k < bytes; k++) { + for (j = 0; j < 256; j++) { + if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0) + break; + } + + if (j < 256) { + printf ("m%02x%x, ", i, k); + } else { + printf ("0, "); + } + } + + printf ("}, "); + if (((i + 1) & 7) == 0 && i < 255) + printf ("\n\t"); + } + printf ("\n};\n\n"); + + printf ("struct {\n\tconst char *name;\n\tunsigned int bit;\n} charinfo[] = {\n"); + for (j = 0; tables[j].name; j++) { + printf ("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit); + } + printf ("};\n\n"); + + printf("#define charset_mask(x) \\\n"); + for (k = 0; k < bytes; k++) { + if (k != 0) + printf ("\t| "); + else + printf ("\t"); + + printf ("(charmap[(x) >> 8].bits%d ? charmap[(x) >> 8].bits%d[(x) & 0xff] << %d : 0)", + k, k, k * 8); + + if (k < bytes - 1) + printf ("\t\\\n"); + } + printf ("\n\n"); + + return 0; +} +#endif /* BUILD_CHARSET_MAP */ diff --git a/gmime/gmime-charset.h b/gmime/gmime-charset.h index 544815d8..83be12e5 100644 --- a/gmime/gmime-charset.h +++ b/gmime/gmime-charset.h @@ -30,6 +30,7 @@ extern "C" { #endif /* __cplusplus */ #include <glib.h> +#include <sys/types.h> void g_mime_charset_init (void); @@ -37,6 +38,8 @@ const char *g_mime_charset_locale_name (void); const char *g_mime_charset_name (const char *charset); +const char *g_mime_charset_best (const char *in, size_t inlen); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/gmime/gmime-iconv-utils.c b/gmime/gmime-iconv-utils.c index 48b6e102..3ba8eb06 100644 --- a/gmime/gmime-iconv-utils.c +++ b/gmime/gmime-iconv-utils.c @@ -25,8 +25,10 @@ #endif #include <glib.h> -#include <errno.h> +#include <stdio.h> #include <string.h> +#include <errno.h> + #include "gmime-iconv-utils.h" #include "gmime-charset.h" diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c index 89de69ab..94fea84a 100644 --- a/gmime/gmime-param.c +++ b/gmime/gmime-param.c @@ -26,6 +26,7 @@ #include <string.h> #include <ctype.h> +#include <errno.h> #ifdef HAVE_ALLOCA_H #include <alloca.h> @@ -35,12 +36,16 @@ #include "gmime-table-private.h" #include "gmime-charset.h" #include "gmime-utils.h" +#include "gmime-iconv.h" +#include "gmime-iconv-utils.h" #include "strlib.h" -#define d(x) +#define d(x) x #define w(x) +extern int gmime_interfaces_utf8; + static unsigned char tohex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' @@ -73,18 +78,18 @@ g_mime_param_new (const char *name, const char *value) #define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10) -static char * -hex_decode (const char *in, unsigned int len) +static size_t +hex_decode (const unsigned char *in, size_t len, unsigned char *out) { - register unsigned char *inptr, *outptr; + register const unsigned char *inptr; + register unsigned char *outptr; const unsigned char *inend; - char *outbuf; - inend = (const unsigned char *) in + len; + inptr = in; + inend = in + len; - outptr = outbuf = g_malloc (len); + outptr = out; - inptr = (unsigned char *) in; while (inptr < inend) { if (*inptr == '%') { if (isxdigit (inptr[1]) && isxdigit (inptr[2])) { @@ -98,41 +103,72 @@ hex_decode (const char *in, unsigned int len) *outptr = '\0'; - return outbuf; + return outptr - out; } /* an rfc2184 encoded string looks something like: * us-ascii'en'This%20is%20even%20more%20 */ static char * -rfc2184_decode (const char *in, unsigned int len) +rfc2184_decode (const char *in, size_t len) { const char *inptr = in; const char *inend = in + len; - /*const char *charset;*/ + const char *charset; char *decoded = NULL; - /*char *encoding;*/ + char *charenc; /* skips to the end of the charset / beginning of the locale */ inptr = memchr (inptr, '\'', len); if (!inptr) return NULL; -#if 0 - /* someday we'll need to do something with the charset... */ - encoding = g_strndup (in, (unsigned) (inptr - in)); - charset = g_mime_iconv_charset_name (encoding); - g_free (encoding); -#endif + /* save the charset */ + len = inptr - in; + charenc = alloca (len + 1); + memcpy (charenc, in, len); + charenc[len] = '\0'; + charset = g_mime_charset_name (charenc); /* skip to the end of the locale */ - inptr = memchr (inptr + 1, '\'', (unsigned) (inend - inptr - 1)); + inptr = memchr (inptr + 1, '\'', (unsigned int) (inend - inptr - 1)); if (!inptr) return NULL; inptr++; - if (inptr < inend) - decoded = hex_decode (inptr, (unsigned) (inend - inptr)); + if (inptr < inend) { + len = inend - inptr; + if (gmime_interfaces_utf8 && strcasecmp (charset, "UTF-8") != 0) { + char *udecoded; + iconv_t cd; + + decoded = alloca (len + 1); + len = hex_decode (inptr, len, decoded); + + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) { + d(g_warning ("Cannot convert from %s to UTF-8, param display may " + "be corrupt: %s", charset, g_strerror (errno))); + charset = g_mime_charset_locale_name (); + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) + return NULL; + } + + udecoded = g_mime_iconv_strndup (cd, decoded, len); + g_mime_iconv_close (cd); + + if (!udecoded) { + d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be " + "corrupt: %s", len, decoded, g_strerror (errno))); + } + + decoded = udecoded; + } else { + decoded = g_malloc (len + 1); + hex_decode (inptr, len, decoded); + } + } return decoded; } @@ -202,11 +238,11 @@ decode_quoted_string (const char **in) if (*inptr == '"') { start++; - out = g_strndup (start, (unsigned) (inptr - start)); + out = g_strndup (start, (unsigned int) (inptr - start)); inptr++; } else { /* string wasn't properly quoted */ - out = g_strndup (start, (unsigned) (inptr - start)); + out = g_strndup (start, (unsigned int) (inptr - start)); } } @@ -228,7 +264,7 @@ decode_token (const char **in) inptr++; if (inptr > start) { *in = inptr; - return g_strndup (start, (unsigned) (inptr - start)); + return g_strndup (start, (unsigned int) (inptr - start)); } else { return NULL; } @@ -266,7 +302,7 @@ decode_param_token (const char **in) inptr++; if (inptr > start) { *in = inptr; - return g_strndup (start, (unsigned) (inptr - start)); + return g_strndup (start, (unsigned int) (inptr - start)); } else { return NULL; } @@ -324,6 +360,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218 { gboolean is_rfc2184_encoded = FALSE; gboolean is_rfc2184 = FALSE; + gboolean valid_utf8 = FALSE; const char *inptr = *in; char *param, *value = NULL; int rfc2184_part = -1; @@ -348,6 +385,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218 val = rfc2184_decode (value, strlen (value)); if (val) { + valid_utf8 = TRUE; g_free (value); value = val; } @@ -366,9 +404,29 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218 val = g_mime_utils_8bit_header_decode (value); if (val) { + valid_utf8 = TRUE; g_free (value); value = val; } + } else { + if (gmime_interfaces_utf8) + valid_utf8 = !g_mime_utils_text_is_8bit (value, strlen (value)); + } + } + + if (gmime_interfaces_utf8 && value && !valid_utf8) { + /* A (broken) mailer has sent us an unencoded 8bit value. + * Attempt to save it by assuming it's in the user's + * locale and converting to UTF-8 */ + char *buf; + + buf = g_mime_iconv_locale_to_utf8 (value); + if (buf) { + g_free (value); + value = buf; + } else { + d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s", + param, value, g_strerror (errno))); } } @@ -399,8 +457,12 @@ decode_param_list (const char **in) char *name, *value; /* invalid format? */ - if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) + if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) { + if (*inptr == ';') { + continue; + } break; + } if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) { /* rfc2184 allows a parameter to be broken into multiple parts @@ -571,15 +633,15 @@ g_mime_param_append_param (GMimeParam *params, GMimeParam *param) static char * encode_param (const unsigned char *in, gboolean *encoded) { - const unsigned char *inptr; - char *outstr, *charset; - int encoding; + register const unsigned char *inptr; + unsigned char *outbuf = NULL; + iconv_t cd = (iconv_t) -1; + const char *charset = NULL; + char *outstr; GString *out; *encoded = FALSE; - g_return_val_if_fail (in != NULL, NULL); - for (inptr = in; *inptr && inptr - in < GMIME_FOLD_LEN; inptr++) if (*inptr > 127) break; @@ -587,29 +649,53 @@ encode_param (const unsigned char *in, gboolean *encoded) if (*inptr == '\0') return g_strdup (in); + if (*inptr > 127) { + if (gmime_interfaces_utf8) + charset = g_mime_charset_best (in, strlen (in)); + else + charset = g_mime_charset_locale_name (); + } + + if (!charset) + charset = "iso-8859-1"; + + if (gmime_interfaces_utf8) { + if (strcasecmp (charset, "UTF-8") != 0) { + charset = g_mime_charset_name (charset); + cd = g_mime_iconv_open (charset, "UTF-8"); + } + + if (cd == (iconv_t) -1) + charset = "UTF-8"; + } + + if (cd != (iconv_t) -1) { + outbuf = g_mime_iconv_strdup (cd, in); + g_mime_iconv_close (cd); + inptr = outbuf; + } else { + inptr = in; + } + + /* FIXME: set the 'language' as well, assuming we can get that info...? */ out = g_string_new (""); - inptr = in; - encoding = 0; + g_string_sprintfa (out, "%s''", charset); + while (inptr && *inptr) { - unsigned int c = *inptr++ & 0xff; + unsigned char c = *inptr++; + + /* FIXME: make sure that '\'', '*', and ';' are also encoded */ - if (c > 127 && c < 256) { - encoding = MAX (encoding, 1); + if (c > 127) { g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]); } else if (is_lwsp (c) || !(gmime_special_table[c] & IS_ESAFE)) { g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]); } else { - g_string_append_c (out, (char) c); + g_string_append_c (out, c); } } - if (encoding) { - charset = g_strdup_printf ("%s''", g_mime_charset_locale_name ()); - g_string_prepend (out, charset); - g_free (charset); - } else { - g_string_prepend (out, "iso-8859-1''"); - } + g_free (outbuf); outstr = out->str; g_string_free (out, FALSE); diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c index ed01055b..ae202aae 100644 --- a/gmime/gmime-utils.c +++ b/gmime/gmime-utils.c @@ -30,6 +30,7 @@ #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <errno.h> #ifdef HAVE_ALLOCA_H #include <alloca.h> @@ -40,8 +41,9 @@ #include "gmime-part.h" #include "gmime-charset.h" #include "gmime-iconv.h" +#include "gmime-iconv-utils.h" -#define d(x) +#define d(x) x #ifndef HAVE_ISBLANK #define isblank(c) (c == ' ' || c == '\t') @@ -67,6 +69,9 @@ #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6) #define DATE_TOKEN_HAS_SIGN (1 << 7) +/* from gmime.c */ +extern int gmime_interfaces_utf8; + static char *base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -994,19 +999,18 @@ quoted_decode (const unsigned char *in, size_t len, unsigned char *out) return -1; } -#define is_8bit_word_encoded(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2)) +#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2)) static unsigned char * -decode_encoded_8bit_word (const unsigned char *word) +rfc2047_decode_word (const unsigned char *in, size_t inlen) { const register unsigned char *inptr; const unsigned char *inend; - size_t len; - len = strlen (word); + inptr = in + 2; + inend = in + inlen - 2; - inptr = word + 2; - inend = word + len - 2; + d(fprintf (stderr, "decoding %.*s\n", inlen, in)); inptr = memchr (inptr, '?', inend - inptr); if (inptr && inptr[2] == '?') { @@ -1015,16 +1019,14 @@ decode_encoded_8bit_word (const unsigned char *word) int state = 0; int save = 0; - d(fprintf (stderr, "encoding is '%c'\n", inptr[0])); - inptr++; + switch (*inptr) { case 'B': case 'b': inptr += 2; decoded = alloca (inend - inptr); declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save); - return g_strndup (decoded, declen); break; case 'Q': case 'q': @@ -1036,13 +1038,63 @@ decode_encoded_8bit_word (const unsigned char *word) d(fprintf (stderr, "encountered broken 'Q' encoding\n")); return NULL; } - - return g_strndup (decoded, declen); break; default: d(fprintf (stderr, "unknown encoding\n")); return NULL; } + + if (gmime_interfaces_utf8) { + const char *charset; + unsigned char *buf; + char *charenc, *p; + size_t len; + iconv_t cd; + + len = (inptr - 3) - (in + 2); + charenc = alloca (len + 1); + memcpy (charenc, in + 2, len); + charenc[len] = '\0'; + + /* rfc2231 updates rfc2047 encoded words... + * The ABNF given in RFC 2047 for encoded-words is: + * encoded-word := "=?" charset "?" encoding "?" encoded-text "?=" + * This specification changes this ABNF to: + * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?=" + */ + + /* trim off the 'language' part if it's there... */ + p = strchr (charenc, '*'); + if (p) + *p = '\0'; + + /* slight optimization */ + if (!strcasecmp (charset, "UTF-8")) + return g_strndup (decoded, declen); + + charset = g_mime_charset_name (charenc); + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) { + d(g_warning ("Cannot convert from %s to UTF-8, header display may " + "be corrupt: %s", charset, g_strerror (errno))); + charset = g_mime_charset_locale_name (); + cd = g_mime_iconv_open ("UTF-8", charset); + if (cd == (iconv_t) -1) + return NULL; + } + + buf = g_mime_iconv_strndup (cd, decoded, declen); + g_mime_iconv_close (cd); + + if (!buf) { + d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be " + "corrupt: %s", declen, decoded, g_strerror (errno))); + } + + return buf; + } else { + return g_strndup (decoded, declen); + } } return NULL; @@ -1080,8 +1132,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in) const unsigned char *word; gboolean was_encoded; - if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len))) - word = dword = decode_encoded_8bit_word (atom->str); + if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len))) + word = dword = rfc2047_decode_word (atom->str, atom->len); else word = atom->str; @@ -1135,8 +1187,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in) const unsigned char *word; gboolean was_encoded; - if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len))) - word = dword = decode_encoded_8bit_word (atom->str); + if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len))) + word = dword = rfc2047_decode_word (atom->str, atom->len); else word = atom->str; @@ -1195,16 +1247,46 @@ quoted_encode (const unsigned char *in, size_t len, unsigned char *out, gushort } static char * -encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_was_encoded) +rfc2047_encode_word (const unsigned char *word, gushort safemask) { unsigned char *encoded, *ptr; + unsigned char *uword = NULL; + iconv_t cd = (iconv_t) -1; size_t enclen, pos, len; + const char *charset; int state = 0; int save = 0; char encoding; len = strlen (word); + if (gmime_interfaces_utf8) { + charset = g_mime_charset_best (word, len); + if (!charset) + charset = "iso-8859-1"; + } else { + charset = g_mime_charset_locale_name (); + } + + if (gmime_interfaces_utf8) { + if (strcasecmp (charset, "UTF-8") != 0) { + charset = g_mime_charset_name (charset); + cd = g_mime_iconv_open (charset, "UTF-8"); + } + + if (cd != (iconv_t) -1) { + uword = g_mime_iconv_strndup (cd, word, len); + g_mime_iconv_close (cd); + } + + if (uword) { + len = strlen (uword); + word = uword; + } else { + charset = "UTF-8"; + } + } + switch (g_mime_utils_best_encoding (word, len)) { case GMIME_PART_ENCODING_BASE64: enclen = BASE64_ENCODE_LEN (len); @@ -1232,16 +1314,16 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa break; default: - if (this_was_encoded) - *this_was_encoded = FALSE; - - return g_strdup (word); + g_assert_not_reached (); } - if (this_was_encoded) - *this_was_encoded = TRUE; + g_free (uword); + + uword = g_strdup_printf ("=?%s?%c?%s?=", charset, encoding, encoded); - return g_strdup_printf ("=?%s?%c?%s?=", g_mime_charset_locale_name (), encoding, encoded); + fprintf (stderr, "resultant encoded word: %s\n", uword); + + return uword; } @@ -1257,7 +1339,7 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa char * g_mime_utils_8bit_header_encode_phrase (const unsigned char *in) { - return encode_8bit_word (in, IS_PSAFE, NULL); + return rfc2047_encode_word (in, IS_PSAFE); } @@ -1276,29 +1358,29 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) register const unsigned char *inptr; GString *out, *word, *lwsp; unsigned char *encoded; - gboolean is8bit = FALSE; + gboolean encode_word = FALSE; gboolean last_was_encoded = FALSE; gboolean last_was_space = FALSE; out = g_string_new (""); word = g_string_new (""); lwsp = g_string_new (""); - inptr = (guchar *) in; + + inptr = in; while (inptr && *inptr) { unsigned char c = *inptr++; if (isspace (c) && !last_was_space) { - gboolean this_was_encoded = FALSE; char *eword; - if (is8bit) - eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded); + if (encode_word) + eword = rfc2047_encode_word (word->str, IS_ESAFE); else - eword = g_strdup (word->str); + eword = word->str; /* append any whitespace */ - if (last_was_encoded && this_was_encoded) { + if (last_was_encoded && encode_word) { /* we need to encode the whitespace */ unsigned char *elwsp; size_t len; @@ -1307,20 +1389,22 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE); elwsp[len] = '\0'; - g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp); + g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp); } else { g_string_append (out, lwsp->str); } - /* append the encoded word */ + /* append the (encoded) word */ g_string_append (out, eword); - g_free (eword); + + if (encode_word) + g_free (eword); g_string_truncate (lwsp, 0); g_string_truncate (word, 0); - last_was_encoded = this_was_encoded; - is8bit = FALSE; + last_was_encoded = encode_word; + encode_word = FALSE; } if (isspace (c)) { @@ -1328,7 +1412,7 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) last_was_space = TRUE; } else { if (c > 127) - is8bit = TRUE; + encode_word = TRUE; g_string_append_c (word, c); last_was_space = FALSE; @@ -1336,16 +1420,15 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) } if (word->len || lwsp->len) { - gboolean this_was_encoded = FALSE; char *eword; - if (is8bit) - eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded); + if (encode_word) + eword = rfc2047_encode_word (word->str, IS_ESAFE); else - eword = g_strdup (word->str); + eword = word->str; /* append any whitespace */ - if (last_was_encoded && this_was_encoded) { + if (last_was_encoded && encode_word) { /* we need to encode the whitespace */ unsigned char *elwsp; size_t len; @@ -1354,14 +1437,16 @@ g_mime_utils_8bit_header_encode (const unsigned char *in) len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE); elwsp[len] = '\0'; - g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp); + g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp); } else { g_string_append (out, lwsp->str); } /* append the encoded word */ g_string_append (out, eword); - g_free (eword); + + if (encode_word) + g_free (eword); } g_string_free (lwsp, TRUE); diff --git a/gmime/gmime.c b/gmime/gmime.c new file mode 100644 index 00000000..20c422c9 --- /dev/null +++ b/gmime/gmime.c @@ -0,0 +1,56 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Authors: Jeffrey Stedfast <fejj@ximian.com> + * + * Copyright 2002 Ximain, Inc. (www.ximian.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. + * + */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "gmime.h" + + +int gmime_interfaces_utf8 = FALSE; + + +/** + * g_mime_init: + * @flags: initialization flags + * + * Initailizes GMime. + **/ +void +g_mime_init (guint32 flags) +{ + static int initialized = FALSE; + + if (initialized) + return; + + initialized = TRUE; + + if (flags & GMIME_INIT_FLAG_UTF8) + gmime_interfaces_utf8 = TRUE; + + g_mime_charset_init (); + + g_mime_iconv_init (); +} diff --git a/gmime/gmime.h.in b/gmime/gmime.h.in index 66fa4268..cc586168 100644 --- a/gmime/gmime.h.in +++ b/gmime/gmime.h.in @@ -51,6 +51,7 @@ #include "gmime-filter-crlf.h" #include "gmime-filter-from.h" #include "gmime-filter-html.h" +#include "gmime-filter-yenc.h" /* GMIME version */ static const guint gmime_major_version = @GMIME_MAJOR_VERSION@; @@ -65,4 +66,8 @@ static const guint gmime_binary_age = 0; (gmime_major_version == (major) && gmime_minor_version == (minor) && \ gmime_micro_version >= (micro))) +#define GMIME_INIT_FLAG_UTF8 (1 << 0) + +void g_mime_init (guint32 flags); + #endif /* __GMIME_H__ */ diff --git a/gmime/internet-address.c b/gmime/internet-address.c index cc859774..12fa48f0 100644 --- a/gmime/internet-address.c +++ b/gmime/internet-address.c @@ -27,15 +27,20 @@ #include <string.h> #include <ctype.h> +#include <errno.h> #include "internet-address.h" #include "gmime-table-private.h" #include "gmime-utils.h" +#include "gmime-iconv-utils.h" #define w(x) x +extern int gmime_interfaces_utf8; + + /** * internet_address_new: * @@ -558,48 +563,6 @@ decode_quoted_string (const char **in) return out; } -#if 0 -static char * -decode_quoted_string (const char **in) -{ - const char *inptr = *in; - char *out = NULL, *outptr; - int outlen; - int c; - - decode_lwsp (&inptr); - if (*inptr == '"') { - const char *intmp; - int skip = 0; - - /* first, calc length */ - inptr++; - intmp = inptr + 1; - while ((c = *intmp++) && c != '"') { - if (c == '\\' && *intmp) { - intmp++; - skip++; - } - } - - outlen = intmp - inptr - skip; - out = outptr = g_malloc (outlen + 1); - - while ((c = *inptr++) && c != '"') { - if (c == '\\' && *inptr) { - c = *inptr++; - } - *outptr++ = c; - } - *outptr = '\0'; - } - - *in = inptr; - - return out; -} -#endif - static char * decode_atom (const char **in) { @@ -867,8 +830,26 @@ decode_mailbox (const char **in) *in = inptr; - if (addr->len) + if (addr->len) { + if (gmime_interfaces_utf8 && name && g_mime_utils_text_is_8bit (name->str, name->len)) { + /* A (broken) mailer has sent us an unencoded 8bit value. + * Attempt to save it by assuming it's in the user's + * locale and converting to UTF-8 */ + char *buf; + + buf = g_mime_iconv_locale_to_utf8 (name->str); + if (buf) { + g_string_truncate (name, 0); + g_string_append (name, buf); + g_free (buf); + } else { + (g_warning ("Failed to convert \"%s\" to UTF-8: %s", + name->str, g_strerror (errno))); + } + } + mailbox = internet_address_new_name (name ? name->str : NULL, addr->str); + } g_string_free (addr, TRUE); if (name) diff --git a/internet-address.c b/internet-address.c index cc859774..12fa48f0 100644 --- a/internet-address.c +++ b/internet-address.c @@ -27,15 +27,20 @@ #include <string.h> #include <ctype.h> +#include <errno.h> #include "internet-address.h" #include "gmime-table-private.h" #include "gmime-utils.h" +#include "gmime-iconv-utils.h" #define w(x) x +extern int gmime_interfaces_utf8; + + /** * internet_address_new: * @@ -558,48 +563,6 @@ decode_quoted_string (const char **in) return out; } -#if 0 -static char * -decode_quoted_string (const char **in) -{ - const char *inptr = *in; - char *out = NULL, *outptr; - int outlen; - int c; - - decode_lwsp (&inptr); - if (*inptr == '"') { - const char *intmp; - int skip = 0; - - /* first, calc length */ - inptr++; - intmp = inptr + 1; - while ((c = *intmp++) && c != '"') { - if (c == '\\' && *intmp) { - intmp++; - skip++; - } - } - - outlen = intmp - inptr - skip; - out = outptr = g_malloc (outlen + 1); - - while ((c = *inptr++) && c != '"') { - if (c == '\\' && *inptr) { - c = *inptr++; - } - *outptr++ = c; - } - *outptr = '\0'; - } - - *in = inptr; - - return out; -} -#endif - static char * decode_atom (const char **in) { @@ -867,8 +830,26 @@ decode_mailbox (const char **in) *in = inptr; - if (addr->len) + if (addr->len) { + if (gmime_interfaces_utf8 && name && g_mime_utils_text_is_8bit (name->str, name->len)) { + /* A (broken) mailer has sent us an unencoded 8bit value. + * Attempt to save it by assuming it's in the user's + * locale and converting to UTF-8 */ + char *buf; + + buf = g_mime_iconv_locale_to_utf8 (name->str); + if (buf) { + g_string_truncate (name, 0); + g_string_append (name, buf); + g_free (buf); + } else { + (g_warning ("Failed to convert \"%s\" to UTF-8: %s", + name->str, g_strerror (errno))); + } + } + mailbox = internet_address_new_name (name ? name->str : NULL, addr->str); + } g_string_free (addr, TRUE); if (name) diff --git a/test-mime.c b/test-mime.c index ae24227b..a2d0a421 100644 --- a/test-mime.c +++ b/test-mime.c @@ -422,6 +422,8 @@ test_date (void) int main (int argc, char *argv[]) { + g_mime_init (GMIME_INIT_FLAG_UTF8); + test_date (); test_onepart (); diff --git a/test-parser.c b/test-parser.c index 957acbfa..c1fb58ea 100644 --- a/test-parser.c +++ b/test-parser.c @@ -124,6 +124,8 @@ int main (int argc, char **argv) if (fd == -1) return 0; + g_mime_init (GMIME_INIT_FLAG_UTF8); + #ifdef STREAM_MMAP stream = g_mime_stream_mmap_new (fd, PROT_READ, MAP_PRIVATE); g_assert (stream != NULL); diff --git a/tests/test-mime.c b/tests/test-mime.c index ae24227b..a2d0a421 100644 --- a/tests/test-mime.c +++ b/tests/test-mime.c @@ -422,6 +422,8 @@ test_date (void) int main (int argc, char *argv[]) { + g_mime_init (GMIME_INIT_FLAG_UTF8); + test_date (); test_onepart (); diff --git a/tests/test-parser.c b/tests/test-parser.c index 957acbfa..c1fb58ea 100644 --- a/tests/test-parser.c +++ b/tests/test-parser.c @@ -124,6 +124,8 @@ int main (int argc, char **argv) if (fd == -1) return 0; + g_mime_init (GMIME_INIT_FLAG_UTF8); + #ifdef STREAM_MMAP stream = g_mime_stream_mmap_new (fd, PROT_READ, MAP_PRIVATE); g_assert (stream != NULL); |