summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jeffrey Stedfast <fejj@src.gnome.org> 2002-03-20 01:16:50 +0000
committer Jeffrey Stedfast <fejj@src.gnome.org> 2002-03-20 01:16:50 +0000
commit d7826a7752e74e952acdd098a902ae8f442d5275 (patch)
tree cf6f2ea418c0a55b9920f9389093292e98c0380c
parent 9ce34cd38047cf5fd6145a42f6cf0eb6eb259875 (diff)
download gmime-d7826a7752e74e952acdd098a902ae8f442d5275.tar.gz
*** empty log message ***
-rw-r--r-- ChangeLog 26
-rw-r--r-- Makefile.am 10
-rw-r--r-- aclocal.m4 565
-rw-r--r-- gmime-charset.c 399
-rw-r--r-- gmime-charset.h 3
-rw-r--r-- gmime-iconv-utils.c 4
-rw-r--r-- gmime-param.c 174
-rw-r--r-- gmime-utils.c 175
-rw-r--r-- gmime.c 56
-rw-r--r-- gmime.h.in 5
-rw-r--r-- gmime/gmime-charset.c 399
-rw-r--r-- gmime/gmime-charset.h 3
-rw-r--r-- gmime/gmime-iconv-utils.c 4
-rw-r--r-- gmime/gmime-param.c 174
-rw-r--r-- gmime/gmime-utils.c 175
-rw-r--r-- gmime/gmime.c 56
-rw-r--r-- gmime/gmime.h.in 5
-rw-r--r-- gmime/internet-address.c 67
-rw-r--r-- internet-address.c 67
-rw-r--r-- test-mime.c 2
-rw-r--r-- test-parser.c 2
-rw-r--r-- tests/test-mime.c 2
-rw-r--r-- tests/test-parser.c 2
23 files changed, 1876 insertions, 499 deletions
diff --git a/ChangeLog b/ChangeLog
index 43299768..be3e6471 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,29 @@
+2002-03-19 Jeffrey Stedfast <fejj@ximian.com>
+
+ * internet-address.c (decode_mailbox): If gmime was initialized to
+ use UTF-8 interfaces and we encountered illegal 8bit text, attempt
+ to convert it to UTF-8 using the user's locale charset.
+
+ * gmime-charset.c (g_mime_charset_name): Fixed the ISO parser.
+ (g_mime_charset_best): New function to compute the best charset
+ for a given text input.
+
+ * gmime-param.c (rfc2184_decode): If gmime was initialized to use
+ UTF-8 interfaces, then convert the decoded text to UTF-8 here.
+ (decode_param): If gmime was initialized to use UTF-8 interfaces
+ and we encounter illegal 8bit text, attempt to convert it to
+ UTF-8.
+ (encode_param): If gmime was initialized to use UTF-8 interfaces,
+ convert the input buffer to the locale charset before encoding.
+
+ * gmime-utils.c (rfc2047_decode_word): Renamed from
+ decoded_encoded_8bit_word. If gmime was initialized to use UTF-8
+ interfaces, then convert the header to UTF-8 here.
+ (rfc2047_encode_word): Renamed from encode_8bit_word. If gmime was
+ initialized to use UTF-8 interfaces, convert input text to locale
+ charset before encoding.
+ (g_mime_utils_8bit_header_encode): Avoid excess mallocing.
+
2002-03-17 Jeffrey Stedfast <fejj@ximian.com>
* gmime-message.c (g_mime_message_write_to_stream): fixed doc
diff --git a/Makefile.am b/Makefile.am
index 4e950c7f..5299ef11 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -18,6 +18,7 @@ libgmime_la_SOURCES = \
alloca.c \
strlib.c \
memchunk.c \
+ gmime.c \
gmime-charset.c \
gmime-content-type.c \
gmime-data-wrapper.c \
@@ -82,10 +83,11 @@ gmimeinclude_HEADERS = \
gmime-utils.h \
internet-address.h
-noinst_HEADERS = \
- strlib.h \
- memchunk.h \
- gmime-table-private.h \
+noinst_HEADERS = \
+ strlib.h \
+ memchunk.h \
+ gmime-table-private.h \
+ gmime-charset-map-private.h \
md5-utils.h
DEPS = $(top_builddir)/libgmime.la
diff --git a/aclocal.m4 b/aclocal.m4
index 6fd002c1..71b92b70 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,6 +1,6 @@
-dnl aclocal.m4 generated automatically by aclocal 1.4-p5
+dnl aclocal.m4 generated automatically by aclocal 1.4-p4
-dnl Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc.
+dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
@@ -12,7 +12,7 @@ dnl PARTICULAR PURPOSE.
# Like AC_CONFIG_HEADER, but automatically create stamp file.
-AC_DEFUN([AM_CONFIG_HEADER],
+AC_DEFUN(AM_CONFIG_HEADER,
[AC_PREREQ([2.12])
AC_CONFIG_HEADER([$1])
dnl When config.status generates a header, we must update the stamp-h file.
@@ -42,7 +42,7 @@ changequote([,]))])
dnl Usage:
dnl AM_INIT_AUTOMAKE(package,version, [no-define])
-AC_DEFUN([AM_INIT_AUTOMAKE],
+AC_DEFUN(AM_INIT_AUTOMAKE,
[AC_REQUIRE([AC_PROG_INSTALL])
PACKAGE=[$1]
AC_SUBST(PACKAGE)
@@ -70,7 +70,7 @@ AC_REQUIRE([AC_PROG_MAKE_SET])])
# Check to make sure that the build environment is sane.
#
-AC_DEFUN([AM_SANITY_CHECK],
+AC_DEFUN(AM_SANITY_CHECK,
[AC_MSG_CHECKING([whether build environment is sane])
# Just in case
sleep 1
@@ -111,7 +111,7 @@ AC_MSG_RESULT(yes)])
dnl AM_MISSING_PROG(NAME, PROGRAM, DIRECTORY)
dnl The program must properly implement --version.
-AC_DEFUN([AM_MISSING_PROG],
+AC_DEFUN(AM_MISSING_PROG,
[AC_MSG_CHECKING(for working $2)
# Run test in a subshell; some versions of sh will print an error if
# an executable is not found, even if stderr is redirected.
@@ -161,7 +161,7 @@ AC_DEFUN([AC_ISC_POSIX],
# program @code{ansi2knr}, which comes with Ghostscript.
# @end defmac
-AC_DEFUN([AM_PROG_CC_STDC],
+AC_DEFUN(AM_PROG_CC_STDC,
[AC_REQUIRE([AC_PROG_CC])
AC_BEFORE([$0], [AC_C_INLINE])
AC_BEFORE([$0], [AC_C_CONST])
@@ -234,6 +234,7 @@ esac
# libtool.m4 - Configure libtool for the host system. -*-Shell-script-*-
# serial 46 AC_PROG_LIBTOOL
+
AC_DEFUN([AC_PROG_LIBTOOL],
[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl
@@ -359,9 +360,30 @@ _LT_AC_LTCONFIG_HACK
])
+# AC_LIBTOOL_HEADER_ASSERT
+# ------------------------
+AC_DEFUN([AC_LIBTOOL_HEADER_ASSERT],
+[AC_CACHE_CHECK([whether $CC supports assert without backlinking],
+ [lt_cv_func_assert_works],
+ [case $host in
+ *-*-solaris*)
+ if test "$GCC" = yes && test "$with_gnu_ld" != yes; then
+ case `$CC --version 2>/dev/null` in
+ [[12]].*) lt_cv_func_assert_works=no ;;
+ *) lt_cv_func_assert_works=yes ;;
+ esac
+ fi
+ ;;
+ esac])
+
+if test "x$lt_cv_func_assert_works" = xyes; then
+ AC_CHECK_HEADERS(assert.h)
+fi
+])# AC_LIBTOOL_HEADER_ASSERT
+
# _LT_AC_CHECK_DLFCN
# --------------------
-AC_DEFUN(_LT_AC_CHECK_DLFCN,
+AC_DEFUN([_LT_AC_CHECK_DLFCN],
[AC_CHECK_HEADERS(dlfcn.h)
])# _LT_AC_CHECK_DLFCN
@@ -379,10 +401,10 @@ AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [dnl
# [They come from Ultrix. What could be older than Ultrix?!! ;)]
# Character class describing NM global symbol codes.
-[symcode='[BCDEGRST]']
+symcode='[[BCDEGRST]]'
# Regexp to match symbols that can be accessed directly from C.
-[sympat='\([_A-Za-z][_A-Za-z0-9]*\)']
+sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
# Transform the above into a raw symbol and a C symbol.
symxfrm='\1 \2\3 \3'
@@ -390,25 +412,29 @@ symxfrm='\1 \2\3 \3'
# Transform an extracted symbol line into a proper C declaration
lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'"
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+
# Define system-specific variables.
case $host_os in
aix*)
- [symcode='[BCDT]']
+ symcode='[[BCDT]]'
;;
cygwin* | mingw* | pw32*)
- [symcode='[ABCDGISTW]']
+ symcode='[[ABCDGISTW]]'
;;
hpux*) # Its linker distinguishes data from code symbols
lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+ lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
;;
-irix*)
- [symcode='[BCDEGRST]']
+irix* | nonstopux*)
+ symcode='[[BCDEGRST]]'
;;
solaris* | sysv5*)
- [symcode='[BDT]']
+ symcode='[[BDT]]'
;;
sysv4)
- [symcode='[DFNSTU]']
+ symcode='[[DFNSTU]]'
;;
esac
@@ -422,14 +448,14 @@ esac
# If we're using GNU nm, then use its standard symbol codes.
if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
- [symcode='[ABCDGISTW]']
+ symcode='[[ABCDGISTW]]'
fi
# Try without a prefix undercore, then with it.
for ac_symprfx in "" "_"; do
# Write the raw and C identifiers.
-[lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"]
+lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"
# Check to see that the pipe works correctly.
pipe_works=no
@@ -471,23 +497,23 @@ EOF
cat <<EOF >> conftest.$ac_ext
#if defined (__STDC__) && __STDC__
-# define lt_ptr_t void *
+# define lt_ptr void *
#else
-# define lt_ptr_t char *
+# define lt_ptr char *
# define const
#endif
/* The mapping between symbol names and symbols. */
const struct {
const char *name;
- lt_ptr_t address;
+ lt_ptr address;
}
-[lt_preloaded_symbols[] =]
+lt_preloaded_symbols[[]] =
{
EOF
- sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" >> conftest.$ac_ext
+ sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr) \&\2},/" < "$nlist" >> conftest.$ac_ext
cat <<\EOF >> conftest.$ac_ext
- {0, (lt_ptr_t) 0}
+ {0, (lt_ptr) 0}
};
#ifdef __cplusplus
@@ -531,10 +557,13 @@ done
global_symbol_pipe="$lt_cv_sys_global_symbol_pipe"
if test -z "$lt_cv_sys_global_symbol_pipe"; then
global_symbol_to_cdecl=
+ global_symbol_to_c_name_address=
else
global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl"
+ global_symbol_to_c_name_address="$lt_cv_global_symbol_to_c_name_address"
fi
-if test -z "$global_symbol_pipe$global_symbol_to_cdecl"; then
+if test -z "$global_symbol_pipe$global_symbol_to_cdec$global_symbol_to_c_name_address";
+then
AC_MSG_RESULT(failed)
else
AC_MSG_RESULT(ok)
@@ -552,16 +581,17 @@ if test "X${PATH_SEPARATOR+set}" != Xset; then
*-DOS) lt_cv_sys_path_separator=';' ;;
*) lt_cv_sys_path_separator=':' ;;
esac
+ PATH_SEPARATOR=$lt_cv_sys_path_separator
fi
])# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR
# _LT_AC_PROG_ECHO_BACKSLASH
# --------------------------
# Add some code to the start of the generated configure script which
-# will find an echo command which doesn;t interpret backslashes.
+# will find an echo command which doesn't interpret backslashes.
AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH],
[ifdef([AC_DIVERSION_NOTICE], [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)],
- [AC_DIVERT_PUSH(NOTICE)])
+ [AC_DIVERT_PUSH(NOTICE)])
_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR
# Check that we are running under the correct shell.
@@ -627,7 +657,7 @@ else
#
# So, first we look for a working echo in the user's PATH.
- IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=$PATH_SEPARATOR
for dir in $PATH /usr/ucb; do
if (test -f $dir/echo || test -f $dir/echo$ac_exeext) &&
test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' &&
@@ -716,7 +746,7 @@ AC_DIVERT_POP
# _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
# ------------------------------------------------------------------
-AC_DEFUN(_LT_AC_TRY_DLOPEN_SELF,
+AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF],
[if test "$cross_compiling" = yes; then :
[$4]
else
@@ -803,7 +833,7 @@ rm -fr conftest*
# AC_LIBTOOL_DLOPEN_SELF
# -------------------
-AC_DEFUN(AC_LIBTOOL_DLOPEN_SELF,
+AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF],
[if test "x$enable_dlopen" != xyes; then
enable_dlopen=unknown
enable_dlopen_self=unknown
@@ -825,16 +855,22 @@ else
;;
*)
- AC_CHECK_LIB(dl, dlopen, [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
- [AC_CHECK_FUNC(dlopen, lt_cv_dlopen="dlopen",
- [AC_CHECK_FUNC(shl_load, lt_cv_dlopen="shl_load",
- [AC_CHECK_LIB(svld, dlopen,
- [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
- [AC_CHECK_LIB(dld, shl_load,
- [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"])
+ AC_CHECK_FUNC([shl_load],
+ [lt_cv_dlopen="shl_load"],
+ [AC_CHECK_LIB([dld], [shl_load],
+ [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld"],
+ [AC_CHECK_FUNC([dlopen],
+ [lt_cv_dlopen="dlopen"],
+ [AC_CHECK_LIB([dl], [dlopen],
+ [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
+ [AC_CHECK_LIB([svld], [dlopen],
+ [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
+ [AC_CHECK_LIB([dld], [dld_link],
+ [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"])
+ ])
])
- ])
- ])
+ ])
+ ])
])
;;
esac
@@ -897,10 +933,10 @@ AC_DEFUN([_LT_AC_LTCONFIG_HACK],
# Sed substitution that helps us do robust quoting. It backslashifies
# metacharacters that are still active within double-quoted strings.
Xsed='sed -e s/^X//'
-[sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g']
+sed_quote_subst='s/\([[\\"\\`$\\\\]]\)/\\\1/g'
# Same as above, but do not quote variable references.
-[double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g']
+double_quote_subst='s/\([[\\"\\`\\\\]]\)/\\\1/g'
# Sed substitution to delay expansion of an escaped shell variable in a
# double_quote_subst'ed string.
@@ -969,8 +1005,15 @@ old_postinstall_cmds='chmod 644 $oldlib'
old_postuninstall_cmds=
if test -n "$RANLIB"; then
+ case $host_os in
+ openbsd*)
+ old_postinstall_cmds="\$RANLIB -t \$oldlib~$old_postinstall_cmds"
+ ;;
+ *)
+ old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds"
+ ;;
+ esac
old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
- old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds"
fi
# Allow CC to be a program name with arguments.
@@ -990,7 +1033,7 @@ rmdir .libs 2>/dev/null
AC_MSG_RESULT($objdir)
-AC_ARG_WITH(pic,
+AC_ARG_WITH(pic,
[ --with-pic try to use only PIC/non-PIC objects [default=use both]],
pic_mode="$withval", pic_mode=default)
test -z "$pic_mode" && pic_mode=default
@@ -1018,7 +1061,7 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic,
# libC (AIX C++ library), which obviously isn't included in the libraries
# list by gcc. This causes undefined symbols with -static flags.
# This hack allows C programs to be linked with "-static -ldl", but
- # we not sure about C++ programs.
+ # not sure about C++ programs.
lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC"
;;
amigaos*)
@@ -1027,7 +1070,7 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic,
# like `-m68040'.
lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4'
;;
- beos* | irix5* | irix6* | osf3* | osf4* | osf5*)
+ beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
# PIC is the default for these OSes.
;;
darwin* | rhapsody*)
@@ -1053,13 +1096,13 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic,
# PORTME Check for PIC flags for the system compiler.
case $host_os in
aix3* | aix4* | aix5*)
+ lt_cv_prog_cc_wl='-Wl,'
# All AIX code is PIC.
if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- lt_cv_prog_cc_static='-Bstatic'
- lt_cv_prog_cc_wl='-Wl,'
+ # AIX 5 now supports IA64 processor
+ lt_cv_prog_cc_static='-Bstatic'
else
- lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp'
+ lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp'
fi
;;
@@ -1070,7 +1113,7 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic,
lt_cv_prog_cc_pic='+Z'
;;
- irix5* | irix6*)
+ irix5* | irix6* | nonstopux*)
lt_cv_prog_cc_wl='-Wl,'
lt_cv_prog_cc_static='-non_shared'
# PIC (with -KPIC) is the default.
@@ -1115,9 +1158,9 @@ AC_CACHE_VAL(lt_cv_prog_cc_pic,
lt_cv_prog_cc_pic='-KPIC'
lt_cv_prog_cc_static='-Bstatic'
if test "x$host_vendor" = xsni; then
- lt_cv_prog_cc_wl='-LD'
+ lt_cv_prog_cc_wl='-LD'
else
- lt_cv_prog_cc_wl='-Wl,'
+ lt_cv_prog_cc_wl='-Wl,'
fi
;;
@@ -1184,7 +1227,7 @@ fi
# Check for any special shared library compilation flags.
if test -n "$lt_cv_prog_cc_shlib"; then
AC_MSG_WARN([\`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries])
- if echo "$old_CC $old_CFLAGS " | [egrep -e "[ ]$lt_cv_prog_cc_shlib[ ]"] >/dev/null; then :
+ if echo "$old_CC $old_CFLAGS " | egrep -e "[[ ]]$lt_cv_prog_cc_shlib[[ ]]" >/dev/null; then :
else
AC_MSG_WARN([add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure])
lt_cv_prog_cc_can_build_shared=no
@@ -1260,6 +1303,8 @@ if test x"$compiler_c_o" = x"yes"; then
lt_cv_compiler_o_lo=no
save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -c -o conftest.lo"
+ save_objext="$ac_objext"
+ ac_objext=lo
AC_TRY_COMPILE([], [int some_variable = 0;], [dnl
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings
@@ -1269,10 +1314,11 @@ if test x"$compiler_c_o" = x"yes"; then
lt_cv_compiler_o_lo=yes
fi
])
+ ac_objext="$save_objext"
CFLAGS="$save_CFLAGS"
])
compiler_o_lo=$lt_cv_compiler_o_lo
- AC_MSG_RESULT([$compiler_c_lo])
+ AC_MSG_RESULT([$compiler_o_lo])
else
compiler_o_lo=no
fi
@@ -1365,7 +1411,7 @@ exclude_expsyms="_GLOBAL_OFFSET_TABLE_"
extract_expsyms_cmds=
case $host_os in
-cygwin* | mingw* | pw32* )
+cygwin* | mingw* | pw32*)
# FIXME: the MSVC++ port hasn't been tested in a loooong time
# When not using gcc, we currently assume that we are using
# Microsoft Visual C++.
@@ -1373,7 +1419,9 @@ cygwin* | mingw* | pw32* )
with_gnu_ld=no
fi
;;
-
+openbsd*)
+ with_gnu_ld=no
+ ;;
esac
ld_shlibs=yes
@@ -1460,7 +1508,7 @@ EOF
# can override, but on older systems we have to supply one (in ltdll.c)
if test "x$lt_cv_need_dllmain" = "xyes"; then
ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext "
- ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < [$]0 > $output_objdir/$soname-ltdll.c~
+ ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $''0 > $output_objdir/$soname-ltdll.c~
test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~'
else
ltdll_obj=
@@ -1473,24 +1521,24 @@ EOF
# Be careful not to strip the DATA tag left by newer dlltools.
export_symbols_cmds="$ltdll_cmds"'
$DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~
- [sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//"] < $output_objdir/$soname-def > $export_symbols'
+ sed -e "1,/EXPORTS/d" -e "s/ @ [[0-9]]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols'
# If the export-symbols file already is a .def file (1st line
# is EXPORTS), use it as is.
# If DATA tags from a recent dlltool are present, honour them!
archive_expsym_cmds='if test "x`head -1 $export_symbols`" = xEXPORTS; then
- cp $export_symbols $output_objdir/$soname-def;
+ cp $export_symbols $output_objdir/$soname-def;
else
- echo EXPORTS > $output_objdir/$soname-def;
- _lt_hint=1;
- cat $export_symbols | while read symbol; do
- set dummy \$symbol;
- case \[$]# in
- 2) echo " \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;;
- *) echo " \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;;
- esac;
- _lt_hint=`expr 1 + \$_lt_hint`;
- done;
+ echo EXPORTS > $output_objdir/$soname-def;
+ _lt_hint=1;
+ cat $export_symbols | while read symbol; do
+ set dummy \$symbol;
+ case \[$]# in
+ 2) echo " \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;;
+ *) echo " \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;;
+ esac;
+ _lt_hint=`expr 1 + \$_lt_hint`;
+ done;
fi~
'"$ltdll_cmds"'
$CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~
@@ -1585,80 +1633,96 @@ else
;;
aix4* | aix5*)
+ if test "$host_cpu" = ia64; then
+ # On IA64, the linker does run time linking by default, so we don't
+ # have to do anything special.
+ aix_use_runtimelinking=no
+ exp_sym_flag='-Bexport'
+ no_entry_flag=""
+ else
+ aix_use_runtimelinking=no
+
+ # Test if we are trying to use run time linking or normal
+ # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+ # need to do runtime linking.
+ case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*)
+ for ld_flag in $LDFLAGS; do
+ if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+ aix_use_runtimelinking=yes
+ break
+ fi
+ done
+ esac
+
+ exp_sym_flag='-bexport'
+ no_entry_flag='-bnoentry'
+ fi
+
# When large executables or shared objects are built, AIX ld can
# have problems creating the table of contents. If linking a library
# or program results in "error TOC overflow" add -mminimal-toc to
# CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
# enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+ hardcode_direct=yes
archive_cmds=''
hardcode_libdir_separator=':'
if test "$GCC" = yes; then
- collect2name=`${CC} -print-prog-name=collect2`
- if test -f "$collect2name" && \
- strings "$collect2name" | grep resolve_lib_name >/dev/null
- then
- # We have reworked collect2
- hardcode_direct=yes
- else
- # We have old collect2
- hardcode_direct=unsupported
- # It fails to find uninstalled libraries when the uninstalled
- # path is not listed in the libpath. Setting hardcode_minus_L
- # to unsupported forces relinking
- hardcode_minus_L=yes
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_libdir_separator=
- fi
+ case $host_os in aix4.[[012]]|aix4.[[012]].*)
+ collect2name=`${CC} -print-prog-name=collect2`
+ if test -f "$collect2name" && \
+ strings "$collect2name" | grep resolve_lib_name >/dev/null
+ then
+ # We have reworked collect2
+ hardcode_direct=yes
+ else
+ # We have old collect2
+ hardcode_direct=unsupported
+ # It fails to find uninstalled libraries when the uninstalled
+ # path is not listed in the libpath. Setting hardcode_minus_L
+ # to unsupported forces relinking
+ hardcode_minus_L=yes
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_libdir_separator=
+ fi
+ esac
+
shared_flag='-shared'
else
+ # not using gcc
if test "$host_cpu" = ia64; then
- shared_flag='-G'
+ shared_flag='${wl}-G'
else
- shared_flag='${wl}-bM:SRE'
+ if test "$aix_use_runtimelinking" = yes; then
+ shared_flag='${wl}-G'
+ else
+ shared_flag='${wl}-bM:SRE'
+ fi
fi
- hardcode_direct=yes
fi
- if test "$host_cpu" = ia64; then
- # On IA64, the linker does run time linking by default, so we don't
- # have to do anything special.
- aix_use_runtimelinking=no
- exp_sym_flag='-Bexport'
- no_entry_flag=""
- else
- # Test if we are trying to use run time linking, or normal AIX style linking.
- # If -brtl is somewhere in LDFLAGS, we need to do run time linking.
- aix_use_runtimelinking=no
- for ld_flag in $LDFLAGS; do
- if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl" ); then
- aix_use_runtimelinking=yes
- break
- fi
- done
- exp_sym_flag='-bexport'
- no_entry_flag='-bnoentry'
- fi
# It seems that -bexpall can do strange things, so it is better to
# generate a list of symbols to export.
always_export_symbols=yes
if test "$aix_use_runtimelinking" = yes; then
+ # Warning - without using the other runtime loading flags (-brtl),
+ # -berok will link without error, but may produce a broken library.
+ allow_undefined_flag='-berok'
hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib'
- allow_undefined_flag=' -Wl,-G'
- archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"
+ archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols $shared_flag"
else
if test "$host_cpu" = ia64; then
- hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
- allow_undefined_flag="-znodefs"
- archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"
+ hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+ allow_undefined_flag="-z nodefs"
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"
else
- hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib'
- # Warning - without using the other run time loading flags, -berok will
- # link without error, but may produce a broken library.
- allow_undefined_flag='${wl}-berok"
- # This is a bit strange, but is similar to how AIX traditionally builds
- # it's shared libraries.
- archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname'
+ hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib'
+ # Warning - without using the other run time loading flags,
+ # -berok will link without error, but may produce a broken library.
+ allow_undefined_flag='${wl}-berok'
+ # This is a bit strange, but is similar to how AIX traditionally builds
+ # its shared libraries.
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname'
fi
fi
;;
@@ -1690,11 +1754,19 @@ else
;;
darwin* | rhapsody*)
- allow_undefined_flag='-undefined suppress'
+ case "$host_os" in
+ rhapsody* | darwin1.[[012]])
+ allow_undefined_flag='-undefined suppress'
+ ;;
+ *) # Darwin 1.3 on
+ allow_undefined_flag='-flat_namespace -undefined suppress'
+ ;;
+ esac
# FIXME: Relying on posixy $() will cause problems for
# cross-compilation, but unfortunately the echo tests do not
- # yet detect zsh echo's removal of \ escapes.
- archive_cmds='$CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib $libobjs $deplibs$linkopts -install_name $rpath/$soname $(test -n "$verstring" -a x$verstring != x0.0 && echo $verstring)'
+ # yet detect zsh echo's removal of \ escapes. Also zsh mangles
+ # `"' quotes if we put them in here... so don't!
+ archive_cmds='$nonopt $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib $libobjs $deplibs$linker_flags -install_name $rpath/$soname $verstring'
# We need to add '_' to the symbols in $export_symbols first
#archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols'
hardcode_direct=yes
@@ -1746,7 +1818,7 @@ else
export_dynamic_flag_spec='${wl}-E'
;;
- irix5* | irix6*)
+ irix5* | irix6* | nonstopux*)
if test "$GCC" = yes; then
archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
else
@@ -1769,7 +1841,7 @@ else
;;
newsos6)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts'
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
hardcode_direct=yes
hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
hardcode_libdir_separator=:
@@ -1777,10 +1849,24 @@ else
;;
openbsd*)
- archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- hardcode_libdir_flag_spec='-R$libdir'
hardcode_direct=yes
hardcode_shlibpath_var=no
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+ export_dynamic_flag_spec='${wl}-E'
+ else
+ case "$host_os" in
+ openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-R$libdir'
+ ;;
+ *)
+ archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+ ;;
+ esac
+ fi
;;
os2*)
@@ -1825,10 +1911,39 @@ else
hardcode_shlibpath_var=no
runpath_var=LD_RUN_PATH
hardcode_runpath_var=yes
+ export_dynamic_flag_spec='${wl}-Bexport'
;;
solaris*)
+ # gcc --version < 3.0 without binutils cannot create self contained
+ # shared libraries reliably, requiring libgcc.a to resolve some of
+ # the object symbols generated in some cases. Libraries that use
+ # assert need libgcc.a to resolve __eprintf, for example. Linking
+ # a copy of libgcc.a into every shared library to guarantee resolving
+ # such symbols causes other problems: According to Tim Van Holder
+ # <tim.van.holder@pandora.be>, C++ libraries end up with a separate
+ # (to the application) exception stack for one thing.
no_undefined_flag=' -z defs'
+ if test "$GCC" = yes; then
+ case `$CC --version 2>/dev/null` in
+ [[12]].*)
+ cat <<EOF 1>&2
+
+*** Warning: Releases of GCC earlier than version 3.0 cannot reliably
+*** create self contained shared libraries on Solaris systems, without
+*** introducing a dependency on libgcc.a. Therefore, libtool is disabling
+*** -no-undefined support, which will at least allow you to build shared
+*** libraries. However, you may find that when you link such libraries
+*** into an application without using GCC, you have to manually add
+*** \`gcc --print-libgcc-file-name\` to the link command. We urge you to
+*** upgrade to a newer version of GCC. Another option is to rebuild your
+*** current GCC to use the GNU linker from GNU binutils 2.9.1 or newer.
+
+EOF
+ no_undefined_flag=
+ ;;
+ esac
+ fi
# $CC -shared without GNU ld will not create a library from C++
# object files and a static libstdc++, better avoid it for now
archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
@@ -1837,7 +1952,7 @@ else
hardcode_libdir_flag_spec='-R$libdir'
hardcode_shlibpath_var=no
case $host_os in
- [solaris2.[0-5] | solaris2.[0-5].*]) ;;
+ solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
*) # Supported since Solaris 2.6 (maybe 2.5.1?)
whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;;
esac
@@ -1860,7 +1975,7 @@ else
sysv4)
if test "x$host_vendor" = xsno; then
- archive_cmds='$LD -G -Bsymbolic -h $soname -o $lib $libobjs $deplibs $linkopts'
+ archive_cmds='$LD -G -Bsymbolic -h $soname -o $lib $libobjs $deplibs $linker_flags'
hardcode_direct=yes # is this really true???
else
archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
@@ -2017,22 +2132,24 @@ aix4* | aix5*)
# depend on `.', always an invalid library. This was fixed in
# development snapshots of GCC prior to 3.0.
case $host_os in
- [ aix4 | aix4.[01] | aix4.[01].*)]
- if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
- echo ' yes '
- echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
- :
- else
- can_build_shared=no
- fi
- ;;
+ aix4 | aix4.[[01]] | aix4.[[01]].*)
+ if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+ echo ' yes '
+ echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
+ :
+ else
+ can_build_shared=no
+ fi
+ ;;
esac
- # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
- # soname into executable. Probably we can add versioning support to
- # collect2, so additional links can be useful in future.
+ # AIX (on Power*) has no versioning support, so currently we can
+ # not hardcode correct soname into executable. Probably we can
+ # add versioning support to collect2, so additional links can
+ # be useful in future.
if test "$aix_use_runtimelinking" = yes; then
- # If using run time linking (on AIX 4.2 or later) use lib<name>.so instead of
- # lib<name>.a to let people know that these are not typical AIX shared libraries.
+ # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+ # instead of lib<name>.a to let people know that these are not
+ # typical AIX shared libraries.
library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
else
# We preserve .a as extension for shared libraries through AIX4.2
@@ -2041,14 +2158,13 @@ aix4* | aix5*)
soname_spec='${libname}${release}.so$major'
fi
shlibpath_var=LIBPATH
- deplibs_check_method=pass_all
fi
;;
amigaos*)
library_names_spec='$libname.ixlibrary $libname.a'
# Create ${libname}_ixlibrary.a entries in /sys/libs.
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | [$Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\'']`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
;;
beos*)
@@ -2079,7 +2195,7 @@ cygwin* | mingw* | pw32*)
case $GCC,$host_os in
yes,cygwin*)
library_names_spec='$libname.dll.a'
- soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll'
+ soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll'
postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~
dldir=$destdir/`dirname \$dlpath`~
test -d \$dldir || mkdir -p \$dldir~
@@ -2089,14 +2205,14 @@ cygwin* | mingw* | pw32*)
$rm \$dlpath'
;;
yes,mingw*)
- library_names_spec='${libname}`echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll'
+ library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll'
sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g"`
;;
yes,pw32*)
library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll'
;;
*)
- library_names_spec='${libname}`echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll $libname.lib'
+ library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll $libname.lib'
;;
esac
dynamic_linker='Win32 ld.exe'
@@ -2173,14 +2289,17 @@ hpux9* | hpux10* | hpux11*)
postinstall_cmds='chmod 555 $lib'
;;
-irix5* | irix6*)
- version_type=irix
+irix5* | irix6* | nonstopux*)
+ case $host_os in
+ nonstopux*) version_type=nonstopux ;;
+ *) version_type=irix ;;
+ esac
need_lib_prefix=no
need_version=no
soname_spec='${libname}${release}.so$major'
library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so'
case $host_os in
- irix5*)
+ irix5* | nonstopux*)
libsuff= shlibsuff=
;;
*)
@@ -2254,9 +2373,19 @@ newsos6)
openbsd*)
version_type=sunos
- if test "$with_gnu_ld" = yes; then
- need_lib_prefix=no
- need_version=no
+ need_lib_prefix=no
+ need_version=no
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ case "$host_os" in
+ openbsd2.[[89]] | openbsd2.[[89]].*)
+ shlibpath_overrides_runpath=no
+ ;;
+ *)
+ shlibpath_overrides_runpath=yes
+ ;;
+ esac
+ else
+ shlibpath_overrides_runpath=yes
fi
library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
@@ -2274,6 +2403,7 @@ os2*)
osf3* | osf4* | osf5*)
version_type=osf
need_version=no
+ need_lib_prefix=no
soname_spec='${libname}${release}.so'
library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
shlibpath_var=LD_LIBRARY_PATH
@@ -2367,6 +2497,33 @@ test "$dynamic_linker" = no && can_build_shared=no
AC_MSG_CHECKING([if libtool supports shared libraries])
AC_MSG_RESULT([$can_build_shared])
+AC_MSG_CHECKING([whether to build shared libraries])
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case "$host_os" in
+aix3*)
+ test "$enable_shared" = yes && enable_static=no
+ if test -n "$RANLIB"; then
+ archive_cmds="$archive_cmds~\$RANLIB \$lib"
+ postinstall_cmds='$RANLIB $lib'
+ fi
+ ;;
+
+aix4*)
+ if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+ test "$enable_shared" = yes && enable_static=no
+ fi
+ ;;
+esac
+AC_MSG_RESULT([$enable_shared])
+
+AC_MSG_CHECKING([whether to build static libraries])
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+AC_MSG_RESULT([$enable_static])
+
if test "$hardcode_action" = relink; then
# Fast installation is not supported
enable_fast_install=no
@@ -2460,6 +2617,7 @@ if test -f "$ltmain"; then
old_striplib striplib file_magic_cmd export_symbols_cmds \
deplibs_check_method allow_undefined_flag no_undefined_flag \
finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \
+ global_symbol_to_c_name_address \
hardcode_libdir_flag_spec hardcode_libdir_separator \
sys_lib_search_path_spec sys_lib_dlsearch_path_spec \
compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do
@@ -2526,12 +2684,12 @@ SHELL=$lt_SHELL
# Whether or not to build shared libraries.
build_libtool_libs=$enable_shared
-# Whether or not to add -lc for building shared libraries.
-build_libtool_need_lc=$need_lc
-
# Whether or not to build static libraries.
build_old_libs=$enable_static
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$need_lc
+
# Whether or not to optimize for fast installation.
fast_install=$enable_fast_install
@@ -2697,6 +2855,9 @@ global_symbol_pipe=$lt_global_symbol_pipe
# Transform the output of nm in a proper C declaration
global_symbol_to_cdecl=$lt_global_symbol_to_cdecl
+# Transform the output of nm in a C name address pair
+global_symbol_to_c_name_address=$lt_global_symbol_to_c_name_address
+
# This is the shared library runtime path variable.
runpath_var=$runpath_var
@@ -2819,9 +2980,9 @@ EOF
# return TRUE;
# }
# /* ltdll.c ends here */
- # This is a source program that is used to create import libraries
- # on Windows for dlls which lack them. Don't remove nor modify the
- # starting and closing comments
+ # This is a source program that is used to create import libraries
+ # on Windows for dlls which lack them. Don't remove nor modify the
+ # starting and closing comments
# /* impgen.c starts here */
# /* Copyright (C) 1999-2000 Free Software Foundation, Inc.
#
@@ -3176,6 +3337,7 @@ test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no)
AC_REQUIRE([AC_PROG_CC])dnl
AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl
ac_prog=ld
if test "$GCC" = yes; then
# Check if gcc -print-prog-name=ld gives a path.
@@ -3189,8 +3351,8 @@ if test "$GCC" = yes; then
esac
case $ac_prog in
# Accept absolute paths.
- [[\\/]* | [A-Za-z]:[\\/]*)]
- [re_direlt='/[^/][^/]*/\.\./']
+ [[\\/]]* | [[A-Za-z]]:[[\\/]]*)
+ re_direlt='/[[^/]][[^/]]*/\.\./'
# Canonicalize the path of ld
ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
@@ -3214,7 +3376,7 @@ else
fi
AC_CACHE_VAL(lt_cv_path_LD,
[if test -z "$LD"; then
- IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
for ac_dir in $PATH; do
test -z "$ac_dir" && ac_dir=.
if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
@@ -3278,7 +3440,7 @@ lt_cv_deplibs_check_method='unknown'
# `unknown' -- same as none, but documents that we really don't know.
# 'pass_all' -- all dependencies passed with no checks.
# 'test_compile' -- check by making test program.
-# ['file_magic [regex]'] -- check by looking for files in library path
+# 'file_magic [[regex]]' -- check by looking for files in library path
# which responds to the $file_magic_cmd with a given egrep regex.
# If you have `file' or equivalent on your system and you're not sure
# whether `pass_all' will *always* work, you probably want this one.
@@ -3293,7 +3455,7 @@ beos*)
;;
bsdi4*)
- [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)']
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
lt_cv_file_magic_cmd='/usr/bin/file -L'
lt_cv_file_magic_test_file=/shlib/libc.so
;;
@@ -3307,7 +3469,7 @@ darwin* | rhapsody*)
lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library'
lt_cv_file_magic_cmd='/usr/bin/file -L'
case "$host_os" in
- rhapsody* | darwin1.[012])
+ rhapsody* | darwin1.[[012]])
lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1`
;;
*) # Darwin 1.3 on
@@ -3322,7 +3484,7 @@ freebsd*)
i*86 )
# Not sure whether the presence of OpenBSD here was a mistake.
# Let's accept both of them until this is cleared up.
- [lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[3-9]86 (compact )?demand paged shared library']
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[[3-9]]86 (compact )?demand paged shared library'
lt_cv_file_magic_cmd=/usr/bin/file
lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
;;
@@ -3337,14 +3499,14 @@ gnu*)
;;
hpux10.20*|hpux11*)
- [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9].[0-9]) shared library']
+ lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library'
lt_cv_file_magic_cmd=/usr/bin/file
lt_cv_file_magic_test_file=/usr/lib/libc.sl
;;
-irix5* | irix6*)
+irix5* | irix6* | nonstopux*)
case $host_os in
- irix5*)
+ irix5* | nonstopux*)
# this will be overridden with pass_all, but let us keep it just in case
lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
;;
@@ -3356,7 +3518,7 @@ irix5* | irix6*)
*) libmagic=never-match;;
esac
# this will be overridden with pass_all, but let us keep it just in case
- [lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"]
+ lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[[1234]] dynamic lib MIPS - version 1"
;;
esac
lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
@@ -3365,30 +3527,34 @@ irix5* | irix6*)
# This must be Linux ELF.
linux-gnu*)
- case $host_cpu in
- alpha* | i*86 | powerpc* | sparc* | ia64* )
- lt_cv_deplibs_check_method=pass_all ;;
- *)
- # glibc up to 2.1.1 does not perform some relocations on ARM
- [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;]
- esac
+ lt_cv_deplibs_check_method=pass_all
lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
;;
netbsd*)
if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
- [lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so\.[0-9]+\.[0-9]+$']
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so\.[[0-9]]+\.[[0-9]]+$'
else
- [lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so$']
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so$'
fi
;;
newos6*)
- [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)']
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
lt_cv_file_magic_cmd=/usr/bin/file
lt_cv_file_magic_test_file=/usr/lib/libnls.so
;;
+openbsd*)
+ lt_cv_file_magic_cmd=/usr/bin/file
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB shared object'
+ else
+ lt_cv_deplibs_check_method='file_magic OpenBSD.* shared library'
+ fi
+ ;;
+
osf3* | osf4* | osf5*)
# this will be overridden with pass_all, but let us keep it just in case
lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
@@ -3405,14 +3571,14 @@ solaris*)
lt_cv_file_magic_test_file=/lib/libc.so
;;
-[sysv5uw[78]* | sysv4*uw2*)]
+sysv5uw[[78]]* | sysv4*uw2*)
lt_cv_deplibs_check_method=pass_all
;;
sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
case $host_vendor in
motorola)
- [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]']
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
;;
ncr)
@@ -3420,11 +3586,11 @@ sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
;;
sequent)
lt_cv_file_magic_cmd='/bin/file'
- [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )']
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
;;
sni)
lt_cv_file_magic_cmd='/bin/file'
- [lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"]
+ lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
lt_cv_file_magic_test_file=/lib/libc.so
;;
esac
@@ -3438,13 +3604,14 @@ deplibs_check_method=$lt_cv_deplibs_check_method
# AC_PROG_NM - find the path to a BSD-compatible name lister
AC_DEFUN([AC_PROG_NM],
-[AC_MSG_CHECKING([for BSD-compatible nm])
+[AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl
+AC_MSG_CHECKING([for BSD-compatible nm])
AC_CACHE_VAL(lt_cv_path_NM,
[if test -n "$NM"; then
# Let the user override the test.
lt_cv_path_NM="$NM"
else
- IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
test -z "$ac_dir" && ac_dir=.
tmp_nm=$ac_dir/${ac_tool_prefix}nm
@@ -3491,12 +3658,12 @@ esac
])
# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for
-# the libltdl convenience library and INCLTDL to the include flags for
+# the libltdl convenience library and LTDLINCL to the include flags for
# the libltdl header and adds --enable-ltdl-convenience to the
-# configure arguments. Note that LIBLTDL and INCLTDL are not
+# configure arguments. Note that LIBLTDL and LTDLINCL are not
# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not
# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed
-# with '${top_builddir}/' and INCLTDL will be prefixed with
+# with '${top_builddir}/' and LTDLINCL will be prefixed with
# '${top_srcdir}/' (note the single quotes!). If your package is not
# flat and you're not using automake, define top_builddir and
# top_srcdir appropriately in the Makefiles.
@@ -3508,16 +3675,18 @@ AC_DEFUN([AC_LIBLTDL_CONVENIENCE],
ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
esac
LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
- INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+ LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+ # For backwards non-gettext consistent compatibility...
+ INCLTDL="$LTDLINCL"
])
# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for
-# the libltdl installable library and INCLTDL to the include flags for
+# the libltdl installable library and LTDLINCL to the include flags for
# the libltdl header and adds --enable-ltdl-install to the configure
-# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is
+# arguments. Note that LIBLTDL and LTDLINCL are not AC_SUBSTed, nor is
# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed
# libltdl is not found, it is assumed to be `libltdl'. LIBLTDL will
-# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed
+# be prefixed with '${top_builddir}/' and LTDLINCL will be prefixed
# with '${top_srcdir}/' (note the single quotes!). If your package is
# not flat and you're not using automake, define top_builddir and
# top_srcdir appropriately in the Makefiles.
@@ -3535,12 +3704,14 @@ AC_DEFUN([AC_LIBLTDL_INSTALLABLE],
if test x"$enable_ltdl_install" = x"yes"; then
ac_configure_args="$ac_configure_args --enable-ltdl-install"
LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la
- INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+ LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
else
ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
LIBLTDL="-lltdl"
- INCLTDL=
+ LTDLINCL=
fi
+ # For backwards non-gettext consistent compatibility...
+ INCLTDL="$LTDLINCL"
])
# old names
@@ -3560,7 +3731,7 @@ ifelse([AC_DISABLE_FAST_INSTALL])
# serial 1
-AC_DEFUN([AM_MAINTAINER_MODE],
+AC_DEFUN(AM_MAINTAINER_MODE,
[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
dnl maintainer-mode is disabled by default
AC_ARG_ENABLE(maintainer-mode,
@@ -3577,7 +3748,7 @@ AC_DEFUN([AM_MAINTAINER_MODE],
# Define a conditional.
-AC_DEFUN([AM_CONDITIONAL],
+AC_DEFUN(AM_CONDITIONAL,
[AC_SUBST($1_TRUE)
AC_SUBST($1_FALSE)
if $2; then
diff --git a/gmime-charset.c b/gmime-charset.c
index 7e04fc40..815288b8 100644
--- a/gmime-charset.c
+++ b/gmime-charset.c
@@ -34,21 +34,58 @@
#include <alloca.h>
#endif
+#include "gmime-charset-map-private.h"
#include "gmime-charset.h"
#include "strlib.h"
#if defined (__aix__) || defined (__irix__) || defined (__sun__)
-#define CANONICAL_ISO_FORMAT "ISO%d-%d"
+#define CANONICAL_ISO_D_FORMAT "ISO%d-%d"
+#define CANONICAL_ISO_S_FORMAT "ISO%d-%s"
+#elif defined (__hpux__)
+#define CANONICAL_ISO_D_FORMAT "iso%d%d"
+#define CANONICAL_ISO_S_FORMAT "iso%d%s"
#else
-#ifdef __hpux__
-#define CANONICAL_ISO_FORMAT "iso%d%d"
-#else
-#define CANONICAL_ISO_FORMAT "ISO-%d-%d"
-#endif /* __hpux__ */
+#define CANONICAL_ISO_D_FORMAT "iso-%d-%d"
+#define CANONICAL_ISO_S_FORMAT "iso-%d-%s"
#endif /* __aix__, __irix__, __sun__ */
+static struct {
+ char *name;
+ unsigned int bit; /* assigned bit */
+} tables[] = {
+ /* These are the 8bit character sets (other than iso-8859-1,
+ * which is special-cased) which are supported by both other
+ * mailers and the GNOME environment. Note that the order
+ * they're listed in is the order they'll be tried in, so put
+ * the more-popular ones first.
+ */
+ { "iso-8859-2", 0 }, /* Central/Eastern European */
+ { "iso-8859-4", 0 }, /* Baltic */
+ { "koi8-r", 0 }, /* Russian */
+ { "koi8-u", 0 }, /* Ukrainian */
+ { "iso-8859-5", 0 }, /* Least-popular Russian encoding */
+ { "iso-8859-7", 0 }, /* Greek */
+ { "iso-8859-8", 0 }, /* Hebrew; Visual */
+ { "iso-8859-9", 0 }, /* Turkish */
+ { "iso-8859-13", 0 }, /* Baltic again */
+ { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most
+ * programs that support this support UTF8
+ */
+ { "windows-1251", 0 }, /* Russian */
+ { 0, 0 }
+};
+
+unsigned int encoding_map[256 * 256];
+
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+#define UCS "UCS-4BE"
+#else
+#define UCS "UCS-4LE"
+#endif
+
+
struct {
char *charset;
char *iconv_name;
@@ -195,7 +232,6 @@ const char *
g_mime_charset_name (const char *charset)
{
char *name, *iconv_name, *buf;
- int codepage;
if (charset == NULL)
return NULL;
@@ -212,26 +248,37 @@ g_mime_charset_name (const char *charset)
return iconv_name;
if (!strncmp (name, "iso", 3)) {
+ int iso, codepage;
+ char *p;
+
buf = name + 3;
if (*buf == '-' || *buf == '_')
buf++;
- g_assert (strncmp (buf, "8859", 4) == 0);
+ iso = strtoul (buf, &p, 10);
- buf += 4;
+ g_assert (p > buf);
+
+ buf = p;
if (*buf == '-' || *buf == '_')
buf++;
- codepage = atoi (buf);
- g_assert (codepage > 0);
+ codepage = strtoul (buf, &p, 10);
+ if (p > buf) {
+ /* codepage is numeric */
#ifdef __aix__
- if (codepage == 13)
- iconv_name = g_strdup ("IBM-921");
- else
+ if (codepage == 13)
+ iconv_name = g_strdup ("IBM-921");
+ else
#endif /* __aix__ */
- iconv_name = g_strdup_printf (CANONICAL_ISO_FORMAT,
- 8859, codepage);
+ iconv_name = g_strdup_printf (CANONICAL_ISO_D_FORMAT,
+ iso, codepage);
+ } else {
+ /* codepage is a string? */
+ iconv_name = g_strdup_printf (CANONICAL_ISO_S_FORMAT,
+ iso, p);
+ }
} else if (!strncmp (name, "windows-", 8)) {
buf = name + 8;
if (!strncmp (buf, "cp", 2))
@@ -253,3 +300,323 @@ g_mime_charset_name (const char *charset)
return iconv_name;
}
+
+
+
+/* unicode_* and unichar are stolen from glib2... */
+typedef guint32 unichar;
+
+static const char unicode_skip[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
+
+#define UTF8_COMPUTE(ch, mask, len) \
+ if (ch < 128) \
+ { \
+ len = 1; \
+ mask = 0x7f; \
+ } \
+ else if ((ch & 0xe0) == 0xc0) \
+ { \
+ len = 2; \
+ mask = 0x1f; \
+ } \
+ else if ((ch & 0xf0) == 0xe0) \
+ { \
+ len = 3; \
+ mask = 0x0f; \
+ } \
+ else if ((ch & 0xf8) == 0xf0) \
+ { \
+ len = 4; \
+ mask = 0x07; \
+ } \
+ else if ((ch & 0xfc) == 0xf8) \
+ { \
+ len = 5; \
+ mask = 0x03; \
+ } \
+ else if ((ch & 0xfe) == 0xfc) \
+ { \
+ len = 6; \
+ mask = 0x01; \
+ } \
+ else \
+ len = -1;
+
+#define UTF8_GET(result, chars, count, mask, len) \
+ (result) = (chars)[0] & (mask); \
+ for ((count) = 1; (count) < (len); ++(count)) \
+ { \
+ if (((chars)[(count)] & 0xc0) != 0x80) \
+ { \
+ (result) = -1; \
+ break; \
+ } \
+ (result) <<= 6; \
+ (result) |= ((chars)[(count)] & 0x3f); \
+ }
+
+#define UNICODE_VALID(ch) \
+ ((ch) < 0x110000 && \
+ ((ch) < 0xD800 || (ch) >= 0xE000) && \
+ (ch) != 0xFFFE && (ch) != 0xFFFF)
+
+
+
+#define unicode_next_char(p) (char *)((p) + unicode_skip[*(unsigned char *)(p)])
+
+/* Decodes the UTF-8 sequence starting at @p into a single code point.
+ * Returns (unichar) -1 when the lead byte is not a valid UTF-8 lead byte
+ * or when one of the continuation bytes is malformed. */
+unichar
+unicode_get_char (const char *p)
+{
+	unsigned char lead = (unsigned char) *p;
+	int payload_mask = 0;
+	int seqlen, idx;
+	unichar ucs;
+
+	/* the lead byte fixes both the sequence length and its payload bits */
+	UTF8_COMPUTE (lead, payload_mask, seqlen);
+	if (seqlen == -1)
+		return (unichar) -1;
+
+	/* fold the continuation bytes in, 6 bits at a time */
+	UTF8_GET (ucs, p, idx, payload_mask, seqlen);
+
+	return ucs;
+}
+
+/* Returns TRUE if @ch is accepted by the UNICODE_VALID rules: below
+ * 0x110000, outside 0xD800..0xDFFF, and neither 0xFFFE nor 0xFFFF. */
+gboolean
+unichar_validate (unichar ch)
+{
+	if (ch >= 0x110000)
+		return FALSE;
+
+	if (ch >= 0xD800 && ch < 0xE000)
+		return FALSE;
+
+	if (ch == 0xFFFE || ch == 0xFFFF)
+		return FALSE;
+
+	return TRUE;
+}
+
+
+
+/* Running state of a "best charset" scan. */
+typedef struct _Charset {
+	/* AND of charset_mask() over every BMP character seen so far;
+	 * cleared to 0 once a character above 0xffff is seen */
+	unsigned int mask;
+	/* 0: nothing above 127 seen, 1: only 128..255 seen, 2: >= 256 seen */
+	unsigned int level;
+} Charset;
+
+/* Resets @charset so that every charset bit is still a candidate. */
+static void
+charset_init (Charset *charset)
+{
+	charset->level = 0;
+	charset->mask = ~0U;
+}
+
+/* Feeds @len bytes of UTF-8 text at @in into the scan state @charset.
+ *
+ * For every valid character the candidate mask is narrowed with
+ * charset_mask() and the level is raised (1 for 128..255, 2 for anything
+ * >= 256). Invalid bytes are skipped one at a time. A multi-byte sequence
+ * that would extend past @in + @len is treated as invalid instead of being
+ * decoded, so no byte outside the caller's buffer is ever read. */
+static void
+charset_step (Charset *charset, const char *in, size_t len)
+{
+	register const char *inptr = in;
+	const char *inend = in + len;
+	register unsigned int mask;
+	register unsigned int level;
+
+	mask = charset->mask;
+	level = charset->level;
+
+	while (inptr < inend) {
+		size_t left = (size_t) (inend - inptr);
+		size_t seqlen = (size_t) unicode_skip[*(const unsigned char *) inptr];
+		unichar c;
+
+		/* truncated sequence at the end of the buffer: decoding it
+		 * would read beyond inend, so skip the lead byte instead */
+		if (seqlen > left) {
+			inptr++;
+			continue;
+		}
+
+		c = unicode_get_char (inptr);
+		if (!unichar_validate (c)) {
+			inptr++;
+			continue;
+		}
+
+		inptr += seqlen;
+		if (c <= 0xffff) {
+			mask &= charset_mask (c);
+
+			if (c >= 128 && c < 256)
+				level = MAX (level, 1);
+			else if (c >= 256)
+				level = MAX (level, 2);
+		} else {
+			/* outside the range the charset map covers */
+			mask = 0;
+			level = MAX (level, 2);
+		}
+	}
+
+	charset->mask = mask;
+	charset->level = level;
+}
+
+/* Returns the name of the first charinfo[] entry whose bit is set in
+ * @mask, or "UTF-8" when no entry matches. */
+static const char *
+charset_best_mask (unsigned int mask)
+{
+	const unsigned int n_charsets = sizeof (charinfo) / sizeof (charinfo[0]);
+	unsigned int idx = 0;
+
+	while (idx < n_charsets) {
+		if ((charinfo[idx].bit & mask) != 0)
+			return charinfo[idx].name;
+		idx++;
+	}
+
+	return "UTF-8";
+}
+
+/* Maps a finished scan state to a charset name: "iso-8859-1" for level 1,
+ * the best mask match for level 2, and NULL for any other level. */
+static const char *
+charset_best_name (Charset *charset)
+{
+	switch (charset->level) {
+	case 1:
+		return "iso-8859-1";
+	case 2:
+		return charset_best_mask (charset->mask);
+	default:
+		return NULL;
+	}
+}
+
+
+/**
+ * g_mime_charset_best:
+ * @in: a UTF-8 text buffer
+ * @inlen: length of @in, in bytes
+ *
+ * Scans the buffer once and picks the charset best suited to encode it.
+ *
+ * Returns "iso-8859-1" if every non-ASCII character lies in 128..255,
+ * otherwise the first known charset able to represent all of the text
+ * (falling back to "UTF-8"), or %NULL if the text is US-ASCII safe.
+ **/
+const char *
+g_mime_charset_best (const char *in, size_t inlen)
+{
+	Charset state;
+
+	charset_init (&state);
+	charset_step (&state, in, inlen);
+
+	return charset_best_name (&state);
+}
+
+
+#ifdef BUILD_CHARSET_MAP
+
+#include <errno.h>
+#include <iconv.h>
+
+/* the following functions are copied from glib2... */
+
+
+/* Returns non-zero if any of the 256 code points in block @block has a
+ * charset bit set within byte @byte of its encoding_map[] entry. */
+static int
+block_in_use (int block, int byte)
+{
+	int j;
+
+	for (j = 0; j < 256; j++) {
+		if ((encoding_map[block * 256 + j] & (0xffU << (byte * 8))) != 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Charset map generator (only built with BUILD_CHARSET_MAP).
+ *
+ * Converts the bytes 128..255 of every charset in tables[] to UCS-4 with
+ * iconv, records in encoding_map[] which charsets can represent which
+ * code point, and prints the resulting tables plus the charset_mask()
+ * macro as C source on stdout. Exits with status 1 on a conversion error. */
+int main (int argc, char **argv)
+{
+	char *inptr, *outptr;
+	size_t inlen, outlen, nconv, n;
+	guint32 out[128];
+	char in[128];
+	int i, j, k;
+	unsigned int bit = 0x01;
+	int bytes;
+	iconv_t cd;
+
+	/* don't count the terminator */
+	bytes = ((sizeof (tables) / sizeof (tables[0])) + 7 - 1) / 8;
+
+	for (i = 0; i < 128; i++)
+		in[i] = (char) (i + 128);
+
+	for (j = 0; tables[j].name; j++) {
+		cd = iconv_open (UCS, tables[j].name);
+		if (cd == (iconv_t) -1) {
+			/* fail loudly rather than handing iconv() a bad descriptor */
+			g_warning ("%s\n", g_strerror (errno));
+			exit (1);
+		}
+
+		inptr = in;
+		outptr = (char *)(out);
+		inlen = sizeof (in);
+		outlen = sizeof (out);
+		while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == (size_t) -1) {
+			if (errno == EILSEQ) {
+				/* byte not defined in this charset: skip it */
+				inptr++;
+				inlen--;
+			} else {
+				g_warning ("%s\n", g_strerror (errno));
+				exit (1);
+			}
+		}
+		iconv_close (cd);
+
+		/* US-ASCII is representable in every listed charset, no matter
+		 * how many of the high bytes converted successfully */
+		for (i = 0; i < 128; i++)
+			encoding_map[i] |= bit;
+
+		/* number of UCS-4 characters iconv actually produced */
+		nconv = (sizeof (out) - outlen) / sizeof (out[0]);
+		for (n = 0; n < nconv; n++) {
+			/* encoding_map only covers 0x0000..0xffff */
+			if (out[n] < 256 * 256)
+				encoding_map[out[n]] |= bit;
+		}
+
+		tables[j].bit = bit;
+		bit <<= 1;
+	}
+
+	printf ("/* This file is automatically generated: DO NOT EDIT */\n\n");
+
+	for (i = 0; i < 256; i++) {
+		/* first, do we need this block? */
+		for (k = 0; k < bytes; k++) {
+			if (block_in_use (i, k)) {
+				/* yes, dump it */
+				printf ("static unsigned char m%02x%x[256] = {\n\t", i, k);
+				for (j = 0; j < 256; j++) {
+					printf ("0x%02x, ", (encoding_map[i * 256 + j] >> (k * 8)) & 0xff);
+					if (((j + 1) & 7) == 0 && j < 255)
+						printf ("\n\t");
+				}
+				printf ("\n};\n\n");
+			}
+		}
+	}
+
+	printf ("struct {\n");
+	for (k = 0; k < bytes; k++) {
+		printf ("\tunsigned char *bits%d;\n", k);
+	}
+
+	printf ("} charmap[256] = {\n\t");
+	for (i = 0; i < 256; i++) {
+		/* reference the block if it was dumped above, 0 otherwise */
+		printf ("{ ");
+		for (k = 0; k < bytes; k++) {
+			if (block_in_use (i, k)) {
+				printf ("m%02x%x, ", i, k);
+			} else {
+				printf ("0, ");
+			}
+		}
+
+		printf ("}, ");
+		if (((i + 1) & 7) == 0 && i < 255)
+			printf ("\n\t");
+	}
+	printf ("\n};\n\n");
+
+	printf ("struct {\n\tconst char *name;\n\tunsigned int bit;\n} charinfo[] = {\n");
+	for (j = 0; tables[j].name; j++) {
+		printf ("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
+	}
+	printf ("};\n\n");
+
+	printf("#define charset_mask(x) \\\n");
+	for (k = 0; k < bytes; k++) {
+		if (k != 0)
+			printf ("\t| ");
+		else
+			printf ("\t");
+
+		printf ("(charmap[(x) >> 8].bits%d ? charmap[(x) >> 8].bits%d[(x) & 0xff] << %d : 0)",
+			k, k, k * 8);
+
+		if (k < bytes - 1)
+			printf ("\t\\\n");
+	}
+	printf ("\n\n");
+
+	return 0;
+}
+#endif /* BUILD_CHARSET_MAP */
diff --git a/gmime-charset.h b/gmime-charset.h
index 544815d8..83be12e5 100644
--- a/gmime-charset.h
+++ b/gmime-charset.h
@@ -30,6 +30,7 @@ extern "C" {
#endif /* __cplusplus */
#include <glib.h>
+#include <sys/types.h>
void g_mime_charset_init (void);
@@ -37,6 +38,8 @@ const char *g_mime_charset_locale_name (void);
const char *g_mime_charset_name (const char *charset);
+const char *g_mime_charset_best (const char *in, size_t inlen);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/gmime-iconv-utils.c b/gmime-iconv-utils.c
index 48b6e102..3ba8eb06 100644
--- a/gmime-iconv-utils.c
+++ b/gmime-iconv-utils.c
@@ -25,8 +25,10 @@
#endif
#include <glib.h>
-#include <errno.h>
+#include <stdio.h>
#include <string.h>
+#include <errno.h>
+
#include "gmime-iconv-utils.h"
#include "gmime-charset.h"
diff --git a/gmime-param.c b/gmime-param.c
index 89de69ab..94fea84a 100644
--- a/gmime-param.c
+++ b/gmime-param.c
@@ -26,6 +26,7 @@
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
@@ -35,12 +36,16 @@
#include "gmime-table-private.h"
#include "gmime-charset.h"
#include "gmime-utils.h"
+#include "gmime-iconv.h"
+#include "gmime-iconv-utils.h"
#include "strlib.h"
-#define d(x)
+#define d(x) x
#define w(x)
+extern int gmime_interfaces_utf8;
+
static unsigned char tohex[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
@@ -73,18 +78,18 @@ g_mime_param_new (const char *name, const char *value)
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
-static char *
-hex_decode (const char *in, unsigned int len)
+static size_t
+hex_decode (const unsigned char *in, size_t len, unsigned char *out)
{
- register unsigned char *inptr, *outptr;
+ register const unsigned char *inptr;
+ register unsigned char *outptr;
const unsigned char *inend;
- char *outbuf;
- inend = (const unsigned char *) in + len;
+ inptr = in;
+ inend = in + len;
- outptr = outbuf = g_malloc (len);
+ outptr = out;
- inptr = (unsigned char *) in;
while (inptr < inend) {
if (*inptr == '%') {
if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
@@ -98,41 +103,72 @@ hex_decode (const char *in, unsigned int len)
*outptr = '\0';
- return outbuf;
+ return outptr - out;
}
/* an rfc2184 encoded string looks something like:
* us-ascii'en'This%20is%20even%20more%20
*/
static char *
-rfc2184_decode (const char *in, unsigned int len)
+rfc2184_decode (const char *in, size_t len)
{
const char *inptr = in;
const char *inend = in + len;
- /*const char *charset;*/
+ const char *charset;
char *decoded = NULL;
- /*char *encoding;*/
+ char *charenc;
/* skips to the end of the charset / beginning of the locale */
inptr = memchr (inptr, '\'', len);
if (!inptr)
return NULL;
-#if 0
- /* someday we'll need to do something with the charset... */
- encoding = g_strndup (in, (unsigned) (inptr - in));
- charset = g_mime_iconv_charset_name (encoding);
- g_free (encoding);
-#endif
+ /* save the charset (the text before the first quote) as a C string */
+ len = inptr - in;
+ charenc = alloca (len + 1);
+ memcpy (charenc, in, len);
+ charenc[len] = '\0';
+ charset = g_mime_charset_name (charenc);
/* skip to the end of the locale */
- inptr = memchr (inptr + 1, '\'', (unsigned) (inend - inptr - 1));
+ inptr = memchr (inptr + 1, '\'', (unsigned int) (inend - inptr - 1));
if (!inptr)
return NULL;
inptr++;
- if (inptr < inend)
- decoded = hex_decode (inptr, (unsigned) (inend - inptr));
+ if (inptr < inend) {
+ len = inend - inptr;
+ if (gmime_interfaces_utf8 && strcasecmp (charset, "UTF-8") != 0) {
+ char *udecoded;
+ iconv_t cd;
+
+ /* hex-decode into a stack buffer, then convert that to UTF-8 */
+ decoded = alloca (len + 1);
+ len = hex_decode ((const unsigned char *) inptr, len, (unsigned char *) decoded);
+
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1) {
+ d(g_warning ("Cannot convert from %s to UTF-8, param display may "
+ "be corrupt: %s", charset, g_strerror (errno)));
+ /* unknown charset: fall back to the locale charset */
+ charset = g_mime_charset_locale_name ();
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1)
+ return NULL;
+ }
+
+ udecoded = g_mime_iconv_strndup (cd, decoded, len);
+ g_mime_iconv_close (cd);
+
+ if (!udecoded) {
+ /* the precision argument of %.*s must be an int, not a size_t */
+ d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
+ "corrupt: %s", (int) len, decoded, g_strerror (errno)));
+ }
+
+ decoded = udecoded;
+ } else {
+ decoded = g_malloc (len + 1);
+ hex_decode ((const unsigned char *) inptr, len, (unsigned char *) decoded);
+ }
+ }
return decoded;
}
@@ -202,11 +238,11 @@ decode_quoted_string (const char **in)
if (*inptr == '"') {
start++;
- out = g_strndup (start, (unsigned) (inptr - start));
+ out = g_strndup (start, (unsigned int) (inptr - start));
inptr++;
} else {
/* string wasn't properly quoted */
- out = g_strndup (start, (unsigned) (inptr - start));
+ out = g_strndup (start, (unsigned int) (inptr - start));
}
}
@@ -228,7 +264,7 @@ decode_token (const char **in)
inptr++;
if (inptr > start) {
*in = inptr;
- return g_strndup (start, (unsigned) (inptr - start));
+ return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
@@ -266,7 +302,7 @@ decode_param_token (const char **in)
inptr++;
if (inptr > start) {
*in = inptr;
- return g_strndup (start, (unsigned) (inptr - start));
+ return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
@@ -324,6 +360,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218
{
gboolean is_rfc2184_encoded = FALSE;
gboolean is_rfc2184 = FALSE;
+ gboolean valid_utf8 = FALSE;
const char *inptr = *in;
char *param, *value = NULL;
int rfc2184_part = -1;
@@ -348,6 +385,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218
val = rfc2184_decode (value, strlen (value));
if (val) {
+ valid_utf8 = TRUE;
g_free (value);
value = val;
}
@@ -366,9 +404,29 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218
val = g_mime_utils_8bit_header_decode (value);
if (val) {
+ valid_utf8 = TRUE;
g_free (value);
value = val;
}
+ } else {
+ if (gmime_interfaces_utf8)
+ valid_utf8 = !g_mime_utils_text_is_8bit (value, strlen (value));
+ }
+ }
+
+ if (gmime_interfaces_utf8 && value && !valid_utf8) {
+ /* A (broken) mailer has sent us an unencoded 8bit value.
+ * Attempt to save it by assuming it's in the user's
+ * locale and converting to UTF-8 */
+ char *buf;
+
+ buf = g_mime_iconv_locale_to_utf8 (value);
+ if (buf) {
+ g_free (value);
+ value = buf;
+ } else {
+ d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
+ param, value, g_strerror (errno)));
}
}
@@ -399,8 +457,12 @@ decode_param_list (const char **in)
char *name, *value;
/* invalid format? */
- if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0)
+ if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) {
+ if (*inptr == ';') {
+ continue;
+ }
break;
+ }
if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) {
/* rfc2184 allows a parameter to be broken into multiple parts
@@ -571,15 +633,15 @@ g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
static char *
encode_param (const unsigned char *in, gboolean *encoded)
{
- const unsigned char *inptr;
- char *outstr, *charset;
- int encoding;
+ register const unsigned char *inptr;
+ unsigned char *outbuf = NULL;
+ iconv_t cd = (iconv_t) -1;
+ const char *charset = NULL;
+ char *outstr;
GString *out;
*encoded = FALSE;
- g_return_val_if_fail (in != NULL, NULL);
-
for (inptr = in; *inptr && inptr - in < GMIME_FOLD_LEN; inptr++)
if (*inptr > 127)
break;
@@ -587,29 +649,53 @@ encode_param (const unsigned char *in, gboolean *encoded)
if (*inptr == '\0')
return g_strdup (in);
+ if (*inptr > 127) {
+ if (gmime_interfaces_utf8)
+ charset = g_mime_charset_best (in, strlen (in));
+ else
+ charset = g_mime_charset_locale_name ();
+ }
+
+ if (!charset)
+ charset = "iso-8859-1";
+
+ if (gmime_interfaces_utf8) {
+ if (strcasecmp (charset, "UTF-8") != 0) {
+ charset = g_mime_charset_name (charset);
+ cd = g_mime_iconv_open (charset, "UTF-8");
+ }
+
+ if (cd == (iconv_t) -1)
+ charset = "UTF-8";
+ }
+
+ if (cd != (iconv_t) -1) {
+ outbuf = g_mime_iconv_strdup (cd, in);
+ g_mime_iconv_close (cd);
+ inptr = outbuf;
+ } else {
+ inptr = in;
+ }
+
+ /* FIXME: set the 'language' as well, assuming we can get that info...? */
out = g_string_new ("");
- inptr = in;
- encoding = 0;
+ g_string_sprintfa (out, "%s''", charset);
+
while (inptr && *inptr) {
- unsigned int c = *inptr++ & 0xff;
+ unsigned char c = *inptr++;
+
+ /* FIXME: make sure that '\'', '*', and ';' are also encoded */
- if (c > 127 && c < 256) {
- encoding = MAX (encoding, 1);
+ if (c > 127) {
g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
} else if (is_lwsp (c) || !(gmime_special_table[c] & IS_ESAFE)) {
g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
} else {
- g_string_append_c (out, (char) c);
+ g_string_append_c (out, c);
}
}
- if (encoding) {
- charset = g_strdup_printf ("%s''", g_mime_charset_locale_name ());
- g_string_prepend (out, charset);
- g_free (charset);
- } else {
- g_string_prepend (out, "iso-8859-1''");
- }
+ g_free (outbuf);
outstr = out->str;
g_string_free (out, FALSE);
diff --git a/gmime-utils.c b/gmime-utils.c
index ed01055b..ae202aae 100644
--- a/gmime-utils.c
+++ b/gmime-utils.c
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
@@ -40,8 +41,9 @@
#include "gmime-part.h"
#include "gmime-charset.h"
#include "gmime-iconv.h"
+#include "gmime-iconv-utils.h"
-#define d(x)
+#define d(x) x
#ifndef HAVE_ISBLANK
#define isblank(c) (c == ' ' || c == '\t')
@@ -67,6 +69,9 @@
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
#define DATE_TOKEN_HAS_SIGN (1 << 7)
+/* from gmime.c */
+extern int gmime_interfaces_utf8;
+
static char *base64_alphabet =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -994,19 +999,18 @@ quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
return -1;
}
-#define is_8bit_word_encoded(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
+#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
static unsigned char *
-decode_encoded_8bit_word (const unsigned char *word)
+rfc2047_decode_word (const unsigned char *in, size_t inlen)
{
const register unsigned char *inptr;
const unsigned char *inend;
- size_t len;
- len = strlen (word);
+ inptr = in + 2;
+ inend = in + inlen - 2;
- inptr = word + 2;
- inend = word + len - 2;
+ d(fprintf (stderr, "decoding %.*s\n", inlen, in));
inptr = memchr (inptr, '?', inend - inptr);
if (inptr && inptr[2] == '?') {
@@ -1015,16 +1019,14 @@ decode_encoded_8bit_word (const unsigned char *word)
int state = 0;
int save = 0;
- d(fprintf (stderr, "encoding is '%c'\n", inptr[0]));
-
inptr++;
+
switch (*inptr) {
case 'B':
case 'b':
inptr += 2;
decoded = alloca (inend - inptr);
declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save);
- return g_strndup (decoded, declen);
break;
case 'Q':
case 'q':
@@ -1036,13 +1038,63 @@ decode_encoded_8bit_word (const unsigned char *word)
d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
return NULL;
}
-
- return g_strndup (decoded, declen);
break;
default:
d(fprintf (stderr, "unknown encoding\n"));
return NULL;
}
+
+ if (gmime_interfaces_utf8) {
+ const char *charset;
+ unsigned char *buf;
+ char *charenc, *p;
+ size_t len;
+ iconv_t cd;
+
+ len = (inptr - 3) - (in + 2);
+ charenc = alloca (len + 1);
+ memcpy (charenc, in + 2, len);
+ charenc[len] = '\0';
+
+ /* rfc2231 updates rfc2047 encoded words...
+ * The ABNF given in RFC 2047 for encoded-words is:
+ * encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
+ * This specification changes this ABNF to:
+ * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
+ */
+
+ /* trim off the 'language' part if it's there... */
+ p = strchr (charenc, '*');
+ if (p)
+ *p = '\0';
+
+ /* slight optimization */
+ if (!strcasecmp (charset, "UTF-8"))
+ return g_strndup (decoded, declen);
+
+ charset = g_mime_charset_name (charenc);
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1) {
+ d(g_warning ("Cannot convert from %s to UTF-8, header display may "
+ "be corrupt: %s", charset, g_strerror (errno)));
+ charset = g_mime_charset_locale_name ();
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1)
+ return NULL;
+ }
+
+ buf = g_mime_iconv_strndup (cd, decoded, declen);
+ g_mime_iconv_close (cd);
+
+ if (!buf) {
+ d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
+ "corrupt: %s", declen, decoded, g_strerror (errno)));
+ }
+
+ return buf;
+ } else {
+ return g_strndup (decoded, declen);
+ }
}
return NULL;
@@ -1080,8 +1132,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in)
const unsigned char *word;
gboolean was_encoded;
- if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len)))
- word = dword = decode_encoded_8bit_word (atom->str);
+ if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len)))
+ word = dword = rfc2047_decode_word (atom->str, atom->len);
else
word = atom->str;
@@ -1135,8 +1187,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in)
const unsigned char *word;
gboolean was_encoded;
- if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len)))
- word = dword = decode_encoded_8bit_word (atom->str);
+ if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len)))
+ word = dword = rfc2047_decode_word (atom->str, atom->len);
else
word = atom->str;
@@ -1195,16 +1247,46 @@ quoted_encode (const unsigned char *in, size_t len, unsigned char *out, gushort
}
static char *
-encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_was_encoded)
+rfc2047_encode_word (const unsigned char *word, gushort safemask)
{
unsigned char *encoded, *ptr;
+ unsigned char *uword = NULL;
+ iconv_t cd = (iconv_t) -1;
size_t enclen, pos, len;
+ const char *charset;
int state = 0;
int save = 0;
char encoding;
len = strlen (word);
+ if (gmime_interfaces_utf8) {
+ charset = g_mime_charset_best (word, len);
+ if (!charset)
+ charset = "iso-8859-1";
+ } else {
+ charset = g_mime_charset_locale_name ();
+ }
+
+ if (gmime_interfaces_utf8) {
+ if (strcasecmp (charset, "UTF-8") != 0) {
+ charset = g_mime_charset_name (charset);
+ cd = g_mime_iconv_open (charset, "UTF-8");
+ }
+
+ if (cd != (iconv_t) -1) {
+ uword = g_mime_iconv_strndup (cd, word, len);
+ g_mime_iconv_close (cd);
+ }
+
+ if (uword) {
+ len = strlen (uword);
+ word = uword;
+ } else {
+ charset = "UTF-8";
+ }
+ }
+
switch (g_mime_utils_best_encoding (word, len)) {
case GMIME_PART_ENCODING_BASE64:
enclen = BASE64_ENCODE_LEN (len);
@@ -1232,16 +1314,16 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa
break;
default:
- if (this_was_encoded)
- *this_was_encoded = FALSE;
-
- return g_strdup (word);
+ g_assert_not_reached ();
}
- if (this_was_encoded)
- *this_was_encoded = TRUE;
+ g_free (uword);
+
+ uword = g_strdup_printf ("=?%s?%c?%s?=", charset, encoding, encoded);
- return g_strdup_printf ("=?%s?%c?%s?=", g_mime_charset_locale_name (), encoding, encoded);
+ fprintf (stderr, "resultant encoded word: %s\n", uword);
+
+ return uword;
}
@@ -1257,7 +1339,7 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa
char *
g_mime_utils_8bit_header_encode_phrase (const unsigned char *in)
{
- return encode_8bit_word (in, IS_PSAFE, NULL);
+ return rfc2047_encode_word (in, IS_PSAFE);
}
@@ -1276,29 +1358,29 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
register const unsigned char *inptr;
GString *out, *word, *lwsp;
unsigned char *encoded;
- gboolean is8bit = FALSE;
+ gboolean encode_word = FALSE;
gboolean last_was_encoded = FALSE;
gboolean last_was_space = FALSE;
out = g_string_new ("");
word = g_string_new ("");
lwsp = g_string_new ("");
- inptr = (guchar *) in;
+
+ inptr = in;
while (inptr && *inptr) {
unsigned char c = *inptr++;
if (isspace (c) && !last_was_space) {
- gboolean this_was_encoded = FALSE;
char *eword;
- if (is8bit)
- eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded);
+ if (encode_word)
+ eword = rfc2047_encode_word (word->str, IS_ESAFE);
else
- eword = g_strdup (word->str);
+ eword = word->str;
/* append any whitespace */
- if (last_was_encoded && this_was_encoded) {
+ if (last_was_encoded && encode_word) {
/* we need to encode the whitespace */
unsigned char *elwsp;
size_t len;
@@ -1307,20 +1389,22 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE);
elwsp[len] = '\0';
- g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp);
+ g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp);
} else {
g_string_append (out, lwsp->str);
}
- /* append the encoded word */
+ /* append the (encoded) word */
g_string_append (out, eword);
- g_free (eword);
+
+ if (encode_word)
+ g_free (eword);
g_string_truncate (lwsp, 0);
g_string_truncate (word, 0);
- last_was_encoded = this_was_encoded;
- is8bit = FALSE;
+ last_was_encoded = encode_word;
+ encode_word = FALSE;
}
if (isspace (c)) {
@@ -1328,7 +1412,7 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
last_was_space = TRUE;
} else {
if (c > 127)
- is8bit = TRUE;
+ encode_word = TRUE;
g_string_append_c (word, c);
last_was_space = FALSE;
@@ -1336,16 +1420,15 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
}
if (word->len || lwsp->len) {
- gboolean this_was_encoded = FALSE;
char *eword;
- if (is8bit)
- eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded);
+ if (encode_word)
+ eword = rfc2047_encode_word (word->str, IS_ESAFE);
else
- eword = g_strdup (word->str);
+ eword = word->str;
/* append any whitespace */
- if (last_was_encoded && this_was_encoded) {
+ if (last_was_encoded && encode_word) {
/* we need to encode the whitespace */
unsigned char *elwsp;
size_t len;
@@ -1354,14 +1437,16 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE);
elwsp[len] = '\0';
- g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp);
+ g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp);
} else {
g_string_append (out, lwsp->str);
}
/* append the encoded word */
g_string_append (out, eword);
- g_free (eword);
+
+ if (encode_word)
+ g_free (eword);
}
g_string_free (lwsp, TRUE);
diff --git a/gmime.c b/gmime.c
new file mode 100644
index 00000000..20c422c9
--- /dev/null
+++ b/gmime.c
@@ -0,0 +1,56 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Authors: Jeffrey Stedfast <fejj@ximian.com>
+ *
+ * Copyright 2002 Ximian, Inc. (www.ximian.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "gmime.h"
+
+
+int gmime_interfaces_utf8 = FALSE;
+
+
+/**
+ * g_mime_init:
+ * @flags: initialization flags
+ *
+ * Initializes GMime.
+ **/
+void
+g_mime_init (guint32 flags)
+{
+ static int initialized = FALSE;
+
+ if (initialized)
+ return;
+
+ initialized = TRUE;
+
+ if (flags & GMIME_INIT_FLAG_UTF8)
+ gmime_interfaces_utf8 = TRUE;
+
+ g_mime_charset_init ();
+
+ g_mime_iconv_init ();
+}
diff --git a/gmime.h.in b/gmime.h.in
index 66fa4268..cc586168 100644
--- a/gmime.h.in
+++ b/gmime.h.in
@@ -51,6 +51,7 @@
#include "gmime-filter-crlf.h"
#include "gmime-filter-from.h"
#include "gmime-filter-html.h"
+#include "gmime-filter-yenc.h"
/* GMIME version */
static const guint gmime_major_version = @GMIME_MAJOR_VERSION@;
@@ -65,4 +66,8 @@ static const guint gmime_binary_age = 0;
(gmime_major_version == (major) && gmime_minor_version == (minor) && \
gmime_micro_version >= (micro)))
+#define GMIME_INIT_FLAG_UTF8 (1 << 0)
+
+void g_mime_init (guint32 flags);
+
#endif /* __GMIME_H__ */
diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c
index 7e04fc40..815288b8 100644
--- a/gmime/gmime-charset.c
+++ b/gmime/gmime-charset.c
@@ -34,21 +34,58 @@
#include <alloca.h>
#endif
+#include "gmime-charset-map-private.h"
#include "gmime-charset.h"
#include "strlib.h"
#if defined (__aix__) || defined (__irix__) || defined (__sun__)
-#define CANONICAL_ISO_FORMAT "ISO%d-%d"
+#define CANONICAL_ISO_D_FORMAT "ISO%d-%d"
+#define CANONICAL_ISO_S_FORMAT "ISO%d-%s"
+#elif defined (__hpux__)
+#define CANONICAL_ISO_D_FORMAT "iso%d%d"
+#define CANONICAL_ISO_S_FORMAT "iso%d%s"
#else
-#ifdef __hpux__
-#define CANONICAL_ISO_FORMAT "iso%d%d"
-#else
-#define CANONICAL_ISO_FORMAT "ISO-%d-%d"
-#endif /* __hpux__ */
+#define CANONICAL_ISO_D_FORMAT "iso-%d-%d"
+#define CANONICAL_ISO_S_FORMAT "iso-%d-%s"
#endif /* __aix__, __irix__, __sun__ */
+static struct {
+ char *name;
+ unsigned int bit; /* assigned bit */
+} tables[] = {
+ /* These are the 8bit character sets (other than iso-8859-1,
+ * which is special-cased) which are supported by both other
+ * mailers and the GNOME environment. Note that the order
+ * they're listed in is the order they'll be tried in, so put
+ * the more-popular ones first.
+ */
+ { "iso-8859-2", 0 }, /* Central/Eastern European */
+ { "iso-8859-4", 0 }, /* Baltic */
+ { "koi8-r", 0 }, /* Russian */
+ { "koi8-u", 0 }, /* Ukrainian */
+ { "iso-8859-5", 0 }, /* Least-popular Russian encoding */
+ { "iso-8859-7", 0 }, /* Greek */
+ { "iso-8859-8", 0 }, /* Hebrew; Visual */
+ { "iso-8859-9", 0 }, /* Turkish */
+ { "iso-8859-13", 0 }, /* Baltic again */
+ { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most
+ * programs that support this support UTF8
+ */
+ { "windows-1251", 0 }, /* Russian */
+ { 0, 0 }
+};
+
+unsigned int encoding_map[256 * 256];
+
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+#define UCS "UCS-4BE"
+#else
+#define UCS "UCS-4LE"
+#endif
+
+
struct {
char *charset;
char *iconv_name;
@@ -195,7 +232,6 @@ const char *
g_mime_charset_name (const char *charset)
{
char *name, *iconv_name, *buf;
- int codepage;
if (charset == NULL)
return NULL;
@@ -212,26 +248,37 @@ g_mime_charset_name (const char *charset)
return iconv_name;
if (!strncmp (name, "iso", 3)) {
+ int iso, codepage;
+ char *p;
+
buf = name + 3;
if (*buf == '-' || *buf == '_')
buf++;
- g_assert (strncmp (buf, "8859", 4) == 0);
+ iso = strtoul (buf, &p, 10);
- buf += 4;
+ g_assert (p > buf);
+
+ buf = p;
if (*buf == '-' || *buf == '_')
buf++;
- codepage = atoi (buf);
- g_assert (codepage > 0);
+ codepage = strtoul (buf, &p, 10);
+ if (p > buf) {
+ /* codepage is numeric */
#ifdef __aix__
- if (codepage == 13)
- iconv_name = g_strdup ("IBM-921");
- else
+ if (codepage == 13)
+ iconv_name = g_strdup ("IBM-921");
+ else
#endif /* __aix__ */
- iconv_name = g_strdup_printf (CANONICAL_ISO_FORMAT,
- 8859, codepage);
+ iconv_name = g_strdup_printf (CANONICAL_ISO_D_FORMAT,
+ iso, codepage);
+ } else {
+ /* codepage is a string? */
+ iconv_name = g_strdup_printf (CANONICAL_ISO_S_FORMAT,
+ iso, p);
+ }
} else if (!strncmp (name, "windows-", 8)) {
buf = name + 8;
if (!strncmp (buf, "cp", 2))
@@ -253,3 +300,323 @@ g_mime_charset_name (const char *charset)
return iconv_name;
}
+
+
+
+/* unicode_* and unichar are stolen from glib2... */
+typedef guint32 unichar;
+
+static const char unicode_skip[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
+
+#define UTF8_COMPUTE(ch, mask, len) \
+ if (ch < 128) \
+ { \
+ len = 1; \
+ mask = 0x7f; \
+ } \
+ else if ((ch & 0xe0) == 0xc0) \
+ { \
+ len = 2; \
+ mask = 0x1f; \
+ } \
+ else if ((ch & 0xf0) == 0xe0) \
+ { \
+ len = 3; \
+ mask = 0x0f; \
+ } \
+ else if ((ch & 0xf8) == 0xf0) \
+ { \
+ len = 4; \
+ mask = 0x07; \
+ } \
+ else if ((ch & 0xfc) == 0xf8) \
+ { \
+ len = 5; \
+ mask = 0x03; \
+ } \
+ else if ((ch & 0xfe) == 0xfc) \
+ { \
+ len = 6; \
+ mask = 0x01; \
+ } \
+ else \
+ len = -1;
+
+#define UTF8_GET(result, chars, count, mask, len) \
+ (result) = (chars)[0] & (mask); \
+ for ((count) = 1; (count) < (len); ++(count)) \
+ { \
+ if (((chars)[(count)] & 0xc0) != 0x80) \
+ { \
+ (result) = -1; \
+ break; \
+ } \
+ (result) <<= 6; \
+ (result) |= ((chars)[(count)] & 0x3f); \
+ }
+
+#define UNICODE_VALID(ch) \
+ ((ch) < 0x110000 && \
+ ((ch) < 0xD800 || (ch) >= 0xE000) && \
+ (ch) != 0xFFFE && (ch) != 0xFFFF)
+
+
+
+#define unicode_next_char(p) (char *)((p) + unicode_skip[*(unsigned char *)(p)])
+
+unichar
+unicode_get_char (const char *p)
+{
+ unsigned char c = (unsigned char) *p;
+ int i, mask = 0, len;
+ unichar result;
+
+ UTF8_COMPUTE (c, mask, len);
+ if (len == -1)
+ return (unichar) -1;
+ UTF8_GET (result, p, i, mask, len);
+
+ return result;
+}
+
+gboolean
+unichar_validate (unichar ch)
+{
+ return UNICODE_VALID (ch);
+}
+
+
+
+typedef struct _Charset {
+ unsigned int mask;
+ unsigned int level;
+} Charset;
+
+static void
+charset_init (Charset *charset)
+{
+ charset->mask = ~0;
+ charset->level = 0;
+}
+
+static void
+charset_step (Charset *charset, const char *in, size_t len)
+{
+ register const char *inptr = in;
+ const char *inend = in + len;
+ register unsigned int mask;
+ register int level;
+
+ mask = charset->mask;
+ level = charset->level;
+
+ while (inptr < inend) {
+ const char *newinptr;
+ unichar c;
+
+ newinptr = unicode_next_char (inptr);
+ c = unicode_get_char (inptr);
+ if (newinptr == NULL || !unichar_validate (c)) {
+ inptr++;
+ continue;
+ }
+
+ inptr = newinptr;
+ if (c <= 0xffff) {
+ mask &= charset_mask (c);
+
+ if (c >= 128 && c < 256)
+ level = MAX (level, 1);
+ else if (c >= 256)
+ level = MAX (level, 2);
+ } else {
+ mask = 0;
+ level = MAX (level, 2);
+ }
+ }
+
+ charset->mask = mask;
+ charset->level = level;
+}
+
+static const char *
+charset_best_mask (unsigned int mask)
+{
+ int i;
+
+ for (i = 0; i < sizeof (charinfo) / sizeof (charinfo[0]); i++) {
+ if (charinfo[i].bit & mask)
+ return charinfo[i].name;
+ }
+
+ return "UTF-8";
+}
+
+static const char *
+charset_best_name (Charset *charset)
+{
+ if (charset->level == 1)
+ return "iso-8859-1";
+ else if (charset->level == 2)
+ return charset_best_mask (charset->mask);
+ else
+ return NULL;
+}
+
+
+/**
+ * g_mime_charset_best:
+ * @in: a UTF-8 text buffer
+ * @inlen: length of @in
+ *
+ * Computes the best charset to use to encode this text buffer.
+ *
+ * Returns the charset name best suited for the input text or %NULL if
+ * it is US-ASCII safe.
+ **/
+const char *
+g_mime_charset_best (const char *in, size_t inlen)
+{
+ Charset charset;
+
+ charset_init (&charset);
+ charset_step (&charset, in, inlen);
+ return charset_best_name (&charset);
+}
+
+
+#ifdef BUILD_CHARSET_MAP
+
+#include <errno.h>
+#include <iconv.h>
+
+/* the following functions are copied from glib2... */
+
+
+int main (int argc, char **argv)
+{
+ char *inptr, *outptr;
+ size_t inlen, outlen;
+ guint32 out[128];
+ char in[128];
+ int i, j, k;
+ int max, min;
+ int bit = 0x01;
+ int bytes;
+ iconv_t cd;
+
+ /* don't count the terminator */
+ bytes = ((sizeof (tables) / sizeof (tables[0])) + 7 - 1) / 8;
+
+ for (i = 0; i < 128; i++)
+ in[i] = i + 128;
+
+ for (j = 0; tables[j].name; j++) {
+ cd = iconv_open (UCS, tables[j].name);
+ inptr = in;
+ outptr = (char *)(out);
+ inlen = sizeof (in);
+ outlen = sizeof (out);
+ while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) {
+ if (errno == EILSEQ) {
+ inptr++;
+ inlen--;
+ } else {
+ g_warning ("%s\n", g_strerror (errno));
+ exit (1);
+ }
+ }
+ iconv_close (cd);
+
+ for (i = 0; i < 128 - outlen / 4; i++) {
+ encoding_map[i] |= bit;
+ encoding_map[out[i]] |= bit;
+ }
+
+ tables[j].bit = bit;
+ bit <<= 1;
+ }
+
+ printf ("/* This file is automatically generated: DO NOT EDIT */\n\n");
+
+ for (i = 0; i < 256; i++) {
+ /* first, do we need this block? */
+ for (k = 0; k < bytes; k++) {
+ for (j = 0; j < 256; j++) {
+ if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0)
+ break;
+ }
+ if (j < 256) {
+ /* yes, dump it */
+ printf ("static unsigned char m%02x%x[256] = {\n\t", i, k);
+ for (j = 0; j < 256; j++) {
+ printf ("0x%02x, ", (encoding_map[i * 256 + j] >> (k * 8)) & 0xff);
+ if (((j + 1) & 7) == 0 && j < 255)
+ printf ("\n\t");
+ }
+ printf ("\n};\n\n");
+ }
+ }
+ }
+
+ printf ("struct {\n");
+ for (k = 0; k < bytes; k++) {
+ printf ("\tunsigned char *bits%d;\n", k);
+ }
+
+ printf ("} charmap[256] = {\n\t");
+ for (i = 0; i < 256; i++) {
+ /* first, do we need this block? */
+ printf ("{ ");
+ for (k = 0; k < bytes; k++) {
+ for (j = 0; j < 256; j++) {
+ if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0)
+ break;
+ }
+
+ if (j < 256) {
+ printf ("m%02x%x, ", i, k);
+ } else {
+ printf ("0, ");
+ }
+ }
+
+ printf ("}, ");
+ if (((i + 1) & 7) == 0 && i < 255)
+ printf ("\n\t");
+ }
+ printf ("\n};\n\n");
+
+ printf ("struct {\n\tconst char *name;\n\tunsigned int bit;\n} charinfo[] = {\n");
+ for (j = 0; tables[j].name; j++) {
+ printf ("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
+ }
+ printf ("};\n\n");
+
+ printf("#define charset_mask(x) \\\n");
+ for (k = 0; k < bytes; k++) {
+ if (k != 0)
+ printf ("\t| ");
+ else
+ printf ("\t");
+
+ printf ("(charmap[(x) >> 8].bits%d ? charmap[(x) >> 8].bits%d[(x) & 0xff] << %d : 0)",
+ k, k, k * 8);
+
+ if (k < bytes - 1)
+ printf ("\t\\\n");
+ }
+ printf ("\n\n");
+
+ return 0;
+}
+#endif /* BUILD_CHARSET_MAP */
diff --git a/gmime/gmime-charset.h b/gmime/gmime-charset.h
index 544815d8..83be12e5 100644
--- a/gmime/gmime-charset.h
+++ b/gmime/gmime-charset.h
@@ -30,6 +30,7 @@ extern "C" {
#endif /* __cplusplus */
#include <glib.h>
+#include <sys/types.h>
void g_mime_charset_init (void);
@@ -37,6 +38,8 @@ const char *g_mime_charset_locale_name (void);
const char *g_mime_charset_name (const char *charset);
+const char *g_mime_charset_best (const char *in, size_t inlen);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/gmime/gmime-iconv-utils.c b/gmime/gmime-iconv-utils.c
index 48b6e102..3ba8eb06 100644
--- a/gmime/gmime-iconv-utils.c
+++ b/gmime/gmime-iconv-utils.c
@@ -25,8 +25,10 @@
#endif
#include <glib.h>
-#include <errno.h>
+#include <stdio.h>
#include <string.h>
+#include <errno.h>
+
#include "gmime-iconv-utils.h"
#include "gmime-charset.h"
diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c
index 89de69ab..94fea84a 100644
--- a/gmime/gmime-param.c
+++ b/gmime/gmime-param.c
@@ -26,6 +26,7 @@
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
@@ -35,12 +36,16 @@
#include "gmime-table-private.h"
#include "gmime-charset.h"
#include "gmime-utils.h"
+#include "gmime-iconv.h"
+#include "gmime-iconv-utils.h"
#include "strlib.h"
-#define d(x)
+#define d(x) x
#define w(x)
+extern int gmime_interfaces_utf8;
+
static unsigned char tohex[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
@@ -73,18 +78,18 @@ g_mime_param_new (const char *name, const char *value)
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
-static char *
-hex_decode (const char *in, unsigned int len)
+static size_t
+hex_decode (const unsigned char *in, size_t len, unsigned char *out)
{
- register unsigned char *inptr, *outptr;
+ register const unsigned char *inptr;
+ register unsigned char *outptr;
const unsigned char *inend;
- char *outbuf;
- inend = (const unsigned char *) in + len;
+ inptr = in;
+ inend = in + len;
- outptr = outbuf = g_malloc (len);
+ outptr = out;
- inptr = (unsigned char *) in;
while (inptr < inend) {
if (*inptr == '%') {
if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
@@ -98,41 +103,72 @@ hex_decode (const char *in, unsigned int len)
*outptr = '\0';
- return outbuf;
+ return outptr - out;
}
/* an rfc2184 encoded string looks something like:
* us-ascii'en'This%20is%20even%20more%20
*/
static char *
-rfc2184_decode (const char *in, unsigned int len)
+rfc2184_decode (const char *in, size_t len)
{
const char *inptr = in;
const char *inend = in + len;
- /*const char *charset;*/
+ const char *charset;
char *decoded = NULL;
- /*char *encoding;*/
+ char *charenc;
/* skips to the end of the charset / beginning of the locale */
inptr = memchr (inptr, '\'', len);
if (!inptr)
return NULL;
-#if 0
- /* someday we'll need to do something with the charset... */
- encoding = g_strndup (in, (unsigned) (inptr - in));
- charset = g_mime_iconv_charset_name (encoding);
- g_free (encoding);
-#endif
+ /* save the charset */
+ len = inptr - in;
+ charenc = alloca (len + 1);
+ memcpy (charenc, in, len);
+ charenc[len] = '\0';
+ charset = g_mime_charset_name (charenc);
/* skip to the end of the locale */
- inptr = memchr (inptr + 1, '\'', (unsigned) (inend - inptr - 1));
+ inptr = memchr (inptr + 1, '\'', (unsigned int) (inend - inptr - 1));
if (!inptr)
return NULL;
inptr++;
- if (inptr < inend)
- decoded = hex_decode (inptr, (unsigned) (inend - inptr));
+ if (inptr < inend) {
+ len = inend - inptr;
+ if (gmime_interfaces_utf8 && strcasecmp (charset, "UTF-8") != 0) {
+ char *udecoded;
+ iconv_t cd;
+
+ decoded = alloca (len + 1);
+ len = hex_decode (inptr, len, decoded);
+
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1) {
+ d(g_warning ("Cannot convert from %s to UTF-8, param display may "
+ "be corrupt: %s", charset, g_strerror (errno)));
+ charset = g_mime_charset_locale_name ();
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1)
+ return NULL;
+ }
+
+ udecoded = g_mime_iconv_strndup (cd, decoded, len);
+ g_mime_iconv_close (cd);
+
+ if (!udecoded) {
+ d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
+ "corrupt: %s", len, decoded, g_strerror (errno)));
+ }
+
+ decoded = udecoded;
+ } else {
+ decoded = g_malloc (len + 1);
+ hex_decode (inptr, len, decoded);
+ }
+ }
return decoded;
}
@@ -202,11 +238,11 @@ decode_quoted_string (const char **in)
if (*inptr == '"') {
start++;
- out = g_strndup (start, (unsigned) (inptr - start));
+ out = g_strndup (start, (unsigned int) (inptr - start));
inptr++;
} else {
/* string wasn't properly quoted */
- out = g_strndup (start, (unsigned) (inptr - start));
+ out = g_strndup (start, (unsigned int) (inptr - start));
}
}
@@ -228,7 +264,7 @@ decode_token (const char **in)
inptr++;
if (inptr > start) {
*in = inptr;
- return g_strndup (start, (unsigned) (inptr - start));
+ return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
@@ -266,7 +302,7 @@ decode_param_token (const char **in)
inptr++;
if (inptr > start) {
*in = inptr;
- return g_strndup (start, (unsigned) (inptr - start));
+ return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
@@ -324,6 +360,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218
{
gboolean is_rfc2184_encoded = FALSE;
gboolean is_rfc2184 = FALSE;
+ gboolean valid_utf8 = FALSE;
const char *inptr = *in;
char *param, *value = NULL;
int rfc2184_part = -1;
@@ -348,6 +385,7 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218
val = rfc2184_decode (value, strlen (value));
if (val) {
+ valid_utf8 = TRUE;
g_free (value);
value = val;
}
@@ -366,9 +404,29 @@ decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc218
val = g_mime_utils_8bit_header_decode (value);
if (val) {
+ valid_utf8 = TRUE;
g_free (value);
value = val;
}
+ } else {
+ if (gmime_interfaces_utf8)
+ valid_utf8 = !g_mime_utils_text_is_8bit (value, strlen (value));
+ }
+ }
+
+ if (gmime_interfaces_utf8 && value && !valid_utf8) {
+ /* A (broken) mailer has sent us an unencoded 8bit value.
+ * Attempt to save it by assuming it's in the user's
+ * locale and converting to UTF-8 */
+ char *buf;
+
+ buf = g_mime_iconv_locale_to_utf8 (value);
+ if (buf) {
+ g_free (value);
+ value = buf;
+ } else {
+ d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
+ param, value, g_strerror (errno)));
}
}
@@ -399,8 +457,12 @@ decode_param_list (const char **in)
char *name, *value;
/* invalid format? */
- if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0)
+ if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) {
+ if (*inptr == ';') {
+ continue;
+ }
break;
+ }
if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) {
/* rfc2184 allows a parameter to be broken into multiple parts
@@ -571,15 +633,15 @@ g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
static char *
encode_param (const unsigned char *in, gboolean *encoded)
{
- const unsigned char *inptr;
- char *outstr, *charset;
- int encoding;
+ register const unsigned char *inptr;
+ unsigned char *outbuf = NULL;
+ iconv_t cd = (iconv_t) -1;
+ const char *charset = NULL;
+ char *outstr;
GString *out;
*encoded = FALSE;
- g_return_val_if_fail (in != NULL, NULL);
-
for (inptr = in; *inptr && inptr - in < GMIME_FOLD_LEN; inptr++)
if (*inptr > 127)
break;
@@ -587,29 +649,53 @@ encode_param (const unsigned char *in, gboolean *encoded)
if (*inptr == '\0')
return g_strdup (in);
+ if (*inptr > 127) {
+ if (gmime_interfaces_utf8)
+ charset = g_mime_charset_best (in, strlen (in));
+ else
+ charset = g_mime_charset_locale_name ();
+ }
+
+ if (!charset)
+ charset = "iso-8859-1";
+
+ if (gmime_interfaces_utf8) {
+ if (strcasecmp (charset, "UTF-8") != 0) {
+ charset = g_mime_charset_name (charset);
+ cd = g_mime_iconv_open (charset, "UTF-8");
+ }
+
+ if (cd == (iconv_t) -1)
+ charset = "UTF-8";
+ }
+
+ if (cd != (iconv_t) -1) {
+ outbuf = g_mime_iconv_strdup (cd, in);
+ g_mime_iconv_close (cd);
+ inptr = outbuf;
+ } else {
+ inptr = in;
+ }
+
+ /* FIXME: set the 'language' as well, assuming we can get that info...? */
out = g_string_new ("");
- inptr = in;
- encoding = 0;
+ g_string_sprintfa (out, "%s''", charset);
+
while (inptr && *inptr) {
- unsigned int c = *inptr++ & 0xff;
+ unsigned char c = *inptr++;
+
+ /* FIXME: make sure that '\'', '*', and ';' are also encoded */
- if (c > 127 && c < 256) {
- encoding = MAX (encoding, 1);
+ if (c > 127) {
g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
} else if (is_lwsp (c) || !(gmime_special_table[c] & IS_ESAFE)) {
g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
} else {
- g_string_append_c (out, (char) c);
+ g_string_append_c (out, c);
}
}
- if (encoding) {
- charset = g_strdup_printf ("%s''", g_mime_charset_locale_name ());
- g_string_prepend (out, charset);
- g_free (charset);
- } else {
- g_string_prepend (out, "iso-8859-1''");
- }
+ g_free (outbuf);
outstr = out->str;
g_string_free (out, FALSE);
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index ed01055b..ae202aae 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
@@ -40,8 +41,9 @@
#include "gmime-part.h"
#include "gmime-charset.h"
#include "gmime-iconv.h"
+#include "gmime-iconv-utils.h"
-#define d(x)
+#define d(x) x
#ifndef HAVE_ISBLANK
#define isblank(c) (c == ' ' || c == '\t')
@@ -67,6 +69,9 @@
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
#define DATE_TOKEN_HAS_SIGN (1 << 7)
+/* from gmime.c */
+extern int gmime_interfaces_utf8;
+
static char *base64_alphabet =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -994,19 +999,18 @@ quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
return -1;
}
-#define is_8bit_word_encoded(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
+#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
static unsigned char *
-decode_encoded_8bit_word (const unsigned char *word)
+rfc2047_decode_word (const unsigned char *in, size_t inlen)
{
const register unsigned char *inptr;
const unsigned char *inend;
- size_t len;
- len = strlen (word);
+ inptr = in + 2;
+ inend = in + inlen - 2;
- inptr = word + 2;
- inend = word + len - 2;
+ d(fprintf (stderr, "decoding %.*s\n", inlen, in));
inptr = memchr (inptr, '?', inend - inptr);
if (inptr && inptr[2] == '?') {
@@ -1015,16 +1019,14 @@ decode_encoded_8bit_word (const unsigned char *word)
int state = 0;
int save = 0;
- d(fprintf (stderr, "encoding is '%c'\n", inptr[0]));
-
inptr++;
+
switch (*inptr) {
case 'B':
case 'b':
inptr += 2;
decoded = alloca (inend - inptr);
declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save);
- return g_strndup (decoded, declen);
break;
case 'Q':
case 'q':
@@ -1036,13 +1038,63 @@ decode_encoded_8bit_word (const unsigned char *word)
d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
return NULL;
}
-
- return g_strndup (decoded, declen);
break;
default:
d(fprintf (stderr, "unknown encoding\n"));
return NULL;
}
+
+ if (gmime_interfaces_utf8) {
+ const char *charset;
+ unsigned char *buf;
+ char *charenc, *p;
+ size_t len;
+ iconv_t cd;
+
+ len = (inptr - 3) - (in + 2);
+ charenc = alloca (len + 1);
+ memcpy (charenc, in + 2, len);
+ charenc[len] = '\0';
+
+ /* rfc2231 updates rfc2047 encoded words...
+ * The ABNF given in RFC 2047 for encoded-words is:
+ * encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
+ * This specification changes this ABNF to:
+ * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
+ */
+
+ /* trim off the 'language' part if it's there... */
+ p = strchr (charenc, '*');
+ if (p)
+ *p = '\0';
+
+ /* slight optimization */
+ if (!strcasecmp (charset, "UTF-8"))
+ return g_strndup (decoded, declen);
+
+ charset = g_mime_charset_name (charenc);
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1) {
+ d(g_warning ("Cannot convert from %s to UTF-8, header display may "
+ "be corrupt: %s", charset, g_strerror (errno)));
+ charset = g_mime_charset_locale_name ();
+ cd = g_mime_iconv_open ("UTF-8", charset);
+ if (cd == (iconv_t) -1)
+ return NULL;
+ }
+
+ buf = g_mime_iconv_strndup (cd, decoded, declen);
+ g_mime_iconv_close (cd);
+
+ if (!buf) {
+ d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
+ "corrupt: %s", declen, decoded, g_strerror (errno)));
+ }
+
+ return buf;
+ } else {
+ return g_strndup (decoded, declen);
+ }
}
return NULL;
@@ -1080,8 +1132,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in)
const unsigned char *word;
gboolean was_encoded;
- if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len)))
- word = dword = decode_encoded_8bit_word (atom->str);
+ if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len)))
+ word = dword = rfc2047_decode_word (atom->str, atom->len);
else
word = atom->str;
@@ -1135,8 +1187,8 @@ g_mime_utils_8bit_header_decode (const unsigned char *in)
const unsigned char *word;
gboolean was_encoded;
- if ((was_encoded = is_8bit_word_encoded (atom->str, atom->len)))
- word = dword = decode_encoded_8bit_word (atom->str);
+ if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len)))
+ word = dword = rfc2047_decode_word (atom->str, atom->len);
else
word = atom->str;
@@ -1195,16 +1247,46 @@ quoted_encode (const unsigned char *in, size_t len, unsigned char *out, gushort
}
static char *
-encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_was_encoded)
+rfc2047_encode_word (const unsigned char *word, gushort safemask)
{
unsigned char *encoded, *ptr;
+ unsigned char *uword = NULL;
+ iconv_t cd = (iconv_t) -1;
size_t enclen, pos, len;
+ const char *charset;
int state = 0;
int save = 0;
char encoding;
len = strlen (word);
+ if (gmime_interfaces_utf8) {
+ charset = g_mime_charset_best (word, len);
+ if (!charset)
+ charset = "iso-8859-1";
+ } else {
+ charset = g_mime_charset_locale_name ();
+ }
+
+ if (gmime_interfaces_utf8) {
+ if (strcasecmp (charset, "UTF-8") != 0) {
+ charset = g_mime_charset_name (charset);
+ cd = g_mime_iconv_open (charset, "UTF-8");
+ }
+
+ if (cd != (iconv_t) -1) {
+ uword = g_mime_iconv_strndup (cd, word, len);
+ g_mime_iconv_close (cd);
+ }
+
+ if (uword) {
+ len = strlen (uword);
+ word = uword;
+ } else {
+ charset = "UTF-8";
+ }
+ }
+
switch (g_mime_utils_best_encoding (word, len)) {
case GMIME_PART_ENCODING_BASE64:
enclen = BASE64_ENCODE_LEN (len);
@@ -1232,16 +1314,16 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa
break;
default:
- if (this_was_encoded)
- *this_was_encoded = FALSE;
-
- return g_strdup (word);
+ g_assert_not_reached ();
}
- if (this_was_encoded)
- *this_was_encoded = TRUE;
+ g_free (uword);
+
+ uword = g_strdup_printf ("=?%s?%c?%s?=", charset, encoding, encoded);
- return g_strdup_printf ("=?%s?%c?%s?=", g_mime_charset_locale_name (), encoding, encoded);
+ fprintf (stderr, "resultant encoded word: %s\n", uword);
+
+ return uword;
}
@@ -1257,7 +1339,7 @@ encode_8bit_word (const unsigned char *word, gushort safemask, gboolean *this_wa
char *
g_mime_utils_8bit_header_encode_phrase (const unsigned char *in)
{
- return encode_8bit_word (in, IS_PSAFE, NULL);
+ return rfc2047_encode_word (in, IS_PSAFE);
}
@@ -1276,29 +1358,29 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
register const unsigned char *inptr;
GString *out, *word, *lwsp;
unsigned char *encoded;
- gboolean is8bit = FALSE;
+ gboolean encode_word = FALSE;
gboolean last_was_encoded = FALSE;
gboolean last_was_space = FALSE;
out = g_string_new ("");
word = g_string_new ("");
lwsp = g_string_new ("");
- inptr = (guchar *) in;
+
+ inptr = in;
while (inptr && *inptr) {
unsigned char c = *inptr++;
if (isspace (c) && !last_was_space) {
- gboolean this_was_encoded = FALSE;
char *eword;
- if (is8bit)
- eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded);
+ if (encode_word)
+ eword = rfc2047_encode_word (word->str, IS_ESAFE);
else
- eword = g_strdup (word->str);
+ eword = word->str;
/* append any whitespace */
- if (last_was_encoded && this_was_encoded) {
+ if (last_was_encoded && encode_word) {
/* we need to encode the whitespace */
unsigned char *elwsp;
size_t len;
@@ -1307,20 +1389,22 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE);
elwsp[len] = '\0';
- g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp);
+ g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp);
} else {
g_string_append (out, lwsp->str);
}
- /* append the encoded word */
+ /* append the (encoded) word */
g_string_append (out, eword);
- g_free (eword);
+
+ if (encode_word)
+ g_free (eword);
g_string_truncate (lwsp, 0);
g_string_truncate (word, 0);
- last_was_encoded = this_was_encoded;
- is8bit = FALSE;
+ last_was_encoded = encode_word;
+ encode_word = FALSE;
}
if (isspace (c)) {
@@ -1328,7 +1412,7 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
last_was_space = TRUE;
} else {
if (c > 127)
- is8bit = TRUE;
+ encode_word = TRUE;
g_string_append_c (word, c);
last_was_space = FALSE;
@@ -1336,16 +1420,15 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
}
if (word->len || lwsp->len) {
- gboolean this_was_encoded = FALSE;
char *eword;
- if (is8bit)
- eword = encode_8bit_word (word->str, IS_ESAFE, &this_was_encoded);
+ if (encode_word)
+ eword = rfc2047_encode_word (word->str, IS_ESAFE);
else
- eword = g_strdup (word->str);
+ eword = word->str;
/* append any whitespace */
- if (last_was_encoded && this_was_encoded) {
+ if (last_was_encoded && encode_word) {
/* we need to encode the whitespace */
unsigned char *elwsp;
size_t len;
@@ -1354,14 +1437,16 @@ g_mime_utils_8bit_header_encode (const unsigned char *in)
len = quoted_encode (lwsp->str, lwsp->len, elwsp, IS_SPACE);
elwsp[len] = '\0';
- g_string_sprintfa (out, " =?%s?q?%s?= ", g_mime_charset_locale_name (), elwsp);
+ g_string_sprintfa (out, " =?iso-8859-1?q?%s?= ", elwsp);
} else {
g_string_append (out, lwsp->str);
}
/* append the encoded word */
g_string_append (out, eword);
- g_free (eword);
+
+ if (encode_word)
+ g_free (eword);
}
g_string_free (lwsp, TRUE);
diff --git a/gmime/gmime.c b/gmime/gmime.c
new file mode 100644
index 00000000..20c422c9
--- /dev/null
+++ b/gmime/gmime.c
@@ -0,0 +1,56 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Authors: Jeffrey Stedfast <fejj@ximian.com>
+ *
+ * Copyright 2002 Ximian, Inc. (www.ximian.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "gmime.h"
+
+
+int gmime_interfaces_utf8 = FALSE;
+
+
+/**
+ * g_mime_init:
+ * @flags: initialization flags; GMIME_INIT_FLAG_UTF8 sets gmime_interfaces_utf8
+ *
+ * Initializes GMime. Only the first call has any effect; repeat calls return at once.
+ **/
+void
+g_mime_init (guint32 flags)
+{
+ static int initialized = FALSE;
+
+ if (initialized)
+ return;
+
+ initialized = TRUE;
+
+ if (flags & GMIME_INIT_FLAG_UTF8)
+ gmime_interfaces_utf8 = TRUE;
+
+ g_mime_charset_init ();
+
+ g_mime_iconv_init ();
+}
diff --git a/gmime/gmime.h.in b/gmime/gmime.h.in
index 66fa4268..cc586168 100644
--- a/gmime/gmime.h.in
+++ b/gmime/gmime.h.in
@@ -51,6 +51,7 @@
#include "gmime-filter-crlf.h"
#include "gmime-filter-from.h"
#include "gmime-filter-html.h"
+#include "gmime-filter-yenc.h"
/* GMIME version */
static const guint gmime_major_version = @GMIME_MAJOR_VERSION@;
@@ -65,4 +66,8 @@ static const guint gmime_binary_age = 0;
(gmime_major_version == (major) && gmime_minor_version == (minor) && \
gmime_micro_version >= (micro)))
+#define GMIME_INIT_FLAG_UTF8 (1 << 0)
+
+void g_mime_init (guint32 flags);
+
#endif /* __GMIME_H__ */
diff --git a/gmime/internet-address.c b/gmime/internet-address.c
index cc859774..12fa48f0 100644
--- a/gmime/internet-address.c
+++ b/gmime/internet-address.c
@@ -27,15 +27,20 @@
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#include "internet-address.h"
#include "gmime-table-private.h"
#include "gmime-utils.h"
+#include "gmime-iconv-utils.h"
#define w(x) x
+extern int gmime_interfaces_utf8;
+
+
/**
* internet_address_new:
*
@@ -558,48 +563,6 @@ decode_quoted_string (const char **in)
return out;
}
-#if 0
-static char *
-decode_quoted_string (const char **in)
-{
- const char *inptr = *in;
- char *out = NULL, *outptr;
- int outlen;
- int c;
-
- decode_lwsp (&inptr);
- if (*inptr == '"') {
- const char *intmp;
- int skip = 0;
-
- /* first, calc length */
- inptr++;
- intmp = inptr + 1;
- while ((c = *intmp++) && c != '"') {
- if (c == '\\' && *intmp) {
- intmp++;
- skip++;
- }
- }
-
- outlen = intmp - inptr - skip;
- out = outptr = g_malloc (outlen + 1);
-
- while ((c = *inptr++) && c != '"') {
- if (c == '\\' && *inptr) {
- c = *inptr++;
- }
- *outptr++ = c;
- }
- *outptr = '\0';
- }
-
- *in = inptr;
-
- return out;
-}
-#endif
-
static char *
decode_atom (const char **in)
{
@@ -867,8 +830,26 @@ decode_mailbox (const char **in)
*in = inptr;
- if (addr->len)
+ if (addr->len) {
+ if (gmime_interfaces_utf8 && name && g_mime_utils_text_is_8bit (name->str, name->len)) {
+ /* A (broken) mailer has sent us an unencoded 8bit value.
+ * Attempt to save it by assuming it's in the user's
+ * locale and converting to UTF-8 */
+ char *buf;
+
+ buf = g_mime_iconv_locale_to_utf8 (name->str);
+ if (buf) {
+ g_string_truncate (name, 0);
+ g_string_append (name, buf);
+ g_free (buf);
+ } else {
+ (g_warning ("Failed to convert \"%s\" to UTF-8: %s",
+ name->str, g_strerror (errno)));
+ }
+ }
+
mailbox = internet_address_new_name (name ? name->str : NULL, addr->str);
+ }
g_string_free (addr, TRUE);
if (name)
diff --git a/internet-address.c b/internet-address.c
index cc859774..12fa48f0 100644
--- a/internet-address.c
+++ b/internet-address.c
@@ -27,15 +27,20 @@
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#include "internet-address.h"
#include "gmime-table-private.h"
#include "gmime-utils.h"
+#include "gmime-iconv-utils.h"
#define w(x) x
+extern int gmime_interfaces_utf8;
+
+
/**
* internet_address_new:
*
@@ -558,48 +563,6 @@ decode_quoted_string (const char **in)
return out;
}
-#if 0
-static char *
-decode_quoted_string (const char **in)
-{
- const char *inptr = *in;
- char *out = NULL, *outptr;
- int outlen;
- int c;
-
- decode_lwsp (&inptr);
- if (*inptr == '"') {
- const char *intmp;
- int skip = 0;
-
- /* first, calc length */
- inptr++;
- intmp = inptr + 1;
- while ((c = *intmp++) && c != '"') {
- if (c == '\\' && *intmp) {
- intmp++;
- skip++;
- }
- }
-
- outlen = intmp - inptr - skip;
- out = outptr = g_malloc (outlen + 1);
-
- while ((c = *inptr++) && c != '"') {
- if (c == '\\' && *inptr) {
- c = *inptr++;
- }
- *outptr++ = c;
- }
- *outptr = '\0';
- }
-
- *in = inptr;
-
- return out;
-}
-#endif
-
static char *
decode_atom (const char **in)
{
@@ -867,8 +830,26 @@ decode_mailbox (const char **in)
*in = inptr;
- if (addr->len)
+ if (addr->len) {
+ if (gmime_interfaces_utf8 && name && g_mime_utils_text_is_8bit (name->str, name->len)) {
+ /* A (broken) mailer has sent us an unencoded 8bit value.
+ * Attempt to save it by assuming it's in the user's
+ * locale and converting to UTF-8 */
+ char *buf;
+
+ buf = g_mime_iconv_locale_to_utf8 (name->str);
+ if (buf) {
+ g_string_truncate (name, 0);
+ g_string_append (name, buf);
+ g_free (buf);
+ } else {
+ (g_warning ("Failed to convert \"%s\" to UTF-8: %s",
+ name->str, g_strerror (errno)));
+ }
+ }
+
mailbox = internet_address_new_name (name ? name->str : NULL, addr->str);
+ }
g_string_free (addr, TRUE);
if (name)
diff --git a/test-mime.c b/test-mime.c
index ae24227b..a2d0a421 100644
--- a/test-mime.c
+++ b/test-mime.c
@@ -422,6 +422,8 @@ test_date (void)
int main (int argc, char *argv[])
{
+ g_mime_init (GMIME_INIT_FLAG_UTF8);
+
test_date ();
test_onepart ();
diff --git a/test-parser.c b/test-parser.c
index 957acbfa..c1fb58ea 100644
--- a/test-parser.c
+++ b/test-parser.c
@@ -124,6 +124,8 @@ int main (int argc, char **argv)
if (fd == -1)
return 0;
+ g_mime_init (GMIME_INIT_FLAG_UTF8);
+
#ifdef STREAM_MMAP
stream = g_mime_stream_mmap_new (fd, PROT_READ, MAP_PRIVATE);
g_assert (stream != NULL);
diff --git a/tests/test-mime.c b/tests/test-mime.c
index ae24227b..a2d0a421 100644
--- a/tests/test-mime.c
+++ b/tests/test-mime.c
@@ -422,6 +422,8 @@ test_date (void)
int main (int argc, char *argv[])
{
+ g_mime_init (GMIME_INIT_FLAG_UTF8);
+
test_date ();
test_onepart ();
diff --git a/tests/test-parser.c b/tests/test-parser.c
index 957acbfa..c1fb58ea 100644
--- a/tests/test-parser.c
+++ b/tests/test-parser.c
@@ -124,6 +124,8 @@ int main (int argc, char **argv)
if (fd == -1)
return 0;
+ g_mime_init (GMIME_INIT_FLAG_UTF8);
+
#ifdef STREAM_MMAP
stream = g_mime_stream_mmap_new (fd, PROT_READ, MAP_PRIVATE);
g_assert (stream != NULL);