diff options
author | Milan Crha <mcrha@redhat.com> | 2021-05-27 10:39:34 +0200 |
---|---|---|
committer | Milan Crha <mcrha@redhat.com> | 2021-05-27 10:39:34 +0200 |
commit | 7c7bab74fab6d01d2ebdecf2afd1b0d31d2abc7f (patch) | |
tree | 41ca381f0800c0a1149144a04e6db0a04632e488 | |
parent | 686b9264f1df86eda95ecc76d0bd2167e123be66 (diff) | |
download | evolution-7c7bab74fab6d01d2ebdecf2afd1b0d31d2abc7f.tar.gz |
I#1428 - Can't import UTF-16 encoded .ics files
Closes https://gitlab.gnome.org/GNOME/evolution/-/issues/1428
-rw-r--r-- | src/addressbook/importers/evolution-vcard-importer.c | 183 | ||||
-rw-r--r-- | src/calendar/importers/icalendar-importer.c | 17 | ||||
-rw-r--r-- | src/e-util/e-import.c | 97 | ||||
-rw-r--r-- | src/e-util/e-import.h | 3 |
4 files changed, 124 insertions, 176 deletions
diff --git a/src/addressbook/importers/evolution-vcard-importer.c b/src/addressbook/importers/evolution-vcard-importer.c index 9117a7b150..1bf8d6d4c7 100644 --- a/src/addressbook/importers/evolution-vcard-importer.c +++ b/src/addressbook/importers/evolution-vcard-importer.c @@ -40,15 +40,6 @@ #include "evolution-addressbook-importers.h" -enum _VCardEncoding { - VCARD_ENCODING_NONE, - VCARD_ENCODING_UTF8, - VCARD_ENCODING_UTF16, - VCARD_ENCODING_LOCALE -}; - -typedef enum _VCardEncoding VCardEncoding; - typedef struct { EImport *import; EImportTarget *target; @@ -67,7 +58,6 @@ typedef struct { /* when opening book */ gchar *contents; - VCardEncoding encoding; } VCardImporter; static void vcard_import_done (VCardImporter *gci); @@ -250,110 +240,6 @@ vcard_import_contacts (gpointer data) } } -#define BOM (gunichar2)0xFEFF -#define ANTIBOM (gunichar2)0xFFFE - -static gboolean -has_bom (const gunichar2 *utf16) -{ - - if ((utf16 == NULL) || (*utf16 == '\0')) { - return FALSE; - } - - return ((*utf16 == BOM) || (*utf16 == ANTIBOM)); -} - -static void -fix_utf16_endianness (gunichar2 *utf16) -{ - gunichar2 *it; - - if ((utf16 == NULL) || (*utf16 == '\0')) { - return; - } - - if (*utf16 != ANTIBOM) { - return; - } - - for (it = utf16; *it != '\0'; it++) { - *it = GUINT16_SWAP_LE_BE (*it); - } -} - -/* Converts an UTF-16 string to an UTF-8 string removing the BOM character - * WARNING: this may modify the utf16 argument if the function detects the - * string isn't using the local endianness - */ -static gchar * -utf16_to_utf8 (gunichar2 *utf16) -{ - - if (utf16 == NULL) { - return NULL; - } - - fix_utf16_endianness (utf16); - - if (*utf16 == BOM) { - utf16++; - } - - return g_utf16_to_utf8 (utf16, -1, NULL, NULL, NULL); -} - -/* Actually check the contents of this file */ -static VCardEncoding -guess_vcard_encoding (const gchar *filename) -{ - FILE *handle; - gchar line[4096]; - gchar *line_utf8; - VCardEncoding encoding = VCARD_ENCODING_NONE; - - handle = g_fopen (filename, "r"); - if (handle == NULL) { - g_print ("\n"); - return VCARD_ENCODING_NONE; - } - - if (fgets (line, 4096, handle) == NULL) { - fclose (handle); - g_print ("\n"); - return VCARD_ENCODING_NONE; - } - fclose (handle); - - if (has_bom ((gunichar2 *) line)) { - gunichar2 *utf16 = (gunichar2 *) line; - /* Check for a BOM to try to detect UTF-16 encoded vcards - * (MacOSX address book creates such vcards for example) - */ - line_utf8 = utf16_to_utf8 (utf16); - if (line_utf8 == NULL) { - return VCARD_ENCODING_NONE; - } - encoding = VCARD_ENCODING_UTF16; - } else if (g_utf8_validate (line, -1, NULL)) { - line_utf8 = g_strdup (line); - encoding = VCARD_ENCODING_UTF8; - } else { - line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL); - if (line_utf8 == NULL) { - return VCARD_ENCODING_NONE; - } - encoding = VCARD_ENCODING_LOCALE; - } - - if (g_ascii_strncasecmp (line_utf8, "BEGIN:VCARD", 11) != 0) { - encoding = VCARD_ENCODING_NONE; - } - - g_free (line_utf8); - return encoding; -} - static void primary_selection_changed_cb (ESourceSelector *selector, EImportTarget *target) @@ -429,7 +315,7 @@ vcard_supported (EImport *ei, EImportImporter *im) { EImportTargetURI *s; - gchar *filename; + gchar *filename, *contents; gboolean retval; if (target->type != E_IMPORT_TARGET_URI) @@ -445,7 +331,9 @@ vcard_supported (EImport *ei, filename = g_filename_from_uri (s->uri_src, NULL, NULL); if (filename == NULL) return FALSE; - retval = (guess_vcard_encoding (filename) != VCARD_ENCODING_NONE); + contents = e_import_util_get_file_contents (filename, NULL); + retval = contents && g_ascii_strncasecmp (contents, "BEGIN:VCARD", 11) == 0; + g_free (contents); g_free (filename); return retval; @@ -482,22 +370,6 @@ book_client_connect_cb (GObject *source_object, } gci->book_client = E_BOOK_CLIENT (client); - - if (gci->encoding == VCARD_ENCODING_UTF16) { - gchar *tmp; - - gunichar2 *contents_utf16 = (gunichar2 *) gci->contents; - tmp = utf16_to_utf8 (contents_utf16); - g_free (gci->contents); - gci->contents = tmp; - - } else if (gci->encoding == VCARD_ENCODING_LOCALE) { - gchar *tmp; - tmp = g_locale_to_utf8 (gci->contents, -1, NULL, NULL, NULL); - g_free (gci->contents); - gci->contents = tmp; - } - gci->contactlist = eab_contact_list_from_string (gci->contents); g_free (gci->contents); gci->contents = NULL; @@ -520,7 +392,6 @@ vcard_import (EImport *ei, EImportTargetURI *s = (EImportTargetURI *) target; gchar *filename; gchar *contents; - VCardEncoding encoding; GError *error = NULL; filename = g_filename_from_uri (s->uri_src, NULL, &error); @@ -530,17 +401,9 @@ vcard_import (EImport *ei, return; } - encoding = guess_vcard_encoding (filename); - if (encoding == VCARD_ENCODING_NONE) { - g_free (filename); - /* This check is superfluous, we've already - * checked otherwise we can't get here ... */ - e_import_complete (ei, target, NULL); - return; - } - - if (!g_file_get_contents (filename, &contents, NULL, &error)) { + contents = e_import_util_get_file_contents (filename, &error); + if (!contents) { g_free (filename); e_import_complete (ei, target, error); g_clear_error (&error); @@ -553,7 +416,6 @@ vcard_import (EImport *ei, g_datalist_set_data (&target->data, "vcard-data", gci); gci->import = g_object_ref (ei); gci->target = target; - gci->encoding = encoding; gci->contents = contents; source = g_datalist_get_data (&target->data, "vcard-source"); @@ -580,44 +442,27 @@ vcard_get_preview (EImport *ei, GtkWidget *preview; GSList *contacts; gchar *contents; - VCardEncoding encoding; EImportTargetURI *s = (EImportTargetURI *) target; gchar *filename; + GError *error = NULL; - filename = g_filename_from_uri (s->uri_src, NULL, NULL); + filename = g_filename_from_uri (s->uri_src, NULL, &error); if (filename == NULL) { - g_message (G_STRLOC ": Couldn't get filename from URI '%s'", s->uri_src); - return NULL; - } - - encoding = guess_vcard_encoding (filename); - if (encoding == VCARD_ENCODING_NONE) { - g_free (filename); + g_message (G_STRLOC ": Couldn't get filename from URI '%s': %s", s->uri_src, error ? error->message : "Unknown error"); + g_clear_error (&error); return NULL; } - if (!g_file_get_contents (filename, &contents, NULL, NULL)) { - g_message (G_STRLOC ": Couldn't read file."); + contents = e_import_util_get_file_contents (filename, &error); + if (!contents) { + g_message (G_STRLOC ": Couldn't read file '%s': %s", filename, error ? error->message : "Unknown error"); + g_clear_error (&error); g_free (filename); return NULL; } g_free (filename); - if (encoding == VCARD_ENCODING_UTF16) { - gchar *tmp; - - gunichar2 *contents_utf16 = (gunichar2 *) contents; - tmp = utf16_to_utf8 (contents_utf16); - g_free (contents); - contents = tmp; - } else if (encoding == VCARD_ENCODING_LOCALE) { - gchar *tmp; - tmp = g_locale_to_utf8 (contents, -1, NULL, NULL, NULL); - g_free (contents); - contents = tmp; - } - contacts = eab_contact_list_from_string (contents); g_free (contents); diff --git a/src/calendar/importers/icalendar-importer.c b/src/calendar/importers/icalendar-importer.c index 97e26c9301..f2ddcaa018 100644 --- a/src/calendar/importers/icalendar-importer.c +++ b/src/calendar/importers/icalendar-importer.c @@ -572,7 +572,8 @@ ical_supported (EImport *ei, if (!filename) return FALSE; - if (g_file_get_contents (filename, &contents, NULL, NULL)) { + contents = e_import_util_get_file_contents (filename, NULL); + if (contents) { ICalComponent *icomp; icomp = e_cal_util_parse_ics_string (contents); @@ -606,7 +607,8 @@ ical_import (EImport *ei, return; } - if (!g_file_get_contents (filename, &contents, NULL, &error)) { + contents = e_import_util_get_file_contents (filename, &error); + if (!contents) { g_free (filename); e_import_complete (ei, target, error); g_clear_error (&error); @@ -640,7 +642,8 @@ ivcal_get_preview (EImport *ei, return NULL; } - if (!g_file_get_contents (filename, &contents, NULL, NULL)) { + contents = e_import_util_get_file_contents (filename, NULL); + if (!contents) { g_free (filename); return NULL; } @@ -707,9 +710,8 @@ vcal_supported (EImport *ei, if (!filename) return FALSE; - /* Z: Wow, this is *efficient* */ - - if (g_file_get_contents (filename, &contents, NULL, NULL)) { + contents = e_import_util_get_file_contents (filename, NULL); + if (contents) { VObject *vcal; ICalComponent *icomp; @@ -770,7 +772,8 @@ load_vcalendar_file (const gchar *filename) defaults.alarm_audio_fmttype = (gchar *) "audio/x-wav"; defaults.alarm_description = (gchar *) _("Reminder!"); - if (g_file_get_contents (filename, &contents, NULL, NULL)) { + contents = e_import_util_get_file_contents (filename, NULL); + if (contents) { VObject *vcal; /* parse the file */ diff --git a/src/e-util/e-import.c b/src/e-util/e-import.c index 62822d7954..0a1b65301a 100644 --- a/src/e-util/e-import.c +++ b/src/e-util/e-import.c @@ -467,6 +467,103 @@ e_import_target_new_home (EImport *import) import, E_IMPORT_TARGET_HOME, sizeof (EImportTargetHome)); } +/** + * e_import_util_get_file_contents: + * @filename: a local file name to read the contents from + * @error: (nullable): a return location for a #GError, or %NULL + * + * Reads the @filename content and returns it in a single-byte encoding. + * + * Returns: (transfer full) (nullable): the file content, or %NULL on error, + * in which case the @error is set. + * + * Since: 3.42 + **/ +gchar * +e_import_util_get_file_contents (const gchar *filename, + GError **error) +{ + gchar *raw_content = NULL; + gsize length = 0; + gunichar2 *utf16; + gboolean is_utf16, is_utf16_swapped; + gchar *res = NULL; + + g_return_val_if_fail (filename != NULL, NULL); + + if (!g_file_get_contents (filename, &raw_content, &length, error)) + return NULL; + + if (length < 2) + return raw_content; + + utf16 = (gunichar2 *) raw_content; + + /* check the UTF-16 BOM */ + is_utf16 = *utf16 == ((gunichar2) 0xFEFF); + is_utf16_swapped = *utf16 == ((gunichar2) 0xFFFE); + + if (length > 4 && !is_utf16 && !is_utf16_swapped) { + /* Only guess it can be UTF-16 without the leading BOM, which can fail + when the first two characters are encoded into multiple bytes... */ + is_utf16 = utf16[0] && !(utf16[0] & 0xFF00) && utf16[1] && !(utf16[1] & 0xFF00); + is_utf16_swapped = utf16[0] && !(utf16[0] & 0xFF) && utf16[1] && !(utf16[1] & 0xFF); + } + + if (is_utf16 || is_utf16_swapped) { + glong len = length / 2; + + /* Swap the bytes, to match the local endianness */ + if (is_utf16_swapped) { + gunichar2 *pos_str; + gsize npos; + + for (npos = 0, pos_str = utf16; npos < len; npos++, pos_str++) { + *pos_str = GUINT16_SWAP_LE_BE (*pos_str); + } + } + + if (*utf16 == ((gunichar2) 0xFEFF)) { + utf16++; + len--; + } + + res = g_utf16_to_utf8 (utf16, len, NULL, NULL, NULL); + + if (res) { + g_free (raw_content); + return res; + } + + /* Return back any changes */ + if (len != length / 2) { + utf16--; + len++; + } + + if (is_utf16_swapped) { + gunichar2 *pos_str; + gsize npos; + + for (npos = 0, pos_str = utf16; npos < len; npos++, pos_str++) { + *pos_str = GUINT16_SWAP_LE_BE (*pos_str); + } + } + } + + if (g_utf8_validate (raw_content, -1, NULL)) + return raw_content; + + res = g_locale_to_utf8 (raw_content, length, NULL, NULL, NULL); + + if (res) + g_free (raw_content); + else + res = raw_content; + + return res; +} + /* ********************************************************************** */ /* Import menu plugin handler */ diff --git a/src/e-util/e-import.h b/src/e-util/e-import.h index fd85a7ecff..1f9fef0974 100644 --- a/src/e-util/e-import.h +++ b/src/e-util/e-import.h @@ -245,6 +245,9 @@ EImportTargetURI * EImportTargetHome * e_import_target_new_home (EImport *import); +gchar * e_import_util_get_file_contents (const gchar *filename, + GError **error); + /* ********************************************************************** */ /* import plugin target, they are closely integrated */ |