diff options
Diffstat (limited to 'src/libtracker-extract/tracker-utils.c')
-rw-r--r-- | src/libtracker-extract/tracker-utils.c | 979 |
1 files changed, 0 insertions, 979 deletions
diff --git a/src/libtracker-extract/tracker-utils.c b/src/libtracker-extract/tracker-utils.c deleted file mode 100644 index 500234c21..000000000 --- a/src/libtracker-extract/tracker-utils.c +++ /dev/null @@ -1,979 +0,0 @@ -/* - * Copyright (C) 2009, Nokia <ivan.frade@nokia.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" - -#define _XOPEN_SOURCE -#define _XOPEN_SOURCE_EXTENDED 1 /* strptime is XPG4v2 */ - -#include <time.h> -#include <string.h> -#include <stdio.h> - -#include <libtracker-common/tracker-utils.h> -#include <libtracker-common/tracker-date-time.h> - -#include "tracker-utils.h" - -#ifndef HAVE_GETLINE - -#include <stddef.h> -#include <stdlib.h> -#include <limits.h> -#include <errno.h> - -#undef getdelim -#undef getline - -#define GROW_BY 80 - -#endif /* HAVE_GETLINE */ - -#define DATE_FORMAT_ISO8601 "%Y-%m-%dT%H:%M:%S%z" - -/** - * SECTION:tracker-utils - * @title: Data utilities - * @short_description: Functions for coalescing, merging, date - * handling and normalizing - * @stability: Stable - * @include: libtracker-extract/tracker-extract.h - * - * This API is provided to facilitate common more general functions - * which extractors may find useful. These functions are also used by - * the in-house extractors quite frequently. - **/ - -static const char *months[] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" -}; - -static const char imonths[] = { - '1', '2', '3', '4', '5', - '6', '7', '8', '9', '0', '1', '2' -}; - - -/** - * tracker_coalesce_strip: - * @n_values: the number of @... supplied - * @...: the string pointers to coalesce - * - * This function iterates through a series of string pointers passed - * using @... and returns the first which is not %NULL, not empty - * (i.e. "") and not comprised of one or more spaces (i.e. " "). - * - * The returned value is stripped using g_strstrip(). It is MOST - * important NOT to pass constant string pointers to this function! - * - * Returns: the first string pointer from those provided which - * matches, otherwise %NULL. - * - * Since: 0.10 - **/ -const gchar * -tracker_coalesce_strip (gint n_values, - ...) -{ - va_list args; - gint i; - const gchar *result = NULL; - - va_start (args, n_values); - - for (i = 0; i < n_values; i++) { - gchar *value; - - value = va_arg (args, gchar *); - if (!result && !tracker_is_blank_string (value)) { - result = (const gchar *) g_strstrip (value); - break; - } - } - - va_end (args); - - return result; -} - -// LCOV_EXCL_START - -/** - * tracker_coalesce: - * @n_values: the number of @Varargs supplied - * @...: the string pointers to coalesce - * - * This function iterates through a series of string pointers passed - * using @... and returns the first which is not %NULL, not empty - * (i.e. "") and not comprised of one or more spaces (i.e. " "). - * - * The returned value is stripped using g_strstrip(). All other values - * supplied are freed. It is MOST important NOT to pass constant - * string pointers to this function! - * - * Returns: the first string pointer from those provided which - * matches, otherwise %NULL. - * - * Since: 0.8 - * - * Deprecated: 0.10: Use tracker_coalesce_strip() instead. - * - **/ -gchar * -tracker_coalesce (gint n_values, - ...) -{ - va_list args; - gint i; - gchar *result = NULL; - - va_start (args, n_values); - - for (i = 0; i < n_values; i++) { - gchar *value; - - value = va_arg (args, gchar *); - if (!result && !tracker_is_blank_string (value)) { - result = g_strstrip (value); - } else { - g_free (value); - } - } - - va_end (args); - - return result; -} -// LCOV_EXCL_STOP - -/** - * tracker_merge_const: - * @delimiter: the delimiter to use when merging - * @n_values: the number of @... supplied - * @...: the string pointers to merge - * - * This function iterates through a series of string pointers passed - * using @... and returns a newly allocated string of the merged - * strings. - * - * The @delimiter can be %NULL. If specified, it will be used in - * between each merged string in the result. - * - * Returns: a newly-allocated string holding the result which should - * be freed with g_free() when finished with, otherwise %NULL. - * - * Since: 0.10 - **/ -gchar * -tracker_merge_const (const gchar *delimiter, - gint n_values, - ...) -{ - va_list args; - gint i; - GString *str = NULL; - - va_start (args, n_values); - - for (i = 0; i < n_values; i++) { - gchar *value; - - value = va_arg (args, gchar *); - if (value) { - if (!str) { - str = g_string_new (value); - } else { - if (delimiter) { - g_string_append (str, delimiter); - } - g_string_append (str, value); - } - } - } - - va_end (args); - - if (!str) { - return NULL; - } - - return g_string_free (str, FALSE); -} - -// LCOV_EXCL_START - -/** - * tracker_merge: - * @delimiter: the delimiter to use when merging - * @n_values: the number of @... supplied - * @...: the string pointers to merge - * - * This function iterates through a series of string pointers passed - * using @... and returns a newly allocated string of the merged - * strings. All passed strings are freed (don't pass const values)/ - * - * The @delimiter can be %NULL. If specified, it will be used in - * between each merged string in the result. - * - * Returns: a newly-allocated string holding the result which should - * be freed with g_free() when finished with, otherwise %NULL. - * - * Since: 0.8 - * - * Deprecated: 0.10: Use tracker_merge_const() instead. - **/ -gchar * -tracker_merge (const gchar *delimiter, - gint n_values, - ...) -{ - va_list args; - gint i; - GString *str = NULL; - - va_start (args, n_values); - - for (i = 0; i < n_values; i++) { - gchar *value; - - value = va_arg (args, gchar *); - if (value) { - if (!str) { - str = g_string_new (value); - } else { - if (delimiter) { - g_string_append (str, delimiter); - } - g_string_append (str, value); - } - g_free (value); - } - } - - va_end (args); - - if (!str) { - return NULL; - } - - return g_string_free (str, FALSE); -} - -/** - * tracker_text_normalize: - * @text: the text to normalize - * @max_words: the maximum words of @text to normalize - * @n_words: the number of words actually normalized - * - * This function iterates through @text checking for UTF-8 validity - * using g_utf8_get_char_validated(). For each character found, the - * %GUnicodeType is checked to make sure it is one fo the following - * values: - * <itemizedlist> - * <listitem><para>%G_UNICODE_LOWERCASE_LETTER</para></listitem> - * <listitem><para>%G_UNICODE_MODIFIER_LETTER</para></listitem> - * <listitem><para>%G_UNICODE_OTHER_LETTER</para></listitem> - * <listitem><para>%G_UNICODE_TITLECASE_LETTER</para></listitem> - * <listitem><para>%G_UNICODE_UPPERCASE_LETTER</para></listitem> - * </itemizedlist> - * - * All other symbols, punctuation, marks, numbers and separators are - * stripped. A regular space (i.e. " ") is used to separate the words - * in the returned string. - * - * The @n_words can be %NULL. If specified, it will be populated with - * the number of words that were normalized in the result. - * - * Returns: a newly-allocated string holding the result which should - * be freed with g_free() when finished with, otherwise %NULL. - * - * Since: 0.8 - * - * Deprecated: 0.10: Use tracker_text_validate_utf8() instead. - **/ -gchar * -tracker_text_normalize (const gchar *text, - guint max_words, - guint *n_words) -{ - GString *string; - gboolean in_break = TRUE; - gunichar ch; - gint words = 0; - - string = g_string_new (NULL); - - while ((ch = g_utf8_get_char_validated (text, -1)) > 0) { - GUnicodeType type; - - type = g_unichar_type (ch); - - if (type == G_UNICODE_LOWERCASE_LETTER || - type == G_UNICODE_MODIFIER_LETTER || - type == G_UNICODE_OTHER_LETTER || - type == G_UNICODE_TITLECASE_LETTER || - type == G_UNICODE_UPPERCASE_LETTER) { - /* Append regular chars */ - g_string_append_unichar (string, ch); - in_break = FALSE; - } else if (!in_break) { - /* Non-regular char found, treat as word break */ - g_string_append_c (string, ' '); - in_break = TRUE; - words++; - - if (words > max_words) { - break; - } - } - - text = g_utf8_find_next_char (text, NULL); - } - - if (n_words) { - if (!in_break) { - /* Count the last word */ - words += 1; - } - *n_words = words; - } - - return g_string_free (string, FALSE); -} - -// LCOV_EXCL_STOP - -/** - * tracker_text_validate_utf8: - * @text: the text to validate - * @text_len: length of @text, or -1 if NUL-terminated - * @str: the string where to place the validated UTF-8 characters, or %NULL if - * not needed. - * @valid_len: Output number of valid UTF-8 bytes found, or %NULL if not needed - * - * This function iterates through @text checking for UTF-8 validity - * using g_utf8_validate(), appends the first chunk of valid characters - * to @str, and gives the number of valid UTF-8 bytes in @valid_len. - * - * Returns: %TRUE if some bytes were found to be valid, %FALSE otherwise. - * - * Since: 0.10 - **/ -gboolean -tracker_text_validate_utf8 (const gchar *text, - gssize text_len, - GString **str, - gsize *valid_len) -{ - gsize len_to_validate; - - g_return_val_if_fail (text, FALSE); - - len_to_validate = text_len >= 0 ? text_len : strlen (text); - - if (len_to_validate > 0) { - const gchar *end = text; - - /* Validate string, getting the pointer to first non-valid character - * (if any) or to the end of the string. */ - g_utf8_validate (text, len_to_validate, &end); - if (end > text) { - /* If str output required... */ - if (str) { - /* Create string to output if not already as input */ - *str = (*str == NULL ? - g_string_new_len (text, end - text) : - g_string_append_len (*str, text, end - text)); - } - - /* If utf8 len output required... */ - if (valid_len) { - *valid_len = end - text; - } - - return TRUE; - } - } - - return FALSE; -} - -/** - * tracker_date_format_to_iso8601: - * @date_string: the date in a string pointer - * @format: the format of the @date_string - * - * This function uses strptime() to create a time tm structure using - * @date_string and @format. - * - * Returns: a newly-allocated string with the time represented in - * ISO8601 date format which should be freed with g_free() when - * finished with, otherwise %NULL. - * - * Since: 0.8 - **/ -gchar * -tracker_date_format_to_iso8601 (const gchar *date_string, - const gchar *format) -{ - gchar *result; - struct tm date_tm = { 0 }; - - g_return_val_if_fail (date_string != NULL, NULL); - g_return_val_if_fail (format != NULL, NULL); - - if (strptime (date_string, format, &date_tm) == 0) { - return NULL; - } - - /* If the input format string doesn't parse timezone information with - * either %z or %Z, strptime() won't set the tm_gmtoff member in the - * broken-down time, and the value during initialization (0) will be - * left. This effectively means that every broken-down time obtained - * with strptime() without parsing timezone information will be based - * on UTC, instead of being treated as localtime. In order to fix this - * and set the correct value for the offset w.r.t gmt, we can just - * use mktime() to fill in the daylight saving flag as well as the - * gmt offset value. */ - if (!strstr (format, "%z") && !strstr (format, "%Z")) { - /* tm_isdst not set by strptime(), we set -1 on it in order to ask - * mktime to 'normalize' its contents and fill in the gmt offset - * and daylight saving time information */ - date_tm.tm_isdst = -1; - - /* Note: no real problem if mktime() fails. In this case, tm_isdst - * will be -1, and therefore strftime() will not write the timezone - * information, which is equally right to represent localtime. */ - mktime (&date_tm); - } - - result = g_malloc (sizeof (char) * 25); - strftime (result, 25, DATE_FORMAT_ISO8601 , &date_tm); - return result; -} - -static gboolean -is_int (const gchar *str) -{ - gint i, len; - - if (!str || str[0] == '\0') { - return FALSE; - } - - len = strlen (str); - - for (i = 0; i < len; i++) { - if (!g_ascii_isdigit (str[i])) { - return FALSE; - } - } - - return TRUE ; -} - -static gint -parse_month (const gchar *month) -{ - gint i; - - for (i = 0; i < 12; i++) { - if (!strncmp (month, months[i], 3)) { - return i; - } - } - - return -1; -} - -/* Determine date format and convert to ISO 8601 format */ -/* FIXME We should handle all the fractions here (see ISO 8601), as well as YYYY:DDD etc */ - -/** - * tracker_date_guess: - * @date_string: the date in a string pointer - * - * This function uses a number of methods to try and guess the date - * held in @date_string. The @date_string must be at least 5 - * characters in length or longer for any guessing to be attempted. - * Some of the string formats guessed include: - * - * <itemizedlist> - * <listitem><para>"YYYY-MM-DD" (Simple format)</para></listitem> - * <listitem><para>"20050315113224-08'00'" (PDF format)</para></listitem> - * <listitem><para>"20050216111533Z" (PDF format)</para></listitem> - * <listitem><para>"Mon Feb 9 10:10:00 2004" (Microsoft Office format)</para></listitem> - * <listitem><para>"2005:04:29 14:56:54" (Exif format)</para></listitem> - * <listitem><para>"YYYY-MM-DDThh:mm:ss.ff+zz:zz</para></listitem> - * </itemizedlist> - * - * Returns: a newly-allocated string with the time represented in - * ISO8601 date format which should be freed with g_free() when - * finished with, otherwise %NULL. - * - * Since: 0.8 - **/ -gchar * -tracker_date_guess (const gchar *date_string) -{ - gchar buf[30]; - gint len; - GError *error = NULL; - - if (!date_string) { - return NULL; - } - - len = strlen (date_string); - - /* We cannot format a date without at least a four digit - * year. - */ - if (len < 4) { - return NULL; - } - - /* Check for year only dates (EG ID3 music tags might have - * Audio.ReleaseDate as 4 digit year) - */ - if (len == 4) { - if (is_int (date_string)) { - buf[0] = date_string[0]; - buf[1] = date_string[1]; - buf[2] = date_string[2]; - buf[3] = date_string[3]; - buf[4] = '-'; - buf[5] = '0'; - buf[6] = '1'; - buf[7] = '-'; - buf[8] = '0'; - buf[9] = '1'; - buf[10] = 'T'; - buf[11] = '0'; - buf[12] = '0'; - buf[13] = ':'; - buf[14] = '0'; - buf[15] = '0'; - buf[16] = ':'; - buf[17] = '0'; - buf[18] = '0'; - buf[19] = 'Z'; - buf[20] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } else { - return NULL; - } - } else if (len == 10) { - /* Check for date part only YYYY-MM-DD */ - buf[0] = date_string[0]; - buf[1] = date_string[1]; - buf[2] = date_string[2]; - buf[3] = date_string[3]; - buf[4] = '-'; - buf[5] = date_string[5]; - buf[6] = date_string[6]; - buf[7] = '-'; - buf[8] = date_string[8]; - buf[9] = date_string[9]; - buf[10] = 'T'; - buf[11] = '0'; - buf[12] = '0'; - buf[13] = ':'; - buf[14] = '0'; - buf[15] = '0'; - buf[16] = ':'; - buf[17] = '0'; - buf[18] = '0'; - buf[19] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } else if (len == 14) { - /* Check for pdf format EG 20050315113224-08'00' or - * 20050216111533Z - */ - buf[0] = date_string[0]; - buf[1] = date_string[1]; - buf[2] = date_string[2]; - buf[3] = date_string[3]; - buf[4] = '-'; - buf[5] = date_string[4]; - buf[6] = date_string[5]; - buf[7] = '-'; - buf[8] = date_string[6]; - buf[9] = date_string[7]; - buf[10] = 'T'; - buf[11] = date_string[8]; - buf[12] = date_string[9]; - buf[13] = ':'; - buf[14] = date_string[10]; - buf[15] = date_string[11]; - buf[16] = ':'; - buf[17] = date_string[12]; - buf[18] = date_string[13]; - buf[19] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } else if (len == 15 && date_string[14] == 'Z') { - buf[0] = date_string[0]; - buf[1] = date_string[1]; - buf[2] = date_string[2]; - buf[3] = date_string[3]; - buf[4] = '-'; - buf[5] = date_string[4]; - buf[6] = date_string[5]; - buf[7] = '-'; - buf[8] = date_string[6]; - buf[9] = date_string[7]; - buf[10] = 'T'; - buf[11] = date_string[8]; - buf[12] = date_string[9]; - buf[13] = ':'; - buf[14] = date_string[10]; - buf[15] = date_string[11]; - buf[16] = ':'; - buf[17] = date_string[12]; - buf[18] = date_string[13]; - buf[19] = 'Z'; - buf[20] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } else if (len == 21 && (date_string[14] == '-' || date_string[14] == '+' )) { - buf[0] = date_string[0]; - buf[1] = date_string[1]; - buf[2] = date_string[2]; - buf[3] = date_string[3]; - buf[4] = '-'; - buf[5] = date_string[4]; - buf[6] = date_string[5]; - buf[7] = '-'; - buf[8] = date_string[6]; - buf[9] = date_string[7]; - buf[10] = 'T'; - buf[11] = date_string[8]; - buf[12] = date_string[9]; - buf[13] = ':'; - buf[14] = date_string[10]; - buf[15] = date_string[11]; - buf[16] = ':'; - buf[17] = date_string[12]; - buf[18] = date_string[13]; - buf[19] = date_string[14]; - buf[20] = date_string[15]; - buf[21] = date_string[16]; - buf[22] = ':'; - buf[23] = date_string[18]; - buf[24] = date_string[19]; - buf[25] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } else if ((len == 24) && (date_string[3] == ' ')) { - /* Check for msoffice date format "Mon Feb 9 10:10:00 2004" */ - gint num_month; - gchar mon1; - gchar day1; - - num_month = parse_month (date_string + 4); - - if (num_month < 0) { - return NULL; - } - - mon1 = imonths[num_month]; - - if (date_string[8] == ' ') { - day1 = '0'; - } else { - day1 = date_string[8]; - } - - buf[0] = date_string[20]; - buf[1] = date_string[21]; - buf[2] = date_string[22]; - buf[3] = date_string[23]; - buf[4] = '-'; - - if (num_month < 10) { - buf[5] = '0'; - buf[6] = mon1; - } else { - buf[5] = '1'; - buf[6] = mon1; - } - - buf[7] = '-'; - buf[8] = day1; - buf[9] = date_string[9]; - buf[10] = 'T'; - buf[11] = date_string[11]; - buf[12] = date_string[12]; - buf[13] = ':'; - buf[14] = date_string[14]; - buf[15] = date_string[15]; - buf[16] = ':'; - buf[17] = date_string[17]; - buf[18] = date_string[18]; - buf[19] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } else if ((len == 19) && (date_string[4] == ':') && (date_string[7] == ':')) { - /* Check for Exif date format "2005:04:29 14:56:54" */ - buf[0] = date_string[0]; - buf[1] = date_string[1]; - buf[2] = date_string[2]; - buf[3] = date_string[3]; - buf[4] = '-'; - buf[5] = date_string[5]; - buf[6] = date_string[6]; - buf[7] = '-'; - buf[8] = date_string[8]; - buf[9] = date_string[9]; - buf[10] = 'T'; - buf[11] = date_string[11]; - buf[12] = date_string[12]; - buf[13] = ':'; - buf[14] = date_string[14]; - buf[15] = date_string[15]; - buf[16] = ':'; - buf[17] = date_string[17]; - buf[18] = date_string[18]; - buf[19] = '\0'; - - tracker_string_to_date (buf, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (buf); - } - - tracker_string_to_date (date_string, NULL, &error); - - if (error != NULL) { - g_error_free (error); - return NULL; - } - - return g_strdup (date_string); -} - -#ifndef HAVE_GETLINE - -static gint -my_igetdelim (gchar **linebuf, - gsize *linebufsz, - gint delimiter, - FILE *file) -{ - gint ch; - gint idx; - - if ((file == NULL || linebuf == NULL || *linebuf == NULL || *linebufsz == 0) && - !(*linebuf == NULL && *linebufsz == 0)) { - errno = EINVAL; - return -1; - } - - if (*linebuf == NULL && *linebufsz == 0) { - *linebuf = g_malloc (GROW_BY); - - if (!*linebuf) { - errno = ENOMEM; - return -1; - } - - *linebufsz += GROW_BY; - } - - idx = 0; - - while ((ch = fgetc (file)) != EOF) { - /* Grow the line buffer as necessary */ - while (idx > *linebufsz - 2) { - *linebuf = g_realloc (*linebuf, *linebufsz += GROW_BY); - - if (!*linebuf) { - errno = ENOMEM; - return -1; - } - } - (*linebuf)[idx++] = (gchar) ch; - - if ((gchar) ch == delimiter) { - break; - } - } - - if (idx != 0) { - (*linebuf)[idx] = 0; - } else if ( ch == EOF ) { - return -1; - } - - return idx; -} - -#endif /* HAVE_GETLINE */ - -/** - * tracker_getline: - * @lineptr: Buffer to write into - * @n: Max bytes of linebuf - * @stream: Filestream to read from - * - * Reads an entire line from stream, storing the address of the buffer - * containing the text into *lineptr. The buffer is null-terminated - * and includes the newline character, if one was found. - * - * Read GNU getline()'s manpage for more information - * - * Returns: the number of characters read, including the delimiter - * character, but not including the terminating %NULL byte. This value - * can be used to handle embedded %NULL bytes in the line read. Upon - * failure, -1 is returned. - * - * Since: 0.10 - **/ -gssize -tracker_getline (gchar **lineptr, - gsize *n, - FILE *stream) -{ -#ifndef HAVE_GETLINE - return my_igetdelim (lineptr, n, '\n', stream); -#else /* HAVE_GETLINE */ - return getline (lineptr, n, stream); -#endif /* HAVE_GETLINE */ -} - -/** - * tracker_keywords_parse: - * @store: Array where to store the keywords - * @keywords: Keywords line to parse - * - * Parses a keywords line into store, avoiding duplicates and stripping leading - * and trailing spaces from keywords. Allowed delimiters are , and ; - * - * Since: 0.10 - **/ -void -tracker_keywords_parse (GPtrArray *store, - const gchar *keywords) -{ - gchar *orig, *keywords_d; - char *saveptr, *p; - size_t len; - - keywords_d = orig = g_strdup (keywords); - p = keywords_d; - keywords_d = strchr (keywords_d, '"'); - - if (keywords_d) { - keywords_d++; - } else { - keywords_d = p; - } - - len = strlen (keywords_d); - if (len > 0 && keywords_d[len - 1] == '"') { - keywords_d[len - 1] = '\0'; - } - - for (p = strtok_r (keywords_d, ",;", &saveptr); p; - p = strtok_r (NULL, ",;", &saveptr)) { - guint i; - gboolean found = FALSE; - gchar *p_do = g_strdup (p); - gchar *p_dup = p_do; - guint len = strlen (p_dup); - - if (*p_dup == ' ') - p_dup++; - - if (p_dup[len-1] == ' ') - p_dup[len-1] = '\0'; - - /* ignore keywords containing invalid UTF-8 */ - if (!g_utf8_validate (p_dup, -1, NULL)) { - g_free (p_do); - continue; - } - - for (i = 0; i < store->len; i++) { - const gchar *earlier = g_ptr_array_index (store, i); - if (g_strcmp0 (earlier, p_dup) == 0) { - found = TRUE; - break; - } - } - - if (!found) { - g_ptr_array_add (store, g_strdup (p_dup)); - } - - g_free (p_do); - } - - g_free (orig); -} |