/* Pango * pango-language.c: Language handling routines * * Copyright (C) 2000 Red Hat Software * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include #include #include #include #include #include #include "pango-language.h" #include "pango-impl-utils.h" static const char canon_map[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 }; static gboolean lang_equal (gconstpointer v1, gconstpointer v2) { const guchar *p1 = v1; const guchar *p2 = v2; while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2]) { p1++, p2++; } return (canon_map[*p1] == canon_map[*p2]); } static guint lang_hash (gconstpointer key) { const guchar *p = key; guint h = 0; while (canon_map[*p]) { h = (h << 5) - h + canon_map[*p]; p++; } return h; } static PangoLanguage * pango_language_copy (PangoLanguage *language) { return language; /* language tags are const */ } static void pango_language_free (PangoLanguage *language) { return; /* nothing */ } GType pango_language_get_type (void) { static GType our_type = 0; if (our_type == 0) our_type = g_boxed_type_register_static (I_("PangoLanguage"), (GBoxedCopyFunc)pango_language_copy, (GBoxedFreeFunc)pango_language_free); return our_type; } /** * _pango_get_lc_ctype: * * Return the Unix-style locale string for the language currently in * effect. On Unix systems, this is the return value from * setlocale(LC_CTYPE, NULL), and the user can * affect this through the environment variables LC_ALL, LC_CTYPE or * LANG (checked in that order). The locale strings typically is in * the form lang_COUNTRY, where lang is an ISO-639 language code, and * COUNTRY is an ISO-3166 country code. For instance, sv_FI for * Swedish as written in Finland or pt_BR for Portuguese as written in * Brazil. * * On Windows, the C library doesn't use any such environment * variables, and setting them won't affect the behavior of functions * like ctime(). The user sets the locale through the Regional Options * in the Control Panel. The C library (in the setlocale() function) * does not use country and language codes, but country and language * names spelled out in English. * However, this function does check the above environment * variables, and does return a Unix-style locale string based on * either said environment variables or the thread's current locale. * * Return value: a dynamically allocated string, free with g_free(). */ static gchar * _pango_get_lc_ctype (void) { #ifdef G_OS_WIN32 /* Somebody might try to set the locale for this process using the * LANG or LC_ environment variables. The Microsoft C library * doesn't know anything about them. You set the locale in the * Control Panel. Setting these env vars won't have any affect on * locale-dependent C library functions like ctime(). But just for * kicks, do obey LC_ALL, LC_CTYPE and LANG in Pango. (This also makes * it easier to test GTK and Pango in various default languages, you * don't have to clickety-click in the Control Panel, you can simply * start the program with LC_ALL=something on the command line.) */ gchar *p; p = getenv ("LC_ALL"); if (p != NULL) return g_strdup (p); p = getenv ("LC_CTYPE"); if (p != NULL) return g_strdup (p); p = getenv ("LANG"); if (p != NULL) return g_strdup (p); return g_win32_getlocale (); #else return g_strdup (setlocale (LC_CTYPE, NULL)); #endif } /** * pango_language_get_default: * * Returns the #PangoLanguage for the current locale of the process. * Note that this can change over the life of an application. * * On Unix systems, this is the return value is derived from * setlocale(LC_CTYPE, NULL), and the user can * affect this through the environment variables LC_ALL, LC_CTYPE or * LANG (checked in that order). The locale string typically is in * the form lang_COUNTRY, where lang is an ISO-639 language code, and * COUNTRY is an ISO-3166 country code. For instance, sv_FI for * Swedish as written in Finland or pt_BR for Portuguese as written in * Brazil. * * On Windows, the C library does not use any such environment * variables, and setting them won't affect the behavior of functions * like ctime(). The user sets the locale through the Regional Options * in the Control Panel. The C library (in the setlocale() function) * does not use country and language codes, but country and language * names spelled out in English. * However, this function does check the above environment * variables, and does return a Unix-style locale string based on * either said environment variables or the thread's current locale. * * Your application should call setlocale(LC_ALL, ""); * for the user settings to take effect. Gtk+ does this in its initialization * functions automatically (by calling gtk_set_locale()). * See man setlocale for more details. * * Return value: the default language as a #PangoLanguage, must not be * freed. * * Since: 1.16 **/ PangoLanguage * pango_language_get_default (void) { gchar *lang; PangoLanguage *result; lang = _pango_get_lc_ctype (); result = pango_language_from_string (lang); g_free (lang); return result; } /** * pango_language_from_string: * @language: a string representing a language tag * * Take a RFC-3066 format language tag as a string and convert it to a * #PangoLanguage pointer that can be efficiently copied (copy the * pointer) and compared with other language tags (compare the * pointer.) * * This function first canonicalizes the string by converting it to * lowercase, mapping '_' to '-', and stripping all characters other * than letters and '-'. * * Use pango_language_get_default() if you want to get the #PangoLanguage for * the current locale of the process. * * Return value: an opaque pointer to a #PangoLanguage structure. * this will be valid forever after. **/ PangoLanguage * pango_language_from_string (const char *language) { static GHashTable *hash = NULL; char *result; int len; char *p; if (G_UNLIKELY (!hash)) hash = g_hash_table_new (lang_hash, lang_equal); else { result = g_hash_table_lookup (hash, language); if (result) return (PangoLanguage *)result; } len = strlen (language); result = g_malloc (len + 1); p = result; while ((*(p++) = canon_map[*(guchar *)language++])) ; g_hash_table_insert (hash, result, result); return (PangoLanguage *)result; } /** * pango_language_matches: * @language: a language tag (see pango_language_from_string()), * %NULL is allowed and matches nothing but '*' * @range_list: a list of language ranges, separated by ';', ':', * ',', or space characters. * Each element must either be '*', or a RFC 3066 language range * canonicalized as by pango_language_from_string() * * Checks if a language tag matches one of the elements in a list of * language ranges. A language tag is considered to match a range * in the list if the range is '*', the range is exactly the tag, * or the range is a prefix of the tag, and the character after it * in the tag is '-'. * * Return value: %TRUE if a match was found. **/ gboolean pango_language_matches (PangoLanguage *language, const char *range_list) { const char *lang_str = pango_language_to_string (language); const char *p = range_list; gboolean done = FALSE; while (!done) { const char *end = strpbrk (p, ";:, \t"); if (!end) { end = p + strlen (p); done = TRUE; } if (strncmp (p, "*", 1) == 0 || (lang_str && strncmp (lang_str, p, end - p) == 0 && (lang_str[end - p] == '\0' || lang_str[end - p] == '-'))) return TRUE; if (!done) p = end + 1; } return FALSE; } typedef struct { const char lang[4]; const char *str; } LangInfo; static int lang_info_compare (const void *key, const void *val) { const LangInfo *lang_info = val; return strncmp (key, lang_info->lang, 2); } /* The following array is supposed to contain enough text to tickle all necessary fonts for each * of the languages in the following. Yes, it's pretty lame. Not all of the languages * in the following have sufficient text to exercise all the accents for the language, and * there are obviously many more languages to include as well. */ static const LangInfo lang_texts[] = { { "ar", "Arabic \330\247\331\204\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205" }, { "cs", "Czech (\304\215esky) Dobr\303\275 den" }, { "da", "Danish (Dansk) Hej, Goddag" }, { "el", "Greek (\316\225\316\273\316\273\316\267\316\275\316\271\316\272\316\254) \316\223\316\265\316\271\316\254 \317\203\316\261\317\202" }, { "en", "English Hello" }, { "eo", "Esperanto Saluton" }, { "es", "Spanish (Espa\303\261ol) \302\241Hola!" }, { "et", "Estonian Tere, Tervist" }, { "fi", "Finnish (Suomi) Hei, Hyv\303\244\303\244 p\303\244iv\303\244\303\244" }, { "fr", "French (Fran\303\247ais)" }, { "de", "German Gr\303\274\303\237 Gott" }, { "he", "Hebrew \327\251\327\234\327\225\327\235" }, { "it", "Italiano Ciao, Buon giorno" }, { "ja", "Japanese (\346\227\245\346\234\254\350\252\236) \343\201\223\343\202\223\343\201\253\343\201\241\343\201\257, \357\275\272\357\276\235\357\276\206\357\276\201\357\276\212" }, { "ko", "Korean (\355\225\234\352\270\200) \354\225\210\353\205\225\355\225\230\354\204\270\354\232\224, \354\225\210\353\205\225\355\225\230\354\213\255\353\213\210\352\271\214" }, { "mt", "Maltese \304\212aw, Sa\304\247\304\247a" }, { "nl", "Nederlands, Vlaams Hallo, Dag" }, { "no", "Norwegian (Norsk) Hei, God dag" }, { "pl", "Polish Dzie\305\204 dobry, Hej" }, { "ru", "Russian (\320\240\321\203\321\201\321\201\320\272\320\270\320\271)" }, { "sk", "Slovak Dobr\303\275 de\305\210" }, { "sv", "Swedish (Svenska) Hej p\303\245 dej, Goddag" }, { "tr", "Turkish (T\303\274rk\303\247e) Merhaba" }, { "zh", "Chinese (\344\270\255\346\226\207,\346\231\256\351\200\232\350\257\235,\346\261\211\350\257\255)" } }; /** * pango_language_get_sample_string: * @language: a #PangoLanguage * * Get a string that is representative of the characters needed to * render a particular language. This function is a bad hack for * internal use by renderers and Pango. * * Return value: the sample string. This value is owned by Pango * and must not be freed. **/ G_CONST_RETURN char * pango_language_get_sample_string (PangoLanguage *language) { const char *result; if (language) { const char *lang_str = pango_language_to_string (language); LangInfo *lang_info = bsearch (lang_str, lang_texts, G_N_ELEMENTS (lang_texts), sizeof (LangInfo), lang_info_compare); if (lang_info) result = lang_info->str; else result = "French (Fran\303\247ais)"; /* Assume iso-8859-1 */ } else { /* Complete junk */ result = "\330\247\331\204\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205 \304\215esky \316\225\316\273\316\273\316\267\316\275\316\271\316\272\316\254 Fran\303\247ais \346\227\245\346\234\254\350\252\236 \355\225\234\352\270\200 \320\240\321\203\321\201\321\201\320\272\320\270\320\271 \344\270\255\346\226\207,\346\231\256\351\200\232\350\257\235,\346\261\211\350\257\255 T\303\274rk\303\247e"; } return result; }