/* Source: psi/zutf8.c (blob 235bb943f05f7e4bb9b9f75b87f8094baecd5d9a) */
/* Copyright (C) 2001-2023 Artifex Software, Inc.
   All Rights Reserved.

   This software is provided AS-IS with no warranty, either express or
   implied.

   This software is distributed under license and may not be copied,
   modified or distributed except as expressly authorized under the terms
   of the license contained in the file LICENSE in this distribution.

   Refer to licensing information at http://www.artifex.com or contact
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
   CA 94129, USA, for further information.
*/


/* The .locale_to_utf8 operator, for converting text from the locale
 * charset to UTF-8.  This code is not used on Windows; there's a
 * separate Windows implementation in zwinutf8.c. */

#include "ghost.h"
#include "oper.h"
#include "iutil.h"
#include "ialloc.h"
#include "malloc_.h"
#include "errno_.h"
#include "string_.h"
#include "store.h"
#include <stringprep.h>

/* Convert a string from the current locale's character set to UTF-8.
 * <string> .locale_to_utf8 <string>
 *
 * The operand on top of the operand stack must be a readable string.
 * On a successful conversion it is replaced by a new string holding the
 * UTF-8 text.  If the conversion fails with EILSEQ or EINVAL the operand
 * is left untouched and the operator still succeeds.
 *
 * Returns 0 on success, gs_error_VMerror if the temporary copy of the
 * input cannot be allocated, gs_error_ioerror for any other conversion
 * failure, or the negative code reported by string_to_ref(). */
static int
zlocale_to_utf8(i_ctx_t *i_ctx_p)
{
    os_ptr op = osp;
    char *input;
    char *output;
    int convert_errno;
    int code;

    check_read_type(*op, t_string);
    input = ref_to_string(op, imemory, "locale_to_utf8 input");
    if (input == 0)
        return_error(gs_error_VMerror);

    output = stringprep_locale_to_utf8(input);
    /* Snapshot errno right away: the deallocation below is free to
     * modify it, and we must classify the failure by the value the
     * conversion itself left behind. */
    convert_errno = errno;
    ifree_string((byte *)input, r_size(op) + 1, "locale_to_utf8 input");
    if (output == 0) {
        /* This function is intended to be used on strings whose
         * character set is unknown, so it's not an error if the
         * input contains invalid characters.  Just return the input
         * string unchanged.
         *
         * Sadly, EINVAL from stringprep_locale_to_utf8 can mean
         * either an invalid character set conversion (which we care
         * about), or an incomplete input string (which we don't).
         * For now, we ignore EINVAL; the right solution is probably
         * to not use stringprep_locale_to_utf8, and just call iconv
         * by hand. */
        if (convert_errno == EILSEQ || convert_errno == EINVAL)
            return 0;

        /* Other errors (like ENFILE) are real errors, which we
         * want to return to the user. */
        return_error(gs_error_ioerror);
    }

    /* Store the converted text into the operand slot, then release the
     * buffer returned by the conversion whether or not that worked. */
    code = string_to_ref(output, op, iimemory, "locale_to_utf8 output");
    free(output);
    if (code < 0)
        return code;

    return 0;
}

/* ------ Initialization procedure ------ */

/* Operator table for this file: binds the name .locale_to_utf8 to
 * zlocale_to_utf8.  The leading "1" in the name string is presumably
 * the operand count expected by the interpreter -- confirm against the
 * op_def documentation in opdef.h.  The table ends with op_def_end(0). */
const op_def zutf8_op_defs[] =
{
    {"1.locale_to_utf8", zlocale_to_utf8},
    op_def_end(0)
};