summaryrefslogtreecommitdiff
path: root/src/libtracker-extract/tracker-encoding-libicu.c
blob: feec102d25c012fdcfda649fe8e43af6a7561e93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
 * Copyright (C) 2013 Carlos Garnacho <carlos@lanedo.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include "config.h"

#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/uenum.h"
#include "unicode/ucsdet.h"

#include <glib.h>
#include "tracker-encoding-libicu.h"

/*
 * tracker_encoding_guess_icu:
 * @buffer: bytes whose character encoding should be guessed
 * @size: number of bytes available in @buffer
 *
 * Runs ICU's charset detector over @buffer and returns the name of the
 * best match reported by ucsdet_detect().
 *
 * Returns: a newly allocated string holding the charset name (release it
 * with g_free()), or NULL if @size does not fit the int32_t length ICU
 * expects, or if any ICU call fails or yields no match.
 */
gchar *
tracker_encoding_guess_icu (const gchar *buffer,
			    gsize        size)
{
	UCharsetDetector *detector = NULL;
	const UCharsetMatch *match;
	gchar *charset = NULL;
	/* ICU error codes are in/out parameters: a call bails out immediately
	 * if the status already signals failure on entry, so it has to start
	 * out as U_ZERO_ERROR instead of being left uninitialized.
	 */
	UErrorCode status = U_ZERO_ERROR;
	const char *p_match = NULL;

	/* ucsdet_setText() takes the length as int32_t; reject anything that
	 * would not survive the cast before creating a detector at all.
	 */
	if (size >= G_MAXINT32)
		goto failure;

	detector = ucsdet_open (&status);

	if (U_FAILURE (status))
		goto failure;

	ucsdet_setText (detector, buffer, (int32_t) size, &status);

	if (U_FAILURE (status))
		goto failure;

	match = ucsdet_detect (detector, &status);

	if (match == NULL || U_FAILURE (status))
		goto failure;

	p_match = ucsdet_getName (match, &status);

	if (p_match == NULL || U_FAILURE (status))
		goto failure;

	/* Copy the name before the detector is closed below, so the returned
	 * string does not depend on ICU-owned storage.
	 */
	charset = g_strdup ((const gchar *) p_match);

	g_debug ("Guessing charset as '%s'", charset);

failure:
	if (detector)
		ucsdet_close (detector);

	return charset;
}