summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/lib/string.c
diff options
context:
space:
mode:
authorKentoku SHIBA <kentokushiba@gmail.com>2014-09-21 00:33:45 +0900
committerKentoku SHIBA <kentokushiba@gmail.com>2014-09-21 00:33:45 +0900
commit0cc855cdc8cd0baa6ba50662632b299a3843ff13 (patch)
treeeaf50856703412b5c1c43f8c0e5a6a5318601c17 /storage/mroonga/vendor/groonga/lib/string.c
parent989dd4d9ec09450ff7b25987b14ee9fdfd21ad4e (diff)
downloadmariadb-git-0cc855cdc8cd0baa6ba50662632b299a3843ff13.tar.gz
Update Mroonga to the latest version on 2014-09-21T00:33:44+0900
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/string.c')
-rw-r--r--storage/mroonga/vendor/groonga/lib/string.c406
1 files changed, 406 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/string.c b/storage/mroonga/vendor/groonga/lib/string.c
new file mode 100644
index 00000000000..f0fcf561b5e
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/lib/string.c
@@ -0,0 +1,406 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2009-2012 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "groonga_in.h"
+#include <string.h>
+#include "string_in.h"
+#include "normalizer_in.h"
+#include "str.h"
+#include "util.h"
+
+#include <groonga/tokenizer.h>
+
+/*
+ * Returns the byte length of the character introduced by lead byte `c`
+ * under `encoding`, judged from the lead byte alone. Encodings other than
+ * EUC-JP, Shift_JIS and UTF-8 are treated as one byte per character.
+ */
+static int16_t
+grn_fake_string_char_length(grn_encoding encoding, unsigned char c)
+{
+  switch (encoding) {
+  case GRN_ENC_EUC_JP:
+    if ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU) {
+      return 2;
+    }
+    return (c == 0x8fU) ? 3 : 1;
+  case GRN_ENC_SJIS:
+    if (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU))) {
+      return 2;
+    }
+    return 1;
+  case GRN_ENC_UTF8:
+    if (!(c & 0x80U)) {
+      return 1;
+    }
+    if (!(c & 0x20U)) {
+      return 2;
+    }
+    return (c & 0x10U) ? 4 : 3;
+  default:
+    return 1;
+  }
+}
+
+/*
+ * Fills in `string` without running a normalizer.
+ *
+ * The normalized text is a NUL-terminated copy of the original bytes. When
+ * GRN_STRING_REMOVE_TOKENIZED_DELIMITER is set and the context encoding is
+ * UTF-8, characters reported as tokenized delimiters are left out of the
+ * copy. When GRN_STRING_WITH_CHECKS is set, a checks array with one entry
+ * per original byte is built: the first byte of each character holds the
+ * character's byte length and the following bytes hold 0.
+ *
+ * Returns `string` on success. On allocation failure the error is reported
+ * through ERR(), `string` is closed with grn_string_close() and NULL is
+ * returned, so the caller must not use `string` afterwards.
+ */
+static grn_string *
+grn_fake_string_open(grn_ctx *ctx, grn_string *string)
+{
+  /* TODO: support GRN_STRING_REMOVE_BLANK flag and ctypes */
+  grn_string *nstr = string;
+  const char *str;
+  unsigned int str_len;
+
+  str = nstr->original;
+  str_len = nstr->original_length_in_bytes;
+
+  /* One extra byte for the terminating NUL. */
+  if (!(nstr->normalized = GRN_MALLOC(str_len + 1))) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "[string][fake] failed to allocate normalized text space");
+    grn_string_close(ctx, (grn_obj *)nstr);
+    return NULL;
+  }
+
+  if (nstr->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER &&
+      ctx->encoding == GRN_ENC_UTF8) {
+    int char_length;
+    const char *source_current = str;
+    const char *source_end = str + str_len;
+    char *destination = nstr->normalized;
+    unsigned int destination_length = 0;
+    /* Copy character by character, skipping tokenized delimiters. The loop
+       stops as soon as grn_charlen() reports a non-positive length. */
+    while ((char_length = grn_charlen(ctx, source_current, source_end)) > 0) {
+      if (!grn_tokenizer_is_tokenized_delimiter(ctx,
+                                                source_current, char_length,
+                                                ctx->encoding)) {
+        memcpy(destination, source_current, char_length);
+        destination += char_length;
+        destination_length += char_length;
+      }
+      source_current += char_length;
+    }
+    nstr->normalized[destination_length] = '\0';
+    nstr->normalized_length_in_bytes = destination_length;
+  } else {
+    memcpy(nstr->normalized, str, str_len);
+    nstr->normalized[str_len] = '\0';
+    nstr->normalized_length_in_bytes = str_len;
+  }
+
+  if (nstr->flags & GRN_STRING_WITH_CHECKS) {
+    int16_t remaining = 0;
+    size_t i;
+    if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "[string][fake] failed to allocate checks space");
+      grn_string_close(ctx, (grn_obj *)nstr);
+      return NULL;
+    }
+    /* NOTE(review): checks is sized and filled from the original bytes, not
+       from the normalized text, which can be shorter when delimiters were
+       removed above - confirm that consumers expect this. */
+    for (i = 0; i < str_len; i++) {
+      if (remaining == 0) {
+        /* Start of a character: record its byte length. */
+        remaining = grn_fake_string_char_length(nstr->encoding,
+                                                (unsigned char)str[i]);
+        nstr->checks[i] = remaining;
+      } else {
+        /* Trailing byte of a multi-byte character. */
+        nstr->checks[i] = 0;
+      }
+      remaining--;
+    }
+  }
+  return nstr;
+}
+
+/*
+ * Creates a string object for `str` (`str_len` bytes) in `encoding`.
+ *
+ * The object keeps a pointer to `str` rather than copying it. When
+ * `normalizer` is NULL the string is filled in by grn_fake_string_open();
+ * when it is GRN_NORMALIZER_AUTO the normalizer registered under
+ * GRN_NORMALIZER_AUTO_NAME is looked up and used; otherwise the given
+ * normalizer is used as is.
+ *
+ * Returns the new object, or NULL when `str` is NULL, `str_len` is 0,
+ * memory allocation fails, the auto normalizer cannot be found, or
+ * ctx->rc is set after normalization.
+ */
+grn_obj *
+grn_string_open_(grn_ctx *ctx, const char *str, unsigned int str_len,
+                 grn_obj *normalizer, int flags, grn_encoding encoding)
+{
+  grn_string *string;
+  grn_obj *obj;
+  grn_bool is_normalizer_auto;
+
+  if (!str || !str_len) {
+    return NULL;
+  }
+
+  string = GRN_MALLOCN(grn_string, 1);
+  if (!string) {
+    GRN_LOG(ctx, GRN_LOG_ALERT,
+            "[string][open] failed to allocate memory");
+    return NULL;
+  }
+
+  obj = (grn_obj *)string;
+  GRN_OBJ_INIT(obj, GRN_STRING, GRN_OBJ_ALLOCATED, GRN_ID_NIL);
+  string->original = str;
+  string->original_length_in_bytes = str_len;
+  string->normalized = NULL;
+  string->normalized_length_in_bytes = 0;
+  string->n_characters = 0;
+  string->checks = NULL;
+  string->ctypes = NULL;
+  string->encoding = encoding;
+  string->flags = flags;
+
+  if (!normalizer) {
+    return (grn_obj *)grn_fake_string_open(ctx, string);
+  }
+
+  is_normalizer_auto = (normalizer == GRN_NORMALIZER_AUTO);
+  if (is_normalizer_auto) {
+    normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
+    if (!normalizer) {
+      /* Without this check a NULL normalizer would be handed to
+         grn_normalizer_normalize() and grn_obj_unlink() below. */
+      ERR(GRN_INVALID_ARGUMENT,
+          "[string][open] failed to find the auto normalizer");
+      grn_string_close(ctx, obj);
+      return NULL;
+    }
+  }
+
+  /* The outcome of normalization is judged through ctx->rc. */
+  grn_normalizer_normalize(ctx, normalizer, (grn_obj *)string);
+  if (ctx->rc) {
+    grn_obj_close(ctx, obj);
+    obj = NULL;
+  }
+
+  /* Only the normalizer obtained here via grn_ctx_get() is unlinked; a
+     caller-supplied normalizer is left alone. */
+  if (is_normalizer_auto) {
+    grn_obj_unlink(ctx, normalizer);
+  }
+
+  return obj;
+}
+
+/*
+ * Convenience form of grn_string_open_() that takes the encoding from
+ * the context (ctx->encoding).
+ */
+grn_obj *
+grn_string_open(grn_ctx *ctx, const char *str, unsigned int str_len,
+                grn_obj *normalizer, int flags)
+{
+  grn_encoding context_encoding = ctx->encoding;
+
+  return grn_string_open_(ctx, str, str_len, normalizer, flags,
+                          context_encoding);
+}
+
+/*
+ * Reports the original text pointer and its byte length.
+ *
+ * Each output pointer is optional: a NULL `original` or `length_in_bytes`
+ * is simply skipped. Returns GRN_INVALID_ARGUMENT when `string` is NULL,
+ * GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_string_get_original(grn_ctx *ctx, grn_obj *string,
+                        const char **original,
+                        unsigned int *length_in_bytes)
+{
+  grn_rc rc = GRN_INVALID_ARGUMENT;
+  grn_string *target = (grn_string *)string;
+  GRN_API_ENTER;
+  if (target) {
+    if (original) {
+      *original = target->original;
+    }
+    if (length_in_bytes) {
+      *length_in_bytes = target->original_length_in_bytes;
+    }
+    rc = GRN_SUCCESS;
+  }
+  GRN_API_RETURN(rc);
+}
+
+/*
+ * Returns the flags stored in `string`, or 0 when `string` is NULL.
+ */
+int
+grn_string_get_flags(grn_ctx *ctx, grn_obj *string)
+{
+  grn_string *target = (grn_string *)string;
+  int flags;
+  GRN_API_ENTER;
+  flags = target ? target->flags : 0;
+  GRN_API_RETURN(flags);
+}
+
+/*
+ * Reports the normalized text pointer, its byte length and the stored
+ * character count.
+ *
+ * Each output pointer is optional: NULL outputs are skipped. Returns
+ * GRN_INVALID_ARGUMENT when `string` is NULL, GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_string_get_normalized(grn_ctx *ctx, grn_obj *string,
+                          const char **normalized,
+                          unsigned int *length_in_bytes,
+                          unsigned int *n_characters)
+{
+  grn_rc rc = GRN_INVALID_ARGUMENT;
+  grn_string *target = (grn_string *)string;
+  GRN_API_ENTER;
+  if (target) {
+    if (normalized) {
+      *normalized = target->normalized;
+    }
+    if (length_in_bytes) {
+      *length_in_bytes = target->normalized_length_in_bytes;
+    }
+    if (n_characters) {
+      *n_characters = target->n_characters;
+    }
+    rc = GRN_SUCCESS;
+  }
+  GRN_API_RETURN(rc);
+}
+
+/*
+ * Replaces the normalized text of `string`.
+ *
+ * Any previously stored normalized buffer is released with GRN_FREE()
+ * before `normalized` is stored; the string then holds `normalized`
+ * itself (no copy is made). Returns GRN_INVALID_ARGUMENT when `string`
+ * is NULL, GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_string_set_normalized(grn_ctx *ctx, grn_obj *string,
+                          char *normalized, unsigned int length_in_bytes,
+                          unsigned int n_characters)
+{
+  grn_rc rc;
+  grn_string *target = (grn_string *)string;
+  GRN_API_ENTER;
+  if (!target) {
+    rc = GRN_INVALID_ARGUMENT;
+  } else {
+    if (target->normalized) {
+      GRN_FREE(target->normalized);
+    }
+    target->normalized = normalized;
+    target->normalized_length_in_bytes = length_in_bytes;
+    target->n_characters = n_characters;
+    rc = GRN_SUCCESS;
+  }
+  GRN_API_RETURN(rc);
+}
+
+/*
+ * Returns the checks array stored in `string`, or NULL when `string` is
+ * NULL.
+ */
+const short *
+grn_string_get_checks(grn_ctx *ctx, grn_obj *string)
+{
+  grn_string *target = (grn_string *)string;
+  int16_t *checks = NULL;
+  GRN_API_ENTER;
+  if (target) {
+    checks = target->checks;
+  }
+  GRN_API_RETURN(checks);
+}
+
+/*
+ * Replaces the checks array of `string`.
+ *
+ * Any previously stored array is released with GRN_FREE() before
+ * `checks` is stored; the string then holds `checks` itself (no copy is
+ * made). Returns GRN_INVALID_ARGUMENT when `string` is NULL, GRN_SUCCESS
+ * otherwise.
+ */
+grn_rc
+grn_string_set_checks(grn_ctx *ctx, grn_obj *string, short *checks)
+{
+  grn_rc rc;
+  grn_string *target = (grn_string *)string;
+  GRN_API_ENTER;
+  if (!target) {
+    rc = GRN_INVALID_ARGUMENT;
+  } else {
+    if (target->checks) {
+      GRN_FREE(target->checks);
+    }
+    target->checks = checks;
+    rc = GRN_SUCCESS;
+  }
+  GRN_API_RETURN(rc);
+}
+
+/*
+ * Returns the ctypes array stored in `string`, or NULL when `string` is
+ * NULL.
+ */
+const unsigned char *
+grn_string_get_types(grn_ctx *ctx, grn_obj *string)
+{
+  grn_string *target = (grn_string *)string;
+  unsigned char *types = NULL;
+  GRN_API_ENTER;
+  if (target) {
+    types = target->ctypes;
+  }
+  GRN_API_RETURN(types);
+}
+
+/*
+ * Replaces the ctypes array of `string`.
+ *
+ * Any previously stored array is released with GRN_FREE() before `types`
+ * is stored; the string then holds `types` itself (no copy is made).
+ * Returns GRN_INVALID_ARGUMENT when `string` is NULL, GRN_SUCCESS
+ * otherwise.
+ */
+grn_rc
+grn_string_set_types(grn_ctx *ctx, grn_obj *string, unsigned char *types)
+{
+  grn_rc rc;
+  grn_string *target = (grn_string *)string;
+  GRN_API_ENTER;
+  if (!target) {
+    rc = GRN_INVALID_ARGUMENT;
+  } else {
+    if (target->ctypes) {
+      GRN_FREE(target->ctypes);
+    }
+    target->ctypes = types;
+    rc = GRN_SUCCESS;
+  }
+  GRN_API_RETURN(rc);
+}
+
+/*
+ * Returns the encoding stored in `string`, or GRN_ENC_NONE when `string`
+ * is NULL.
+ */
+grn_encoding
+grn_string_get_encoding(grn_ctx *ctx, grn_obj *string)
+{
+  grn_string *target = (grn_string *)string;
+  grn_encoding encoding;
+  GRN_API_ENTER;
+  encoding = target ? target->encoding : GRN_ENC_NONE;
+  GRN_API_RETURN(encoding);
+}
+
+/*
+ * Appends a human-readable description of `string` to `buffer`.
+ *
+ * The output has the form
+ *   #<string: original:<...>(N) normalized:<...>(N) n_characters:N
+ *    encoding:... flags:A|B>
+ * where the flag names are joined with '|'. `string` is dereferenced
+ * without a NULL check. Always returns GRN_SUCCESS.
+ */
+grn_rc
+grn_string_inspect(grn_ctx *ctx, grn_obj *buffer, grn_obj *string)
+{
+  grn_string *target = (grn_string *)string;
+  int flags = target->flags;
+
+  GRN_TEXT_PUTS(ctx, buffer, "#<string:");
+
+  /* Original text followed by its byte length in parentheses. */
+  GRN_TEXT_PUTS(ctx, buffer, " original:<");
+  GRN_TEXT_PUT(ctx, buffer,
+               target->original,
+               target->original_length_in_bytes);
+  GRN_TEXT_PUTS(ctx, buffer, ">");
+  GRN_TEXT_PUTS(ctx, buffer, "(");
+  grn_text_itoa(ctx, buffer, target->original_length_in_bytes);
+  GRN_TEXT_PUTS(ctx, buffer, ")");
+
+  /* Normalized text followed by its byte length in parentheses. */
+  GRN_TEXT_PUTS(ctx, buffer, " normalized:<");
+  GRN_TEXT_PUT(ctx, buffer,
+               target->normalized,
+               target->normalized_length_in_bytes);
+  GRN_TEXT_PUTS(ctx, buffer, ">");
+  GRN_TEXT_PUTS(ctx, buffer, "(");
+  grn_text_itoa(ctx, buffer, target->normalized_length_in_bytes);
+  GRN_TEXT_PUTS(ctx, buffer, ")");
+
+  GRN_TEXT_PUTS(ctx, buffer, " n_characters:");
+  grn_text_itoa(ctx, buffer, target->n_characters);
+
+  GRN_TEXT_PUTS(ctx, buffer, " encoding:");
+  grn_inspect_encoding(ctx, buffer, target->encoding);
+
+  /* Every set flag is written with a trailing '|'. */
+  GRN_TEXT_PUTS(ctx, buffer, " flags:");
+  if (flags & GRN_STRING_REMOVE_BLANK) {
+    GRN_TEXT_PUTS(ctx, buffer, "REMOVE_BLANK|");
+  }
+  if (flags & GRN_STRING_WITH_TYPES) {
+    GRN_TEXT_PUTS(ctx, buffer, "WITH_TYPES|");
+  }
+  if (flags & GRN_STRING_WITH_CHECKS) {
+    GRN_TEXT_PUTS(ctx, buffer, "WITH_CHECKS|");
+  }
+  if (flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER) {
+    GRN_TEXT_PUTS(ctx, buffer, "REMOVE_TOKENIZED_DELIMITER|");
+  }
+  /* Drop the separator left behind by the last flag, if any. */
+  if (GRN_TEXT_VALUE(buffer)[GRN_TEXT_LEN(buffer) - 1] == '|') {
+    grn_bulk_truncate(ctx, buffer, GRN_TEXT_LEN(buffer) - 1);
+  }
+
+  GRN_TEXT_PUTS(ctx, buffer, ">");
+
+  return GRN_SUCCESS;
+}
+
+/*
+ * Releases `string` together with its normalized, ctypes and checks
+ * buffers (each only when non-NULL). The original text pointer is not
+ * freed here. Returns GRN_INVALID_ARGUMENT when `string` is NULL,
+ * GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_string_close(grn_ctx *ctx, grn_obj *string)
+{
+  grn_string *target = (grn_string *)string;
+
+  if (!target) {
+    return GRN_INVALID_ARGUMENT;
+  }
+
+  if (target->normalized) {
+    GRN_FREE(target->normalized);
+  }
+  if (target->ctypes) {
+    GRN_FREE(target->ctypes);
+  }
+  if (target->checks) {
+    GRN_FREE(target->checks);
+  }
+  GRN_FREE(string);
+
+  return GRN_SUCCESS;
+}