summaryrefslogtreecommitdiff
path: root/include/m_ctype.h
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mnogosearch.org>2013-10-31 14:24:24 +0400
committerAlexander Barkov <bar@mnogosearch.org>2013-10-31 14:24:24 +0400
commitbd3dc54261f10f387a03ad99ce74c3824c42e462 (patch)
tree2eb1a284095b7d7bd28368bab9e229880a56fc95 /include/m_ctype.h
parenteea91f633f903b8c223b7d470e4be7366cbf57c8 (diff)
downloadmariadb-git-bd3dc54261f10f387a03ad99ce74c3824c42e462.tar.gz
A few minor Unicode collation customization improvements were made,
which make it possible to add more world language collations with very complex collation rules (e.g. Myanmar):
- The weight string for a single character in a user-defined collation was erroneously limited to 7 weights (instead of 8 weights). Added an extra element to the user-defined weight arrays, to fit 8 non-zero weights.
- The weight string limit for contractions was made twice as long (16 weights), which allows longer contractions without affecting the performance of filesort.
- A user-defined collation now refuses to initialize and reports an error if a weight string gets longer than 8 weights for a single character, or longer than 16 weights for a contraction. Previously, weight strings for such characters (and contractions) were truncated, so a collation could silently start with wrong rules.
- Fixed a bug in handling rules like "&a << b" in combination with shift-after-method="expand". The primary weight for "b" was not correctly calculated, which erroneously made "b" primary greater than "a" instead of primary equal to "a".
Diffstat (limited to 'include/m_ctype.h')
-rw-r--r--include/m_ctype.h16
1 files changed, 14 insertions, 2 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index b9682df12bf..1b60f2091b1 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -88,13 +88,25 @@ extern MY_UNICASE_INFO my_unicase_mysql500;
extern MY_UNICASE_INFO my_unicase_unicode520;
#define MY_UCA_MAX_CONTRACTION 6
-#define MY_UCA_MAX_WEIGHT_SIZE 8
+/*
+ The DUCET tables in ctype-uca.c are dumped with a limit of 8 weights
+ per character. cs->strxfrm_multiply is set to 8 for all UCA based collations.
+
+ In language-specific UCA collations (with tailorings) we also do not allow
+ a single character to have more than 8 weights to stay with the same
+ strxfrm_multiply limit. Note, contractions are allowed to have twice longer
+ weight strings (up to 16 weights). As a contraction consists of at
+ least 2 characters, this makes sure that strxfrm_multiply ratio of 8
+ is respected.
+*/
+#define MY_UCA_MAX_WEIGHT_SIZE (8+1) /* Including 0 terminator */
+#define MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE (2*8+1) /* Including 0 terminator */
#define MY_UCA_WEIGHT_LEVELS 1
typedef struct my_contraction_t
{
my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */
- uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */
+ uint16 weight[MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */
my_bool with_context;
} MY_CONTRACTION;