summaryrefslogtreecommitdiff
path: root/strings/ctype-uca.c
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2021-11-23 16:33:08 +0400
committerOleksandr Byelkin <sanja@mariadb.com>2022-08-10 15:03:35 +0200
commitbb84f61a2608588cb0182cb48ecd80540cb0c2c3 (patch)
tree63be53fd300980da4b2ee78ccc90fa633d2a81f7 /strings/ctype-uca.c
parent45e0373a7818f137bfbbf5d354c78055cec6c82b (diff)
downloadmariadb-git-bb84f61a2608588cb0182cb48ecd80540cb0c2c3.tar.gz
MDEV-27009 Add UCA-14.0.0 collations - adding uca-dump into build targets
- Adding uca-dump into build targets - Adding ctype-uca.h and moving implicit weight related routines there - Reusing implicit weight routines in ctype-uca.c and uca-dump.c - Adding handling of command line arguments to uca-dump - Fixing some compile-time warnings in uca-dump.c
Diffstat (limited to 'strings/ctype-uca.c')
-rw-r--r--strings/ctype-uca.c65
1 files changed, 9 insertions, 56 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 094c02618d0..7a46b062351 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -34,6 +34,7 @@
#include "strings_def.h"
#include <m_ctype.h>
+#include "ctype-uca.h"
typedef struct
{
@@ -31689,62 +31690,13 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc,
/****************************************************************/
-/**
- Implicit weights for a code CP are constructed as follows:
- [.AAAA.0020.0002][.BBBB.0000.0000]
-
- where:
- AAAA= BASE + (CP >> 15);
- BBBB= (CP & 0x7FFF) | 0x8000;
-
- There are two weights in the primary level (AAAA followed by BBBB).
- There is one weight on other levels:
- - 0020 on the secondary level
- - 0002 on the tertiary level
-*/
-
-
-/**
- Return BASE for an implicit weight on the primary level
-
- According to UCA, BASE is calculated as follows:
- - FB40 for Unified_Ideograph=True AND
- ((Block=CJK_Unified_Ideograph) OR
- (Block=CJK_Compatibility_Ideographs))
- - FB80 for Unified_Ideograph=True AND NOT
- ((Block=CJK_Unified_Ideograph) OR
- (Block=CJK_Compatibility_Ideographs))
- - FBC0 for any other code point
- TODO: it seems we're not handling BASE correctly:
- - check what are those blocks
- - there are more Unified Ideograph blocks in the latest Unicode versions
-*/
-static inline uint16
-my_uca_implicit_weight_base(my_wc_t code)
-{
- if (code >= 0x3400 && code <= 0x4DB5)
- return 0xFB80;
- if (code >= 0x4E00 && code <= 0x9FA5)
- return 0xFB40;
- return 0xFBC0;
-}
-
-
static inline void
my_uca_implicit_weight_put(uint16 *to, my_wc_t code, uint level)
{
- switch (level) {
- case 1: to[0]= 0x0020; to[1]= 0; break; /* Secondary level */
- case 2: to[0]= 0x0002; to[1]= 0; break; /* Tertiary level */
- case 3: to[0]= 0x0001; to[1]= 0; break; /* Quaternary level */
- default:
- DBUG_ASSERT(0);
- case 0:
- break;
- }
- /* Primary level */
- to[0]= (uint16)(code >> 15) + my_uca_implicit_weight_base(code);
- to[1]= (code & 0x7FFF) | 0x8000;
+ MY_UCA_IMPLICIT_WEIGHT weight;
+ weight= my_uca_520_implicit_weight_on_level(code, level);
+ to[0]= weight.weight[0];
+ to[1]= weight.weight[1];
to[2]= 0;
}
@@ -31766,10 +31718,11 @@ static inline int
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
{
my_wc_t wc= (scanner->page << 8) + scanner->code;
- scanner->implicit[0]= (wc & 0x7FFF) | 0x8000; /* The second weight */
- scanner->implicit[1]= 0; /* 0 terminator */
+ MY_UCA_IMPLICIT_WEIGHT weight= my_uca_520_implicit_weight_primary(wc);
+ scanner->implicit[0]= weight.weight[1]; /* The second weight */
+ scanner->implicit[1]= 0; /* 0 terminator */
scanner->wbeg= scanner->implicit;
- return my_uca_implicit_weight_base(wc) + (wc >> 15);
+ return weight.weight[0]; /* The first weight */
}