summaryrefslogtreecommitdiff
path: root/libraries/base/cbits/ubconfc
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/base/cbits/ubconfc')
-rwxr-xr-xlibraries/base/cbits/ubconfc363
1 files changed, 0 insertions, 363 deletions
diff --git a/libraries/base/cbits/ubconfc b/libraries/base/cbits/ubconfc
deleted file mode 100755
index cd29641c58..0000000000
--- a/libraries/base/cbits/ubconfc
+++ /dev/null
@@ -1,363 +0,0 @@
-#!/bin/sh
-
-# --------------------------------------------------------------------------
-# This is the script to create the unicode chars property table
-# Written by Dimitry Golubovsky (dimitry@golubovsky.org) as part
-# of the Partial Unicode Support patch
-#
-# Adapted for use with GHC.
-# License: see libraries/base/LICENSE
-#
-# -------------------------------------------------------------------------
-
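-# The script reads semicolon-separated character data from the standard
-# input and writes C code to WCsubst.c next to this script (stdout is
-# redirected there); it also writes ../include/UnicodeVersion.h. The C code
-# contains the chars property table and basic functions to access properties.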
-
-# Output the file header
-
-# The Unicode version (dot-separated numbers) is the single, mandatory argument.
-VERSION="$1"
-if [ -z "$VERSION" ]; then
-    echo "Usage: $0 [unicode version]" >&2
-    exit 1
-fi
-# This file is #included from GHC.Unicode and is used to define
-# GHC.Unicode.unicodeVersion. Paths are quoted so a checkout with spaces works.
-cat > "$(dirname "$0")/../include/UnicodeVersion.h" <<EOF
-#if 0
-This is an automatically generated file: do not edit
-Generated by $(basename "$0") at $(date)
-@generated
-#endif
-
-#define UNICODE_VERSION_NUMS [$(echo "$VERSION" | sed 's/\./,/g')]
-EOF
-# From here on everything written to stdout lands in WCsubst.c.
-exec > "$(dirname "$0")/WCsubst.c"
-
-echo "/*-------------------------------------------------------------------------"
-echo "This is an automatically generated file: do not edit"
-echo "Generated by $(basename "$0") at $(date)"
-echo "@generated"
-echo "-------------------------------------------------------------------------*/"
-echo
-echo "#include \"WCsubst.h\""
-
-# Define structures
-
-cat <<EOF
-
-/* Unicode general categories, listed in the same order as in the Unicode
- * standard -- this must be the same order as in GHC.Unicode.
- */
-
-enum {
- NUMCAT_LU, /* Letter, Uppercase */
- NUMCAT_LL, /* Letter, Lowercase */
- NUMCAT_LT, /* Letter, Titlecase */
- NUMCAT_LM, /* Letter, Modifier */
- NUMCAT_LO, /* Letter, Other */
- NUMCAT_MN, /* Mark, Non-Spacing */
- NUMCAT_MC, /* Mark, Spacing Combining */
- NUMCAT_ME, /* Mark, Enclosing */
- NUMCAT_ND, /* Number, Decimal */
- NUMCAT_NL, /* Number, Letter */
- NUMCAT_NO, /* Number, Other */
- NUMCAT_PC, /* Punctuation, Connector */
- NUMCAT_PD, /* Punctuation, Dash */
- NUMCAT_PS, /* Punctuation, Open */
- NUMCAT_PE, /* Punctuation, Close */
- NUMCAT_PI, /* Punctuation, Initial quote */
- NUMCAT_PF, /* Punctuation, Final quote */
- NUMCAT_PO, /* Punctuation, Other */
- NUMCAT_SM, /* Symbol, Math */
- NUMCAT_SC, /* Symbol, Currency */
- NUMCAT_SK, /* Symbol, Modifier */
- NUMCAT_SO, /* Symbol, Other */
- NUMCAT_ZS, /* Separator, Space */
- NUMCAT_ZL, /* Separator, Line */
- NUMCAT_ZP, /* Separator, Paragraph */
- NUMCAT_CC, /* Other, Control */
- NUMCAT_CF, /* Other, Format */
- NUMCAT_CS, /* Other, Surrogate */
- NUMCAT_CO, /* Other, Private Use */
- NUMCAT_CN /* Other, Not Assigned */
-};
-
-struct _convrule_
-{
- unsigned int category;
- unsigned int catnumber;
- int possible;
- int updist;
- int lowdist;
- int titledist;
-};
-
-struct _charblock_
-{
- int start;
- int length;
- const struct _convrule_ *rule;
-};
-
-EOF
-
-# Convert the stdin file to the C table
-
-awk '
-BEGIN {
-    # Input fields are separated by semicolons.
-    FS=";"
-    # Counters for categories, rules, all blocks, case-conversion blocks,
-    # space blocks and the blocks counted for NUM_LAT1BLOCKS.
-    catidx=0; rulidx=0; blockidx=0
-    cblckidx=0; sblckidx=0; lat1idx=0
-    # blockb==-1 means no block has been opened yet.
-    blockb=-1
-    blockl=0
-    # hex[] maps each uppercase hex digit to its value (used by h2d).
-    digs="0123456789ABCDEF"
-    for(i=0;i<16;i++)
-        hex[substr(digs,i+1,1)]=i
-}
-function em1(a)
-{
-    # An empty field becomes "-1"; anything else gets a "0x" prefix.
-    # NOTE(review): nothing visible in this script calls em1.
-    return (a=="") ? "-1" : "0x" a
-}
-function h2d(a,    i, n, acc)
-{
-    # Hex string to number ("" gives 0); i, n and acc are locals.
-    a=toupper(a)   # the hex[] keys are uppercase
-    n=length(a)
-    acc=0
-    for(i=1;i<=n;i++)
-        acc=acc*16+hex[substr(a,i,1)]
-    return acc
-}
-# Flush the block being accumulated (blockb, blockl, blockr) into blocks[],
-# and into cblcks[] / sblcks[] when it qualifies, then open a new
-# one-character block at the current record. blkd and rsp are locals.
-function dumpblock(    blkd, rsp)
-{
-    # Text of one _charblock_ initializer: start, length, &rule<index>.
-    blkd=blockb ", " blockl ", &rule" rules[blockr]
-    blocks[blockidx++]=blkd
-    # Latin-1 is 0..255, matching the (c<256) test in the generated checkattr.
-    if(blockb<256) lat1idx++
-    # blockr reads "GENCAT_x, NUMCAT_x, possible, ..."; piece 3 is " 0" or " 1".
-    split(blockr,rsp,",")
-    if(substr(rsp[3],2,1)=="1")
-        cblcks[cblckidx++]=blkd
-    if(rsp[1]=="GENCAT_ZS")
-        sblcks[sblckidx++]=blkd
-    # The current record starts the next block.
-    blockb=self
-    blockl=1
-    blockr=rule
-}
-{  # main rule: runs once per input record
- name=$2  # field 2; only inspected for the "First>" / "Last>" markers below
- cat=toupper($3)  # field 3, uppercased; becomes the GENCAT_/NUMCAT_ suffix
- self=h2d($1)  # field 1: the code point of this record, parsed from hex
- up=h2d($13)  # fields 13, 14, 15 feed up, low and title; an empty field parses as 0
- low=h2d($14)
- title=h2d($15)
- convpos=1
- if((up==0)&&(low==0)&&(title==0)) convpos=0  # no mapping at all: rule carries no conversion
- if(up==0) up=self  # a missing mapping means the character maps to itself
- if(low==0) low=self
- if(title==0) title=self
- updist=up-self  # mappings are stored as signed distances from self
- lowdist=low-self
- titledist=title-self
- rule="GENCAT_"cat", NUMCAT_"cat", "((convpos==1)? \
- ("1, " updist ", " lowdist ", " titledist): \
- ("0, 0, 0, 0"))  # rule text is both the C initializer body and the dedup key
- if(cats[cat]=="")  # first time this category is seen
- {
- cats[cat]=(2^catidx);  # give it the next unused bit
- catidx++;
- }
- if(rules[rule]=="")  # first time this exact rule text is seen
- {
- rules[rule]=rulidx;  # number it; END names the struct rule<number>
- rulidx++;
- }
- if(blockb==-1)  # very first record: open the first block
- {
- blockb=self
- blockl=1
- blockr=rule
- }
- else
- {
- if (index(name,"First>")!=0)  # start of a range: flush, then open a block here
- {
- dumpblock()
- }
- else if (index(name,"Last>")!=0)  # end of a range
- {
- blockl+=(self-blockb)  # grow the open block by the distance from its start to self
- }
- else if((self==blockb+blockl)&&(rule==blockr)) blockl++  # adjacent and same rule: extend
- else
- {
- dumpblock()  # otherwise flush and start a new block at this record
- }
- }
-}
-# Print one "static const struct _charblock_ NAME[]={...};" table from the
-# first n entries of tbl, comma-separating every entry but the last.
-function emit_table(name, tbl, n,    k)
-{
-    print "static const struct _charblock_ " name "[]={"
-    for(k=0;k<n;k++)
-    {
-        # Explicit %s so the table text is never interpreted as a format.
-        printf "\t{%s}%s\n", tbl[k], ((k<(n-1))?",":"")
-    }
-    print "};"
-}
-# After the last record: flush the final block, then emit the category
-# bit masks, the table sizes, the rule structs and the three block tables.
-END {
-    dumpblock()
-    # One bit mask per category, in whatever order awk walks cats[].
-    for(c in cats) print "#define GENCAT_"c" "cats[c]
-    # self still holds the code point of the last input record.
-    print "#define MAX_UNI_CHAR " self
-    print "#define NUM_BLOCKS " blockidx
-    print "#define NUM_CONVBLOCKS " cblckidx
-    print "#define NUM_SPACEBLOCKS " sblckidx
-    print "#define NUM_LAT1BLOCKS " lat1idx
-    print "#define NUM_RULES " rulidx
-    # Each distinct rule becomes a struct named rule<index>.
-    for(r in rules)
-    {
-        printf "static const struct _convrule_ rule%s={%s};\n", rules[r], r
-    }
-    # Every block, then the case-converting subset, then the GENCAT_ZS subset.
-    emit_table("allchars", blocks, blockidx)
-    emit_table("convchars", cblcks, cblckidx)
-    emit_table("spacechars", sblcks, sblckidx)
-}
-'
-# Output the C procedures code
-
-cat <<EOF
-
-/*
- Obtain the reference to character rule by doing
- binary search over the specified array of blocks.
- To make checkattr shorter, the address of
- nullrule is returned if the search fails:
- this rule defines no category and no conversion
- distances. The compare function returns 0 when
- key->start is within the block. Otherwise
- result of comparison of key->start and start of the
- current block is returned as usual.
-*/
-
-static const struct _convrule_ nullrule={0,NUMCAT_CN,0,0,0,0};
-
-static int blkcmp(const void *vk,const void *vb)
-{
- const struct _charblock_ *key,*cur;
- key=vk;
- cur=vb;
- if((key->start>=cur->start)&&(key->start<(cur->start+cur->length)))
- {
- return 0;
- }
- if(key->start>cur->start) return 1;
- return -1;
-}
-
-/* Find the rule of the block in blocks[0..numblocks-1] that contains unichar;
-   yields &nullrule when no block contains it. */
-static const struct _convrule_ *getrule(
- const struct _charblock_ *blocks, int numblocks, int unichar)
-{
- /* blkcmp only reads key.start, so length and rule are placeholders */
- const struct _charblock_ key={unichar,1,(void *)0};
- const struct _charblock_ *cb=bsearch(&key,blocks,numblocks,sizeof(key),blkcmp);
- return (cb==(void *)0) ? &nullrule : cb->rule;
-}
-
-
-
-/*
- Check whether a character (internal code) has certain attributes.
- Attributes (category flags) may be ORed. The function ANDs
- character category flags and the mask and returns the result.
- If the character belongs to one of the categories requested,
- the result will be nonzero.
-*/
-
-static inline int checkattr(int c,unsigned int catmask)
-{
- return (catmask & (getrule(allchars,(c<256)?NUM_LAT1BLOCKS:NUM_BLOCKS,c)->category));
-}
-/* Same test as checkattr, but searching only the spacechars table. */
-static inline int checkattr_s(int c,unsigned int catmask)
-{
- return (catmask & (getrule(spacechars,NUM_SPACEBLOCKS,c)->category));
-}
-
-/*
- Define predicate functions for some combinations of categories.
-*/
-
-#define unipred(p,m) \\
-HsInt p(HsInt c) \\
-{ \\
- return checkattr(c,m); \\
-}
-
-#define unipred_s(p,m) \\
-HsInt p(HsInt c) \\
-{ \\
- return checkattr_s(c,m); \\
-}
-
-/*
- Make these rules as close to Hugs as possible.
-*/
-
-unipred(u_iswcntrl,GENCAT_CC)
-unipred(u_iswprint, \
-(GENCAT_MC | GENCAT_NO | GENCAT_SK | GENCAT_ME | GENCAT_ND | \
- GENCAT_PO | GENCAT_LT | GENCAT_PC | GENCAT_SM | GENCAT_ZS | \
- GENCAT_LU | GENCAT_PD | GENCAT_SO | GENCAT_PE | GENCAT_PF | \
- GENCAT_PS | GENCAT_SC | GENCAT_LL | GENCAT_LM | GENCAT_PI | \
- GENCAT_NL | GENCAT_MN | GENCAT_LO))
-unipred_s(u_iswspace,GENCAT_ZS)
-unipred(u_iswupper,(GENCAT_LU|GENCAT_LT))
-unipred(u_iswlower,GENCAT_LL)
-unipred(u_iswalpha,(GENCAT_LL|GENCAT_LU|GENCAT_LT|GENCAT_LM|GENCAT_LO))
-unipred(u_iswdigit,GENCAT_ND)
-
-unipred(u_iswalnum,(GENCAT_LT|GENCAT_LU|GENCAT_LL|GENCAT_LM|GENCAT_LO|
- GENCAT_NO|GENCAT_ND|GENCAT_NL))
-
-#define caseconv(p,to) \\
-HsInt p(HsInt c) \\
-{ \\
- const struct _convrule_ *rule=getrule(convchars,NUM_CONVBLOCKS,c);\\
- if(rule==&nullrule) return c;\\
- return c+rule->to;\\
-}
-
-caseconv(u_towupper,updist)
-caseconv(u_towlower,lowdist)
-caseconv(u_towtitle,titledist)
-
-HsInt u_gencat(HsInt c)
-{
- return getrule(allchars,NUM_BLOCKS,c)->catnumber;
-}
-EOF