diff options
Diffstat (limited to 'libraries/base/cbits/ubconfc')
-rwxr-xr-x | libraries/base/cbits/ubconfc | 363 |
1 files changed, 0 insertions, 363 deletions
#!/bin/sh

# --------------------------------------------------------------------------
# This is the script to create the unicode chars property table
# Written by Dimitry Golubovsky (dimitry@golubovsky.org) as part
# of the Partial Unicode Support patch
#
# Adopted for use with GHC.
# License: see libraries/base/LICENSE
#
# Repository path: libraries/base/cbits/ubconfc
# -------------------------------------------------------------------------

# Usage: ubconfc <unicode version>  (character data on standard input)
#
# Input:  semicolon-separated records, one per character (or per
#         "First>"/"Last>" range marker).  Fields 1, 2, 3, 13, 14 and 15
#         are read as: hex code point, name, general category, and the hex
#         upper/lower/title case mappings.
#         NOTE(review): presumably UnicodeData.txt -- confirm with caller.
# Output: two files, both located relative to this script:
#           ../include/UnicodeVersion.h   version number list
#           WCsubst.c                     property tables + access functions
#         Nothing useful is written to the caller's standard output.

# Abort on the first failing command so that a failed awk run is not masked
# by the exit status of the final 'cat'.
set -e

VERSION="$1"
if [ -z "$VERSION" ]; then
    echo "Usage: $0 [unicode version]" >&2
    exit 1
fi

# Resolve everything derived from $0 once, quoted, so that a checkout path
# containing whitespace or glob characters still works.
script_dir=$(dirname "$0")
script_name=$(basename "$0")
generated_at=$(date)
version_nums=$(printf '%s\n' "$VERSION" | sed 's/\./,/g')

# This file is #included from GHC.Unicode and is used to define
# GHC.Unicode.unicodeVersion.
cat > "$script_dir/../include/UnicodeVersion.h" <<EOF
#if 0
This is an automatically generated file: do not edit
Generated by $script_name at $generated_at
@generated
#endif

#define UNICODE_VERSION_NUMS [$version_nums]
EOF

# From here on, everything written to stdout lands in WCsubst.c.
exec > "$script_dir/WCsubst.c"

# Output the file header.  <stdlib.h> is included explicitly because the
# generated code calls bsearch() and uses NULL.
cat <<EOF
/*-------------------------------------------------------------------------
This is an automatically generated file: do not edit
Generated by $script_name at $generated_at
@generated
-------------------------------------------------------------------------*/

#include "WCsubst.h"
#include <stdlib.h>
EOF

# Define structures.  The delimiter is quoted: this text is pure C and must
# not be subject to shell expansion.

cat <<'EOF'

/* Unicode general categories, listed in the same order as in the Unicode
 * standard -- this must be the same order as in GHC.Unicode.
 */

enum {
    NUMCAT_LU,  /* Letter, Uppercase */
    NUMCAT_LL,  /* Letter, Lowercase */
    NUMCAT_LT,  /* Letter, Titlecase */
    NUMCAT_LM,  /* Letter, Modifier */
    NUMCAT_LO,  /* Letter, Other */
    NUMCAT_MN,  /* Mark, Non-Spacing */
    NUMCAT_MC,  /* Mark, Spacing Combining */
    NUMCAT_ME,  /* Mark, Enclosing */
    NUMCAT_ND,  /* Number, Decimal */
    NUMCAT_NL,  /* Number, Letter */
    NUMCAT_NO,  /* Number, Other */
    NUMCAT_PC,  /* Punctuation, Connector */
    NUMCAT_PD,  /* Punctuation, Dash */
    NUMCAT_PS,  /* Punctuation, Open */
    NUMCAT_PE,  /* Punctuation, Close */
    NUMCAT_PI,  /* Punctuation, Initial quote */
    NUMCAT_PF,  /* Punctuation, Final quote */
    NUMCAT_PO,  /* Punctuation, Other */
    NUMCAT_SM,  /* Symbol, Math */
    NUMCAT_SC,  /* Symbol, Currency */
    NUMCAT_SK,  /* Symbol, Modifier */
    NUMCAT_SO,  /* Symbol, Other */
    NUMCAT_ZS,  /* Separator, Space */
    NUMCAT_ZL,  /* Separator, Line */
    NUMCAT_ZP,  /* Separator, Paragraph */
    NUMCAT_CC,  /* Other, Control */
    NUMCAT_CF,  /* Other, Format */
    NUMCAT_CS,  /* Other, Surrogate */
    NUMCAT_CO,  /* Other, Private Use */
    NUMCAT_CN   /* Other, Not Assigned */
};

struct _convrule_
{
    unsigned int category;
    unsigned int catnumber;
    int possible;
    int updist;
    int lowdist;
    int titledist;
};

struct _charblock_
{
    int start;
    int length;
    const struct _convrule_ *rule;
};

EOF

# Convert the stdin file to the C table.
#
# NOTE: the awk program is inside a single-quoted shell string, so it must
# not contain any single-quote character, comments included.

awk '
# Global state shared between the per-record rule, dumpblock() and END:
#   self, rule              code point and rule text of the current record
#   blockb, blockl, blockr  start, length and rule text of the open block
#   cats[cat]               bit mask assigned to each general category
#   rules[rule]             index assigned to each distinct rule text
#   blocks/cblcks/sblcks    all blocks / case-converting blocks / Zs blocks
BEGIN {
    FS = ";"
    catidx = 0
    rulidx = 0
    blockidx = 0
    cblckidx = 0
    sblckidx = 0
    lat1idx = 0
    blockb = -1
    blockl = 0
    digs = "0123456789ABCDEF"
    for (i = 0; i < 16; i++)
        hex[substr(digs, i + 1, 1)] = i
}

# Convert an upper-case hexadecimal string to a number; "" yields 0.
# The extra parameters are locals, so callers keep their own loop counters.
function h2d(a,    n, acc, k)
{
    n = length(a)
    acc = 0
    for (k = 1; k <= n; k++)
        acc = acc * 16 + hex[substr(a, k, 1)]
    return acc
}

# Record the open block in the output tables, then open a new block that
# starts at the current record.
function dumpblock(    blkd, rsp)
{
    blkd = blockb ", " blockl ", &rule" rules[blockr]
    blocks[blockidx++] = blkd
    # Count the leading blocks that are searched for code points below 256.
    if (blockb <= 256) lat1idx++
    # blockr looks like "GENCAT_xx, NUMCAT_xx, <0|1>, ..."; after the split
    # the third piece is " 0" or " 1" (note the leading blank).
    split(blockr, rsp, ",")
    if (substr(rsp[3], 2, 1) == "1")
        cblcks[cblckidx++] = blkd
    if (rsp[1] == "GENCAT_ZS")
        sblcks[sblckidx++] = blkd
    blockb = self
    blockl = 1
    blockr = rule
}

# Print one "static const struct _charblock_ <ident>[]" initialiser.
function emit_table(ident, tbl, n,    k)
{
    print "static const struct _charblock_ " ident "[]={"
    for (k = 0; k < n; k++)
        printf "\t{%s}%s\n", tbl[k], ((k < n - 1) ? "," : "")
    print "};"
}

{
    name = $2
    cat = toupper($3)
    self = h2d($1)
    up = h2d($13)
    low = h2d($14)
    title = h2d($15)

    # A record with no case mapping at all gets the all-zero conversion
    # part; otherwise a missing mapping means "maps to itself".
    if (up == 0 && low == 0 && title == 0) {
        convpart = "0, 0, 0, 0"
    } else {
        if (up == 0) up = self
        if (low == 0) low = self
        if (title == 0) title = self
        updist = up - self
        lowdist = low - self
        titledist = title - self
        convpart = "1, " updist ", " lowdist ", " titledist
    }
    rule = "GENCAT_" cat ", NUMCAT_" cat ", " convpart

    # Each category gets its own bit, in order of first appearance.
    if (!(cat in cats)) {
        cats[cat] = 2 ^ catidx
        catidx++
    }
    if (!(rule in rules)) {
        rules[rule] = rulidx
        rulidx++
    }

    if (blockb == -1) {
        # Very first record: open the first block.
        blockb = self
        blockl = 1
        blockr = rule
    } else if (index(name, "First>") != 0) {
        # Start of an explicit range: close the previous block.
        dumpblock()
    } else if (index(name, "Last>") != 0) {
        # End of an explicit range: stretch the open block up to here.
        blockl += (self - blockb)
    } else if (self == blockb + blockl && rule == blockr) {
        # Contiguous code point with the same rule: extend the block.
        blockl++
    } else {
        dumpblock()
    }
}

END {
    if (NR == 0) {
        # Without any record the tables below would be empty and the
        # generated C would not compile; fail loudly instead.
        print "ubconfc: no input records on standard input" | "cat 1>&2"
        exit 1
    }
    dumpblock()
    for (c in cats) print "#define GENCAT_" c " " cats[c]
    print "#define MAX_UNI_CHAR " self
    print "#define NUM_BLOCKS " blockidx
    print "#define NUM_CONVBLOCKS " cblckidx
    print "#define NUM_SPACEBLOCKS " sblckidx
    print "#define NUM_LAT1BLOCKS " lat1idx
    print "#define NUM_RULES " rulidx
    for (r in rules)
        printf "static const struct _convrule_ rule%s={%s};\n", rules[r], r
    emit_table("allchars", blocks, blockidx)
    emit_table("convchars", cblcks, cblckidx)
    emit_table("spacechars", sblcks, sblckidx)
}
'

# Output the C procedures code.  Quoted delimiter again: the backslashes
# below are C line continuations and must reach the output untouched.

cat <<'EOF'

/*
    Obtain the reference to character rule by doing
    binary search over the specified array of blocks.
    To make checkattr shorter, the address of
    nullrule is returned if the search fails:
    this rule defines no category and no conversion
    distances. The compare function returns 0 when
    key->start is within the block. Otherwise
    result of comparison of key->start and start of the
    current block is returned as usual.
*/

static const struct _convrule_ nullrule={0,NUMCAT_CN,0,0,0,0};

static int blkcmp(const void *vk,const void *vb)
{
    const struct _charblock_ *key=vk;
    const struct _charblock_ *cur=vb;
    if((key->start>=cur->start)&&(key->start<(cur->start+cur->length)))
    {
        return 0;
    }
    if(key->start>cur->start) return 1;
    return -1;
}

static const struct _convrule_ *getrule(
    const struct _charblock_ *blocks,
    int numblocks,
    int unichar)
{
    const struct _charblock_ key={unichar,1,NULL};
    const struct _charblock_ *cb=
        bsearch(&key,blocks,numblocks,sizeof(*blocks),blkcmp);
    if(cb==NULL) return &nullrule;
    return cb->rule;
}



/*
    Check whether a character (internal code) has certain attributes.
    Attributes (category flags) may be ORed. The function ANDs
    character category flags and the mask and returns the result.
    If the character belongs to one of the categories requested,
    the result will be nonzero.
*/

static inline int checkattr(int c,unsigned int catmask)
{
    return (catmask & (getrule(allchars,(c<256)?NUM_LAT1BLOCKS:NUM_BLOCKS,c)->category));
}

static inline int checkattr_s(int c,unsigned int catmask)
{
    return (catmask & (getrule(spacechars,NUM_SPACEBLOCKS,c)->category));
}

/*
    Define predicate functions for some combinations of categories.
*/

#define unipred(p,m) \
HsInt p(HsInt c) \
{ \
    return checkattr(c,m); \
}

#define unipred_s(p,m) \
HsInt p(HsInt c) \
{ \
    return checkattr_s(c,m); \
}

/*
    Make these rules as close to Hugs as possible.
*/

unipred(u_iswcntrl,GENCAT_CC)
unipred(u_iswprint,
(GENCAT_MC | GENCAT_NO | GENCAT_SK | GENCAT_ME | GENCAT_ND |
  GENCAT_PO | GENCAT_LT | GENCAT_PC | GENCAT_SM | GENCAT_ZS |
  GENCAT_LU | GENCAT_PD | GENCAT_SO | GENCAT_PE | GENCAT_PF |
  GENCAT_PS | GENCAT_SC | GENCAT_LL | GENCAT_LM | GENCAT_PI |
  GENCAT_NL | GENCAT_MN | GENCAT_LO))
unipred_s(u_iswspace,GENCAT_ZS)
unipred(u_iswupper,(GENCAT_LU|GENCAT_LT))
unipred(u_iswlower,GENCAT_LL)
unipred(u_iswalpha,(GENCAT_LL|GENCAT_LU|GENCAT_LT|GENCAT_LM|GENCAT_LO))
unipred(u_iswdigit,GENCAT_ND)

unipred(u_iswalnum,(GENCAT_LT|GENCAT_LU|GENCAT_LL|GENCAT_LM|GENCAT_LO|
                    GENCAT_NO|GENCAT_ND|GENCAT_NL))

#define caseconv(p,to) \
HsInt p(HsInt c) \
{ \
    const struct _convrule_ *rule=getrule(convchars,NUM_CONVBLOCKS,c);\
    if(rule==&nullrule) return c;\
    return c+rule->to;\
}

caseconv(u_towupper,updist)
caseconv(u_towlower,lowdist)
caseconv(u_towtitle,titledist)

HsInt u_gencat(HsInt c)
{
    return getrule(allchars,NUM_BLOCKS,c)->catnumber;
}
EOF