summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-06-06 10:29:27 +0800
committer Peng Wu <alexepico@gmail.com> 2012-06-06 10:29:27 +0800
commit 928a7cce363dee8b42452bc0cf99c6070c6d4626 (patch)
tree 7bb825b1b58a8271f30f4302fa08c2866db30528 /scripts
parent eb35544148080f975b82ba279653cf9e4e7c589a (diff)
download ibus-libpinyin-928a7cce363dee8b42452bc0cf99c6070c6d4626.tar.gz
clean up scripts
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/Makefile 9
-rw-r--r-- scripts/create_db.py 94
-rw-r--r-- scripts/create_index.py 21
-rw-r--r-- scripts/create_unique_index.py 27
4 files changed, 0 insertions, 151 deletions
diff --git a/scripts/Makefile b/scripts/Makefile
deleted file mode 100644
index 046dd70..0000000
--- a/scripts/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-
-all:
-
-py-new.db: py.db create_db.py
- $(RM) py-new.db
- python create_db.py
-
-index:
- python create_index.py
diff --git a/scripts/create_db.py b/scripts/create_db.py
deleted file mode 100644
index e4826ae..0000000
--- a/scripts/create_db.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import sqlite3
-from pydict import *
-from id import *
-import sys
-
-con1 = sqlite3.connect("py.db")
-con2 = sqlite3.connect("py-new.db")
-con2.execute ("PRAGMA synchronous = NORMAL;")
-con2.execute ("PRAGMA temp_store = MEMORY;")
-con2.execute ("PRAGMA default_cache_size = 5000;")
-
-sql = "CREATE TABLE py_phrase_%d (phrase TEXT, freq INTEGER, %s)"
-
-for i in range(0, 16):
- column= []
- for j in range(0, i + 1):
- column.append ("s%d INTEGER" % j)
- column.append ("y%d INTEGER" % j)
- column = ",".join(column)
- con2.execute(sql % (i, column))
-con2.commit()
-
-def get_sheng_yun(pinyin):
- if pinyin == None:
- return None, None
- if pinyin == "ng":
- return "", "en"
- for i in xrange(2, 0, -1):
- t = pinyin[:i]
- if t in SHENGMU_DICT:
- return t, pinyin[len(t):]
- return "", pinyin
-
-def encode_pinyin(pinyin):
- if pinyin == None or pinyin == "":
- return 0
- return pinyin_id[pinyin]
- e = 0
- for c in pinyin:
- e = (e << 5) + (ord(c) - ord('a') + 1)
- return e
-
-insert_sql = "INSERT INTO py_phrase_%d VALUES (%s);"
-con2.commit()
-new_freq = 0
-freq = 0
-
-print "INSERTING"
-for r in con1.execute("SELECT * FROM py_phrase ORDER BY freq"):
- ylen = r[0]
- phrase = r[10]
- if r[11] > freq:
- freq = r[11]
- new_freq += 1
-
- if ylen <= 4:
- pys = map(lambda id: ID_PINYIN_DICT[id], r[1: 1 + ylen])
- else:
- pys = map(lambda id: ID_PINYIN_DICT[id], r[1: 5]) + r[5].encode("utf8").split("'")
-
- i = ylen - 1
- if i >= 15:
- i = 15
-
- pys = pys[0:16]
-
- sheng_yun = []
- for s, y in map(get_sheng_yun, pys):
- sheng_yun.append(s)
- sheng_yun.append(y)
-
-
- column = [phrase, new_freq] + map(encode_pinyin, sheng_yun)
-
- sql = insert_sql % (i, ",".join(["?"] * len(column)))
- con2.execute (sql, column)
-
-print "Remove duplicate"
-for i in xrange(0, 16):
- sql = "DELETE FROM py_phrase_%d WHERE rowid IN (SELECT rowid FROM (SELECT count() as count, rowid FROM py_phrase_%d GROUP by %s,phrase) WHERE count > 1)" % (i, i, ",".join(map(lambda i: "s%d,y%d"%(i,i), range(0, i + 1))))
- con2.execute(sql)
-con2.commit()
-print "CACUUM"
-con2.execute("VACUUM;")
-con2.commit()
-
-# con2.execute("create index index_0_0 on py_phrase_0(s0, y0)")
-#
-# for i in xrange(1, 16):
-# con2.execute("create index index_%d_0 on py_phrase_%d(s0, y0, s1, y1)" % (i, i))
-# con2.execute("create index index_%d_1 on py_phrase_%d(s0, s1, y1)" % (i, i))
-#
-# con2.execute("vacuum")
-# con2.commit()
diff --git a/scripts/create_index.py b/scripts/create_index.py
deleted file mode 100644
index 823e616..0000000
--- a/scripts/create_index.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import sqlite3
-
-con2 = sqlite3.connect("py-new.db")
-con2.execute ("PRAGMA synchronous = NORMAL;")
-con2.execute ("PRAGMA temp_store = MEMORY;")
-
-
-con2.execute("CREATE INDEX index_0_0 ON py_phrase_0(s0, y0)")
-print "py_phrase_%d done" % 0
-
-con2.execute("CREATE INDEX index_1_0 ON py_phrase_1(s0, y0, s1, y1)")
-con2.execute("CREATE INDEX index_1_1 ON py_phrase_1(s0, s1, y1)")
-print "py_phrase_%d done" % 1
-
-for i in xrange(2, 16):
- con2.execute("CREATE INDEX index_%d_0 ON py_phrase_%d(s0, y0, s1, y1, s2, y2)" % (i, i))
- con2.execute("CREATE INDEX index_%d_1 ON py_phrase_%d(s0, s1, s2, y2)" % (i, i))
- print "py_phrase_%d done" % i
-
-# con2.execute("vacuum")
-con2.commit()
diff --git a/scripts/create_unique_index.py b/scripts/create_unique_index.py
deleted file mode 100644
index e22d986..0000000
--- a/scripts/create_unique_index.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import sqlite3
-
-con2 = sqlite3.connect("py-new.db")
-con2.execute ("PRAGMA synchronous = NORMAL;")
-con2.execute ("PRAGMA temp_store = MEMORY;")
-
-
-con2.execute("CREATE UNIQUE INDEX IF NOT EXISTS index_0_0 ON py_phrase_0(s0, y0, phrase)")
-print "py_phrase_%d done" % 0
-
-con2.execute("CREATE UNIQUE INDEX IF NOT EXISTS index_1_0 ON py_phrase_1(s0, y0, s1, y1, phrase)")
-con2.execute("CREATE INDEX IF NOT EXISTS index_1_1 ON py_phrase_1(s0, s1, y1)")
-print "py_phrase_%d done" % 1
-
-for i in xrange(2, 16):
- sql = "CREATE UNIQUE INDEX IF NOT EXISTS index_%d_0 ON py_phrase_%d (" % (i, i)
- sql = sql + "s0,y0"
- for j in xrange(1, i + 1):
- sql = sql + ",s%d,y%d" % (j, j)
- sql = sql + ", phrase)"
- print sql
- con2.execute(sql)
- con2.execute("CREATE INDEX IF NOT EXISTS index_%d_1 ON py_phrase_%d(s0, s1, s2, y2)" % (i, i))
- print "py_phrase_%d done" % i
-
-# con2.execute("vacuum")
-con2.commit()