diff options
Diffstat (limited to 'contrib')
85 files changed, 1207 insertions, 24507 deletions
diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile index b4dbec036f..8ab634323f 100644 --- a/contrib/tsearch2/Makefile +++ b/contrib/tsearch2/Makefile @@ -1,29 +1,10 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.19 2007/06/26 22:05:03 tgl Exp $ +# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.20 2007/11/13 21:02:28 tgl Exp $ -MODULE_big = tsearch2 -OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ - dict_snowball.o dict_ispell.o dict_syn.o dict_thesaurus.o \ - wparser.o wparser_def.o \ - ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \ - tsvector_op.o rank.o ts_stat.o \ - query_util.o query_support.o query_rewrite.o query_gist.o \ - ts_locale.o ts_lexize.o ginidx.o - -SUBDIRS = snowball ispell wordparser -SUBDIROBJS = $(SUBDIRS:%=%/SUBSYS.o) - -OBJS += $(SUBDIROBJS) - -PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser - -DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 thesaurus -DATA_built = tsearch2.sql uninstall_tsearch2.sql -DOCS = README.tsearch2 +MODULES = tsearch2 +DATA_built = tsearch2.sql +DATA = uninstall_tsearch2.sql REGRESS = tsearch2 -SHLIB_LINK += $(filter -lm, $(LIBS)) - - ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) @@ -34,23 +15,3 @@ top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - - -$(SUBDIROBJS): $(SUBDIRS:%=%-recursive) ; - -.PHONY: $(SUBDIRS:%=%-recursive) - -$(SUBDIRS:%=%-recursive): - $(MAKE) -C $(subst -recursive,,$@) SUBSYS.o - -tsearch2.sql: tsearch.sql.in - sed -e 's,MODULE_PATHNAME,$$libdir/$(MODULE_big),g' $< >$@ - -uninstall_tsearch2.sql: untsearch.sql.in - cp $< $@ - -.PHONY: subclean -clean: subclean - -subclean: - for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean || exit; done diff --git a/contrib/tsearch2/README.tsearch2 b/contrib/tsearch2/README.tsearch2 deleted file mode 100644 index 5d7b19d55f..0000000000 --- a/contrib/tsearch2/README.tsearch2 +++ /dev/null @@ -1,210 +0,0 @@ -Tsearch2 - full text search extension for PostgreSQL - - [1]Online version of this document is available - - Tsearch2 - is the full text engine, fully integrated into PostgreSQL - RDBMS. - -Main features - - * Full online update - * Supports multiple table driven configurations - * flexible and rich linguistic support (dictionaries, stop words), - thesaurus - * full multibyte (UTF-8) support - * Sophisticated ranking functions with support of proximity and - structure information (rank, rank_cd) - * Index support (GiST and Gin) with concurrency and recovery support - * Rich query language with query rewriting support - * Headline support (text fragments with highlighted search terms) - * Ability to plug-in custom dictionaries and parsers - * Template generator for tsearch2 dictionaries with [2]snowball - stemmer support - * It is mature (5 years of development) - - Tsearch2, in a nutshell, provides FTS operator (contains) for the new - data types, representing document (tsvector) and query (tsquery). - Table driven configuration allows creation of custom searches using - standard SQL commands. - - tsvector is a searchable data type, representing document. 
It is a set - of unique words along with their positional information in the - document, organized in a special structure optimized for fast access - and lookup. Each entry could be labelled to reflect its importance in - document. - - tsquery is a data type for textual queries with support of boolean - operators. It consists of lexemes (optionally labelled) with boolean - operators between. - - Table driven configuration allows to specify: - * parser, which used to break document onto lexemes - * what lexemes to index and the way they are processed - * dictionaries to be used along with stop words recognition. - -OpenFTS vs Tsearch2 - - [3]OpenFTS is a middleware between application and database. OpenFTS - uses tsearch2 as a storage and database engine as a query executor - (searching). Everything else, i.e. parsing of documents, query - processing, linguistics, carry outs on client side. That's why OpenFTS - has its own configuration table (fts_conf) and works with its own set - of dictionaries. OpenFTS is more flexible, because it could be used in - multi-server architecture with separate machines for repository of - documents (documents could be stored in filesystem), database and - query engine. - - See [4]Documentation Roadmap for links to documentation. - -Authors - - * Oleg Bartunov <oleg@sai.msu.su>, Moscow, Moscow University, Russia - * Teodor Sigaev <teodor@sigaev.ru>, Moscow,Moscow University,Russia - -Contributors - - * Robert John Shepherd and Andrew J. Kopciuch submitted - "Introduction to tsearch" (Robert - tsearch v1, Andrew - tsearch - v2) - * Brandon Craig Rhodes wrote "Tsearch2 Guide" and "Tsearch2 - Reference" and proposed new naming convention for tsearch V2 - -Sponsors - - * ABC Startsiden - compound words support - * University of Mannheim for UTF-8 support (in 8.2) - * jfg:networks ([5]http:www.jfg-networks.com/) for Gin - Generalized - Inverted index (in 8.2) - * Georgia Public Library Service and LibLime, Inc. 
for Thesaurus - dictionary - * PostGIS community - GiST Concurrency and Recovery - - The authors are grateful to the Russian Foundation for Basic Research - and Delta-Soft Ltd., Moscow, Russia for support. - -Limitations - - * Length of lexeme < 2K - * Length of tsvector (lexemes + positions) < 1Mb - * The number of lexemes < 4^32 - * 0< Positional information < 16383 - * No more than 256 positions per lexeme - * The number of nodes ( lexemes + operations) in tsquery < 32768 - -References - - * GiST development site - - [6]http://www.sai.msu.su/~megera/postgres/gist - * GiN development - [7]http://www.sigaev.ru/gin/ - * OpenFTS home page - [8]http://openfts.sourceforge.net/ - * Mailing list - - [9]http://sourceforge.net/mailarchive/forum.php?forum=openfts-gene - ral - -Documentation Roadmap - - * Several docs are available from docs/ subdirectory - + "Tsearch V2 Introduction" by Andrew Kopciuch - + "Tsearch2 Guide" by Brandon Rhodes - + "Tsearch2 Reference" by Brandon Rhodes - * Readme.gendict in gendict/ subdirectory - + Also, check [10]Gendict tutorial - * Check [11]tsearch2 Wiki pages for various documentation - -Support - - Authors urgently recommend people to use [12]openfts-general or - [13]pgsql-general mailing lists for questions and discussions. - -Development History - - Latest news - - To the PostgreSQL 8.2 release we added: - * multibyte (UTF-8) support - * Thesaurus dictionary - * Query rewriting - * rank_cd relevation function now support different weights of - lexemes - * GiN support adds scalability of tsearch2 - - Pre-tsearch era - Development of OpenFTS began in 2000 after realizing that we - need a search engine optimized for online updates with access - to metadata from the database. This is essential for online - news agencies, web portals, digital libraries, etc. Most search - engines available utilize an inverted index which is very fast - for searching but very slow for online updates. 
Incremental - updates of an inverted index is a complex engineering task - while we needed something light, free and with the ability to - access metadata from the database. The last requirement was - very important because in a real life application search engine - should always consult metadata ( topic, permissions, date - range, version, etc.). We extensively use PostgreSQL as a - database backend and have no intention to move from it, so the - problem was to find a data structure and a fast way to access - it. PostgreSQL has rather unique data type for storing sets - (think about words) - arrays, but lacks index access to them. - During our research we found a paper of Joseph Hellerstein, who - introduced an interesting data structure suitable for sets - - RD-tree (Russian Doll tree). Further research lead us to the - idea to use GiST for implementing RD-tree, but at that time the - GiST code was untouched for a long time and contained several - bugs. After work on improving GiST for version 7.0.3 of - PostgreSQL was done, we were able to implement RD-Tree and use - it for index access to arrays of integers. This implementation - was ideally suited for small arrays and eliminated complex - joins, but was practically useless for indexing large arrays. - The next improvement came from an idea to represent a document - by a single bit-signature, a so-called superimposed signature - (see "Index Structures for Databases Containing Data Items with - Set-valued Attributes", 1997, Sven Helmer for details). We - developed the contrib/intarray module and used it for full - text indexing. - - tsearch v1 - It was inconvenient to use integer id's instead of words, so we - introduced a new data type called 'txtidx' - a searchable data - type (textual) with indexed access. This was a first step of - our work on an implementation of a built-in PostgreSQL full - text search engine. 
Even though tsearch v1 had many features of - a search engine it lacked configuration support and relevance - ranking. People were encouraged to use OpenFTS, which provided - relevance ranking based on positional information and flexible - configuration. OpenFTS v.0.34 is the last version based on - tsearch v1. - - tsearch V2 - People recognized tsearch as a powerful tool for full text - searching and insisted on adding ranking support, better - configurability, etc. We already thought about moving most of - the features of OpenFTS to tsearch, and in the early 2003 we - decided to work on a new version of tsearch. We abandoned - auxiliary index tables which were used by OpenFTS to store - positional information and modified the txtidx type to store - them internally. We added table-driven configuration, support - of ispell dictionaries, snowball stemmers and the ability to - specify which types of lexemes to index. Now, it's possible to - generate headlines of documents with highlighted search terms. - These changes make tsearch more user friendly and turn it into - a really powerful full text search engine. Brandon Rhodes - proposed to rename tsearch functions for consistency and we - renamed txtidx type to tsvector and other things as well. To - allow users of tsearch v1 smooth upgrade, we named the module - as tsearch2. Since version 0.35 OpenFTS uses tsearch2. - -References - - 1. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme.html - 2. http://snowball.tartarus.org/ - 3. http://openfts.sourceforge.net/ - 4. file://localhost/u/megera/WWW/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme82.html#dm - 5. http:www.jfg-networks.com/ - 6. http://www.sai.msu.su/~megera/postgres/gist - 7. http://www.sigaev.ru/gin/ - 8. http://openfts.sourceforge.net/ - 9. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general - 10. http://www.sai.msu.su/~megera/wiki/Gendict - 11. http://www.sai.msu.su/~megera/wiki/Tsearch2 - 12. 
http://sourceforge.net/mailarchive/forum.php?forum=openfts-general - 13. http://archives.postgresql.org/pgsql-general/ diff --git a/contrib/tsearch2/common.c b/contrib/tsearch2/common.c deleted file mode 100644 index 983399a438..0000000000 --- a/contrib/tsearch2/common.c +++ /dev/null @@ -1,188 +0,0 @@ -#include "postgres.h" - -#include "fmgr.h" -#include "catalog/pg_namespace.h" -#include "catalog/pg_proc.h" -#include "utils/syscache.h" -#include "miscadmin.h" - -#include "ts_cfg.h" -#include "dict.h" -#include "wparser.h" -#include "snmap.h" -#include "common.h" -#include "tsvector.h" - - - -#include "common.h" -#include "wparser.h" -#include "ts_cfg.h" -#include "dict.h" - - -Oid TSNSP_FunctionOid = InvalidOid; - - -text * -char2text(char *in) -{ - return charl2text(in, strlen(in)); -} - -text * -charl2text(char *in, int len) -{ - text *out = (text *) palloc(len + VARHDRSZ); - - memcpy(VARDATA(out), in, len); - SET_VARSIZE(out, len + VARHDRSZ); - return out; -} - -char - * -text2char(text *in) -{ - char *out = palloc(VARSIZE(in)); - - memcpy(out, VARDATA(in), VARSIZE(in) - VARHDRSZ); - out[VARSIZE(in) - VARHDRSZ] = '\0'; - return out; -} - -char - * -pnstrdup(char *in, int len) -{ - char *out = palloc(len + 1); - - memcpy(out, in, len); - out[len] = '\0'; - return out; -} - -text - * -ptextdup(text *in) -{ - text *out = (text *) palloc(VARSIZE(in)); - - memcpy(out, in, VARSIZE(in)); - return out; -} - -text - * -mtextdup(text *in) -{ - text *out = (text *) malloc(VARSIZE(in)); - - if (!out) - ts_error(ERROR, "No memory"); - memcpy(out, in, VARSIZE(in)); - return out; -} - -void -ts_error(int state, const char *format,...) 
-{ - va_list args; - int tlen = 128, - len = 0; - char *buf; - - reset_cfg(); - reset_dict(); - reset_prs(); - - va_start(args, format); - buf = palloc(tlen); - len = vsnprintf(buf, tlen - 1, format, args); - if (len >= tlen) - { - tlen = len + 1; - buf = repalloc(buf, tlen); - vsnprintf(buf, tlen - 1, format, args); - } - va_end(args); - - /* ?? internal error ?? */ - elog(state, "%s", buf); - pfree(buf); -} - -int -text_cmp(text *a, text *b) -{ - if (VARSIZE(a) == VARSIZE(b)) - return strncmp(VARDATA(a), VARDATA(b), VARSIZE(a) - VARHDRSZ); - return (int) VARSIZE(a) - (int) VARSIZE(b); - -} - -char * -get_namespace(Oid funcoid) -{ - HeapTuple tuple; - Form_pg_proc proc; - Form_pg_namespace nsp; - Oid nspoid; - char *txt; - - tuple = SearchSysCache(PROCOID, ObjectIdGetDatum(funcoid), 0, 0, 0); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for proc oid %u", funcoid); - proc = (Form_pg_proc) GETSTRUCT(tuple); - nspoid = proc->pronamespace; - ReleaseSysCache(tuple); - - tuple = SearchSysCache(NAMESPACEOID, ObjectIdGetDatum(nspoid), 0, 0, 0); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for namespace oid %u", nspoid); - nsp = (Form_pg_namespace) GETSTRUCT(tuple); - txt = pstrdup(NameStr((nsp->nspname))); - ReleaseSysCache(tuple); - - return txt; -} - -Oid -get_oidnamespace(Oid funcoid) -{ - HeapTuple tuple; - Form_pg_proc proc; - Oid nspoid; - - tuple = SearchSysCache(PROCOID, ObjectIdGetDatum(funcoid), 0, 0, 0); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for proc oid %u", funcoid); - proc = (Form_pg_proc) GETSTRUCT(tuple); - nspoid = proc->pronamespace; - ReleaseSysCache(tuple); - - return nspoid; -} - - /* if path is relative, take it as relative to share dir */ -char * -to_absfilename(char *filename) -{ - if (!is_absolute_path(filename)) - { - char sharepath[MAXPGPATH]; - char *absfn; - -#ifdef WIN32 - char delim = '\\'; -#else - char delim = '/'; -#endif - get_share_path(my_exec_path, sharepath); - 
absfn = palloc(strlen(sharepath) + strlen(filename) + 2); - sprintf(absfn, "%s%c%s", sharepath, delim, filename); - filename = absfn; - } - - return filename; -} diff --git a/contrib/tsearch2/common.h b/contrib/tsearch2/common.h deleted file mode 100644 index ff6ebd6240..0000000000 --- a/contrib/tsearch2/common.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __TS_COMMON_H__ -#define __TS_COMMON_H__ - -#include "postgres.h" -#include "fmgr.h" -#include "utils/array.h" - -text *char2text(char *in); -text *charl2text(char *in, int len); -char *text2char(text *in); -char *pnstrdup(char *in, int len); -text *ptextdup(text *in); -text *mtextdup(text *in); - -int text_cmp(text *a, text *b); - -char *to_absfilename(char *filename); - -#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) ) -#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x)) - -void ts_error(int state, const char *format,...); - -extern Oid TSNSP_FunctionOid; /* oid of called function, needed only for - * determ namespace, no more */ -char *get_namespace(Oid funcoid); -Oid get_oidnamespace(Oid funcoid); - -#define SET_FUNCOID() do { \ - if ( fcinfo->flinfo && fcinfo->flinfo->fn_oid != InvalidOid ) \ - TSNSP_FunctionOid = fcinfo->flinfo->fn_oid; \ -} while(0) - -#endif diff --git a/contrib/tsearch2/crc32.c b/contrib/tsearch2/crc32.c deleted file mode 100644 index bc5c7605dd..0000000000 --- a/contrib/tsearch2/crc32.c +++ /dev/null @@ -1,105 +0,0 @@ -/* Both POSIX and CRC32 checksums */ - -/* $PostgreSQL: pgsql/contrib/tsearch2/crc32.c,v 1.4 2007/07/15 22:40:28 tgl Exp $ */ - -#include <sys/types.h> -#include <stdio.h> -#include <sys/types.h> - -#include "crc32.h" - -/* - * This code implements the AUTODIN II polynomial - * The variable corresponding to the macro argument "crc" should - * be an unsigned long. 
- * Oroginal code by Spencer Garrett <srg@quick.com> - */ - -#define _CRC32_(crc, ch) ((crc) = ((crc) >> 8) ^ crc32tab[((crc) ^ (ch)) & 0xff]) - -/* generated using the AUTODIN II polynomial - * x^32 + x^26 + x^23 + x^22 + x^16 + - * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1 - */ - -static const unsigned int crc32tab[256] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, -}; - -unsigned int -crc32_sz(char *buf, int size) -{ - unsigned int crc = ~((unsigned int) 0); - char *p; - int len, - nr; - - len = 0; - nr = size; - for (len += nr, p = buf; nr--; ++p) - _CRC32_(crc, *p); - return ~crc; -} diff --git a/contrib/tsearch2/crc32.h b/contrib/tsearch2/crc32.h deleted file mode 100644 index 420c9594ac..0000000000 --- a/contrib/tsearch2/crc32.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _CRC32_H -#define _CRC32_H - -/* 
$PostgreSQL: pgsql/contrib/tsearch2/crc32.h,v 1.2 2006/03/11 04:38:30 momjian Exp $ */ - -/* Returns crc32 of data block */ -extern unsigned int crc32_sz(char *buf, int size); - -/* Returns crc32 of null-terminated string */ -#define crc32(buf) crc32_sz((buf),strlen(buf)) - -#endif diff --git a/contrib/tsearch2/dict.c b/contrib/tsearch2/dict.c deleted file mode 100644 index 182f5c3291..0000000000 --- a/contrib/tsearch2/dict.c +++ /dev/null @@ -1,349 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.13 2006/10/04 00:29:46 momjian Exp $ */ - -/* - * interface functions to dictionary - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> - -#include "catalog/pg_type.h" -#include "executor/spi.h" -#include "fmgr.h" -#include "utils/array.h" -#include "utils/memutils.h" - -#include "dict.h" -#include "common.h" -#include "snmap.h" - -/*********top interface**********/ - -void -init_dict(Oid id, DictInfo * dict) -{ - Oid arg[1]; - bool isnull; - Datum pars[1]; - int stat; - void *plan; - char buf[1024]; - char *nsp = get_namespace(TSNSP_FunctionOid); - - arg[0] = OIDOID; - pars[0] = ObjectIdGetDatum(id); - - memset(dict, 0, sizeof(DictInfo)); - SPI_connect(); - sprintf(buf, "select dict_init, dict_initoption, dict_lexize from %s.pg_ts_dict where oid = $1", nsp); - pfree(nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - ts_error(ERROR, "SPI_prepare() failed"); - - stat = SPI_execp(plan, pars, " ", 1); - if (stat < 0) - ts_error(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - { - Datum opt; - Oid oid = InvalidOid; - - /* setup dictlexize method */ - oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull)); - if (isnull || oid == InvalidOid) - ts_error(ERROR, "Null dict_lexize for dictonary %d", id); - fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext); - - /* setup and call dictinit method, optinally */ - oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], 
SPI_tuptable->tupdesc, 1, &isnull)); - if (!(isnull || oid == InvalidOid)) - { - opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull); - dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt)); - } - dict->dict_id = id; - } - else - ts_error(ERROR, "No dictionary with id %d", id); - SPI_freeplan(plan); - SPI_finish(); -} - -typedef struct -{ - DictInfo *last_dict; - int len; - int reallen; - DictInfo *list; - SNMap name2id_map; -} DictList; - -static DictList DList = {NULL, 0, 0, NULL, {0, 0, NULL}}; - -void -reset_dict(void) -{ - freeSNMap(&(DList.name2id_map)); - /* XXX need to free DList.list[*].dictionary */ - if (DList.list) - free(DList.list); - memset(&DList, 0, sizeof(DictList)); -} - - -static int -comparedict(const void *a, const void *b) -{ - if (((DictInfo *) a)->dict_id == ((DictInfo *) b)->dict_id) - return 0; - return (((DictInfo *) a)->dict_id < ((DictInfo *) b)->dict_id) ? -1 : 1; -} - -static void -insertdict(Oid id) -{ - DictInfo newdict; - - if (DList.len == DList.reallen) - { - DictInfo *tmp; - int reallen = (DList.reallen) ? 2 * DList.reallen : 16; - - tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen); - if (!tmp) - ts_error(ERROR, "No memory"); - DList.reallen = reallen; - DList.list = tmp; - } - init_dict(id, &newdict); - - DList.list[DList.len] = newdict; - DList.len++; - - qsort(DList.list, DList.len, sizeof(DictInfo), comparedict); -} - -DictInfo * -finddict(Oid id) -{ - /* last used dict */ - if (DList.last_dict && DList.last_dict->dict_id == id) - return DList.last_dict; - - - /* already used dict */ - if (DList.len != 0) - { - DictInfo key; - - key.dict_id = id; - DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict); - if (DList.last_dict != NULL) - return DList.last_dict; - } - - /* insert new dictionary */ - insertdict(id); - return finddict(id); /* qsort changed order!! 
*/ ; -} - -Oid -name2id_dict(text *name) -{ - Oid arg[1]; - bool isnull; - Datum pars[1]; - int stat; - Oid id = findSNMap_t(&(DList.name2id_map), name); - void *plan; - char buf[1024], - *nsp; - - arg[0] = TEXTOID; - pars[0] = PointerGetDatum(name); - - if (id) - return id; - - nsp = get_namespace(TSNSP_FunctionOid); - SPI_connect(); - sprintf(buf, "select oid from %s.pg_ts_dict where dict_name = $1", nsp); - pfree(nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - ts_error(ERROR, "SPI_prepare() failed"); - - stat = SPI_execp(plan, pars, " ", 1); - if (stat < 0) - ts_error(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - else - ts_error(ERROR, "No dictionary with name '%s'", text2char(name)); - SPI_freeplan(plan); - SPI_finish(); - addSNMap_t(&(DList.name2id_map), name, id); - return id; -} - - -/******sql-level interface******/ -PG_FUNCTION_INFO_V1(lexize); -Datum lexize(PG_FUNCTION_ARGS); - -Datum -lexize(PG_FUNCTION_ARGS) -{ - text *in = PG_GETARG_TEXT_P(1); - DictInfo *dict; - TSLexeme *res, - *ptr; - Datum *da; - ArrayType *a; - DictSubState dstate = {false, false, NULL}; - - SET_FUNCOID(); - dict = finddict(PG_GETARG_OID(0)); - - ptr = res = (TSLexeme *) DatumGetPointer( - FunctionCall4(&(dict->lexize_info), - PointerGetDatum(dict->dictionary), - PointerGetDatum(VARDATA(in)), - Int32GetDatum(VARSIZE(in) - VARHDRSZ), - PointerGetDatum(&dstate) - ) - ); - - if (dstate.getnext) - { - dstate.isend = true; - ptr = res = (TSLexeme *) DatumGetPointer( - FunctionCall4(&(dict->lexize_info), - PointerGetDatum(dict->dictionary), - PointerGetDatum(VARDATA(in)), - Int32GetDatum(VARSIZE(in) - VARHDRSZ), - PointerGetDatum(&dstate) - ) - ); - } - - PG_FREE_IF_COPY(in, 1); - if (!res) - { - if (PG_NARGS() > 2) - PG_RETURN_POINTER(NULL); - else - PG_RETURN_NULL(); - } - - while (ptr->lexeme) - ptr++; - da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1)); - 
ptr = res; - while (ptr->lexeme) - { - da[ptr - res] = PointerGetDatum(char2text(ptr->lexeme)); - ptr++; - } - - a = construct_array( - da, - ptr - res, - TEXTOID, - -1, - false, - 'i' - ); - - ptr = res; - while (ptr->lexeme) - { - pfree(DatumGetPointer(da[ptr - res])); - pfree(ptr->lexeme); - ptr++; - } - pfree(res); - pfree(da); - - PG_RETURN_POINTER(a); -} - -PG_FUNCTION_INFO_V1(lexize_byname); -Datum lexize_byname(PG_FUNCTION_ARGS); -Datum -lexize_byname(PG_FUNCTION_ARGS) -{ - text *dictname = PG_GETARG_TEXT_P(0); - Datum res; - - SET_FUNCOID(); - - res = DirectFunctionCall3( - lexize, - ObjectIdGetDatum(name2id_dict(dictname)), - PG_GETARG_DATUM(1), - (Datum) 0 - ); - PG_FREE_IF_COPY(dictname, 0); - if (res) - PG_RETURN_DATUM(res); - else - PG_RETURN_NULL(); -} - -static Oid currect_dictionary_id = 0; - -PG_FUNCTION_INFO_V1(set_curdict); -Datum set_curdict(PG_FUNCTION_ARGS); -Datum -set_curdict(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - finddict(PG_GETARG_OID(0)); - currect_dictionary_id = PG_GETARG_OID(0); - PG_RETURN_VOID(); -} - -PG_FUNCTION_INFO_V1(set_curdict_byname); -Datum set_curdict_byname(PG_FUNCTION_ARGS); -Datum -set_curdict_byname(PG_FUNCTION_ARGS) -{ - text *dictname = PG_GETARG_TEXT_P(0); - - SET_FUNCOID(); - DirectFunctionCall1( - set_curdict, - ObjectIdGetDatum(name2id_dict(dictname)) - ); - PG_FREE_IF_COPY(dictname, 0); - PG_RETURN_VOID(); -} - -PG_FUNCTION_INFO_V1(lexize_bycurrent); -Datum lexize_bycurrent(PG_FUNCTION_ARGS); -Datum -lexize_bycurrent(PG_FUNCTION_ARGS) -{ - Datum res; - - SET_FUNCOID(); - if (currect_dictionary_id == 0) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("no currect dictionary"), - errhint("Execute select set_curdict()."))); - - res = DirectFunctionCall3( - lexize, - ObjectIdGetDatum(currect_dictionary_id), - PG_GETARG_DATUM(0), - (Datum) 0 - ); - if (res) - PG_RETURN_DATUM(res); - else - PG_RETURN_NULL(); -} diff --git a/contrib/tsearch2/dict.h b/contrib/tsearch2/dict.h deleted file 
mode 100644 index 78d5111f14..0000000000 --- a/contrib/tsearch2/dict.h +++ /dev/null @@ -1,114 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.8 2006/10/04 00:29:46 momjian Exp $ */ - -#ifndef __DICT_H__ -#define __DICT_H__ -#include "postgres.h" -#include "fmgr.h" -#include "ts_cfg.h" - -typedef struct -{ - int len; - char **stop; - char *(*wordop) (char *); -} StopList; - -void sortstoplist(StopList * s); -void freestoplist(StopList * s); -void readstoplist(text *in, StopList * s); -bool searchstoplist(StopList * s, char *key); - -typedef struct -{ - Oid dict_id; - FmgrInfo lexize_info; - void *dictionary; -} DictInfo; - -void init_dict(Oid id, DictInfo * dict); -DictInfo *finddict(Oid id); -Oid name2id_dict(text *name); -void reset_dict(void); - -typedef struct -{ - bool isend; /* in: marks for lexize_info about text end is - * reached */ - bool getnext; /* out: dict wants next lexeme */ - void *private; /* internal dict state between calls with - * getnext == true */ -} DictSubState; - -/* simple parser of cfg string */ -typedef struct -{ - char *key; - char *value; -} Map; - -void parse_cfgdict(text *in, Map ** m); - -/* return struct for any lexize function */ -typedef struct -{ - /* - * number of variant of split word , for example Word 'fotballklubber' - * (norwegian) has two varian to split: ( fotball, klubb ) and ( fot, - * ball, klubb ). 
So, dictionary should return: nvariant lexeme 1 - * fotball 1 klubb 2 fot 2 ball 2 klubb - */ - uint16 nvariant; - - uint16 flags; - - /* C-string */ - char *lexeme; -} TSLexeme; - -#define TSL_ADDPOS 0x01 - - -/* - * Lexize subsystem - */ - -typedef struct ParsedLex -{ - int type; - char *lemm; - int lenlemm; - bool resfollow; - struct ParsedLex *next; -} ParsedLex; - -typedef struct ListParsedLex -{ - ParsedLex *head; - ParsedLex *tail; -} ListParsedLex; - -typedef struct -{ - TSCfgInfo *cfg; - Oid curDictId; - int posDict; - DictSubState dictState; - ParsedLex *curSub; - ListParsedLex towork; /* current list to work */ - ListParsedLex waste; /* list of lexemes that already lexized */ - - /* - * fields to store last variant to lexize (basically, thesaurus or similar - * to, which wants several lexemes - */ - - ParsedLex *lastRes; - TSLexeme *tmpRes; -} LexizeData; - - -void LexizeInit(LexizeData * ld, TSCfgInfo * cfg); -void LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm); -TSLexeme *LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem); - -#endif diff --git a/contrib/tsearch2/dict_ex.c b/contrib/tsearch2/dict_ex.c deleted file mode 100644 index 2fd5cbb700..0000000000 --- a/contrib/tsearch2/dict_ex.c +++ /dev/null @@ -1,70 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ex.c,v 1.9 2006/11/20 14:03:30 teodor Exp $ */ - -/* - * example of dictionary - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "dict.h" -#include "common.h" -#include "ts_locale.h" - -typedef struct -{ - StopList stoplist; -} DictExample; - - -PG_FUNCTION_INFO_V1(dex_init); -Datum dex_init(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(dex_lexize); -Datum dex_lexize(PG_FUNCTION_ARGS); - -Datum -dex_init(PG_FUNCTION_ARGS) -{ - DictExample *d = (DictExample *) malloc(sizeof(DictExample)); - - if (!d) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(d, 0, sizeof(DictExample)); - - d->stoplist.wordop = 
lowerstr; - - if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) - { - text *in = PG_GETARG_TEXT_P(0); - - readstoplist(in, &(d->stoplist)); - sortstoplist(&(d->stoplist)); - PG_FREE_IF_COPY(in, 0); - } - - PG_RETURN_POINTER(d); -} - -Datum -dex_lexize(PG_FUNCTION_ARGS) -{ - DictExample *d = (DictExample *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - char *utxt = pnstrdup(in, PG_GETARG_INT32(2)); - TSLexeme *res = palloc(sizeof(TSLexeme) * 2); - char *txt = lowerstr(utxt); - - pfree(utxt); - memset(res, 0, sizeof(TSLexeme) * 2); - - if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) - { - pfree(txt); - } - else - res[0].lexeme = txt; - - PG_RETURN_POINTER(res); -} diff --git a/contrib/tsearch2/dict_ispell.c b/contrib/tsearch2/dict_ispell.c deleted file mode 100644 index 301b19d592..0000000000 --- a/contrib/tsearch2/dict_ispell.c +++ /dev/null @@ -1,196 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ispell.c,v 1.10 2006/03/11 04:38:30 momjian Exp $ */ - -/* - * ISpell interface - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> - -#include "dict.h" -#include "common.h" -#include "ispell/spell.h" -#include "ts_locale.h" - -typedef struct -{ - StopList stoplist; - IspellDict obj; -} DictISpell; - -PG_FUNCTION_INFO_V1(spell_init); -Datum spell_init(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(spell_lexize); -Datum spell_lexize(PG_FUNCTION_ARGS); - -static void -freeDictISpell(DictISpell * d) -{ - NIFree(&(d->obj)); - freestoplist(&(d->stoplist)); - free(d); -} - -Datum -spell_init(PG_FUNCTION_ARGS) -{ - DictISpell *d; - Map *cfg, - *pcfg; - text *in; - bool affloaded = false, - dictloaded = false, - stoploaded = false; - - if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("ISpell confguration error"))); - - d = (DictISpell *) malloc(sizeof(DictISpell)); - if (!d) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of 
memory"))); - memset(d, 0, sizeof(DictISpell)); - d->stoplist.wordop = lowerstr; - - in = PG_GETARG_TEXT_P(0); - parse_cfgdict(in, &cfg); - PG_FREE_IF_COPY(in, 0); - pcfg = cfg; - while (pcfg->key) - { - if (pg_strcasecmp("DictFile", pcfg->key) == 0) - { - if (dictloaded) - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("dictionary already loaded"))); - } - if (NIImportDictionary(&(d->obj), pcfg->value)) - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("could not load dictionary file \"%s\"", - pcfg->value))); - } - dictloaded = true; - } - else if (pg_strcasecmp("AffFile", pcfg->key) == 0) - { - if (affloaded) - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("affixes already loaded"))); - } - if (NIImportAffixes(&(d->obj), pcfg->value)) - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("could not load affix file \"%s\"", - pcfg->value))); - } - affloaded = true; - } - else if (pg_strcasecmp("StopFile", pcfg->key) == 0) - { - text *tmp = char2text(pcfg->value); - - if (stoploaded) - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("stop words already loaded"))); - } - readstoplist(tmp, &(d->stoplist)); - sortstoplist(&(d->stoplist)); - pfree(tmp); - stoploaded = true; - } - else - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized option: %s => %s", - pcfg->key, pcfg->value))); - } - pfree(pcfg->key); - pfree(pcfg->value); - pcfg++; - } - pfree(cfg); - - if (affloaded && dictloaded) - { - NISortDictionary(&(d->obj)); - NISortAffixes(&(d->obj)); - } - else if (!affloaded) - { - freeDictISpell(d); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("no affixes"))); - } - else - { - freeDictISpell(d); - ereport(ERROR, - 
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("no dictionary"))); - } - - PG_RETURN_POINTER(d); -} - -Datum -spell_lexize(PG_FUNCTION_ARGS) -{ - DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - char *txt; - TSLexeme *res; - TSLexeme *ptr, - *cptr; - - if (!PG_GETARG_INT32(2)) - PG_RETURN_POINTER(NULL); - - txt = pnstrdup(in, PG_GETARG_INT32(2)); - res = NINormalizeWord(&(d->obj), txt); - pfree(txt); - - if (res == NULL) - PG_RETURN_POINTER(NULL); - - ptr = cptr = res; - while (ptr->lexeme) - { - if (searchstoplist(&(d->stoplist), ptr->lexeme)) - { - pfree(ptr->lexeme); - ptr->lexeme = NULL; - ptr++; - } - else - { - memcpy(cptr, ptr, sizeof(TSLexeme)); - cptr++; - ptr++; - } - } - cptr->lexeme = NULL; - - PG_RETURN_POINTER(res); -} diff --git a/contrib/tsearch2/dict_snowball.c b/contrib/tsearch2/dict_snowball.c deleted file mode 100644 index 6667744824..0000000000 --- a/contrib/tsearch2/dict_snowball.c +++ /dev/null @@ -1,169 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.13 2006/11/20 14:03:30 teodor Exp $ */ - -/* - * example of Snowball dictionary - * http://snowball.tartarus.org/ - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "dict.h" -#include "common.h" -#include "snowball/english_stem.h" -#include "snowball/header.h" -#include "snowball/russian_stem.h" -#include "snowball/russian_stem_UTF8.h" -#include "ts_locale.h" - -typedef struct -{ - struct SN_env *z; - StopList stoplist; - int (*stem) (struct SN_env * z); -} DictSnowball; - - -PG_FUNCTION_INFO_V1(snb_en_init); -Datum snb_en_init(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(snb_ru_init_koi8); -Datum snb_ru_init_koi8(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(snb_ru_init_utf8); -Datum snb_ru_init_utf8(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(snb_lexize); -Datum snb_lexize(PG_FUNCTION_ARGS); - -Datum -snb_en_init(PG_FUNCTION_ARGS) -{ - DictSnowball *d = (DictSnowball *) 
malloc(sizeof(DictSnowball)); - - if (!d) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(d, 0, sizeof(DictSnowball)); - d->stoplist.wordop = lowerstr; - - if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) - { - text *in = PG_GETARG_TEXT_P(0); - - readstoplist(in, &(d->stoplist)); - sortstoplist(&(d->stoplist)); - PG_FREE_IF_COPY(in, 0); - } - - d->z = english_ISO_8859_1_create_env(); - if (!d->z) - { - freestoplist(&(d->stoplist)); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - d->stem = english_ISO_8859_1_stem; - - PG_RETURN_POINTER(d); -} - -Datum -snb_ru_init_koi8(PG_FUNCTION_ARGS) -{ - DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball)); - - if (!d) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(d, 0, sizeof(DictSnowball)); - d->stoplist.wordop = lowerstr; - - if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) - { - text *in = PG_GETARG_TEXT_P(0); - - readstoplist(in, &(d->stoplist)); - sortstoplist(&(d->stoplist)); - PG_FREE_IF_COPY(in, 0); - } - - d->z = russian_KOI8_R_create_env(); - if (!d->z) - { - freestoplist(&(d->stoplist)); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - d->stem = russian_KOI8_R_stem; - - PG_RETURN_POINTER(d); -} - -Datum -snb_ru_init_utf8(PG_FUNCTION_ARGS) -{ - DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball)); - - if (!d) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(d, 0, sizeof(DictSnowball)); - d->stoplist.wordop = lowerstr; - - if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) - { - text *in = PG_GETARG_TEXT_P(0); - - readstoplist(in, &(d->stoplist)); - sortstoplist(&(d->stoplist)); - PG_FREE_IF_COPY(in, 0); - } - - d->z = russian_UTF_8_create_env(); - if (!d->z) - { - freestoplist(&(d->stoplist)); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - 
d->stem = russian_UTF_8_stem; - - PG_RETURN_POINTER(d); -} - -Datum -snb_lexize(PG_FUNCTION_ARGS) -{ - DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - char *utxt = pnstrdup(in, PG_GETARG_INT32(2)); - TSLexeme *res = palloc(sizeof(TSLexeme) * 2); - char *txt = lowerstr(utxt); - - pfree(utxt); - memset(res, 0, sizeof(TSLexeme) * 2); - if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) - { - pfree(txt); - } - else - { - SN_set_current(d->z, strlen(txt), (symbol *) txt); - (d->stem) (d->z); - if (d->z->p && d->z->l) - { - txt = repalloc(txt, d->z->l + 1); - memcpy(txt, d->z->p, d->z->l); - txt[d->z->l] = '\0'; - } - res->lexeme = txt; - } - - PG_RETURN_POINTER(res); -} diff --git a/contrib/tsearch2/dict_syn.c b/contrib/tsearch2/dict_syn.c deleted file mode 100644 index c76208eccb..0000000000 --- a/contrib/tsearch2/dict_syn.c +++ /dev/null @@ -1,185 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.14 2007/03/28 01:28:34 tgl Exp $ */ - -/* - * ISpell interface - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> - -#include "dict.h" -#include "common.h" -#include "ts_locale.h" - -#define SYNBUFLEN 4096 -typedef struct -{ - char *in; - char *out; -} Syn; - -typedef struct -{ - int len; - Syn *syn; -} DictSyn; - -PG_FUNCTION_INFO_V1(syn_init); -Datum syn_init(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(syn_lexize); -Datum syn_lexize(PG_FUNCTION_ARGS); - -static char * -findwrd(char *in, char **end) -{ - char *start; - - *end = NULL; - while (*in && isspace((unsigned char) *in)) - in++; - - if (*in=='\0') - return NULL; - start = in; - - while (*in && !isspace((unsigned char) *in)) - in++; - - *end = in; - return start; -} - -static int -compareSyn(const void *a, const void *b) -{ - return strcmp(((Syn *) a)->in, ((Syn *) b)->in); -} - - -Datum -syn_init(PG_FUNCTION_ARGS) -{ - text *in; - DictSyn *d; - int cur = 0; - FILE *fin; - char *filename; - char buf[SYNBUFLEN]; 
- char *starti, - *starto, - *end = NULL; - int slen; - - if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("NULL config"))); - - in = PG_GETARG_TEXT_P(0); - if (VARSIZE(in) - VARHDRSZ == 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("VOID config"))); - - filename = text2char(in); - PG_FREE_IF_COPY(in, 0); - if ((fin = fopen(filename, "r")) == NULL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", - filename))); - - d = (DictSyn *) malloc(sizeof(DictSyn)); - if (!d) - { - fclose(fin); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - memset(d, 0, sizeof(DictSyn)); - - while (fgets(buf, sizeof(buf), fin)) - { - slen = strlen(buf); - pg_verifymbstr(buf, slen, false); - if (cur == d->len) - { - d->len = (d->len) ? 2 * d->len : 16; - d->syn = (Syn *) realloc(d->syn, sizeof(Syn) * d->len); - if (!d->syn) - { - fclose(fin); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - } - - starti = findwrd(buf, &end); - if (!starti) - continue; - *end = '\0'; - if (end >= buf + slen) - continue; - - starto = findwrd(end + 1, &end); - if (!starto) - continue; - *end = '\0'; - - d->syn[cur].in = strdup(lowerstr(starti)); - d->syn[cur].out = strdup(lowerstr(starto)); - if (!(d->syn[cur].in && d->syn[cur].out)) - { - fclose(fin); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - - cur++; - } - - fclose(fin); - - d->len = cur; - if (cur > 1) - qsort(d->syn, d->len, sizeof(Syn), compareSyn); - - pfree(filename); - PG_RETURN_POINTER(d); -} - -Datum -syn_lexize(PG_FUNCTION_ARGS) -{ - DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - Syn key, - *found; - TSLexeme *res = NULL; - char *wrd; - - if (!PG_GETARG_INT32(2)) - PG_RETURN_POINTER(NULL); - - key.out = NULL; - wrd = pnstrdup(in, 
PG_GETARG_INT32(2)); - key.in = lowerstr(wrd); - pfree(wrd); - - found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); - pfree(key.in); - - if (!found) - PG_RETURN_POINTER(NULL); - - res = palloc(sizeof(TSLexeme) * 2); - memset(res, 0, sizeof(TSLexeme) * 2); - res[0].lexeme = pstrdup(found->out); - - PG_RETURN_POINTER(res); -} diff --git a/contrib/tsearch2/dict_thesaurus.c b/contrib/tsearch2/dict_thesaurus.c deleted file mode 100644 index cb5d9cbb78..0000000000 --- a/contrib/tsearch2/dict_thesaurus.c +++ /dev/null @@ -1,921 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.9 2007/07/15 22:57:48 tgl Exp $ */ - -/* - * thesaurus - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" -#include "executor/spi.h" - -#include <ctype.h> - -#include "dict.h" -#include "common.h" -#include "ts_locale.h" - -/* - * Temporay we use TSLexeme.flags for inner use... - */ -#define DT_USEASIS 0x1000 - -typedef struct LexemeInfo -{ - uint16 idsubst; /* entry's number in DictThesaurus->subst */ - uint16 posinsubst; /* pos info in entry */ - uint16 tnvariant; /* total num lexemes in one variant */ - struct LexemeInfo *nextentry; - struct LexemeInfo *nextvariant; -} LexemeInfo; - -typedef struct -{ - char *lexeme; - LexemeInfo *entries; -} TheLexeme; - -typedef struct -{ - uint16 lastlexeme; /* number lexemes to substitute */ - uint16 reslen; - TSLexeme *res; /* prepared substituted result */ -} TheSubstitute; - -typedef struct -{ - /* subdictionary to normalize lexemes */ - DictInfo subdict; - - /* Array to search lexeme by exact match */ - TheLexeme *wrds; - int nwrds; - int ntwrds; - - /* - * Storage of substituted result, n-th element is for n-th expression - */ - TheSubstitute *subst; - int nsubst; -} DictThesaurus; - -PG_FUNCTION_INFO_V1(thesaurus_init); -Datum thesaurus_init(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(thesaurus_lexize); -Datum thesaurus_lexize(PG_FUNCTION_ARGS); - -static void -freeDictThesaurus(DictThesaurus * d) -{ - 
free(d); -} - -static void -newLexeme(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 posinsubst) -{ - TheLexeme *ptr; - - if (d->nwrds >= d->ntwrds) - { - if (d->ntwrds == 0) - { - d->ntwrds = 16; - d->wrds = (TheLexeme *) malloc(sizeof(TheLexeme) * d->ntwrds); - } - else - { - d->ntwrds *= 2; - d->wrds = (TheLexeme *) realloc(d->wrds, sizeof(TheLexeme) * d->ntwrds); - } - if (!d->wrds) - elog(ERROR, "Out of memory"); - } - - ptr = d->wrds + d->nwrds; - d->nwrds++; - - if ((ptr->lexeme = malloc(e - b + 1)) == NULL) - elog(ERROR, "Out of memory"); - - memcpy(ptr->lexeme, b, e - b); - ptr->lexeme[e - b] = '\0'; - - if ((ptr->entries = (LexemeInfo *) malloc(sizeof(LexemeInfo))) == NULL) - elog(ERROR, "Out of memory"); - - ptr->entries->nextentry = NULL; - ptr->entries->idsubst = idsubst; - ptr->entries->posinsubst = posinsubst; -} - -static void -addWrd(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis) -{ - static int nres = 0; - static int ntres = 0; - TheSubstitute *ptr; - - if (nwrd == 0) - { - nres = ntres = 0; - - if (idsubst >= d->nsubst) - { - if (d->nsubst == 0) - { - d->nsubst = 16; - d->subst = (TheSubstitute *) malloc(sizeof(TheSubstitute) * d->nsubst); - } - else - { - d->nsubst *= 2; - d->subst = (TheSubstitute *) realloc(d->subst, sizeof(TheSubstitute) * d->nsubst); - } - if (!d->subst) - elog(ERROR, "Out of memory"); - } - } - - ptr = d->subst + idsubst; - - ptr->lastlexeme = posinsubst - 1; - - if (nres + 1 >= ntres) - { - if (ntres == 0) - { - ntres = 2; - ptr->res = (TSLexeme *) malloc(sizeof(TSLexeme) * ntres); - } - else - { - ntres *= 2; - ptr->res = (TSLexeme *) realloc(ptr->res, sizeof(TSLexeme) * ntres); - } - - if (!ptr->res) - elog(ERROR, "Out of memory"); - } - - if ((ptr->res[nres].lexeme = malloc(e - b + 1)) == 0) - elog(ERROR, "Out of memory"); - memcpy(ptr->res[nres].lexeme, b, e - b); - ptr->res[nres].lexeme[e - b] = '\0'; - - ptr->res[nres].nvariant = nwrd; - if (useasis) - 
ptr->res[nres].flags = DT_USEASIS; - else - ptr->res[nres].flags = 0; - - ptr->res[++nres].lexeme = NULL; -} - -#define TR_WAITLEX 1 -#define TR_INLEX 2 -#define TR_WAITSUBS 3 -#define TR_INSUBS 4 - -static void -thesaurusRead(char *filename, DictThesaurus * d) -{ - FILE *fh; - char str[BUFSIZ]; - int lineno = 0; - uint16 idsubst = 0; - bool useasis = false; - - fh = fopen(to_absfilename(filename), "r"); - if (!fh) - elog(ERROR, "Thesaurus: cannot open '%s' file", filename); - - while (fgets(str, sizeof(str), fh)) - { - char *ptr = str; - int state = TR_WAITLEX; - char *beginwrd = NULL; - uint16 posinsubst = 0; - uint16 nwrd = 0; - - lineno++; - - /* is it comment ? */ - while (t_isspace(ptr)) - ptr += pg_mblen(ptr); - if (t_iseq(str, '#') || *str == '\0' || t_iseq(str, '\n') || t_iseq(str, '\r')) - continue; - - pg_verifymbstr(ptr, strlen(ptr), false); - while (*ptr) - { - if (state == TR_WAITLEX) - { - if (t_iseq(ptr, ':')) - { - if (posinsubst == 0) - { - fclose(fh); - elog(ERROR, "Thesaurus: Unexpected delimiter at %d line", lineno); - } - state = TR_WAITSUBS; - } - else if (!t_isspace(ptr)) - { - beginwrd = ptr; - state = TR_INLEX; - } - } - else if (state == TR_INLEX) - { - if (t_iseq(ptr, ':')) - { - newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); - state = TR_WAITSUBS; - } - else if (t_isspace(ptr)) - { - newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); - state = TR_WAITLEX; - } - } - else if (state == TR_WAITSUBS) - { - if (t_iseq(ptr, '*')) - { - useasis = true; - state = TR_INSUBS; - beginwrd = ptr + pg_mblen(ptr); - } - else if (t_iseq(ptr, '\\')) - { - useasis = false; - state = TR_INSUBS; - beginwrd = ptr + pg_mblen(ptr); - } - else if (!t_isspace(ptr)) - { - useasis = false; - beginwrd = ptr; - state = TR_INSUBS; - } - } - else if (state == TR_INSUBS) - { - if (t_isspace(ptr)) - { - if (ptr == beginwrd) - elog(ERROR, "Thesaurus: Unexpected end of line or lexeme at %d line", lineno); - addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, 
useasis); - state = TR_WAITSUBS; - } - } - else - elog(ERROR, "Thesaurus: Unknown state: %d", state); - - ptr += pg_mblen(ptr); - } - - if (state == TR_INSUBS) - { - if (ptr == beginwrd) - elog(ERROR, "Thesaurus: Unexpected end of line or lexeme at %d line", lineno); - addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); - } - - idsubst++; - - if (!(nwrd && posinsubst)) - { - fclose(fh); - elog(ERROR, "Thesaurus: Unexpected end of line at %d line", lineno); - } - - } - - d->nsubst = idsubst; - - fclose(fh); -} - -static TheLexeme * -addCompiledLexeme(TheLexeme * newwrds, int *nnw, int *tnm, TSLexeme * lexeme, LexemeInfo * src, uint16 tnvariant) -{ - - if (*nnw >= *tnm) - { - *tnm *= 2; - newwrds = (TheLexeme *) realloc(newwrds, sizeof(TheLexeme) * *tnm); - if (!newwrds) - elog(ERROR, "Out of memory"); - } - - newwrds[*nnw].entries = (LexemeInfo *) malloc(sizeof(LexemeInfo)); - if (!newwrds[*nnw].entries) - elog(ERROR, "Out of memory"); - - if (lexeme && lexeme->lexeme) - { - newwrds[*nnw].lexeme = strdup(lexeme->lexeme); - if (!newwrds[*nnw].lexeme) - elog(ERROR, "Out of memory"); - - newwrds[*nnw].entries->tnvariant = tnvariant; - } - else - { - newwrds[*nnw].lexeme = NULL; - newwrds[*nnw].entries->tnvariant = 1; - } - - newwrds[*nnw].entries->idsubst = src->idsubst; - newwrds[*nnw].entries->posinsubst = src->posinsubst; - - newwrds[*nnw].entries->nextentry = NULL; - - (*nnw)++; - return newwrds; -} - -static int -cmpLexemeInfo(LexemeInfo * a, LexemeInfo * b) -{ - if (a == NULL || b == NULL) - return 0; - - if (a->idsubst == b->idsubst) - { - if (a->posinsubst == b->posinsubst) - { - if (a->tnvariant == b->tnvariant) - return 0; - - return (a->tnvariant > b->tnvariant) ? 1 : -1; - } - - return (a->posinsubst > b->posinsubst) ? 1 : -1; - } - - return (a->idsubst > b->idsubst) ? 
1 : -1; -} - -static int -cmpLexeme(TheLexeme * a, TheLexeme * b) -{ - if (a->lexeme == NULL) - { - if (b->lexeme == NULL) - return 0; - else - return 1; - } - else if (b->lexeme == NULL) - return -1; - - return strcmp(a->lexeme, b->lexeme); -} - -static int -cmpLexemeQ(const void *a, const void *b) -{ - return cmpLexeme((TheLexeme *) a, (TheLexeme *) b); -} - -static int -cmpTheLexeme(const void *a, const void *b) -{ - TheLexeme *la = (TheLexeme *) a; - TheLexeme *lb = (TheLexeme *) b; - int res; - - if ((res = cmpLexeme(la, lb)) != 0) - return res; - - return -cmpLexemeInfo(la->entries, lb->entries); -} - -static void -compileTheLexeme(DictThesaurus * d) -{ - int i, - nnw = 0, - tnm = 16; - TheLexeme *newwrds = (TheLexeme *) malloc(sizeof(TheLexeme) * tnm), - *ptrwrds; - - if (!newwrds) - elog(ERROR, "Out of memory"); - - for (i = 0; i < d->nwrds; i++) - { - TSLexeme *ptr; - - ptr = (TSLexeme *) DatumGetPointer( - FunctionCall4( - &(d->subdict.lexize_info), - PointerGetDatum(d->subdict.dictionary), - PointerGetDatum(d->wrds[i].lexeme), - Int32GetDatum(strlen(d->wrds[i].lexeme)), - PointerGetDatum(NULL) - ) - ); - - if (!(ptr && ptr->lexeme)) - { - if (!ptr) - elog(ERROR, "Thesaurus: word-sample '%s' isn't recognized by subdictionary (rule %d)", - d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); - else - elog(NOTICE, "Thesaurus: word-sample '%s' is recognized as stop-word, assign any stop-word (rule %d)", - d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); - - newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0); - } - else - { - while (ptr->lexeme) - { - TSLexeme *remptr = ptr + 1; - int tnvar = 1; - int curvar = ptr->nvariant; - - /* compute n words in one variant */ - while (remptr->lexeme) - { - if (remptr->nvariant != (remptr - 1)->nvariant) - break; - tnvar++; - remptr++; - } - - remptr = ptr; - while (remptr->lexeme && remptr->nvariant == curvar) - { - newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, 
d->wrds[i].entries, tnvar); - remptr++; - } - - ptr = remptr; - } - } - - free(d->wrds[i].lexeme); - free(d->wrds[i].entries); - } - - free(d->wrds); - d->wrds = newwrds; - d->nwrds = nnw; - d->ntwrds = tnm; - - if (d->nwrds > 1) - { - qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme); - - /* uniq */ - newwrds = d->wrds; - ptrwrds = d->wrds + 1; - while (ptrwrds - d->wrds < d->nwrds) - { - if (cmpLexeme(ptrwrds, newwrds) == 0) - { - if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries)) - { - ptrwrds->entries->nextentry = newwrds->entries; - newwrds->entries = ptrwrds->entries; - } - else - free(ptrwrds->entries); - - if (ptrwrds->lexeme) - free(ptrwrds->lexeme); - } - else - { - newwrds++; - *newwrds = *ptrwrds; - } - - ptrwrds++; - } - - d->nwrds = newwrds - d->wrds + 1; - d->wrds = (TheLexeme *) realloc(d->wrds, sizeof(TheLexeme) * d->nwrds); - } -} - -static void -compileTheSubstitute(DictThesaurus * d) -{ - int i; - - for (i = 0; i < d->nsubst; i++) - { - TSLexeme *rem = d->subst[i].res, - *outptr, - *inptr; - int n = 2; - - outptr = d->subst[i].res = (TSLexeme *) malloc(sizeof(TSLexeme) * n); - if (d->subst[i].res == NULL) - elog(ERROR, "Out of Memory"); - outptr->lexeme = NULL; - inptr = rem; - - while (inptr && inptr->lexeme) - { - TSLexeme *lexized, - tmplex[2]; - - if (inptr->flags & DT_USEASIS) - { /* do not lexize */ - tmplex[0] = *inptr; - tmplex[0].flags = 0; - tmplex[1].lexeme = NULL; - lexized = tmplex; - } - else - { - lexized = (TSLexeme *) DatumGetPointer( - FunctionCall4( - &(d->subdict.lexize_info), - PointerGetDatum(d->subdict.dictionary), - PointerGetDatum(inptr->lexeme), - Int32GetDatum(strlen(inptr->lexeme)), - PointerGetDatum(NULL) - ) - ); - } - - if (lexized && lexized->lexeme) - { - int toset = (lexized->lexeme && outptr != d->subst[i].res) ? 
(outptr - d->subst[i].res) : -1; - - while (lexized->lexeme) - { - if (outptr - d->subst[i].res + 1 >= n) - { - int diff = outptr - d->subst[i].res; - - n *= 2; - d->subst[i].res = (TSLexeme *) realloc(d->subst[i].res, sizeof(TSLexeme) * n); - if (d->subst[i].res == NULL) - elog(ERROR, "Out of Memory"); - outptr = d->subst[i].res + diff; - } - - *outptr = *lexized; - if ((outptr->lexeme = strdup(lexized->lexeme)) == NULL) - elog(ERROR, "Out of Memory"); - - outptr++; - lexized++; - } - - if (toset > 0) - d->subst[i].res[toset].flags |= TSL_ADDPOS; - } - else if (lexized) - { - elog(NOTICE, "Thesaurus: word '%s' in substition is a stop-word, ignored (rule %d)", inptr->lexeme, i + 1); - } - else - { - elog(ERROR, "Thesaurus: word '%s' in substition isn't recognized (rule %d)", inptr->lexeme, i + 1); - } - - if (inptr->lexeme) - free(inptr->lexeme); - inptr++; - } - - if (outptr == d->subst[i].res) - elog(ERROR, "Thesaurus: all words in subsitution are stop word (rule %d)", i + 1); - - d->subst[i].reslen = outptr - d->subst[i].res; - - free(rem); - } -} - -Datum -thesaurus_init(PG_FUNCTION_ARGS) -{ - DictThesaurus *d; - Map *cfg, - *pcfg; - text *in, - *subdictname = NULL; - bool fileloaded = false; - - if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("Thesaurus confguration error"))); - - d = (DictThesaurus *) malloc(sizeof(DictThesaurus)); - if (!d) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(d, 0, sizeof(DictThesaurus)); - - in = PG_GETARG_TEXT_P(0); - parse_cfgdict(in, &cfg); - PG_FREE_IF_COPY(in, 0); - pcfg = cfg; - while (pcfg->key) - { - if (pg_strcasecmp("DictFile", pcfg->key) == 0) - { - if (fileloaded) - { - freeDictThesaurus(d); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("Thesaurus file is already loaded"))); - } - fileloaded = true; - thesaurusRead(pcfg->value, d); - } - else if (pg_strcasecmp("Dictionary", 
pcfg->key) == 0) - { - if (subdictname) - { - freeDictThesaurus(d); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("Thesaurus: SubDictionary is already defined"))); - } - subdictname = char2text(pcfg->value); - } - else - { - freeDictThesaurus(d); - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized option: %s => %s", - pcfg->key, pcfg->value))); - } - pfree(pcfg->key); - pfree(pcfg->value); - pcfg++; - } - pfree(cfg); - - if (!fileloaded) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("Thesaurus file isn't defined"))); - - if (subdictname) - { - DictInfo *subdictptr; - - /* - * we already in SPI, but name2id_dict()/finddict() invoke - * SPI_connect() - */ - SPI_push(); - - subdictptr = finddict(name2id_dict(subdictname)); - - SPI_pop(); - - d->subdict = *subdictptr; - } - else - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("Thesaurus: SubDictionary isn't defined"))); - - compileTheLexeme(d); - compileTheSubstitute(d); - - PG_RETURN_POINTER(d); -} - -static LexemeInfo * -findTheLexeme(DictThesaurus * d, char *lexeme) -{ - TheLexeme key, *res; - - if (d->nwrds == 0) - return NULL; - - key.lexeme = lexeme; - key.entries = NULL; - - res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ); - - if (res == NULL) - return NULL; - return res->entries; -} - -static bool -matchIdSubst(LexemeInfo * stored, uint16 idsubst) -{ - bool res = true; - - if (stored) - { - res = false; - - for (; stored; stored = stored->nextvariant) - if (stored->idsubst == idsubst) - { - res = true; - break; - } - } - - return res; -} - -static LexemeInfo * -findVariant(LexemeInfo * in, LexemeInfo * stored, uint16 curpos, LexemeInfo ** newin, int newn) -{ - for (;;) - { - int i; - LexemeInfo *ptr = newin[0]; - - for (i = 0; i < newn; i++) - { - while (newin[i] && newin[i]->idsubst < ptr->idsubst) - newin[i] = newin[i]->nextentry; - - if (newin[i] == NULL) - return 
in; - - if (newin[i]->idsubst > ptr->idsubst) - { - ptr = newin[i]; - i = -1; - continue; - } - - while (newin[i]->idsubst == ptr->idsubst) - { - if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn) - { - ptr = newin[i]; - break; - } - - newin[i] = newin[i]->nextentry; - if (newin[i] == NULL) - return in; - } - - if (newin[i]->idsubst != ptr->idsubst) - { - ptr = newin[i]; - i = -1; - continue; - } - } - - if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst))) - { /* found */ - - ptr->nextvariant = in; - in = ptr; - } - - /* step forward */ - for (i = 0; i < newn; i++) - newin[i] = newin[i]->nextentry; - } - - return NULL; -} - -static TSLexeme * -copyTSLexeme(TheSubstitute * ts) -{ - TSLexeme *res; - uint16 i; - - res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1)); - for (i = 0; i < ts->reslen; i++) - { - res[i] = ts->res[i]; - res[i].lexeme = pstrdup(ts->res[i].lexeme); - } - - res[ts->reslen].lexeme = NULL; - - return res; -} - -static TSLexeme * -checkMatch(DictThesaurus * d, LexemeInfo * info, uint16 curpos, bool *moreres) -{ - *moreres = false; - while (info) - { - Assert(info->idsubst < d->nsubst); - if (info->nextvariant) - *moreres = true; - if (d->subst[info->idsubst].lastlexeme == curpos) - return copyTSLexeme(d->subst + info->idsubst); - info = info->nextvariant; - } - - return NULL; -} - -Datum -thesaurus_lexize(PG_FUNCTION_ARGS) -{ - DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0); - DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3); - TSLexeme *res = NULL; - LexemeInfo *stored, - *info = NULL; - uint16 curpos = 0; - bool moreres = false; - - if (dstate == NULL || PG_NARGS() < 4) - elog(ERROR, "Forbidden call of thesaurus or nested call"); - - if (dstate->isend) - PG_RETURN_POINTER(NULL); - stored = (LexemeInfo *) dstate->private; - - if (stored) - curpos = stored->posinsubst + 1; - - res = (TSLexeme *) DatumGetPointer( - FunctionCall4( - 
&(d->subdict.lexize_info), - PointerGetDatum(d->subdict.dictionary), - PG_GETARG_DATUM(1), - PG_GETARG_INT32(2), - PointerGetDatum(NULL) - ) - ); - - if (res && res->lexeme) - { - TSLexeme *ptr = res, - *basevar; - - while (ptr->lexeme) - { - uint16 nv = ptr->nvariant; - uint16 i, - nlex = 0; - LexemeInfo **infos; - - basevar = ptr; - while (ptr->lexeme && nv == ptr->nvariant) - { - nlex++; - ptr++; - } - - infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex); - for (i = 0; i < nlex; i++) - if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL) - break; - - if (i < nlex) - { - /* no chance to find */ - pfree(infos); - continue; - } - - info = findVariant(info, stored, curpos, infos, nlex); - } - } - else if (res) - { /* stop-word */ - LexemeInfo *infos = findTheLexeme(d, NULL); - - info = findVariant(NULL, stored, curpos, &infos, 1); - } - else - { - info = NULL; /* word isn't recognized */ - } - - dstate->private = (void *) info; - - if (!info) - { - dstate->getnext = false; - PG_RETURN_POINTER(NULL); - } - - if ((res = checkMatch(d, info, curpos, &moreres)) != NULL) - { - dstate->getnext = moreres; - PG_RETURN_POINTER(res); - } - - dstate->getnext = true; - - PG_RETURN_POINTER(NULL); -} diff --git a/contrib/tsearch2/docs/tsearch-V2-intro.html b/contrib/tsearch2/docs/tsearch-V2-intro.html deleted file mode 100644 index 8b2514e5be..0000000000 --- a/contrib/tsearch2/docs/tsearch-V2-intro.html +++ /dev/null @@ -1,1073 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> -<html> -<head> - <title>tsearch-v2-intro</title> -</head> -<body class="content"> -<div class="content"> -<h2>Tsearch2 - Introduction</h2> -<p><a href=" -http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html"> -[Online version]</a> of this document is available.</p> - -<p>The tsearch2 module is available to add as an extension to the -PostgreSQL database to allow for Full Text Indexing. 
This document -is an introduction to installing, configuring, using and -maintaining the database with the tsearch2 module activated.</p> -<p>Please note that the tsearch2 module is fully incompatible with the old -tsearch, which is deprecated in 7.4 and will be obsoleted in -8.0.</p> -<h3>USING TSEARCH2 AND POSTGRESQL FOR A WEB BASED SEARCH -ENGINE</h3> -<p>This documentation is provided as a short guide on how to -quickly get up and running with tsearch2 and PostgreSQL, for those -who want to implement a full text indexed based search engine. It -is not meant to be a complete in-depth guide into the full ins and -outs of the contrib/tsearch2 module, and is primarily aimed at -beginners who want to speed up searching of large text fields, or -those migrating from other database systems such as MS-SQL.</p> -<p>The README.tsearch2 file included in the contrib/tsearch2 -directory contains a brief overview and history behind tsearch. -This can also be found online <a href=" -http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[right -here]</a>.</p> -<p>Further in-depth documentation, such as a full function -reference and user guide, can be found online at the <a href=" -http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/">[tsearch -documentation home]</a>.</p> -<h3>ACKNOWLEDGEMENTS</h3> - -<p>Robert John Shepherd originally wrote this documentation for the -previous version of the tsearch module (v1) included with the postgres -release. I took his documentation and updated it to comply with the -tsearch2 modifications.</p> -<p>Robert's original acknowledgements:</p> -<p>"Thanks to Oleg Bartunov for taking the time to answer many of -my questions regarding this module, and also to Teodor Sigaev for -clearing up the process of making your own dictionaries.
Plus of -course a big thanks to the pair of them for writing this module in -the first place!"</p> -<p>I would also like to extend my thanks to the developers, and -Oleg Bartunov for all of his direction and help with the new -features of tsearch2.</p> -<h3>OVERVIEW</h3> -<p>MS-SQL provides a full text indexing (FTI) system which enables -the fast searching of text based fields, very useful for websites -(and other applications) that require a result set based on key -words. PostgreSQL ships with a contributed module called tsearch2, -which implements a special type of index that can also be used for -full text indexing. Furthermore, unlike MS' offering which -requires regular incremental rebuilds of the text indexes -themselves, tsearch2 indexes are always up-to-date and keeping them -so induces very little overhead.</p> -<p>Before we get into the details, it is recommended that you have -installed and tested PostgreSQL, are reasonably familiar with -databases, the SQL query language and also understand the basics of -connecting to PostgreSQL from the local shell. This document isn't -intended for the complete PostgreSQL newbie, but anyone with a -reasonable grasp of the basics should be able to follow it.</p> -<h3>INSTALLATION</h3> -<p>Starting with PostgreSQL version 7.4 tsearch2 is now included in -the contrib directory with the PostgreSQL sources. contrib/tsearch2 -is where you will find everything needed to install and use -tsearch2. Please note that tsearch2 will also work with PostgreSQL -version 7.3.x, but it is not the module included with the source -distribution. You will have to download the module separately and -install it in the same fashion.</p> - -<p>I installed the tsearch2 module to a PostgreSQL 7.3 database -from the contrib directory without squashing the original (old) -tsearch module.
What I did was move the modules tsearch src -directory into the contrib tree under the name tsearchV2.</p> -<p>Step one is to download the tsearch V2 module :</p> -<p><a href=" -http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]</a> -(check Development History for latest stable version !)</p> -<pre> - tar -zxvf tsearch-v2.tar.gz - mv tsearch2 $PGSQL_SRC/contrib/ - cd $PGSQL_SRC/contrib/tsearch2 -</pre> -<p>If you are installing from PostgreSQL version 7.4 or higher, you -can skip those steps and just change to the contrib/tsearch2 -directory in the source tree and continue from there.</p> -<p>As of May 9, 2004 there is a source patch available for -tsearch2. The patch provides changes to the pg_ts_ configuration -tables to allow for easy dump and restore of a database containing -tsearch2. The patch is available here : <a href=" -http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/regprocedure_7.4.patch.gz"> -[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/regprocedure_7.4.patch.gz]</a></p> - -<p>To apply this patch, download the mentioned file and place it in -your PostgreSQL source tree ($PGSQL_SRC). This patch is not -required for tsearch2 to work. I would however, highly recommend it -as it makes the backup and restore procedures very simple.</p> -<pre> - cd $PGSQL_SRC - gunzip regprocedure_7.4.patch.gz - patch -b -p1 < regprocedure_7.4.patch -</pre> -<p>If you have a working version of tsearch2 in your database, you -do not need to re-install the tsearch2 module. Just apply the patch -and run make. This patch only affects the tsearch2.sql file. You -can run the SQL script found : <a href=" -http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/regprocedure_update.sql"> -[right here]</a> This script will make the modifications found in -the patch, and update the fields from the existing data. From this -point on, you can dump and restore the database in a normal -fashion. 
Without this patch, you must follow the instructions later -in this document for backup and restore.</p> -<p>This patch is only needed for tsearch2 in PostgreSQL versions -7.3.x and 7.4.x. The patch has been applied to the sources for -8.0.x.</p> -<p>When you have your source tree for tsearch2 ready, you can -continue with the regular building and installation process.</p> - -<pre> - gmake - gmake install - gmake installcheck -</pre> -<p>That is pretty much all you have to do, unless of course you get -errors. However if you get those, you better go check with the -mailing lists over at <a href=" -http://www.postgresql.org">http://www.postgresql.org</a> or -<a href=" -http://openfts.sourceforge.net/">http://openfts.sourceforge.net/</a> -since it's never failed for me.</p> -<p>If you ever need to revert this patch, and go back to the -unpatched version of tsearch2, it is simple if you followed the -above patch command. The -b option creates a backup of the original -file, so we can just copy it back.</p> -<pre> - cd $PGSQL_SRC/contrib/tsearch2 - cp tsearch.sql.in.orig tsearch.sql.in - make - -</pre> -<p>If you need the patched version again, just follow the patch -instructions again.</p> -<p>The directory in the contrib/ and the directory from the archive -is called tsearch2. Tsearch2 is completely incompatible with the -previous version of tsearch. This means that both versions can be -installed into a single database, and migration to the new version may -be much easier.</p> -<p>NOTE: the previous version of tsearch found in the -contrib/tsearch directory is deprecated. Although it is still -available and included within PostgreSQL version 7.4, it will be -removed in version 8.0.</p> -<h3>ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE</h3> -<p>We should create a database to use as an example for the -remainder of this file. We can call the database "ftstest". 
You can -create it from the command line like this:</p> -<pre> - #createdb ftstest -</pre> -<p>If you thought installation was easy, this next bit is even -easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and -type:</p> - -<pre> - psql ftstest < tsearch2.sql -</pre> -<p>The file "tsearch2.sql" holds all the wonderful little goodies -you need to do full text indexing. It defines numerous functions -and operators, and creates the needed tables in the database. There -will be 4 new tables created after running the tsearch2.sql file : -pg_ts_dict, pg_ts_parser, pg_ts_cfg, pg_ts_cfgmap are added.</p> -<p>You can check out the tables if you like:</p> -<pre> - #psql ftstest - ftstest=# \d - List of relations - Schema | Name | Type | Owner - --------+--------------+-------+---------- - public | pg_ts_cfg | table | kopciuch - public | pg_ts_cfgmap | table | kopciuch - public | pg_ts_dict | table | kopciuch - public | pg_ts_parser | table | kopciuch - (4 rows) -</pre> -<p>You may need to grant permissions to use on pg_ts_dict, pg_ts_parser, pg_ts_cfg, pg_ts_cfgmap tables to let non-superuser works with tsearch2. GRANT SELECT should be enough for search-only access. </p> -<h3>TYPES AND FUNCTIONS PROVIDED BY TSEARCH2</h3> -<p>The first thing we can do is try out some of the types that are -provided for us. Lets look at the tsvector type provided for -us:</p> - -<pre> - SELECT 'Our first string used today'::tsvector; - tsvector - --------------------------------------- - 'Our' 'used' 'first' 'today' 'string' - (1 row) -</pre> -<p>The results are the words used within our string. Notice they -are not in any particular order. 
The tsvector type returns a string -of space separated words.</p> -<pre> - SELECT 'Our first string used today first string'::tsvector; - tsvector - ----------------------------------------------- - 'Our' 'used' 'first' 'today' 'string' - (1 row) -</pre> -<p>Notice the results string has each unique word ('first' and -'string' only appear once in the tsvector value). Which of course -makes sense if you are searching the full text ... you only need to -know each unique word in the text.</p> -<p>Those examples were just casting a text field to that of type -tsvector. Lets check out one of the new functions created by the -tsearch2 module.</p> -<p>The function to_tsvector has 3 possible signatures:</p> -<pre> - - to_tsvector(oid, text); - to_tsvector(text, text); - to_tsvector(text); -</pre> -<p>We will use the second method using two text fields. The -overloaded methods provide us with a way to specify the way the -searchable text is broken up into words (Stemming process). Right -now we will specify the 'default' configuration. See the section on -TSEARCH2 CONFIGURATION to learn more about this.</p> -<pre> - SELECT to_tsvector('default', - 'Our first string used today first string'); - to_tsvector - -------------------------------------------- - 'use':4 'first':2,6 'today':5 'string':3,7 - (1 row) -</pre> -<p>The result returned from this function is of type tsvector. The -results came about by this reasoning: All of the words in the text -passed in are stemmed, or not used because they are stop words -defined in our configuration. Each lower case morphed word is -returned with all of the positions in the text.</p> -<p>In this case the word "Our" is a stop word in the default -configuration. That means it will not be included in the result. -The word "first" is found at positions 2 and 6 (although "Our" is a -stop word, its position is maintained). The word(s) positioning is -maintained exactly as in the original string. 
The word "used" is -morphed to the word "use" based on the default configuration for -word stemming, and is found at position 4. The rest of the results -follow the same logic. Just a reminder again ... the order of the -'word' position in the output is not in any kind of order. (ie -'use':4 appears first)</p> -<p>If you want to view the output of the tsvector fields without -their positions, you can do so with the function -"strip(tsvector)".</p> -<pre> - SELECT strip(to_tsvector('default', - 'Our first string used today first string')); - strip - -------------------------------- - 'use' 'first' 'today' 'string' - -</pre> -<p>If you wish to know the number of unique words returned in the -tsvector you can do so by using the function "length(tsvector)"</p> -<pre> - SELECT length(to_tsvector('default', - 'Our first string used today first string')); - length - -------- - 4 - (1 row) -</pre> -<p>Lets take a look at the function to_tsquery. It also has 3 -signatures which follow the same rational as the to_tsvector -function:</p> -<pre> - to_tsquery(oid, text); - to_tsquery(text, text); - to_tsquery(text); -</pre> -<p>Lets try using the function with a single word :</p> -<pre> - SELECT to_tsquery('default', 'word'); - to_tsquery - ----------- - 'word' - (1 row) - -</pre> -<p>I call the function the same way I would a to_tsvector function, -specifying the 'default' configuration for morphing, and the result -is the stemmed output 'word'.</p> -<p>Lets attempt to use the function with a string of multiple -words:</p> -<pre> - SELECT to_tsquery('default', 'this is many words'); - ERROR: Syntax error -</pre> -<p>The function can not accept a space separated string. The -intention of the to_tsquery function is to return a type of -"tsquery" used for searching a tsvector field. 
What we need to do -is search for one to many words with some kind of logic (for now -simple boolean).</p> -<pre> - SELECT to_tsquery('default', 'searching|sentence'); - to_tsquery - ---------------------- - 'search' | 'sentenc' - (1 row) -</pre> -<p>Notice that the words are separated by the boolean logic "OR", -the text could contain boolean operators &,|,!,() with their -usual meaning.</p> - -<p>You can not use words defined as being a stop word in your -configuration. The function will not fail ... you will just get no -result, and a NOTICE like this:</p> -<pre> - SELECT to_tsquery('default', 'a|is&not|!the'); - NOTICE: Query contains only stopword(s) - or doesn't contain lexeme(s), ignored - to_tsquery - ----------- - (1 row) -</pre> -<p>That is a beginning to using the types, and functions defined in -the tsearch2 module. There are numerous more functions that I have -not touched on. You can read through the tsearch2.sql file built -when compiling to get more familiar with what is included.</p> -<h3>INDEXING FIELDS IN A TABLE</h3> -<p>The next stage is to add a full text index to an existing table. -In this example we already have a table defined as follows:</p> -<pre> - CREATE TABLE tblMessages - ( - intIndex int4, - strTopic varchar(100), - strMessage text - ); -</pre> - -<p>We are assuming there are several rows with some kind of data in -them. Any data will do, just do several inserts with test strings -for a topic, and a message. here is some test data I inserted. 
(yes -I know it's completely useless stuff ;-) but it will serve our -purpose right now).</p> -<pre> - INSERT INTO tblMessages - VALUES ('1', 'Testing Topic', 'Testing message data input'); - INSERT INTO tblMessages - VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s'); - INSERT INTO tblMessages - VALUES ('3', 'Famous Author', 'Stephen King'); - INSERT INTO tblMessages - VALUES ('4', 'Political Topic', - 'Nelson Mandella is released from prison'); - INSERT INTO tblMessages - VALUES ('5', 'Nursery rhyme phrase', - 'Little jack horner sat in a corner'); - INSERT INTO tblMessages - VALUES ('6', 'Gettysburg address quotation', - 'Four score and seven years ago' - ' our fathers brought forth on this' - ' continent a new nation, conceived in' - ' liberty and dedicated to the proposition' - ' that all men are created equal'); - INSERT INTO tblMessages - VALUES ('7', 'Classic Rock Bands', - 'Led Zeppelin Grateful Dead and The Sex Pistols'); - INSERT INTO tblMessages - VALUES ('8', 'My birth address', - '18 Sommervile road, Regina, Saskatchewan'); - INSERT INTO tblMessages - VALUES ('9', 'Joke', 'knock knock : who\'s there?' - ' I will not finish this joke'); - INSERT INTO tblMessages - VALUES ('10', 'Computer information', - 'My computer is a pentium III 400 mHz' - ' with 192 megabytes of RAM'); -</pre> -<p>The next stage is to create a special text index which we will -use for FTI, so we can search our table of messages for words or a -phrase. We do this using the SQL command:</p> -<pre> - ALTER TABLE tblMessages ADD COLUMN idxFTI tsvector; -</pre> -<p>Note that unlike traditional indexes, this is actually a new -field in the same table, which is then used (through the magic of -the tsearch2 operators and functions) by a special index we will -create in a moment.</p> -<p>The general rule for the initial insertion of data will follow -four steps:</p> -<pre> - - 1. update table - 2. vacuum full analyze - 3. create index - 4. 
vacuum full analyze -</pre> -<p>The data can be updated into the table, the vacuum full analyze -will reclaim unused space. The index can be created on the table -after the data has been inserted. Having the index created prior to -the update will slow down the process. It can be done in that -manner, this way is just more efficient. After the index has been -created on the table, vacuum full analyze is run again to update -postgres's statistics (ie having the index take effect).</p> -<pre> - UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage); - VACUUM FULL ANALYZE; -</pre> -<p>Note that this only inserts the field strMessage as a tsvector, -so if you want to also add strTopic to the information stored, you -should instead do the following, which effectively concatenates the -two fields into one before being inserted into the table:</p> -<pre> - UPDATE tblMessages - SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,'')); - VACUUM FULL ANALYZE; -</pre> -<p><strong>Using the coalesce function makes sure this -concatenation also works with NULL fields.</strong></p> - -<p>We need to create the index on the column idxFTI. Keep in mind -that the database will update the index when some action is taken. -In this case we _need_ the index (The whole point of Full Text -INDEXING ;-)), so don't worry about any indexing overhead. We will -create an index based on the gist or gin function. GiST is an index -structure for Generalized Search Tree, GIN is a inverted index (see <a href="tsearch2-ref.html#indexes">The tsearch2 Reference: Indexes</a>).</p> -<pre> - CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI); - VACUUM FULL ANALYZE; -</pre> -<p>After you have converted all of your data and indexed the -column, you can select some rows to see what actually happened. 
I -will not display output here but you can play around yourselves and -see what happened.</p> -<p>The last thing to do is set up a trigger so every time a row in -this table is changed, the text index is automatically updated. -This is easily done using:</p> -<pre> - CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages - FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage); -</pre> -<p>Or if you are indexing both strMessage and strTopic you should -instead do:</p> -<pre> - - CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages - FOR EACH ROW EXECUTE PROCEDURE - tsearch2(idxFTI, strTopic, strMessage); -</pre> -<p>Before you ask, the tsearch2 function accepts multiple fields as -arguments so there is no need to concatenate the two into one like -we did before.</p> -<p>If you want to do something specific with columns, you may write -your very own trigger function using plpgsql or other procedural -languages (but not SQL, unfortunately) and use it instead of -<em>tsearch2</em> trigger.</p> -<p>You could however call other stored procedures from within the -tsearch2 function. Lets say we want to create a function to remove -certain characters (like the @ symbol from all text).</p> -<pre> - CREATE FUNCTION dropatsymbol(text) - RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL; -</pre> -<p>Now we can use this function within the tsearch2 function on the -trigger.</p> - -<pre> - DROP TRIGGER tsvectorupdate ON tblmessages; - CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages - FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage); - INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'Test@test.com'); -</pre> -<p>If at this point you receive an error stating: ERROR: Can't find -tsearch config by locale</p> -<p>Do not worry. You have done nothing wrong. And tsearch2 is not -broken. 
All that has happened here is that the configuration is -set up to use a configuration based on the locale of the server. All -you have to do is change your default configuration, or add a new -one for your specific locale. See the section on TSEARCH2 -CONFIGURATION.</p> -<pre class="real"> - SELECT * FROM tblmessages WHERE intindex = 69; - - intindex | strtopic | strmessage | idxfti - ----------+--------------------------+---------------+----------------------- - 69 | Attempt for dropatsymbol | Test@test.com | 'test':1 'test.com':2 - (1 row) -</pre> -<p>Notice that the string content was passed through the stored -procedure dropatsymbol. The '@' character was replaced with a -single space ... and the output from the procedure was then stored -in the tsvector column.</p> -<p>This could be useful for removing other characters from indexed -text, or any kind of preprocessing needed to be done on the text -prior to insertion into the index.</p> -<h3>QUERYING A TABLE</h3> - -<p>There are some examples in the README.tsearch2 file for querying -a table. One major difference between tsearch and tsearch2 is the -operator ## is no longer available. Only the operator @@ is -defined, using the types tsvector on one side and tsquery on the -other side.</p> -<p>Let's search the indexed data for the word "Test". I indexed -based on the concatenation of the strTopic, and the -strMessage:</p> -<pre> - SELECT intindex, strtopic FROM tblmessages - WHERE idxfti @@ 'test'::tsquery; - intindex | strtopic - ----------+--------------- - 1 | Testing Topic - (1 row) -</pre> -<p>The only result that matched was the row with a topic "Testing -Topic". Notice that the word I search for was all lowercase. Let's -see what happens when I query for uppercase "Test".</p> -<pre> - SELECT intindex, strtopic FROM tblmessages - WHERE idxfti @@ 'Test'::tsquery; - intindex | strtopic - ----------+---------- - (0 rows) -</pre> -<p>We get zero rows returned. 
The reason is that when the text -was inserted, it was morphed to my default configuration (because -of the call to to_tsvector in the UPDATE statement). If there was -no morphing done, and the tsvector field(s) contained the word -'Test', a match would have been found.</p> -<p>Most likely the best way to query the field is to use the -to_tsquery function on the right hand side of the @@ operator like -this:</p> - -<pre> - SELECT intindex, strtopic FROM tblmessages - WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin'); - intindex | strtopic - ----------+-------------------- - 1 | Testing Topic - 7 | Classic Rock Bands - (2 rows) -</pre> -<p>That query searched for all instances of "Test" OR "Zeppelin". -It returned two rows: the "Testing Topic" row, and the "Classic -Rock Bands" row. The to_tsquery function performed the correct -morphology upon the parameters, and searched the tsvector field -appropriately.</p> -<p>The last example here relates to searching for a phrase, for -example "minority report". This poses a problem with regard to -tsearch2, as it doesn't index phrases, only words. 
But there is a -way around which doesn't appear to have a significant impact on -query time, and that is to use a query such as the following:</p> -<pre> - SELECT intindex, strTopic FROM tblmessages - WHERE idxfti @@ to_tsquery('default', 'gettysburg & address') - AND strMessage ~* '.*men are created equal.*'; - intindex | strtopic - ----------+------------------------------ - 6 | Gettysburg address quotation - (1 row) - SELECT intindex, strTopic FROM tblmessages - WHERE idxfti @@ to_tsquery('default', 'gettysburg & address') - AND strMessage ~* '.*something that does not exist.*'; - intindex | strtopic - ----------+---------- - (0 rows) -</pre> -<p>Of course if your indexing both strTopic and strMessage, and -want to search for this phrase on both, then you will have to get -out the brackets and extend this query a little more.</p> - -<h3>TSEARCH2 CONFIGURATION</h3> -<p>Some words such as "and", "the", and "who" are automatically not -indexed, since they belong to a pre-existing dictionary of "Stop -Words" which tsearch2 does not perform indexing on. If someone -needs to search for "The Who" in your database, they are going to -have a tough time coming up with any results, since both are -ignored in the indexes. But there is a solution.</p> -<p>Lets say we want to add a word into the stop word list for -english stemming. We could edit the file -:'/usr/local/pgsql/share/english.stop' and add a word to the list. -I edited mine to exclude my name from indexing:</p> -<pre> - - Edit /usr/local/pgsql/share/english.stop - - Add 'andy' to the list - - Save the file. -</pre> -<p>When you connect to the database, the dict_init procedure is run -during initialization. And in my configuration it will read the -stop words from the file I just edited. If you were connected to -the DB while editing the stop words, you will need to end the -current session and re-connect. 
When you re-connect to the -database, 'andy' is no longer indexed:</p> -<pre> - SELECT to_tsvector('default', 'Andy'); - to_tsvector - ------------ - (1 row) -</pre> -<p>Originally I would get the result :</p> - -<pre> - SELECT to_tsvector('default', 'Andy'); - to_tsvector - ------------ - 'andi':1 - (1 row) -</pre> -<p>But since I added it as a stop word, it would be ignored on the -indexing. The stop word added was used in the dictionary "en_stem". -If I were to use a different configuration such as 'simple', the -results would be different. There are no stop words for the simple -dictionary. It will just convert to lower case, and index every -unique word.</p> -<pre> - SELECT to_tsvector('simple', 'Andy andy The the in out'); - to_tsvector - ------------------------------------- - 'in':5 'out':6 'the':3,4 'andy':1,2 - (1 row) -</pre> -<p>All this talk about which configuration to use is leading us -into the actual configuration of tsearch2. In the examples in this -document the configuration has always been specified when using the -tsearch2 functions:</p> -<pre> - SELECT to_tsvector('default', 'Testing the default config'); - SELECT to_tsvector('simple', 'Example of simple Config'); -</pre> -<p>The pg_ts_cfg table holds each configuration you can use with -the tsearch2 functions. As you can see the ts_name column contains -both the 'default' configurations based on the 'C' locale. And the -'simple' configuration which is not based on any locale.</p> - -<pre> - SELECT * from pg_ts_cfg; - ts_name | prs_name | locale - -----------------+----------+-------------- - default | default | C - default_russian | default | ru_RU.KOI8-R - simple | default | - (3 rows) -</pre> -<p>Each row in the pg_ts_cfg table contains the name of the -tsearch2 configuration, the name of the parser to use, and the -locale mapped to the configuration. There is only one parser to -choose from the table pg_ts_parser called 'default'. 
More parsers -could be written, but for our needs we will use the default.</p> -<p>There are 3 configurations installed by tsearch2 initially. If -your locale is set to 'en_US' for example (like my laptop), then as -you can see there is currently no dictionary configured to use with -that locale. You can either set up a new configuration or just use -one that already exists. If I do not specify which configuration to -use in the to_tsvector function, I receive the following error.</p> -<pre> - SELECT to_tsvector('learning tsearch is like going to school'); - ERROR: Can't find tsearch config by locale -</pre> -<p>We will create a new configuration for use with the server -encoding 'en_US'. The first step is to add a new configuration into -the pg_ts_cfg table. We will call the configuration -'default_english', with the default parser and use the locale -'en_US'.</p> -<pre> - INSERT INTO pg_ts_cfg (ts_name, prs_name, locale) - VALUES ('default_english', 'default', 'en_US'); - -</pre> -<p>We have only declared that there is a configuration called -'default_english'. We need to set the configuration of how -'default_english' will work. The next step is creating a new -dictionary to use. The configuration of the dictionary is -completely different in tsearch2. In the prior versions to make -changes, you would have to re-compile your changes into the -tsearch.so. 
All of the configuration has now been moved into the -system tables created by executing the SQL code from -tsearch2.sql</p> -<p>Lets take a first look at the pg_ts_dict table</p> -<pre> - ftstest=# \d pg_ts_dict - Table "public.pg_ts_dict" - Column | Type | Modifiers - -----------------+--------------+----------- - dict_name | text | not null - dict_init | regprocedure | - dict_initoption | text | - dict_lexize | regprocedure | not null - dict_comment | text | - Indexes: pg_ts_dict_pkey primary key btree (dict_name) -</pre> -<p>The dict_name column is the name of the dictionary, for example -'simple', 'en_stem' or 'ru_stem'. The dict_init column is a text -representation of a stored procedure to run for initialization of -that dictionary, for example 'snb_en_init(text)' or -'snb_ru_init(text)'. The initial configuration of tsearch2 had the -dict_init and dict_lexize columns as type oid. The patch mentioned -in the Installation Notes changes these types to regprocedure. The -data inserted, or updated can still be the oid of the stored -procedure. The representation is just different. This makes backup -and restore procedures much easier for tsearch2. The dict_init -option is used for options passed to the init function for the -stored procedure. In the cases of 'en_stem' or 'ru_stem' it is a -path to a stopword file for that dictionary, for example -'/usr/local/pgsql/share/english.stop'. This is however dictated by -the dictionary. ISpell dictionaries may require different options. -The dict_lexize column is another OID of a stored procedure to the -function used to lexize, for example 'snb_lexize(internal, -internal, integer)'. The dict_comment column is just a comment.</p> -<p>Next we will configure the use of a new dictionary based on -ISpell. We will assume you have ISpell installed on you machine. -(in /usr/local/lib)</p> -<p>There has been some confusion in the past as to which files are -used from ISpell. ISpell operates using a hash file. 
This is a -binary file created by the ISpell command line utility "buildhash". -This utility accepts a file containing the words from the -dictionary, and the affixes file and the output is the hash file. -The default installation of ISpell installs the english hash file -english.hash, which is the exact same file as american.hash. ISpell -uses this as the fallback dictionary to use.</p> -<p>This hash file is not what tsearch2 requires as the ISpell -interface. The file(s) needed are those used to create the hash. -Tsearch uses the dictionary words for morphology, so the listing is -needed, not spellchecking. Regardless, these files are included in -the ISpell sources, and you can use them to integrate into -tsearch2. This is not complicated, but is not very obvious to begin -with. The tsearch2 ISpell interface needs only the listing of -dictionary words, it will parse and load those words, and use the -ISpell dictionary for lexeme processing.</p> - -<p>I found the ISpell make system to be very finicky. Their -documentation actually states this to be the case. So I just did -things the command line way. In the ISpell source tree under -languages/english there are several files in this directory. For a -complete description, please read the ISpell README. Basically for -the english dictionary there is the option to create the small, -medium, large and extra large dictionaries. The medium dictionary -is recommended. If the make system is configured correctly, it -would build and install the english.hash file from the medium size -dictionary. Since we are only concerned with the dictionary word -listing ... it can be created from the /languages/english directory -with the following command:</p> -<pre> - sort -u -t/ +0f -1 +0 -T /usr/tmp -o english.med english.0 english.1 -</pre> -<p>This will create a file called english.med. You can copy this -file to wherever you like. I placed mine in /usr/local/lib so it -coincides with the ISpell hash files. 
You can now add the tsearch2 -configuration entry for the ISpell english dictionary. We will also -continue to use the english word stop file that was installed for -the en_stem dictionary. You could use a different one if you like. -The ISpell configuration is based on the "ispell_template" -dictionary installed by default with tsearch2. We will use the OIDs -to the stored procedures from the row where the dict_name = -'ispell_template'.</p> -<pre> - INSERT INTO pg_ts_dict - (SELECT 'en_ispell', - dict_init, - 'DictFile="/usr/local/lib/english.med",' - 'AffFile="/usr/local/lib/english.aff",' - 'StopFile="/usr/local/pgsql/share/contrib/english.stop"', - dict_lexize - FROM pg_ts_dict - WHERE dict_name = 'ispell_template'); -</pre> -<p>Now that we have a dictionary we can specify it's use in a query -to get a lexeme. For this we will use the lexize function. The -lexize function takes the name of the dictionary to use as an -argument. Just as the other tsearch2 functions operate. You will -need to stop your psql session and start it again in order for this -modification to take place.</p> -<pre> - SELECT lexize('en_ispell', 'program'); - lexize - ----------- - {program} - (1 row) - -</pre> -<p>If you wanted to always use the ISpell english dictionary you -have installed, you can configure tsearch2 to always use a specific -dictionary.</p> -<pre> - SELECT set_curdict('en_ispell'); -</pre> -<p>Lexize is meant to turn a word into a lexeme. It is possible to -receive more than one lexeme returned for a single word.</p> -<pre> - SELECT lexize('en_ispell', 'conditionally'); - lexize - ----------------------------- - {conditionally,conditional} - (1 row) -</pre> -<p>The lexize function is not meant to take a full string as an -argument to return lexemes for. If you passed in an entire sentence, -it attempts to find that entire sentence in the dictionary. 
Since -the dictionary contains only words, you will receive an empty -result set back.</p> -<pre> - SELECT lexize('en_ispell', 'This is a senctece to lexize'); - lexize - -------- - - (1 row) - -If you parse a lexeme from a word not in the dictionary, then you will receive an empty result. This makes sense because the word "tsearch" is not in the english dictionary. You can create your own additions to the dictionary if you like. This may be useful for scientific or technical glossaries that need to be indexed. SELECT lexize('en_ispell', 'tsearch'); lexize -------- (1 row) - -</pre> -<p>This is not to say that tsearch will be ignored when adding text -information to the tsvector index column. This will be -explained in greater detail with the table pg_ts_cfgmap.</p> -<p>Next we need to set up the configuration for mapping the -dictionary use to the lexeme parsing. This will be done by altering -the pg_ts_cfgmap table. We will insert several rows, specifying to -use the new dictionary we installed and configured for lexizing -within tsearch2. There are several types of lexemes we would be -concerned with forcing the use of the ISpell dictionary.</p> -<pre> - INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name) - VALUES ('default_english', 'lhword', '{en_ispell,en_stem}'); - INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name) - VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}'); - INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name) - VALUES ('default_english', 'lword', '{en_ispell,en_stem}'); -</pre> -<p>We have just inserted 3 records to the configuration mapping, -specifying that the lexeme types for "lhword, lpart_hword and lword" -are to be stemmed using the 'en_ispell' dictionary we added into -pg_ts_dict, when using the configuration 'default_english' which -we added to pg_ts_cfg.</p> -<p>There are several other lexeme types used that we do not need to -specify as using the ISpell dictionary. 
We can simply insert values -using the 'simple' stemming process dictionary.</p> -<pre> - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'url', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'host', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'sfloat', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'uri', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'int', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'float', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'email', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'word', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'hword', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'nlword', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'nlpart_hword', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'part_hword', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'nlhword', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'file', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'uint', '{simple}'); - INSERT INTO pg_ts_cfgmap - VALUES ('default_english', 'version', '{simple}'); -</pre> - -<p>Our addition of a configuration for 'default_english' is now -complete. We have successfully created a new tsearch2 -configuration. 
At the same time we have also set the new -configuration to be our default for en_US locale.</p> -<pre> - SELECT to_tsvector('default_english', - 'learning tsearch is like going to school'); - to_tsvector - -------------------------------------------------- - 'go':5 'like':4 'learn':1 'school':7 'tsearch':2 - SELECT to_tsvector('learning tsearch is like going to school'); - to_tsvector - -------------------------------------------------- - 'go':5 'like':4 'learn':1 'school':7 'tsearch':2 - (1 row) -</pre> -<p>Notice here that words like "tsearch" are still parsed and -indexed in the tsvector column. There is a lexeme returned for the -word because in the configuration mapping table, we specify words -to be used from the 'en_ispell' dictionary first, but as a fallback -to use the 'en_stem' dictionary. Therefore a lexeme is not returned -from en_ispell, but is returned from en_stem, and added to the -tsvector.</p> -<pre> - SELECT to_tsvector('learning tsearch is like going to computer school'); - to_tsvector - --------------------------------------------------------------------------- - 'go':5 'like':4 'learn':1 'school':8 'compute':7 'tsearch':2 'computer':7 - (1 row) -</pre> -<p>Notice in this last example I added the word "computer" to the -text to be converted into a tsvector. Because we have set up our -default configuration to use the ISpell english dictionary, the -words are lexized, and computer returns 2 lexemes at the same -position. 'compute':7 and 'computer':7 are now both indexed for the -word computer.</p> -<p>You can create additional dictionary lists, or use the extra -large dictionary from ISpell. You can read through the ISpell -documents, and source tree to make modifications as you see -fit.</p> -<p>In the case that you already have a configuration set for the -locale, and you are changing it to your new dictionary -configuration, you will have to set the old locale to NULL. 
If we -are using the 'C' locale then we would do this:</p> - -<pre> - UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C'; -</pre> -<p>That about wraps up the configuration of tsearch2. There is much -more you can do with the tables provided. This was just an -introduction to get things working rather quickly.</p> -<h3>ADDING NEW DICTIONARIES TO TSEARCH2</h3> -<p>To aid in the addition of new dictionaries to the tsearch2 -module you can use another additional module in combination with -tsearch2. The gendict module is included into tsearch2 distribution -and is available from gendict/ subdirectory.</p> -<p>I will not go into detail about installation and instructions on -how to use gendict to its fullest extent right now. You can read -the README.gendict ... it has all of the instructions and -information you will need.</p> -<h3>BACKING UP AND RESTORING DATABASES THAT FEATURE TSEARCH2</h3> -<p><strong>Never rely on anyone else's instructions to backup and -restore a database system, always develop and understand your own -methodology, and test it numerous times before you need to do it -for real.</strong></p> -<p>The backup and restore procedure has changed over time. This is -not meant to be the bible for tsearch2 back up and restore. Please -read all sections so you have a complete understanding of some -backup and restore issues. Please test your own procedures, and do -not rely on these instructions solely.</p> - -<p>If you come across some issues in your own procedures, please -feel free to bring the question up on the Open-FTS, and PostgreSQL -mailing lists.</p> -<h3>ORIGINAL BACKUP PROCEDURES</h3> -<p>Originally, tsearch2 had problems when using the pg_dump, and/or -the pg_dumpall utilities. The problem lies within the original use -of OIDs for column types. Since OIDs are not consistent across -pg_dumps, when you reload the data values into the pg_ts_dict -table, for example, those oids no longer point to anything. 
You -would then end up trying to use a "broken" tsearch2 -configuration.</p> -<p>The solution was to backup and restore a database using the -tsearch2 module into small unique parts, and then load them in the -correct order. You would have to edit the schema and remove the -tsearch stored procedure references in the sql file. You would have -to load your global objects, then the tsearch2 objects. You had to -re-create the tsearch module before restoring your schema so no -conflicts would arise. Then you could restore your data (all -schemas, and types needed for the data were now available).</p> -<p><strong>The original backup instructions were as -follows</strong></p> -<p>1) Backup any global database objects such as users and groups -(this step is usually only necessary when you will be restoring to -a virgin system)</p> -<pre> - pg_dumpall -g > GLOBALobjects.sql - -</pre> -<p>2) Backup the full database schema using pg_dump</p> -<pre> - pg_dump -s DATABASE > DATABASEschema.sql -</pre> -<p>3) Backup the full database using pg_dump</p> -<pre> - pg_dump -Fc DATABASE > DATABASEdata.tar -</pre> - -<p><strong>The original restore procedures were as -follows</strong></p> -<p>1) Create the blank database</p> -<pre> - createdb DATABASE -</pre> -<p>2) Restore any global database objects such as users and groups -(this step is usually only necessary when you will be restoring to -a virgin system)</p> -<pre> - psql DATABASE < GLOBALobjects.sql -</pre> -<p>3) Create the tsearch2 objects, functions and operators</p> - -<pre> - psql DATABASE < tsearch2.sql -</pre> -<p>4) Edit the backed up database schema and delete all SQL -commands which create tsearch2 related functions, operators and -data types, BUT NOT fields in table definitions that specify -tsvector types. If you're not sure what these are, they are the ones -listed in tsearch2.sql. 
Then restore the edited schema to the -database</p> -<pre> - psql DATABASE < DATABASEschema.sql -</pre> -<p>5) Restore the data for the database</p> -<pre> - - pg_restore -N -a -d DATABASE DATABASEdata.tar -</pre> -<p>If you get any errors in step 4, it will most likely be because -you forgot to remove an object that was created in tsearch2.sql. -Any errors in step 5 will mean the database schema was probably -restored wrongly.</p> -<p><strong>Issues with this procedure</strong></p> -<p>As I mentioned before, it is vital that you test out your own -backup and restore procedures. These procedures were originally -adopted from this document's original author, Robert John Shepherd. -It makes use of the pg_dump custom archive functionality. I am not -that familiar with the formatting output of pg_dump, and using -pg_restore. I have always had the luxury of using text files -(Everything is DATABASE.sql).</p> -<p>One issue not foreseen in the case of using a binary dump arises -when you have added more than the default tsearch2 configurations. -Upon reload of the data it will fail due to duplicate primary keys. -If you load the tsearch2 module, and then delete the data loaded by -tsearch2 into the configuration tables, the data will restore. The -configurations are incorrect because you can not remove the data -using OID references from the custom archive.</p> -<p>It would be very simple to fix this problem if the data was not -in an archive format. I do believe all of your data would have been -restored properly and you can get things working fairly easily. All -one would have to do is create the configurations as in the -tsearch2.sql file. And then create your custom configurations -again.</p> -<p>I have read in the pg_dump man page that if the tar archive -format is used, it is possible to limit which data is restored -using pg_restore. If anyone has more experience with pg_dump -archives, and pg_restore. 
Please feel free to test and contribute -your procedure(s).</p> -<h3>CURRENT BACKUP AND RESTORE PROCEDURES</h3> - -<p>Currently a backup and restore of a database using the tsearch2 -module can be quite simple, if you have applied the patch mentioned -in the installation instructions prior to tsearch2 installation. -This patch removes the use of the oid column. The text -representation for the stored procedures used are dumped with the -data and the restoration of the data works seamlessly.</p> -<p>1) to backup the database</p> -<pre> - pg_dump DATABASE > DATABASE.sql -</pre> -<p>2) to restore the database</p> -<pre> - createdb DATABASE - psql -d DATABASE -f DATABASE.sql -</pre> -<p>This procedure is now like any normal backup and restore -procedure. I cannot say whether this has been proven using the pg_dump -archive, and restoring with pg_restore. In theory there should be -no problems with any format after the patch is applied.</p> - -<p>This restoration procedure should never be an issue with the -patch applied to version 8.0 of PostgreSQL. Only versions 7.3 and -7.4 are affected. You can avoid any troubles by applying the patch -prior to installation, or running the SQL script provided to live -database before backup and restoring is done.</p> -</div> -</body> -</html> diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html deleted file mode 100644 index 29b7518391..0000000000 --- a/contrib/tsearch2/docs/tsearch2-guide.html +++ /dev/null @@ -1,1085 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> -<html> -<head> -<title>tsearch2 guide</title> -</head> -<body> -<h1 align=center>The tsearch2 Guide</h1> - -<p align=center> -Brandon Craig Rhodes<br>30 June 2003 -<br>Updated to 8.2 release by Oleg Bartunov, October 2006</br> -<p> -This Guide introduces the reader to the PostgreSQL tsearch2 module, -version 2. 
-More formal descriptions of the module's types and functions -are provided in the <a href="tsearch2-ref.html">tsearch2 Reference</a>, -which is a companion to this document. -<p> -First we will examine the <tt>tsvector</tt> and <tt>tsquery</tt> types -and how they are used to search documents; -next, we will use them to build a simple search engine in SQL; -and finally, we will study the internals of document conversion -and how you might tune the internals to accommodate various searching needs. -<p> -Once you have tsearch2 working with PostgreSQL, -you should be able to run the examples here exactly as they are typed. -<p> -<hr> -<h2>Table of Contents</h2> -<blockquote> -<a href="#intro">Introduction to FTS with tsearch2</a><br> -<a href="#vectors_queries">Vectors and Queries</a><br> -<a href="#simple_search">A Simple Search Engine</a><br> -<a href="#weights">Ranking and Position Weights</a><br> -<a href="#casting">Casting Vectors and Queries</a><br> -<a href="#parsing_lexing">Parsing and Lexing</a><br> -<a href="#ref">Additional information</a> -</blockquote> - -<hr> - - -<h2><a name="intro">Introduction to FTS with tsearch2</a></h2> -The purpose of FTS is to -find <b>documents</b>, which satisfy <b>query</b> and optionally return -them in some <b>order</b>. -Most common case: Find documents containing all query terms and return them in order -of their similarity to the query. Document in database can be -any text attribute, or combination of text attributes from one or many tables -(using joins). -Text search operators existed for years, in PostgreSQL they are -<tt><b>~,~*, LIKE, ILIKE</b></tt>, but they lack linguistic support, -tend to be slow and have no relevance ranking. The idea behind tsearch2 -is rather simple - preprocess document at index time to save time at search stage. 
-Preprocessing includes -<ul> -<li>document parsing onto words -<li>linguistic - normalize words to obtain lexemes -<li>store document in optimized for searching way -</ul> -Tsearch2, in a nutshell, provides FTS operator (contains) for two new data types, -which represent document and query - <tt>tsquery @@ tsvector</tt>. - -<P> -<h2><a name=vectors_queries>Vectors and Queries</a></h2> - -<blockquote> -<i>This section introduces -the two data types upon which tsearch2 search engines are based, -and illustrates their interaction using the simplest possible case. -The complex examples we present later on -are merely variations and elaborations of this basic mechanism.</i> -</blockquote> -<p> -The tsearch2 module allows you to index documents by the words they contain, -and then perform very efficient searches -for documents that contain a given combination of words. -Preparing your document index involves two steps: -<ul> -<li><b>Making a list of the words each document contains.</b> - You must reduce each document to a <tt>tsvector</tt> - which lists each word that appears in the document. - This process offers many options, - because there is no requirement - that you must copy words into the vector - exactly as they appear in the document. - For example, - many developers omit frequent and content-free <b>stop words</b> - like <i>the</i> to reduce the size of their index; - others reduce different forms of the same word - (<i>forked</i>, <i>forking</i>, <i>forks</i>) - to a common form (<i>fork</i>) - to make search results independent of tense and case. - Because words are very often stored in a modified form, - we use the special term <b>lexemes</b> - for the word forms we actually store in the vector. 
-<li><b>Creating an index of the documents by lexeme.</b> - This is managed automatically by tsearch2 - when you create a <tt>gist()</tt> index - on the <tt>tsvector</tt> column of a table, - which implements a form of the Berkeley - <a href="http://gist.cs.berkeley.edu/"><i>Generalized Search Tree</i></a>. - Since PostgreSQL 8.2 tsearch2 supports <a href="http://www.sigaev.ru/gin/">Gin</a> index, - which is an inverted index, commonly used in search engines. It adds scalability to tsearch2. -</ul> -Once your documents are indexed, -performing a search involves: -<ul> -<li><b>Reducing the search terms to lexemes.</b> - You must express each search you want to perform - as a <tt>tsquery</tt> specifying a boolean combination of lexemes. - Note that tsearch2 only finds <i>exact</i> matches - between the lexemes in your query and the ones in each vector — - even capitalization counts as a difference - (which is why all lexemes are usually kept lowercase). - So you must process search words the same way you processed document words; - if <i>forking</i> became <i>fork</i> in the document's <tt>tsvector</tt>, - then the search term <i>forking</i> must also become <i>fork</i> - or the search will not find the document. -<li><b>Retrieving the documents that match the query.</b> - Running a <tt>SELECT</tt> ... <tt>WHERE</tt> - <tt><i>query</i></tt> <tt>@@</tt> <tt><i>vector</i></tt> - on the table with the <tt><i>vector</i></tt> column - will return the documents that match your query. -<li><b>Presenting your results.</b> - This final stage offers as many options - as turning documents into vectors. - You can order documents by how well they matched the search terms; - create a headline for each document - showing some of the phrases in which it uses the search terms; - and restrict the number of results retrieved. - You will of course want some way to identify each document, - so the user can ask for the full text of the ones he wants to read. 
-</ul> -And beyond deciding upon rules for turning documents into vectors -and for presenting search results to users, -you have to decide <i>where</i> to perform these operations — -whether one database server -will parse documents, perform searches, and prepare search results, -or whether to spread the load of these operations across several machines. -These are complicated design issues -which we will explore later; -in this section and the next, -we will illustrate what can be accomplished -using a single database server. -<p> -The <tt>default</tt> tsearch2 configuration, -which we will learn more about later, -provides a good example of a process for reducing documents to vectors: - -<pre> -=# <b>SELECT set_curcfg('default')</b> -=# <b>SELECT to_tsvector('The air smells of sea water.')</b> - to_tsvector -------------------------------------- - 'air':2 'sea':5 'smell':3 'water':6 -(1 row) -</pre> - -Note the complex relationship between this document and its vector. -The vector lists only words from the document — -spaces and punctuation have disappeared. -Common words like <i>the</i> and <i>of</i> have been eliminated. -The <i>-s</i> that makes <i>smells</i> a plural has been removed, -leaving a lexeme that represents the word in its simplest form. -And finally, -though the vector remembers the positions in which each word appeared, -it does not store the lexemes in that order. -<p> -Keeping word positions in your vectors is optional, by the way. -The positions are necessary for the tsearch2 ranking functions, -which you can use to prioritize documents -based on how often each document uses the search terms -and whether they appear in close proximity. 
-But if you do not perform ranking, -or use your own process that ignores the word positions stored in the vector, -then you can save space by stripping them from your vectors: - -<pre> -=# <b>SELECT strip(to_tsvector('The air smells of sea water.'))</b> - strip ------------------------------ - 'air' 'sea' 'smell' 'water' -(1 row) -</pre> - -Now that we have a procedure for creating vectors, -we can build an indexed table of vectors very simply: - -<pre> -=# <b>CREATE TABLE vectors ( vector tsvector )</b> -=# <b>CREATE INDEX vector_index ON vectors USING gist(vector)</b> -=# <b>INSERT INTO vectors VALUES (to_tsvector('The path forks here'))</b> -=# <b>INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))</b> -=# <b>INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))</b> -=# <b>SELECT * FROM vectors</b> - vector ------------------------------------------- - 'fork':3 'path':2 - 'lead':3 'west':4 'crawl':2 - 'fork':3 'lead':4 'left':2 'northeast':5 -(3 rows) -</pre> - -Now we can search this collection of document vectors -using the <tt>@@</tt> operator and a <tt>tsquery</tt> -that specifies the combination of lexemes we are looking for. -Note that while vectors simply list lexemes, -queries always combine them with the operators -‘<tt>&</tt>’ and, -‘<tt>|</tt>’ or, -and ‘<tt>!</tt>’ not, -plus parentheses for grouping. -Some examples of the query syntax: -<table align=center> -<tr> - <td>‘find documents with the word <i>forks</i> in them’<br> - <td><tt>'forks'</tt> -<tr> - <td>‘... with both <i>forks</i> and <i>leads</i>’<br> - <td><tt>'forks & leads'</tt> -<tr> - <td>‘... with either <i>forks</i> or <i>leads</i>’<br> - <td><tt>'forks | leads'</tt> -<tr> - <td>‘... 
with either <i>forks</i> or <i>leads</i>, - but without <i>crawl</i>’<br> - <td><tt>'(forks|leads) & !crawl'</tt> -</table> -The tsearch2 module -provides a <tt>to_tsquery()</tt> function for creating queries -that uses the same process as <tt>to_tsvector()</tt> uses -to reduce words to lexemes. -For instance, -it will remove the <i>-s</i> from the plurals in the last example above: - -<pre> -=# <b>SELECT to_tsquery('(leads|forks) & !crawl')</b> - to_tsquery --------------------------------- - ( 'lead' | 'fork' ) & !'crawl' -(1 row) -</pre> - -Again, -this is critically important because the search operator <tt>@@</tt> -only finds <i>exact</i> matches -between the words in a query and the words in a vector; -if the document vector lists the lexeme <i>fork</i> -but the query looks for the plural form <i>forks</i>, -the query would not match that document. -Thanks to the symmetry between our process -for producing vectors and queries, however, -the above searches return correct results: - -<pre> -=# <b>SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')</b> - vector ------------------------------------------- - 'fork':3 'path':2 - 'fork':3 'lead':4 'left':2 'northeast':5 -(2 rows) -</pre> - -You may want to try the other queries shown above, -and perhaps invent some of your own. -<p> -You should not include stop words in a query, -since you cannot search for words you have discarded. -If you throw out the word <i>the</i> when building vectors, for example, -your index will obviously not know which documents included it. 
-The <tt>to_tsquery()</tt> function will automatically detect this -and give you an error to prevent this mistake: - -<pre> -=# <b>SELECT to_tsquery('the')</b> -NOTICE: Query contains only stopword(s) or doesn't contain lexem(s), ignored - to_tsquery ------------- - -(1 row) -</pre> - -But if you ever build vectors and queries using your own routines, -a possibility we will discuss later, -then you will need to enforce this rule yourself. - -<blockquote><i> -Now that you understand how vectors and queries work together, -you are prepared to tackle many additional topics: -how to distribute searching across many servers; -how to customize the process -by which tsearch2 turns documents and queries into lexemes, -or use a process of your own; -and how to sort and display search results to your users. -But before discussing these detailed questions, -we will build a simple search engine -to see how easily its basic features work together. -</i></blockquote> - -<h2><a name=simple_search>A Simple Search Engine</a></h2> - -<blockquote><i> -In this section we build a simple search engine out of SQL functions -that use the vector and query types described in the previous section. -While this example is simpler -than a search engine that has to interface with the outside world, -it will illustrate the basic principles of building a search engine, -and better prepare you for developing your own. -</i></blockquote> -Building a search engine involves only a few improvements -upon the rudimentary vector searches described in the last section. -<ul> -<li>Because the user wants to read documents, not vectors, - you must provide some way - for the full text of each document to be accessed — - either by storing the entire text of each document in the database, - or storing an identifier - like a URL, file name, or document routing number - that lets you fetch the document from other storage. 
-<li>You can make it easier for user interface code to refer to each document - by providing a unique identifier for each document, - perhaps with a <tt>SERIAL</tt> column. -<li>Search results should be ordered by relevance. - If you leave word positions in your vectors, - you can either have PostgreSQL <tt>ORDER</tt> your results - <tt>BY</tt> a ranking function, - or you can fetch the vectors yourself and perform your own sort. - If you choose to ignore word positions or strip them from your vectors, - you will have to determine relevance yourself, - using either the full text of the document - or other information about each document you may possess. -<li>For each document returned by a search, - you will usually want to display a summary called a <i>headline</i> - that shows short excerpts - illustrating how the document uses the query words. - Headlines are usually generated from the full text of the document, - not from position information in the <tt>tsvector</tt>, - since excerpts lacking stop words, punctuation, and suffixes - would not be comprehensible. - If you store the full text of each document in the database, - headlines can be generated very simply by a tsearch2 function. - If you store your documents elsewhere, - then you will either have to transmit each document to the database - every time you want to run the headline function on it, - or use your own headline code outside of the database. -</ul> -<p> -We can easily construct a simple search engine -that accomplishes these goals. 
-First we build a table that, for each document, -stores a unique identifier, the full text of the document, -and its <tt>tsvector</tt>: - -<pre> -=# <b>CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )</b> -=# <b>CREATE INDEX docs_index ON docs USING gist(vector);</b> -</pre> - -Note that although searches will still work -on tables where you have neglected -to create a <tt>gist()</tt> index over your vectors, -they will run much more slowly -since they will have to compare the query -against every document vector in the table. -<p> -Because the table we have created -stores each document in two different ways — -both as text and as a vector — -our <tt>INSERT</tt> statements must provide the document in both forms. -While more advanced PostgreSQL programmers -might accomplish this with a database trigger or rule, -for this simple example we will use a small SQL function: - -<pre> -=# <b>CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS - 'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'</b> -</pre> - -Now, by calling <tt>insdoc()</tt> several times, -we can populate our table with documents: - -<pre> -=# <b>SELECT insdoc('A low crawl over cobbles leads inward to the west.')</b> -=# <b>SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')</b> -=# <b>SELECT insdoc('You are crawling over cobbles in a low passage.')</b> -=# <b>SELECT insdoc('Cavernous passages lead east, north, and south.')</b> -=# <b>SELECT insdoc('To the east a low wide crawl slants up.')</b> -=# <b>SELECT insdoc('You are in the south side chamber.')</b> -=# <b>SELECT insdoc('The passage here is blocked by a recent cave-in.')</b> -=# <b>SELECT insdoc('You are in a splendid chamber thirty feet high.')</b> -</pre> - -Now we can build a search function. -Its <tt>SELECT</tt> statement is based upon -the same <tt>@@</tt> operation illustrated in the previous section. 
-But instead of returning matching vectors, -we return for each document -its <tt>SERIAL</tt> identifier, so the user can retrieve it later; -a headline that illustrates its use of the search terms; -and a ranking with which we also order the results. -Our search operation can be coded as a single <tt>SELECT</tt> statement -returning its own kind of table row, -which we call a <tt>finddoc_t</tt>: - -<pre> -=# <b>CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)</b> -=# <b>CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS ' - SELECT id, headline(doc, q), rank(vector, q) - FROM docs, to_tsquery($1) AS q - WHERE vector @@ q ORDER BY rank(vector, q) DESC'</b> -</pre> - -This function is a rather satisfactory search engine. -Here is one example search, -after which the user fetches the top-ranking document itself; -with similar commands you can try queries of your own: - -<pre> -=# <b>SELECT * FROM finddoc('passage|crawl')</b> - id | headline | rank -----+-------------------------------------------------------+------ - 3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19 - 1 | <b>crawl</b> over cobbles leads inward to the west. | 0.1 - 4 | <b>passages</b> lead east, north, and south. | 0.1 - 5 | <b>crawl</b> slants up. | 0.1 - 7 | <b>passage</b> here is blocked by a recent cave-in. | 0.1 -(5 rows) -=# <b>SELECT doc FROM docs WHERE id = 3</b> - doc -------------------------------------------------- - You are crawling over cobbles in a low passage. -(1 row) -</pre> - -While by default the <tt>headline()</tt> function -surrounds matching words with <tt><b></tt> and <tt></b></tt> -in order to distinguish them from the surrounding text, -you can provide options that change its behavior; -consult the tsearch2 Reference for more details about -<a href="tsearch2-ref.html#headlines">Headline Functions</a>. 
-<p> -Though a search may match hundreds or thousands of documents, -you will usually present only ten or twenty results to the user at a time. -This can be most easily accomplished -by limiting your query with a <tt>LIMIT</tt> -and an <tt>OFFSET</tt> clause — -to display results ten at a time, for example, -you would generate your first page of results -with <tt>LIMIT</tt> <tt>10</tt> <tt>OFFSET</tt> <tt>0</tt>, -your second page -with <tt>LIMIT</tt> <tt>10</tt> <tt>OFFSET</tt> <tt>10</tt>, -your third page -with <tt>LIMIT</tt> <tt>10</tt> <tt>OFFSET</tt> <tt>20</tt>, -and so forth. -There are two problems with this approach, however. -<p> -The first problem is the strain of running the query over again -for every page of results the user views. -For small document collections or lightly loaded servers, -this may not be a problem; -but the impact can be high -when a search must repeatedly rank and sort -the same ten thousand results -on an already busy server. -So instead of selecting only one page of results, -you will probably use <tt>LIMIT</tt> and <tt>OFFSET</tt> -to return a few dozen or few hundred results, -which you can cache and display to the user one page at a time. -Whether a result cache rewards your effort -will depend principally on the behavior of your users — -how often they even view the second page of results, for instance. -<p> -The second issue solved by caching involves consistency. -If the database is changing while the user browses their results, -then documents might appear and disappear as they page through them. -In some cases the user might even miss a particular result — -perhaps the one they were looking for — -if, say, its rank improves from 31st to 30th -after they load results 21–30 but before they view results 31–40. -While many databases are static or infrequently updated, -and will not present this problem, -users searching very dynamic document collections -might benefit from the stable results that caches yield. 
- -<blockquote><i> -Having seen the features of a search engine -implemented entirely within the database, -we will learn about some specific tsearch2 features. -First we will look in more detail at document ranking. -</i></blockquote> - -<h2><a name=weights>Ranking and Position Weights</a></h2> - -<blockquote><i> -When we built our simple search engine, -we used the </i><tt>rank()</tt><i> function to order our results. -Here we describe tsearch2 ranking in more detail. -</i></blockquote> - -There are two functions with which tsearch2 can rank search results. -They both use the lexeme positions listed in the <tt>tsvector</tt>, -so you cannot rank vectors -from which these have been removed with <tt>strip()</tt>. -The <tt>rank()</tt> function existed in older versions of OpenFTS, -and has the feature that you can assign different weights -to words from different sections of your document. -The <tt>rank_cd()</tt> uses a recent technique for weighting results -and also allows different weight to be given -to different sections of your document (since 8.2). -<p> -Both ranking functions allow you to specify, -as an optional last argument, -whether you want their results <i>normalized</i> — -whether the rank returned should be adjusted for document length. -Specifying a last argument of <tt>0</tt> (zero) makes no adjustment; -<tt>1</tt> (one) divides the document rank -by the logarithm of the document length; -and <tt>2</tt> divides it by the plain length. -In all of these examples we omit this optional argument, -which is the same as specifying zero — -we are making no adjustment for document length. -<p> -The <tt>rank_cd()</tt> function uses an experimental measurement -called <i>cover density ranking</i> that rewards documents -when they make frequent use of the search terms -that are close together in the document. 
-You can read about the algorithm in more detail -in Clarke et al., - “<a href="http://citeseer.nj.nec.com/clarke00relevance.html" ->Relevance Ranking for One to Three Term Queries</a>.” -An optional first argument allows you to tune their formula; -for details -see the <a href="tsearch2-ref.html#ranking">section on ranking</a> -in the Reference. -<p> -Currently tsearch2 supports four different weight labels: -<tt>'D'</tt>, the default weight; -and <tt>'A'</tt>, <tt>'B'</tt>, and <tt>'C'</tt>. -All vectors created with <tt>to_tsvector()</tt> -assign the weight <tt>'D'</tt> to each position, -which as the default is not displayed when you print a vector out. -<p> -If you want positions with weights other than <tt>'D'</tt>, -you have two options: -either you can author a vector directly through the <tt>::tsvector</tt> -casting operation, -as described in the following section, -which lets you give each position whichever weight you want; -or you can pass a vector through the <tt>setweight()</tt> function -which sets all of its position weights to a single value. -An example of the latter: - - -<pre> -=# <b>SELECT vector FROM docs WHERE id = 3</b> - vector ----------------------------------------- - 'low':8 'cobbl':5 'crawl':3 'passag':9 -(1 row) -=# <b>SELECT setweight(vector, 'A') FROM docs WHERE id = 3</b> - setweight --------------------------------------------- - 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A -(1 row) -</pre> - - -Merely changing all of the weights in a vector is not very useful, -of course, -since this results still in all words having the same weight. -But if we parse different parts of a document separately, -giving each section its own weight, -and then concatenate the vectors of each part into a single vector, -the result can be very useful. 
-We can construct a simple example -in which document titles are given greater weight -than text in the body of the document: - - -<pre> -=# <b>CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )</b> -=# <b>CREATE INDEX tdocs_index ON tdocs USING gist(vector);</b> -=# <b>CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS - 'INSERT INTO tdocs (title, doc, vector) - VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'</b> -</pre> - - -Now words from a document title will be weighted differently -than those in the main text -if we provide the title and body as separate arguments: - - -<pre> -=# <b>SELECT instdoc('Spendid Chamber', - 'The walls are frozen rivers of orange stone.')</b> - instdoc ---------- - -(1 row) -=# <b>SELECT vector FROM tdocs</b> - vector ------------------------------------------------------------------------------- - 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A -(1 row) -</pre> - - -Note that although the necessity is unusual, -you can constrain search terms -to only match words from certain sections -by following them with a colon -and a list of the sections in which the word can occur; -by default this list is <tt>'ABCD'</tt> -so that search terms match words from all sections. -For example, -here we search for a word both generally, -and then looking only for specific weights: - - -<pre> -=# <b>SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')</b> - title | doc ------------------+---------------------------------------------- - Spendid Chamber | The walls are frozen rivers of orange stone. -(1 row) -=# <b>SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')</b> - title | doc ------------------+---------------------------------------------- - Spendid Chamber | The walls are frozen rivers of orange stone.
-(1 row) -=# <b>SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')</b> - title | doc --------+----- -(0 rows) -</pre> - - - - -<blockquote><i> -Our examples so far use tsearch2 to parse our documents into vectors. -When your application needs absolute control over vector content, -you will want to use direct type casting, -which is described in the next section. -</i></blockquote> - -<h2><a name=casting>Casting Vectors and Queries</a></h2> - -<blockquote><i> -While tsearch2 has powerful and flexible ways -to process documents and turn them into document vectors, -you will sometimes want to parse documents on your own -and place the results directly in vectors. -Here we show you how. -</i></blockquote> - -In the preceding examples, -we used the <tt>to_tsvector()</tt> function -when we needed a document's text reduced to a document vector. -We saw that the function stripped whitespace and punctuation, -eliminated common words, -and altered suffixes to reduce words to a common form. -While these operations are often desirable, -and while in the sections below -we will gain precise control over this process, -there are occasions on which -you want to avoid the changes that <tt>to_tsvector()</tt> makes to text -and specify explicitly the words that you want in your vectors. -Or you may want to create queries directly -rather than through <tt>to_tsquery()</tt>. -<p> -For example, -you may have already developed your own routine -for reducing your documents to searchable lexemes, -and do not want your carefully generated terms altered -by passing them through <tt>to_tsvector()</tt>. -Or you might be developing and debugging parsing routines of your own -that you are not ready to load into the database. -In either case, -you will find that direct insertion is easily accomplished -if you simply follow some simple rules. 
-<p> -Vectors are created directly -when you cast a string of whitespace separated lexemes -to the <tt>tsvector</tt> type: - - -<pre> -=# <b>select 'the only exit is the way you came in'::tsvector</b> - tsvector --------------------------------------------------- - 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only' -(1 row) -</pre> - - -Notice that the conversion interpreted the string -simply as a list of lexemes to be included in the vector. -Their order was lost, -as was the number of times each lexeme appeared. -You must keep in mind that directly creating vectors with casting -is <i>not</i> an alternative means of parsing; -it is a way of directly entering lexemes into a vector <i>without</i> parsing. -<p> -Queries can also be created through casting, -if you separate lexemes with boolean operators -rather than with whitespace. -When creating your own vectors and queries, -remember that the search operator <tt>@@</tt> -finds only <i>exact</i> matches between query lexemes and vector lexemes -— -if they are not exactly the same string, -they will not be considered a match. -<p> -To include lexeme positions in your vector, -write the positions exactly the way tsearch2 displays them -when it prints vectors: -by following each lexeme with a colon -and a comma-separated list of integer positions. -If you list a lexeme more than once, -then all the positions listed for it are combined into a single list. 
-For example, -here are two ways of writing the same vector, -depending on whether you mention ‘<tt>the</tt>’ twice -or combine its positions into a list yourself: - - -<pre> -=# <b>select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector</b> - tsvector --------------------------------------------------------------------- - 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2 -(1 row) -=# <b>select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector</b> - tsvector --------------------------------------------------------------------- - 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2 -(1 row) -</pre> - - -Things can get slightly tricky -if you want to include apostrophes, backslashes, or spaces -inside your lexemes -(wanting to include either of the latter would be unusual, -but they can be included if you follow the rules). -The main problem is that the apostrophe and backslash -are important <i>both</i> to PostgreSQL when it is interpreting a string, -<i>and</i> to the <tt>tsvector</tt> conversion function. -You may want to review section -<a href="http://www.postgresql.org/docs/current/static/sql-syntax.html#SQL-SYNTAX-STRINGS"> -“String Constants”</a> -in the PostgreSQL documentation before proceeding. -<p> -When you cast strings directly into vectors: -<ul> -<li>The string is interpreted as a whitespace-separated list of lexemes, - any of which can be suffixed with a colon and a list of positions. -<li>A lexeme can be quoted by preceding it with an apostrophe, - in which case it runs until the next apostrophe; - otherwise a lexeme ends with the first whitespace or colon encountered. -<li>Any character preceded by a backslash, - including whitespace, the apostrophe, the colon, and the backslash itself, - loses its normal meaning and is treated as a letter. - Backslashes are effective - both inside and outside of apostrophe-quoted lexemes. 
-<li>A lexeme can be suffixed with a list of positions - by appending a colon and a comma-separated list of integers, - each of which can itself be followed by a letter - to designate a position weight - (position weights are <a href="#weights">described above</a>). -</ul> - -Here are some example strings, -showing the lexeme you want to insert -together with the string that the <tt>::tsvector</tt> operator -needs to see, -and how you would type that string at the PostgreSQL prompt: - -<table align=center> -<tr> -<td><i>For the lexeme...</i> -<td><i>you need the string...</i> -<td><i>which you can type as:</i> -<tr> -<td><tt>nugget</tt> -<td><tt>nugget</tt> -<td><tt>'nugget'</tt> -<tr> -<td><tt>won't</tt> -<td><tt>won't</tt> -<td><tt>'won''t'</tt> -<tr> -<td><tt>pinin'</tt> -<td><tt>pinin'</tt> -<td><tt>'pinin'''</tt> -<tr> -<td><tt>'bout</tt> -<td><tt>\'bout</tt> -<td><tt>'\\''bout'</tt> -<tr> -<td><tt>white mist</tt> -<td><tt>white\ mist</tt> -<td><tt>'white\\ mist'</tt> -<tr> -<td align=right><tt><i>or:</i></tt> -<td><tt>'white mist'</tt> -<td><tt>'''white mist'''</tt> -<tr> -<td><tt>won't budge</tt> -<td><tt>won\'t\ budge</tt> -<td><tt>'won\\''t\\ budge'</tt> -<tr> -<td align=right><tt><i>or:</i></tt> -<td><tt>'won\'t budge'</tt> -<td><tt>'''won\\''t budge'''</tt> -<tr> -<td><tt>back\slashed</tt> -<td><tt>back\\slashed</tt> -<td><tt>'back\\\\slashed'</tt> -</table> - -Remember to use the quoted quoting shown at the right -only when typing in strings as part of a PostgreSQL query. -If you are providing strings through a library -that automatically quotes them -or provides them in binary form to PostgreSQL, -then you can use the strings in the middle instead — -suitably quoted in the language you are using, of course.
-<p> -Position weights are <a href="#weights">described above</a> -and can be written exactly as they will be displayed -when you select a weighted vector: - -<pre> -=# <b>select 'weighty:1,3A trivial:2B,4'::tsvector</b> - tsvector -------------------------------- - 'trivial':2B,4 'weighty':1,3A -(1 row) -</pre> - -<p> -Note that if you are composing SQL queries -in a scripting language like Perl or Python, -that itself considers quotes and backslashes special, -then you may have another quoting layer to deal with -on top of the two layers already shown above. -In such cases you may want to write a function -that performs the necessary quoting for you. - -<blockquote><i> -Having seen how to create vectors of your own, -it is time to learn how the native tsearch2 parser -reduces documents to vectors. -</i></blockquote> - -<h2><a name=parsing_lexing>Parsing and Lexing</a></h2> - -<blockquote><i> -The previous section -described how you can bypass the parser provided by tsearch2 -and populate your table of documents -with vectors of your own devising. -But for those interested in the native tsearch2 facilities, -we present here an overview of how it goes about -reducing documents to vectors. -</i></blockquote> - -The <tt>to_tsvector()</tt> function reduces documents to vectors -in two stages. -First, a <i>parser</i> breaks the input document -into short sequences of text called <i>tokens</i>. -Each token is usually a word, space, or piece of punctuation, -though some parsers return larger and more exotic items -like HTML tags as single tokens. -Each token returned by the parser -is either discarded -or passed to a <i>dictionary</i> that converts it into a lexeme. -The resulting lexemes are collected into a vector and returned. -<p> -The choice of which parser and dictionaries <tt>to_tsvector()</tt> should use -is controlled by your choice of <i>configuration</i>.
-The tsearch2 module comes with several configurations, -and you can define more of your own; -in fact the creation of a new configuration is illustrated below, -in the section on position weights. -<p> -To learn about parsing in more detail, -we will study this example: - -<pre> -=# <b>select to_tsvector('default', - 'The walls extend upward for well over 100 feet.')</b> - to_tsvector ----------------------------------------------------------- - '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4 -(1 row) -</pre> - -Unlike the <tt>to_tsvector()</tt> calls used in the above examples, -this one specifies the <tt>'default'</tt> configuration explicitly. -When we called <tt>to_tsvector()</tt> in earlier examples -with only one argument, -it used the <i>current</i> configuration, -which is chosen automatically based on your <tt>LOCALE</tt> -if that locale is mentioned in the <tt>pg_ts_cfg</tt> table -(which is shown under the first bullet in the description below). -If your locale is not listed in the table, -your attempts to use the current configuration will return: - -<pre> -ERROR: Can't find tsearch2 config by locale -</pre> - -You can always change the current configuration manually -by calling the <tt>set_curcfg()</tt> function -described in the section on -<a href="tsearch2-ref.html#configurations">Configurations</a> -in the Reference. -<p> -Each configuration serves as an index into two different tables: -in <tt>pg_ts_cfg</tt> it determines -which parser will break our text into tokens, -and in <tt>pg_ts_cfgmap</tt> -it directs each token to a dictionary for processing. -The steps in detail are: - -<ul> -<li class=big> -<p>First, our text is parsed, -using the parser listed for our configuration in the <tt>pg_ts_cfg</tt> table. 
-We are using the <tt>'default'</tt> configuration, -so the table tells us to use the <tt>'default'</tt> parser: - -<pre> -=# <b>SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'</b> - ts_name | prs_name | locale ----------+----------+-------- - default | default | C -(1 row) -</pre> - -So our text will be parsed as though we had called: - -<pre> -=# <b>select * from parse('default', - 'The walls extend upward for well over 100 feet.')</b> -</pre> - -This breaks the text into a list of tokens -which are each labelled with an integer type: -<p align=center> -The<sub>1</sub>♦<sub>12</sub ->walls<sub>1</sub>♦<sub>12</sub ->extend<sub>1</sub>♦<sub>12</sub ->upward<sub>1</sub>♦<sub>12</sub ->for<sub>1</sub>♦<sub>12</sub ->well<sub>1</sub>♦<sub>12</sub ->over<sub>1</sub>♦<sub>12</sub ->100<sub>22</sub>♦<sub>12</sub ->feet<sub>1</sub>.<sub>12</sub> -<p> -Each word has been assigned type 1; -each space (represented here by a diamond) and the period, type 12; -and the number one hundred, type 22. -We can retrieve the alias for each type -through the <tt>token_type</tt> function: - -<pre> -=# <b>select * from token_type('default') - where tokid = 1 or tokid = 12 or tokid = 22</b> - tokid | alias | descr --------+-------+------------------ - 1 | lword | Latin word - 12 | blank | Space symbols - 22 | uint | Unsigned integer -(3 rows) -</pre> - - -<li class=big> -Next, the tokens are assigned to dictionaries -by looking up their type aliases in <tt>pg_ts_cfgmap</tt> -to determine which dictionary should process each token. 
-Since we are using the <tt>'default'</tt> configuration: - -<pre> -=# <b>select * from pg_ts_cfgmap where ts_name = 'default' and - (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')</b> - ts_name | tok_alias | dict_name ----------+-----------+----------- - default | lword | {en_stem} - default | uint | {simple} -(2 rows) -</pre> - -Since this map provides no dictionary for <tt>blank</tt> tokens, -the spaces and period are simply discarded, -leaving nine tokens, -which are then numbered by their position: -<p align=center> -The<sup>1</sup> -walls<sup>2</sup> -extend<sup>3</sup> -upward<sup>4</sup> -for<sup>5</sup> -well<sup>6</sup> -over<sup>7</sup> -100<sup>8</sup> -feet<sup>9</sup> - -<li class=big> -Finally, the words are reduced to lexemes by their respective dictionaries. -The <tt>100</tt> is submitted to the <tt>simple</tt> dictionary, -which returns tokens unaltered except for making them lowercase: - -<pre> -=# <b>select lexize('simple', '100')</b> - lexize --------- - {100} -(1 row) -</pre> - -The other words are submitted to <tt>en_stem</tt> -which reduces each English word to a linguistic stem, -and then discards stems which belong to its list of stop words; -you can see the list of stop words -in the file whose path is in the <tt>dict_initoption</tt> field -of the <tt>pg_ts_dict</tt> table entry for <tt>en_stem</tt>. 
-The first three words of our text illustrate respectively -an <tt>en_stem</tt> stop word, -a word which <tt>en_stem</tt> alters by stemming, -and a word which <tt>en_stem</tt> leaves alone: - -<pre> -=# <b>select lexize('en_stem', 'The')</b> - lexize --------- - {} -(1 row) -=# <b>select lexize('en_stem', 'walls')</b> - lexize --------- - {wall} -(1 row) -=# <b>select lexize('en_stem', 'extend')</b> - lexize ----------- - {extend} -(1 row) -</pre> - -Once <tt>en_stem</tt> is done discarding stop words and stemming the rest, -we are left with: -<p align=center> -wall<sup>2</sup> -extend<sup>3</sup> -upward<sup>4</sup> -well<sup>6</sup> -100<sup>8</sup> -feet<sup>9</sup> -<p> -Which is precisely the result of the example that began this section. -</ul> -Query words are stemmed by the <tt>to_tsquery()</tt> function -using the same scheme to determine the dictionary for each token, -with the difference that the query parser recognizes as special -the boolean operators that separate query words. - - -<h2><a name="ref">Additional information</a></h2> -More information about tsearch2 is available from the -<a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2">tsearch2</a> page. -Also, it is worth checking the -<a href="http://www.sai.msu.su/~megera/wiki/Tsearch2">tsearch2 wiki</a> pages. - - -</body> -</html> - - diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html deleted file mode 100644 index 7edcc55a9b..0000000000 --- a/contrib/tsearch2/docs/tsearch2-ref.html +++ /dev/null @@ -1,842 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> -<html><head> - -<title>tsearch2 reference</title></head> - -<body> -<h1 align="center">The tsearch2 Reference</h1> - -<p align="center"> -Brandon Craig Rhodes<br>30 June 2003 (edited by Oleg Bartunov, 2 Aug 2003).
-<br>Massive update for 8.2 release by Oleg Bartunov, October 2006 -</p> -<p> -This Reference documents the user types and functions -of the tsearch2 module for PostgreSQL. -An introduction to the module is provided -by the <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html">tsearch2 Guide</a>, -a companion document to this one. -</p> - -<h2>Table of Contents</h2> -<blockquote> -<a href="#vq">Vectors and Queries</a><br> -<a href="#vqo">Vector Operations</a><br> -<a href="#qo">Query Operations</a><br> -<a href="#fts">Full Text Search Operator</a><br> -<a href="#configurations">Configurations</a><br> -<a href="#testing">Testing</a><br> -<a href="#parsers">Parsers</a><br> -<a href="#dictionaries">Dictionaries</a><br> -<a href="#ranking">Ranking</a><br> -<a href="#headlines">Headlines</a><br> -<a href="#indexes">Indexes</a><br> -<a href="#tz">Thesaurus dictionary</a><br> -</blockquote> - - - - -<h2><a name="vq">Vectors and Queries</a></h2> - -Vectors and queries both store lexemes, -but for different purposes. -A <tt>tsvector</tt> stores the lexemes -of the words that are parsed out of a document, -and can also remember the position of each word. -A <tt>tsquery</tt> specifies a boolean condition among lexemes. -<p> -Any of the following functions with a <tt><i>configuration</i></tt> argument -can use either an integer <tt>id</tt> or textual <tt>ts_name</tt> -to select a configuration; -if the option is omitted, then the current configuration is used. -For more information on the current configuration, -read the next section on Configurations. -</p> - -<h3><a name="vqo">Vector Operations</a></h3> - -<dl><dt> -<tt>to_tsvector( <em>[</em><i>configuration</i>,<em>]</em> - <i>document</i> TEXT) RETURNS TSVECTOR</tt> -</dt><dd> - Parses a document into tokens, - reduces the tokens to lexemes, - and returns a <tt>tsvector</tt> which lists the lexemes - together with their positions in the document. 
- For the best description of this process, - see the section on <a href="http://www.sai.msu.su/%7Emegera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html#ps">Parsing and Stemming</a> - in the accompanying tsearch2 Guide. -</dd><dt> - <tt>strip(<i>vector</i> TSVECTOR) RETURNS TSVECTOR</tt> -</dt><dd> - Return a vector which lists the same lexemes - as the given <tt><i>vector</i></tt>, - but which lacks any information - about where in the document each lexeme appeared. - While the returned vector is thus useless for relevance ranking, - it will usually be much smaller. -</dd><dt> - <tt>setweight(<i>vector</i> TSVECTOR, <i>letter</i>) RETURNS TSVECTOR</tt> -</dt><dd> - This function returns a copy of the input vector - in which every location has been labeled - with either the <tt><i>letter</i></tt> - <tt>'A'</tt>, <tt>'B'</tt>, or <tt>'C'</tt>, - or the default label <tt>'D'</tt> - (which is the default with which new vectors are created, - and as such is usually not displayed). - These labels are retained when vectors are concatenated, - allowing words from different parts of a document - to be weighted differently by ranking functions. -</dd> -<dt> - <tt><i>vector1</i> || <i>vector2</i></tt><BR> - <tt>concat(<i>vector1</i> TSVECTOR, <i>vector2</i> TSVECTOR) - RETURNS TSVECTOR</tt> -</dt><dd> - Returns a vector which combines the lexemes and position information - in the two vectors given as arguments. - Position weight labels (described in the previous paragraph) - are retained intact during the concatenation. - This has at least two uses. - First, - if some sections of your document - need be parsed with different configurations than others, - you can parse them separately - and concatenate the resulting vectors into one. 
- Second, - you can weight words from some sections of your document - more heavily than those from others by: - parsing the sections into separate vectors; - assigning the vectors different position labels - with the <tt>setweight()</tt> function; - concatenating them into a single vector; - and then providing a <tt><i>weights</i></tt> argument - to the <tt>rank()</tt> function - that assigns different weights to positions with different labels. -</dd><dt> - <tt>length(<i>vector</i> TSVECTOR) RETURNS INT4</tt> -</dt><dd> - Returns the number of lexemes stored in the vector. -</dd><dt> - <tt><i>text</i>::TSVECTOR RETURNS TSVECTOR</tt> -</dt><dd> - Directly casting text to a <tt>tsvector</tt> - allows you to directly inject lexemes into a vector, - with whatever positions and position weights you choose to specify. - The <tt><i>text</i></tt> should be formatted - like the vector would be printed by the output of a <tt>SELECT</tt>. - See the <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html#casting">Casting</a> - section in the Guide for details. -</dd><dt> - <tt>tsearch2(<i>vector_column_name</i>[, (<i>my_filter_name</i> | <i>text_column_name1</i>) [...] ], <i>text_column_nameN</i>)</tt> - </dt><dd> -The <tt>tsearch2()</tt> trigger is used to automatically update <i>vector_column_name</i>; <i>my_filter_name</i> -is the name of a function used to preprocess <i>text_column_name</i>. There can be many -functions and text columns specified in a <tt>tsearch2()</tt> trigger. -The following rule is used: -a function is applied to all subsequent text columns until the next function occurs. -For example, the function <tt>dropatsymbol</tt> replaces all occurrences of the <tt>@</tt> -sign with a space.
-<pre> -CREATE FUNCTION dropatsymbol(text) RETURNS text -AS 'select replace($1, ''@'', '' '');' -LANGUAGE SQL; - -CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT -ON tblMessages FOR EACH ROW EXECUTE PROCEDURE -tsearch2(tsvector_column,dropatsymbol, strMessage); -</pre> -</dd> - -<dt> -<tt>stat(<i>sqlquery</i> text [, <i>weight</i> text]) RETURNS SETOF statinfo</tt> -</dt><dd> -Here <tt>statinfo</tt> is a type, defined as -<tt> -CREATE TYPE statinfo as (<i>word</i> text, <i>ndoc</i> int4, <i>nentry</i> int4) -</tt> and <i>sqlquery</i> is a query which returns a <tt>tsvector</tt> column. -<P> -This returns statistics (the number of documents <i>ndoc</i> and total number <i>nentry</i> of <i>word</i> -in the collection) about column <i>vector</i> <tt>tsvector</tt>. -This is useful for checking how good your configuration is and -for finding stop-word candidates. For example, to find the 10 most frequent words: -<pre> -=# select * from stat('select vector from apod') order by ndoc desc, nentry desc,word limit 10; -</pre> -Optionally, one can specify <i>weight</i> to obtain statistics about words with a specific weight. -<pre> -=# select * from stat('select vector from apod','a') order by ndoc desc, nentry desc,word limit 10; -</pre> - -</dd> -<dt> -<tt>TSVECTOR < TSVECTOR</tt><BR> -<tt>TSVECTOR <= TSVECTOR</tt><BR> -<tt>TSVECTOR = TSVECTOR</tt><BR> -<tt>TSVECTOR >= TSVECTOR</tt><BR> -<tt>TSVECTOR > TSVECTOR</tt> -</dt><dd> -All btree operations are defined for the <tt>tsvector</tt> type. <tt>tsvectors</tt> are compared -with each other using lexicographical order. -</dd> -</dl> - -<h3><a name="qo">Query Operations</a></h3> - -<dl> -<dt> - <tt>to_tsquery( <em>[</em><i>configuration</i>,<em>]</em> - <i>querytext</i> text) RETURNS TSQUERY</tt> -</dt> -<dd> - Parses a query, - which should be single words separated by the boolean operators - "<tt>&</tt>" and, - "<tt>|</tt>" or, - and "<tt>!</tt>" not, - which can be grouped using parentheses.
- Each word is reduced to a lexeme using the current - or specified configuration. - A weight class can be assigned to each lexeme entry - to restrict the search region - (see <tt>setweight</tt> for explanation), for example - "<tt>fat:a & rats</tt>". -</dd> -<dt> - <tt>plainto_tsquery( <em>[</em><i>configuration</i>,<em>]</em> - <i>querytext</i> text) RETURNS TSQUERY</tt> -</dt> -<dd> -Transforms unformatted text to tsquery. It is the same as to_tsquery, -but assumes the "<tt>&</tt>" boolean operator between words and doesn't -recognize weight classes. -</dd><dt> - - <tt>querytree(<i>query</i> TSQUERY) RETURNS text</tt> -</dt><dd> -This returns the query which is actually used when searching a GiST index. -</dd><dt> - <tt><i>text</i>::TSQUERY RETURNS TSQUERY</tt> -</dt><dd> - Directly casting text to a <tt>tsquery</tt> - allows you to directly inject lexemes into a query, - with whatever positions and position weight flags you choose to specify. - The <tt><i>text</i></tt> should be formatted - like the query would be printed by the output of a <tt>SELECT</tt>. - See the <a href="http://www.sai.msu.su/%7Emegera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html#casting">Casting</a> - section in the Guide for details. -</dd> -<dt> - <tt>numnode(<i>query</i> TSQUERY) RETURNS INTEGER</tt> -</dt><dd> -This returns the number of nodes in the query tree. -</dd><dt> - <tt>TSQUERY && TSQUERY RETURNS TSQUERY</tt> -</dt><dd> -AND-ed TSQUERY -</dd><dt> - <tt>TSQUERY || TSQUERY RETURNS TSQUERY</tt> -</dt> <dd> - OR-ed TSQUERY -</dd><dt> - <tt>!! TSQUERY RETURNS TSQUERY</tt> -</dt> <dd> - negation of TSQUERY -</dd> -<dt> -<tt>TSQUERY < TSQUERY</tt><BR> -<tt>TSQUERY <= TSQUERY</tt><BR> -<tt>TSQUERY = TSQUERY</tt><BR> -<tt>TSQUERY >= TSQUERY</tt><BR> -<tt>TSQUERY > TSQUERY</tt> -</dt><dd> -All btree operations are defined for the <tt>tsquery</tt> type. <tt>tsqueries</tt> are compared -with each other using lexicographical order.
-</dd> -</dl> - -<h3>Query rewriting</h3> -Query rewriting is a set of functions and operators for the tsquery type. -It allows you to control the search at query time without reindexing (unlike the thesaurus), for example, -to expand a search using synonyms (new york, big apple, nyc, gotham). -<P> -The <tt><b>rewrite()</b></tt> function changes the original <i>query</i> by replacing <i>target</i> with <i>sample</i>. -There are three ways to use the <tt>rewrite()</tt> function. Notice that the arguments of the <tt>rewrite()</tt> -function can be column names of type <tt>tsquery</tt>. -<pre> -create table rw (q TSQUERY, t TSQUERY, s TSQUERY); -insert into rw values('a & b','a', 'c'); -</pre> -<dl> -<dt> <tt>rewrite (<i>query</i> TSQUERY, <i>target</i> TSQUERY, <i>sample</i> TSQUERY) RETURNS TSQUERY</tt> -</dt> -<dd> -<pre> -=# select rewrite('a & b'::TSQUERY, 'a'::TSQUERY, 'c'::TSQUERY); - rewrite - ----------- - 'c' & 'b' -</pre> -</dd> -<dt> <tt>rewrite (ARRAY[<i>query</i> TSQUERY, <i>target</i> TSQUERY, <i>sample</i> TSQUERY]) RETURNS TSQUERY</tt> -</dt> -<dd> -<pre> -=# select rewrite(ARRAY['a & b'::TSQUERY, t,s]) from rw; - rewrite - ----------- - 'c' & 'b' -</pre> -</dd> -<dt> <tt>rewrite (<i>query</i> TSQUERY,'select <i>target</i> ,<i>sample</i> from test'::text) RETURNS TSQUERY</tt> -</dt> -<dd> -<pre> -=# select rewrite('a & b'::TSQUERY, 'select t,s from rw'::text); - rewrite - ----------- - 'c' & 'b' -</pre> -</dd> -</dl> -Two operators are defined for the <tt>tsquery</tt> type: -<dl> -<dt><tt>TSQUERY @ TSQUERY</tt></dt> -<dd> - Returns <tt>TRUE</tt> if the right argument might be contained in the left argument. - </dd> - <dt><tt>TSQUERY ~ TSQUERY</tt></dt> - <dd> - Returns <tt>TRUE</tt> if the left argument might be contained in the right argument. - </dd> -</dl> -To speed up these operators one can use a GiST index with the <tt>gist_tp_tsquery_ops</tt> opclass.
-<pre> -create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); -</pre> - -<h2><a name="fts">Full Text Search operator</a></h2> - -<dl><dt> -<tt>TSQUERY @@ TSVECTOR</tt><br> -<tt>TSVECTOR @@ TSQUERY</tt> -</dt> -<dd> -Returns <tt>TRUE</tt> if <tt>TSQUERY</tt> is contained in <tt>TSVECTOR</tt> and -<tt>FALSE</tt> otherwise. -<pre> -=# select 'cat & rat':: tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector; - ?column? - ---------- - t -=# select 'fat & cow':: tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector; - ?column? - ---------- - f -</pre> -</dd> -</dl> - -<h2><a name="configurations">Configurations</a></h2> - -A configuration specifies all of the equipment necessary -to transform a document into a <tt>tsvector</tt>: -the parser that breaks its text into tokens, -and the dictionaries which then transform each token into a lexeme. -Every call to <tt>to_tsvector()</tt> or <tt>to_tsquery()</tt> (described above) -uses a configuration to perform its processing. -Four configurations come with tsearch2: - -<ul> -<li><b>default</b> -- Indexes words and numbers, - using the <i>en_stem</i> English Snowball stemmer for Latin-alphabet words - and the <i>simple</i> dictionary for all others. -</li><li><b>default_russian</b> -- Indexes words and numbers, - using the <i>en_stem</i> English Snowball stemmer for Latin-alphabet words - and the <i>ru_stem</i> Russian Snowball dictionary for all others. It is the default - for the <tt>ru_RU.KOI8-R</tt> locale. -</li><li><b>utf8_russian</b> -- the same as <b>default_russian</b> but -for the <tt>ru_RU.UTF-8</tt> locale. -</li><li><b>simple</b> -- Processes both words and numbers - with the <i>simple</i> dictionary, - which neither discards any stop words nor alters them. -</li></ul> - -The tsearch2 module initially chooses your current configuration -by looking for your current locale in the <tt>locale</tt> field -of the <tt>pg_ts_cfg</tt> table described below.
-You can manipulate the current configuration yourself with these functions: - -<dl><dt> - <tt>set_curcfg( <i>id</i> INT <em>|</em> <i>ts_name</i> TEXT - ) RETURNS VOID</tt> -</dt><dd> - Set the current configuration used by <tt>to_tsvector</tt> - and <tt>to_tsquery</tt>. -</dd><dt> - <tt>show_curcfg() RETURNS INT4</tt> -</dt><dd> - Returns the integer <tt>id</tt> of the current configuration. -</dd></dl> - -<p> -Each configuration is defined by a record in the <tt>pg_ts_cfg</tt> table: - -</p><pre>create table pg_ts_cfg ( - id int not null primary key, - ts_name text not null, - prs_name text not null, - locale text -);</pre> - -The <tt>id</tt> and <tt>ts_name</tt> are unique values -which identify the configuration; -the <tt>prs_name</tt> specifies which parser the configuration uses. -Once this parser has split document text into tokens, -the type of each resulting token -- -or, more specifically, the type's <tt>tok_alias</tt> -as specified in the parser's <tt>lexem_type()</tt> table -- -is searched for together with the configuration's <tt>ts_name</tt> -in the <tt>pg_ts_cfgmap</tt> table: - -<pre>create table pg_ts_cfgmap ( - ts_name text not null, - tok_alias text not null, - dict_name text[], - primary key (ts_name,tok_alias) -);</pre> - -Those tokens whose types are not listed are discarded. -The remaining tokens are assigned integer positions, -starting with 1 for the first token in the document, -and turned into lexemes with the help of the dictionaries -whose names are given in the <tt>dict_name</tt> array for their type. -These dictionaries are tried in order, -stopping either with the first one to return a lexeme for the token, -or discarding the token if no dictionary returns a lexeme for it. - -<h2><a name="testing">Testing</a></h2> - -Function <tt>ts_debug</tt> allows easy testing of your <b>current</b> configuration. -You may always test another configuration using <tt>set_curcfg</tt> function. 
-<p> -Example: -</p><pre>apod=# select * from ts_debug('Tsearch module for PostgreSQL 7.3.3'); - ts_name | tok_type | description | token | dict_name | tsvector ----------+----------+-------------+------------+-----------+-------------- - default | lword | Latin word | Tsearch | {en_stem} | 'tsearch' - default | lword | Latin word | module | {en_stem} | 'modul' - default | lword | Latin word | for | {en_stem} | - default | lword | Latin word | PostgreSQL | {en_stem} | 'postgresql' - default | version | VERSION | 7.3.3 | {simple} | '7.3.3' -</pre> -Here: -<br> -<ul> -<li>tsname - configuration name -</li><li>tok_type - token type -</li><li>description - human readable name of tok_type -</li><li>token - parser's token -</li><li>dict_name - dictionary used for the token -</li><li>tsvector - final result</li> -</ul> - - -<h2><a name="parsers">Parsers</a></h2> - -Each parser is defined by a record in the <tt>pg_ts_parser</tt> table: - -<pre>create table pg_ts_parser ( - prs_name text not null, - prs_start regprocedure not null, - prs_nexttoken regprocedure not null, - prs_end regprocedure not null, - prs_headline regprocedure not null, - prs_lextype regprocedure not null, - prs_comment text -);</pre> - -The <tt>prs_name</tt> uniquely identify the parser, -while <tt>prs_comment</tt> usually describes its name and version -for the reference of users. -The other items identify the low-level functions -which make the parser operate, -and are only of interest to someone writing a parser of their own. -<p> -The tsearch2 module comes with one parser named <tt>default</tt> -which is suitable for parsing most plain text and HTML documents. -</p><p> -Each <tt><i>parser</i></tt> argument below -must designate a parser with <tt><i>prs_name</i></tt>; -the current parser is used when this argument is omitted. 
- -</p><dl><dt> - <tt>CREATE FUNCTION set_curprs(<i>parser</i>) RETURNS VOID</tt> -</dt><dd> - Selects a current parser - which will be used when any of the following functions - are called without a parser as an argument. -</dd><dt> - <tt>CREATE FUNCTION token_type( - <em>[</em> <i>parser</i> <em>]</em> - ) RETURNS SETOF tokentype</tt> -</dt><dd> - Returns a table which defines and describes - each kind of token the parser may produce as output. - For each token type the table gives the <tt>tokid</tt> - which the parser will label each token of that type, - the <tt>alias</tt> which names the token type, - and a short description <tt>descr</tt> for the user to read. -</dd><dt> - <tt>CREATE FUNCTION parse( - <em>[</em> <i>parser</i>, <em>]</em> <i>document</i> TEXT - ) RETURNS SETOF tokenout</tt> -</dt><dd> - Parses the given document and returns a series of records, - one for each token produced by parsing. - Each token includes a <tt>tokid</tt> giving its type - and a <tt>lexem</tt> which gives its content. -</dd></dl> - -<h2><a name="dictionaries">Dictionaries</a></h2> - -Dictionary is a program, which accepts lexeme(s), usually those produced by a parser, -on input and returns: -<ul> -<li>array of lexeme(s) if input lexeme is known to the dictionary -<li>void array - dictionary knows lexeme, but it's stop word. -<li> NULL - dictionary doesn't recognized input lexeme -</ul> -Usually, dictionaries used for normalization of words ( ispell, stemmer dictionaries), -but see, for example, <tt>intdict</tt> dictionary (available from -<a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">Tsearch2</a> home page, -which controls indexing of integers. - -<P> -Among the dictionaries which come installed with tsearch2 are: - -<ul> -<li><b>simple</b> simply folds uppercase letters to lowercase - before returning the word. -</li> -<li><b>ispell_template</b> - template for ispell dictionaries. 
-</li> -<li><b>en_stem</b> runs an English Snowball stemmer on each word - that attempts to reduce the various forms of a verb or noun - to a single recognizable form. -</li><li><b>ru_stem_koi8</b>, <b>ru_stem_utf8</b> runs a Russian Snowball stemmer on each word. -</li> -<li><b>synonym</b> - simple lexeme-to-lexeme replacement -</li> -<li><b>thesaurus_template</b> - template for <a href="#tz">thesaurus dictionary</a>. It's -phrase-to-phrase replacement -</li> -</ul> - -<P> -Each dictionary is defined by an entry in the <tt>pg_ts_dict</tt> table: - -<pre>CREATE TABLE pg_ts_dict ( - dict_name text not null, - dict_init regprocedure, - dict_initoption text, - dict_lexize regprocedure not null, - dict_comment text -);</pre> - -The <tt>dict_name</tt> -serve as unique identifiers for the dictionary. -The meaning of the <tt>dict_initoption</tt> varies among dictionaries, -but for the built-in Snowball dictionaries -it specifies a file from which stop words should be read. -The <tt>dict_comment</tt> is a human-readable description of the dictionary. -The other fields are internal function identifiers -useful only to developers trying to implement their own dictionaries. - -<blockquote> -<b>WARNING:</b> Data files, used by dictionaries, should be in <tt>server_encoding</tt> to -avoid possible problems ! -</blockquote> - -<p> -The argument named <tt><i>dictionary</i></tt> -in each of the following functions -should be <tt>dict_name</tt> -identifying which dictionary should be used for the operation; -if omitted then the current dictionary is used. - -</p><dl><dt> - <tt>CREATE FUNCTION set_curdict(<i>dictionary</i>) RETURNS VOID</tt> -</dt><dd> - Selects a current dictionary for use by functions - that do not select a dictionary explicitly. -</dd><dt> - <tt>CREATE FUNCTION lexize( - <em>[</em> <i>dictionary</i>, <em>]</em> <i>word</i> text) - RETURNS TEXT[]</tt> -</dt><dd> - Reduces a single word to a lexeme. 
- Note that lexemes are arrays of zero or more strings, - since in some languages there might be several base words - from which an inflected form could arise. -</dd></dl> - -<h3>Using dictionaries template</h3> -Templates used to define new dictionaries, for example, -<pre> -INSERT INTO pg_ts_dict - (SELECT 'en_ispell', dict_init, - 'DictFile="/usr/local/share/dicts/ispell/english.dict",' - 'AffFile="/usr/local/share/dicts/ispell/english.aff",' - 'StopFile="/usr/local/share/dicts/english.stop"', - dict_lexize - FROM pg_ts_dict - WHERE dict_name = 'ispell_template'); -</pre> - -<h3>Working with stop words</h3> -Ispell and snowball stemmers treat stop words differently: -<ul> -<li>ispell - normalize word and then lookups normalized form in stop-word file -<li>snowball stemmer - first, it lookups word in stop-word file and then does it job. -The reason - to minimize possible 'noise'. -</ul> - -<h2><a name="ranking">Ranking</a></h2> - -Ranking attempts to measure how relevant documents are to particular queries -by inspecting the number of times each search word appears in the document, -and whether different search terms occur near each other. -Note that this information is only available in unstripped vectors -- -ranking functions will only return a useful result -for a <tt>tsvector</tt> which still has position information! -<p> -Notice, that ranking functions supplied are just an examples and -doesn't belong to the tsearch2 core, you can -write your very own ranking function and/or combine additional -factors to fit your specific interest. 
-</p> - -The two ranking functions currently available are: - -<dl><dt> - <tt>CREATE FUNCTION rank(<br> - <em>[</em> <i>weights</i> float4[], <em>]</em> - <i>vector</i> TSVECTOR, <i>query</i> TSQUERY, - <em>[</em> <i>normalization</i> int4 <em>]</em><br> - ) RETURNS float4</tt> -</dt><dd> - This is the ranking function from the old version of OpenFTS, - and offers the ability to weight word instances more heavily - depending on how you have classified them. - The <i>weights</i> specify how heavily to weight each category of word: - <pre>{<i>D-weight</i>, <i>C-weight</i>, <i>B-weight</i>, <i>A-weight</i>}</pre> - If no weights are provided, then these defaults are used: - <pre>{0.1, 0.2, 0.4, 1.0}</pre> - Often weights are used to mark words from special areas of the document, - like the title or an initial abstract, - and make them more or less important than words in the document body. -</dd><dt> - <tt>CREATE FUNCTION rank_cd(<br> - <em>[</em> <i>weights</i> float4[], <em>]</em> - <i>vector</i> TSVECTOR, <i>query</i> TSQUERY, - <em>[</em> <i>normalization</i> int4 <em>]</em><br> - ) RETURNS float4</tt> -</dt><dd> - This function computes the cover density ranking - for the given document <i>vector</i> and <i>query</i>, - as described in Clarke, Cormack, and Tudhope's - "<a href="http://citeseer.nj.nec.com/clarke00relevance.html">Relevance Ranking for One to Three Term Queries</a>" - in the 1999 <i>Information Processing and Management</i>. -</dd> -<dt> - <tt>CREATE FUNCTION get_covers(vector TSVECTOR, query TSQUERY) RETURNS text</tt> - </dt> - <dd> - Returns <tt>extents</tt>, which are a shortest and non-nested sequences of words, which satisfy a query. - Extents (covers) used in <tt>rank_cd</tt> algorithm for fast calculation of proximity ranking. - In example below there are two extents - <tt><b>{1</b>...<b>}1</b> and <b>{2</b> ...<b>}2</b></tt>. 
- <pre> -=# select get_covers('1:1,2,10 2:4'::tsvector,'1& 2'); -get_covers ----------------------- -1 {1 1 {2 2 }1 1 }2 -</pre> - </dd> - -</dl> - -<p> -Both of these (<tt>rank(), rank_cd()</tt>) ranking functions -take an integer <i>normalization</i> option -that specifies whether a document's length should impact its rank. -This is often desirable, -since a hundred-word document with five instances of a search word -is probably more relevant than a thousand-word document with five instances. -The option can have the values, which could be combined using "|" ( 2|4) to -take into account several factors: - -</p> -<ul> -<li><tt>0</tt> (the default) ignores document length.</li> -<li><tt>1</tt> divides the rank by the 1 + logarithm of the length </li> -<li><tt>2</tt> divides the rank by the length itself.</li> -<li><tt>4</tt> divides the rank by the mean harmonic distance between extents</li> -<li><tt>8</tt> divides the rank by the number of unique words in document</li> -<li><tt>16</tt> divides the rank by 1 + logarithm of the number of unique words in document -</li> -</ul> - -<h2><a name="headlines">Headlines</a></h2> - -<dl><dt> - <tt>CREATE FUNCTION headline(<br> - <em>[</em> <i>id</i> int4, <em>|</em> <i>ts_name</i> text, <em>]</em> - <i>document</i> text, <i>query</i> TSQUERY, - <em>[</em> <i>options</i> text <em>]</em><br> - ) RETURNS text</tt> -</dt><dd> - Every form of the the <tt>headline()</tt> function - accepts a <tt>document</tt> along with a <tt>query</tt>, - and returns one or more ellipse-separated excerpts from the document - in which terms from the query are highlighted. - The configuration with which to parse the document - can be specified by either its <i>id</i> or <i>ts_name</i>; - if none is specified that the current configuration is used instead. - <p> - An <i>options</i> string if provided should be a comma-separated list - of one or more '<i>option</i><tt>=</tt><i>value</i>' pairs. 
- The available options are: - </p><ul> - <li><tt>StartSel</tt>, <tt>StopSel</tt> -- - the strings with which query words appearing in the document - should be delimited to distinguish them from other excerpted words. - </li><li><tt>MaxWords</tt>, <tt>MinWords</tt> -- - limits on the shortest and longest headlines you will accept. - </li><li><tt>ShortWord</tt> -- - this prevents your headline from beginning or ending - with a word which has this many characters or less. - The default value of <tt>3</tt> should eliminate most English - conjunctions and articles. - </li><li><tt>HighlightAll</tt> -- - boolean flag, if TRUE, than the whole document will be highlighted. - </li></ul> - Any unspecified options receive these defaults: - <pre>StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3, HighlightAll=FALSE - </pre> -</dd></dl> - - -<h2><a name="indexes">Indexes</a></h2> -Tsearch2 supports indexed access to tsvector in order to further speedup FTS. Notice, indexes are not mandatory for FTS ! -<ul> -<li> RD-Tree (Russian Doll Tree, matryoshka), based on GiST (Generalized Search Tree) -<pre> - =# create index fts_idx on apod using gist(fts); -</pre> -<li>GIN - Generalized Inverted Index -<pre> - =# create index fts_idx on apod using gin(fts); -</pre> -</ul> -<b>GiST</b> index is very good for online update, but is not as scalable as <b>GIN</b> index, -which, in turn, isn't good for updates. Both indexes support concurrency and recovery. - -<h2><a name="tz">Thesaurus dictionary</a></h2> - -<P> -Thesaurus - is a collection of words with included information about the relationships of words and phrases, -i.e., broader terms (BT), narrower terms (NT), preferred terms, non-preferred, related terms,etc.</p> -<p>Basically,thesaurus dictionary replaces all non-preferred terms by one preferred term and, optionally, -preserves them for indexing. Thesaurus used when indexing, so any changes in thesaurus require reindexing. 
-Tsearch2's <tt>thesaurus</tt> dictionary (TZ) is an extension of <tt>synonym</tt> dictionary -with <b>phrase</b> support. Thesaurus is a plain file of the following format: -<pre> -# this is a comment -sample word(s) : indexed word(s) -............................... -</pre> -<ul> -<li><strong>Colon</strong> (:) symbol used as a delimiter.</li> -<li>Use asterisk (<b>*</b>) at the beginning of <tt>indexed word</tt> to skip subdictionary. -It's still required, that <tt>sample words</tt> should be known.</li> -<li>thesaurus dictionary looks for the most longest match</li></ul> -<P> -TZ uses <strong>subdictionary</strong> (should be defined in tsearch2 configuration) -to normalize thesaurus text. It's possible to define only <strong>one dictionary</strong>. -Notice, that subdictionary produces an error, if it couldn't recognize word. -In that case, you should remove definition line with this word or teach subdictionary to know it. -</p> -<p>Stop-words recognized by subdictionary replaced by 'stop-word placeholder', i.e., -important only their position. -To break possible ties thesaurus applies the last definition. For example, consider -thesaurus (with simple subdictionary) rules with pattern 'swsw' -('s' designates stop-word and 'w' - known word): </p> -<pre> -a one the two : swsw -the one a two : swsw2 -</pre> -<p>Words 'a' and 'the' are stop-words defined in the configuration of a subdictionary. -Thesaurus considers texts 'the one the two' and 'that one then two' as equal and will use definition -'swsw2'.</p> -<p>As a normal dictionary, it should be assigned to the specific lexeme types. -Since TZ has a capability to recognize phrases it must remember its state and interact with parser. -TZ use these assignments to check if it should handle next word or stop accumulation. -Compiler of TZ should take care about proper configuration to avoid confusion. 
-For example, if TZ is assigned to handle only <tt>lword</tt> lexeme, then TZ definition like -' one 1:11' will not works, since lexeme type <tt>digit</tt> doesn't assigned to the TZ.</p> - -<h3>Configuration</h3> - -<dl><dt>tsearch2</dt><dd></dd></dl><p>tsearch2 comes with thesaurus template, which could be used to define new dictionary: </p> -<pre class="real">INSERT INTO pg_ts_dict - (SELECT 'tz_simple', dict_init, - 'DictFile="/path/to/tz_simple.txt",' - 'Dictionary="en_stem"', - dict_lexize - FROM pg_ts_dict - WHERE dict_name = 'thesaurus_template'); - -</pre> -<p>Here: </p> -<ul> -<li><tt>tz_simple</tt> - is the dictionary name</li> -<li><tt>DictFile="/path/to/tz_simple.txt"</tt> - is the location of thesaurus file</li> -<li><tt>Dictionary="en_stem"</tt> defines dictionary (snowball english stemmer) to use for thesaurus normalization. Notice, that <em>en_stem</em> dictionary has it's own configuration (stop-words, for example).</li> -</ul> -<p>Now, it's possible to use <tt>tz_simple</tt> in pg_ts_cfgmap, for example: </p> -<pre> -update pg_ts_cfgmap set dict_name='{tz_simple,en_stem}' where ts_name = 'default_russian' and -tok_alias in ('lhword', 'lword', 'lpart_hword'); -</pre> -<h3>Examples</h3> -<p>tz_simple: </p> -<pre> -one : 1 -two : 2 -one two : 12 -the one : 1 -one 1 : 11 -</pre> -<p>To see, how thesaurus works, one could use <tt>to_tsvector</tt>, <tt>to_tsquery</tt> or <tt>plainto_tsquery</tt> functions: </p><pre class="real">=# select plainto_tsquery('default_russian',' one day is oneday'); - plainto_tsquery ------------------------- - '1' & 'day' & 'oneday' - -=# select plainto_tsquery('default_russian','one two day is oneday'); - plainto_tsquery -------------------------- - '12' & 'day' & 'oneday' - -=# select plainto_tsquery('default_russian','the one'); -NOTICE: Thesaurus: word 'the' is recognized as stop-word, assign any stop-word (rule 3) - plainto_tsquery ------------------ - '1' -</pre> - -Additional information about thesaurus dictionary is 
available from -<a href="http://www.sai.msu.su/~megera/wiki/Thesaurus_dictionary">Wiki</a> page. -</body></html> diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out index 926bc3ad1a..d4a81f69cb 100644 --- a/contrib/tsearch2/expected/tsearch2.out +++ b/contrib/tsearch2/expected/tsearch2.out @@ -382,9 +382,9 @@ select numnode( 'new & york | qwery'::tsquery ); create table test_tsquery (txtkeyword text, txtsample text); \set ECHO none alter table test_tsquery add column keyword tsquery; -update test_tsquery set keyword = to_tsquery('default', txtkeyword); +update test_tsquery set keyword = to_tsquery('english', txtkeyword); alter table test_tsquery add column sample tsquery; -update test_tsquery set sample = to_tsquery('default', txtsample::text); +update test_tsquery set sample = to_tsquery('english', txtsample::text); create unique index bt_tsq on test_tsquery (keyword); select count(*) from test_tsquery where keyword < 'new & york'; count @@ -451,7 +451,7 @@ set enable_seqscan=on; select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city'); rewrite ---------------------------------------------------------------------------------- - 'qq' & 'foo' & 'bar' & ( 'city' & 'york' & 'new' | ( 'nyc' | 'apple' & 'big' ) ) + 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) ) (1 row) select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); @@ -463,31 +463,13 @@ select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text ); rewrite ----------------------------------- - ( 'moskva' | 'moscow' ) & 'hotel' + 'hotel' & ( 'moskva' | 'moscow' ) (1 row) select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text ); rewrite ------------------------------------------------------------------------------------- - 'citi' 
& 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) -(1 row) - -select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery; - rewrite ---------------------- - 'moskva' | 'moscow' -(1 row) - -select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery; - rewrite ------------------------------------ - ( 'moskva' | 'moscow' ) & 'hotel' -(1 row) - -select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) + 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) (1 row) select keyword from test_tsquery where keyword @> 'new'; @@ -513,42 +495,6 @@ select keyword from test_tsquery where keyword <@ 'moscow'; 'moscow' (1 row) -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query; - rewrite ---------------------- - 'moskva' | 'moscow' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query; - rewrite ------------------------------------ - ( 'moskva' | 'moscow' ) & 'hotel' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword; - rewrite ---------------------- - 'moskva' | 'moscow' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 
'moscow & hotel') as query where query @> keyword; - rewrite ------------------------------------ - ( 'moskva' | 'moscow' ) & 'hotel' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) -(1 row) - create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); set enable_seqscan='off'; select keyword from test_tsquery where keyword @> 'new'; @@ -574,42 +520,6 @@ select keyword from test_tsquery where keyword <@ 'moscow'; 'moscow' (1 row) -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query; - rewrite ---------------------- - 'moskva' | 'moscow' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query; - rewrite ------------------------------------ - ( 'moskva' | 'moscow' ) & 'hotel' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword; - rewrite ---------------------- - 'moskva' | 'moscow' -(1 row) - -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword; - rewrite ------------------------------------ - ( 'moskva' | 'moscow' ) & 'hotel' -(1 row) - -select rewrite( ARRAY[query, 
keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword; - rewrite -------------------------------------------------------------------------------------- - 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) -(1 row) - set enable_seqscan='on'; select lexize('simple', 'ASD56 hsdkf'); lexize @@ -617,38 +527,38 @@ select lexize('simple', 'ASD56 hsdkf'); {"asd56 hsdkf"} (1 row) -select lexize('en_stem', 'SKIES Problems identity'); +select lexize('english_stem', 'SKIES Problems identity'); lexize -------------------------- {"skies problems ident"} (1 row) select * from token_type('default'); - tokid | alias | descr --------+--------------+----------------------------------- - 1 | lword | Latin word - 2 | nlword | Non-latin word - 3 | word | Word - 4 | email | Email - 5 | url | URL - 6 | host | Host - 7 | sfloat | Scientific notation - 8 | version | VERSION - 9 | part_hword | Part of hyphenated word - 10 | nlpart_hword | Non-latin part of hyphenated word - 11 | lpart_hword | Latin part of hyphenated word - 12 | blank | Space symbols - 13 | tag | HTML Tag - 14 | protocol | Protocol head - 15 | hword | Hyphenated word - 16 | lhword | Latin hyphenated word - 17 | nlhword | Non-latin hyphenated word - 18 | uri | URI - 19 | file | File or path name - 20 | float | Decimal notation - 21 | int | Signed integer - 22 | uint | Unsigned integer - 23 | entity | HTML Entity + tokid | alias | descr +-------+-----------------+------------------------------------------ + 1 | asciiword | Word, all ASCII + 2 | word | Word, all letters + 3 | numword | Word, letters and digits + 4 | email | Email address + 5 | url | URL + 6 | host | Host + 7 | sfloat | Scientific notation + 8 | version | Version number + 9 | hword_numpart | Hyphenated word part, letters and digits + 10 | hword_part | Hyphenated word part, all letters + 11 | hword_asciipart | Hyphenated word part, all ASCII + 12 | blank | Space 
symbols + 13 | tag | HTML tag + 14 | protocol | Protocol head + 15 | numhword | Hyphenated word, letters and digits + 16 | asciihword | Hyphenated word, all ASCII + 17 | hword | Hyphenated word, all letters + 18 | url_path | URL path + 19 | file | File or path name + 20 | float | Decimal notation + 21 | int | Signed integer + 22 | uint | Unsigned integer + 23 | entity | HTML entity (23 rows) select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> @@ -768,15 +678,11 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc 12 | . 20 | 4.2 12 | , - 15 | readline-4.2 - 11 | readline - 12 | - - 20 | 4.2 + 1 | readline + 20 | -4.2 12 | - 15 | readline-4.2 - 11 | readline - 12 | - - 20 | 4.2 + 1 | readline + 20 | -4.2 12 | . 22 | 234 12 | @@ -793,23 +699,23 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc 12 | 12 | <> 1 | qwerty -(135 rows) +(131 rows) -SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> +SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? 
ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 <i <b> wow < jqw <> qwerty'); - to_tsvectorad':17 'dw':19 'jf':39 '234':63 '345':1 '4.2':54,55,56,59,62 '455':31 'jqw':66 'qwe':2,18,27,28,35 'wer':36 'wow':65 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':67 '234.435':30 'qwe-wer':34 'readlin':53,58,61 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 'readline-4.2':57,60 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 + to_tsvectorad':17 'dw':19 'jf':39 '234':61 '345':1 '4.2':54,55,56 '455':31 'jqw':64 'qwe':2,18,27,28,35 'wer':36 'wow':63 '-4.2':58,60 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':65 '234.435':30 'qwe-wer':34 'readlin':53,57,59 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 
'6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 (1 row) -SELECT length(to_tsvector('default', '345 qw')); +SELECT length(to_tsvector('english', '345 qw')); length -------- 2 (1 row) -SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> +SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 
234 <i <b> wow < jqw <> qwerty')); length @@ -817,7 +723,7 @@ SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://ae 51 (1 row) -select to_tsquery('default', 'qwe & sKies '); +select to_tsquery('english', 'qwe & sKies '); to_tsquery --------------- 'qwe' & 'sky' @@ -829,61 +735,61 @@ select to_tsquery('simple', 'qwe & sKies '); 'qwe' & 'skies' (1 row) -select to_tsquery('default', '''the wether'':dc & '' sKies '':BC '); +select to_tsquery('english', '''the wether'':dc & '' sKies '':BC '); to_tsquery ------------------------ 'wether':CD & 'sky':BC (1 row) -select to_tsquery('default', 'asd&(and|fghj)'); +select to_tsquery('english', 'asd&(and|fghj)'); to_tsquery ---------------- 'asd' & 'fghj' (1 row) -select to_tsquery('default', '(asd&and)|fghj'); +select to_tsquery('english', '(asd&and)|fghj'); to_tsquery ---------------- 'asd' | 'fghj' (1 row) -select to_tsquery('default', '(asd&!and)|fghj'); +select to_tsquery('english', '(asd&!and)|fghj'); to_tsquery ---------------- 'asd' | 'fghj' (1 row) -select to_tsquery('default', '(the|and&(i&1))&fghj'); +select to_tsquery('english', '(the|and&(i&1))&fghj'); to_tsquery -------------- '1' & 'fghj' (1 row) -select plainto_tsquery('default', 'the and z 1))& fghj'); +select plainto_tsquery('english', 'the and z 1))& fghj'); plainto_tsquery -------------------- 'z' & '1' & 'fghj' (1 row) -select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd'); +select plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd'); ?column? ----------------------- 'foo' & 'bar' & 'asd' (1 row) -select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg'); +select plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg'); ?column? 
------------------------------ 'foo' & 'bar' | 'asd' & 'fg' (1 row) -select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg'); +select plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg'); ?column? ----------------------------------- 'foo' & 'bar' | !( 'asd' & 'fg' ) (1 row) -select plainto_tsquery('default', 'foo bar') && 'asd | fg'; +select plainto_tsquery('english', 'foo bar') && 'asd | fg'; ?column? ---------------------------------- 'foo' & 'bar' & ( 'asd' | 'fg' ) @@ -995,7 +901,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; 39 (1 row) -select set_curcfg('default'); +select set_curcfg('english'); set_curcfg ------------ @@ -1024,11 +930,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); 0 (1 row) -drop trigger tsvectorupdate on test_tsvector; -create function wow(text) returns text as 'select $1 || '' copyright''; ' language sql; -create trigger tsvectorupdate before update or insert on test_tsvector -for each row execute procedure tsearch2(a, wow, t); -insert into test_tsvector (t) values ('345 qwerty'); +insert into test_tsvector (t) values ('345 qwerty copyright'); select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); count ------- @@ -2135,7 +2037,6 @@ select * from stat('select a from test_tsvector') order by ndoc desc, nentry des 8w | 2 | 2 9f | 2 | 2 9y | 2 | 2 - copyright | 2 | 2 foo | 1 | 3 bar | 1 | 2 0e | 1 | 1 @@ -2227,6 +2128,7 @@ select * from stat('select a from test_tsvector') order by ndoc desc, nentry des 9h | 1 | 1 9r | 1 | 1 9w | 1 | 1 + copyright | 1 | 1 qwerti | 1 | 1 (1146 rows) @@ -2257,11 +2159,11 @@ select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry word | ndoc | nentry -----------+------+-------- a | 2 | 2 - copyright | 2 | 2 foo | 1 | 3 bar | 1 | 2 345 | 1 | 1 b | 1 | 1 + copyright | 1 | 1 qq | 1 | 1 qwerti | 1 | 1 (8 rows) @@ -2271,22 +2173,15 @@ select * from stat('select a from 
test_tsvector','ad') order by ndoc desc, nentr -----------+------+-------- a | 2 | 4 b | 2 | 4 - copyright | 2 | 2 foo | 1 | 3 bar | 1 | 2 345 | 1 | 1 + copyright | 1 | 1 qq | 1 | 1 qwerti | 1 | 1 (8 rows) -select reset_tsearch(); -NOTICE: TSearch cache cleaned - reset_tsearch ---------------- - -(1 row) - -select to_tsquery('default', 'skies & books'); +select to_tsquery('english', 'skies & books'); to_tsquery ---------------- 'sky' & 'book' @@ -2334,48 +2229,6 @@ Upon a woman s face. E. J. Pratt (1882 1964) 0.2 (1 row) -select get_covers(to_tsvector('Erosion It took the sea a thousand years, -A thousand years to trace -The granite features of this cliff -In crag and scarp and base. -It took the sea an hour one night -An hour of storm to place -The sculpture of these granite seams, -Upon a woman s face. E. J. Pratt (1882 1964) -'), to_tsquery('sea&thousand&years')); - get_covers ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 -(1 row) - -select get_covers(to_tsvector('Erosion It took the sea a thousand years, -A thousand years to trace -The granite features of this cliff -In crag and scarp and base. -It took the sea an hour one night -An hour of storm to place -The sculpture of these granite seams, -Upon a woman s face. E. J. 
Pratt (1882 1964) -'), to_tsquery('granite&sea')); - get_covers ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 -(1 row) - -select get_covers(to_tsvector('Erosion It took the sea a thousand years, -A thousand years to trace -The granite features of this cliff -In crag and scarp and base. -It took the sea an hour one night -An hour of storm to place -The sculpture of these granite seams, -Upon a woman s face. E. J. Pratt (1882 1964) -'), to_tsquery('sea')); - get_covers ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 -(1 row) - select headline('Erosion It took the sea a thousand years, A thousand years to trace The granite features of this cliff @@ -2456,15 +2309,19 @@ to_tsquery('sea&foo'), 'HighlightAll=true'); (1 row) --check debug -select * from ts_debug('Tsearch module for PostgreSQL 7.3.3'); - ts_name | tok_type | description | token | dict_name | tsvector ----------+----------+-------------+------------+-----------+-------------- - default | lword | Latin word | Tsearch | {en_stem} | 'tsearch' - default | lword | Latin word | module | {en_stem} | 'modul' - default | lword | Latin word | for | {en_stem} | - default | lword | Latin word | PostgreSQL | {en_stem} | 'postgresql' - default | version | VERSION | 7.3.3 | {simple} | '7.3.3' -(5 rows) +select * from 
public.ts_debug('Tsearch module for PostgreSQL 7.3.3'); + ts_name | tok_type | description | token | dict_name | tsvector +---------+-----------+-----------------+------------+----------------+-------------- + english | asciiword | Word, all ASCII | Tsearch | {english_stem} | 'tsearch' + english | blank | Space symbols | | {} | + english | asciiword | Word, all ASCII | module | {english_stem} | 'modul' + english | blank | Space symbols | | {} | + english | asciiword | Word, all ASCII | for | {english_stem} | + english | blank | Space symbols | | {} | + english | asciiword | Word, all ASCII | PostgreSQL | {english_stem} | 'postgresql' + english | blank | Space symbols | | {} | + english | version | Version number | 7.3.3 | {simple} | '7.3.3' +(9 rows) --check ordering insert into test_tsvector values (null, null); @@ -2480,10 +2337,10 @@ select a is null, a from test_tsvector order by a; f | f | f | - f | '345':1 'qwerti':2 'copyright':3 - f | 'qq':7 'bar':2,8 'foo':1,3,6 'copyright':9 f | 'a':1A,2,3B 'b':5A,6A,7C,8 f | 'a':1A,2,3C 'b':5A,6B,7C,8B + f | 'qq':7 'bar':2,8 'foo':1,3,6 + f | '345':1 'qwerti':2 'copyright':3 f | '7w' 'ch' 'd7' 'eo' 'gw' 'i4' 'lq' 'o6' 'qt' 'y0' f | 'ar' 'ei' 'kq' 'ma' 'qa' 'qh' 'qq' 'qz' 'rx' 'st' f | 'gs' 'i6' 'i9' 'j2' 'l0' 'oq' 'qx' 'sc' 'xe' 'yu' diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN deleted file mode 100644 index c13e496d06..0000000000 --- a/contrib/tsearch2/gendict/Makefile.IN +++ /dev/null @@ -1,12 +0,0 @@ -subdir = contrib/CFG_DIR -top_builddir = ../.. 
-include $(top_builddir)/src/Makefile.global - -MODULE_big = dict_CFG_MODNAME -OBJS = CFG_OFILE -DATA_built = dict_CFG_MODNAME.sql -DOCS = README.CFG_MODNAME -PG_CPPFLAGS = -SHLIB_LINK = ../tsearch2/libtsearch2.a - -include $(top_srcdir)/contrib/contrib-global.mk diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict deleted file mode 100644 index e5bd010b03..0000000000 --- a/contrib/tsearch2/gendict/README.gendict +++ /dev/null @@ -1,131 +0,0 @@ -Gendict - generate dictionary templates for contrib/tsearch2 module. - -This utility aims to help people creating dictionary for contrib/tsearch v2 -module. Particularly, it has built-in support for snowball stemmers. - -Programming API to tsearch2 dictionaries is described in tsearch v2 -documentation. - - -Prerequisities: - -* PostgreSQL 7.3 and above. - -* You need tsearch2 module sources already compiled - -* Rights to install contrib modules - -Usage: - - run config.sh without parameters to see options and arguments - -Usage: -./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ] - -v - be verbose - -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME) - -C COMMENT - dictionary comment -Generate Snowball stemmer: -./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ] - -s - generate Snowball wrapper - -p - prefix of Snowball's function, (default DICTNAME) -Generate template dictionary: -./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ] - -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory. - These files will be used in Makefile. - -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory. - These files will be used in Makefile and subinclude.h - -i - dictionary has init method - - -Example 1: - - Create Portuguese stemmer - - 0. cd PGSQL_SRC/contrib/tsearch2/gendict - - 1. 
Obtain stem.{c,h} files for Portuguese - - wget http://snowball.tartarus.org/portuguese/stem.c - wget http://snowball.tartarus.org/portuguese/stem.h - - 2. Create template files for Portuguese - - ./config.sh -n pt -s -p portuguese_ISO_8859_1 -v -C'Snowball stemmer for Portuguese' - - Note, that argument for -p option should be *the same* as name of stemming - function in stem.c (without _stem) - - A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt - directory. - - 3. Compile and install dictionary - - cd PGSQL_SRC/contrib/dict_pt - make - make install - - 4. Test it - - Sample portuguese words with the stemmed forms are available - from http://snowball.tartarus.org/portuguese/stemmer.html - - createdb testdict - psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql - psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql - psql -d testdict -c "select lexize('pt','bobagem');" - lexize - --------- - {bobag} - (1 row) - - Here is what I have in pg_ts_dict table - - psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';" - dict_name | dict_init | dict_initoption | dict_lexize | dict_comment - -----------+--------------------+-----------------+---------------------------------------+--------------------------------- - pt | dinit_pt(internal) | | snb_lexize(internal,internal,integer) | Snowball stemmer for Portuguese - - (1 row) - - - Note, that you have already installed dictionary and corresponding - entry in tsearch configuration and you may modify it using - plain SQL commands, for example, specify stop words. 
- -Example 2: - - a) Simple template dictionary with init method - - ./config.sh -n wow -v -i -C WOW - - b) Create simple template dict (without init method): - ./config.sh -n wow -v -C WOW - - The same as above, but dictionary will have not init method - - Dictionaries obtained in a) and b) are fully working and ready - for use: - a) lowercase input word and remove it if it is a stop word - b) recognizes any word - - c) Simple template dictionary with source files (with init method): - - ./config.sh -n wow -v -i -c a.c -h a.h -C WOW - - Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory. - These files will be used in Makefile. - - Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory. - These files will be used in Makefile and subinclude.h - - d) Simple template dictionary with source files (without init method): - - ./config.sh -n wow -v -c a.c -h a.h -C WOW - - The same as above, but dictionary will have not init method - - After that you have sources in PGSQL_SRC/contrib/dict_wow and - you may edit them to create actual dictionary. - - Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/) - for additional information about "Gendict tutorial" and dictionaries. 
diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh deleted file mode 100755 index 2dc71fb73e..0000000000 --- a/contrib/tsearch2/gendict/config.sh +++ /dev/null @@ -1,190 +0,0 @@ -#!/bin/sh - -usage () { - echo Usage: - echo $0 -n DICTNAME \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ] - echo ' -v - be verbose' - echo ' -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)' - echo ' -C COMMENT - dictionary comment' - echo Generate Snowball stemmer: - echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ] - echo ' -s - generate Snowball wrapper' - echo " -p - prefix of Snowball's function, (default DICTNAME)" - echo Generate template dictionary: - echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ] - echo ' -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.' - echo ' These files will be used in Makefile.' - echo ' -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.' - echo ' These files will be used in Makefile and subinclude.h' - echo ' -i - dictionary has init method' - exit 1; -} - -dictname= -stemmode=no -verbose=no -cfile= -hfile= -dir= -hasinit=no -comment= -prefix= - -while getopts n:c:C:h:d:p:vis opt -do - case "$opt" in - v) verbose=yes;; - s) stemmode=yes;; - i) hasinit=yes;; - n) dictname="$OPTARG";; - c) cfile="$OPTARG";; - h) hfile="$OPTARG";; - d) dir="$OPTARG";; - C) comment="$OPTARG";; - p) prefix="$OPTARG";; - \?) 
usage;; - esac -done - -[ ${#dictname} -eq 0 ] && usage - -dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'` - -if [ $stemmode = "yes" ] ; then - [ ${#prefix} -eq 0 ] && prefix=$dictname - hasinit=yes - cfile="stem.c" - hfile="stem.h" -fi - -[ ${#dir} -eq 0 ] && dir="dict_$dictname" - -if [ ${#comment} -eq 0 ]; then - comment=null -else - comment="'$comment'" -fi - -ofile= -for f in $cfile -do - f=` echo $f | sed 's#c$#o#'` - ofile="$ofile $f" -done - -if [ $stemmode = "yes" ] ; then - ofile="$ofile dict_snowball.o" -else - ofile="$ofile dict_tmpl.o" -fi - -if [ $verbose = "yes" ]; then - echo Dictname: "'"$dictname"'" - echo Snowball stemmer: $stemmode - echo Has init method: $hasinit - [ $stemmode = "yes" ] && echo Function prefix: $prefix - echo Source files: $cfile - echo Header files: $hfile - echo Object files: $ofile - echo Comment: $comment - echo Directory: ../../$dir -fi - - -[ $verbose = "yes" ] && echo -n 'Build directory... ' -if [ ! -d ../../$dir ]; then - if ! mkdir ../../$dir ; then - echo "Can't create directory ../../$dir" - exit 1 - fi -fi -[ $verbose = "yes" ] && echo ok - - -[ $verbose = "yes" ] && echo -n 'Build Makefile... ' -sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp -if [ $stemmode = "yes" ] ; then - sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile -else - sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile -fi -rm ../../$dir/Makefile.tmp -[ $verbose = "yes" ] && echo ok - - -[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in... 
' -if [ $hasinit = "yes" ]; then - sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp - if [ $stemmode = "yes" ] ; then - sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in - else - sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in - fi - rm ../../$dir/dict_$dictname.sql.in.tmp -else - sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in -fi -[ $verbose = "yes" ] && echo ok - - - -if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then - [ $verbose = "yes" ] && echo -n 'Copy source and header files... ' - if [ ${#cfile} -ne 0 ] ; then - if [ $stemmode = "yes" ] ; then - for cfn in $cfile - do - sed s#../runtime/## < $cfn > ../../$dir/$cfn - done - else - if ! cp $cfile ../../$dir ; then - echo "Can't cp all or one of files: $cfile" - exit 1 - fi - fi - fi - if [ ${#hfile} -ne 0 ] ; then - if ! cp $hfile ../../$dir ; then - echo "Cant cp all or one of files: $hfile" - exit 1 - fi - fi - [ $verbose = "yes" ] && echo ok -fi - - -[ $verbose = "yes" ] && echo -n 'Build sub-include header... ' -echo -n > ../../$dir/subinclude.h -for i in $hfile -do - echo "#include \"$i\"" >> ../../$dir/subinclude.h -done -[ $verbose = "yes" ] && echo ok - - -if [ $stemmode = "yes" ] ; then - [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer... ' - sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c -else - [ $verbose = "yes" ] && echo -n 'Build dictinonary... 
' - sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp - if [ $hasinit = "yes" ]; then - sed s#^HASINIT## < ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c - else - sed s#^HASINIT.*\$## < ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c - fi - rm ../../$dir/dict_tmpl.c.tmp -fi -[ $verbose = "yes" ] && echo ok - - -[ $verbose = "yes" ] && echo -n "Build README.$dictname... " -if [ $stemmode = "yes" ] ; then - echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname -else - echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname -fi -[ $verbose = "yes" ] && echo ok - -echo All is done - diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN deleted file mode 100644 index 7136ee19dd..0000000000 --- a/contrib/tsearch2/gendict/dict_snowball.c.IN +++ /dev/null @@ -1,56 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/gendict/dict_snowball.c.IN,v 1.5 2006/07/14 05:28:27 tgl Exp $ */ - -/* - * example of Snowball dictionary - * http://snowball.tartarus.org/ - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "dict.h" -#include "common.h" -#include "snowball/header.h" -#include "subinclude.h" -#include "ts_locale.h" - -typedef struct { - struct SN_env *z; - StopList stoplist; - int (*stem)(struct SN_env * z); -} DictSnowball; - - -PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME); -Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS); - -Datum -dinit_CFG_MODNAME(PG_FUNCTION_ARGS) { - DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) ); - - if ( !d ) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(d,0,sizeof(DictSnowball)); - d->stoplist.wordop=lowerstr; - - if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { - text *in = PG_GETARG_TEXT_P(0); - readstoplist(in, &(d->stoplist)); - sortstoplist(&(d->stoplist)); - PG_FREE_IF_COPY(in, 0); - } - - d->z = 
CFG_PREFIX_create_env(); - if (!d->z) { - freestoplist(&(d->stoplist)); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - d->stem=CFG_PREFIX_stem; - - PG_RETURN_POINTER(d); -} - - diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN deleted file mode 100644 index a47720aa10..0000000000 --- a/contrib/tsearch2/gendict/dict_tmpl.c.IN +++ /dev/null @@ -1,65 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/gendict/dict_tmpl.c.IN,v 1.6 2006/07/14 05:28:27 tgl Exp $ */ - -/* - * example of dictionary - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "dict.h" -#include "common.h" - -#include "subinclude.h" -#include "ts_locale.h" - -HASINIT typedef struct { -HASINIT StopList stoplist; -HASINIT } DictExample; - - -HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME); -HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS); - -HASINIT Datum -HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) { -HASINIT DictExample *d = (DictExample*)malloc( sizeof(DictExample) ); -HASINIT -HASINIT if ( !d ) -HASINIT ereport(ERROR, -HASINIT (errcode(ERRCODE_OUT_OF_MEMORY), -HASINIT errmsg("out of memory"))); -HASINIT memset(d,0,sizeof(DictExample)); -HASINIT -HASINIT d->stoplist.wordop=lowerstr; -HASINIT -HASINIT /* Your INIT code */ -HASINIT -HASINIT if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { -HASINIT text *in = PG_GETARG_TEXT_P(0); -HASINIT readstoplist(in, &(d->stoplist)); -HASINIT sortstoplist(&(d->stoplist)); -HASINIT PG_FREE_IF_COPY(in, 0); -HASINIT } -HASINIT -HASINIT PG_RETURN_POINTER(d); -HASINIT } - -PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME); -Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS); -Datum -dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) { -HASINIT DictExample *d = (DictExample*)PG_GETARG_POINTER(0); - char *in = (char*)PG_GETARG_POINTER(1); - char *txt = pnstrdup(in, PG_GETARG_INT32(2)); - TSLexeme *res=palloc(sizeof(TSLexeme*)*2); - - /* Your LEXIZE dictionary code */ -HASINIT if ( 
*txt=='\0' || searchstoplist(&(d->stoplist),txt) ) { -HASINIT pfree(txt); -HASINIT res[0].lexeme=NULL; -HASINIT } else - res[0].lexeme=txt; - res[1].lexeme=NULL; - - PG_RETURN_POINTER(res); -} diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN deleted file mode 100644 index 399b26b0d9..0000000000 --- a/contrib/tsearch2/gendict/sql.IN +++ /dev/null @@ -1,26 +0,0 @@ -SET search_path = public; -BEGIN; - -HASINIT create function dinit_CFG_MODNAME(internal) -HASINIT returns internal -HASINIT as 'MODULE_PATHNAME' -HASINIT language C; - -NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4) -NOSNOWBALL returns internal -NOSNOWBALL as 'MODULE_PATHNAME' -NOSNOWBALL language C -NOSNOWBALL returns null on null input; - -insert into pg_ts_dict select - 'CFG_MODNAME', -HASINIT (select oid from pg_proc where proname='dinit_CFG_MODNAME'), -NOINIT null, - null, -ISSNOWBALL (select oid from pg_proc where proname='snb_lexize'), -NOSNOWBALL (select oid from pg_proc where proname='dlexize_CFG_MODNAME'), - CFG_COMMENT -; - - -END; diff --git a/contrib/tsearch2/ginidx.c b/contrib/tsearch2/ginidx.c deleted file mode 100644 index d77ee9bb9f..0000000000 --- a/contrib/tsearch2/ginidx.c +++ /dev/null @@ -1,159 +0,0 @@ -#include "postgres.h" - -#include <float.h> - -#include "access/gist.h" -#include "access/itup.h" -#include "access/skey.h" -#include "access/tuptoaster.h" -#include "storage/bufpage.h" -#include "utils/array.h" -#include "utils/builtins.h" - -#include "tsvector.h" -#include "query.h" -#include "query_cleanup.h" - -PG_FUNCTION_INFO_V1(gin_extract_tsvector); -Datum gin_extract_tsvector(PG_FUNCTION_ARGS); - -Datum -gin_extract_tsvector(PG_FUNCTION_ARGS) -{ - tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - int32 *nentries = (int32 *) PG_GETARG_POINTER(1); - Datum *entries = NULL; - - *nentries = 0; - if (vector->size > 0) - { - int i; - WordEntry *we = ARRPTR(vector); - - *nentries = (int32) vector->size; - 
entries = (Datum *) palloc(sizeof(Datum) * vector->size); - - for (i = 0; i < vector->size; i++) - { - text *txt = (text *) palloc(VARHDRSZ + we->len); - - SET_VARSIZE(txt, VARHDRSZ + we->len); - memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len); - - entries[i] = PointerGetDatum(txt); - - we++; - } - } - - PG_FREE_IF_COPY(vector, 0); - PG_RETURN_POINTER(entries); -} - - -PG_FUNCTION_INFO_V1(gin_extract_tsquery); -Datum gin_extract_tsquery(PG_FUNCTION_ARGS); - -Datum -gin_extract_tsquery(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - int32 *nentries = (int32 *) PG_GETARG_POINTER(1); - StrategyNumber strategy = DatumGetUInt16(PG_GETARG_DATUM(2)); - Datum *entries = NULL; - - *nentries = 0; - if (query->size > 0) - { - int4 i, - j = 0, - len; - ITEM *item; - - item = clean_NOT_v2(GETQUERY(query), &len); - if (!item) - elog(ERROR, "Query requires full scan, GIN doesn't support it"); - - item = GETQUERY(query); - - for (i = 0; i < query->size; i++) - if (item[i].type == VAL) - (*nentries)++; - - entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); - - for (i = 0; i < query->size; i++) - if (item[i].type == VAL) - { - text *txt; - - txt = (text *) palloc(VARHDRSZ + item[i].length); - - SET_VARSIZE(txt, VARHDRSZ + item[i].length); - memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length); - - entries[j++] = PointerGetDatum(txt); - - if (strategy == 1 && item[i].weight != 0) - elog(ERROR, "With class of lexeme restrictions use @@@ operation"); - } - - } - else - *nentries = -1; /* nothing can be found */ - - PG_FREE_IF_COPY(query, 0); - PG_RETURN_POINTER(entries); -} - -typedef struct -{ - ITEM *frst; - bool *mapped_check; -} GinChkVal; - -static bool -checkcondition_gin(void *checkval, ITEM * val) -{ - GinChkVal *gcv = (GinChkVal *) checkval; - - return gcv->mapped_check[val - gcv->frst]; -} - -PG_FUNCTION_INFO_V1(gin_ts_consistent); -Datum gin_ts_consistent(PG_FUNCTION_ARGS); - -Datum 
-gin_ts_consistent(PG_FUNCTION_ARGS) -{ - bool *check = (bool *) PG_GETARG_POINTER(0); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); - bool res = FALSE; - - if (query->size > 0) - { - int4 i, - j = 0; - ITEM *item; - GinChkVal gcv; - - gcv.frst = item = GETQUERY(query); - gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); - - for (i = 0; i < query->size; i++) - if (item[i].type == VAL) - gcv.mapped_check[i] = check[j++]; - - - res = TS_execute( - GETQUERY(query), - &gcv, - true, - checkcondition_gin - ); - - } - - PG_FREE_IF_COPY(query, 2); - PG_RETURN_BOOL(res); -} diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c deleted file mode 100644 index 14fdecb0b8..0000000000 --- a/contrib/tsearch2/gistidx.c +++ /dev/null @@ -1,751 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.c,v 1.16 2007/02/28 22:44:38 tgl Exp $ */ - -#include "postgres.h" - -#include <float.h> - -#include "access/gist.h" -#include "access/itup.h" -#include "access/tuptoaster.h" -#include "storage/bufpage.h" -#include "utils/array.h" -#include "utils/builtins.h" - -#include "tsvector.h" -#include "query.h" -#include "gistidx.h" -#include "crc32.h" - -PG_FUNCTION_INFO_V1(gtsvector_in); -Datum gtsvector_in(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_out); -Datum gtsvector_out(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_compress); -Datum gtsvector_compress(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_decompress); -Datum gtsvector_decompress(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_consistent); -Datum gtsvector_consistent(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_union); -Datum gtsvector_union(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_same); -Datum gtsvector_same(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_penalty); -Datum gtsvector_penalty(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsvector_picksplit); -Datum gtsvector_picksplit(PG_FUNCTION_ARGS); - -#define GETENTRY(vec,pos) 
((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key)) - -/* Number of one-bits in an unsigned byte */ -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - -static int4 sizebitvec(BITVECP sign); - -Datum -gtsvector_in(PG_FUNCTION_ARGS) -{ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("gtsvector_in not implemented"))); - PG_RETURN_DATUM(0); -} - -#define SINGOUTSTR "%d true bits, %d false bits" -#define ARROUTSTR "%d unique words" -#define EXTRALEN ( 2*13 ) - -static int outbuf_maxlen = 0; - -Datum -gtsvector_out(PG_FUNCTION_ARGS) -{ - GISTTYPE *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0))); - char *outbuf; - - if (outbuf_maxlen == 0) - outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1; - outbuf = palloc(outbuf_maxlen); - - if (ISARRKEY(key)) - sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key)); - else - { - int cnttrue = (ISALLTRUE(key)) ? 
SIGLENBIT : sizebitvec(GETSIGN(key)); - - sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue); - } - - PG_FREE_IF_COPY(key, 0); - PG_RETURN_POINTER(outbuf); -} - -static int -compareint(const void *a, const void *b) -{ - if (*((int4 *) a) == *((int4 *) b)) - return 0; - return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1; -} - -static int -uniqueint(int4 *a, int4 l) -{ - int4 *ptr, - *res; - - if (l == 1) - return l; - - ptr = res = a; - - qsort((void *) a, l, sizeof(int4), compareint); - - while (ptr - a < l) - if (*ptr != *res) - *(++res) = *ptr++; - else - ptr++; - return res + 1 - a; -} - -static void -makesign(BITVECP sign, GISTTYPE * a) -{ - int4 k, - len = ARRNELEM(a); - int4 *ptr = GETARR(a); - - MemSet((void *) sign, 0, sizeof(BITVEC)); - for (k = 0; k < len; k++) - HASH(sign, ptr[k]); -} - -Datum -gtsvector_compress(PG_FUNCTION_ARGS) -{ - GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); - GISTENTRY *retval = entry; - - if (entry->leafkey) - { /* tsvector */ - GISTTYPE *res; - tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)); - int4 len; - int4 *arr; - WordEntry *ptr = ARRPTR(val); - char *words = STRPTR(val); - - len = CALCGTSIZE(ARRKEY, val->size); - res = (GISTTYPE *) palloc(len); - SET_VARSIZE(res, len); - res->flag = ARRKEY; - arr = GETARR(res); - len = val->size; - while (len--) - { - *arr = crc32_sz(&words[ptr->pos], ptr->len); - arr++; - ptr++; - } - - len = uniqueint(GETARR(res), val->size); - if (len != val->size) - { - /* - * there is a collision of hash-function; len is always less than - * val->size - */ - len = CALCGTSIZE(ARRKEY, len); - res = (GISTTYPE *) repalloc((void *) res, len); - SET_VARSIZE(res, len); - } - - /* make signature, if array is too long */ - if (VARSIZE(res) > TOAST_INDEX_TARGET) - { - GISTTYPE *ressign; - - len = CALCGTSIZE(SIGNKEY, 0); - ressign = (GISTTYPE *) palloc(len); - SET_VARSIZE(ressign, len); - ressign->flag = SIGNKEY; - makesign(GETSIGN(ressign), res); - res = ressign; - 
} - - retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); - gistentryinit(*retval, PointerGetDatum(res), - entry->rel, entry->page, - entry->offset, FALSE); - } - else if (ISSIGNKEY(DatumGetPointer(entry->key)) && - !ISALLTRUE(DatumGetPointer(entry->key))) - { - int4 i, - len; - GISTTYPE *res; - BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); - - LOOPBYTE( - if ((sign[i] & 0xff) != 0xff) - PG_RETURN_POINTER(retval); - ); - - len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); - res = (GISTTYPE *) palloc(len); - SET_VARSIZE(res, len); - res->flag = SIGNKEY | ALLISTRUE; - - retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); - gistentryinit(*retval, PointerGetDatum(res), - entry->rel, entry->page, - entry->offset, FALSE); - } - PG_RETURN_POINTER(retval); -} - -Datum -gtsvector_decompress(PG_FUNCTION_ARGS) -{ - GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); - GISTTYPE *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)); - - if (key != (GISTTYPE *) DatumGetPointer(entry->key)) - { - GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); - - gistentryinit(*retval, PointerGetDatum(key), - entry->rel, entry->page, - entry->offset, FALSE); - - PG_RETURN_POINTER(retval); - } - - PG_RETURN_POINTER(entry); -} - -typedef struct -{ - int4 *arrb; - int4 *arre; -} CHKVAL; - -/* - * is there value 'val' in array or not ? 
- */ -static bool -checkcondition_arr(void *checkval, ITEM * val) -{ - int4 *StopLow = ((CHKVAL *) checkval)->arrb; - int4 *StopHigh = ((CHKVAL *) checkval)->arre; - int4 *StopMiddle; - - /* Loop invariant: StopLow <= val < StopHigh */ - - while (StopLow < StopHigh) - { - StopMiddle = StopLow + (StopHigh - StopLow) / 2; - if (*StopMiddle == val->val) - return (true); - else if (*StopMiddle < val->val) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - return (false); -} - -static bool -checkcondition_bit(void *checkval, ITEM * val) -{ - return GETBIT(checkval, HASHVAL(val->val)); -} - -Datum -gtsvector_consistent(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(1); - GISTTYPE *key = (GISTTYPE *) DatumGetPointer( - ((GISTENTRY *) PG_GETARG_POINTER(0))->key - ); - - if (!query->size) - PG_RETURN_BOOL(false); - - if (ISSIGNKEY(key)) - { - if (ISALLTRUE(key)) - PG_RETURN_BOOL(true); - - PG_RETURN_BOOL(TS_execute( - GETQUERY(query), - (void *) GETSIGN(key), false, - checkcondition_bit - )); - } - else - { /* only leaf pages */ - CHKVAL chkval; - - chkval.arrb = GETARR(key); - chkval.arre = chkval.arrb + ARRNELEM(key); - PG_RETURN_BOOL(TS_execute( - GETQUERY(query), - (void *) &chkval, true, - checkcondition_arr - )); - } -} - -static int4 -unionkey(BITVECP sbase, GISTTYPE * add) -{ - int4 i; - - if (ISSIGNKEY(add)) - { - BITVECP sadd = GETSIGN(add); - - if (ISALLTRUE(add)) - return 1; - - LOOPBYTE( - sbase[i] |= sadd[i]; - ); - } - else - { - int4 *ptr = GETARR(add); - - for (i = 0; i < ARRNELEM(add); i++) - HASH(sbase, ptr[i]); - } - return 0; -} - - -Datum -gtsvector_union(PG_FUNCTION_ARGS) -{ - GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); - int *size = (int *) PG_GETARG_POINTER(1); - BITVEC base; - int4 i, - len; - int4 flag = 0; - GISTTYPE *result; - - MemSet((void *) base, 0, sizeof(BITVEC)); - for (i = 0; i < entryvec->n; i++) - { - if (unionkey(base, GETENTRY(entryvec, i))) - { - flag = ALLISTRUE; 
- break; - } - } - - flag |= SIGNKEY; - len = CALCGTSIZE(flag, 0); - result = (GISTTYPE *) palloc(len); - SET_VARSIZE(result, len); - result->flag = flag; - if (!ISALLTRUE(result)) - memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC)); - *size = len; - - PG_RETURN_POINTER(result); -} - -Datum -gtsvector_same(PG_FUNCTION_ARGS) -{ - GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0); - GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1); - bool *result = (bool *) PG_GETARG_POINTER(2); - - if (ISSIGNKEY(a)) - { /* then b also ISSIGNKEY */ - if (ISALLTRUE(a) && ISALLTRUE(b)) - *result = true; - else if (ISALLTRUE(a)) - *result = false; - else if (ISALLTRUE(b)) - *result = false; - else - { - int4 i; - BITVECP sa = GETSIGN(a), - sb = GETSIGN(b); - - *result = true; - LOOPBYTE( - if (sa[i] != sb[i]) - { - *result = false; - break; - } - ); - } - } - else - { /* a and b ISARRKEY */ - int4 lena = ARRNELEM(a), - lenb = ARRNELEM(b); - - if (lena != lenb) - *result = false; - else - { - int4 *ptra = GETARR(a), - *ptrb = GETARR(b); - int4 i; - - *result = true; - for (i = 0; i < lena; i++) - if (ptra[i] != ptrb[i]) - { - *result = false; - break; - } - } - } - - PG_RETURN_POINTER(result); -} - -static int4 -sizebitvec(BITVECP sign) -{ - int4 size = 0, - i; - - LOOPBYTE( - size += number_of_ones[(unsigned char) sign[i]]; - ); - return size; -} - -static int -hemdistsign(BITVECP a, BITVECP b) -{ - int i, - diff, - dist = 0; - - LOOPBYTE( - diff = (unsigned char) (a[i] ^ b[i]); - dist += number_of_ones[diff]; - ); - return dist; -} - -static int -hemdist(GISTTYPE * a, GISTTYPE * b) -{ - if (ISALLTRUE(a)) - { - if (ISALLTRUE(b)) - return 0; - else - return SIGLENBIT - sizebitvec(GETSIGN(b)); - } - else if (ISALLTRUE(b)) - return SIGLENBIT - sizebitvec(GETSIGN(a)); - - return hemdistsign(GETSIGN(a), GETSIGN(b)); -} - -Datum -gtsvector_penalty(PG_FUNCTION_ARGS) -{ - GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ - GISTENTRY *newentry = 
(GISTENTRY *) PG_GETARG_POINTER(1); - float *penalty = (float *) PG_GETARG_POINTER(2); - GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key); - GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key); - BITVECP orig = GETSIGN(origval); - - *penalty = 0.0; - - if (ISARRKEY(newval)) - { - BITVEC sign; - - makesign(sign, newval); - - if (ISALLTRUE(origval)) - *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1); - else - *penalty = hemdistsign(sign, orig); - } - else - *penalty = hemdist(origval, newval); - PG_RETURN_POINTER(penalty); -} - -typedef struct -{ - bool allistrue; - BITVEC sign; -} CACHESIGN; - -static void -fillcache(CACHESIGN * item, GISTTYPE * key) -{ - item->allistrue = false; - if (ISARRKEY(key)) - makesign(item->sign, key); - else if (ISALLTRUE(key)) - item->allistrue = true; - else - memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC)); -} - -#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) -typedef struct -{ - OffsetNumber pos; - int4 cost; -} SPLITCOST; - -static int -comparecost(const void *a, const void *b) -{ - if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) - return 0; - else - return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 
1 : -1; -} - - -static int -hemdistcache(CACHESIGN * a, CACHESIGN * b) -{ - if (a->allistrue) - { - if (b->allistrue) - return 0; - else - return SIGLENBIT - sizebitvec(b->sign); - } - else if (b->allistrue) - return SIGLENBIT - sizebitvec(a->sign); - - return hemdistsign(a->sign, b->sign); -} - -Datum -gtsvector_picksplit(PG_FUNCTION_ARGS) -{ - GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); - GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); - OffsetNumber k, - j; - GISTTYPE *datum_l, - *datum_r; - BITVECP union_l, - union_r; - int4 size_alpha, - size_beta; - int4 size_waste, - waste = -1; - int4 nbytes; - OffsetNumber seed_1 = 0, - seed_2 = 0; - OffsetNumber *left, - *right; - OffsetNumber maxoff; - BITVECP ptr; - int i; - CACHESIGN *cache; - SPLITCOST *costvector; - - maxoff = entryvec->n - 2; - nbytes = (maxoff + 2) * sizeof(OffsetNumber); - v->spl_left = (OffsetNumber *) palloc(nbytes); - v->spl_right = (OffsetNumber *) palloc(nbytes); - - cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); - fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber)); - - for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) - { - for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) - { - if (k == FirstOffsetNumber) - fillcache(&cache[j], GETENTRY(entryvec, j)); - - size_waste = hemdistcache(&(cache[j]), &(cache[k])); - if (size_waste > waste) - { - waste = size_waste; - seed_1 = k; - seed_2 = j; - } - } - } - - left = v->spl_left; - v->spl_nleft = 0; - right = v->spl_right; - v->spl_nright = 0; - - if (seed_1 == 0 || seed_2 == 0) - { - seed_1 = 1; - seed_2 = 2; - } - - /* form initial .. 
*/ - if (cache[seed_1].allistrue) - { - datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - datum_l->flag = SIGNKEY | ALLISTRUE; - } - else - { - datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0)); - SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0)); - datum_l->flag = SIGNKEY; - memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC)); - } - if (cache[seed_2].allistrue) - { - datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - datum_r->flag = SIGNKEY | ALLISTRUE; - } - else - { - datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0)); - SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0)); - datum_r->flag = SIGNKEY; - memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC)); - } - - union_l = GETSIGN(datum_l); - union_r = GETSIGN(datum_r); - maxoff = OffsetNumberNext(maxoff); - fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff)); - /* sort before ... */ - costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); - for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) - { - costvector[j - 1].pos = j; - size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j])); - size_beta = hemdistcache(&(cache[seed_2]), &(cache[j])); - costvector[j - 1].cost = Abs(size_alpha - size_beta); - } - qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); - - for (k = 0; k < maxoff; k++) - { - j = costvector[k].pos; - if (j == seed_1) - { - *left++ = j; - v->spl_nleft++; - continue; - } - else if (j == seed_2) - { - *right++ = j; - v->spl_nright++; - continue; - } - - if (ISALLTRUE(datum_l) || cache[j].allistrue) - { - if (ISALLTRUE(datum_l) && cache[j].allistrue) - size_alpha = 0; - else - size_alpha = SIGLENBIT - sizebitvec( - (cache[j].allistrue) ? 
GETSIGN(datum_l) : GETSIGN(cache[j].sign) - ); - } - else - size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l)); - - if (ISALLTRUE(datum_r) || cache[j].allistrue) - { - if (ISALLTRUE(datum_r) && cache[j].allistrue) - size_beta = 0; - else - size_beta = SIGLENBIT - sizebitvec( - (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign) - ); - } - else - size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r)); - - if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) - { - if (ISALLTRUE(datum_l) || cache[j].allistrue) - { - if (!ISALLTRUE(datum_l)) - MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC)); - } - else - { - ptr = cache[j].sign; - LOOPBYTE( - union_l[i] |= ptr[i]; - ); - } - *left++ = j; - v->spl_nleft++; - } - else - { - if (ISALLTRUE(datum_r) || cache[j].allistrue) - { - if (!ISALLTRUE(datum_r)) - MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC)); - } - else - { - ptr = cache[j].sign; - LOOPBYTE( - union_r[i] |= ptr[i]; - ); - } - *right++ = j; - v->spl_nright++; - } - } - - *right = *left = FirstOffsetNumber; - v->spl_ldatum = PointerGetDatum(datum_l); - v->spl_rdatum = PointerGetDatum(datum_r); - - PG_RETURN_POINTER(v); -} diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h deleted file mode 100644 index 5362584b80..0000000000 --- a/contrib/tsearch2/gistidx.h +++ /dev/null @@ -1,62 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.h,v 1.8 2007/02/28 22:44:38 tgl Exp $ */ - -#ifndef __GISTIDX_H__ -#define __GISTIDX_H__ - -/* -#define GISTIDX_DEBUG -*/ - -/* - * signature defines - */ - -#define SIGLENINT 63 /* >121 => key will toast, so it will not work - * !!! 
*/ -#define SIGLEN ( sizeof(int4) * SIGLENINT ) -#define SIGLENBIT (SIGLEN * BITS_PER_BYTE) - -typedef char BITVEC[SIGLEN]; -typedef char *BITVECP; - -#define LOOPBYTE(a) \ - for(i=0;i<SIGLEN;i++) {\ - a;\ - } - -#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) ) -#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 ) -#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) ) -#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) ) -#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 ) - -#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) -#define HASH(sign, val) SETBIT((sign), HASHVAL(val)) - - -/* - * type of index key - */ -typedef struct -{ - int32 vl_len_; /* varlena header (do not touch directly!) */ - int4 flag; - char data[1]; -} GISTTYPE; - -#define ARRKEY 0x01 -#define SIGNKEY 0x02 -#define ALLISTRUE 0x04 - -#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY ) -#define ISSIGNKEY(x) ( ((GISTTYPE*)(x))->flag & SIGNKEY ) -#define ISALLTRUE(x) ( ((GISTTYPE*)(x))->flag & ALLISTRUE ) - -#define GTHDRSIZE ( VARHDRSZ + sizeof(int4) ) -#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) - -#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) ) -#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) ) -#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) ) - -#endif diff --git a/contrib/tsearch2/ispell/Makefile b/contrib/tsearch2/ispell/Makefile deleted file mode 100644 index 4302cee743..0000000000 --- a/contrib/tsearch2/ispell/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/ispell/Makefile,v 1.10 2007/06/26 22:05:03 tgl Exp $ - -SUBOBJS = spell.o regis.o - -EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) - -PG_CPPFLAGS = -I$(srcdir)/.. 
- -ifdef USE_PGXS -PG_CONFIG = pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -include $(PGXS) -else -subdir = contrib/tsearch2/ispell -top_builddir = ../../.. -include $(top_builddir)/src/Makefile.global -include $(top_srcdir)/contrib/contrib-global.mk -endif - -override CFLAGS += $(CFLAGS_SL) - -all: SUBSYS.o - -SUBSYS.o: $(SUBOBJS) - $(LD) $(LDREL) $(LDOUT) $@ $^ - - diff --git a/contrib/tsearch2/ispell/regis.c b/contrib/tsearch2/ispell/regis.c deleted file mode 100644 index 8737bd4ccc..0000000000 --- a/contrib/tsearch2/ispell/regis.c +++ /dev/null @@ -1,215 +0,0 @@ -#include "regis.h" -#include "ts_locale.h" -#include "common.h" - -bool -RS_isRegis(const char *str) -{ - while (str && *str) - { - if (t_isalpha(str) || - t_iseq(str, '[') || - t_iseq(str, ']') || - t_iseq(str, '^')) - str += pg_mblen(str); - else - return false; - } - return true; -} - -#define RS_IN_ONEOF 1 -#define RS_IN_ONEOF_IN 2 -#define RS_IN_NONEOF 3 -#define RS_IN_WAIT 4 - -static RegisNode * -newRegisNode(RegisNode * prev, int len) -{ - RegisNode *ptr; - - ptr = (RegisNode *) malloc(RNHDRSZ + len + 1); - if (!ptr) - ts_error(ERROR, "No memory"); - memset(ptr, 0, RNHDRSZ + len + 1); - if (prev) - prev->next = ptr; - return ptr; -} - -void -RS_compile(Regis * r, bool issuffix, char *str) -{ - int len = strlen(str); - int state = RS_IN_WAIT; - char *c = (char *) str; - RegisNode *ptr = NULL; - - memset(r, 0, sizeof(Regis)); - r->issuffix = (issuffix) ? 
1 : 0; - - while (*c) - { - if (state == RS_IN_WAIT) - { - if (t_isalpha(c)) - { - if (ptr) - ptr = newRegisNode(ptr, len); - else - ptr = r->node = newRegisNode(NULL, len); - COPYCHAR(ptr->data, c); - ptr->type = RSF_ONEOF; - ptr->len = pg_mblen(c); - } - else if (t_iseq(c, '[')) - { - if (ptr) - ptr = newRegisNode(ptr, len); - else - ptr = r->node = newRegisNode(NULL, len); - ptr->type = RSF_ONEOF; - state = RS_IN_ONEOF; - } - else - ts_error(ERROR, "Error in regis: %s", str); - } - else if (state == RS_IN_ONEOF) - { - if (t_iseq(c, '^')) - { - ptr->type = RSF_NONEOF; - state = RS_IN_NONEOF; - } - else if (t_isalpha(c)) - { - COPYCHAR(ptr->data, c); - ptr->len = pg_mblen(c); - state = RS_IN_ONEOF_IN; - } - else - ts_error(ERROR, "Error in regis: %s", str); - } - else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) - { - if (t_isalpha(c)) - { - COPYCHAR(ptr->data + ptr->len, c); - ptr->len += pg_mblen(c); - } - else if (t_iseq(c, ']')) - state = RS_IN_WAIT; - else - ts_error(ERROR, "Error in regis: %s", str); - } - else - ts_error(ERROR, "Internal error in RS_compile: %d", state); - c += pg_mblen(c); - } - - ptr = r->node; - while (ptr) - { - r->nchar++; - ptr = ptr->next; - } -} - -void -RS_free(Regis * r) -{ - RegisNode *ptr = r->node, - *tmp; - - while (ptr) - { - tmp = ptr->next; - free(ptr); - ptr = tmp; - } - - r->node = NULL; -} - -#ifdef TS_USE_WIDE -static bool -mb_strchr(char *str, char *c) -{ - int clen = pg_mblen(c), - plen, - i; - char *ptr = str; - bool res = false; - - clen = pg_mblen(c); - while (*ptr && !res) - { - plen = pg_mblen(ptr); - if (plen == clen) - { - i = plen; - res = true; - while (i--) - if (*(ptr + i) != *(c + i)) - { - res = false; - break; - } - } - - ptr += plen; - } - - return res; -} -#else -#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? 
false : true ) -#endif - - -bool -RS_execute(Regis * r, char *str) -{ - RegisNode *ptr = r->node; - char *c = str; - int len = 0; - - while (*c) - { - len++; - c += pg_mblen(c); - } - - if (len < r->nchar) - return false; - - c = str; - if (r->issuffix) - { - len -= r->nchar; - while (len-- > 0) - c += pg_mblen(c); - } - - - while (ptr) - { - switch (ptr->type) - { - case RSF_ONEOF: - if (mb_strchr((char *) ptr->data, c) != true) - return false; - break; - case RSF_NONEOF: - if (mb_strchr((char *) ptr->data, c) == true) - return false; - break; - default: - ts_error(ERROR, "RS_execute: Unknown type node: %d\n", ptr->type); - } - ptr = ptr->next; - c += pg_mblen(c); - } - - return true; -} diff --git a/contrib/tsearch2/ispell/regis.h b/contrib/tsearch2/ispell/regis.h deleted file mode 100644 index a0b840b0a0..0000000000 --- a/contrib/tsearch2/ispell/regis.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef __REGIS_H__ -#define __REGIS_H__ - -#include "postgres.h" - -typedef struct RegisNode -{ - uint32 - type:2, - len:16, - unused:14; - struct RegisNode *next; - unsigned char data[1]; -} RegisNode; - -#define RNHDRSZ (offsetof(RegisNode,data)) - -#define RSF_ONEOF 1 -#define RSF_NONEOF 2 - -typedef struct Regis -{ - RegisNode *node; - uint32 - issuffix:1, - nchar:16, - unused:15; -} Regis; - -bool RS_isRegis(const char *str); - -void RS_compile(Regis * r, bool issuffix, char *str); -void RS_free(Regis * r); - -/*returns true if matches */ -bool RS_execute(Regis * r, char *str); - -#endif diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c deleted file mode 100644 index c14012a6e7..0000000000 --- a/contrib/tsearch2/ispell/spell.c +++ /dev/null @@ -1,1544 +0,0 @@ -#include "postgres.h" - -#include <ctype.h> - -#include "spell.h" -#include "common.h" -#include "ts_locale.h" - -#define MAX_NORM 1024 -#define MAXNORMLEN 256 - -#define ERRSTRSIZE 1024 - -#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) ) -#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ 
((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) -#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T ) - -static char *VoidString = ""; - -#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))) - -static int -cmpspell(const void *s1, const void *s2) -{ - return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word)); -} -static int -cmpspellaffix(const void *s1, const void *s2) -{ - return (strcmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag)); -} - -static char * -strnduplicate(char *s, int len) -{ - char *d = (char *) palloc(len + 1); - - memcpy(d, s, len); - d[len] = '\0'; - return d; -} - -static char * -findchar(char *str, int c) -{ - while (*str) - { - if (t_iseq(str, c)) - return str; - str += pg_mblen(str); - } - - return NULL; -} - - -/* backward string compare for suffix tree operations */ -static int -strbcmp(const unsigned char *s1, const unsigned char *s2) -{ - int l1 = strlen((const char *) s1) - 1, - l2 = strlen((const char *) s2) - 1; - - while (l1 >= 0 && l2 >= 0) - { - if (s1[l1] < s2[l2]) - return -1; - if (s1[l1] > s2[l2]) - return 1; - l1--; - l2--; - } - if (l1 < l2) - return -1; - if (l1 > l2) - return 1; - - return 0; -} -static int -strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count) -{ - int l1 = strlen((const char *) s1) - 1, - l2 = strlen((const char *) s2) - 1, - l = count; - - while (l1 >= 0 && l2 >= 0 && l > 0) - { - if (s1[l1] < s2[l2]) - return -1; - if (s1[l1] > s2[l2]) - return 1; - l1--; - l2--; - l--; - } - if (l == 0) - return 0; - if (l1 < l2) - return -1; - if (l1 > l2) - return 1; - return 0; -} - -static int -cmpaffix(const void *s1, const void *s2) -{ - const AFFIX *a1 = (const AFFIX *) s1; - const AFFIX *a2 = (const AFFIX *) s2; - - if (a1->type < a2->type) - return -1; - if (a1->type > a2->type) - return 1; - if (a1->type == FF_PREFIX) - return strcmp(a1->repl, a2->repl); - else - return strbcmp((const unsigned 
char *) a1->repl, - (const unsigned char *) a2->repl); -} - -int -NIAddSpell(IspellDict * Conf, const char *word, const char *flag) -{ - if (Conf->nspell >= Conf->mspell) - { - if (Conf->mspell) - { - Conf->mspell += 1024 * 20; - Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); - } - else - { - Conf->mspell = 1024 * 20; - Conf->Spell = (SPELL **) palloc(Conf->mspell * sizeof(SPELL *)); - } - } - Conf->Spell[Conf->nspell] = (SPELL *) palloc(SPELLHDRSZ + strlen(word) + 1); - strcpy(Conf->Spell[Conf->nspell]->word, word); - strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, 16); - Conf->nspell++; - return (0); -} - - -int -NIImportDictionary(IspellDict * Conf, const char *filename) -{ - char str[BUFSIZ], *pstr; - FILE *dict; - - if (!(dict = fopen(filename, "r"))) - return (1); - while (fgets(str, sizeof(str), dict)) - { - char *s; - const char *flag; - - pg_verifymbstr(str, strlen(str), false); - - flag = NULL; - if ((s = findchar(str, '/'))) - { - *s++ = '\0'; - flag = s; - while (*s) - { - /* we allow only single encoded flags for faster works */ - if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s)) - s++; - else - { - *s = '\0'; - break; - } - } - } - else - flag = ""; - - - s = str; - while (*s) - { - if (t_isspace(s)) - { - *s = '\0'; - break; - } - s += pg_mblen(s); - } - pstr = lowerstr(str); - - NIAddSpell(Conf, pstr, flag); - pfree(pstr); - } - fclose(dict); - return (0); -} - - -static int -FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly) -{ - SPNode *node = Conf->Dictionary; - SPNodeData *StopLow, - *StopHigh, - *StopMiddle; - uint8 *ptr = (uint8 *) word; - - while (node && *ptr) - { - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->val == *ptr) - { - if (*(ptr + 1) == '\0' && StopMiddle->isword) - { - if (compoundonly && !StopMiddle->compoundallow) - return 0; - if ((affixflag 
== 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL)) - return 1; - } - node = StopMiddle->node; - ptr++; - break; - } - else if (StopMiddle->val < *ptr) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - if (StopLow >= StopHigh) - break; - } - return 0; -} - -int -NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type) -{ - if (Conf->naffixes >= Conf->maffixes) - { - if (Conf->maffixes) - { - Conf->maffixes += 16; - Conf->Affix = (AFFIX *) realloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); - } - else - { - Conf->maffixes = 16; - Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX)); - } - MEMOUT(Conf->Affix); - } - - if (strcmp(mask, ".") == 0) - { - Conf->Affix[Conf->naffixes].issimple = 1; - Conf->Affix[Conf->naffixes].isregis = 0; - Conf->Affix[Conf->naffixes].mask = VoidString; - } - else if (RS_isRegis(mask)) - { - Conf->Affix[Conf->naffixes].issimple = 0; - Conf->Affix[Conf->naffixes].isregis = 1; - Conf->Affix[Conf->naffixes].mask = (mask && *mask) ? strdup(mask) : VoidString; - } - else - { - int masklen = strlen(mask); - - Conf->Affix[Conf->naffixes].issimple = 0; - Conf->Affix[Conf->naffixes].isregis = 0; - Conf->Affix[Conf->naffixes].mask = (char *) malloc(masklen + 2); - if (type == FF_SUFFIX) - sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask); - else - sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask); - } - MEMOUT(Conf->Affix[Conf->naffixes].mask); - - Conf->Affix[Conf->naffixes].compile = 1; - Conf->Affix[Conf->naffixes].flagflags = flagflags; - Conf->Affix[Conf->naffixes].flag = flag; - Conf->Affix[Conf->naffixes].type = type; - - Conf->Affix[Conf->naffixes].find = (find && *find) ? 
strdup(find) : VoidString; - MEMOUT(Conf->Affix[Conf->naffixes].find); - if ((Conf->Affix[Conf->naffixes].replen = strlen(repl)) > 0) - { - Conf->Affix[Conf->naffixes].repl = strdup(repl); - MEMOUT(Conf->Affix[Conf->naffixes].repl); - } - else - Conf->Affix[Conf->naffixes].repl = VoidString; - Conf->naffixes++; - return (0); -} - -#define PAE_WAIT_MASK 0 -#define PAE_INMASK 1 -#define PAE_WAIT_FIND 2 -#define PAE_INFIND 3 -#define PAE_WAIT_REPL 4 -#define PAE_INREPL 5 - -static bool -parse_affentry(char *str, char *mask, char *find, char *repl, int line) -{ - int state = PAE_WAIT_MASK; - char *pmask = mask, - *pfind = find, - *prepl = repl; - - *mask = *find = *repl = '\0'; - - while (*str) - { - if (state == PAE_WAIT_MASK) - { - if (t_iseq(str, '#')) - return false; - else if (!t_isspace(str)) - { - COPYCHAR(pmask, str); - pmask += pg_mblen(str); - state = PAE_INMASK; - } - } - else if (state == PAE_INMASK) - { - if (t_iseq(str, '>')) - { - *pmask = '\0'; - state = PAE_WAIT_FIND; - } - else if (!t_isspace(str)) - { - COPYCHAR(pmask, str); - pmask += pg_mblen(str); - } - } - else if (state == PAE_WAIT_FIND) - { - if (t_iseq(str, '-')) - { - state = PAE_INFIND; - } - else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ ) - { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); - state = PAE_INREPL; - } - else if (!t_isspace(str)) - ts_error(ERROR, "Affix parse error at %d line", line); - } - else if (state == PAE_INFIND) - { - if (t_iseq(str, ',')) - { - *pfind = '\0'; - state = PAE_WAIT_REPL; - } - else if (t_isalpha(str)) - { - COPYCHAR(pfind, str); - pfind += pg_mblen(str); - } - else if (!t_isspace(str)) - ts_error(ERROR, "Affix parse error at %d line", line); - } - else if (state == PAE_WAIT_REPL) - { - if (t_iseq(str, '-')) - { - break; /* void repl */ - } - else if (t_isalpha(str)) - { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); - state = PAE_INREPL; - } - else if (!t_isspace(str)) - ts_error(ERROR, "Affix parse error at %d line", line); - } - 
else if (state == PAE_INREPL) - { - if (t_iseq(str, '#')) - { - *prepl = '\0'; - break; - } - else if (t_isalpha(str)) - { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); - } - else if (!t_isspace(str)) - ts_error(ERROR, "Affix parse error at %d line", line); - } - else - ts_error(ERROR, "Unknown state in parse_affentry: %d", state); - - str += pg_mblen(str); - } - - *pmask = *pfind = *prepl = '\0'; - - return (*mask && (*find || *repl)) ? true : false; -} - -int -NIImportAffixes(IspellDict * Conf, const char *filename) -{ - char str[BUFSIZ], *pstr = NULL; - char mask[BUFSIZ]; - char find[BUFSIZ]; - char repl[BUFSIZ]; - char *s; - int suffixes = 0; - int prefixes = 0; - int flag = 0; - char flagflags = 0; - FILE *affix; - int line = 0; - int oldformat = 0; - - if (!(affix = fopen(filename, "r"))) - return (1); - Conf->compoundcontrol = '\t'; - - while (fgets(str, sizeof(str), affix)) - { - line++; - if ( *str == '#' || *str == '\n' ) - continue; - - pg_verifymbstr(str, strlen(str), false); - if ( pstr ) - pfree( pstr ); - pstr = lowerstr(str); - if (STRNCMP(pstr, "compoundwords") == 0) - { - s = findchar(str, 'l'); - if (s) - { - while (*s && !t_isspace(s)) - s++; - while (*s && t_isspace(s)) - s++; - if (*s && pg_mblen(s) == 1) - Conf->compoundcontrol = *s; - oldformat++; - continue; - } - } - if (STRNCMP(pstr, "suffixes") == 0) - { - suffixes = 1; - prefixes = 0; - oldformat++; - continue; - } - if (STRNCMP(pstr, "prefixes") == 0) - { - suffixes = 0; - prefixes = 1; - oldformat++; - continue; - } - if (STRNCMP(pstr, "flag") == 0) - { - s = str + 4; - flagflags = 0; - - while (*s && t_isspace(s)) - s++; - oldformat++; - - /* allow only single-encoded flags */ - if (pg_mblen(s) != 1) - elog(ERROR, "Multiencoded flag at line %d: %s", line, s); - - if (*s == '*') - { - flagflags |= FF_CROSSPRODUCT; - s++; - } - else if (*s == '~') - { - flagflags |= FF_COMPOUNDONLYAFX; - s++; - } - - if (*s == '\\') - s++; - - /* allow only single-encoded flags */ - if (pg_mblen(s) 
!= 1) - { - flagflags = 0; - elog(ERROR, "Multiencoded flag at line %d: %s", line, s); - } - - flag = (unsigned char) *s; - continue; - } - if (STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 || - STRNCMP(str, "PFX") == 0 || STRNCMP(str, "SFX") == 0) - { - - if (oldformat) - elog(ERROR, "Wrong affix file format"); - - fclose(affix); - return NIImportOOAffixes(Conf, filename); - - } - if ((!suffixes) && (!prefixes)) - continue; - - if (!parse_affentry(pstr, mask, find, repl, line)) - continue; - - NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX); - } - fclose(affix); - - if ( pstr ) - pfree( pstr ); - - return (0); -} - -int -NIImportOOAffixes(IspellDict * Conf, const char *filename) -{ - char str[BUFSIZ]; - char type[BUFSIZ], *ptype = NULL; - char sflag[BUFSIZ]; - char mask[BUFSIZ], *pmask; - char find[BUFSIZ], *pfind; - char repl[BUFSIZ], *prepl; - bool isSuffix = false; - int flag = 0; - char flagflags = 0; - FILE *affix; - int line = 0; - int scanread = 0; - char scanbuf[BUFSIZ]; - - sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5); - - if (!(affix = fopen(filename, "r"))) - return (1); - Conf->compoundcontrol = '\t'; - - while (fgets(str, sizeof(str), affix)) - { - line++; - if (*str == '\0' || t_isspace(str) || t_iseq(str, '#')) - continue; - pg_verifymbstr(str, strlen(str), false); - - if (STRNCMP(str, "COMPOUNDFLAG") == 0) - { - char *s = str + strlen("COMPOUNDFLAG"); - - while (*s && t_isspace(s)) - s++; - if (*s && pg_mblen(s) == 1) - Conf->compoundcontrol = *s; - continue; - } - - scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask); - - if (ptype) - pfree(ptype); - ptype = lowerstr(type); - if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx"))) - continue; - - if (scanread == 4) - { - if (strlen(sflag) != 1) - continue; - flag = *sflag; - isSuffix = (STRNCMP(ptype, "sfx") == 0) ? 
true : false; - pfind = lowerstr(find); - if (t_iseq(find, 'y')) - flagflags |= FF_CROSSPRODUCT; - else - flagflags = 0; - pfree(pfind); - } - else - { - if (strlen(sflag) != 1 || flag != *sflag || flag == 0) - continue; - prepl = lowerstr(repl); - pfind = lowerstr(find); - pmask = lowerstr(mask); - if (t_iseq(find, '0')) - *pfind = '\0'; - if (t_iseq(repl, '0')) - *prepl = '\0'; - - NIAddAffix(Conf, flag, flagflags, pmask, pfind, prepl, isSuffix ? FF_SUFFIX : FF_PREFIX); - pfree(prepl); - pfree(pfind); - pfree(pmask); - } - } - - if (ptype) - pfree(ptype); - fclose(affix); - - return 0; -} - -static int -MergeAffix(IspellDict * Conf, int a1, int a2) -{ - int naffix = 0; - char **ptr = Conf->AffixData; - - while (*ptr) - { - naffix++; - ptr++; - } - - Conf->AffixData = (char **) realloc(Conf->AffixData, (naffix + 2) * sizeof(char *)); - MEMOUT(Conf->AffixData); - ptr = Conf->AffixData + naffix; - *ptr = malloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ ); - MEMOUT(ptr); - sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]); - ptr++; - *ptr = '\0'; - return naffix; -} - - -static SPNode * -mkSPNode(IspellDict * Conf, int low, int high, int level) -{ - int i; - int nchar = 0; - char lastchar = '\0'; - SPNode *rs; - SPNodeData *data; - int lownew = low; - - for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level]) - { - nchar++; - lastchar = Conf->Spell[i]->word[level]; - } - - if (!nchar) - return NULL; - - rs = (SPNode *) malloc(SPNHRDSZ + nchar * sizeof(SPNodeData)); - MEMOUT(rs); - memset(rs, 0, SPNHRDSZ + nchar * sizeof(SPNodeData)); - rs->length = nchar; - data = rs->data; - - lastchar = '\0'; - for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level) - { - if (lastchar != Conf->Spell[i]->word[level]) - { - if (lastchar) - { - data->node = mkSPNode(Conf, lownew, i, level + 1); - lownew = i; - data++; - } - lastchar = 
Conf->Spell[i]->word[level]; - } - data->val = ((uint8 *) (Conf->Spell[i]->word))[level]; - if (Conf->Spell[i]->p.d.len == level + 1) - { - if (data->isword && data->affix != Conf->Spell[i]->p.d.affix) - { - /* - * fprintf(stderr,"Word already exists: %s (affixes: '%s' - * and '%s')\n", Conf->Spell[i]->word, - * Conf->AffixData[data->affix], - * Conf->AffixData[Conf->Spell[i]->p.d.affix] ); - */ - /* MergeAffix called a few times */ - data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix); - } - else - data->affix = Conf->Spell[i]->p.d.affix; - data->isword = 1; - if (strchr(Conf->AffixData[data->affix], Conf->compoundcontrol)) - data->compoundallow = 1; - } - } - - data->node = mkSPNode(Conf, lownew, high, level + 1); - - return rs; -} - -void -NISortDictionary(IspellDict * Conf) -{ - size_t i; - int naffix = 3; - - /* compress affixes */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix); - for (i = 1; i < Conf->nspell; i++) - if (strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) - naffix++; - - Conf->AffixData = (char **) malloc(naffix * sizeof(char *)); - MEMOUT(Conf->AffixData); - memset(Conf->AffixData, 0, naffix * sizeof(char *)); - naffix = 1; - Conf->AffixData[0] = strdup(""); - MEMOUT(Conf->AffixData[0]); - Conf->AffixData[1] = strdup(Conf->Spell[0]->p.flag); - MEMOUT(Conf->AffixData[1]); - Conf->Spell[0]->p.d.affix = 1; - Conf->Spell[0]->p.d.len = strlen(Conf->Spell[0]->word); - for (i = 1; i < Conf->nspell; i++) - { - if (strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[naffix])) - { - naffix++; - Conf->AffixData[naffix] = strdup(Conf->Spell[i]->p.flag); - MEMOUT(Conf->AffixData[naffix]); - } - Conf->Spell[i]->p.d.affix = naffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); - } - - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); - Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); - - for (i = 0; i < Conf->nspell; i++) - pfree(Conf->Spell[i]); - pfree(Conf->Spell); - 
Conf->Spell = NULL; -} - -static AffixNode * -mkANode(IspellDict * Conf, int low, int high, int level, int type) -{ - int i; - int nchar = 0; - uint8 lastchar = '\0'; - AffixNode *rs; - AffixNodeData *data; - int lownew = low; - - for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) - { - nchar++; - lastchar = GETCHAR(Conf->Affix + i, level, type); - } - - if (!nchar) - return NULL; - - rs = (AffixNode *) malloc(ANHRDSZ + nchar * sizeof(AffixNodeData)); - MEMOUT(rs); - memset(rs, 0, ANHRDSZ + nchar * sizeof(AffixNodeData)); - rs->length = nchar; - data = rs->data; - - lastchar = '\0'; - for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level) - { - if (lastchar != GETCHAR(Conf->Affix + i, level, type)) - { - if (lastchar) - { - data->node = mkANode(Conf, lownew, i, level + 1, type); - lownew = i; - data++; - } - lastchar = GETCHAR(Conf->Affix + i, level, type); - } - data->val = GETCHAR(Conf->Affix + i, level, type); - if (Conf->Affix[i].replen == level + 1) - { /* affix stopped */ - if (!data->naff) - { - data->aff = (AFFIX **) malloc(sizeof(AFFIX *) * (high - i + 1)); - MEMOUT(data->aff); - } - data->aff[data->naff] = Conf->Affix + i; - data->naff++; - } - } - - data->node = mkANode(Conf, lownew, high, level + 1, type); - - return rs; -} - -static void -mkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix) -{ - int i, - cnt = 0; - int start = (issuffix) ? startsuffix : 0; - int end = (issuffix) ? 
Conf->naffixes : startsuffix; - AffixNode *Affix = (AffixNode *) malloc(ANHRDSZ + sizeof(AffixNodeData)); - - MEMOUT(Affix); - memset(Affix, 0, ANHRDSZ + sizeof(AffixNodeData)); - Affix->length = 1; - Affix->isvoid = 1; - - if (issuffix) - { - Affix->data->node = Conf->Suffix; - Conf->Suffix = Affix; - } - else - { - Affix->data->node = Conf->Prefix; - Conf->Prefix = Affix; - } - - - for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) - cnt++; - - if (cnt == 0) - return; - - Affix->data->aff = (AFFIX **) malloc(sizeof(AFFIX *) * cnt); - MEMOUT(Affix->data->aff); - Affix->data->naff = (uint32) cnt; - - cnt = 0; - for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) - { - Affix->data->aff[cnt] = Conf->Affix + i; - cnt++; - } -} - -void -NISortAffixes(IspellDict * Conf) -{ - AFFIX *Affix; - size_t i; - CMPDAffix *ptr; - int firstsuffix = -1; - - if (Conf->naffixes == 0) - return; - - if (Conf->naffixes > 1) - qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); - Conf->CompoundAffix = ptr = (CMPDAffix *) malloc(sizeof(CMPDAffix) * Conf->naffixes); - MEMOUT(Conf->CompoundAffix); - ptr->affix = NULL; - - for (i = 0; i < Conf->naffixes; i++) - { - Affix = &(((AFFIX *) Conf->Affix)[i]); - if (Affix->type == FF_SUFFIX) - { - if (firstsuffix < 0) - firstsuffix = i; - if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen > 0) - { - if (ptr == Conf->CompoundAffix || - strbncmp((const unsigned char *) (ptr - 1)->affix, - (const unsigned char *) Affix->repl, - (ptr - 1)->len)) - { - /* leave only unique and minimals suffixes */ - ptr->affix = Affix->repl; - ptr->len = Affix->replen; - ptr++; - } - } - } - } - ptr->affix = NULL; - Conf->CompoundAffix = (CMPDAffix *) realloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); - - Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); - Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); - mkVoidAffix(Conf, 1, firstsuffix); - 
mkVoidAffix(Conf, 0, firstsuffix); -} - -static AffixNodeData * -FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type) -{ - AffixNodeData *StopLow, - *StopHigh, - *StopMiddle; - uint8 symbol; - - if (node->isvoid) - { /* search void affixes */ - if (node->data->naff) - return node->data; - node = node->data->node; - } - - while (node && *level < wrdlen) - { - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - symbol = GETWCHAR(word, wrdlen, *level, type); - if (StopMiddle->val == symbol) - { - (*level)++; - if (StopMiddle->naff) - return StopMiddle; - node = StopMiddle->node; - break; - } - else if (StopMiddle->val < symbol) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - if (StopLow >= StopHigh) - break; - } - return NULL; -} - -static char * -CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword, int *baselen) -{ - - if (flagflags & FF_COMPOUNDONLYAFX) - { - if ((Affix->flagflags & FF_COMPOUNDONLYAFX) == 0) - return NULL; - } - else - { - if (Affix->flagflags & FF_COMPOUNDONLYAFX) - return NULL; - } - - if (Affix->type == FF_SUFFIX) - { - strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); - if (baselen) /* store length of non-changed part of word */ - *baselen = len - Affix->replen; - } - else - { - /* - * if prefix is a all non-chaged part's length then all word contains - * only prefix and suffix, so out - */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) - return NULL; - strcpy(newword, Affix->find); - strcat(newword, word + Affix->replen); - } - - if (Affix->issimple) - return newword; - else if (Affix->isregis) - { - if (Affix->compile) - { - RS_compile(&(Affix->reg.regis), (Affix->type == FF_SUFFIX) ? 
true : false, Affix->mask); - Affix->compile = 0; - } - if (RS_execute(&(Affix->reg.regis), newword)) - return newword; - } - else - { - int err; - pg_wchar *data; - size_t data_len; - int newword_len; - - if (Affix->compile) - { - int wmasklen, - masklen = strlen(Affix->mask); - pg_wchar *mask; - - mask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar)); - wmasklen = pg_mb2wchar_with_len(Affix->mask, mask, masklen); - - err = pg_regcomp(&(Affix->reg.regex), mask, wmasklen, REG_ADVANCED | REG_NOSUB); - pfree(mask); - if (err) - { - char regerrstr[ERRSTRSIZE]; - - pg_regerror(err, &(Affix->reg.regex), regerrstr, ERRSTRSIZE); - elog(ERROR, "regex error in '%s': %s", Affix->mask, regerrstr); - } - Affix->compile = 0; - } - - /* Convert data string to wide characters */ - newword_len = strlen(newword); - data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(newword, data, newword_len); - - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) - { - pfree(data); - return newword; - } - pfree(data); - } - - return NULL; -} - - -static char ** -NormalizeSubWord(IspellDict * Conf, char *word, char flag) -{ - AffixNodeData *suffix = NULL, - *prefix = NULL; - int slevel = 0, - plevel = 0; - int wrdlen = strlen(word), - swrdlen; - char **forms; - char **cur; - char newword[2 * MAXNORMLEN] = ""; - char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, - *pnode; - int i, - j; - - if (wrdlen > MAXNORMLEN) - return NULL; - cur = forms = (char **) palloc(MAX_NORM * sizeof(char *)); - *cur = NULL; - - - /* Check that the word itself is normal form */ - if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) - { - *cur = pstrdup(word); - cur++; - *cur = NULL; - } - - /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; - plevel = 0; - while (pnode) - { - prefix = FinfAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); - if (!prefix) - break; - for (j = 0; 
j < prefix->naff; j++) - { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) - { - /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1)) - { - /* word search success */ - *cur = pstrdup(newword); - cur++; - *cur = NULL; - } - } - } - pnode = prefix->node; - } - - /* - * Find all other NORMAL forms of the 'word' (check suffix and then - * prefix) - */ - while (snode) - { - int baselen = 0; - - /* find possible suffix */ - suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); - if (!suffix) - break; - /* foreach suffix check affix */ - for (i = 0; i < suffix->naff; i++) - { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) - { - /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1)) - { - /* word search success */ - *cur = pstrdup(newword); - cur++; - *cur = NULL; - } - /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; - plevel = 0; - swrdlen = strlen(newword); - while (pnode) - { - prefix = FinfAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); - if (!prefix) - break; - for (j = 0; j < prefix->naff; j++) - { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) - { - /* prefix success */ - int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? 
- 0 : prefix->aff[j]->flag; - - if (FindWord(Conf, pnewword, ff, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1)) - { - /* word search success */ - *cur = pstrdup(pnewword); - cur++; - *cur = NULL; - } - } - } - pnode = prefix->node; - } - } - } - - snode = suffix->node; - } - - if (cur == forms) - { - pfree(forms); - return (NULL); - } - return (forms); -} - -typedef struct SplitVar -{ - int nstem; - char **stem; - struct SplitVar *next; -} SplitVar; - -static int -CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace) -{ - if (CheckInPlace) - { - while ((*ptr)->affix) - { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) - { - len = (*ptr)->len; - (*ptr)++; - return len; - } - (*ptr)++; - } - } - else - { - char *affbegin; - - while ((*ptr)->affix) - { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) - { - len = (*ptr)->len + (affbegin - word); - (*ptr)++; - return len; - } - (*ptr)++; - } - } - return 0; -} - -static SplitVar * -CopyVar(SplitVar * s, int makedup) -{ - SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar)); - - v->stem = (char **) palloc(sizeof(char *) * (MAX_NORM)); - v->next = NULL; - if (s) - { - int i; - - v->nstem = s->nstem; - for (i = 0; i < s->nstem; i++) - v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i]; - } - else - v->nstem = 0; - return v; -} - - -static SplitVar * -SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos) -{ - SplitVar *var = NULL; - SPNodeData *StopLow, - *StopHigh, - *StopMiddle = NULL; - SPNode *node = (snode) ? snode : Conf->Dictionary; - int level = (snode) ? 
minpos : startpos; /* recursive - * minpos==level */ - int lenaff; - CMPDAffix *caff; - char *notprobed; - - notprobed = (char *) palloc(wordlen); - memset(notprobed, 1, wordlen); - var = CopyVar(orig, 1); - - while (level < wordlen) - { - /* find word with epenthetic or/and compound suffix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) > 0) - { - /* - * there is one of compound suffixes, so check word for existings - */ - char buf[MAXNORMLEN]; - char **subres; - - lenaff = level - startpos + lenaff; - - if (!notprobed[startpos + lenaff - 1]) - continue; - - if (level + lenaff - 1 <= minpos) - continue; - - memcpy(buf, word + startpos, lenaff); - buf[lenaff] = '\0'; - - subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX); - if (subres) - { - /* Yes, it was a word from dictionary */ - SplitVar *new = CopyVar(var, 0); - SplitVar *ptr = var; - char **sptr = subres; - - notprobed[startpos + lenaff - 1] = 0; - - while (*sptr) - { - new->stem[new->nstem] = *sptr; - new->nstem++; - sptr++; - } - pfree(subres); - - while (ptr->next) - ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); - - pfree(new->stem); - pfree(new); - } - } - - if (!node) - break; - - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->val == ((uint8 *) (word))[level]) - break; - else if (StopMiddle->val < ((uint8 *) (word))[level]) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - if (StopLow < StopHigh) - { - - /* find infinitive */ - if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level]) - { - /* ok, we found full compoundallowed word */ - if (level > minpos) - { - /* and its length more than minimal */ - if (wordlen == level + 1) - { - /* well, it was last 
word */ - var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); - var->nstem++; - pfree(notprobed); - return var; - } - else - { - /* then we will search more big word at the same point */ - SplitVar *ptr = var; - - while (ptr->next) - ptr = ptr->next; - ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); - /* we can find next word */ - level++; - var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos); - var->nstem++; - node = Conf->Dictionary; - startpos = level; - continue; - } - } - } - node = StopMiddle->node; - } - else - node = NULL; - level++; - } - - var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); - var->nstem++; - pfree(notprobed); - return var; -} - -TSLexeme * -NINormalizeWord(IspellDict * Conf, char *uword) -{ - char **res; - char *word; - TSLexeme *lcur = NULL, - *lres = NULL; - uint16 NVariant = 1; - - word = lowerstr(uword); - res = NormalizeSubWord(Conf, word, 0); - - if (res) - { - char **ptr = res; - - lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); - while (*ptr) - { - lcur->lexeme = *ptr; - lcur->flags = 0; - lcur->nvariant = NVariant++; - lcur++; - ptr++; - } - lcur->lexeme = NULL; - pfree(res); - } - - if (Conf->compoundcontrol != '\t') - { - int wordlen = strlen(word); - SplitVar *ptr, - *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); - int i; - - while (var) - { - if (var->nstem > 1) - { - char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDWORD); - - if (subres) - { - char **subptr = subres; - - if (!lcur) - lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); - - while (*subptr) - { - for (i = 0; i < var->nstem - 1; i++) - { - lcur->lexeme = (subptr == subres) ? 
var->stem[i] : pstrdup(var->stem[i]); - lcur->flags = 0; - lcur->nvariant = NVariant; - lcur++; - } - - lcur->lexeme = *subptr; - lcur->flags = 0; - lcur->nvariant = NVariant; - lcur++; - subptr++; - NVariant++; - } - - lcur->lexeme = NULL; - pfree(subres); - var->stem[0] = NULL; - pfree(var->stem[var->nstem - 1]); - } - } - - for (i = 0; i < var->nstem && var->stem[i]; i++) - pfree(var->stem[i]); - ptr = var->next; - pfree(var->stem); - pfree(var); - var = ptr; - } - } - - pfree(word); - - return lres; -} - - -static void -freeSPNode(SPNode * node) -{ - SPNodeData *data; - - if (!node) - return; - data = node->data; - while (node->length) - { - freeSPNode(data->node); - data++; - node->length--; - } - free(node); -} - -static void -freeANode(AffixNode * node) -{ - AffixNodeData *data; - - if (!node) - return; - data = node->data; - while (node->length) - { - freeANode(data->node); - if (data->naff) - free(data->aff); - data++; - node->length--; - } - free(node); -} - - -void -NIFree(IspellDict * Conf) -{ - int i; - AFFIX *Affix = (AFFIX *) Conf->Affix; - char **aff = Conf->AffixData; - - if (aff) - { - while (*aff) - { - free(*aff); - aff++; - } - free(Conf->AffixData); - } - - - for (i = 0; i < Conf->naffixes; i++) - { - if (Affix[i].compile == 0) - { - if (Affix[i].isregis) - RS_free(&(Affix[i].reg.regis)); - else - pg_regfree(&(Affix[i].reg.regex)); - } - if (Affix[i].mask != VoidString) - free(Affix[i].mask); - if (Affix[i].find != VoidString) - free(Affix[i].find); - if (Affix[i].repl != VoidString) - free(Affix[i].repl); - } - if (Conf->Spell) - { - for (i = 0; i < Conf->nspell; i++) - pfree(Conf->Spell[i]); - pfree(Conf->Spell); - } - - if (Conf->Affix) - free(Conf->Affix); - if (Conf->CompoundAffix) - free(Conf->CompoundAffix); - freeSPNode(Conf->Dictionary); - freeANode(Conf->Suffix); - freeANode(Conf->Prefix); - memset((void *) Conf, 0, sizeof(IspellDict)); - return; -} diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h 
deleted file mode 100644 index 50a5947680..0000000000 --- a/contrib/tsearch2/ispell/spell.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef __SPELL_H__ -#define __SPELL_H__ - -#include "c.h" - -#include <sys/types.h> - -#include "regex/regex.h" - -#include "regis.h" -#include "dict.h" - -struct SPNode; - - -typedef struct -{ - uint32 - val:8, - isword:1, - compoundallow:1, - affix:22; - struct SPNode *node; -} SPNodeData; - -typedef struct SPNode -{ - uint32 length; - SPNodeData data[1]; -} SPNode; - -#define SPNHRDSZ (sizeof(uint32)) - - -typedef struct spell_struct -{ - union - { - char flag[16]; - struct - { - int affix; - int len; - } d; - } p; - char word[1]; -} SPELL; - -#define SPELLHDRSZ (offsetof(SPELL, word)) - -typedef struct aff_struct -{ - uint32 - flag:8, - type:2, - compile:1, - flagflags:3, - issimple:1, - isregis:1, - unused:1, - replen:16; - char *mask; - char *find; - char *repl; - union - { - regex_t regex; - Regis regis; - } reg; -} AFFIX; - -#define FF_CROSSPRODUCT 0x01 -#define FF_COMPOUNDWORD 0x02 -#define FF_COMPOUNDONLYAFX 0x04 -#define FF_SUFFIX 2 -#define FF_PREFIX 1 - -struct AffixNode; - -typedef struct -{ - uint32 - val:8, - naff:24; - AFFIX **aff; - struct AffixNode *node; -} AffixNodeData; - -typedef struct AffixNode -{ - uint32 isvoid:1, - length:31; - AffixNodeData data[1]; -} AffixNode; - -#define ANHRDSZ (sizeof(uint32)) - -typedef struct -{ - char *affix; - int len; -} CMPDAffix; - -typedef struct -{ - int maffixes; - int naffixes; - AFFIX *Affix; - char compoundcontrol; - - int nspell; - int mspell; - SPELL **Spell; - - AffixNode *Suffix; - AffixNode *Prefix; - - SPNode *Dictionary; - char **AffixData; - CMPDAffix *CompoundAffix; - -} IspellDict; - -TSLexeme *NINormalizeWord(IspellDict * Conf, char *word); -int NIImportAffixes(IspellDict * Conf, const char *filename); -int NIImportOOAffixes(IspellDict * Conf, const char *filename); -int NIImportDictionary(IspellDict * Conf, const char *filename); - -int NIAddSpell(IspellDict * Conf, 
const char *word, const char *flag); -int NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type); -void NISortDictionary(IspellDict * Conf); -void NISortAffixes(IspellDict * Conf); -void NIFree(IspellDict * Conf); - -#endif diff --git a/contrib/tsearch2/my2ispell/Makefile b/contrib/tsearch2/my2ispell/Makefile deleted file mode 100644 index 426129a8df..0000000000 --- a/contrib/tsearch2/my2ispell/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -ZIPFILE=nb_NO -LANGUAGE=norsk - - -UNZIP=unzip -o - - -all: $(LANGUAGE).dict $(LANGUAGE).aff - -$(ZIPFILE).aff: $(ZIPFILE).zip - $(UNZIP) $? $@ - touch $@ - - -# 1 Cleanup dictionary -# 2 remove " symbol -# 3 add compoundwords controlled flag to word which hasn't it, but -# has compound only suffixes - -$(LANGUAGE).dict: $(ZIPFILE).zip - $(UNZIP) $? $(ZIPFILE).dic - grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \ - | grep -v '\.' \ - | sed -e 's/"//g' \ - | perl -pi -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \ - | sort \ - > $@ - -#just convert affix file - -$(LANGUAGE).aff: $(ZIPFILE).aff - grep -v -i zyzyzy $(ZIPFILE).aff \ - | grep -v -i zyzyzy \ - | perl -pi \ - -e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \ - -e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \ - -e 's/^PFX\s+(\S+)\s+([YN])\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \ - -e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > $$2")/e;' \ - -e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \ - -e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? 
"" : "$$2") )/e;' \ - -e 's/^(SET|TRY)/#$$1/' \ - > $@ - -clean: - rm -rf $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff - - diff --git a/contrib/tsearch2/my2ispell/README b/contrib/tsearch2/my2ispell/README deleted file mode 100644 index 583936ab42..0000000000 --- a/contrib/tsearch2/my2ispell/README +++ /dev/null @@ -1,12 +0,0 @@ -Utility for convert MySpell dictionary and affix from -myspell to ispell format. -Utility tested on nb_NO.zip and nn_NO.zip from -OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html) - -usage: -For example, make norwegian dictionary and affix: -% cp nb_NO.zip my2ispell -% cd my2ispell -% gmake ZIPFILE=nb_NO LANGUAGE=norsk - -Author: Teodor Sigaev <teodor@sigaev.ru> diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c deleted file mode 100644 index c519f5f3d4..0000000000 --- a/contrib/tsearch2/prs_dcfg.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Simple config parser - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> - -#include "dict.h" -#include "common.h" -#include "ts_locale.h" - -#define CS_WAITKEY 0 -#define CS_INKEY 1 -#define CS_WAITEQ 2 -#define CS_WAITVALUE 3 -#define CS_INVALUE 4 -#define CS_IN2VALUE 5 -#define CS_WAITDELIM 6 -#define CS_INESC 7 -#define CS_IN2ESC 8 - -static char * -nstrdup(char *ptr, int len) -{ - char *res = palloc(len + 1), - *cptr; - - memcpy(res, ptr, len); - res[len] = '\0'; - cptr = ptr = res; - while (*ptr) - { - if (t_iseq(ptr, '\\')) - ptr++; - COPYCHAR(cptr, ptr); - cptr += pg_mblen(ptr); - ptr += pg_mblen(ptr); - } - *cptr = '\0'; - - return res; -} - -void -parse_cfgdict(text *in, Map ** m) -{ - Map *mptr; - char *ptr = VARDATA(in), - *begin = NULL; - char num = 0; - int state = CS_WAITKEY; - - while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ) - { - if (t_iseq(ptr, ',')) - num++; - ptr += pg_mblen(ptr); - } - - *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2)); - memset(mptr, 0, sizeof(Map) * (num + 2)); - 
ptr = VARDATA(in); - while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ) - { - if (state == CS_WAITKEY) - { - if (t_isalpha(ptr)) - { - begin = ptr; - state = CS_INKEY; - } - else if (!t_isspace(ptr)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"), - errdetail("Syntax error in position %d.", - (int) (ptr - VARDATA(in))))); - } - else if (state == CS_INKEY) - { - if (t_isspace(ptr)) - { - mptr->key = nstrdup(begin, ptr - begin); - state = CS_WAITEQ; - } - else if (t_iseq(ptr, '=')) - { - mptr->key = nstrdup(begin, ptr - begin); - state = CS_WAITVALUE; - } - else if (!t_isalpha(ptr)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"), - errdetail("Syntax error in position %d.", - (int) (ptr - VARDATA(in))))); - } - else if (state == CS_WAITEQ) - { - if (t_iseq(ptr, '=')) - state = CS_WAITVALUE; - else if (!t_isspace(ptr)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"), - errdetail("Syntax error in position %d.", - (int) (ptr - VARDATA(in))))); - } - else if (state == CS_WAITVALUE) - { - if (t_iseq(ptr, '"')) - { - begin = ptr + 1; - state = CS_INVALUE; - } - else if (!t_isspace(ptr)) - { - begin = ptr; - state = CS_IN2VALUE; - } - } - else if (state == CS_INVALUE) - { - if (t_iseq(ptr, '"')) - { - mptr->value = nstrdup(begin, ptr - begin); - mptr++; - state = CS_WAITDELIM; - } - else if (t_iseq(ptr, '\\')) - state = CS_INESC; - } - else if (state == CS_IN2VALUE) - { - if (t_isspace(ptr) || t_iseq(ptr, ',')) - { - mptr->value = nstrdup(begin, ptr - begin); - mptr++; - state = (t_iseq(ptr, ',')) ? 
CS_WAITKEY : CS_WAITDELIM; - } - else if (t_iseq(ptr, '\\')) - state = CS_INESC; - } - else if (state == CS_WAITDELIM) - { - if (t_iseq(ptr, ',')) - state = CS_WAITKEY; - else if (!t_isspace(ptr)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"), - errdetail("Syntax error in position %d.", - (int) (ptr - VARDATA(in))))); - } - else if (state == CS_INESC) - state = CS_INVALUE; - else if (state == CS_IN2ESC) - state = CS_IN2VALUE; - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("bad parser state"), - errdetail("%d at position %d.", - state, (int) (ptr - VARDATA(in))))); - ptr += pg_mblen(ptr); - } - - if (state == CS_IN2VALUE) - { - mptr->value = nstrdup(begin, ptr - begin); - mptr++; - } - else if (!(state == CS_WAITDELIM || state == CS_WAITKEY)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unexpected end of line"))); -} diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c deleted file mode 100644 index 62a305ed1e..0000000000 --- a/contrib/tsearch2/query.c +++ /dev/null @@ -1,1050 +0,0 @@ -/* - * IO definitions for tsquery and mtsquery. This type - * are identical, but for parsing mtsquery used parser for text - * and also morphology is used. - * Internal structure: - * query tree, then string with original value. - * Query tree with plain view. 
It's means that in array of nodes - * right child is always next and left position = item+item->left - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <float.h> -#include <ctype.h> - -#include "access/gist.h" -#include "access/itup.h" -#include "storage/bufpage.h" -#include "utils/array.h" -#include "utils/builtins.h" - -#include "ts_cfg.h" -#include "tsvector.h" -#include "crc32.h" -#include "query.h" -#include "query_cleanup.h" -#include "common.h" -#include "ts_locale.h" - -PG_FUNCTION_INFO_V1(tsquery_in); -Datum tsquery_in(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsquery_out); -Datum tsquery_out(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(exectsq); -Datum exectsq(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(rexectsq); -Datum rexectsq(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsquerytree); -Datum tsquerytree(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsquery); -Datum to_tsquery(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsquery_name); -Datum to_tsquery_name(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsquery_current); -Datum to_tsquery_current(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(plainto_tsquery); -Datum plainto_tsquery(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(plainto_tsquery_name); -Datum plainto_tsquery_name(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(plainto_tsquery_current); -Datum plainto_tsquery_current(PG_FUNCTION_ARGS); - -/* parser's states */ -#define WAITOPERAND 1 -#define WAITOPERATOR 2 -#define WAITFIRSTOPERAND 3 -#define WAITSINGLEOPERAND 4 - -/* - * node of query tree, also used - * for storing polish notation in parser - */ -typedef struct NODE -{ - int2 weight; - int2 type; - int4 val; - int2 distance; - int2 length; - struct NODE *next; -} NODE; - -typedef struct -{ - char *buffer; /* entire string we are scanning */ - char *buf; /* current scan point */ - int4 state; - int4 count; - /* reverse polish notation in list (for temprorary usage) */ - NODE *str; - /* number in str */ - int4 num; - - /* user-friendly 
operand */ - int4 lenop; - int4 sumlen; - char *op; - char *curop; - - /* state for value's parser */ - TI_IN_STATE valstate; - - /* tscfg */ - int cfg_id; -} QPRS_STATE; - -static char * -get_weight(char *buf, int2 *weight) -{ - *weight = 0; - - if (!t_iseq(buf, ':')) - return buf; - - buf++; - while (*buf && pg_mblen(buf) == 1) - { - switch (*buf) - { - case 'a': - case 'A': - *weight |= 1 << 3; - break; - case 'b': - case 'B': - *weight |= 1 << 2; - break; - case 'c': - case 'C': - *weight |= 1 << 1; - break; - case 'd': - case 'D': - *weight |= 1; - break; - default: - return buf; - } - buf++; - } - - return buf; -} - -/* - * get token from query string - */ -static int4 -gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight) -{ - while (1) - { - switch (state->state) - { - case WAITFIRSTOPERAND: - case WAITOPERAND: - if (t_iseq(state->buf, '!')) - { - (state->buf)++; /* can safely ++, t_iseq guarantee - * that pg_mblen()==1 */ - *val = (int4) '!'; - state->state = WAITOPERAND; - return OPR; - } - else if (t_iseq(state->buf, '(')) - { - state->count++; - (state->buf)++; - state->state = WAITOPERAND; - return OPEN; - } - else if (t_iseq(state->buf, ':')) - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("error at start of operand in tsearch query: \"%s\"", - state->buffer))); - } - else if (!t_isspace(state->buf)) - { - state->valstate.prsbuf = state->buf; - if (gettoken_tsvector(&(state->valstate))) - { - *strval = state->valstate.word; - *lenval = state->valstate.curpos - state->valstate.word; - state->buf = get_weight(state->valstate.prsbuf, weight); - state->state = WAITOPERATOR; - return VAL; - } - else if (state->state == WAITFIRSTOPERAND) - return END; - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("no operand in tsearch query: \"%s\"", - state->buffer))); - } - break; - case WAITOPERATOR: - if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')) - { - state->state = WAITOPERAND; - *val = 
(int4) *(state->buf); - (state->buf)++; - return OPR; - } - else if (t_iseq(state->buf, ')')) - { - (state->buf)++; - state->count--; - return (state->count < 0) ? ERR : CLOSE; - } - else if (*(state->buf) == '\0') - return (state->count) ? ERR : END; - else if (!t_isspace(state->buf)) - return ERR; - break; - case WAITSINGLEOPERAND: - if (*(state->buf) == '\0') - return END; - *strval = state->buf; - *lenval = strlen(state->buf); - state->buf += strlen(state->buf); - state->count++; - return VAL; - default: - return ERR; - break; - } - state->buf += pg_mblen(state->buf); - } - return END; -} - -/* - * push new one in polish notation reverse view - */ -static void -pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight) -{ - NODE *tmp = (NODE *) palloc(sizeof(NODE)); - - tmp->weight = weight; - tmp->type = type; - tmp->val = val; - if (distance >= MAXSTRPOS) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("value is too big in tsearch query: \"%s\"", - state->buffer))); - if (lenval >= MAXSTRLEN) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("operand is too long in tsearch query: \"%s\"", - state->buffer))); - tmp->distance = distance; - tmp->length = lenval; - tmp->next = state->str; - state->str = tmp; - state->num++; -} - -/* - * This function is used for tsquery parsing - */ -static void -pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight) -{ - if (lenval >= MAXSTRLEN) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("word is too long in tsearch query: \"%s\"", - state->buffer))); - - pushquery(state, type, crc32_sz(strval, lenval), - state->curop - state->op, lenval, weight); - - while (state->curop - state->op + lenval + 1 >= state->lenop) - { - int4 tmp = state->curop - state->op; - - state->lenop *= 2; - state->op = (char *) repalloc((void *) state->op, state->lenop); - state->curop = state->op + tmp; - } - memcpy((void *) state->curop, (void *) strval, 
lenval); - state->curop += lenval; - *(state->curop) = '\0'; - state->curop++; - state->sumlen += lenval + 1; - return; -} - -/* - * This function is used for morph parsing - */ -static void -pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight) -{ - int4 count = 0; - PRSTEXT prs; - uint32 variant, - pos, - cntvar = 0, - cntpos = 0, - cnt = 0; - - prs.lenwords = 32; - prs.curwords = 0; - prs.pos = 0; - prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords); - - parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval); - - if (prs.curwords > 0) - { - - while (count < prs.curwords) - { - pos = prs.words[count].pos.pos; - cntvar = 0; - while (count < prs.curwords && pos == prs.words[count].pos.pos) - { - variant = prs.words[count].nvariant; - - cnt = 0; - while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) - { - - pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); - pfree(prs.words[count].word); - if (cnt) - pushquery(state, OPR, (int4) '&', 0, 0, 0); - cnt++; - count++; - } - - if (cntvar) - pushquery(state, OPR, (int4) '|', 0, 0, 0); - cntvar++; - } - - if (cntpos) - pushquery(state, OPR, (int4) '&', 0, 0, 0); - - cntpos++; - } - - pfree(prs.words); - - } - else - pushval_asis(state, VALSTOP, NULL, 0, 0); -} - -#define STACKDEPTH 32 -/* - * make polish notaion of query - */ -static int4 -makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2)) -{ - int4 val = 0, - type; - int4 lenval = 0; - char *strval = NULL; - int4 stack[STACKDEPTH]; - int4 lenstack = 0; - int2 weight = 0; - - while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END) - { - switch (type) - { - case VAL: - (*pushval) (state, VAL, strval, lenval, weight); - while (lenstack && (stack[lenstack - 1] == (int4) '&' || - stack[lenstack - 1] == (int4) '!')) - { - lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); - } - break; - 
case OPR: - if (lenstack && val == (int4) '|') - pushquery(state, OPR, val, 0, 0, 0); - else - { - if (lenstack == STACKDEPTH) - /* internal error */ - elog(ERROR, "stack too short"); - stack[lenstack] = val; - lenstack++; - } - break; - case OPEN: - if (makepol(state, pushval) == ERR) - return ERR; - if (lenstack && (stack[lenstack - 1] == (int4) '&' || - stack[lenstack - 1] == (int4) '!')) - { - lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); - } - break; - case CLOSE: - while (lenstack) - { - lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); - }; - return END; - break; - case ERR: - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error in tsearch query: \"%s\"", - state->buffer))); - return ERR; - - } - } - while (lenstack) - { - lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); - }; - return END; -} - -typedef struct -{ - WordEntry *arrb; - WordEntry *arre; - char *values; - char *operand; -} CHKVAL; - -/* - * compare 2 string values - */ -static int4 -ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item) -{ - if (ptr->len == item->length) - return strncmp( - &(chkval->values[ptr->pos]), - &(chkval->operand[item->distance]), - item->length); - - return (ptr->len > item->length) ? 1 : -1; -} - -/* - * check weight info - */ -static bool -checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) -{ - WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16)); - uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len))); - - while (len--) - { - if (item->weight & (1 << WEP_GETWEIGHT(*ptr))) - return true; - ptr++; - } - return false; -} - -/* - * is there value 'val' in array or not ? 
- */ -static bool -checkcondition_str(void *checkval, ITEM * val) -{ - WordEntry *StopLow = ((CHKVAL *) checkval)->arrb; - WordEntry *StopHigh = ((CHKVAL *) checkval)->arre; - WordEntry *StopMiddle; - int difference; - - /* Loop invariant: StopLow <= val < StopHigh */ - - while (StopLow < StopHigh) - { - StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = ValCompare((CHKVAL *) checkval, StopMiddle, val); - if (difference == 0) - return (val->weight && StopMiddle->haspos) ? - checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true; - else if (difference < 0) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - return (false); -} - -/* - * check for boolean condition - */ -bool -TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val)) -{ - if (curitem->type == VAL) - return (*chkcond) (checkval, curitem); - else if (curitem->val == (int4) '!') - { - return (calcnot) ? - ((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? 
false : true) - : true; - } - else if (curitem->val == (int4) '&') - { - if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) - return TS_execute(curitem + 1, checkval, calcnot, chkcond); - else - return false; - } - else - { /* |-operator */ - if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) - return true; - else - return TS_execute(curitem + 1, checkval, calcnot, chkcond); - } - return false; -} - -/* - * boolean operations - */ -Datum -rexectsq(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - return DirectFunctionCall2( - exectsq, - PG_GETARG_DATUM(1), - PG_GETARG_DATUM(0) - ); -} - -Datum -exectsq(PG_FUNCTION_ARGS) -{ - tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); - CHKVAL chkval; - bool result; - - SET_FUNCOID(); - if (!val->size || !query->size) - { - PG_FREE_IF_COPY(val, 0); - PG_FREE_IF_COPY(query, 1); - PG_RETURN_BOOL(false); - } - - chkval.arrb = ARRPTR(val); - chkval.arre = chkval.arrb + val->size; - chkval.values = STRPTR(val); - chkval.operand = GETOPERAND(query); - result = TS_execute( - GETQUERY(query), - &chkval, - true, - checkcondition_str - ); - - PG_FREE_IF_COPY(val, 0); - PG_FREE_IF_COPY(query, 1); - PG_RETURN_BOOL(result); -} - -/* - * find left operand in polish notation view - */ -static void -findoprnd(ITEM * ptr, int4 *pos) -{ -#ifdef BS_DEBUG - elog(DEBUG3, (ptr[*pos].type == OPR) ? 
- "%d %c" : "%d %d", *pos, ptr[*pos].val); -#endif - if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP) - { - ptr[*pos].left = 0; - (*pos)++; - } - else if (ptr[*pos].val == (int4) '!') - { - ptr[*pos].left = 1; - (*pos)++; - findoprnd(ptr, pos); - } - else - { - ITEM *curitem = &ptr[*pos]; - int4 tmp = *pos; - - (*pos)++; - findoprnd(ptr, pos); - curitem->left = *pos - tmp; - findoprnd(ptr, pos); - } -} - - -/* - * input - */ -static QUERYTYPE * - queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id, bool isplain) -{ - QPRS_STATE state; - int4 i; - QUERYTYPE *query; - int4 commonlen; - ITEM *ptr; - NODE *tmp; - int4 pos = 0; - -#ifdef BS_DEBUG - char pbuf[16384], - *cur; -#endif - - /* init state */ - state.buffer = buf; - state.buf = buf; - state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; - state.count = 0; - state.num = 0; - state.str = NULL; - state.cfg_id = cfg_id; - - /* init value parser's state */ - state.valstate.oprisdelim = true; - state.valstate.len = 32; - state.valstate.word = (char *) palloc(state.valstate.len); - - /* init list of operand */ - state.sumlen = 0; - state.lenop = 64; - state.curop = state.op = (char *) palloc(state.lenop); - *(state.curop) = '\0'; - - /* parse query & make polish notation (postfix, but in reverse order) */ - makepol(&state, pushval); - pfree(state.valstate.word); - if (!state.num) - { - ereport(NOTICE, - (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"", - state.buffer))); - query = (QUERYTYPE *) palloc(HDRSIZEQT); - SET_VARSIZE(query, HDRSIZEQT); - query->size = 0; - return query; - } - - /* make finish struct */ - commonlen = COMPUTESIZE(state.num, state.sumlen); - query = (QUERYTYPE *) palloc(commonlen); - SET_VARSIZE(query, commonlen); - query->size = state.num; - ptr = GETQUERY(query); - - /* set item in polish notation */ - for (i = 0; i < state.num; i++) - { - ptr[i].weight = state.str->weight; - ptr[i].type = state.str->type; - ptr[i].val = 
state.str->val; - ptr[i].distance = state.str->distance; - ptr[i].length = state.str->length; - tmp = state.str->next; - pfree(state.str); - state.str = tmp; - } - - /* set user friendly-operand view */ - memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); - pfree(state.op); - - /* set left operand's position for every operator */ - pos = 0; - findoprnd(ptr, &pos); - -#ifdef BS_DEBUG - cur = pbuf; - *cur = '\0'; - for (i = 0; i < query->size; i++) - { - if (ptr[i].type == OPR) - sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left); - else - sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance); - cur = strchr(cur, '\0'); - } - elog(DEBUG3, "POR: %s", pbuf); -#endif - - return query; -} - -/* - * in without morphology - */ -Datum -tsquery_in(PG_FUNCTION_ARGS) -{ - char *in = (char *) PG_GETARG_POINTER(0); - - pg_verifymbstr(in, strlen(in), false); - - SET_FUNCOID(); - PG_RETURN_POINTER(queryin((char *) in, pushval_asis, 0, false)); -} - -/* - * out function - */ -typedef struct -{ - ITEM *curpol; - char *buf; - char *cur; - char *op; - int4 buflen; -} INFIX; - -#define RESIZEBUF(inf,addsize) \ -while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ -{ \ - int4 len = (inf)->cur - (inf)->buf; \ - (inf)->buflen *= 2; \ - (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ - (inf)->cur = (inf)->buf + len; \ -} - -/* - * recursive walk on tree and print it in - * infix (human-readable) view - */ -static void -infix(INFIX * in, bool first) -{ - if (in->curpol->type == VAL) - { - char *op = in->op + in->curpol->distance; - int clen; - - RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5); - *(in->cur) = '\''; - in->cur++; - while (*op) - { - if (t_iseq(op, '\'')) - { - *(in->cur) = '\''; - in->cur++; - } - COPYCHAR(in->cur, op); - - clen = pg_mblen(op); - op += clen; - in->cur += clen; - } - *(in->cur) = '\''; - in->cur++; - if (in->curpol->weight) - { - *(in->cur) = ':'; - 
in->cur++; - if (in->curpol->weight & (1 << 3)) - { - *(in->cur) = 'A'; - in->cur++; - } - if (in->curpol->weight & (1 << 2)) - { - *(in->cur) = 'B'; - in->cur++; - } - if (in->curpol->weight & (1 << 1)) - { - *(in->cur) = 'C'; - in->cur++; - } - if (in->curpol->weight & 1) - { - *(in->cur) = 'D'; - in->cur++; - } - } - *(in->cur) = '\0'; - in->curpol++; - } - else if (in->curpol->val == (int4) '!') - { - bool isopr = false; - - RESIZEBUF(in, 1); - *(in->cur) = '!'; - in->cur++; - *(in->cur) = '\0'; - in->curpol++; - if (in->curpol->type == OPR) - { - isopr = true; - RESIZEBUF(in, 2); - sprintf(in->cur, "( "); - in->cur = strchr(in->cur, '\0'); - } - infix(in, isopr); - if (isopr) - { - RESIZEBUF(in, 2); - sprintf(in->cur, " )"); - in->cur = strchr(in->cur, '\0'); - } - } - else - { - int4 op = in->curpol->val; - INFIX nrm; - - in->curpol++; - if (op == (int4) '|' && !first) - { - RESIZEBUF(in, 2); - sprintf(in->cur, "( "); - in->cur = strchr(in->cur, '\0'); - } - - nrm.curpol = in->curpol; - nrm.op = in->op; - nrm.buflen = 16; - nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); - - /* get right operand */ - infix(&nrm, false); - - /* get & print left operand */ - in->curpol = nrm.curpol; - infix(in, false); - - /* print operator & right operand */ - RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); - sprintf(in->cur, " %c %s", op, nrm.buf); - in->cur = strchr(in->cur, '\0'); - pfree(nrm.buf); - - if (op == (int4) '|' && !first) - { - RESIZEBUF(in, 2); - sprintf(in->cur, " )"); - in->cur = strchr(in->cur, '\0'); - } - } -} - - -Datum -tsquery_out(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - INFIX nrm; - - if (query->size == 0) - { - char *b = palloc(1); - - *b = '\0'; - PG_RETURN_POINTER(b); - } - nrm.curpol = GETQUERY(query); - nrm.buflen = 32; - nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); - *(nrm.cur) = '\0'; - nrm.op = GETOPERAND(query); - infix(&nrm, true); - - 
PG_FREE_IF_COPY(query, 0); - PG_RETURN_POINTER(nrm.buf); -} - -/* - * debug function, used only for view query - * which will be executed in non-leaf pages in index - */ -Datum -tsquerytree(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - INFIX nrm; - text *res; - ITEM *q; - int4 len; - - - if (query->size == 0) - { - res = (text *) palloc(VARHDRSZ); - SET_VARSIZE(res, VARHDRSZ); - PG_RETURN_POINTER(res); - } - - q = clean_NOT_v2(GETQUERY(query), &len); - - if (!q) - { - res = (text *) palloc(1 + VARHDRSZ); - SET_VARSIZE(res, 1 + VARHDRSZ); - *((char *) VARDATA(res)) = 'T'; - } - else - { - nrm.curpol = q; - nrm.buflen = 32; - nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); - *(nrm.cur) = '\0'; - nrm.op = GETOPERAND(query); - infix(&nrm, true); - - res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ); - SET_VARSIZE(res, nrm.cur - nrm.buf + VARHDRSZ); - memcpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf); - pfree(q); - } - - PG_FREE_IF_COPY(query, 0); - - PG_RETURN_POINTER(res); -} - -Datum -to_tsquery(PG_FUNCTION_ARGS) -{ - text *in = PG_GETARG_TEXT_P(1); - char *str; - QUERYTYPE *query; - ITEM *res; - int4 len; - - SET_FUNCOID(); - - str = text2char(in); - PG_FREE_IF_COPY(in, 1); - - query = queryin(str, pushval_morph, PG_GETARG_INT32(0), false); - - if (query->size == 0) - PG_RETURN_POINTER(query); - - res = clean_fakeval_v2(GETQUERY(query), &len); - if (!res) - { - SET_VARSIZE(query, HDRSIZEQT); - query->size = 0; - PG_RETURN_POINTER(query); - } - memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM)); - pfree(res); - PG_RETURN_POINTER(query); -} - -Datum -to_tsquery_name(PG_FUNCTION_ARGS) -{ - text *name = PG_GETARG_TEXT_P(0); - Datum res; - - SET_FUNCOID(); - res = DirectFunctionCall2(to_tsquery, - Int32GetDatum(name2id_cfg(name)), - PG_GETARG_DATUM(1)); - - PG_FREE_IF_COPY(name, 0); - PG_RETURN_DATUM(res); -} - -Datum -to_tsquery_current(PG_FUNCTION_ARGS) -{ - 
SET_FUNCOID(); - PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery, - Int32GetDatum(get_currcfg()), - PG_GETARG_DATUM(0))); -} - -Datum -plainto_tsquery(PG_FUNCTION_ARGS) -{ - text *in = PG_GETARG_TEXT_P(1); - char *str; - QUERYTYPE *query; - ITEM *res; - int4 len; - - SET_FUNCOID(); - - str = text2char(in); - PG_FREE_IF_COPY(in, 1); - - query = queryin(str, pushval_morph, PG_GETARG_INT32(0), true); - - if (query->size == 0) - PG_RETURN_POINTER(query); - - res = clean_fakeval_v2(GETQUERY(query), &len); - if (!res) - { - SET_VARSIZE(query, HDRSIZEQT); - query->size = 0; - PG_RETURN_POINTER(query); - } - memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM)); - pfree(res); - PG_RETURN_POINTER(query); -} - -Datum -plainto_tsquery_name(PG_FUNCTION_ARGS) -{ - text *name = PG_GETARG_TEXT_P(0); - Datum res; - - SET_FUNCOID(); - res = DirectFunctionCall2(plainto_tsquery, - Int32GetDatum(name2id_cfg(name)), - PG_GETARG_DATUM(1)); - - PG_FREE_IF_COPY(name, 0); - PG_RETURN_DATUM(res); -} - -Datum -plainto_tsquery_current(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery, - Int32GetDatum(get_currcfg()), - PG_GETARG_DATUM(0))); -} diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h deleted file mode 100644 index 44f8701213..0000000000 --- a/contrib/tsearch2/query.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef __QUERY_H__ -#define __QUERY_H__ -/* -#define BS_DEBUG -*/ - -#include "ts_locale.h" -/* - * item in polish notation with back link - * to left operand - */ -typedef struct ITEM -{ - int8 type; - int8 weight; - int2 left; - int4 val; - /* user-friendly value, must correlate with WordEntry */ - uint32 - istrue:1, /* use for ranking in Cover */ - length:11, - distance:20; -} ITEM; - -/* - *Storage: - * (len)(size)(array of ITEM)(array of operand in user-friendly form) - */ -typedef struct -{ - int32 vl_len_; /* varlena header (do not touch directly!) 
*/ - int4 size; - char data[1]; -} QUERYTYPE; - -#define HDRSIZEQT ( VARHDRSZ + sizeof(int4) ) -#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) ) -#define GETQUERY(x) (ITEM*)( (char*)(x)+HDRSIZEQT ) -#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) ) - -#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) - -#define END 0 -#define ERR 1 -#define VAL 2 -#define OPR 3 -#define OPEN 4 -#define CLOSE 5 -#define VALSTOP 6 /* for stop words */ - -bool TS_execute(ITEM * curitem, void *checkval, - bool calcnot, bool (*chkcond) (void *checkval, ITEM * val)); - -#endif diff --git a/contrib/tsearch2/query_cleanup.c b/contrib/tsearch2/query_cleanup.c deleted file mode 100644 index 0d3427ff9b..0000000000 --- a/contrib/tsearch2/query_cleanup.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Rewrite routines of query tree - * Teodor Sigaev <teodor@sigaev.ru> - */ - -#include "postgres.h" - -#include <float.h> - -#include "utils/builtins.h" - -#include "query.h" -#include "query_cleanup.h" - -typedef struct NODE -{ - struct NODE *left; - struct NODE *right; - ITEM *valnode; -} NODE; - -/* - * make query tree from plain view of query - */ -static NODE * -maketree(ITEM * in) -{ - NODE *node = (NODE *) palloc(sizeof(NODE)); - - node->valnode = in; - node->right = node->left = NULL; - if (in->type == OPR) - { - node->right = maketree(in + 1); - if (in->val != (int4) '!') - node->left = maketree(in + in->left); - } - return node; -} - -typedef struct -{ - ITEM *ptr; - int4 len; - int4 cur; -} PLAINTREE; - -static void -plainnode(PLAINTREE * state, NODE * node) -{ - if (state->cur == state->len) - { - state->len *= 2; - state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM)); - } - memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM)); - if (node->valnode->type == VAL) - state->cur++; - else if 
(node->valnode->val == (int4) '!') - { - state->ptr[state->cur].left = 1; - state->cur++; - plainnode(state, node->right); - } - else - { - int4 cur = state->cur; - - state->cur++; - plainnode(state, node->right); - state->ptr[cur].left = state->cur - cur; - plainnode(state, node->left); - } - pfree(node); -} - -/* - * make plain view of tree from 'normal' view of tree - */ -static ITEM * -plaintree(NODE * root, int4 *len) -{ - PLAINTREE pl; - - pl.cur = 0; - pl.len = 16; - if (root && (root->valnode->type == VAL || root->valnode->type == OPR)) - { - pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM)); - plainnode(&pl, root); - } - else - pl.ptr = NULL; - *len = pl.cur; - return pl.ptr; -} - -static void -freetree(NODE * node) -{ - if (!node) - return; - if (node->left) - freetree(node->left); - if (node->right) - freetree(node->right); - pfree(node); -} - -/* - * clean tree for ! operator. - * It's usefull for debug, but in - * other case, such view is used with search in index. - * Operator ! 
always return TRUE - */ -static NODE * -clean_NOT_intree(NODE * node) -{ - if (node->valnode->type == VAL) - return node; - - if (node->valnode->val == (int4) '!') - { - freetree(node); - return NULL; - } - - /* operator & or | */ - if (node->valnode->val == (int4) '|') - { - if ((node->left = clean_NOT_intree(node->left)) == NULL || - (node->right = clean_NOT_intree(node->right)) == NULL) - { - freetree(node); - return NULL; - } - } - else - { - NODE *res = node; - - node->left = clean_NOT_intree(node->left); - node->right = clean_NOT_intree(node->right); - if (node->left == NULL && node->right == NULL) - { - pfree(node); - res = NULL; - } - else if (node->left == NULL) - { - res = node->right; - pfree(node); - } - else if (node->right == NULL) - { - res = node->left; - pfree(node); - } - return res; - } - return node; -} - -ITEM * -clean_NOT_v2(ITEM * ptr, int4 *len) -{ - NODE *root = maketree(ptr); - - return plaintree(clean_NOT_intree(root), len); -} - - -#ifdef V_UNKNOWN /* exists in Windows headers */ -#undef V_UNKNOWN -#endif -#ifdef V_FALSE /* exists in Solaris headers */ -#undef V_FALSE -#endif - -#define V_UNKNOWN 0 -#define V_TRUE 1 -#define V_FALSE 2 -#define V_STOP 3 - -/* - * Clean query tree from values which is always in - * text (stopword) - */ -static NODE * -clean_fakeval_intree(NODE * node, char *result) -{ - char lresult = V_UNKNOWN, - rresult = V_UNKNOWN; - - if (node->valnode->type == VAL) - return node; - else if (node->valnode->type == VALSTOP) - { - pfree(node); - *result = V_STOP; - return NULL; - } - - - if (node->valnode->val == (int4) '!') - { - node->right = clean_fakeval_intree(node->right, &rresult); - if (!node->right) - { - *result = V_STOP; - freetree(node); - return NULL; - } - } - else - { - NODE *res = node; - - node->left = clean_fakeval_intree(node->left, &lresult); - node->right = clean_fakeval_intree(node->right, &rresult); - if (lresult == V_STOP && rresult == V_STOP) - { - freetree(node); - *result = V_STOP; - return 
NULL; - } - else if (lresult == V_STOP) - { - res = node->right; - pfree(node); - } - else if (rresult == V_STOP) - { - res = node->left; - pfree(node); - } - return res; - } - return node; -} - -ITEM * -clean_fakeval_v2(ITEM * ptr, int4 *len) -{ - NODE *root = maketree(ptr); - char result = V_UNKNOWN; - NODE *resroot; - - resroot = clean_fakeval_intree(root, &result); - if (result != V_UNKNOWN) - { - elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored"); - *len = 0; - return NULL; - } - - return plaintree(resroot, len); -} diff --git a/contrib/tsearch2/query_cleanup.h b/contrib/tsearch2/query_cleanup.h deleted file mode 100644 index 5879f644e6..0000000000 --- a/contrib/tsearch2/query_cleanup.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __REWRITE_H__ -#define __REWRITE_H__ - -#include "query.h" - -ITEM *clean_NOT_v2(ITEM * ptr, int4 *len); -ITEM *clean_fakeval_v2(ITEM * ptr, int4 *len); - -#endif diff --git a/contrib/tsearch2/query_gist.c b/contrib/tsearch2/query_gist.c deleted file mode 100644 index aafb690109..0000000000 --- a/contrib/tsearch2/query_gist.c +++ /dev/null @@ -1,389 +0,0 @@ -#include "postgres.h" - -#include "access/skey.h" -#include "storage/bufpage.h" -#include "access/gist.h" - -#include "query.h" - -typedef uint64 TPQTGist; - -#define SIGLEN (sizeof(TPQTGist)*BITS_PER_BYTE) - - -#define GETENTRY(vec,pos) ((TPQTGist *) DatumGetPointer((vec)->vector[(pos)].key)) - -PG_FUNCTION_INFO_V1(tsq_mcontains); -Datum tsq_mcontains(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsq_mcontained); -Datum tsq_mcontained(PG_FUNCTION_ARGS); - -static TPQTGist -makesign(QUERYTYPE * a) -{ - int i; - ITEM *ptr = GETQUERY(a); - TPQTGist sign = 0; - - for (i = 0; i < a->size; i++) - { - if (ptr->type == VAL) - sign |= ((TPQTGist) 1) << (ptr->val % SIGLEN); - ptr++; - } - - return sign; -} - -Datum -tsq_mcontains(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - QUERYTYPE *ex = 
(QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); - TPQTGist sq, - se; - int i, - j; - ITEM *iq, - *ie; - - if (query->size < ex->size) - { - PG_FREE_IF_COPY(query, 0); - PG_FREE_IF_COPY(ex, 1); - - PG_RETURN_BOOL(false); - } - - sq = makesign(query); - se = makesign(ex); - - if ((sq & se) != se) - { - PG_FREE_IF_COPY(query, 0); - PG_FREE_IF_COPY(ex, 1); - - PG_RETURN_BOOL(false); - } - - ie = GETQUERY(ex); - - for (i = 0; i < ex->size; i++) - { - iq = GETQUERY(query); - if (ie[i].type != VAL) - continue; - for (j = 0; j < query->size; j++) - if (iq[j].type == VAL && ie[i].val == iq[j].val) - { - j = query->size + 1; - break; - } - if (j == query->size) - { - PG_FREE_IF_COPY(query, 0); - PG_FREE_IF_COPY(ex, 1); - - PG_RETURN_BOOL(false); - } - } - - PG_FREE_IF_COPY(query, 0); - PG_FREE_IF_COPY(ex, 1); - - PG_RETURN_BOOL(true); -} - -Datum -tsq_mcontained(PG_FUNCTION_ARGS) -{ - PG_RETURN_DATUM( - DirectFunctionCall2( - tsq_mcontains, - PG_GETARG_DATUM(1), - PG_GETARG_DATUM(0) - ) - ); -} - -PG_FUNCTION_INFO_V1(gtsq_in); -Datum gtsq_in(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_out); -Datum gtsq_out(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_compress); -Datum gtsq_compress(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_decompress); -Datum gtsq_decompress(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_consistent); -Datum gtsq_consistent(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_union); -Datum gtsq_union(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_same); -Datum gtsq_same(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_penalty); -Datum gtsq_penalty(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(gtsq_picksplit); -Datum gtsq_picksplit(PG_FUNCTION_ARGS); - - -Datum -gtsq_in(PG_FUNCTION_ARGS) -{ - elog(ERROR, "not implemented"); - PG_RETURN_DATUM(0); -} - -Datum -gtsq_out(PG_FUNCTION_ARGS) -{ - elog(ERROR, "not implemented"); - PG_RETURN_DATUM(0); -} - -Datum -gtsq_compress(PG_FUNCTION_ARGS) -{ - GISTENTRY *entry = (GISTENTRY *) 
PG_GETARG_POINTER(0); - GISTENTRY *retval = entry; - - if (entry->leafkey) - { - TPQTGist *sign = (TPQTGist *) palloc(sizeof(TPQTGist)); - - retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); - *sign = makesign((QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key))); - - gistentryinit(*retval, PointerGetDatum(sign), - entry->rel, entry->page, - entry->offset, FALSE); - } - - PG_RETURN_POINTER(retval); -} - -Datum -gtsq_decompress(PG_FUNCTION_ARGS) -{ - PG_RETURN_DATUM(PG_GETARG_DATUM(0)); -} - -Datum -gtsq_consistent(PG_FUNCTION_ARGS) -{ - GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); - TPQTGist *key = (TPQTGist *) DatumGetPointer(entry->key); - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); - StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); - TPQTGist sq = makesign(query); - bool retval; - - switch (strategy) - { - case RTContainsStrategyNumber: - case RTOldContainsStrategyNumber: - if (GIST_LEAF(entry)) - retval = (*key & sq) == sq; - else - retval = (*key & sq) != 0; - break; - case RTContainedByStrategyNumber: - case RTOldContainedByStrategyNumber: - if (GIST_LEAF(entry)) - retval = (*key & sq) == *key; - else - retval = (*key & sq) != 0; - break; - default: - retval = FALSE; - } - PG_RETURN_BOOL(retval); -} - -Datum -gtsq_union(PG_FUNCTION_ARGS) -{ - GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); - TPQTGist *sign = (TPQTGist *) palloc(sizeof(TPQTGist)); - int i; - int *size = (int *) PG_GETARG_POINTER(1); - - memset(sign, 0, sizeof(TPQTGist)); - - for (i = 0; i < entryvec->n; i++) - *sign |= *GETENTRY(entryvec, i); - - *size = sizeof(TPQTGist); - - PG_RETURN_POINTER(sign); -} - -Datum -gtsq_same(PG_FUNCTION_ARGS) -{ - TPQTGist *a = (TPQTGist *) PG_GETARG_POINTER(0); - TPQTGist *b = (TPQTGist *) PG_GETARG_POINTER(1); - - PG_RETURN_POINTER(*a == *b); -} - -static int -sizebitvec(TPQTGist sign) -{ - int size = 0, - i; - - for (i = 0; i < SIGLEN; i++) - size += 0x01 & (sign 
>> i); - - return size; -} - -static int -hemdist(TPQTGist a, TPQTGist b) -{ - TPQTGist res = a ^ b; - - return sizebitvec(res); -} - -Datum -gtsq_penalty(PG_FUNCTION_ARGS) -{ - TPQTGist *origval = (TPQTGist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key); - TPQTGist *newval = (TPQTGist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key); - float *penalty = (float *) PG_GETARG_POINTER(2); - - *penalty = hemdist(*origval, *newval); - - PG_RETURN_POINTER(penalty); -} - - -typedef struct -{ - OffsetNumber pos; - int4 cost; -} SPLITCOST; - -static int -comparecost(const void *a, const void *b) -{ - if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) - return 0; - else - return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1; -} - -#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) - -Datum -gtsq_picksplit(PG_FUNCTION_ARGS) -{ - GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); - GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); - OffsetNumber maxoff = entryvec->n - 2; - OffsetNumber k, - j; - - TPQTGist *datum_l, - *datum_r; - int4 size_alpha, - size_beta; - int4 size_waste, - waste = -1; - int4 nbytes; - OffsetNumber seed_1 = 0, - seed_2 = 0; - OffsetNumber *left, - *right; - - SPLITCOST *costvector; - - nbytes = (maxoff + 2) * sizeof(OffsetNumber); - left = v->spl_left = (OffsetNumber *) palloc(nbytes); - right = v->spl_right = (OffsetNumber *) palloc(nbytes); - v->spl_nleft = v->spl_nright = 0; - - for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) - for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) - { - size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k)); - if (size_waste > waste) - { - waste = size_waste; - seed_1 = k; - seed_2 = j; - } - } - - - if (seed_1 == 0 || seed_2 == 0) - { - seed_1 = 1; - seed_2 = 2; - } - - datum_l = (TPQTGist *) palloc(sizeof(TPQTGist)); - *datum_l = *GETENTRY(entryvec, seed_1); - datum_r = 
(TPQTGist *) palloc(sizeof(TPQTGist)); - *datum_r = *GETENTRY(entryvec, seed_2); - - - maxoff = OffsetNumberNext(maxoff); - costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); - for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) - { - costvector[j - 1].pos = j; - size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j)); - size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j)); - costvector[j - 1].cost = abs(size_alpha - size_beta); - } - qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); - - for (k = 0; k < maxoff; k++) - { - j = costvector[k].pos; - if (j == seed_1) - { - *left++ = j; - v->spl_nleft++; - continue; - } - else if (j == seed_2) - { - *right++ = j; - v->spl_nright++; - continue; - } - size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j)); - size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j)); - - if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) - { - *datum_l |= *GETENTRY(entryvec, j); - *left++ = j; - v->spl_nleft++; - } - else - { - *datum_r |= *GETENTRY(entryvec, j); - *right++ = j; - v->spl_nright++; - } - } - - *right = *left = FirstOffsetNumber; - v->spl_ldatum = PointerGetDatum(datum_l); - v->spl_rdatum = PointerGetDatum(datum_r); - - PG_RETURN_POINTER(v); -} diff --git a/contrib/tsearch2/query_rewrite.c b/contrib/tsearch2/query_rewrite.c deleted file mode 100644 index 0604a49927..0000000000 --- a/contrib/tsearch2/query_rewrite.c +++ /dev/null @@ -1,549 +0,0 @@ -#include "postgres.h" -#include "executor/spi.h" - -#include "query_util.h" - -MemoryContext AggregateContext = NULL; - -static int -addone(int *counters, int last, int total) -{ - counters[last]++; - if (counters[last] >= total) - { - if (last == 0) - return 0; - if (addone(counters, last - 1, total - 1) == 0) - return 0; - counters[last] = counters[last - 1] + 1; - } - return 1; -} - -static QTNode * -findeq(QTNode * node, QTNode * ex, MemoryType memtype, QTNode * subs, bool 
*isfind) -{ - - if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val) - return node; - - if (node->flags & QTN_NOCHANGE) - return node; - - if (node->valnode->type == OPR) - { - if (node->nchild == ex->nchild) - { - if (QTNEq(node, ex)) - { - QTNFree(node); - if (subs) - { - node = QTNCopy(subs, memtype); - node->flags |= QTN_NOCHANGE; - } - else - node = NULL; - *isfind = true; - } - } - else if (node->nchild > ex->nchild) - { - int *counters = (int *) palloc(sizeof(int) * node->nchild); - int i; - QTNode *tnode = (QTNode *) MEMALLOC(memtype, sizeof(QTNode)); - - memset(tnode, 0, sizeof(QTNode)); - tnode->child = (QTNode **) MEMALLOC(memtype, sizeof(QTNode *) * ex->nchild); - tnode->nchild = ex->nchild; - tnode->valnode = (ITEM *) MEMALLOC(memtype, sizeof(ITEM)); - *(tnode->valnode) = *(ex->valnode); - - for (i = 0; i < ex->nchild; i++) - counters[i] = i; - - do - { - tnode->sign = 0; - for (i = 0; i < ex->nchild; i++) - { - tnode->child[i] = node->child[counters[i]]; - tnode->sign |= tnode->child[i]->sign; - } - - if (QTNEq(tnode, ex)) - { - int j = 0; - - MEMFREE(memtype, tnode->valnode); - MEMFREE(memtype, tnode->child); - MEMFREE(memtype, tnode); - if (subs) - { - tnode = QTNCopy(subs, memtype); - tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE; - } - else - tnode = NULL; - - node->child[counters[0]] = tnode; - - for (i = 1; i < ex->nchild; i++) - node->child[counters[i]] = NULL; - for (i = 0; i < node->nchild; i++) - { - if (node->child[i]) - { - node->child[j] = node->child[i]; - j++; - } - } - - node->nchild = j; - - *isfind = true; - - break; - } - } while (addone(counters, ex->nchild - 1, node->nchild)); - if (tnode && (tnode->flags & QTN_NOCHANGE) == 0) - { - MEMFREE(memtype, tnode->valnode); - MEMFREE(memtype, tnode->child); - MEMFREE(memtype, tnode); - } - else - QTNSort(node); - pfree(counters); - } - } - else if (QTNEq(node, ex)) - { - QTNFree(node); - if (subs) - { - node = 
QTNCopy(subs, memtype); - node->flags |= QTN_NOCHANGE; - } - else - { - node = NULL; - } - *isfind = true; - } - - return node; -} - -static QTNode * -dofindsubquery(QTNode * root, QTNode * ex, MemoryType memtype, QTNode * subs, bool *isfind) -{ - root = findeq(root, ex, memtype, subs, isfind); - - if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR) - { - int i; - - for (i = 0; i < root->nchild; i++) - root->child[i] = dofindsubquery(root->child[i], ex, memtype, subs, isfind); - } - - return root; -} - -static QTNode * -dropvoidsubtree(QTNode * root) -{ - - if (!root) - return NULL; - - if (root->valnode->type == OPR) - { - int i, - j = 0; - - for (i = 0; i < root->nchild; i++) - { - if (root->child[i]) - { - root->child[j] = root->child[i]; - j++; - } - } - - root->nchild = j; - - if (root->valnode->val == (int4) '!' && root->nchild == 0) - { - QTNFree(root); - root = NULL; - } - else if (root->nchild == 1) - { - QTNode *nroot = root->child[0]; - - pfree(root); - root = nroot; - } - } - - return root; -} - -static QTNode * -findsubquery(QTNode * root, QTNode * ex, MemoryType memtype, QTNode * subs, bool *isfind) -{ - bool DidFind = false; - - root = dofindsubquery(root, ex, memtype, subs, &DidFind); - - if (!subs && DidFind) - root = dropvoidsubtree(root); - - if (isfind) - *isfind = DidFind; - - return root; -} - -static Oid tsqOid = InvalidOid; -static void -get_tsq_Oid(void) -{ - int ret; - bool isnull; - - if ((ret = SPI_exec("select oid from pg_type where typname='tsquery'", 1)) < 0) - /* internal error */ - elog(ERROR, "SPI_exec to get tsquery oid returns %d", ret); - - if (SPI_processed < 1) - /* internal error */ - elog(ERROR, "there is no tsvector type"); - tsqOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - if (tsqOid == InvalidOid) - /* internal error */ - elog(ERROR, "tsquery type has InvalidOid"); -} - - -PG_FUNCTION_INFO_V1(tsquery_rewrite); 
-PG_FUNCTION_INFO_V1(rewrite_accum); -Datum rewrite_accum(PG_FUNCTION_ARGS); - -Datum -rewrite_accum(PG_FUNCTION_ARGS) -{ - QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0); - ArrayType *qa = (ArrayType *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); - QUERYTYPE *q; - QTNode *qex, - *subs = NULL, - *acctree; - bool isfind = false; - Datum *elemsp; - int nelemsp; - - AggregateContext = ((AggState *) fcinfo->context)->aggcontext; - - if (acc == NULL || PG_ARGISNULL(0)) - { - acc = (QUERYTYPE *) MEMALLOC(AggMemory, sizeof(QUERYTYPE)); - SET_VARSIZE(acc, HDRSIZEQT); - acc->size = 0; - } - - if (qa == NULL || PG_ARGISNULL(1)) - { - PG_FREE_IF_COPY(qa, 1); - PG_RETURN_POINTER(acc); - } - - if (ARR_NDIM(qa) != 1) - elog(ERROR, "array must be one-dimensional, not %d dimension", ARR_NDIM(qa)); - - if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3) - elog(ERROR, "array should have only three elements"); - - if (tsqOid == InvalidOid) - { - SPI_connect(); - get_tsq_Oid(); - SPI_finish(); - } - - if (ARR_ELEMTYPE(qa) != tsqOid) - elog(ERROR, "array should contain tsquery type"); - - deconstruct_array(qa, tsqOid, -1, false, 'i', &elemsp, NULL, &nelemsp); - - q = (QUERYTYPE *) DatumGetPointer(elemsp[0]); - if (q->size == 0) - { - pfree(elemsp); - PG_RETURN_POINTER(acc); - } - - if (!acc->size) - { - if (VARSIZE(acc) > HDRSIZEQT) - { - pfree(elemsp); - PG_RETURN_POINTER(acc); - } - else - acctree = QT2QTN(GETQUERY(q), GETOPERAND(q)); - } - else - acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc)); - - QTNTernary(acctree); - QTNSort(acctree); - - q = (QUERYTYPE *) DatumGetPointer(elemsp[1]); - if (q->size == 0) - { - pfree(elemsp); - PG_RETURN_POINTER(acc); - } - qex = QT2QTN(GETQUERY(q), GETOPERAND(q)); - QTNTernary(qex); - QTNSort(qex); - - q = (QUERYTYPE *) DatumGetPointer(elemsp[2]); - if (q->size) - subs = QT2QTN(GETQUERY(q), GETOPERAND(q)); - - acctree = findsubquery(acctree, qex, PlainMemory, subs, &isfind); - - if (isfind || !acc->size) - { - /* pfree( acc 
); do not pfree(p), because nodeAgg.c will */ - if (acctree) - { - QTNBinary(acctree); - acc = QTN2QT(acctree, AggMemory); - } - else - { - acc = (QUERYTYPE *) MEMALLOC(AggMemory, HDRSIZEQT * 2); - SET_VARSIZE(acc, HDRSIZEQT * 2); - acc->size = 0; - } - } - - pfree(elemsp); - QTNFree(qex); - QTNFree(subs); - QTNFree(acctree); - - PG_RETURN_POINTER(acc); -} - -PG_FUNCTION_INFO_V1(rewrite_finish); -Datum rewrite_finish(PG_FUNCTION_ARGS); - -Datum -rewrite_finish(PG_FUNCTION_ARGS) -{ - QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0); - QUERYTYPE *rewrited; - - if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0) - { - acc = (QUERYTYPE *) palloc(sizeof(QUERYTYPE)); - SET_VARSIZE(acc, HDRSIZEQT); - acc->size = 0; - } - - rewrited = (QUERYTYPE *) palloc(VARSIZE(acc)); - memcpy(rewrited, acc, VARSIZE(acc)); - pfree(acc); - - PG_RETURN_POINTER(rewrited); -} - -Datum tsquery_rewrite(PG_FUNCTION_ARGS); - -Datum -tsquery_rewrite(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - text *in = PG_GETARG_TEXT_P(1); - QUERYTYPE *rewrited = query; - QTNode *tree; - char *buf; - void *plan; - Portal portal; - bool isnull; - int i; - - if (query->size == 0) - { - PG_FREE_IF_COPY(in, 1); - PG_RETURN_POINTER(rewrited); - } - - tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); - QTNTernary(tree); - QTNSort(tree); - - buf = (char *) palloc(VARSIZE(in)); - memcpy(buf, VARDATA(in), VARSIZE(in) - VARHDRSZ); - buf[VARSIZE(in) - VARHDRSZ] = '\0'; - - SPI_connect(); - - if (tsqOid == InvalidOid) - get_tsq_Oid(); - - if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) - elog(ERROR, "SPI_prepare('%s') returns NULL", buf); - - if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL) - elog(ERROR, "SPI_cursor_open('%s') returns NULL", buf); - - SPI_cursor_fetch(portal, true, 100); - - if (SPI_tuptable->tupdesc->natts != 2) - elog(ERROR, "number of fields doesn't equal to 2"); - - if 
(SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tsqOid) - elog(ERROR, "column #1 isn't of tsquery type"); - - if (SPI_gettypeid(SPI_tuptable->tupdesc, 2) != tsqOid) - elog(ERROR, "column #2 isn't of tsquery type"); - - while (SPI_processed > 0 && tree) - { - for (i = 0; i < SPI_processed && tree; i++) - { - Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); - Datum sdata; - - if (isnull) - continue; - - sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); - - if (!isnull) - { - QUERYTYPE *qtex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(qdata)); - QUERYTYPE *qtsubs = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(sdata)); - QTNode *qex, - *qsubs = NULL; - - if (qtex->size == 0) - { - if (qtex != (QUERYTYPE *) DatumGetPointer(qdata)) - pfree(qtex); - if (qtsubs != (QUERYTYPE *) DatumGetPointer(sdata)) - pfree(qtsubs); - continue; - } - - qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); - - QTNTernary(qex); - QTNSort(qex); - - if (qtsubs->size) - qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); - - tree = findsubquery(tree, qex, SPIMemory, qsubs, NULL); - - QTNFree(qex); - if (qtex != (QUERYTYPE *) DatumGetPointer(qdata)) - pfree(qtex); - QTNFree(qsubs); - if (qtsubs != (QUERYTYPE *) DatumGetPointer(sdata)) - pfree(qtsubs); - } - } - - SPI_freetuptable(SPI_tuptable); - SPI_cursor_fetch(portal, true, 100); - } - - SPI_freetuptable(SPI_tuptable); - SPI_cursor_close(portal); - SPI_freeplan(plan); - SPI_finish(); - - - if (tree) - { - QTNBinary(tree); - rewrited = QTN2QT(tree, PlainMemory); - QTNFree(tree); - PG_FREE_IF_COPY(query, 0); - } - else - { - SET_VARSIZE(rewrited, HDRSIZEQT); - rewrited->size = 0; - } - - pfree(buf); - PG_FREE_IF_COPY(in, 1); - PG_RETURN_POINTER(rewrited); -} - - -PG_FUNCTION_INFO_V1(tsquery_rewrite_query); -Datum tsquery_rewrite_query(PG_FUNCTION_ARGS); - -Datum -tsquery_rewrite_query(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) 
DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); - QUERYTYPE *subst = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2))); - QUERYTYPE *rewrited = query; - QTNode *tree, - *qex, - *subs = NULL; - - if (query->size == 0 || ex->size == 0) - { - PG_FREE_IF_COPY(ex, 1); - PG_FREE_IF_COPY(subst, 2); - PG_RETURN_POINTER(rewrited); - } - - tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); - QTNTernary(tree); - QTNSort(tree); - - qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex)); - QTNTernary(qex); - QTNSort(qex); - - if (subst->size) - subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); - - tree = findsubquery(tree, qex, PlainMemory, subs, NULL); - QTNFree(qex); - QTNFree(subs); - - if (!tree) - { - SET_VARSIZE(rewrited, HDRSIZEQT); - rewrited->size = 0; - PG_FREE_IF_COPY(ex, 1); - PG_FREE_IF_COPY(subst, 2); - PG_RETURN_POINTER(rewrited); - } - else - { - QTNBinary(tree); - rewrited = QTN2QT(tree, PlainMemory); - QTNFree(tree); - } - - PG_FREE_IF_COPY(query, 0); - PG_FREE_IF_COPY(ex, 1); - PG_FREE_IF_COPY(subst, 2); - PG_RETURN_POINTER(rewrited); -} diff --git a/contrib/tsearch2/query_support.c b/contrib/tsearch2/query_support.c deleted file mode 100644 index 57332cba1e..0000000000 --- a/contrib/tsearch2/query_support.c +++ /dev/null @@ -1,205 +0,0 @@ -#include "postgres.h" -#include "fmgr.h" - -#include "query_util.h" - -PG_FUNCTION_INFO_V1(tsquery_numnode); -Datum tsquery_numnode(PG_FUNCTION_ARGS); - -Datum -tsquery_numnode(PG_FUNCTION_ARGS) -{ - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - int nnode = query->size; - - PG_FREE_IF_COPY(query, 0); - PG_RETURN_INT32(nnode); -} - -static QTNode * -join_tsqueries(QUERYTYPE * a, QUERYTYPE * b) -{ - QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); - - res->flags |= QTN_NEEDFREE; - - res->valnode = (ITEM *) palloc0(sizeof(ITEM)); - res->valnode->type = OPR; - - res->child 
= (QTNode **) palloc0(sizeof(QTNode *) * 2); - res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); - res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a)); - res->nchild = 2; - - return res; -} - -PG_FUNCTION_INFO_V1(tsquery_and); -Datum tsquery_and(PG_FUNCTION_ARGS); - -Datum -tsquery_and(PG_FUNCTION_ARGS) -{ - QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); - QTNode *res; - QUERYTYPE *query; - - if (a->size == 0) - { - PG_FREE_IF_COPY(a, 1); - PG_RETURN_POINTER(b); - } - else if (b->size == 0) - { - PG_FREE_IF_COPY(b, 1); - PG_RETURN_POINTER(a); - } - - res = join_tsqueries(a, b); - - res->valnode->val = '&'; - - query = QTN2QT(res, PlainMemory); - - QTNFree(res); - PG_FREE_IF_COPY(a, 0); - PG_FREE_IF_COPY(b, 1); - - PG_RETURN_POINTER(query); -} - -PG_FUNCTION_INFO_V1(tsquery_or); -Datum tsquery_or(PG_FUNCTION_ARGS); - -Datum -tsquery_or(PG_FUNCTION_ARGS) -{ - QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); - QTNode *res; - QUERYTYPE *query; - - if (a->size == 0) - { - PG_FREE_IF_COPY(a, 1); - PG_RETURN_POINTER(b); - } - else if (b->size == 0) - { - PG_FREE_IF_COPY(b, 1); - PG_RETURN_POINTER(a); - } - - res = join_tsqueries(a, b); - - res->valnode->val = '|'; - - query = QTN2QT(res, PlainMemory); - - QTNFree(res); - PG_FREE_IF_COPY(a, 0); - PG_FREE_IF_COPY(b, 1); - - PG_RETURN_POINTER(query); -} - -PG_FUNCTION_INFO_V1(tsquery_not); -Datum tsquery_not(PG_FUNCTION_ARGS); - -Datum -tsquery_not(PG_FUNCTION_ARGS) -{ - QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - QTNode *res; - QUERYTYPE *query; - - if (a->size == 0) - PG_RETURN_POINTER(a); - - res = (QTNode *) palloc0(sizeof(QTNode)); - - res->flags |= QTN_NEEDFREE; - - res->valnode = (ITEM *) palloc0(sizeof(ITEM)); 
- res->valnode->type = OPR; - res->valnode->val = '!'; - - res->child = (QTNode **) palloc0(sizeof(QTNode *)); - res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a)); - res->nchild = 1; - - query = QTN2QT(res, PlainMemory); - - QTNFree(res); - PG_FREE_IF_COPY(a, 0); - - PG_RETURN_POINTER(query); -} - -static int -CompareTSQ(QUERYTYPE * a, QUERYTYPE * b) -{ - if (a->size != b->size) - { - return (a->size < b->size) ? -1 : 1; - } - else if (VARSIZE(a) != VARSIZE(b)) - { - return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1; - } - else - { - QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a)); - QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b)); - int res = QTNodeCompare(an, bn); - - QTNFree(an); - QTNFree(bn); - - return res; - } - - return 0; -} - -PG_FUNCTION_INFO_V1(tsquery_cmp); -\ -Datum tsquery_cmp(PG_FUNCTION_ARGS); - -Datum -tsquery_cmp(PG_FUNCTION_ARGS) -{ - QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); - QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); - int res = CompareTSQ(a, b); - - PG_FREE_IF_COPY(a, 0); - PG_FREE_IF_COPY(b, 1); - - PG_RETURN_INT32(res); -} - -#define CMPFUNC( NAME, ACTION ) \ -Datum NAME(PG_FUNCTION_ARGS); \ - \ -Datum \ -NAME(PG_FUNCTION_ARGS) { \ - QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); \ - QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); \ - int res = CompareTSQ(a,b); \ - \ - PG_FREE_IF_COPY(a,0); \ - PG_FREE_IF_COPY(b,1); \ - \ - PG_RETURN_BOOL( ACTION ); \ -} \ - \ -PG_FUNCTION_INFO_V1(NAME) - -CMPFUNC(tsquery_lt, res < 0); -CMPFUNC(tsquery_le, res <= 0); -CMPFUNC(tsquery_eq, res == 0); -CMPFUNC(tsquery_ge, res >= 0); -CMPFUNC(tsquery_gt, res > 0); -CMPFUNC(tsquery_ne, res != 0); diff --git a/contrib/tsearch2/query_util.c b/contrib/tsearch2/query_util.c deleted file mode 100644 index 083d173d13..0000000000 --- a/contrib/tsearch2/query_util.c +++ /dev/null @@ -1,301 +0,0 @@ 
-#include "postgres.h" -#include "executor/spi.h" -#include "query_util.h" - -QTNode * -QT2QTN(ITEM * in, char *operand) -{ - QTNode *node = (QTNode *) palloc0(sizeof(QTNode)); - - node->valnode = in; - - if (in->type == OPR) - { - node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); - node->child[0] = QT2QTN(in + 1, operand); - node->sign = node->child[0]->sign; - if (in->val == (int4) '!') - node->nchild = 1; - else - { - node->nchild = 2; - node->child[1] = QT2QTN(in + in->left, operand); - node->sign |= node->child[1]->sign; - } - } - else if (operand) - { - node->word = operand + in->distance; - node->sign = 1 << (in->val % 32); - } - - return node; -} - -void -QTNFree(QTNode * in) -{ - if (!in) - return; - - if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0) - pfree(in->word); - - if (in->child) - { - if (in->valnode) - { - if (in->valnode->type == OPR && in->nchild > 0) - { - int i; - - for (i = 0; i < in->nchild; i++) - QTNFree(in->child[i]); - } - if (in->flags & QTN_NEEDFREE) - pfree(in->valnode); - } - pfree(in->child); - } - - pfree(in); -} - -int -QTNodeCompare(QTNode * an, QTNode * bn) -{ - if (an->valnode->type != bn->valnode->type) - return (an->valnode->type > bn->valnode->type) ? -1 : 1; - else if (an->valnode->val != bn->valnode->val) - return (an->valnode->val > bn->valnode->val) ? -1 : 1; - else if (an->valnode->type == VAL) - { - if (an->valnode->length == bn->valnode->length) - return strncmp(an->word, bn->word, an->valnode->length); - else - return (an->valnode->length > bn->valnode->length) ? -1 : 1; - } - else if (an->nchild != bn->nchild) - { - return (an->nchild > bn->nchild) ? 
-1 : 1; - } - else - { - int i, - res; - - for (i = 0; i < an->nchild; i++) - if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0) - return res; - } - - return 0; -} - -static int -cmpQTN(const void *a, const void *b) -{ - return QTNodeCompare(*(QTNode **) a, *(QTNode **) b); -} - -void -QTNSort(QTNode * in) -{ - int i; - - if (in->valnode->type != OPR) - return; - - for (i = 0; i < in->nchild; i++) - QTNSort(in->child[i]); - if (in->nchild > 1) - qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN); -} - -bool -QTNEq(QTNode * a, QTNode * b) -{ - uint32 sign = a->sign & b->sign; - - if (!(sign == a->sign && sign == b->sign)) - return 0; - - return (QTNodeCompare(a, b) == 0) ? true : false; -} - -void -QTNTernary(QTNode * in) -{ - int i; - - if (in->valnode->type != OPR) - return; - - for (i = 0; i < in->nchild; i++) - QTNTernary(in->child[i]); - - for (i = 0; i < in->nchild; i++) - { - if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val) - { - QTNode *cc = in->child[i]; - int oldnchild = in->nchild; - - in->nchild += cc->nchild - 1; - in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *)); - - if (i + 1 != oldnchild) - memmove(in->child + i + cc->nchild, in->child + i + 1, - (oldnchild - i - 1) * sizeof(QTNode *)); - - memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *)); - i += cc->nchild - 1; - - pfree(cc); - } - } -} - -void -QTNBinary(QTNode * in) -{ - int i; - - if (in->valnode->type != OPR) - return; - - for (i = 0; i < in->nchild; i++) - QTNBinary(in->child[i]); - - if (in->nchild <= 2) - return; - - while (in->nchild > 2) - { - QTNode *nn = (QTNode *) palloc0(sizeof(QTNode)); - - nn->valnode = (ITEM *) palloc0(sizeof(ITEM)); - nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); - - nn->nchild = 2; - nn->flags = QTN_NEEDFREE; - - nn->child[0] = in->child[0]; - nn->child[1] = in->child[1]; - nn->sign = nn->child[0]->sign | nn->child[1]->sign; - - 
nn->valnode->type = in->valnode->type; - nn->valnode->val = in->valnode->val; - - in->child[0] = nn; - in->child[1] = in->child[in->nchild - 1]; - in->nchild--; - } -} - -static void -cntsize(QTNode * in, int4 *sumlen, int4 *nnode) -{ - *nnode += 1; - if (in->valnode->type == OPR) - { - int i; - - for (i = 0; i < in->nchild; i++) - cntsize(in->child[i], sumlen, nnode); - } - else - { - *sumlen += in->valnode->length + 1; - } -} - -typedef struct -{ - ITEM *curitem; - char *operand; - char *curoperand; -} QTN2QTState; - -static void -fillQT(QTN2QTState * state, QTNode * in) -{ - *(state->curitem) = *(in->valnode); - - if (in->valnode->type == VAL) - { - memcpy(state->curoperand, in->word, in->valnode->length); - state->curitem->distance = state->curoperand - state->operand; - state->curoperand[in->valnode->length] = '\0'; - state->curoperand += in->valnode->length + 1; - state->curitem++; - } - else - { - ITEM *curitem = state->curitem; - - Assert(in->nchild <= 2); - state->curitem++; - - fillQT(state, in->child[0]); - - if (in->nchild == 2) - { - curitem->left = state->curitem - curitem; - fillQT(state, in->child[1]); - } - } -} - -QUERYTYPE * -QTN2QT(QTNode * in, MemoryType memtype) -{ - QUERYTYPE *out; - int len; - int sumlen = 0, - nnode = 0; - QTN2QTState state; - - cntsize(in, &sumlen, &nnode); - len = COMPUTESIZE(nnode, sumlen); - - out = (QUERYTYPE *) MEMALLOC(memtype, len); - SET_VARSIZE(out, len); - out->size = nnode; - - state.curitem = GETQUERY(out); - state.operand = state.curoperand = GETOPERAND(out); - - fillQT(&state, in); - return out; -} - -QTNode * -QTNCopy(QTNode * in, MemoryType memtype) -{ - QTNode *out = (QTNode *) MEMALLOC(memtype, sizeof(QTNode)); - - *out = *in; - out->valnode = (ITEM *) MEMALLOC(memtype, sizeof(ITEM)); - *(out->valnode) = *(in->valnode); - out->flags |= QTN_NEEDFREE; - - if (in->valnode->type == VAL) - { - out->word = MEMALLOC(memtype, in->valnode->length + 1); - memcpy(out->word, in->word, in->valnode->length); - 
out->word[in->valnode->length] = '\0'; - out->flags |= QTN_WORDFREE; - } - else - { - int i; - - out->child = (QTNode **) MEMALLOC(memtype, sizeof(QTNode *) * in->nchild); - - for (i = 0; i < in->nchild; i++) - out->child[i] = QTNCopy(in->child[i], memtype); - } - - return out; -} diff --git a/contrib/tsearch2/query_util.h b/contrib/tsearch2/query_util.h deleted file mode 100644 index 5ed98e628d..0000000000 --- a/contrib/tsearch2/query_util.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef __QUERY_UTIL_H__ -#define __QUERY_UTIL_H__ - -#include "postgres.h" -#include "utils/memutils.h" - -#include "query.h" -#include "executor/spi.h" - -typedef struct QTNode -{ - ITEM *valnode; - uint32 flags; - int4 nchild; - char *word; - uint32 sign; - struct QTNode **child; -} QTNode; - -#define QTN_NEEDFREE 0x01 -#define QTN_NOCHANGE 0x02 -#define QTN_WORDFREE 0x04 - -typedef enum -{ - PlainMemory, - SPIMemory, - AggMemory -} MemoryType; - -QTNode *QT2QTN(ITEM * in, char *operand); -QUERYTYPE *QTN2QT(QTNode * in, MemoryType memtype); -void QTNFree(QTNode * in); -void QTNSort(QTNode * in); -void QTNTernary(QTNode * in); -void QTNBinary(QTNode * in); -int QTNodeCompare(QTNode * an, QTNode * bn); -QTNode *QTNCopy(QTNode * in, MemoryType memtype); -bool QTNEq(QTNode * a, QTNode * b); - - -extern MemoryContext AggregateContext; - -#define MEMALLOC(us, s) ( ((us)==SPIMemory) ? SPI_palloc(s) : ( ( (us)==PlainMemory ) ? palloc(s) : MemoryContextAlloc(AggregateContext, (s)) ) ) -#define MEMFREE(us, p) ( ((us)==SPIMemory) ? 
SPI_pfree(p) : pfree(p) ) - -#endif diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c deleted file mode 100644 index b6960c3bf3..0000000000 --- a/contrib/tsearch2/rank.c +++ /dev/null @@ -1,924 +0,0 @@ -/* - * Relevation - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <math.h> - -#include "access/gist.h" -#include "access/itup.h" -#include "catalog/namespace.h" -#include "commands/trigger.h" -#include "executor/spi.h" -#include "fmgr.h" -#include "funcapi.h" -#include "nodes/pg_list.h" -#include "storage/bufpage.h" -#include "utils/array.h" -#include "utils/builtins.h" - -#include "tsvector.h" -#include "query.h" -#include "common.h" - -PG_FUNCTION_INFO_V1(rank); -Datum rank(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(rank_def); -Datum rank_def(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(rank_cd); -Datum rank_cd(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(rank_cd_def); -Datum rank_cd_def(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(get_covers); -Datum get_covers(PG_FUNCTION_ARGS); - -static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}; - -#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) - -#define RANK_NO_NORM 0x00 -#define RANK_NORM_LOGLENGTH 0x01 -#define RANK_NORM_LENGTH 0x02 -#define RANK_NORM_EXTDIST 0x04 -#define RANK_NORM_UNIQ 0x08 -#define RANK_NORM_LOGUNIQ 0x10 -#define DEF_NORM_METHOD RANK_NO_NORM - -static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q); -static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q); - -/* - * Returns a weight of a word collocation - */ -static float4 -word_distance(int4 w) -{ - if (w > 100) - return (float4)1e-30; - - return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2)); -} - -static int -cnt_length(tsvector * t) -{ - WordEntry *ptr = ARRPTR(t), - *end = (WordEntry *) STRPTR(t); - int len = 0, - clen; - - while (ptr < end) - { - if ((clen = POSDATALEN(t, ptr)) == 0) - len += 1; - else - len += clen; - ptr++; - } - - return len; -} - -static int4 -WordECompareITEM(char 
*eval, char *qval, WordEntry * ptr, ITEM * item) -{ - if (ptr->len == item->length) - return strncmp( - eval + ptr->pos, - qval + item->distance, - item->length); - - return (ptr->len > item->length) ? 1 : -1; -} - -static WordEntry * -find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item) -{ - WordEntry *StopLow = ARRPTR(t); - WordEntry *StopHigh = (WordEntry *) STRPTR(t); - WordEntry *StopMiddle; - int difference; - - /* Loop invariant: StopLow <= item < StopHigh */ - - while (StopLow < StopHigh) - { - StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item); - if (difference == 0) - return StopMiddle; - else if (difference < 0) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - return NULL; -} - - -static int -compareITEM(const void *a, const void *b, void *arg) -{ - char *operand = (char *) arg; - - if ((*(ITEM **) a)->length == (*(ITEM **) b)->length) - return strncmp(operand + (*(ITEM **) a)->distance, - operand + (*(ITEM **) b)->distance, - (*(ITEM **) b)->length); - - return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 
1 : -1; -} - -static ITEM ** -SortAndUniqItems(char *operand, ITEM * item, int *size) -{ - ITEM **res, - **ptr, - **prevptr; - - ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size); - - while ((*size)--) - { - if (item->type == VAL) - { - *ptr = item; - ptr++; - } - item++; - } - - *size = ptr - res; - if (*size < 2) - return res; - - qsort_arg(res, *size, sizeof(ITEM **), compareITEM, (void *) operand); - - ptr = res + 1; - prevptr = res; - - while (ptr - res < *size) - { - if (compareITEM((void *) ptr, (void *) prevptr, (void *) operand) != 0) - { - prevptr++; - *prevptr = *ptr; - } - ptr++; - } - - *size = prevptr + 1 - res; - return res; -} - -static WordEntryPos POSNULL[] = { - 0, - 0 -}; - -static float -calc_rank_and(float *w, tsvector * t, QUERYTYPE * q) -{ - uint16 **pos; - int i, - k, - l, - p; - WordEntry *entry; - WordEntryPos *post, - *ct; - int4 dimt, - lenct, - dist; - float res = -1.0; - ITEM **item; - int size = q->size; - - item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); - if (size < 2) - { - pfree(item); - return calc_rank_or(w, t, q); - } - pos = (uint16 **) palloc(sizeof(uint16 *) * q->size); - memset(pos, 0, sizeof(uint16 *) * q->size); - *(uint16 *) POSNULL = lengthof(POSNULL) - 1; - WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1); - - for (i = 0; i < size; i++) - { - entry = find_wordentry(t, q, item[i]); - if (!entry) - continue; - - if (entry->haspos) - pos[i] = (uint16 *) _POSDATAPTR(t, entry); - else - pos[i] = (uint16 *) POSNULL; - - - dimt = *(uint16 *) (pos[i]); - post = (WordEntryPos *) (pos[i] + 1); - for (k = 0; k < i; k++) - { - if (!pos[k]) - continue; - lenct = *(uint16 *) (pos[k]); - ct = (WordEntryPos *) (pos[k] + 1); - for (l = 0; l < dimt; l++) - { - for (p = 0; p < lenct; p++) - { - dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); - if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL))) - { - float curw; - - if (!dist) - dist = MAXENTRYPOS; - curw = sqrt(wpos(post[l]) 
* wpos(ct[p]) * word_distance(dist)); - res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); - } - } - } - } - } - pfree(pos); - pfree(item); - return res; -} - -static float -calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) -{ - WordEntry *entry; - WordEntryPos *post; - int4 dimt, - j, - i; - float res = 0.0; - ITEM **item; - int size = q->size; - - *(uint16 *) POSNULL = lengthof(POSNULL) - 1; - item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); - - for (i = 0; i < size; i++) - { - float resj, - wjm; - int4 jm; - - entry = find_wordentry(t, q, item[i]); - if (!entry) - continue; - - if (entry->haspos) - { - dimt = POSDATALEN(t, entry); - post = POSDATAPTR(t, entry); - } - else - { - dimt = *(uint16 *) POSNULL; - post = POSNULL + 1; - } - - resj = 0.0; - wjm = -1.0; - jm = 0; - for (j = 0; j < dimt; j++) - { - resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); - if (wpos(post[j]) > wjm) - { - wjm = wpos(post[j]); - jm = j; - } - } -/* - limit (sum(i/i^2),i->inf) = pi^2/6 - resj = sum(wi/i^2),i=1,noccurence, - wi - should be sorted desc, - don't sort for now, just choose maximum weight. This should be corrected - Oleg Bartunov -*/ - res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; - } - if (size > 0) - res = res / size; - pfree(item); - return res; -} - -static float -calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method) -{ - ITEM *item = GETQUERY(q); - float res = 0.0; - int len; - - if (!t->size || !q->size) - return 0.0; - - res = (item->type != VAL && item->val == (int4) '&') ? 
- calc_rank_and(w, t, q) : calc_rank_or(w, t, q); - - if (res < 0) - res = (float)1e-20; - - if ((method & RANK_NORM_LOGLENGTH) && t->size > 0) - res /= log((double) (cnt_length(t) + 1)) / log(2.0); - - if (method & RANK_NORM_LENGTH) - { - len = cnt_length(t); - if (len > 0) - res /= (float) len; - } - - if ((method & RANK_NORM_UNIQ) && t->size > 0) - res /= (float) (t->size); - - if ((method & RANK_NORM_LOGUNIQ) && t->size > 0) - res /= log((double) (t->size + 1)) / log(2.0); - - return res; -} - -Datum -rank(PG_FUNCTION_ARGS) -{ - ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); - int method = DEF_NORM_METHOD; - float res = 0.0; - float ws[lengthof(weights)]; - float4 *arrdata; - int i; - - if (ARR_NDIM(win) != 1) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("array of weight must be one-dimensional"))); - - if (ARRNELEMS(win) < lengthof(weights)) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("array of weight is too short"))); - - if (ARR_HASNULL(win)) - ereport(ERROR, - (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), - errmsg("array of weight must not contain nulls"))); - - arrdata = (float4 *) ARR_DATA_PTR(win); - for (i = 0; i < lengthof(weights); i++) - { - ws[i] = (arrdata[i] >= 0) ? 
arrdata[i] : weights[i]; - if (ws[i] > 1.0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("weight out of range"))); - } - - if (PG_NARGS() == 4) - method = PG_GETARG_INT32(3); - - res = calc_rank(ws, txt, query, method); - - PG_FREE_IF_COPY(win, 0); - PG_FREE_IF_COPY(txt, 1); - PG_FREE_IF_COPY(query, 2); - PG_RETURN_FLOAT4(res); -} - -Datum -rank_def(PG_FUNCTION_ARGS) -{ - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - float res = 0.0; - int method = DEF_NORM_METHOD; - - if (PG_NARGS() == 3) - method = PG_GETARG_INT32(2); - - res = calc_rank(weights, txt, query, method); - - PG_FREE_IF_COPY(txt, 0); - PG_FREE_IF_COPY(query, 1); - PG_RETURN_FLOAT4(res); -} - - -typedef struct -{ - ITEM **item; - int16 nitem; - bool needfree; - uint8 wclass; - int32 pos; -} DocRepresentation; - -static int -compareDocR(const void *a, const void *b) -{ - if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos) - return 0; - return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 
1 : -1; -} - -static bool -checkcondition_ITEM(void *checkval, ITEM * val) -{ - return (bool) (val->istrue); -} - -static void -reset_istrue_flag(QUERYTYPE * query) -{ - ITEM *item = GETQUERY(query); - int i; - - /* reset istrue flag */ - for (i = 0; i < query->size; i++) - { - if (item->type == VAL) - item->istrue = 0; - item++; - } -} - -typedef struct -{ - int pos; - int p; - int q; - DocRepresentation *begin; - DocRepresentation *end; -} Extention; - - -static bool -Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext) -{ - DocRepresentation *ptr; - int lastpos = ext->pos; - int i; - bool found = false; - - reset_istrue_flag(query); - - ext->p = 0x7fffffff; - ext->q = 0; - ptr = doc + ext->pos; - - /* find upper bound of cover from current position, move up */ - while (ptr - doc < len) - { - for (i = 0; i < ptr->nitem; i++) - ptr->item[i]->istrue = 1; - if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM)) - { - if (ptr->pos > ext->q) - { - ext->q = ptr->pos; - ext->end = ptr; - lastpos = ptr - doc; - found = true; - } - break; - } - ptr++; - } - - if (!found) - return false; - - reset_istrue_flag(query); - - ptr = doc + lastpos; - - /* find lower bound of cover from founded upper bound, move down */ - while (ptr >= doc) - { - for (i = 0; i < ptr->nitem; i++) - ptr->item[i]->istrue = 1; - if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM)) - { - if (ptr->pos < ext->p) - { - ext->begin = ptr; - ext->p = ptr->pos; - } - break; - } - ptr--; - } - - if (ext->p <= ext->q) - { - /* - * set position for next try to next lexeme after begining of founded - * cover - */ - ext->pos = (ptr - doc) + 1; - return true; - } - - ext->pos++; - return Cover(doc, len, query, ext); -} - -static DocRepresentation * -get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen) -{ - ITEM *item = GETQUERY(query); - WordEntry *entry; - WordEntryPos *post; - int4 dimt, - j, - i; - int len = query->size * 4, - cur = 0; - DocRepresentation 
*doc; - char *operand; - - *(uint16 *) POSNULL = lengthof(POSNULL) - 1; - doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); - operand = GETOPERAND(query); - reset_istrue_flag(query); - - for (i = 0; i < query->size; i++) - { - if (item[i].type != VAL || item[i].istrue) - continue; - - entry = find_wordentry(txt, query, &(item[i])); - if (!entry) - continue; - - if (entry->haspos) - { - dimt = POSDATALEN(txt, entry); - post = POSDATAPTR(txt, entry); - } - else - { - dimt = *(uint16 *) POSNULL; - post = POSNULL + 1; - } - - while (cur + dimt >= len) - { - len *= 2; - doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); - } - - for (j = 0; j < dimt; j++) - { - if (j == 0) - { - ITEM *kptr, - *iptr = item + i; - int k; - - doc[cur].needfree = false; - doc[cur].nitem = 0; - doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size); - - for (k = 0; k < query->size; k++) - { - kptr = item + k; - if (k == i || - (item[k].type == VAL && - compareITEM(&kptr, &iptr, operand) == 0)) - { - doc[cur].item[doc[cur].nitem] = item + k; - doc[cur].nitem++; - kptr->istrue = 1; - } - } - } - else - { - doc[cur].needfree = false; - doc[cur].nitem = doc[cur - 1].nitem; - doc[cur].item = doc[cur - 1].item; - } - doc[cur].pos = WEP_GETPOS(post[j]); - doc[cur].wclass = WEP_GETWEIGHT(post[j]); - cur++; - } - } - - *doclen = cur; - - if (cur > 0) - { - if (cur > 1) - qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); - return doc; - } - - pfree(doc); - return NULL; -} - -static float4 -calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method) -{ - DocRepresentation *doc; - int len, - i, - doclen = 0; - Extention ext; - double Wdoc = 0.0; - double invws[lengthof(weights)]; - double SumDist = 0.0, - PrevExtPos = 0.0, - CurExtPos = 0.0; - int NExtent = 0; - - for (i = 0; i < lengthof(weights); i++) - { - invws[i] = ((double) ((arrdata[i] >= 0) ? 
arrdata[i] : weights[i])); - if (invws[i] > 1.0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("weight out of range"))); - invws[i] = 1.0 / invws[i]; - } - - doc = get_docrep(txt, query, &doclen); - if (!doc) - return 0.0; - - MemSet(&ext, 0, sizeof(Extention)); - while (Cover(doc, doclen, query, &ext)) - { - double Cpos = 0.0; - double InvSum = 0.0; - int nNoise; - DocRepresentation *ptr = ext.begin; - - while (ptr <= ext.end) - { - InvSum += invws[ptr->wclass]; - ptr++; - } - - Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum; - /* - * if doc are big enough then ext.q may be equal to ext.p - * due to limit of posional information. In this case we - * approximate number of noise word as half cover's - * length - */ - nNoise = (ext.q - ext.p) - (ext.end - ext.begin); - if ( nNoise < 0 ) - nNoise = (ext.end - ext.begin) / 2; - Wdoc += Cpos / ((double) (1 + nNoise)); - - CurExtPos = ((double) (ext.q + ext.p)) / 2.0; - if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by - * zero in a case of - multiple lexize */ ) - SumDist += 1.0 / (CurExtPos - PrevExtPos); - - PrevExtPos = CurExtPos; - NExtent++; - } - - if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0) - Wdoc /= log((double) (cnt_length(txt) + 1)); - - if (method & RANK_NORM_LENGTH) - { - len = cnt_length(txt); - if (len > 0) - Wdoc /= (double) len; - } - - if ((method & RANK_NORM_EXTDIST) && SumDist > 0) - Wdoc /= ((double) NExtent) / SumDist; - - if ((method & RANK_NORM_UNIQ) && txt->size > 0) - Wdoc /= (double) (txt->size); - - if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) - Wdoc /= log((double) (txt->size + 1)) / log(2.0); - - for (i = 0; i < doclen; i++) - if (doc[i].needfree) - pfree(doc[i].item); - pfree(doc); - - return (float4) Wdoc; -} - -Datum -rank_cd(PG_FUNCTION_ARGS) -{ - ArrayType *win; - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2)); - int method = 
DEF_NORM_METHOD; - float4 res; - - /* - * Pre-8.2, rank_cd took just a plain int as its first argument. - * It was a mistake to keep the same C function name while changing the - * signature, but it's too late to fix that. Instead, do a runtime test - * to make sure the expected datatype has been passed. This is needed - * to prevent core dumps if tsearch2 function definitions from an old - * database are loaded into an 8.2 server. - */ - if (get_fn_expr_argtype(fcinfo->flinfo, 0) != FLOAT4ARRAYOID) - ereport(ERROR, - (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), - errmsg("rank_cd() now takes real[] as its first argument, not integer"))); - - /* now safe to dereference the first arg */ - win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - - if (ARR_NDIM(win) != 1) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("array of weight must be one-dimensional"))); - - if (ARRNELEMS(win) < lengthof(weights)) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("array of weight is too short"))); - - if (ARR_HASNULL(win)) - ereport(ERROR, - (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), - errmsg("array of weight must not contain nulls"))); - - if (PG_NARGS() == 4) - method = PG_GETARG_INT32(3); - - res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method); - - PG_FREE_IF_COPY(win, 0); - PG_FREE_IF_COPY(txt, 1); - PG_FREE_IF_COPY(query, 2); - - PG_RETURN_FLOAT4(res); -} - - -Datum -rank_cd_def(PG_FUNCTION_ARGS) -{ - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)); - float4 res; - - res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? 
PG_GETARG_DATUM(2) : DEF_NORM_METHOD); - - PG_FREE_IF_COPY(txt, 0); - PG_FREE_IF_COPY(query, 1); - - PG_RETURN_FLOAT4(res); -} - -/**************debug*************/ - -typedef struct -{ - char *w; - int2 len; - int2 pos; - int2 start; - int2 finish; -} DocWord; - -static int -compareDocWord(const void *a, const void *b) -{ - if (((DocWord *) a)->pos == ((DocWord *) b)->pos) - return 0; - return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1; -} - - -Datum -get_covers(PG_FUNCTION_ARGS) -{ - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)); - WordEntry *pptr = ARRPTR(txt); - int i, - dlen = 0, - j, - cur = 0, - len = 0, - rlen; - DocWord *dw, - *dwptr; - text *out; - char *cptr; - DocRepresentation *doc; - int olddwpos = 0; - int ncover = 1; - Extention ext; - - doc = get_docrep(txt, query, &rlen); - - if (!doc) - { - out = palloc(VARHDRSZ); - SET_VARSIZE(out, VARHDRSZ); - PG_FREE_IF_COPY(txt, 0); - PG_FREE_IF_COPY(query, 1); - PG_RETURN_POINTER(out); - } - - for (i = 0; i < txt->size; i++) - { - if (!pptr[i].haspos) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("no pos info"))); - dlen += POSDATALEN(txt, &(pptr[i])); - } - - dwptr = dw = palloc(sizeof(DocWord) * dlen); - memset(dw, 0, sizeof(DocWord) * dlen); - - for (i = 0; i < txt->size; i++) - { - WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i])); - - for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++) - { - dw[cur].w = STRPTR(txt) + pptr[i].pos; - dw[cur].len = pptr[i].len; - dw[cur].pos = WEP_GETPOS(posdata[j]); - cur++; - } - len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i])); - } - qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord); - - MemSet(&ext, 0, sizeof(Extention)); - while (Cover(doc, rlen, query, &ext)) - { - dwptr = dw + olddwpos; - while (dwptr->pos < ext.p && dwptr - dw < dlen) - dwptr++; - olddwpos = dwptr - dw; - dwptr->start = ncover; - while (dwptr->pos < ext.q + 
1 && dwptr - dw < dlen) - dwptr++; - (dwptr - 1)->finish = ncover; - len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ; - ncover++; - } - - out = palloc(VARHDRSZ + len); - cptr = ((char *) out) + VARHDRSZ; - dwptr = dw; - - while (dwptr - dw < dlen) - { - if (dwptr->start) - { - sprintf(cptr, "{%d ", dwptr->start); - cptr = strchr(cptr, '\0'); - } - memcpy(cptr, dwptr->w, dwptr->len); - cptr += dwptr->len; - *cptr = ' '; - cptr++; - if (dwptr->finish) - { - sprintf(cptr, "}%d ", dwptr->finish); - cptr = strchr(cptr, '\0'); - } - dwptr++; - } - - SET_VARSIZE(out, cptr - ((char *) out)); - - pfree(dw); - for (i = 0; i < rlen; i++) - if (doc[i].needfree) - pfree(doc[i].item); - pfree(doc); - - PG_FREE_IF_COPY(txt, 0); - PG_FREE_IF_COPY(query, 1); - PG_RETURN_POINTER(out); -} diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c deleted file mode 100644 index 9aa0e2214f..0000000000 --- a/contrib/tsearch2/snmap.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * simple but fast map from str to Oid - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "snmap.h" -#include "common.h" - -static int -compareSNMapEntry(const void *a, const void *b) -{ - if (((SNMapEntry *) a)->nsp < ((SNMapEntry *) b)->nsp) - return -1; - else if (((SNMapEntry *) a)->nsp > ((SNMapEntry *) b)->nsp) - return 1; - else - return strcmp(((SNMapEntry *) a)->key, ((SNMapEntry *) b)->key); -} - -void -addSNMap(SNMap * map, char *key, Oid value) -{ - if (map->len >= map->reallen) - { - SNMapEntry *tmp; - int len = (map->reallen) ? 
2 * map->reallen : 16; - - tmp = (SNMapEntry *) realloc(map->list, sizeof(SNMapEntry) * len); - if (!tmp) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - map->reallen = len; - map->list = tmp; - } - map->list[map->len].key = strdup(key); - if (!map->list[map->len].key) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - map->list[map->len].nsp = get_oidnamespace(TSNSP_FunctionOid); - map->list[map->len].value = value; - map->len++; - if (map->len > 1) - qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry); -} - -void -addSNMap_t(SNMap * map, text *key, Oid value) -{ - char *k = text2char(key); - - addSNMap(map, k, value); - pfree(k); -} - -Oid -findSNMap(SNMap * map, char *key) -{ - SNMapEntry *ptr; - SNMapEntry ks; - - ks.key = key; - ks.nsp = get_oidnamespace(TSNSP_FunctionOid); - ks.value = 0; - - if (map->len == 0 || !map->list) - return 0; - ptr = (SNMapEntry *) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry); - return (ptr) ? 
ptr->value : 0; -} - -Oid -findSNMap_t(SNMap * map, text *key) -{ - char *k = text2char(key); - int res; - - res = findSNMap(map, k); - pfree(k); - return res; -} - -void -freeSNMap(SNMap * map) -{ - SNMapEntry *entry = map->list; - - if (map->list) - { - while (map->len) - { - if (entry->key) - free(entry->key); - entry++; - map->len--; - } - free(map->list); - } - memset(map, 0, sizeof(SNMap)); -} diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h deleted file mode 100644 index c3e80fca57..0000000000 --- a/contrib/tsearch2/snmap.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __SNMAP_H__ -#define __SNMAP_H__ - -#include "postgres.h" - -typedef struct -{ - char *key; - Oid value; - Oid nsp; -} SNMapEntry; - -typedef struct -{ - int len; - int reallen; - SNMapEntry *list; -} SNMap; - -void addSNMap(SNMap * map, char *key, Oid value); -void addSNMap_t(SNMap * map, text *key, Oid value); -Oid findSNMap(SNMap * map, char *key); -Oid findSNMap_t(SNMap * map, text *key); -void freeSNMap(SNMap * map); - -#endif diff --git a/contrib/tsearch2/snowball/Makefile b/contrib/tsearch2/snowball/Makefile deleted file mode 100644 index dd5c84d723..0000000000 --- a/contrib/tsearch2/snowball/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.10 2007/06/26 22:05:03 tgl Exp $ - -SUBOBJS = english_stem.o api.o russian_stem.o russian_stem_UTF8.o utilities.o - -EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) - -PG_CPPFLAGS = -I$(srcdir)/.. - -ifdef USE_PGXS -PG_CONFIG = pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -include $(PGXS) -else -subdir = contrib/tsearch2/snowball -top_builddir = ../../.. 
-include $(top_builddir)/src/Makefile.global -include $(top_srcdir)/contrib/contrib-global.mk -endif - -override CFLAGS += $(CFLAGS_SL) - -all: SUBSYS.o - -SUBSYS.o: $(SUBOBJS) - $(LD) $(LDREL) $(LDOUT) $@ $^ - - diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c deleted file mode 100644 index 78e4fe0eef..0000000000 --- a/contrib/tsearch2/snowball/api.c +++ /dev/null @@ -1,85 +0,0 @@ - -#include <stdlib.h> /* for calloc, free */ -#include "header.h" - -extern struct SN_env * -SN_create_env(int S_size, int I_size, int B_size) -{ - struct SN_env *z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); - - if (z == NULL) - return NULL; - z->p = create_s(); - if (z->p == NULL) - goto error; - if (S_size) - { - int i; - - z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); - if (z->S == NULL) - goto error; - - for (i = 0; i < S_size; i++) - { - z->S[i] = create_s(); - if (z->S[i] == NULL) - goto error; - } - z->S_size = S_size; - } - - if (I_size) - { - z->I = (int *) calloc(I_size, sizeof(int)); - if (z->I == NULL) - goto error; - z->I_size = I_size; - } - - if (B_size) - { - z->B = (symbol *) calloc(B_size, sizeof(symbol)); - if (z->B == NULL) - goto error; - z->B_size = B_size; - } - - return z; -error: - SN_close_env(z); - return NULL; -} - -extern void -SN_close_env(struct SN_env * z) -{ - if (z == NULL) - return; - if (z->S_size) - { - int i; - - for (i = 0; i < z->S_size; i++) - { - lose_s(z->S[i]); - } - free(z->S); - } - if (z->I_size) - free(z->I); - if (z->B_size) - free(z->B); - if (z->p) - lose_s(z->p); - free(z); -} - -extern int -SN_set_current(struct SN_env * z, int size, const symbol * s) -{ - int err = replace_s(z, 0, z->l, size, s, NULL); - - z->c = 0; - return err; -} diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h deleted file mode 100644 index a66935efcc..0000000000 --- a/contrib/tsearch2/snowball/api.h +++ /dev/null @@ -1,34 +0,0 @@ - -typedef unsigned char symbol; - -/* Or replace 
'char' above with 'short' for 16 bit characters. - - More precisely, replace 'char' with whatever type guarantees the - character width you need. Note however that sizeof(symbol) should divide - HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise - there is an alignment problem. In the unlikely event of a problem here, - consult Martin Porter. - -*/ - -struct SN_env -{ - symbol *p; - int c; - int a; - int l; - int lb; - int bra; - int ket; - int S_size; - int I_size; - int B_size; - symbol **S; - int *I; - symbol *B; -}; - -extern struct SN_env *SN_create_env(int S_size, int I_size, int B_size); -extern void SN_close_env(struct SN_env * z); - -extern int SN_set_current(struct SN_env * z, int size, const symbol * s); diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c deleted file mode 100644 index 9f6f65491c..0000000000 --- a/contrib/tsearch2/snowball/english_stem.c +++ /dev/null @@ -1,1623 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/snowball/english_stem.c,v 1.8 2006/03/11 04:38:30 momjian Exp $ */ - -/* This file was generated automatically by the Snowball to ANSI C compiler */ - -#include "header.h" - -extern int english_ISO_8859_1_stem(struct SN_env * z); -static int r_exception2(struct SN_env * z); -static int r_exception1(struct SN_env * z); -static int r_Step_5(struct SN_env * z); -static int r_Step_4(struct SN_env * z); -static int r_Step_3(struct SN_env * z); -static int r_Step_2(struct SN_env * z); -static int r_Step_1c(struct SN_env * z); -static int r_Step_1b(struct SN_env * z); -static int r_Step_1a(struct SN_env * z); -static int r_R2(struct SN_env * z); -static int r_R1(struct SN_env * z); -static int r_shortv(struct SN_env * z); -static int r_mark_regions(struct SN_env * z); -static int r_postlude(struct SN_env * z); -static int r_prelude(struct SN_env * z); - -extern struct SN_env *english_ISO_8859_1_create_env(void); -extern void english_ISO_8859_1_close_env(struct SN_env * z); 
- -static symbol s_0_0[6] = {'c', 'o', 'm', 'm', 'u', 'n'}; -static symbol s_0_1[5] = {'g', 'e', 'n', 'e', 'r'}; - -static struct among a_0[2] = -{ - /* 0 */ {6, s_0_0, -1, -1, 0}, - /* 1 */ {5, s_0_1, -1, -1, 0} -}; - -static symbol s_1_0[1] = {'\''}; -static symbol s_1_1[3] = {'\'', 's', '\''}; -static symbol s_1_2[2] = {'\'', 's'}; - -static struct among a_1[3] = -{ - /* 0 */ {1, s_1_0, -1, 1, 0}, - /* 1 */ {3, s_1_1, 0, 1, 0}, - /* 2 */ {2, s_1_2, -1, 1, 0} -}; - -static symbol s_2_0[3] = {'i', 'e', 'd'}; -static symbol s_2_1[1] = {'s'}; -static symbol s_2_2[3] = {'i', 'e', 's'}; -static symbol s_2_3[4] = {'s', 's', 'e', 's'}; -static symbol s_2_4[2] = {'s', 's'}; -static symbol s_2_5[2] = {'u', 's'}; - -static struct among a_2[6] = -{ - /* 0 */ {3, s_2_0, -1, 2, 0}, - /* 1 */ {1, s_2_1, -1, 3, 0}, - /* 2 */ {3, s_2_2, 1, 2, 0}, - /* 3 */ {4, s_2_3, 1, 1, 0}, - /* 4 */ {2, s_2_4, 1, -1, 0}, - /* 5 */ {2, s_2_5, 1, -1, 0} -}; - -static symbol s_3_1[2] = {'b', 'b'}; -static symbol s_3_2[2] = {'d', 'd'}; -static symbol s_3_3[2] = {'f', 'f'}; -static symbol s_3_4[2] = {'g', 'g'}; -static symbol s_3_5[2] = {'b', 'l'}; -static symbol s_3_6[2] = {'m', 'm'}; -static symbol s_3_7[2] = {'n', 'n'}; -static symbol s_3_8[2] = {'p', 'p'}; -static symbol s_3_9[2] = {'r', 'r'}; -static symbol s_3_10[2] = {'a', 't'}; -static symbol s_3_11[2] = {'t', 't'}; -static symbol s_3_12[2] = {'i', 'z'}; - -static struct among a_3[13] = -{ - /* 0 */ {0, 0, -1, 3, 0}, - /* 1 */ {2, s_3_1, 0, 2, 0}, - /* 2 */ {2, s_3_2, 0, 2, 0}, - /* 3 */ {2, s_3_3, 0, 2, 0}, - /* 4 */ {2, s_3_4, 0, 2, 0}, - /* 5 */ {2, s_3_5, 0, 1, 0}, - /* 6 */ {2, s_3_6, 0, 2, 0}, - /* 7 */ {2, s_3_7, 0, 2, 0}, - /* 8 */ {2, s_3_8, 0, 2, 0}, - /* 9 */ {2, s_3_9, 0, 2, 0}, - /* 10 */ {2, s_3_10, 0, 1, 0}, - /* 11 */ {2, s_3_11, 0, 2, 0}, - /* 12 */ {2, s_3_12, 0, 1, 0} -}; - -static symbol s_4_0[2] = {'e', 'd'}; -static symbol s_4_1[3] = {'e', 'e', 'd'}; -static symbol s_4_2[3] = {'i', 'n', 'g'}; -static symbol s_4_3[4] 
= {'e', 'd', 'l', 'y'}; -static symbol s_4_4[5] = {'e', 'e', 'd', 'l', 'y'}; -static symbol s_4_5[5] = {'i', 'n', 'g', 'l', 'y'}; - -static struct among a_4[6] = -{ - /* 0 */ {2, s_4_0, -1, 2, 0}, - /* 1 */ {3, s_4_1, 0, 1, 0}, - /* 2 */ {3, s_4_2, -1, 2, 0}, - /* 3 */ {4, s_4_3, -1, 2, 0}, - /* 4 */ {5, s_4_4, 3, 1, 0}, - /* 5 */ {5, s_4_5, -1, 2, 0} -}; - -static symbol s_5_0[4] = {'a', 'n', 'c', 'i'}; -static symbol s_5_1[4] = {'e', 'n', 'c', 'i'}; -static symbol s_5_2[3] = {'o', 'g', 'i'}; -static symbol s_5_3[2] = {'l', 'i'}; -static symbol s_5_4[3] = {'b', 'l', 'i'}; -static symbol s_5_5[4] = {'a', 'b', 'l', 'i'}; -static symbol s_5_6[4] = {'a', 'l', 'l', 'i'}; -static symbol s_5_7[5] = {'f', 'u', 'l', 'l', 'i'}; -static symbol s_5_8[6] = {'l', 'e', 's', 's', 'l', 'i'}; -static symbol s_5_9[5] = {'o', 'u', 's', 'l', 'i'}; -static symbol s_5_10[5] = {'e', 'n', 't', 'l', 'i'}; -static symbol s_5_11[5] = {'a', 'l', 'i', 't', 'i'}; -static symbol s_5_12[6] = {'b', 'i', 'l', 'i', 't', 'i'}; -static symbol s_5_13[5] = {'i', 'v', 'i', 't', 'i'}; -static symbol s_5_14[6] = {'t', 'i', 'o', 'n', 'a', 'l'}; -static symbol s_5_15[7] = {'a', 't', 'i', 'o', 'n', 'a', 'l'}; -static symbol s_5_16[5] = {'a', 'l', 'i', 's', 'm'}; -static symbol s_5_17[5] = {'a', 't', 'i', 'o', 'n'}; -static symbol s_5_18[7] = {'i', 'z', 'a', 't', 'i', 'o', 'n'}; -static symbol s_5_19[4] = {'i', 'z', 'e', 'r'}; -static symbol s_5_20[4] = {'a', 't', 'o', 'r'}; -static symbol s_5_21[7] = {'i', 'v', 'e', 'n', 'e', 's', 's'}; -static symbol s_5_22[7] = {'f', 'u', 'l', 'n', 'e', 's', 's'}; -static symbol s_5_23[7] = {'o', 'u', 's', 'n', 'e', 's', 's'}; - -static struct among a_5[24] = -{ - /* 0 */ {4, s_5_0, -1, 3, 0}, - /* 1 */ {4, s_5_1, -1, 2, 0}, - /* 2 */ {3, s_5_2, -1, 13, 0}, - /* 3 */ {2, s_5_3, -1, 16, 0}, - /* 4 */ {3, s_5_4, 3, 12, 0}, - /* 5 */ {4, s_5_5, 4, 4, 0}, - /* 6 */ {4, s_5_6, 3, 8, 0}, - /* 7 */ {5, s_5_7, 3, 14, 0}, - /* 8 */ {6, s_5_8, 3, 15, 0}, - /* 9 */ {5, s_5_9, 3, 10, 
0}, - /* 10 */ {5, s_5_10, 3, 5, 0}, - /* 11 */ {5, s_5_11, -1, 8, 0}, - /* 12 */ {6, s_5_12, -1, 12, 0}, - /* 13 */ {5, s_5_13, -1, 11, 0}, - /* 14 */ {6, s_5_14, -1, 1, 0}, - /* 15 */ {7, s_5_15, 14, 7, 0}, - /* 16 */ {5, s_5_16, -1, 8, 0}, - /* 17 */ {5, s_5_17, -1, 7, 0}, - /* 18 */ {7, s_5_18, 17, 6, 0}, - /* 19 */ {4, s_5_19, -1, 6, 0}, - /* 20 */ {4, s_5_20, -1, 7, 0}, - /* 21 */ {7, s_5_21, -1, 11, 0}, - /* 22 */ {7, s_5_22, -1, 9, 0}, - /* 23 */ {7, s_5_23, -1, 10, 0} -}; - -static symbol s_6_0[5] = {'i', 'c', 'a', 't', 'e'}; -static symbol s_6_1[5] = {'a', 't', 'i', 'v', 'e'}; -static symbol s_6_2[5] = {'a', 'l', 'i', 'z', 'e'}; -static symbol s_6_3[5] = {'i', 'c', 'i', 't', 'i'}; -static symbol s_6_4[4] = {'i', 'c', 'a', 'l'}; -static symbol s_6_5[6] = {'t', 'i', 'o', 'n', 'a', 'l'}; -static symbol s_6_6[7] = {'a', 't', 'i', 'o', 'n', 'a', 'l'}; -static symbol s_6_7[3] = {'f', 'u', 'l'}; -static symbol s_6_8[4] = {'n', 'e', 's', 's'}; - -static struct among a_6[9] = -{ - /* 0 */ {5, s_6_0, -1, 4, 0}, - /* 1 */ {5, s_6_1, -1, 6, 0}, - /* 2 */ {5, s_6_2, -1, 3, 0}, - /* 3 */ {5, s_6_3, -1, 4, 0}, - /* 4 */ {4, s_6_4, -1, 4, 0}, - /* 5 */ {6, s_6_5, -1, 1, 0}, - /* 6 */ {7, s_6_6, 5, 2, 0}, - /* 7 */ {3, s_6_7, -1, 5, 0}, - /* 8 */ {4, s_6_8, -1, 5, 0} -}; - -static symbol s_7_0[2] = {'i', 'c'}; -static symbol s_7_1[4] = {'a', 'n', 'c', 'e'}; -static symbol s_7_2[4] = {'e', 'n', 'c', 'e'}; -static symbol s_7_3[4] = {'a', 'b', 'l', 'e'}; -static symbol s_7_4[4] = {'i', 'b', 'l', 'e'}; -static symbol s_7_5[3] = {'a', 't', 'e'}; -static symbol s_7_6[3] = {'i', 'v', 'e'}; -static symbol s_7_7[3] = {'i', 'z', 'e'}; -static symbol s_7_8[3] = {'i', 't', 'i'}; -static symbol s_7_9[2] = {'a', 'l'}; -static symbol s_7_10[3] = {'i', 's', 'm'}; -static symbol s_7_11[3] = {'i', 'o', 'n'}; -static symbol s_7_12[2] = {'e', 'r'}; -static symbol s_7_13[3] = {'o', 'u', 's'}; -static symbol s_7_14[3] = {'a', 'n', 't'}; -static symbol s_7_15[3] = {'e', 'n', 't'}; -static 
symbol s_7_16[4] = {'m', 'e', 'n', 't'}; -static symbol s_7_17[5] = {'e', 'm', 'e', 'n', 't'}; - -static struct among a_7[18] = -{ - /* 0 */ {2, s_7_0, -1, 1, 0}, - /* 1 */ {4, s_7_1, -1, 1, 0}, - /* 2 */ {4, s_7_2, -1, 1, 0}, - /* 3 */ {4, s_7_3, -1, 1, 0}, - /* 4 */ {4, s_7_4, -1, 1, 0}, - /* 5 */ {3, s_7_5, -1, 1, 0}, - /* 6 */ {3, s_7_6, -1, 1, 0}, - /* 7 */ {3, s_7_7, -1, 1, 0}, - /* 8 */ {3, s_7_8, -1, 1, 0}, - /* 9 */ {2, s_7_9, -1, 1, 0}, - /* 10 */ {3, s_7_10, -1, 1, 0}, - /* 11 */ {3, s_7_11, -1, 2, 0}, - /* 12 */ {2, s_7_12, -1, 1, 0}, - /* 13 */ {3, s_7_13, -1, 1, 0}, - /* 14 */ {3, s_7_14, -1, 1, 0}, - /* 15 */ {3, s_7_15, -1, 1, 0}, - /* 16 */ {4, s_7_16, 15, 1, 0}, - /* 17 */ {5, s_7_17, 16, 1, 0} -}; - -static symbol s_8_0[1] = {'e'}; -static symbol s_8_1[1] = {'l'}; - -static struct among a_8[2] = -{ - /* 0 */ {1, s_8_0, -1, 1, 0}, - /* 1 */ {1, s_8_1, -1, 2, 0} -}; - -static symbol s_9_0[7] = {'s', 'u', 'c', 'c', 'e', 'e', 'd'}; -static symbol s_9_1[7] = {'p', 'r', 'o', 'c', 'e', 'e', 'd'}; -static symbol s_9_2[6] = {'e', 'x', 'c', 'e', 'e', 'd'}; -static symbol s_9_3[7] = {'c', 'a', 'n', 'n', 'i', 'n', 'g'}; -static symbol s_9_4[6] = {'i', 'n', 'n', 'i', 'n', 'g'}; -static symbol s_9_5[7] = {'e', 'a', 'r', 'r', 'i', 'n', 'g'}; -static symbol s_9_6[7] = {'h', 'e', 'r', 'r', 'i', 'n', 'g'}; -static symbol s_9_7[6] = {'o', 'u', 't', 'i', 'n', 'g'}; - -static struct among a_9[8] = -{ - /* 0 */ {7, s_9_0, -1, -1, 0}, - /* 1 */ {7, s_9_1, -1, -1, 0}, - /* 2 */ {6, s_9_2, -1, -1, 0}, - /* 3 */ {7, s_9_3, -1, -1, 0}, - /* 4 */ {6, s_9_4, -1, -1, 0}, - /* 5 */ {7, s_9_5, -1, -1, 0}, - /* 6 */ {7, s_9_6, -1, -1, 0}, - /* 7 */ {6, s_9_7, -1, -1, 0} -}; - -static symbol s_10_0[5] = {'a', 'n', 'd', 'e', 's'}; -static symbol s_10_1[5] = {'a', 't', 'l', 'a', 's'}; -static symbol s_10_2[4] = {'b', 'i', 'a', 's'}; -static symbol s_10_3[6] = {'c', 'o', 's', 'm', 'o', 's'}; -static symbol s_10_4[5] = {'d', 'y', 'i', 'n', 'g'}; -static symbol s_10_5[5] = {'e', 'a', 
'r', 'l', 'y'}; -static symbol s_10_6[6] = {'g', 'e', 'n', 't', 'l', 'y'}; -static symbol s_10_7[4] = {'h', 'o', 'w', 'e'}; -static symbol s_10_8[4] = {'i', 'd', 'l', 'y'}; -static symbol s_10_9[5] = {'l', 'y', 'i', 'n', 'g'}; -static symbol s_10_10[4] = {'n', 'e', 'w', 's'}; -static symbol s_10_11[4] = {'o', 'n', 'l', 'y'}; -static symbol s_10_12[6] = {'s', 'i', 'n', 'g', 'l', 'y'}; -static symbol s_10_13[5] = {'s', 'k', 'i', 'e', 's'}; -static symbol s_10_14[4] = {'s', 'k', 'i', 's'}; -static symbol s_10_15[3] = {'s', 'k', 'y'}; -static symbol s_10_16[5] = {'t', 'y', 'i', 'n', 'g'}; -static symbol s_10_17[4] = {'u', 'g', 'l', 'y'}; - -static struct among a_10[18] = -{ - /* 0 */ {5, s_10_0, -1, -1, 0}, - /* 1 */ {5, s_10_1, -1, -1, 0}, - /* 2 */ {4, s_10_2, -1, -1, 0}, - /* 3 */ {6, s_10_3, -1, -1, 0}, - /* 4 */ {5, s_10_4, -1, 3, 0}, - /* 5 */ {5, s_10_5, -1, 9, 0}, - /* 6 */ {6, s_10_6, -1, 7, 0}, - /* 7 */ {4, s_10_7, -1, -1, 0}, - /* 8 */ {4, s_10_8, -1, 6, 0}, - /* 9 */ {5, s_10_9, -1, 4, 0}, - /* 10 */ {4, s_10_10, -1, -1, 0}, - /* 11 */ {4, s_10_11, -1, 10, 0}, - /* 12 */ {6, s_10_12, -1, 11, 0}, - /* 13 */ {5, s_10_13, -1, 2, 0}, - /* 14 */ {4, s_10_14, -1, 1, 0}, - /* 15 */ {3, s_10_15, -1, -1, 0}, - /* 16 */ {5, s_10_16, -1, 5, 0}, - /* 17 */ {4, s_10_17, -1, 8, 0} -}; - -static unsigned char g_v[] = {17, 65, 16, 1}; - -static unsigned char g_v_WXY[] = {1, 17, 65, 208, 1}; - -static unsigned char g_valid_LI[] = {55, 141, 2}; - -static symbol s_0[] = {'\''}; -static symbol s_1[] = {'y'}; -static symbol s_2[] = {'Y'}; -static symbol s_3[] = {'y'}; -static symbol s_4[] = {'Y'}; -static symbol s_5[] = {'s', 's'}; -static symbol s_6[] = {'i', 'e'}; -static symbol s_7[] = {'i'}; -static symbol s_8[] = {'e', 'e'}; -static symbol s_9[] = {'e'}; -static symbol s_10[] = {'e'}; -static symbol s_11[] = {'y'}; -static symbol s_12[] = {'Y'}; -static symbol s_13[] = {'i'}; -static symbol s_14[] = {'t', 'i', 'o', 'n'}; -static symbol s_15[] = {'e', 'n', 'c', 'e'}; 
-static symbol s_16[] = {'a', 'n', 'c', 'e'}; -static symbol s_17[] = {'a', 'b', 'l', 'e'}; -static symbol s_18[] = {'e', 'n', 't'}; -static symbol s_19[] = {'i', 'z', 'e'}; -static symbol s_20[] = {'a', 't', 'e'}; -static symbol s_21[] = {'a', 'l'}; -static symbol s_22[] = {'f', 'u', 'l'}; -static symbol s_23[] = {'o', 'u', 's'}; -static symbol s_24[] = {'i', 'v', 'e'}; -static symbol s_25[] = {'b', 'l', 'e'}; -static symbol s_26[] = {'l'}; -static symbol s_27[] = {'o', 'g'}; -static symbol s_28[] = {'f', 'u', 'l'}; -static symbol s_29[] = {'l', 'e', 's', 's'}; -static symbol s_30[] = {'t', 'i', 'o', 'n'}; -static symbol s_31[] = {'a', 't', 'e'}; -static symbol s_32[] = {'a', 'l'}; -static symbol s_33[] = {'i', 'c'}; -static symbol s_34[] = {'s'}; -static symbol s_35[] = {'t'}; -static symbol s_36[] = {'l'}; -static symbol s_37[] = {'s', 'k', 'i'}; -static symbol s_38[] = {'s', 'k', 'y'}; -static symbol s_39[] = {'d', 'i', 'e'}; -static symbol s_40[] = {'l', 'i', 'e'}; -static symbol s_41[] = {'t', 'i', 'e'}; -static symbol s_42[] = {'i', 'd', 'l'}; -static symbol s_43[] = {'g', 'e', 'n', 't', 'l'}; -static symbol s_44[] = {'u', 'g', 'l', 'i'}; -static symbol s_45[] = {'e', 'a', 'r', 'l', 'i'}; -static symbol s_46[] = {'o', 'n', 'l', 'i'}; -static symbol s_47[] = {'s', 'i', 'n', 'g', 'l'}; -static symbol s_48[] = {'Y'}; -static symbol s_49[] = {'y'}; - -static int -r_prelude(struct SN_env * z) -{ - z->B[0] = 0; /* unset Y_found, line 26 */ - { - int c = z->c; /* do, line 27 */ - - z->bra = z->c; /* [, line 27 */ - if (!(eq_s(z, 1, s_0))) - goto lab0; - z->ket = z->c; /* ], line 27 */ - { - int ret; - - ret = slice_del(z); /* delete, line 27 */ - if (ret < 0) - return ret; - } -lab0: - z->c = c; - } - { - int c = z->c; /* do, line 28 */ - - z->bra = z->c; /* [, line 28 */ - if (!(eq_s(z, 1, s_1))) - goto lab1; - z->ket = z->c; /* ], line 28 */ - if (!(in_grouping(z, g_v, 97, 121))) - goto lab1; - { - int ret; - - ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ - 
if (ret < 0) - return ret; - } - z->B[0] = 1; /* set Y_found, line 28 */ -lab1: - z->c = c; - } - { - int c = z->c; /* do, line 29 */ - - while (1) - { /* repeat, line 29 */ - int c = z->c; - - while (1) - { /* goto, line 29 */ - int c = z->c; - - if (!(in_grouping(z, g_v, 97, 121))) - goto lab4; - z->bra = z->c; /* [, line 29 */ - if (!(eq_s(z, 1, s_3))) - goto lab4; - z->ket = z->c; /* ], line 29 */ - z->c = c; - break; - lab4: - z->c = c; - if (z->c >= z->l) - goto lab3; - z->c++; /* goto, line 29 */ - } - { - int ret; - - ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ - if (ret < 0) - return ret; - } - z->B[0] = 1; /* set Y_found, line 29 */ - continue; - lab3: - z->c = c; - break; - } - z->c = c; - } - return 1; -} - -static int -r_mark_regions(struct SN_env * z) -{ - z->I[0] = z->l; - z->I[1] = z->l; - { - int c = z->c; /* do, line 35 */ - - { - int c = z->c; /* or, line 40 */ - - if (!(find_among(z, a_0, 2))) - goto lab2; /* among, line 36 */ - goto lab1; - lab2: - z->c = c; - while (1) - { /* gopast, line 40 */ - if (!(in_grouping(z, g_v, 97, 121))) - goto lab3; - break; - lab3: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 40 */ - } - while (1) - { /* gopast, line 40 */ - if (!(out_grouping(z, g_v, 97, 121))) - goto lab4; - break; - lab4: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 40 */ - } - } -lab1: - z->I[0] = z->c; /* setmark p1, line 41 */ - while (1) - { /* gopast, line 42 */ - if (!(in_grouping(z, g_v, 97, 121))) - goto lab5; - break; - lab5: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 42 */ - } - while (1) - { /* gopast, line 42 */ - if (!(out_grouping(z, g_v, 97, 121))) - goto lab6; - break; - lab6: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 42 */ - } - z->I[1] = z->c; /* setmark p2, line 42 */ -lab0: - z->c = c; - } - return 1; -} - -static int -r_shortv(struct SN_env * z) -{ - { - int m = z->l - z->c; - - (void) m; /* or, line 50 */ - if (!(out_grouping_b(z, g_v_WXY, 89, 121))) - goto 
lab1; - if (!(in_grouping_b(z, g_v, 97, 121))) - goto lab1; - if (!(out_grouping_b(z, g_v, 97, 121))) - goto lab1; - goto lab0; -lab1: - z->c = z->l - m; - if (!(out_grouping_b(z, g_v, 97, 121))) - return 0; - if (!(in_grouping_b(z, g_v, 97, 121))) - return 0; - if (z->c > z->lb) - return 0; /* atlimit, line 51 */ - } -lab0: - return 1; -} - -static int -r_R1(struct SN_env * z) -{ - if (!(z->I[0] <= z->c)) - return 0; - return 1; -} - -static int -r_R2(struct SN_env * z) -{ - if (!(z->I[1] <= z->c)) - return 0; - return 1; -} - -static int -r_Step_1a(struct SN_env * z) -{ - int among_var; - - { - int m = z->l - z->c; - - (void) m; /* try, line 58 */ - z->ket = z->c; /* [, line 59 */ - among_var = find_among_b(z, a_1, 3); /* substring, line 59 */ - if (!(among_var)) - { - z->c = z->l - m; - goto lab0; - } - z->bra = z->c; /* ], line 59 */ - switch (among_var) - { - case 0: - { - z->c = z->l - m; - goto lab0; - } - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 61 */ - if (ret < 0) - return ret; - } - break; - } -lab0: - ; - } - z->ket = z->c; /* [, line 64 */ - among_var = find_among_b(z, a_2, 6); /* substring, line 64 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 64 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_from_s(z, 2, s_5); /* <-, line 65 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int m = z->l - z->c; - - (void) m; /* or, line 67 */ - if (z->c <= z->lb) - goto lab2; - z->c--; /* next, line 67 */ - if (z->c > z->lb) - goto lab2; /* atlimit, line 67 */ - { - int ret; - - ret = slice_from_s(z, 2, s_6); /* <-, line 67 */ - if (ret < 0) - return ret; - } - goto lab1; - lab2: - z->c = z->l - m; - { - int ret; - - ret = slice_from_s(z, 1, s_7); /* <-, line 67 */ - if (ret < 0) - return ret; - } - } - lab1: - break; - case 3: - if (z->c <= z->lb) - return 0; - z->c--; /* next, line 68 */ - while (1) - { /* gopast, line 68 */ - if (!(in_grouping_b(z, g_v, 97, 121))) - goto 
lab3; - break; - lab3: - if (z->c <= z->lb) - return 0; - z->c--; /* gopast, line 68 */ - } - { - int ret; - - ret = slice_del(z); /* delete, line 68 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_Step_1b(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 74 */ - among_var = find_among_b(z, a_4, 6); /* substring, line 74 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 74 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret = r_R1(z); - - if (ret == 0) - return 0; /* call R1, line 76 */ - if (ret < 0) - return ret; - } - { - int ret; - - ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int m_test = z->l - z->c; /* test, line 79 */ - - while (1) - { /* gopast, line 79 */ - if (!(in_grouping_b(z, g_v, 97, 121))) - goto lab0; - break; - lab0: - if (z->c <= z->lb) - return 0; - z->c--; /* gopast, line 79 */ - } - z->c = z->l - m_test; - } - { - int ret; - - ret = slice_del(z); /* delete, line 79 */ - if (ret < 0) - return ret; - } - { - int m_test = z->l - z->c; /* test, line 80 */ - - among_var = find_among_b(z, a_3, 13); /* substring, line 80 */ - if (!(among_var)) - return 0; - z->c = z->l - m_test; - } - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - { - int c = z->c; - - ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 82 */ - z->c = c; - } - if (ret < 0) - return ret; - } - break; - case 2: - z->ket = z->c; /* [, line 85 */ - if (z->c <= z->lb) - return 0; - z->c--; /* next, line 85 */ - z->bra = z->c; /* ], line 85 */ - { - int ret; - - ret = slice_del(z); /* delete, line 85 */ - if (ret < 0) - return ret; - } - break; - case 3: - if (z->c != z->I[0]) - return 0; /* atmark, line 86 */ - { - int m_test = z->l - z->c; /* test, line 86 */ - - { - int ret = r_shortv(z); - - if (ret == 0) - return 0; /* call shortv, line 86 */ - if (ret < 0) - return ret; - } - z->c = z->l - m_test; - } - { - int 
ret; - - { - int c = z->c; - - ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 86 */ - z->c = c; - } - if (ret < 0) - return ret; - } - break; - } - break; - } - return 1; -} - -static int -r_Step_1c(struct SN_env * z) -{ - z->ket = z->c; /* [, line 93 */ - { - int m = z->l - z->c; - - (void) m; /* or, line 93 */ - if (!(eq_s_b(z, 1, s_11))) - goto lab1; - goto lab0; -lab1: - z->c = z->l - m; - if (!(eq_s_b(z, 1, s_12))) - return 0; - } -lab0: - z->bra = z->c; /* ], line 93 */ - if (!(out_grouping_b(z, g_v, 97, 121))) - return 0; - { - int m = z->l - z->c; - - (void) m; /* not, line 94 */ - if (z->c > z->lb) - goto lab2; /* atlimit, line 94 */ - return 0; -lab2: - z->c = z->l - m; - } - { - int ret; - - ret = slice_from_s(z, 1, s_13); /* <-, line 95 */ - if (ret < 0) - return ret; - } - return 1; -} - -static int -r_Step_2(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 99 */ - among_var = find_among_b(z, a_5, 24); /* substring, line 99 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 99 */ - { - int ret = r_R1(z); - - if (ret == 0) - return 0; /* call R1, line 99 */ - if (ret < 0) - return ret; - } - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_from_s(z, 4, s_14); /* <-, line 100 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_from_s(z, 4, s_15); /* <-, line 101 */ - if (ret < 0) - return ret; - } - break; - case 3: - { - int ret; - - ret = slice_from_s(z, 4, s_16); /* <-, line 102 */ - if (ret < 0) - return ret; - } - break; - case 4: - { - int ret; - - ret = slice_from_s(z, 4, s_17); /* <-, line 103 */ - if (ret < 0) - return ret; - } - break; - case 5: - { - int ret; - - ret = slice_from_s(z, 3, s_18); /* <-, line 104 */ - if (ret < 0) - return ret; - } - break; - case 6: - { - int ret; - - ret = slice_from_s(z, 3, s_19); /* <-, line 106 */ - if (ret < 0) - return ret; - } - break; - case 7: - { - int ret; - - ret = slice_from_s(z, 3, s_20); /* 
<-, line 108 */ - if (ret < 0) - return ret; - } - break; - case 8: - { - int ret; - - ret = slice_from_s(z, 2, s_21); /* <-, line 110 */ - if (ret < 0) - return ret; - } - break; - case 9: - { - int ret; - - ret = slice_from_s(z, 3, s_22); /* <-, line 111 */ - if (ret < 0) - return ret; - } - break; - case 10: - { - int ret; - - ret = slice_from_s(z, 3, s_23); /* <-, line 113 */ - if (ret < 0) - return ret; - } - break; - case 11: - { - int ret; - - ret = slice_from_s(z, 3, s_24); /* <-, line 115 */ - if (ret < 0) - return ret; - } - break; - case 12: - { - int ret; - - ret = slice_from_s(z, 3, s_25); /* <-, line 117 */ - if (ret < 0) - return ret; - } - break; - case 13: - if (!(eq_s_b(z, 1, s_26))) - return 0; - { - int ret; - - ret = slice_from_s(z, 2, s_27); /* <-, line 118 */ - if (ret < 0) - return ret; - } - break; - case 14: - { - int ret; - - ret = slice_from_s(z, 3, s_28); /* <-, line 119 */ - if (ret < 0) - return ret; - } - break; - case 15: - { - int ret; - - ret = slice_from_s(z, 4, s_29); /* <-, line 120 */ - if (ret < 0) - return ret; - } - break; - case 16: - if (!(in_grouping_b(z, g_valid_LI, 99, 116))) - return 0; - { - int ret; - - ret = slice_del(z); /* delete, line 121 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_Step_3(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 126 */ - among_var = find_among_b(z, a_6, 9); /* substring, line 126 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 126 */ - { - int ret = r_R1(z); - - if (ret == 0) - return 0; /* call R1, line 126 */ - if (ret < 0) - return ret; - } - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_from_s(z, 4, s_30); /* <-, line 127 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_from_s(z, 3, s_31); /* <-, line 128 */ - if (ret < 0) - return ret; - } - break; - case 3: - { - int ret; - - ret = slice_from_s(z, 2, s_32); /* <-, line 129 */ - if (ret 
< 0) - return ret; - } - break; - case 4: - { - int ret; - - ret = slice_from_s(z, 2, s_33); /* <-, line 131 */ - if (ret < 0) - return ret; - } - break; - case 5: - { - int ret; - - ret = slice_del(z); /* delete, line 133 */ - if (ret < 0) - return ret; - } - break; - case 6: - { - int ret = r_R2(z); - - if (ret == 0) - return 0; /* call R2, line 135 */ - if (ret < 0) - return ret; - } - { - int ret; - - ret = slice_del(z); /* delete, line 135 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_Step_4(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 140 */ - among_var = find_among_b(z, a_7, 18); /* substring, line 140 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 140 */ - { - int ret = r_R2(z); - - if (ret == 0) - return 0; /* call R2, line 140 */ - if (ret < 0) - return ret; - } - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 143 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int m = z->l - z->c; - - (void) m; /* or, line 144 */ - if (!(eq_s_b(z, 1, s_34))) - goto lab1; - goto lab0; - lab1: - z->c = z->l - m; - if (!(eq_s_b(z, 1, s_35))) - return 0; - } - lab0: - { - int ret; - - ret = slice_del(z); /* delete, line 144 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_Step_5(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 149 */ - among_var = find_among_b(z, a_8, 2); /* substring, line 149 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 149 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 150 */ - { - int ret = r_R2(z); - - if (ret == 0) - goto lab1; /* call R2, line 150 */ - if (ret < 0) - return ret; - } - goto lab0; - lab1: - z->c = z->l - m; - { - int ret = r_R1(z); - - if (ret == 0) - return 0; /* call R1, line 150 */ - if (ret < 0) - return ret; - } - { - int m = z->l - z->c; - - (void) 
m; /* not, line 150 */ - { - int ret = r_shortv(z); - - if (ret == 0) - goto lab2; /* call shortv, line 150 */ - if (ret < 0) - return ret; - } - return 0; - lab2: - z->c = z->l - m; - } - } - lab0: - { - int ret; - - ret = slice_del(z); /* delete, line 150 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret = r_R2(z); - - if (ret == 0) - return 0; /* call R2, line 151 */ - if (ret < 0) - return ret; - } - if (!(eq_s_b(z, 1, s_36))) - return 0; - { - int ret; - - ret = slice_del(z); /* delete, line 151 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_exception2(struct SN_env * z) -{ - z->ket = z->c; /* [, line 157 */ - if (!(find_among_b(z, a_9, 8))) - return 0; /* substring, line 157 */ - z->bra = z->c; /* ], line 157 */ - if (z->c > z->lb) - return 0; /* atlimit, line 157 */ - return 1; -} - -static int -r_exception1(struct SN_env * z) -{ - int among_var; - - z->bra = z->c; /* [, line 169 */ - among_var = find_among(z, a_10, 18); /* substring, line 169 */ - if (!(among_var)) - return 0; - z->ket = z->c; /* ], line 169 */ - if (z->c < z->l) - return 0; /* atlimit, line 169 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_from_s(z, 3, s_37); /* <-, line 173 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_from_s(z, 3, s_38); /* <-, line 174 */ - if (ret < 0) - return ret; - } - break; - case 3: - { - int ret; - - ret = slice_from_s(z, 3, s_39); /* <-, line 175 */ - if (ret < 0) - return ret; - } - break; - case 4: - { - int ret; - - ret = slice_from_s(z, 3, s_40); /* <-, line 176 */ - if (ret < 0) - return ret; - } - break; - case 5: - { - int ret; - - ret = slice_from_s(z, 3, s_41); /* <-, line 177 */ - if (ret < 0) - return ret; - } - break; - case 6: - { - int ret; - - ret = slice_from_s(z, 3, s_42); /* <-, line 181 */ - if (ret < 0) - return ret; - } - break; - case 7: - { - int ret; - - ret = slice_from_s(z, 5, s_43); /* <-, line 182 
*/ - if (ret < 0) - return ret; - } - break; - case 8: - { - int ret; - - ret = slice_from_s(z, 4, s_44); /* <-, line 183 */ - if (ret < 0) - return ret; - } - break; - case 9: - { - int ret; - - ret = slice_from_s(z, 5, s_45); /* <-, line 184 */ - if (ret < 0) - return ret; - } - break; - case 10: - { - int ret; - - ret = slice_from_s(z, 4, s_46); /* <-, line 185 */ - if (ret < 0) - return ret; - } - break; - case 11: - { - int ret; - - ret = slice_from_s(z, 5, s_47); /* <-, line 186 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_postlude(struct SN_env * z) -{ - if (!(z->B[0])) - return 0; /* Boolean test Y_found, line 202 */ - while (1) - { /* repeat, line 202 */ - int c = z->c; - - while (1) - { /* goto, line 202 */ - int c = z->c; - - z->bra = z->c; /* [, line 202 */ - if (!(eq_s(z, 1, s_48))) - goto lab1; - z->ket = z->c; /* ], line 202 */ - z->c = c; - break; - lab1: - z->c = c; - if (z->c >= z->l) - goto lab0; - z->c++; /* goto, line 202 */ - } - { - int ret; - - ret = slice_from_s(z, 1, s_49); /* <-, line 202 */ - if (ret < 0) - return ret; - } - continue; -lab0: - z->c = c; - break; - } - return 1; -} - -extern int -english_ISO_8859_1_stem(struct SN_env * z) -{ - { - int c = z->c; /* or, line 206 */ - - { - int ret = r_exception1(z); - - if (ret == 0) - goto lab1; /* call exception1, line 206 */ - if (ret < 0) - return ret; - } - goto lab0; -lab1: - z->c = c; - { - int c = z->c; /* not, line 207 */ - - { - int c = z->c + 3; - - if (0 > c || c > z->l) - goto lab3; - z->c = c; /* hop, line 207 */ - } - goto lab2; - lab3: - z->c = c; - } - goto lab0; -lab2: - z->c = c; - { - int c = z->c; /* do, line 208 */ - - { - int ret = r_prelude(z); - - if (ret == 0) - goto lab4; /* call prelude, line 208 */ - if (ret < 0) - return ret; - } - lab4: - z->c = c; - } - { - int c = z->c; /* do, line 209 */ - - { - int ret = r_mark_regions(z); - - if (ret == 0) - goto lab5; /* call mark_regions, line 209 */ - if (ret < 0) - return ret; - } - 
lab5: - z->c = c; - } - z->lb = z->c; - z->c = z->l; /* backwards, line 210 */ - - { - int m = z->l - z->c; - - (void) m; /* do, line 212 */ - { - int ret = r_Step_1a(z); - - if (ret == 0) - goto lab6; /* call Step_1a, line 212 */ - if (ret < 0) - return ret; - } - lab6: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* or, line 214 */ - { - int ret = r_exception2(z); - - if (ret == 0) - goto lab8; /* call exception2, line 214 */ - if (ret < 0) - return ret; - } - goto lab7; - lab8: - z->c = z->l - m; - { - int m = z->l - z->c; - - (void) m; /* do, line 216 */ - { - int ret = r_Step_1b(z); - - if (ret == 0) - goto lab9; /* call Step_1b, line 216 */ - if (ret < 0) - return ret; - } - lab9: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 217 */ - { - int ret = r_Step_1c(z); - - if (ret == 0) - goto lab10; /* call Step_1c, line 217 */ - if (ret < 0) - return ret; - } - lab10: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 219 */ - { - int ret = r_Step_2(z); - - if (ret == 0) - goto lab11; /* call Step_2, line 219 */ - if (ret < 0) - return ret; - } - lab11: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 220 */ - { - int ret = r_Step_3(z); - - if (ret == 0) - goto lab12; /* call Step_3, line 220 */ - if (ret < 0) - return ret; - } - lab12: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 221 */ - { - int ret = r_Step_4(z); - - if (ret == 0) - goto lab13; /* call Step_4, line 221 */ - if (ret < 0) - return ret; - } - lab13: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 223 */ - { - int ret = r_Step_5(z); - - if (ret == 0) - goto lab14; /* call Step_5, line 223 */ - if (ret < 0) - return ret; - } - lab14: - z->c = z->l - m; - } - } -lab7: - z->c = z->lb; - { - int c = z->c; /* do, line 226 */ - - { - int ret = r_postlude(z); - - if (ret == 0) - goto lab15; /* call postlude, line 226 */ - if (ret < 0) - return ret; 
- } - lab15: - z->c = c; - } - } -lab0: - return 1; -} - -extern struct SN_env * -english_ISO_8859_1_create_env(void) -{ - return SN_create_env(0, 2, 1); -} - -extern void -english_ISO_8859_1_close_env(struct SN_env * z) -{ - SN_close_env(z); -} diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h deleted file mode 100644 index 6918a73dd7..0000000000 --- a/contrib/tsearch2/snowball/english_stem.h +++ /dev/null @@ -1,18 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/snowball/english_stem.h,v 1.6 2006/03/11 04:38:30 momjian Exp $ */ - -/* This file was generated automatically by the Snowball to ANSI C compiler */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -extern struct SN_env *english_ISO_8859_1_create_env(void); -extern void english_ISO_8859_1_close_env(struct SN_env * z); - -extern int english_ISO_8859_1_stem(struct SN_env * z); - -#ifdef __cplusplus -} - -#endif diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h deleted file mode 100644 index 610f998864..0000000000 --- a/contrib/tsearch2/snowball/header.h +++ /dev/null @@ -1,56 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/snowball/header.h,v 1.8 2006/07/10 22:06:11 momjian Exp $ */ - -#include <limits.h> - -#include "api.h" - -#define HEAD (2 * sizeof(int)) - -#define SIZE(p) ((int *)(p))[-1] -#define SET_SIZE(p, n) ((int *)(p))[-1] = n -#define CAPACITY(p) ((int *)(p))[-2] - -struct among -{ - int s_size; /* number of chars in string */ - symbol *s; /* search string */ - int substring_i; /* index to longest matching substring */ - int result; /* result of the lookup */ - int (*function) (struct SN_env *); -}; - -extern symbol *create_s(void); -extern void lose_s(symbol * p); - -extern int skip_utf8(const symbol * p, int c, int lb, int l, int n); - -extern int in_grouping_U(struct SN_env * z, unsigned char *s, int min, int max); -extern int in_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max); -extern int 
out_grouping_U(struct SN_env * z, unsigned char *s, int min, int max); -extern int out_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max); - -extern int in_grouping(struct SN_env * z, unsigned char *s, int min, int max); -extern int in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max); -extern int out_grouping(struct SN_env * z, unsigned char *s, int min, int max); -extern int out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max); - -extern int eq_s(struct SN_env * z, int s_size, symbol * s); -extern int eq_s_b(struct SN_env * z, int s_size, symbol * s); -extern int eq_v(struct SN_env * z, symbol * p); -extern int eq_v_b(struct SN_env * z, symbol * p); - -extern int find_among(struct SN_env * z, struct among * v, int v_size); -extern int find_among_b(struct SN_env * z, struct among * v, int v_size); - -extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int *adjustment); -extern int slice_from_s(struct SN_env * z, int s_size, symbol * s); -extern int slice_from_v(struct SN_env * z, symbol * p); -extern int slice_del(struct SN_env * z); - -extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s); -extern int insert_v(struct SN_env * z, int bra, int ket, symbol * p); - -extern symbol *slice_to(struct SN_env * z, symbol * p); -extern symbol *assign_to(struct SN_env * z, symbol * p); - -extern void debug(struct SN_env * z, int number, int line_count); diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c deleted file mode 100644 index a9558b3ab5..0000000000 --- a/contrib/tsearch2/snowball/russian_stem.c +++ /dev/null @@ -1,928 +0,0 @@ - -/* This file was generated automatically by the Snowball to ANSI C compiler */ - -#include "header.h" - -extern int russian_KOI8_R_stem(struct SN_env * z); -static int r_tidy_up(struct SN_env * z); -static int r_derivational(struct SN_env * z); -static int r_noun(struct SN_env * z); 
-static int r_verb(struct SN_env * z); -static int r_reflexive(struct SN_env * z); -static int r_adjectival(struct SN_env * z); -static int r_adjective(struct SN_env * z); -static int r_perfective_gerund(struct SN_env * z); -static int r_R2(struct SN_env * z); -static int r_mark_regions(struct SN_env * z); - -extern struct SN_env *russian_KOI8_R_create_env(void); -extern void russian_KOI8_R_close_env(struct SN_env * z); - -static symbol s_0_0[3] = {0xD7, 0xDB, 0xC9}; -static symbol s_0_1[4] = {0xC9, 0xD7, 0xDB, 0xC9}; -static symbol s_0_2[4] = {0xD9, 0xD7, 0xDB, 0xC9}; -static symbol s_0_3[1] = {0xD7}; -static symbol s_0_4[2] = {0xC9, 0xD7}; -static symbol s_0_5[2] = {0xD9, 0xD7}; -static symbol s_0_6[5] = {0xD7, 0xDB, 0xC9, 0xD3, 0xD8}; -static symbol s_0_7[6] = {0xC9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8}; -static symbol s_0_8[6] = {0xD9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8}; - -static struct among a_0[9] = -{ - /* 0 */ {3, s_0_0, -1, 1, 0}, - /* 1 */ {4, s_0_1, 0, 2, 0}, - /* 2 */ {4, s_0_2, 0, 2, 0}, - /* 3 */ {1, s_0_3, -1, 1, 0}, - /* 4 */ {2, s_0_4, 3, 2, 0}, - /* 5 */ {2, s_0_5, 3, 2, 0}, - /* 6 */ {5, s_0_6, -1, 1, 0}, - /* 7 */ {6, s_0_7, 6, 2, 0}, - /* 8 */ {6, s_0_8, 6, 2, 0} -}; - -static symbol s_1_0[2] = {0xC0, 0xC0}; -static symbol s_1_1[2] = {0xC5, 0xC0}; -static symbol s_1_2[2] = {0xCF, 0xC0}; -static symbol s_1_3[2] = {0xD5, 0xC0}; -static symbol s_1_4[2] = {0xC5, 0xC5}; -static symbol s_1_5[2] = {0xC9, 0xC5}; -static symbol s_1_6[2] = {0xCF, 0xC5}; -static symbol s_1_7[2] = {0xD9, 0xC5}; -static symbol s_1_8[2] = {0xC9, 0xC8}; -static symbol s_1_9[2] = {0xD9, 0xC8}; -static symbol s_1_10[3] = {0xC9, 0xCD, 0xC9}; -static symbol s_1_11[3] = {0xD9, 0xCD, 0xC9}; -static symbol s_1_12[2] = {0xC5, 0xCA}; -static symbol s_1_13[2] = {0xC9, 0xCA}; -static symbol s_1_14[2] = {0xCF, 0xCA}; -static symbol s_1_15[2] = {0xD9, 0xCA}; -static symbol s_1_16[2] = {0xC5, 0xCD}; -static symbol s_1_17[2] = {0xC9, 0xCD}; -static symbol s_1_18[2] = {0xCF, 0xCD}; -static symbol 
s_1_19[2] = {0xD9, 0xCD}; -static symbol s_1_20[3] = {0xC5, 0xC7, 0xCF}; -static symbol s_1_21[3] = {0xCF, 0xC7, 0xCF}; -static symbol s_1_22[2] = {0xC1, 0xD1}; -static symbol s_1_23[2] = {0xD1, 0xD1}; -static symbol s_1_24[3] = {0xC5, 0xCD, 0xD5}; -static symbol s_1_25[3] = {0xCF, 0xCD, 0xD5}; - -static struct among a_1[26] = -{ - /* 0 */ {2, s_1_0, -1, 1, 0}, - /* 1 */ {2, s_1_1, -1, 1, 0}, - /* 2 */ {2, s_1_2, -1, 1, 0}, - /* 3 */ {2, s_1_3, -1, 1, 0}, - /* 4 */ {2, s_1_4, -1, 1, 0}, - /* 5 */ {2, s_1_5, -1, 1, 0}, - /* 6 */ {2, s_1_6, -1, 1, 0}, - /* 7 */ {2, s_1_7, -1, 1, 0}, - /* 8 */ {2, s_1_8, -1, 1, 0}, - /* 9 */ {2, s_1_9, -1, 1, 0}, - /* 10 */ {3, s_1_10, -1, 1, 0}, - /* 11 */ {3, s_1_11, -1, 1, 0}, - /* 12 */ {2, s_1_12, -1, 1, 0}, - /* 13 */ {2, s_1_13, -1, 1, 0}, - /* 14 */ {2, s_1_14, -1, 1, 0}, - /* 15 */ {2, s_1_15, -1, 1, 0}, - /* 16 */ {2, s_1_16, -1, 1, 0}, - /* 17 */ {2, s_1_17, -1, 1, 0}, - /* 18 */ {2, s_1_18, -1, 1, 0}, - /* 19 */ {2, s_1_19, -1, 1, 0}, - /* 20 */ {3, s_1_20, -1, 1, 0}, - /* 21 */ {3, s_1_21, -1, 1, 0}, - /* 22 */ {2, s_1_22, -1, 1, 0}, - /* 23 */ {2, s_1_23, -1, 1, 0}, - /* 24 */ {3, s_1_24, -1, 1, 0}, - /* 25 */ {3, s_1_25, -1, 1, 0} -}; - -static symbol s_2_0[2] = {0xC5, 0xCD}; -static symbol s_2_1[2] = {0xCE, 0xCE}; -static symbol s_2_2[2] = {0xD7, 0xDB}; -static symbol s_2_3[3] = {0xC9, 0xD7, 0xDB}; -static symbol s_2_4[3] = {0xD9, 0xD7, 0xDB}; -static symbol s_2_5[1] = {0xDD}; -static symbol s_2_6[2] = {0xC0, 0xDD}; -static symbol s_2_7[3] = {0xD5, 0xC0, 0xDD}; - -static struct among a_2[8] = -{ - /* 0 */ {2, s_2_0, -1, 1, 0}, - /* 1 */ {2, s_2_1, -1, 1, 0}, - /* 2 */ {2, s_2_2, -1, 1, 0}, - /* 3 */ {3, s_2_3, 2, 2, 0}, - /* 4 */ {3, s_2_4, 2, 2, 0}, - /* 5 */ {1, s_2_5, -1, 1, 0}, - /* 6 */ {2, s_2_6, 5, 1, 0}, - /* 7 */ {3, s_2_7, 6, 2, 0} -}; - -static symbol s_3_0[2] = {0xD3, 0xD1}; -static symbol s_3_1[2] = {0xD3, 0xD8}; - -static struct among a_3[2] = -{ - /* 0 */ {2, s_3_0, -1, 1, 0}, - /* 1 */ {2, s_3_1, -1, 1, 
0} -}; - -static symbol s_4_0[1] = {0xC0}; -static symbol s_4_1[2] = {0xD5, 0xC0}; -static symbol s_4_2[2] = {0xCC, 0xC1}; -static symbol s_4_3[3] = {0xC9, 0xCC, 0xC1}; -static symbol s_4_4[3] = {0xD9, 0xCC, 0xC1}; -static symbol s_4_5[2] = {0xCE, 0xC1}; -static symbol s_4_6[3] = {0xC5, 0xCE, 0xC1}; -static symbol s_4_7[3] = {0xC5, 0xD4, 0xC5}; -static symbol s_4_8[3] = {0xC9, 0xD4, 0xC5}; -static symbol s_4_9[3] = {0xCA, 0xD4, 0xC5}; -static symbol s_4_10[4] = {0xC5, 0xCA, 0xD4, 0xC5}; -static symbol s_4_11[4] = {0xD5, 0xCA, 0xD4, 0xC5}; -static symbol s_4_12[2] = {0xCC, 0xC9}; -static symbol s_4_13[3] = {0xC9, 0xCC, 0xC9}; -static symbol s_4_14[3] = {0xD9, 0xCC, 0xC9}; -static symbol s_4_15[1] = {0xCA}; -static symbol s_4_16[2] = {0xC5, 0xCA}; -static symbol s_4_17[2] = {0xD5, 0xCA}; -static symbol s_4_18[1] = {0xCC}; -static symbol s_4_19[2] = {0xC9, 0xCC}; -static symbol s_4_20[2] = {0xD9, 0xCC}; -static symbol s_4_21[2] = {0xC5, 0xCD}; -static symbol s_4_22[2] = {0xC9, 0xCD}; -static symbol s_4_23[2] = {0xD9, 0xCD}; -static symbol s_4_24[1] = {0xCE}; -static symbol s_4_25[2] = {0xC5, 0xCE}; -static symbol s_4_26[2] = {0xCC, 0xCF}; -static symbol s_4_27[3] = {0xC9, 0xCC, 0xCF}; -static symbol s_4_28[3] = {0xD9, 0xCC, 0xCF}; -static symbol s_4_29[2] = {0xCE, 0xCF}; -static symbol s_4_30[3] = {0xC5, 0xCE, 0xCF}; -static symbol s_4_31[3] = {0xCE, 0xCE, 0xCF}; -static symbol s_4_32[2] = {0xC0, 0xD4}; -static symbol s_4_33[3] = {0xD5, 0xC0, 0xD4}; -static symbol s_4_34[2] = {0xC5, 0xD4}; -static symbol s_4_35[3] = {0xD5, 0xC5, 0xD4}; -static symbol s_4_36[2] = {0xC9, 0xD4}; -static symbol s_4_37[2] = {0xD1, 0xD4}; -static symbol s_4_38[2] = {0xD9, 0xD4}; -static symbol s_4_39[2] = {0xD4, 0xD8}; -static symbol s_4_40[3] = {0xC9, 0xD4, 0xD8}; -static symbol s_4_41[3] = {0xD9, 0xD4, 0xD8}; -static symbol s_4_42[3] = {0xC5, 0xDB, 0xD8}; -static symbol s_4_43[3] = {0xC9, 0xDB, 0xD8}; -static symbol s_4_44[2] = {0xCE, 0xD9}; -static symbol s_4_45[3] = {0xC5, 0xCE, 0xD9}; 
- -static struct among a_4[46] = -{ - /* 0 */ {1, s_4_0, -1, 2, 0}, - /* 1 */ {2, s_4_1, 0, 2, 0}, - /* 2 */ {2, s_4_2, -1, 1, 0}, - /* 3 */ {3, s_4_3, 2, 2, 0}, - /* 4 */ {3, s_4_4, 2, 2, 0}, - /* 5 */ {2, s_4_5, -1, 1, 0}, - /* 6 */ {3, s_4_6, 5, 2, 0}, - /* 7 */ {3, s_4_7, -1, 1, 0}, - /* 8 */ {3, s_4_8, -1, 2, 0}, - /* 9 */ {3, s_4_9, -1, 1, 0}, - /* 10 */ {4, s_4_10, 9, 2, 0}, - /* 11 */ {4, s_4_11, 9, 2, 0}, - /* 12 */ {2, s_4_12, -1, 1, 0}, - /* 13 */ {3, s_4_13, 12, 2, 0}, - /* 14 */ {3, s_4_14, 12, 2, 0}, - /* 15 */ {1, s_4_15, -1, 1, 0}, - /* 16 */ {2, s_4_16, 15, 2, 0}, - /* 17 */ {2, s_4_17, 15, 2, 0}, - /* 18 */ {1, s_4_18, -1, 1, 0}, - /* 19 */ {2, s_4_19, 18, 2, 0}, - /* 20 */ {2, s_4_20, 18, 2, 0}, - /* 21 */ {2, s_4_21, -1, 1, 0}, - /* 22 */ {2, s_4_22, -1, 2, 0}, - /* 23 */ {2, s_4_23, -1, 2, 0}, - /* 24 */ {1, s_4_24, -1, 1, 0}, - /* 25 */ {2, s_4_25, 24, 2, 0}, - /* 26 */ {2, s_4_26, -1, 1, 0}, - /* 27 */ {3, s_4_27, 26, 2, 0}, - /* 28 */ {3, s_4_28, 26, 2, 0}, - /* 29 */ {2, s_4_29, -1, 1, 0}, - /* 30 */ {3, s_4_30, 29, 2, 0}, - /* 31 */ {3, s_4_31, 29, 1, 0}, - /* 32 */ {2, s_4_32, -1, 1, 0}, - /* 33 */ {3, s_4_33, 32, 2, 0}, - /* 34 */ {2, s_4_34, -1, 1, 0}, - /* 35 */ {3, s_4_35, 34, 2, 0}, - /* 36 */ {2, s_4_36, -1, 2, 0}, - /* 37 */ {2, s_4_37, -1, 2, 0}, - /* 38 */ {2, s_4_38, -1, 2, 0}, - /* 39 */ {2, s_4_39, -1, 1, 0}, - /* 40 */ {3, s_4_40, 39, 2, 0}, - /* 41 */ {3, s_4_41, 39, 2, 0}, - /* 42 */ {3, s_4_42, -1, 1, 0}, - /* 43 */ {3, s_4_43, -1, 2, 0}, - /* 44 */ {2, s_4_44, -1, 1, 0}, - /* 45 */ {3, s_4_45, 44, 2, 0} -}; - -static symbol s_5_0[1] = {0xC0}; -static symbol s_5_1[2] = {0xC9, 0xC0}; -static symbol s_5_2[2] = {0xD8, 0xC0}; -static symbol s_5_3[1] = {0xC1}; -static symbol s_5_4[1] = {0xC5}; -static symbol s_5_5[2] = {0xC9, 0xC5}; -static symbol s_5_6[2] = {0xD8, 0xC5}; -static symbol s_5_7[2] = {0xC1, 0xC8}; -static symbol s_5_8[2] = {0xD1, 0xC8}; -static symbol s_5_9[3] = {0xC9, 0xD1, 0xC8}; -static symbol s_5_10[1] = 
{0xC9}; -static symbol s_5_11[2] = {0xC5, 0xC9}; -static symbol s_5_12[2] = {0xC9, 0xC9}; -static symbol s_5_13[3] = {0xC1, 0xCD, 0xC9}; -static symbol s_5_14[3] = {0xD1, 0xCD, 0xC9}; -static symbol s_5_15[4] = {0xC9, 0xD1, 0xCD, 0xC9}; -static symbol s_5_16[1] = {0xCA}; -static symbol s_5_17[2] = {0xC5, 0xCA}; -static symbol s_5_18[3] = {0xC9, 0xC5, 0xCA}; -static symbol s_5_19[2] = {0xC9, 0xCA}; -static symbol s_5_20[2] = {0xCF, 0xCA}; -static symbol s_5_21[2] = {0xC1, 0xCD}; -static symbol s_5_22[2] = {0xC5, 0xCD}; -static symbol s_5_23[3] = {0xC9, 0xC5, 0xCD}; -static symbol s_5_24[2] = {0xCF, 0xCD}; -static symbol s_5_25[2] = {0xD1, 0xCD}; -static symbol s_5_26[3] = {0xC9, 0xD1, 0xCD}; -static symbol s_5_27[1] = {0xCF}; -static symbol s_5_28[1] = {0xD1}; -static symbol s_5_29[2] = {0xC9, 0xD1}; -static symbol s_5_30[2] = {0xD8, 0xD1}; -static symbol s_5_31[1] = {0xD5}; -static symbol s_5_32[2] = {0xC5, 0xD7}; -static symbol s_5_33[2] = {0xCF, 0xD7}; -static symbol s_5_34[1] = {0xD8}; -static symbol s_5_35[1] = {0xD9}; - -static struct among a_5[36] = -{ - /* 0 */ {1, s_5_0, -1, 1, 0}, - /* 1 */ {2, s_5_1, 0, 1, 0}, - /* 2 */ {2, s_5_2, 0, 1, 0}, - /* 3 */ {1, s_5_3, -1, 1, 0}, - /* 4 */ {1, s_5_4, -1, 1, 0}, - /* 5 */ {2, s_5_5, 4, 1, 0}, - /* 6 */ {2, s_5_6, 4, 1, 0}, - /* 7 */ {2, s_5_7, -1, 1, 0}, - /* 8 */ {2, s_5_8, -1, 1, 0}, - /* 9 */ {3, s_5_9, 8, 1, 0}, - /* 10 */ {1, s_5_10, -1, 1, 0}, - /* 11 */ {2, s_5_11, 10, 1, 0}, - /* 12 */ {2, s_5_12, 10, 1, 0}, - /* 13 */ {3, s_5_13, 10, 1, 0}, - /* 14 */ {3, s_5_14, 10, 1, 0}, - /* 15 */ {4, s_5_15, 14, 1, 0}, - /* 16 */ {1, s_5_16, -1, 1, 0}, - /* 17 */ {2, s_5_17, 16, 1, 0}, - /* 18 */ {3, s_5_18, 17, 1, 0}, - /* 19 */ {2, s_5_19, 16, 1, 0}, - /* 20 */ {2, s_5_20, 16, 1, 0}, - /* 21 */ {2, s_5_21, -1, 1, 0}, - /* 22 */ {2, s_5_22, -1, 1, 0}, - /* 23 */ {3, s_5_23, 22, 1, 0}, - /* 24 */ {2, s_5_24, -1, 1, 0}, - /* 25 */ {2, s_5_25, -1, 1, 0}, - /* 26 */ {3, s_5_26, 25, 1, 0}, - /* 27 */ {1, s_5_27, -1, 1, 
0}, - /* 28 */ {1, s_5_28, -1, 1, 0}, - /* 29 */ {2, s_5_29, 28, 1, 0}, - /* 30 */ {2, s_5_30, 28, 1, 0}, - /* 31 */ {1, s_5_31, -1, 1, 0}, - /* 32 */ {2, s_5_32, -1, 1, 0}, - /* 33 */ {2, s_5_33, -1, 1, 0}, - /* 34 */ {1, s_5_34, -1, 1, 0}, - /* 35 */ {1, s_5_35, -1, 1, 0} -}; - -static symbol s_6_0[3] = {0xCF, 0xD3, 0xD4}; -static symbol s_6_1[4] = {0xCF, 0xD3, 0xD4, 0xD8}; - -static struct among a_6[2] = -{ - /* 0 */ {3, s_6_0, -1, 1, 0}, - /* 1 */ {4, s_6_1, -1, 1, 0} -}; - -static symbol s_7_0[4] = {0xC5, 0xCA, 0xDB, 0xC5}; -static symbol s_7_1[1] = {0xCE}; -static symbol s_7_2[1] = {0xD8}; -static symbol s_7_3[3] = {0xC5, 0xCA, 0xDB}; - -static struct among a_7[4] = -{ - /* 0 */ {4, s_7_0, -1, 1, 0}, - /* 1 */ {1, s_7_1, -1, 2, 0}, - /* 2 */ {1, s_7_2, -1, 3, 0}, - /* 3 */ {3, s_7_3, -1, 1, 0} -}; - -static unsigned char g_v[] = {35, 130, 34, 18}; - -static symbol s_0[] = {0xC1}; -static symbol s_1[] = {0xD1}; -static symbol s_2[] = {0xC1}; -static symbol s_3[] = {0xD1}; -static symbol s_4[] = {0xC1}; -static symbol s_5[] = {0xD1}; -static symbol s_6[] = {0xCE}; -static symbol s_7[] = {0xCE}; -static symbol s_8[] = {0xCE}; -static symbol s_9[] = {0xC9}; - -static int -r_mark_regions(struct SN_env * z) -{ - z->I[0] = z->l; - z->I[1] = z->l; - { - int c = z->c; /* do, line 63 */ - - while (1) - { /* gopast, line 64 */ - if (!(in_grouping(z, g_v, 192, 220))) - goto lab1; - break; - lab1: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 64 */ - } - z->I[0] = z->c; /* setmark pV, line 64 */ - while (1) - { /* gopast, line 64 */ - if (!(out_grouping(z, g_v, 192, 220))) - goto lab2; - break; - lab2: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 64 */ - } - while (1) - { /* gopast, line 65 */ - if (!(in_grouping(z, g_v, 192, 220))) - goto lab3; - break; - lab3: - if (z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 65 */ - } - while (1) - { /* gopast, line 65 */ - if (!(out_grouping(z, g_v, 192, 220))) - goto lab4; - break; - lab4: - if 
(z->c >= z->l) - goto lab0; - z->c++; /* gopast, line 65 */ - } - z->I[1] = z->c; /* setmark p2, line 65 */ -lab0: - z->c = c; - } - return 1; -} - -static int -r_R2(struct SN_env * z) -{ - if (!(z->I[1] <= z->c)) - return 0; - return 1; -} - -static int -r_perfective_gerund(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 74 */ - among_var = find_among_b(z, a_0, 9); /* substring, line 74 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 74 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 78 */ - if (!(eq_s_b(z, 1, s_0))) - goto lab1; - goto lab0; - lab1: - z->c = z->l - m; - if (!(eq_s_b(z, 1, s_1))) - return 0; - } - lab0: - { - int ret; - - ret = slice_del(z); /* delete, line 78 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_del(z); /* delete, line 85 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_adjective(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 90 */ - among_var = find_among_b(z, a_1, 26); /* substring, line 90 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 90 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 99 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_adjectival(struct SN_env * z) -{ - int among_var; - - { - int ret = r_adjective(z); - - if (ret == 0) - return 0; /* call adjective, line 104 */ - if (ret < 0) - return ret; - } - { - int m = z->l - z->c; - - (void) m; /* try, line 111 */ - z->ket = z->c; /* [, line 112 */ - among_var = find_among_b(z, a_2, 8); /* substring, line 112 */ - if (!(among_var)) - { - z->c = z->l - m; - goto lab0; - } - z->bra = z->c; /* ], line 112 */ - switch (among_var) - { - case 0: - { - z->c = z->l - m; - goto lab0; - } - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 117 */ - if (!(eq_s_b(z, 1, 
s_2))) - goto lab2; - goto lab1; - lab2: - z->c = z->l - m; - if (!(eq_s_b(z, 1, s_3))) - { - z->c = z->l - m; - goto lab0; - } - } - lab1: - { - int ret; - - ret = slice_del(z); /* delete, line 117 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_del(z); /* delete, line 124 */ - if (ret < 0) - return ret; - } - break; - } -lab0: - ; - } - return 1; -} - -static int -r_reflexive(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 131 */ - among_var = find_among_b(z, a_3, 2); /* substring, line 131 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 131 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 134 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_verb(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 139 */ - among_var = find_among_b(z, a_4, 46); /* substring, line 139 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 139 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 145 */ - if (!(eq_s_b(z, 1, s_4))) - goto lab1; - goto lab0; - lab1: - z->c = z->l - m; - if (!(eq_s_b(z, 1, s_5))) - return 0; - } - lab0: - { - int ret; - - ret = slice_del(z); /* delete, line 145 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_del(z); /* delete, line 153 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_noun(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 162 */ - among_var = find_among_b(z, a_5, 36); /* substring, line 162 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 162 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 169 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_derivational(struct SN_env * z) -{ - int 
among_var; - - z->ket = z->c; /* [, line 178 */ - among_var = find_among_b(z, a_6, 2); /* substring, line 178 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 178 */ - { - int ret = r_R2(z); - - if (ret == 0) - return 0; /* call R2, line 178 */ - if (ret < 0) - return ret; - } - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 181 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_tidy_up(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 186 */ - among_var = find_among_b(z, a_7, 4); /* substring, line 186 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 186 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 190 */ - if (ret < 0) - return ret; - } - z->ket = z->c; /* [, line 191 */ - if (!(eq_s_b(z, 1, s_6))) - return 0; - z->bra = z->c; /* ], line 191 */ - if (!(eq_s_b(z, 1, s_7))) - return 0; - { - int ret; - - ret = slice_del(z); /* delete, line 191 */ - if (ret < 0) - return ret; - } - break; - case 2: - if (!(eq_s_b(z, 1, s_8))) - return 0; - { - int ret; - - ret = slice_del(z); /* delete, line 194 */ - if (ret < 0) - return ret; - } - break; - case 3: - { - int ret; - - ret = slice_del(z); /* delete, line 196 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -extern int -russian_KOI8_R_stem(struct SN_env * z) -{ - { - int c = z->c; /* do, line 203 */ - - { - int ret = r_mark_regions(z); - - if (ret == 0) - goto lab0; /* call mark_regions, line 203 */ - if (ret < 0) - return ret; - } -lab0: - z->c = c; - } - z->lb = z->c; - z->c = z->l; /* backwards, line 204 */ - - { - int m3; /* setlimit, line 204 */ - int m = z->l - z->c; - - (void) m; - if (z->c < z->I[0]) - return 0; - z->c = z->I[0]; /* tomark, line 204 */ - m3 = z->lb; - z->lb = z->c; - z->c = z->l - m; - { - int m = z->l - z->c; - - (void) m; /* do, line 205 */ - { - int m = z->l - z->c; - 
- (void) m; /* or, line 206 */ - { - int ret = r_perfective_gerund(z); - - if (ret == 0) - goto lab3; /* call perfective_gerund, line 206 */ - if (ret < 0) - return ret; - } - goto lab2; - lab3: - z->c = z->l - m; - { - int m = z->l - z->c; - - (void) m; /* try, line 207 */ - { - int ret = r_reflexive(z); - - if (ret == 0) - { - z->c = z->l - m; - goto lab4; - } /* call reflexive, line 207 */ - if (ret < 0) - return ret; - } - lab4: - ; - } - { - int m = z->l - z->c; - - (void) m; /* or, line 208 */ - { - int ret = r_adjectival(z); - - if (ret == 0) - goto lab6; /* call adjectival, line 208 */ - if (ret < 0) - return ret; - } - goto lab5; - lab6: - z->c = z->l - m; - { - int ret = r_verb(z); - - if (ret == 0) - goto lab7; /* call verb, line 208 */ - if (ret < 0) - return ret; - } - goto lab5; - lab7: - z->c = z->l - m; - { - int ret = r_noun(z); - - if (ret == 0) - goto lab1; /* call noun, line 208 */ - if (ret < 0) - return ret; - } - } - lab5: - ; - } - lab2: - lab1: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* try, line 211 */ - z->ket = z->c; /* [, line 211 */ - if (!(eq_s_b(z, 1, s_9))) - { - z->c = z->l - m; - goto lab8; - } - z->bra = z->c; /* ], line 211 */ - { - int ret; - - ret = slice_del(z); /* delete, line 211 */ - if (ret < 0) - return ret; - } - lab8: - ; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 214 */ - { - int ret = r_derivational(z); - - if (ret == 0) - goto lab9; /* call derivational, line 214 */ - if (ret < 0) - return ret; - } - lab9: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 215 */ - { - int ret = r_tidy_up(z); - - if (ret == 0) - goto lab10; /* call tidy_up, line 215 */ - if (ret < 0) - return ret; - } - lab10: - z->c = z->l - m; - } - z->lb = m3; - } - z->c = z->lb; - return 1; -} - -extern struct SN_env * -russian_KOI8_R_create_env(void) -{ - return SN_create_env(0, 2, 0); -} - -extern void -russian_KOI8_R_close_env(struct SN_env * z) -{ - SN_close_env(z); -} diff --git 
a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h deleted file mode 100644 index 217c20abdd..0000000000 --- a/contrib/tsearch2/snowball/russian_stem.h +++ /dev/null @@ -1,17 +0,0 @@ - -/* This file was generated automatically by the Snowball to ANSI C compiler */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -extern struct SN_env *russian_KOI8_R_create_env(void); -extern void russian_KOI8_R_close_env(struct SN_env * z); - -extern int russian_KOI8_R_stem(struct SN_env * z); - -#ifdef __cplusplus -} - -#endif diff --git a/contrib/tsearch2/snowball/russian_stem_UTF8.c b/contrib/tsearch2/snowball/russian_stem_UTF8.c deleted file mode 100644 index 994ff267ca..0000000000 --- a/contrib/tsearch2/snowball/russian_stem_UTF8.c +++ /dev/null @@ -1,942 +0,0 @@ - -/* This file was generated automatically by the Snowball to ANSI C compiler */ - -#include "header.h" - -extern int russian_UTF_8_stem(struct SN_env * z); -static int r_tidy_up(struct SN_env * z); -static int r_derivational(struct SN_env * z); -static int r_noun(struct SN_env * z); -static int r_verb(struct SN_env * z); -static int r_reflexive(struct SN_env * z); -static int r_adjectival(struct SN_env * z); -static int r_adjective(struct SN_env * z); -static int r_perfective_gerund(struct SN_env * z); -static int r_R2(struct SN_env * z); -static int r_mark_regions(struct SN_env * z); - -extern struct SN_env *russian_UTF_8_create_env(void); -extern void russian_UTF_8_close_env(struct SN_env * z); - -static symbol s_0_0[10] = {0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C}; -static symbol s_0_1[12] = {0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C}; -static symbol s_0_2[12] = {0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C}; -static symbol s_0_3[2] = {0xD0, 0xB2}; -static symbol s_0_4[4] = {0xD1, 0x8B, 0xD0, 0xB2}; -static symbol s_0_5[4] = {0xD0, 0xB8, 0xD0, 0xB2}; -static symbol s_0_6[6] = {0xD0, 0xB2, 0xD1, 0x88, 0xD0, 
0xB8}; -static symbol s_0_7[8] = {0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8}; -static symbol s_0_8[8] = {0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8}; - -static struct among a_0[9] = -{ - /* 0 */ {10, s_0_0, -1, 1, 0}, - /* 1 */ {12, s_0_1, 0, 2, 0}, - /* 2 */ {12, s_0_2, 0, 2, 0}, - /* 3 */ {2, s_0_3, -1, 1, 0}, - /* 4 */ {4, s_0_4, 3, 2, 0}, - /* 5 */ {4, s_0_5, 3, 2, 0}, - /* 6 */ {6, s_0_6, -1, 1, 0}, - /* 7 */ {8, s_0_7, 6, 2, 0}, - /* 8 */ {8, s_0_8, 6, 2, 0} -}; - -static symbol s_1_0[6] = {0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83}; -static symbol s_1_1[6] = {0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83}; -static symbol s_1_2[4] = {0xD1, 0x8B, 0xD1, 0x85}; -static symbol s_1_3[4] = {0xD0, 0xB8, 0xD1, 0x85}; -static symbol s_1_4[4] = {0xD1, 0x83, 0xD1, 0x8E}; -static symbol s_1_5[4] = {0xD1, 0x8E, 0xD1, 0x8E}; -static symbol s_1_6[4] = {0xD0, 0xB5, 0xD1, 0x8E}; -static symbol s_1_7[4] = {0xD0, 0xBE, 0xD1, 0x8E}; -static symbol s_1_8[4] = {0xD1, 0x8F, 0xD1, 0x8F}; -static symbol s_1_9[4] = {0xD0, 0xB0, 0xD1, 0x8F}; -static symbol s_1_10[4] = {0xD1, 0x8B, 0xD0, 0xB5}; -static symbol s_1_11[4] = {0xD0, 0xB5, 0xD0, 0xB5}; -static symbol s_1_12[4] = {0xD0, 0xB8, 0xD0, 0xB5}; -static symbol s_1_13[4] = {0xD0, 0xBE, 0xD0, 0xB5}; -static symbol s_1_14[6] = {0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8}; -static symbol s_1_15[6] = {0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8}; -static symbol s_1_16[4] = {0xD1, 0x8B, 0xD0, 0xB9}; -static symbol s_1_17[4] = {0xD0, 0xB5, 0xD0, 0xB9}; -static symbol s_1_18[4] = {0xD0, 0xB8, 0xD0, 0xB9}; -static symbol s_1_19[4] = {0xD0, 0xBE, 0xD0, 0xB9}; -static symbol s_1_20[4] = {0xD1, 0x8B, 0xD0, 0xBC}; -static symbol s_1_21[4] = {0xD0, 0xB5, 0xD0, 0xBC}; -static symbol s_1_22[4] = {0xD0, 0xB8, 0xD0, 0xBC}; -static symbol s_1_23[4] = {0xD0, 0xBE, 0xD0, 0xBC}; -static symbol s_1_24[6] = {0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE}; -static symbol s_1_25[6] = {0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE}; - -static struct among a_1[26] = -{ - /* 0 */ {6, s_1_0, -1, 1, 0}, - /* 1 
*/ {6, s_1_1, -1, 1, 0}, - /* 2 */ {4, s_1_2, -1, 1, 0}, - /* 3 */ {4, s_1_3, -1, 1, 0}, - /* 4 */ {4, s_1_4, -1, 1, 0}, - /* 5 */ {4, s_1_5, -1, 1, 0}, - /* 6 */ {4, s_1_6, -1, 1, 0}, - /* 7 */ {4, s_1_7, -1, 1, 0}, - /* 8 */ {4, s_1_8, -1, 1, 0}, - /* 9 */ {4, s_1_9, -1, 1, 0}, - /* 10 */ {4, s_1_10, -1, 1, 0}, - /* 11 */ {4, s_1_11, -1, 1, 0}, - /* 12 */ {4, s_1_12, -1, 1, 0}, - /* 13 */ {4, s_1_13, -1, 1, 0}, - /* 14 */ {6, s_1_14, -1, 1, 0}, - /* 15 */ {6, s_1_15, -1, 1, 0}, - /* 16 */ {4, s_1_16, -1, 1, 0}, - /* 17 */ {4, s_1_17, -1, 1, 0}, - /* 18 */ {4, s_1_18, -1, 1, 0}, - /* 19 */ {4, s_1_19, -1, 1, 0}, - /* 20 */ {4, s_1_20, -1, 1, 0}, - /* 21 */ {4, s_1_21, -1, 1, 0}, - /* 22 */ {4, s_1_22, -1, 1, 0}, - /* 23 */ {4, s_1_23, -1, 1, 0}, - /* 24 */ {6, s_1_24, -1, 1, 0}, - /* 25 */ {6, s_1_25, -1, 1, 0} -}; - -static symbol s_2_0[4] = {0xD0, 0xB2, 0xD1, 0x88}; -static symbol s_2_1[6] = {0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88}; -static symbol s_2_2[6] = {0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88}; -static symbol s_2_3[2] = {0xD1, 0x89}; -static symbol s_2_4[4] = {0xD1, 0x8E, 0xD1, 0x89}; -static symbol s_2_5[6] = {0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89}; -static symbol s_2_6[4] = {0xD0, 0xB5, 0xD0, 0xBC}; -static symbol s_2_7[4] = {0xD0, 0xBD, 0xD0, 0xBD}; - -static struct among a_2[8] = -{ - /* 0 */ {4, s_2_0, -1, 1, 0}, - /* 1 */ {6, s_2_1, 0, 2, 0}, - /* 2 */ {6, s_2_2, 0, 2, 0}, - /* 3 */ {2, s_2_3, -1, 1, 0}, - /* 4 */ {4, s_2_4, 3, 1, 0}, - /* 5 */ {6, s_2_5, 4, 2, 0}, - /* 6 */ {4, s_2_6, -1, 1, 0}, - /* 7 */ {4, s_2_7, -1, 1, 0} -}; - -static symbol s_3_0[4] = {0xD1, 0x81, 0xD1, 0x8C}; -static symbol s_3_1[4] = {0xD1, 0x81, 0xD1, 0x8F}; - -static struct among a_3[2] = -{ - /* 0 */ {4, s_3_0, -1, 1, 0}, - /* 1 */ {4, s_3_1, -1, 1, 0} -}; - -static symbol s_4_0[4] = {0xD1, 0x8B, 0xD1, 0x82}; -static symbol s_4_1[4] = {0xD1, 0x8E, 0xD1, 0x82}; -static symbol s_4_2[6] = {0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82}; -static symbol s_4_3[4] = {0xD1, 0x8F, 0xD1, 0x82}; -static 
symbol s_4_4[4] = {0xD0, 0xB5, 0xD1, 0x82}; -static symbol s_4_5[6] = {0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82}; -static symbol s_4_6[4] = {0xD0, 0xB8, 0xD1, 0x82}; -static symbol s_4_7[4] = {0xD0, 0xBD, 0xD1, 0x8B}; -static symbol s_4_8[6] = {0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B}; -static symbol s_4_9[4] = {0xD1, 0x82, 0xD1, 0x8C}; -static symbol s_4_10[6] = {0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C}; -static symbol s_4_11[6] = {0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C}; -static symbol s_4_12[6] = {0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C}; -static symbol s_4_13[6] = {0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C}; -static symbol s_4_14[2] = {0xD1, 0x8E}; -static symbol s_4_15[4] = {0xD1, 0x83, 0xD1, 0x8E}; -static symbol s_4_16[4] = {0xD0, 0xBB, 0xD0, 0xB0}; -static symbol s_4_17[6] = {0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0}; -static symbol s_4_18[6] = {0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB0}; -static symbol s_4_19[4] = {0xD0, 0xBD, 0xD0, 0xB0}; -static symbol s_4_20[6] = {0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0}; -static symbol s_4_21[6] = {0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5}; -static symbol s_4_22[6] = {0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5}; -static symbol s_4_23[6] = {0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5}; -static symbol s_4_24[8] = {0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5}; -static symbol s_4_25[8] = {0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5}; -static symbol s_4_26[4] = {0xD0, 0xBB, 0xD0, 0xB8}; -static symbol s_4_27[6] = {0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8}; -static symbol s_4_28[6] = {0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8}; -static symbol s_4_29[2] = {0xD0, 0xB9}; -static symbol s_4_30[4] = {0xD1, 0x83, 0xD0, 0xB9}; -static symbol s_4_31[4] = {0xD0, 0xB5, 0xD0, 0xB9}; -static symbol s_4_32[2] = {0xD0, 0xBB}; -static symbol s_4_33[4] = {0xD1, 0x8B, 0xD0, 0xBB}; -static symbol s_4_34[4] = {0xD0, 0xB8, 0xD0, 0xBB}; -static symbol s_4_35[4] = {0xD1, 0x8B, 0xD0, 0xBC}; -static symbol s_4_36[4] = {0xD0, 0xB5, 0xD0, 0xBC}; -static symbol s_4_37[4] = {0xD0, 0xB8, 0xD0, 0xBC}; -static symbol s_4_38[2] = 
{0xD0, 0xBD}; -static symbol s_4_39[4] = {0xD0, 0xB5, 0xD0, 0xBD}; -static symbol s_4_40[4] = {0xD0, 0xBB, 0xD0, 0xBE}; -static symbol s_4_41[6] = {0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE}; -static symbol s_4_42[6] = {0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE}; -static symbol s_4_43[4] = {0xD0, 0xBD, 0xD0, 0xBE}; -static symbol s_4_44[6] = {0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE}; -static symbol s_4_45[6] = {0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE}; - -static struct among a_4[46] = -{ - /* 0 */ {4, s_4_0, -1, 2, 0}, - /* 1 */ {4, s_4_1, -1, 1, 0}, - /* 2 */ {6, s_4_2, 1, 2, 0}, - /* 3 */ {4, s_4_3, -1, 2, 0}, - /* 4 */ {4, s_4_4, -1, 1, 0}, - /* 5 */ {6, s_4_5, 4, 2, 0}, - /* 6 */ {4, s_4_6, -1, 2, 0}, - /* 7 */ {4, s_4_7, -1, 1, 0}, - /* 8 */ {6, s_4_8, 7, 2, 0}, - /* 9 */ {4, s_4_9, -1, 1, 0}, - /* 10 */ {6, s_4_10, 9, 2, 0}, - /* 11 */ {6, s_4_11, 9, 2, 0}, - /* 12 */ {6, s_4_12, -1, 1, 0}, - /* 13 */ {6, s_4_13, -1, 2, 0}, - /* 14 */ {2, s_4_14, -1, 2, 0}, - /* 15 */ {4, s_4_15, 14, 2, 0}, - /* 16 */ {4, s_4_16, -1, 1, 0}, - /* 17 */ {6, s_4_17, 16, 2, 0}, - /* 18 */ {6, s_4_18, 16, 2, 0}, - /* 19 */ {4, s_4_19, -1, 1, 0}, - /* 20 */ {6, s_4_20, 19, 2, 0}, - /* 21 */ {6, s_4_21, -1, 1, 0}, - /* 22 */ {6, s_4_22, -1, 2, 0}, - /* 23 */ {6, s_4_23, -1, 1, 0}, - /* 24 */ {8, s_4_24, 23, 2, 0}, - /* 25 */ {8, s_4_25, 23, 2, 0}, - /* 26 */ {4, s_4_26, -1, 1, 0}, - /* 27 */ {6, s_4_27, 26, 2, 0}, - /* 28 */ {6, s_4_28, 26, 2, 0}, - /* 29 */ {2, s_4_29, -1, 1, 0}, - /* 30 */ {4, s_4_30, 29, 2, 0}, - /* 31 */ {4, s_4_31, 29, 2, 0}, - /* 32 */ {2, s_4_32, -1, 1, 0}, - /* 33 */ {4, s_4_33, 32, 2, 0}, - /* 34 */ {4, s_4_34, 32, 2, 0}, - /* 35 */ {4, s_4_35, -1, 2, 0}, - /* 36 */ {4, s_4_36, -1, 1, 0}, - /* 37 */ {4, s_4_37, -1, 2, 0}, - /* 38 */ {2, s_4_38, -1, 1, 0}, - /* 39 */ {4, s_4_39, 38, 2, 0}, - /* 40 */ {4, s_4_40, -1, 1, 0}, - /* 41 */ {6, s_4_41, 40, 2, 0}, - /* 42 */ {6, s_4_42, 40, 2, 0}, - /* 43 */ {4, s_4_43, -1, 1, 0}, - /* 44 */ {6, s_4_44, 43, 2, 0}, - /* 45 */ {6, s_4_45, 
43, 1, 0} -}; - -static symbol s_5_0[2] = {0xD1, 0x83}; -static symbol s_5_1[4] = {0xD1, 0x8F, 0xD1, 0x85}; -static symbol s_5_2[6] = {0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85}; -static symbol s_5_3[4] = {0xD0, 0xB0, 0xD1, 0x85}; -static symbol s_5_4[2] = {0xD1, 0x8B}; -static symbol s_5_5[2] = {0xD1, 0x8C}; -static symbol s_5_6[2] = {0xD1, 0x8E}; -static symbol s_5_7[4] = {0xD1, 0x8C, 0xD1, 0x8E}; -static symbol s_5_8[4] = {0xD0, 0xB8, 0xD1, 0x8E}; -static symbol s_5_9[2] = {0xD1, 0x8F}; -static symbol s_5_10[4] = {0xD1, 0x8C, 0xD1, 0x8F}; -static symbol s_5_11[4] = {0xD0, 0xB8, 0xD1, 0x8F}; -static symbol s_5_12[2] = {0xD0, 0xB0}; -static symbol s_5_13[4] = {0xD0, 0xB5, 0xD0, 0xB2}; -static symbol s_5_14[4] = {0xD0, 0xBE, 0xD0, 0xB2}; -static symbol s_5_15[2] = {0xD0, 0xB5}; -static symbol s_5_16[4] = {0xD1, 0x8C, 0xD0, 0xB5}; -static symbol s_5_17[4] = {0xD0, 0xB8, 0xD0, 0xB5}; -static symbol s_5_18[2] = {0xD0, 0xB8}; -static symbol s_5_19[4] = {0xD0, 0xB5, 0xD0, 0xB8}; -static symbol s_5_20[4] = {0xD0, 0xB8, 0xD0, 0xB8}; -static symbol s_5_21[6] = {0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8}; -static symbol s_5_22[8] = {0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8}; -static symbol s_5_23[6] = {0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8}; -static symbol s_5_24[2] = {0xD0, 0xB9}; -static symbol s_5_25[4] = {0xD0, 0xB5, 0xD0, 0xB9}; -static symbol s_5_26[6] = {0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9}; -static symbol s_5_27[4] = {0xD0, 0xB8, 0xD0, 0xB9}; -static symbol s_5_28[4] = {0xD0, 0xBE, 0xD0, 0xB9}; -static symbol s_5_29[4] = {0xD1, 0x8F, 0xD0, 0xBC}; -static symbol s_5_30[6] = {0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC}; -static symbol s_5_31[4] = {0xD0, 0xB0, 0xD0, 0xBC}; -static symbol s_5_32[4] = {0xD0, 0xB5, 0xD0, 0xBC}; -static symbol s_5_33[6] = {0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC}; -static symbol s_5_34[4] = {0xD0, 0xBE, 0xD0, 0xBC}; -static symbol s_5_35[2] = {0xD0, 0xBE}; - -static struct among a_5[36] = -{ - /* 0 */ {2, s_5_0, -1, 1, 0}, - /* 1 */ {4, s_5_1, -1, 1, 0}, - /* 2 */ 
{6, s_5_2, 1, 1, 0}, - /* 3 */ {4, s_5_3, -1, 1, 0}, - /* 4 */ {2, s_5_4, -1, 1, 0}, - /* 5 */ {2, s_5_5, -1, 1, 0}, - /* 6 */ {2, s_5_6, -1, 1, 0}, - /* 7 */ {4, s_5_7, 6, 1, 0}, - /* 8 */ {4, s_5_8, 6, 1, 0}, - /* 9 */ {2, s_5_9, -1, 1, 0}, - /* 10 */ {4, s_5_10, 9, 1, 0}, - /* 11 */ {4, s_5_11, 9, 1, 0}, - /* 12 */ {2, s_5_12, -1, 1, 0}, - /* 13 */ {4, s_5_13, -1, 1, 0}, - /* 14 */ {4, s_5_14, -1, 1, 0}, - /* 15 */ {2, s_5_15, -1, 1, 0}, - /* 16 */ {4, s_5_16, 15, 1, 0}, - /* 17 */ {4, s_5_17, 15, 1, 0}, - /* 18 */ {2, s_5_18, -1, 1, 0}, - /* 19 */ {4, s_5_19, 18, 1, 0}, - /* 20 */ {4, s_5_20, 18, 1, 0}, - /* 21 */ {6, s_5_21, 18, 1, 0}, - /* 22 */ {8, s_5_22, 21, 1, 0}, - /* 23 */ {6, s_5_23, 18, 1, 0}, - /* 24 */ {2, s_5_24, -1, 1, 0}, - /* 25 */ {4, s_5_25, 24, 1, 0}, - /* 26 */ {6, s_5_26, 25, 1, 0}, - /* 27 */ {4, s_5_27, 24, 1, 0}, - /* 28 */ {4, s_5_28, 24, 1, 0}, - /* 29 */ {4, s_5_29, -1, 1, 0}, - /* 30 */ {6, s_5_30, 29, 1, 0}, - /* 31 */ {4, s_5_31, -1, 1, 0}, - /* 32 */ {4, s_5_32, -1, 1, 0}, - /* 33 */ {6, s_5_33, 32, 1, 0}, - /* 34 */ {4, s_5_34, -1, 1, 0}, - /* 35 */ {2, s_5_35, -1, 1, 0} -}; - -static symbol s_6_0[6] = {0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82}; -static symbol s_6_1[8] = {0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C}; - -static struct among a_6[2] = -{ - /* 0 */ {6, s_6_0, -1, 1, 0}, - /* 1 */ {8, s_6_1, -1, 1, 0} -}; - -static symbol s_7_0[6] = {0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88}; -static symbol s_7_1[2] = {0xD1, 0x8C}; -static symbol s_7_2[8] = {0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88, 0xD0, 0xB5}; -static symbol s_7_3[2] = {0xD0, 0xBD}; - -static struct among a_7[4] = -{ - /* 0 */ {6, s_7_0, -1, 1, 0}, - /* 1 */ {2, s_7_1, -1, 3, 0}, - /* 2 */ {8, s_7_2, -1, 1, 0}, - /* 3 */ {2, s_7_3, -1, 2, 0} -}; - -static unsigned char g_v[] = {33, 65, 8, 232}; - -static symbol s_0[] = {0xD0, 0xB0}; -static symbol s_1[] = {0xD1, 0x8F}; -static symbol s_2[] = {0xD0, 0xB0}; -static symbol s_3[] = {0xD1, 0x8F}; -static symbol s_4[] = {0xD0, 0xB0}; 
-static symbol s_5[] = {0xD1, 0x8F}; -static symbol s_6[] = {0xD0, 0xBD}; -static symbol s_7[] = {0xD0, 0xBD}; -static symbol s_8[] = {0xD0, 0xBD}; -static symbol s_9[] = {0xD0, 0xB8}; - -static int -r_mark_regions(struct SN_env * z) -{ - z->I[0] = z->l; - z->I[1] = z->l; - { - int c = z->c; /* do, line 61 */ - - while (1) - { /* gopast, line 62 */ - if (!(in_grouping_U(z, g_v, 1072, 1103))) - goto lab1; - break; - lab1: - { - int c = skip_utf8(z->p, z->c, 0, z->l, 1); - - if (c < 0) - goto lab0; - z->c = c; /* gopast, line 62 */ - } - } - z->I[0] = z->c; /* setmark pV, line 62 */ - while (1) - { /* gopast, line 62 */ - if (!(out_grouping_U(z, g_v, 1072, 1103))) - goto lab2; - break; - lab2: - { - int c = skip_utf8(z->p, z->c, 0, z->l, 1); - - if (c < 0) - goto lab0; - z->c = c; /* gopast, line 62 */ - } - } - while (1) - { /* gopast, line 63 */ - if (!(in_grouping_U(z, g_v, 1072, 1103))) - goto lab3; - break; - lab3: - { - int c = skip_utf8(z->p, z->c, 0, z->l, 1); - - if (c < 0) - goto lab0; - z->c = c; /* gopast, line 63 */ - } - } - while (1) - { /* gopast, line 63 */ - if (!(out_grouping_U(z, g_v, 1072, 1103))) - goto lab4; - break; - lab4: - { - int c = skip_utf8(z->p, z->c, 0, z->l, 1); - - if (c < 0) - goto lab0; - z->c = c; /* gopast, line 63 */ - } - } - z->I[1] = z->c; /* setmark p2, line 63 */ -lab0: - z->c = c; - } - return 1; -} - -static int -r_R2(struct SN_env * z) -{ - if (!(z->I[1] <= z->c)) - return 0; - return 1; -} - -static int -r_perfective_gerund(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 72 */ - among_var = find_among_b(z, a_0, 9); /* substring, line 72 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 72 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 76 */ - if (!(eq_s_b(z, 2, s_0))) - goto lab1; - goto lab0; - lab1: - z->c = z->l - m; - if (!(eq_s_b(z, 2, s_1))) - return 0; - } - lab0: - { - int ret; - - ret = slice_del(z); /* delete, 
line 76 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_del(z); /* delete, line 83 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_adjective(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 88 */ - among_var = find_among_b(z, a_1, 26); /* substring, line 88 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 88 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 97 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_adjectival(struct SN_env * z) -{ - int among_var; - - { - int ret = r_adjective(z); - - if (ret == 0) - return 0; /* call adjective, line 102 */ - if (ret < 0) - return ret; - } - { - int m = z->l - z->c; - - (void) m; /* try, line 109 */ - z->ket = z->c; /* [, line 110 */ - among_var = find_among_b(z, a_2, 8); /* substring, line 110 */ - if (!(among_var)) - { - z->c = z->l - m; - goto lab0; - } - z->bra = z->c; /* ], line 110 */ - switch (among_var) - { - case 0: - { - z->c = z->l - m; - goto lab0; - } - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 115 */ - if (!(eq_s_b(z, 2, s_2))) - goto lab2; - goto lab1; - lab2: - z->c = z->l - m; - if (!(eq_s_b(z, 2, s_3))) - { - z->c = z->l - m; - goto lab0; - } - } - lab1: - { - int ret; - - ret = slice_del(z); /* delete, line 115 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_del(z); /* delete, line 122 */ - if (ret < 0) - return ret; - } - break; - } -lab0: - ; - } - return 1; -} - -static int -r_reflexive(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 129 */ - among_var = find_among_b(z, a_3, 2); /* substring, line 129 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 129 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 132 */ - if (ret < 0) - return ret; - } - 
break; - } - return 1; -} - -static int -r_verb(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 137 */ - among_var = find_among_b(z, a_4, 46); /* substring, line 137 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 137 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int m = z->l - z->c; - - (void) m; /* or, line 143 */ - if (!(eq_s_b(z, 2, s_4))) - goto lab1; - goto lab0; - lab1: - z->c = z->l - m; - if (!(eq_s_b(z, 2, s_5))) - return 0; - } - lab0: - { - int ret; - - ret = slice_del(z); /* delete, line 143 */ - if (ret < 0) - return ret; - } - break; - case 2: - { - int ret; - - ret = slice_del(z); /* delete, line 151 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_noun(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 160 */ - among_var = find_among_b(z, a_5, 36); /* substring, line 160 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 160 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 167 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_derivational(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 176 */ - among_var = find_among_b(z, a_6, 2); /* substring, line 176 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 176 */ - { - int ret = r_R2(z); - - if (ret == 0) - return 0; /* call R2, line 176 */ - if (ret < 0) - return ret; - } - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = slice_del(z); /* delete, line 179 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -static int -r_tidy_up(struct SN_env * z) -{ - int among_var; - - z->ket = z->c; /* [, line 184 */ - among_var = find_among_b(z, a_7, 4); /* substring, line 184 */ - if (!(among_var)) - return 0; - z->bra = z->c; /* ], line 184 */ - switch (among_var) - { - case 0: - return 0; - case 1: - { - int ret; - - ret = 
slice_del(z); /* delete, line 188 */ - if (ret < 0) - return ret; - } - z->ket = z->c; /* [, line 189 */ - if (!(eq_s_b(z, 2, s_6))) - return 0; - z->bra = z->c; /* ], line 189 */ - if (!(eq_s_b(z, 2, s_7))) - return 0; - { - int ret; - - ret = slice_del(z); /* delete, line 189 */ - if (ret < 0) - return ret; - } - break; - case 2: - if (!(eq_s_b(z, 2, s_8))) - return 0; - { - int ret; - - ret = slice_del(z); /* delete, line 192 */ - if (ret < 0) - return ret; - } - break; - case 3: - { - int ret; - - ret = slice_del(z); /* delete, line 194 */ - if (ret < 0) - return ret; - } - break; - } - return 1; -} - -extern int -russian_UTF_8_stem(struct SN_env * z) -{ - { - int c = z->c; /* do, line 201 */ - - { - int ret = r_mark_regions(z); - - if (ret == 0) - goto lab0; /* call mark_regions, line 201 */ - if (ret < 0) - return ret; - } -lab0: - z->c = c; - } - z->lb = z->c; - z->c = z->l; /* backwards, line 202 */ - - { - int m3; /* setlimit, line 202 */ - int m = z->l - z->c; - - (void) m; - if (z->c < z->I[0]) - return 0; - z->c = z->I[0]; /* tomark, line 202 */ - m3 = z->lb; - z->lb = z->c; - z->c = z->l - m; - { - int m = z->l - z->c; - - (void) m; /* do, line 203 */ - { - int m = z->l - z->c; - - (void) m; /* or, line 204 */ - { - int ret = r_perfective_gerund(z); - - if (ret == 0) - goto lab3; /* call perfective_gerund, line 204 */ - if (ret < 0) - return ret; - } - goto lab2; - lab3: - z->c = z->l - m; - { - int m = z->l - z->c; - - (void) m; /* try, line 205 */ - { - int ret = r_reflexive(z); - - if (ret == 0) - { - z->c = z->l - m; - goto lab4; - } /* call reflexive, line 205 */ - if (ret < 0) - return ret; - } - lab4: - ; - } - { - int m = z->l - z->c; - - (void) m; /* or, line 206 */ - { - int ret = r_adjectival(z); - - if (ret == 0) - goto lab6; /* call adjectival, line 206 */ - if (ret < 0) - return ret; - } - goto lab5; - lab6: - z->c = z->l - m; - { - int ret = r_verb(z); - - if (ret == 0) - goto lab7; /* call verb, line 206 */ - if (ret < 0) - return ret; 
- } - goto lab5; - lab7: - z->c = z->l - m; - { - int ret = r_noun(z); - - if (ret == 0) - goto lab1; /* call noun, line 206 */ - if (ret < 0) - return ret; - } - } - lab5: - ; - } - lab2: - lab1: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* try, line 209 */ - z->ket = z->c; /* [, line 209 */ - if (!(eq_s_b(z, 2, s_9))) - { - z->c = z->l - m; - goto lab8; - } - z->bra = z->c; /* ], line 209 */ - { - int ret; - - ret = slice_del(z); /* delete, line 209 */ - if (ret < 0) - return ret; - } - lab8: - ; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 212 */ - { - int ret = r_derivational(z); - - if (ret == 0) - goto lab9; /* call derivational, line 212 */ - if (ret < 0) - return ret; - } - lab9: - z->c = z->l - m; - } - { - int m = z->l - z->c; - - (void) m; /* do, line 213 */ - { - int ret = r_tidy_up(z); - - if (ret == 0) - goto lab10; /* call tidy_up, line 213 */ - if (ret < 0) - return ret; - } - lab10: - z->c = z->l - m; - } - z->lb = m3; - } - z->c = z->lb; - return 1; -} - -extern struct SN_env *russian_UTF_8_create_env(void) -{ - return SN_create_env(0, 2, 0); -} - -extern void russian_UTF_8_close_env(struct SN_env * z) -{ - SN_close_env(z); -} diff --git a/contrib/tsearch2/snowball/russian_stem_UTF8.h b/contrib/tsearch2/snowball/russian_stem_UTF8.h deleted file mode 100644 index 0beb0b9719..0000000000 --- a/contrib/tsearch2/snowball/russian_stem_UTF8.h +++ /dev/null @@ -1,17 +0,0 @@ - -/* This file was generated automatically by the Snowball to ANSI C compiler */ - -#ifdef __cplusplus -extern "C" -{ -#endif - - extern struct SN_env *russian_UTF_8_create_env(void); - extern void russian_UTF_8_close_env(struct SN_env * z); - - extern int russian_UTF_8_stem(struct SN_env * z); - -#ifdef __cplusplus -} - -#endif diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c deleted file mode 100644 index f06e5bb7a1..0000000000 --- a/contrib/tsearch2/snowball/utilities.c +++ /dev/null @@ -1,656 +0,0 @@ - 
-#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "header.h" - -#define unless(C) if(!(C)) - -#define CREATE_SIZE 1 - -extern symbol * -create_s(void) -{ - symbol *p; - void *mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); - - if (mem == NULL) - return NULL; - p = (symbol *) (HEAD + (char *) mem); - CAPACITY(p) = CREATE_SIZE; - SET_SIZE(p, CREATE_SIZE); - return p; -} - -extern void -lose_s(symbol * p) -{ - if (p == NULL) - return; - free((char *) p - HEAD); -} - -/* - new_p = X_skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c - if n +ve, or n characters backwards from p +c - 1 if n -ve. new_p is the new - position, or 0 on failure. - - -- used to implement hop and next in the utf8 case. -*/ - -extern int -skip_utf8(const symbol * p, int c, int lb, int l, int n) -{ - int b; - - if (n >= 0) - { - for (; n > 0; n--) - { - if (c >= l) - return -1; - b = p[c++]; - if (b >= 0xC0) - { /* 1100 0000 */ - while (c < l) - { - b = p[c]; - if (b >= 0xC0 || b < 0x80) - break; - /* break unless b is 10------ */ - c++; - } - } - } - } - else - { - for (; n < 0; n++) - { - if (c <= lb) - return -1; - b = p[--c]; - if (b >= 0x80) - { /* 1000 0000 */ - while (c > lb) - { - b = p[c]; - if (b >= 0xC0) - break; /* 1100 0000 */ - c--; - } - } - } - } - return c; -} - -/* Code for character groupings: utf8 cases */ - -static int -get_utf8(const symbol * p, int c, int l, int *slot) -{ - int b0, - b1; - - if (c >= l) - return 0; - b0 = p[c++]; - if (b0 < 0xC0 || c == l) - { /* 1100 0000 */ - *slot = b0; - return 1; - } - b1 = p[c++]; - if (b0 < 0xE0 || c == l) - { /* 1110 0000 */ - *slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); - return 2; - } - *slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (*p & 0x3F); - return 3; -} - -static int -get_b_utf8(const symbol * p, int c, int lb, int *slot) -{ - int b0, - b1; - - if (c <= lb) - return 0; - b0 = p[--c]; - if (b0 < 0x80 || c == lb) - { /* 1000 0000 */ - *slot = b0; - return 1; - } - b1 = p[--c]; - if 
(b1 >= 0xC0 || c == lb) - { /* 1100 0000 */ - *slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); - return 2; - } - *slot = (*p & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); - return 3; -} - -extern int -in_grouping_U(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - int w = get_utf8(z->p, z->c, z->l, &ch); - - unless(w) return 0; - if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) - return 0; - z->c += w; - return 1; -} - -extern int -in_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - int w = get_b_utf8(z->p, z->c, z->lb, &ch); - - unless(w) return 0; - if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) - return 0; - z->c -= w; - return 1; -} - -extern int -out_grouping_U(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - int w = get_utf8(z->p, z->c, z->l, &ch); - - unless(w) return 0; - unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; - z->c += w; - return 1; -} - -extern int -out_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - int w = get_b_utf8(z->p, z->c, z->lb, &ch); - - unless(w) return 0; - unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; - z->c -= w; - return 1; -} - -/* Code for character groupings: non-utf8 cases */ - -extern int -in_grouping(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - - if (z->c >= z->l) - return 0; - ch = z->p[z->c]; - if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) - return 0; - z->c++; - return 1; -} - -extern int -in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - - if (z->c <= z->lb) - return 0; - ch = z->p[z->c - 1]; - if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) - return 0; - z->c--; - return 1; -} - -extern int -out_grouping(struct SN_env * z, unsigned char *s, int min, int max) -{ - int 
ch; - - if (z->c >= z->l) - return 0; - ch = z->p[z->c]; - unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; - z->c++; - return 1; -} - -extern int -out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max) -{ - int ch; - - if (z->c <= z->lb) - return 0; - ch = z->p[z->c - 1]; - unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; - z->c--; - return 1; -} - -extern int -eq_s(struct SN_env * z, int s_size, symbol * s) -{ - if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) - return 0; - z->c += s_size; - return 1; -} - -extern int -eq_s_b(struct SN_env * z, int s_size, symbol * s) -{ - if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) - return 0; - z->c -= s_size; - return 1; -} - -extern int -eq_v(struct SN_env * z, symbol * p) -{ - return eq_s(z, SIZE(p), p); -} - -extern int -eq_v_b(struct SN_env * z, symbol * p) -{ - return eq_s_b(z, SIZE(p), p); -} - -extern int -find_among(struct SN_env * z, struct among * v, int v_size) -{ - - int i = 0; - int j = v_size; - - int c = z->c; - int l = z->l; - symbol *q = z->p + c; - - struct among *w; - - int common_i = 0; - int common_j = 0; - - int first_key_inspected = 0; - - while (1) - { - int k = i + ((j - i) >> 1); - int diff = 0; - int common = common_i < common_j ? common_i : common_j; /* smaller */ - - w = v + k; - { - int i; - - for (i = common; i < w->s_size; i++) - { - if (c + common == l) - { - diff = -1; - break; - } - diff = q[common] - w->s[i]; - if (diff != 0) - break; - common++; - } - } - if (diff < 0) - { - j = k; - common_j = common; - } - else - { - i = k; - common_i = common; - } - if (j - i <= 1) - { - if (i > 0) - break; /* v->s has been inspected */ - if (j == i) - break; /* only one item in v */ - - /* - * - but now we need to go round once more to get v->s inspected. - * This looks messy, but is actually the optimal approach. 
- */ - - if (first_key_inspected) - break; - first_key_inspected = 1; - } - } - while (1) - { - w = v + i; - if (common_i >= w->s_size) - { - z->c = c + w->s_size; - if (w->function == 0) - return w->result; - { - int res = w->function(z); - - z->c = c + w->s_size; - if (res) - return w->result; - } - } - i = w->substring_i; - if (i < 0) - return 0; - } -} - -/* find_among_b is for backwards processing. Same comments apply */ - -extern int -find_among_b(struct SN_env * z, struct among * v, int v_size) -{ - - int i = 0; - int j = v_size; - - int c = z->c; - int lb = z->lb; - symbol *q = z->p + c - 1; - - struct among *w; - - int common_i = 0; - int common_j = 0; - - int first_key_inspected = 0; - - while (1) - { - int k = i + ((j - i) >> 1); - int diff = 0; - int common = common_i < common_j ? common_i : common_j; - - w = v + k; - { - int i; - - for (i = w->s_size - 1 - common; i >= 0; i--) - { - if (c - common == lb) - { - diff = -1; - break; - } - diff = q[-common] - w->s[i]; - if (diff != 0) - break; - common++; - } - } - if (diff < 0) - { - j = k; - common_j = common; - } - else - { - i = k; - common_i = common; - } - if (j - i <= 1) - { - if (i > 0) - break; - if (j == i) - break; - if (first_key_inspected) - break; - first_key_inspected = 1; - } - } - while (1) - { - w = v + i; - if (common_i >= w->s_size) - { - z->c = c - w->s_size; - if (w->function == 0) - return w->result; - { - int res = w->function(z); - - z->c = c - w->s_size; - if (res) - return w->result; - } - } - i = w->substring_i; - if (i < 0) - return 0; - } -} - - -/* Increase the size of the buffer pointed to by p to at least n symbols. - * If insufficient memory, returns NULL and frees the old buffer. 
- */ -static symbol * -increase_size(symbol * p, int n) -{ - symbol *q; - int new_size = n + 20; - void *mem = realloc((char *) p - HEAD, - HEAD + (new_size + 1) * sizeof(symbol)); - - if (mem == NULL) - { - lose_s(p); - return NULL; - } - q = (symbol *) (HEAD + (char *) mem); - CAPACITY(q) = new_size; - return q; -} - -/* to replace symbols between c_bra and c_ket in z->p by the - s_size symbols at s. - Returns 0 on success, -1 on error. - Also, frees z->p (and sets it to NULL) on error. -*/ -extern int -replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int *adjptr) -{ - int adjustment; - int len; - - if (z->p == NULL) - { - z->p = create_s(); - if (z->p == NULL) - return -1; - } - adjustment = s_size - (c_ket - c_bra); - len = SIZE(z->p); - if (adjustment != 0) - { - if (adjustment + len > CAPACITY(z->p)) - { - z->p = increase_size(z->p, adjustment + len); - if (z->p == NULL) - return -1; - } - memmove(z->p + c_ket + adjustment, - z->p + c_ket, - (len - c_ket) * sizeof(symbol)); - SET_SIZE(z->p, adjustment + len); - z->l += adjustment; - if (z->c >= c_ket) - z->c += adjustment; - else if (z->c > c_bra) - z->c = c_bra; - } - unless(s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); - if (adjptr != NULL) - *adjptr = adjustment; - return 0; -} - -static int -slice_check(struct SN_env * z) -{ - - if (z->bra < 0 || - z->bra > z->ket || - z->ket > z->l || - z->p == NULL || - z->l > SIZE(z->p)) /* this line could be removed */ - { -#if 0 - fprintf(stderr, "faulty slice operation:\n"); - debug(z, -1, 0); -#endif - return -1; - } - return 0; -} - -extern int -slice_from_s(struct SN_env * z, int s_size, symbol * s) -{ - if (slice_check(z)) - return -1; - return replace_s(z, z->bra, z->ket, s_size, s, NULL); -} - -extern int -slice_from_v(struct SN_env * z, symbol * p) -{ - return slice_from_s(z, SIZE(p), p); -} - -extern int -slice_del(struct SN_env * z) -{ - return slice_from_s(z, 0, 0); -} - -extern int -insert_s(struct SN_env 
* z, int bra, int ket, int s_size, symbol * s) -{ - int adjustment; - - if (replace_s(z, bra, ket, s_size, s, &adjustment)) - return -1; - if (bra <= z->bra) - z->bra += adjustment; - if (bra <= z->ket) - z->ket += adjustment; - return 0; -} - -extern int -insert_v(struct SN_env * z, int bra, int ket, symbol * p) -{ - int adjustment; - - if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) - return -1; - if (bra <= z->bra) - z->bra += adjustment; - if (bra <= z->ket) - z->ket += adjustment; - return 0; -} - -extern symbol * -slice_to(struct SN_env * z, symbol * p) -{ - if (slice_check(z)) - { - lose_s(p); - return NULL; - } - { - int len = z->ket - z->bra; - - if (CAPACITY(p) < len) - { - p = increase_size(p, len); - if (p == NULL) - return NULL; - } - memmove(p, z->p + z->bra, len * sizeof(symbol)); - SET_SIZE(p, len); - } - return p; -} - -extern symbol * -assign_to(struct SN_env * z, symbol * p) -{ - int len = z->l; - - if (CAPACITY(p) < len) - { - p = increase_size(p, len); - if (p == NULL) - return NULL; - } - memmove(p, z->p, len * sizeof(symbol)); - SET_SIZE(p, len); - return p; -} - -#if 0 -extern void -debug(struct SN_env * z, int number, int line_count) -{ - int i; - int limit = SIZE(z->p); - - /* if (number >= 0) printf("%3d (line %4d): '", number, line_count); */ - if (number >= 0) - printf("%3d (line %4d): [%d]'", number, line_count, limit); - for (i = 0; i <= limit; i++) - { - if (z->lb == i) - printf("{"); - if (z->bra == i) - printf("["); - if (z->c == i) - printf("|"); - if (z->ket == i) - printf("]"); - if (z->l == i) - printf("}"); - if (i < limit) - { - int ch = z->p[i]; - - if (ch == 0) - ch = '#'; - printf("%c", ch); - } - } - printf("'\n"); -} - -#endif diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql index c6ed880b47..a7f8e8d267 100644 --- a/contrib/tsearch2/sql/tsearch2.sql +++ b/contrib/tsearch2/sql/tsearch2.sql @@ -88,9 +88,9 @@ Moscow moskva | moscow \set ECHO all alter table test_tsquery add column 
keyword tsquery; -update test_tsquery set keyword = to_tsquery('default', txtkeyword); +update test_tsquery set keyword = to_tsquery('english', txtkeyword); alter table test_tsquery add column sample tsquery; -update test_tsquery set sample = to_tsquery('default', txtsample::text); +update test_tsquery set sample = to_tsquery('english', txtsample::text); create unique index bt_tsq on test_tsquery (keyword); @@ -116,21 +116,10 @@ select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text ); select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text ); -select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery; -select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery; -select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery; - - select keyword from test_tsquery where keyword @> 'new'; select keyword from test_tsquery where keyword @> 'moscow'; select keyword from test_tsquery where keyword <@ 'new'; select keyword from test_tsquery where keyword <@ 'moscow'; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as 
query where query @> keyword; create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); set enable_seqscan='off'; @@ -139,48 +128,43 @@ select keyword from test_tsquery where keyword @> 'new'; select keyword from test_tsquery where keyword @> 'moscow'; select keyword from test_tsquery where keyword <@ 'new'; select keyword from test_tsquery where keyword <@ 'moscow'; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword; -select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword; + set enable_seqscan='on'; select lexize('simple', 'ASD56 hsdkf'); -select lexize('en_stem', 'SKIES Problems identity'); +select lexize('english_stem', 'SKIES Problems identity'); select * from token_type('default'); select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 
4.2, readline-4.2 readline-4.2. 234 <i <b> wow < jqw <> qwerty'); -SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> +SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 <i <b> wow < jqw <> qwerty'); -SELECT length(to_tsvector('default', '345 qw')); +SELECT length(to_tsvector('english', '345 qw')); -SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> +SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? 
ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 <i <b> wow < jqw <> qwerty')); -select to_tsquery('default', 'qwe & sKies '); +select to_tsquery('english', 'qwe & sKies '); select to_tsquery('simple', 'qwe & sKies '); -select to_tsquery('default', '''the wether'':dc & '' sKies '':BC '); -select to_tsquery('default', 'asd&(and|fghj)'); -select to_tsquery('default', '(asd&and)|fghj'); -select to_tsquery('default', '(asd&!and)|fghj'); -select to_tsquery('default', '(the|and&(i&1))&fghj'); - -select plainto_tsquery('default', 'the and z 1))& fghj'); -select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd'); -select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg'); -select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg'); -select plainto_tsquery('default', 'foo bar') && 'asd | fg'; +select to_tsquery('english', '''the wether'':dc & '' sKies '':BC '); +select to_tsquery('english', 'asd&(and|fghj)'); +select to_tsquery('english', '(asd&and)|fghj'); +select to_tsquery('english', '(asd&!and)|fghj'); +select to_tsquery('english', '(the|and&(i&1))&fghj'); + +select plainto_tsquery('english', 'the and z 1))& fghj'); +select plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd'); +select plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg'); +select plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg'); +select plainto_tsquery('english', 'foo bar') && 'asd | fg'; select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca'; select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B'; @@ -209,7 +193,7 @@ SELECT count(*) FROM 
test_tsvector WHERE a @@ 'eq|yt'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; -select set_curcfg('default'); +select set_curcfg('english'); CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON test_tsvector @@ -225,11 +209,7 @@ UPDATE test_tsvector SET t = null WHERE t = '345 qwerty'; SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); -drop trigger tsvectorupdate on test_tsvector; -create function wow(text) returns text as 'select $1 || '' copyright''; ' language sql; -create trigger tsvectorupdate before update or insert on test_tsvector -for each row execute procedure tsearch2(a, wow, t); -insert into test_tsvector (t) values ('345 qwerty'); +insert into test_tsvector (t) values ('345 qwerty copyright'); select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright'); @@ -251,8 +231,7 @@ select * from stat('select a from test_tsvector','c') order by ndoc desc, nentry select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry desc, word; select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentry desc, word; -select reset_tsearch(); -select to_tsquery('default', 'skies & books'); +select to_tsquery('english', 'skies & books'); select rank_cd(to_tsvector('Erosion It took the sea a thousand years, A thousand years to trace @@ -284,36 +263,6 @@ The sculpture of these granite seams, Upon a woman s face. E. J. Pratt (1882 1964) '), to_tsquery('sea')); -select get_covers(to_tsvector('Erosion It took the sea a thousand years, -A thousand years to trace -The granite features of this cliff -In crag and scarp and base. -It took the sea an hour one night -An hour of storm to place -The sculpture of these granite seams, -Upon a woman s face. E. J. 
Pratt (1882 1964) -'), to_tsquery('sea&thousand&years')); - -select get_covers(to_tsvector('Erosion It took the sea a thousand years, -A thousand years to trace -The granite features of this cliff -In crag and scarp and base. -It took the sea an hour one night -An hour of storm to place -The sculpture of these granite seams, -Upon a woman s face. E. J. Pratt (1882 1964) -'), to_tsquery('granite&sea')); - -select get_covers(to_tsvector('Erosion It took the sea a thousand years, -A thousand years to trace -The granite features of this cliff -In crag and scarp and base. -It took the sea an hour one night -An hour of storm to place -The sculpture of these granite seams, -Upon a woman s face. E. J. Pratt (1882 1964) -'), to_tsquery('sea')); - select headline('Erosion It took the sea a thousand years, A thousand years to trace The granite features of this cliff @@ -359,7 +308,7 @@ ff-bg </html>', to_tsquery('sea&foo'), 'HighlightAll=true'); --check debug -select * from ts_debug('Tsearch module for PostgreSQL 7.3.3'); +select * from public.ts_debug('Tsearch module for PostgreSQL 7.3.3'); --check ordering insert into test_tsvector values (null, null); diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c deleted file mode 100644 index 5f6d56bce3..0000000000 --- a/contrib/tsearch2/stopword.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * stopword library - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "common.h" -#include "dict.h" -#include "ts_locale.h" - -#define STOPBUFLEN 4096 - -void -freestoplist(StopList * s) -{ - char **ptr = s->stop; - - if (ptr) - while (*ptr && s->len > 0) - { - free(*ptr); - ptr++; - s->len--; - free(s->stop); - } - memset(s, 0, sizeof(StopList)); -} - -void -readstoplist(text *in, StopList * s) -{ - char **stop = NULL; - - s->len = 0; - if (in && VARSIZE(in) - VARHDRSZ > 0) - { - char *filename = to_absfilename(text2char(in)); - FILE *hin; - char buf[STOPBUFLEN], *pbuf; - int reallen = 0; - - if ((hin = 
fopen(filename, "r")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("could not open file \"%s\": %m", - filename))); - - while (fgets(buf, sizeof(buf), hin)) - { - pbuf = buf; - while( *pbuf && !isspace((unsigned char) *pbuf ) ) - pbuf++; - *pbuf = '\0'; - - pg_verifymbstr(buf, strlen(buf), false); - if (*buf == '\0' || *buf=='\n' || *buf=='\r') - continue; - - if (s->len >= reallen) - { - char **tmp; - - reallen = (reallen) ? reallen * 2 : 16; - tmp = (char **) realloc((void *) stop, sizeof(char *) * reallen); - if (!tmp) - { - freestoplist(s); - fclose(hin); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - stop = tmp; - } - - if (s->wordop) - { - pbuf = s->wordop(buf); - stop[s->len] = strdup(pbuf); - pfree(pbuf); - } else - stop[s->len] = strdup(buf); - - if (!stop[s->len]) - { - freestoplist(s); - fclose(hin); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - - (s->len)++; - } - fclose(hin); - pfree(filename); - } - s->stop = stop; -} - -static int -comparestr(const void *a, const void *b) -{ - return strcmp(*(char **) a, *(char **) b); -} - -void -sortstoplist(StopList * s) -{ - if (s->stop && s->len > 0) - qsort(s->stop, s->len, sizeof(char *), comparestr); -} - -bool -searchstoplist(StopList * s, char *key) -{ - return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? 
true : false; -} diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop deleted file mode 100644 index a9130116d3..0000000000 --- a/contrib/tsearch2/stopword/english.stop +++ /dev/null @@ -1,128 +0,0 @@ -i -me -my -myself -we -our -ours -ourselves -you -your -yours -yourself -yourselves -he -him -his -himself -she -her -hers -herself -it -its -itself -they -them -their -theirs -themselves -what -which -who -whom -this -that -these -those -am -is -are -was -were -be -been -being -have -has -had -having -do -does -did -doing -a -an -the -and -but -if -or -because -as -until -while -of -at -by -for -with -about -against -between -into -through -during -before -after -above -below -to -from -up -down -in -out -on -off -over -under -again -further -then -once -here -there -when -where -why -how -all -any -both -each -few -more -most -other -some -such -no -nor -not -only -own -same -so -than -too -very -s -t -can -will -just -don -should -now - diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop deleted file mode 100644 index 1877e3ab5b..0000000000 --- a/contrib/tsearch2/stopword/russian.stop +++ /dev/null @@ -1,151 +0,0 @@ -É -× -×Ï -ÎÅ -ÞÔÏ -ÏÎ -ÎÁ -Ñ -Ó -ÓÏ -ËÁË -Á -ÔÏ -×ÓÅ -ÏÎÁ -ÔÁË -ÅÇÏ -ÎÏ -ÄÁ -ÔÙ -Ë -Õ -ÖÅ -×Ù -ÚÁ -ÂÙ -ÐÏ -ÔÏÌØËÏ -ÅÅ -ÍÎÅ -ÂÙÌÏ -×ÏÔ -ÏÔ -ÍÅÎÑ -ÅÝÅ -ÎÅÔ -Ï -ÉÚ -ÅÍÕ -ÔÅÐÅÒØ -ËÏÇÄÁ -ÄÁÖÅ -ÎÕ -×ÄÒÕÇ -ÌÉ -ÅÓÌÉ -ÕÖÅ -ÉÌÉ -ÎÉ -ÂÙÔØ -ÂÙÌ -ÎÅÇÏ -ÄÏ -×ÁÓ -ÎÉÂÕÄØ -ÏÐÑÔØ -ÕÖ -×ÁÍ -×ÅÄØ -ÔÁÍ -ÐÏÔÏÍ -ÓÅÂÑ -ÎÉÞÅÇÏ -ÅÊ -ÍÏÖÅÔ -ÏÎÉ -ÔÕÔ -ÇÄÅ -ÅÓÔØ -ÎÁÄÏ -ÎÅÊ -ÄÌÑ -ÍÙ -ÔÅÂÑ -ÉÈ -ÞÅÍ -ÂÙÌÁ -ÓÁÍ -ÞÔÏ -ÂÅÚ -ÂÕÄÔÏ -ÞÅÇÏ -ÒÁÚ -ÔÏÖÅ -ÓÅÂÅ -ÐÏÄ -ÂÕÄÅÔ -Ö -ÔÏÇÄÁ -ËÔÏ -ÜÔÏÔ -ÔÏÇÏ -ÐÏÔÏÍÕ -ÜÔÏÇÏ -ËÁËÏÊ -ÓÏ×ÓÅÍ -ÎÉÍ -ÚÄÅÓØ -ÜÔÏÍ -ÏÄÉÎ -ÐÏÞÔÉ -ÍÏÊ -ÔÅÍ -ÞÔÏÂÙ -ÎÅÅ -ÓÅÊÞÁÓ -ÂÙÌÉ -ËÕÄÁ -ÚÁÞÅÍ -×ÓÅÈ -ÎÉËÏÇÄÁ -ÍÏÖÎÏ -ÐÒÉ -ÎÁËÏÎÅà -Ä×Á -Ï -ÄÒÕÇÏÊ -ÈÏÔØ -ÐÏÓÌÅ -ÎÁÄ -ÂÏÌØÛÅ -ÔÏÔ -ÞÅÒÅÚ -ÜÔÉ -ÎÁÓ -ÐÒÏ -×ÓÅÇÏ -ÎÉÈ -ËÁËÁÑ -ÍÎÏÇÏ -ÒÁÚ×Å -ÔÒÉ -ÜÔÕ -ÍÏÑ -×ÐÒÏÞÅÍ -ÈÏÒÏÛÏ -Ó×ÏÀ -ÜÔÏÊ -ÐÅÒÅÄ -ÉÎÏÇÄÁ -ÌÕÞÛÅ -ÞÕÔØ 
-ÔÏÍ -ÎÅÌØÚÑ -ÔÁËÏÊ -ÉÍ -ÂÏÌÅÅ -×ÓÅÇÄÁ -ËÏÎÅÞÎÏ -×ÓÀ -ÍÅÖÄÕ diff --git a/contrib/tsearch2/stopword/russian.stop.utf8 b/contrib/tsearch2/stopword/russian.stop.utf8 deleted file mode 100644 index ecb83d4a7f..0000000000 --- a/contrib/tsearch2/stopword/russian.stop.utf8 +++ /dev/null @@ -1,151 +0,0 @@ -и -в -во -не -что -он -на -Ñ -Ñ -Ñо -как -а -то -вÑе -она -так -его -но -да -Ñ‚Ñ‹ -к -у -же -вы -за -бы -по -только -ее -мне -было -вот -от -Ð¼ÐµÐ½Ñ -еще -нет -о -из -ему -теперь -когда -даже -ну -вдруг -ли -еÑли -уже -или -ни -быть -был -него -до -Ð²Ð°Ñ -нибудь -опÑÑ‚ÑŒ -уж -вам -ведь -там -потом -ÑÐµÐ±Ñ -ничего -ей -может -они -тут -где -еÑÑ‚ÑŒ -надо -ней -Ð´Ð»Ñ -мы -Ñ‚ÐµÐ±Ñ -их -чем -была -Ñам -чтоб -без -будто -чего -раз -тоже -Ñебе -под -будет -ж -тогда -кто -Ñтот -того -потому -Ñтого -какой -ÑовÑем -ним -здеÑÑŒ -Ñтом -один -почти -мой -тем -чтобы -нее -ÑÐµÐ¹Ñ‡Ð°Ñ -были -куда -зачем -вÑех -никогда -можно -при -наконец -два -об -другой -хоть -поÑле -над -больше -тот -через -Ñти -Ð½Ð°Ñ -про -вÑего -них -ÐºÐ°ÐºÐ°Ñ -много -разве -три -Ñту -Ð¼Ð¾Ñ -впрочем -хорошо -Ñвою -Ñтой -перед -иногда -лучше -чуть -том -Ð½ÐµÐ»ÑŒÐ·Ñ -такой -им -более -вÑегда -конечно -вÑÑŽ -между diff --git a/contrib/tsearch2/thesaurus b/contrib/tsearch2/thesaurus deleted file mode 100644 index 95423508c1..0000000000 --- a/contrib/tsearch2/thesaurus +++ /dev/null @@ -1,21 +0,0 @@ -# -# Theasurus config file. 
Character ':' splits -# string to part, example: -# sample-words : substitute-words -# -# Any substitute-word can be marked by preceding '*' character, -# which means do not lexize this word -# Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary - -#one two three : *123 -#one two : *12 -#one : *1 -#two : *2 - -#foo bar : blah blah -#f bar : fbar -#e bar : ebar -#g bar bar : gbarbar -#asd:sdffff -#qwerty:qwer wert erty - diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c deleted file mode 100644 index 646ffc1481..0000000000 --- a/contrib/tsearch2/ts_cfg.c +++ /dev/null @@ -1,648 +0,0 @@ -/* - * interface functions to tscfg - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> -#include <locale.h> - -#include "catalog/pg_type.h" -#include "executor/spi.h" -#include "fmgr.h" -#include "utils/array.h" -#include "utils/memutils.h" - -#include "ts_cfg.h" -#include "dict.h" -#include "wparser.h" -#include "snmap.h" -#include "common.h" -#include "tsvector.h" - -PG_MODULE_MAGIC; - -#define IGNORE_LONGLEXEME 1 - -/*********top interface**********/ - -static Oid current_cfg_id = 0; - -void -init_cfg(Oid id, TSCfgInfo * cfg) -{ - Oid arg[2]; - bool isnull; - Datum pars[2]; - int stat, - i, - j; - text *ptr; - text *prsname = NULL; - char *nsp = get_namespace(TSNSP_FunctionOid); - char buf[1024]; - MemoryContext oldcontext; - void *plan; - - arg[0] = OIDOID; - arg[1] = OIDOID; - pars[0] = ObjectIdGetDatum(id); - pars[1] = ObjectIdGetDatum(id); - - memset(cfg, 0, sizeof(TSCfgInfo)); - SPI_connect(); - - sprintf(buf, "select prs_name from %s.pg_ts_cfg where oid = $1", nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - ts_error(ERROR, "SPI_prepare() failed"); - - stat = SPI_execp(plan, pars, " ", 1); - if (stat < 0) - ts_error(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - { - prsname = DatumGetTextP(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - oldcontext = 
MemoryContextSwitchTo(TopMemoryContext); - prsname = ptextdup(prsname); - MemoryContextSwitchTo(oldcontext); - - cfg->id = id; - } - else - ts_error(ERROR, "No tsearch cfg with id %d", id); - - SPI_freeplan(plan); - - arg[0] = TEXTOID; - sprintf(buf, "select lt.tokid, map.dict_name from %s.pg_ts_cfgmap as map, %s.pg_ts_cfg as cfg, %s.token_type( $1 ) as lt where lt.alias = map.tok_alias and map.ts_name = cfg.ts_name and cfg.oid= $2 order by lt.tokid desc;", nsp, nsp, nsp); - plan = SPI_prepare(buf, 2, arg); - if (!plan) - ts_error(ERROR, "SPI_prepare() failed"); - - pars[0] = PointerGetDatum(prsname); - stat = SPI_execp(plan, pars, " ", 0); - if (stat < 0) - ts_error(ERROR, "SPI_execp return %d", stat); - if (SPI_processed <= 0) - ts_error(ERROR, "No parser with id %d", id); - - for (i = 0; i < SPI_processed; i++) - { - int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull)); - ArrayType *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull)); - ArrayType *a; - - if (!cfg->map) - { - cfg->len = lexid + 1; - cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len); - if (!cfg->map) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len); - } - - if (isnull) - continue; - - a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a))); - - if (ARR_NDIM(a) != 1) - ts_error(ERROR, "Wrong dimension"); - if (ARRNELEMS(a) < 1) - continue; - if (ARR_HASNULL(a)) - ts_error(ERROR, "Array must not contain nulls"); - - cfg->map[lexid].len = ARRNELEMS(a); - cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len); - if (!cfg->map[lexid].dict_id) - ts_error(ERROR, "No memory"); - - memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len); - ptr = (text *) ARR_DATA_PTR(a); - oldcontext = MemoryContextSwitchTo(TopMemoryContext); 
- for (j = 0; j < cfg->map[lexid].len; j++) - { - cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr)); - ptr = NEXTVAL(ptr); - } - MemoryContextSwitchTo(oldcontext); - - if (a != toasted_a) - pfree(a); - } - - SPI_freeplan(plan); - SPI_finish(); - cfg->prs_id = name2id_prs(prsname); - pfree(prsname); - pfree(nsp); - for (i = 0; i < cfg->len; i++) - { - for (j = 0; j < cfg->map[i].len; j++) - { - ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]); - cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr)); - pfree(ptr); - } - } -} - -typedef struct -{ - TSCfgInfo *last_cfg; - int len; - int reallen; - TSCfgInfo *list; - SNMap name2id_map; -} CFGList; - -static CFGList CList = {NULL, 0, 0, NULL, {0, 0, NULL}}; - -void -reset_cfg(void) -{ - freeSNMap(&(CList.name2id_map)); - if (CList.list) - { - int i, - j; - - for (i = 0; i < CList.len; i++) - if (CList.list[i].map) - { - for (j = 0; j < CList.list[i].len; j++) - if (CList.list[i].map[j].dict_id) - free(CList.list[i].map[j].dict_id); - free(CList.list[i].map); - } - free(CList.list); - } - memset(&CList, 0, sizeof(CFGList)); -} - -static int -comparecfg(const void *a, const void *b) -{ - if (((TSCfgInfo *) a)->id == ((TSCfgInfo *) b)->id) - return 0; - return (((TSCfgInfo *) a)->id < ((TSCfgInfo *) b)->id) ? -1 : 1; -} - -TSCfgInfo * -findcfg(Oid id) -{ - /* last used cfg */ - if (CList.last_cfg && CList.last_cfg->id == id) - return CList.last_cfg; - - /* already used cfg */ - if (CList.len != 0) - { - TSCfgInfo key; - - key.id = id; - CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg); - if (CList.last_cfg != NULL) - return CList.last_cfg; - } - - /* last chance */ - if (CList.len == CList.reallen) - { - TSCfgInfo *tmp; - int reallen = (CList.reallen) ? 
2 * CList.reallen : 16; - - tmp = (TSCfgInfo *) realloc(CList.list, sizeof(TSCfgInfo) * reallen); - if (!tmp) - ts_error(ERROR, "No memory"); - CList.reallen = reallen; - CList.list = tmp; - } - init_cfg(id, &(CList.list[CList.len]) ); - CList.last_cfg = &(CList.list[CList.len]); - CList.len++; - qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg); - return findcfg(id); /* qsort changed order!! */ ; -} - - -Oid -name2id_cfg(text *name) -{ - Oid arg[1]; - bool isnull; - Datum pars[1]; - int stat; - Oid id = findSNMap_t(&(CList.name2id_map), name); - void *plan; - char *nsp; - char buf[1024]; - - arg[0] = TEXTOID; - pars[0] = PointerGetDatum(name); - - if (id) - return id; - - nsp = get_namespace(TSNSP_FunctionOid); - SPI_connect(); - sprintf(buf, "select oid from %s.pg_ts_cfg where ts_name = $1", nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - /* internal error */ - elog(ERROR, "SPI_prepare() failed"); - - stat = SPI_execp(plan, pars, " ", 1); - if (stat < 0) - /* internal error */ - elog(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - { - id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - if (isnull) - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("null id for tsearch config"))); - } - else - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("no tsearch config"))); - - SPI_freeplan(plan); - SPI_finish(); - addSNMap_t(&(CList.name2id_map), name, id); - return id; -} - -void -parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen) -{ - int type, - lenlemm; - char *lemm = NULL; - WParserInfo *prsobj = findprs(cfg->prs_id); - LexizeData ldata; - TSLexeme *norms; - - prsobj->prs = (void *) DatumGetPointer( - FunctionCall2( - &(prsobj->start_info), - PointerGetDatum(buf), - Int32GetDatum(buflen) - ) - ); - - LexizeInit(&ldata, cfg); - - do - { - type = DatumGetInt32(FunctionCall3( - &(prsobj->getlexeme_info), - PointerGetDatum(prsobj->prs), - 
PointerGetDatum(&lemm), - PointerGetDatum(&lenlemm))); - - if (type > 0 && lenlemm >= MAXSTRLEN) - { -#ifdef IGNORE_LONGLEXEME - ereport(NOTICE, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("A word you are indexing is too long. It will be ignored."))); - continue; -#else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("A word you are indexing is too long"))); -#endif - } - - LexizeAddLemm(&ldata, type, lemm, lenlemm); - - while ((norms = LexizeExec(&ldata, NULL)) != NULL) - { - TSLexeme *ptr = norms; - - prs->pos++; /* set pos */ - - while (ptr->lexeme) - { - if (prs->curwords == prs->lenwords) - { - prs->lenwords *= 2; - prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD)); - } - - if (ptr->flags & TSL_ADDPOS) - prs->pos++; - prs->words[prs->curwords].len = strlen(ptr->lexeme); - prs->words[prs->curwords].word = ptr->lexeme; - prs->words[prs->curwords].nvariant = ptr->nvariant; - prs->words[prs->curwords].alen = 0; - prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); - ptr++; - prs->curwords++; - } - pfree(norms); - } - } while (type > 0); - - FunctionCall1( - &(prsobj->end_info), - PointerGetDatum(prsobj->prs) - ); -} - -static void -hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) -{ - while (prs->curwords >= prs->lenwords) - { - prs->lenwords *= 2; - prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD)); - } - memset(&(prs->words[prs->curwords]), 0, sizeof(HLWORD)); - prs->words[prs->curwords].type = (uint8) type; - prs->words[prs->curwords].len = buflen; - prs->words[prs->curwords].word = palloc(buflen); - memcpy(prs->words[prs->curwords].word, buf, buflen); - prs->curwords++; -} - -static void -hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen) -{ - int i; - ITEM *item = GETQUERY(query); - HLWORD *word; - - while (prs->curwords + query->size >= prs->lenwords) - { - prs->lenwords *= 2; - prs->words = (HLWORD *) repalloc((void *) prs->words, 
prs->lenwords * sizeof(HLWORD)); - } - - word = &(prs->words[prs->curwords - 1]); - for (i = 0; i < query->size; i++) - { - if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0) - { - if (word->item) - { - memcpy(&(prs->words[prs->curwords]), word, sizeof(HLWORD)); - prs->words[prs->curwords].item = item; - prs->words[prs->curwords].repeated = 1; - prs->curwords++; - } - else - word->item = item; - } - item++; - } -} - -static void -addHLParsedLex(HLPRSTEXT * prs, QUERYTYPE * query, ParsedLex * lexs, TSLexeme * norms) -{ - ParsedLex *tmplexs; - TSLexeme *ptr; - - while (lexs) - { - - if (lexs->type > 0) - hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type); - - ptr = norms; - while (ptr && ptr->lexeme) - { - hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme)); - ptr++; - } - - tmplexs = lexs->next; - pfree(lexs); - lexs = tmplexs; - } - - if (norms) - { - ptr = norms; - while (ptr->lexeme) - { - pfree(ptr->lexeme); - ptr++; - } - pfree(norms); - } -} - -void -hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen) -{ - int type, - lenlemm; - char *lemm = NULL; - WParserInfo *prsobj = findprs(cfg->prs_id); - LexizeData ldata; - TSLexeme *norms; - ParsedLex *lexs; - - prsobj->prs = (void *) DatumGetPointer( - FunctionCall2( - &(prsobj->start_info), - PointerGetDatum(buf), - Int32GetDatum(buflen) - ) - ); - - LexizeInit(&ldata, cfg); - - do - { - type = DatumGetInt32(FunctionCall3( - &(prsobj->getlexeme_info), - PointerGetDatum(prsobj->prs), - PointerGetDatum(&lemm), - PointerGetDatum(&lenlemm))); - - if (type > 0 && lenlemm >= MAXSTRLEN) - { -#ifdef IGNORE_LONGLEXEME - ereport(NOTICE, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("A word you are indexing is too long. 
It will be ignored."))); - continue; -#else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("A word you are indexing is too long"))); -#endif - } - - LexizeAddLemm(&ldata, type, lemm, lenlemm); - - do - { - if ((norms = LexizeExec(&ldata, &lexs)) != NULL) - addHLParsedLex(prs, query, lexs, norms); - else - addHLParsedLex(prs, query, lexs, NULL); - } while (norms); - - } while (type > 0); - - FunctionCall1( - &(prsobj->end_info), - PointerGetDatum(prsobj->prs) - ); -} - -text * -genhl(HLPRSTEXT * prs) -{ - text *out; - int len = 128; - char *ptr; - HLWORD *wrd = prs->words; - - out = (text *) palloc(len); - ptr = ((char *) out) + VARHDRSZ; - - while (wrd - prs->words < prs->curwords) - { - while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len) - { - int dist = ptr - ((char *) out); - - len *= 2; - out = (text *) repalloc(out, len); - ptr = ((char *) out) + dist; - } - - if (wrd->in && !wrd->repeated) - { - if (wrd->replace) - { - *ptr = ' '; - ptr++; - } - else - { - if (wrd->selected) - { - memcpy(ptr, prs->startsel, prs->startsellen); - ptr += prs->startsellen; - } - memcpy(ptr, wrd->word, wrd->len); - ptr += wrd->len; - if (wrd->selected) - { - memcpy(ptr, prs->stopsel, prs->stopsellen); - ptr += prs->stopsellen; - } - } - } - else if (!wrd->repeated) - pfree(wrd->word); - - wrd++; - } - - SET_VARSIZE(out, ptr - ((char *) out)); - return out; -} - -int -get_currcfg(void) -{ - Oid arg[1] = {TEXTOID}; - const char *curlocale; - Datum pars[1]; - bool isnull; - int stat; - char buf[1024]; - char *nsp; - void *plan; - - if (current_cfg_id > 0) - return current_cfg_id; - - nsp = get_namespace(TSNSP_FunctionOid); - SPI_connect(); - sprintf(buf, "select oid from %s.pg_ts_cfg where locale = $1 ", nsp); - pfree(nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - /* internal error */ - elog(ERROR, "SPI_prepare() failed"); - - curlocale = setlocale(LC_CTYPE, NULL); - pars[0] = PointerGetDatum(char2text((char *) curlocale)); - 
stat = SPI_execp(plan, pars, " ", 1); - - if (stat < 0) - /* internal error */ - elog(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - current_cfg_id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - else - ereport(ERROR, - (errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("could not find tsearch config by locale"))); - - pfree(DatumGetPointer(pars[0])); - SPI_freeplan(plan); - SPI_finish(); - return current_cfg_id; -} - -PG_FUNCTION_INFO_V1(set_curcfg); -Datum set_curcfg(PG_FUNCTION_ARGS); -Datum -set_curcfg(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - findcfg(PG_GETARG_OID(0)); - current_cfg_id = PG_GETARG_OID(0); - PG_RETURN_VOID(); -} - -PG_FUNCTION_INFO_V1(set_curcfg_byname); -Datum set_curcfg_byname(PG_FUNCTION_ARGS); -Datum -set_curcfg_byname(PG_FUNCTION_ARGS) -{ - text *name = PG_GETARG_TEXT_P(0); - - SET_FUNCOID(); - DirectFunctionCall1( - set_curcfg, - ObjectIdGetDatum(name2id_cfg(name)) - ); - PG_FREE_IF_COPY(name, 0); - PG_RETURN_VOID(); -} - -PG_FUNCTION_INFO_V1(show_curcfg); -Datum show_curcfg(PG_FUNCTION_ARGS); -Datum -show_curcfg(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - PG_RETURN_OID(get_currcfg()); -} - -PG_FUNCTION_INFO_V1(reset_tsearch); -Datum reset_tsearch(PG_FUNCTION_ARGS); -Datum -reset_tsearch(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - ts_error(NOTICE, "TSearch cache cleaned"); - PG_RETURN_VOID(); -} diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h deleted file mode 100644 index 7bffdbcdd6..0000000000 --- a/contrib/tsearch2/ts_cfg.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef __TS_CFG_H__ -#define __TS_CFG_H__ - -#include "postgres.h" -#include "query.h" - - -typedef struct -{ - int len; - Datum *dict_id; -} ListDictionary; - -typedef struct -{ - Oid id; - Oid prs_id; - int len; - ListDictionary *map; -} TSCfgInfo; - -Oid name2id_cfg(text *name); -TSCfgInfo *findcfg(Oid id); -void init_cfg(Oid id, TSCfgInfo * cfg); -void reset_cfg(void); - -typedef struct -{ - uint16 len; - uint16 
nvariant; - union - { - uint16 pos; - uint16 *apos; - } pos; - char *word; - uint32 alen; -} TSWORD; - -typedef struct -{ - TSWORD *words; - int4 lenwords; - int4 curwords; - int4 pos; -} PRSTEXT; - -typedef struct -{ - uint32 selected:1, - in:1, - replace:1, - repeated:1, - unused:4, - type:8, - len:16; - char *word; - ITEM *item; -} HLWORD; - -typedef struct -{ - HLWORD *words; - int4 lenwords; - int4 curwords; - char *startsel; - char *stopsel; - int2 startsellen; - int2 stopsellen; -} HLPRSTEXT; - -void hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen); -text *genhl(HLPRSTEXT * prs); - -void parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen); -int get_currcfg(void); - -#endif diff --git a/contrib/tsearch2/ts_lexize.c b/contrib/tsearch2/ts_lexize.c deleted file mode 100644 index f2e4904eb7..0000000000 --- a/contrib/tsearch2/ts_lexize.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * lexize stream of lexemes - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> -#include <locale.h> - -#include "ts_cfg.h" -#include "dict.h" - -void -LexizeInit(LexizeData * ld, TSCfgInfo * cfg) -{ - ld->cfg = cfg; - ld->curDictId = InvalidOid; - ld->posDict = 0; - ld->towork.head = ld->towork.tail = ld->curSub = NULL; - ld->waste.head = ld->waste.tail = NULL; - ld->lastRes = NULL; - ld->tmpRes = NULL; -} - -static void -LPLAddTail(ListParsedLex * list, ParsedLex * newpl) -{ - if (list->tail) - { - list->tail->next = newpl; - list->tail = newpl; - } - else - list->head = list->tail = newpl; - newpl->next = NULL; -} - -static ParsedLex * -LPLRemoveHead(ListParsedLex * list) -{ - ParsedLex *res = list->head; - - if (list->head) - list->head = list->head->next; - - if (list->head == NULL) - list->tail = NULL; - - return res; -} - - -void -LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm) -{ - ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); - - newpl = (ParsedLex *) 
palloc(sizeof(ParsedLex)); - newpl->type = type; - newpl->lemm = lemm; - newpl->lenlemm = lenlemm; - LPLAddTail(&ld->towork, newpl); - ld->curSub = ld->towork.tail; -} - -static void -RemoveHead(LexizeData * ld) -{ - LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork)); - - ld->posDict = 0; -} - -static void -setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem) -{ - if (correspondLexem) - { - *correspondLexem = ld->waste.head; - } - else - { - ParsedLex *tmp, - *ptr = ld->waste.head; - - while (ptr) - { - tmp = ptr->next; - pfree(ptr); - ptr = tmp; - } - } - ld->waste.head = ld->waste.tail = NULL; -} - -static void -moveToWaste(LexizeData * ld, ParsedLex * stop) -{ - bool go = true; - - while (ld->towork.head && go) - { - if (ld->towork.head == stop) - { - ld->curSub = stop->next; - go = false; - } - RemoveHead(ld); - } -} - -static void -setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res) -{ - if (ld->tmpRes) - { - TSLexeme *ptr; - - for (ptr = ld->tmpRes; ptr->lexeme; ptr++) - pfree(ptr->lexeme); - pfree(ld->tmpRes); - } - ld->tmpRes = res; - ld->lastRes = lex; -} - -TSLexeme * -LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem) -{ - int i; - ListDictionary *map; - DictInfo *dict; - TSLexeme *res; - - if (ld->curDictId == InvalidOid) - { - /* - * usial mode: dictionary wants only one word, but we should keep in - * mind that we should go through all stack - */ - - while (ld->towork.head) - { - ParsedLex *curVal = ld->towork.head; - - map = ld->cfg->map + curVal->type; - - if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0) - { - /* skip this type of lexeme */ - RemoveHead(ld); - continue; - } - - for (i = ld->posDict; i < map->len; i++) - { - dict = finddict(DatumGetObjectId(map->dict_id[i])); - - ld->dictState.isend = ld->dictState.getnext = false; - ld->dictState.private = NULL; - res = (TSLexeme *) DatumGetPointer(FunctionCall4( - &(dict->lexize_info), - PointerGetDatum(dict->dictionary), - 
PointerGetDatum(curVal->lemm), - Int32GetDatum(curVal->lenlemm), - PointerGetDatum(&ld->dictState) - )); - - if (ld->dictState.getnext) - { - /* - * dictinary wants next word, so setup and store current - * position and go to multiword mode - */ - - ld->curDictId = DatumGetObjectId(map->dict_id[i]); - ld->posDict = i + 1; - ld->curSub = curVal->next; - if (res) - setNewTmpRes(ld, curVal, res); - return LexizeExec(ld, correspondLexem); - } - - if (!res) /* dictionary doesn't know this lexeme */ - continue; - - RemoveHead(ld); - setCorrLex(ld, correspondLexem); - return res; - } - - RemoveHead(ld); - } - } - else - { /* curDictId is valid */ - dict = finddict(ld->curDictId); - - /* - * Dictionary ld->curDictId asks us about following words - */ - - while (ld->curSub) - { - ParsedLex *curVal = ld->curSub; - - map = ld->cfg->map + curVal->type; - - if (curVal->type != 0) - { - bool dictExists = false; - - if (curVal->type >= ld->cfg->len || map->len == 0) - { - /* skip this type of lexeme */ - ld->curSub = curVal->next; - continue; - } - - /* - * We should be sure that current type of lexeme is recognized - * by our dictinonary: we just check is it exist in list of - * dictionaries ? - */ - for (i = 0; i < map->len && !dictExists; i++) - if (ld->curDictId == DatumGetObjectId(map->dict_id[i])) - dictExists = true; - - if (!dictExists) - { - /* - * Dictionary can't work with current tpe of lexeme, - * return to basic mode and redo all stored lexemes - */ - ld->curDictId = InvalidOid; - return LexizeExec(ld, correspondLexem); - } - } - - ld->dictState.isend = (curVal->type == 0) ? 
true : false; - ld->dictState.getnext = false; - - res = (TSLexeme *) DatumGetPointer(FunctionCall4( - &(dict->lexize_info), - PointerGetDatum(dict->dictionary), - PointerGetDatum(curVal->lemm), - Int32GetDatum(curVal->lenlemm), - PointerGetDatum(&ld->dictState) - )); - - if (ld->dictState.getnext) - { - /* Dictionary wants one more */ - ld->curSub = curVal->next; - if (res) - setNewTmpRes(ld, curVal, res); - continue; - } - - if (res || ld->tmpRes) - { - /* - * Dictionary normalizes lexemes, so we remove from stack all - * used lexemes , return to basic mode and redo end of stack - * (if it exists) - */ - if (res) - { - moveToWaste(ld, ld->curSub); - } - else - { - res = ld->tmpRes; - moveToWaste(ld, ld->lastRes); - } - - /* reset to initial state */ - ld->curDictId = InvalidOid; - ld->posDict = 0; - ld->lastRes = NULL; - ld->tmpRes = NULL; - setCorrLex(ld, correspondLexem); - return res; - } - - /* - * Dict don't want next lexem and didn't recognize anything, redo - * from ld->towork.head - */ - ld->curDictId = InvalidOid; - return LexizeExec(ld, correspondLexem); - } - } - - setCorrLex(ld, correspondLexem); - return NULL; -} diff --git a/contrib/tsearch2/ts_locale.c b/contrib/tsearch2/ts_locale.c deleted file mode 100644 index cb022d7e2a..0000000000 --- a/contrib/tsearch2/ts_locale.c +++ /dev/null @@ -1,191 +0,0 @@ -#include "ts_locale.h" - -#include "utils/builtins.h" -#include "utils/pg_locale.h" -#include "mb/pg_wchar.h" - - -#ifdef TS_USE_WIDE - -#ifdef WIN32 - -size_t -wchar2char(char *to, const wchar_t *from, size_t len) -{ - if (len == 0) - return 0; - - if (GetDatabaseEncoding() == PG_UTF8) - { - int r; - - r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len, - NULL, NULL); - - if (r == 0) - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("UTF-16 to UTF-8 translation failed: %lu", - GetLastError()))); - Assert(r <= len); - - return r; - } - - return wcstombs(to, from, len); -} -#endif /* WIN32 */ - -size_t -char2wchar(wchar_t 
*to, const char *from, size_t len) -{ - if (len == 0) - return 0; - -#ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8) - { - int r; - - r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len); - - if (!r) - { - pg_verifymbstr(from, strlen(from), false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - - Assert(r <= len); - - return r; - } - else -#endif /* WIN32 */ - if ( lc_ctype_is_c() ) - { - /* - * pg_mb2wchar_with_len always adds trailing '\0', so - * 'to' should be allocated with sufficient space - */ - return pg_mb2wchar_with_len(from, (pg_wchar *)to, len); - } - - return mbstowcs(to, from, len); -} - -int -_t_isalpha(const char *ptr) -{ - wchar_t character[2]; - - if (lc_ctype_is_c()) - return isalpha(TOUCHAR(ptr)); - - char2wchar(character, ptr, 1); - - return iswalpha((wint_t) *character); -} - -int -_t_isprint(const char *ptr) -{ - wchar_t character[2]; - - if (lc_ctype_is_c()) - return isprint(TOUCHAR(ptr)); - - char2wchar(character, ptr, 1); - - return iswprint((wint_t) *character); -} -#endif /* TS_USE_WIDE */ - -char * -lowerstr(char *str) -{ - char *ptr = str; - char *out; - int len = strlen(str); - - if ( len == 0 ) - return pstrdup(""); - -#ifdef TS_USE_WIDE - - /* - * Use wide char code only when max encoding length > 1 and ctype != C. - * Some operating systems fail with multi-byte encodings and a C locale. - * Also, for a C locale there is no need to process as multibyte. 
From - * backend/utils/adt/oracle_compat.c Teodor - */ - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - { - wchar_t *wstr, - *wptr; - int wlen; - - /* - *alloc number of wchar_t for worst case, len contains - * number of bytes <= number of characters and - * alloc 1 wchar_t for 0, because wchar2char(wcstombs in really) - * wants zero-terminated string - */ - wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len+1)); - - /* - * str SHOULD be cstring, so wlen contains number - * of converted character - */ - wlen = char2wchar(wstr, str, len); - if ( wlen < 0 ) - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("translation failed from server encoding to wchar_t"))); - - Assert(wlen<=len); - wstr[wlen] = 0; - - while (*wptr) - { - *wptr = towlower((wint_t) *wptr); - wptr++; - } - - /* - * Alloc result string for worst case + '\0' - */ - len = sizeof(char)*pg_database_encoding_max_length()*(wlen+1); - out = (char*)palloc(len); - - /* - * wlen now is number of bytes which is always >= number of characters - */ - wlen = wchar2char(out, wstr, len); - pfree(wstr); - - if ( wlen < 0 ) - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("translation failed from wchar_t to server encoding %d", errno))); - Assert(wlen<=len); - out[wlen]='\0'; - } - else -#endif - { - char *outptr; - - outptr = out = (char*)palloc( sizeof(char) * (len+1) ); - while (*ptr) - { - *outptr++ = tolower(*(unsigned char *) ptr); - ptr++; - } - *outptr = '\0'; - } - - return out; -} diff --git a/contrib/tsearch2/ts_locale.h b/contrib/tsearch2/ts_locale.h deleted file mode 100644 index 81d1a16600..0000000000 --- a/contrib/tsearch2/ts_locale.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef __TSLOCALE_H__ -#define __TSLOCALE_H__ - -#include "postgres.h" -#include "utils/pg_locale.h" -#include "mb/pg_wchar.h" - -#include <ctype.h> -#include <limits.h> - -/* - * towlower() and friends should be in <wctype.h>, but some pre-C99 systems - * declare 
them in <wchar.h>. - */ -#ifdef HAVE_WCHAR_H -#include <wchar.h> -#endif -#ifdef HAVE_WCTYPE_H -#include <wctype.h> -#endif - -#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) -#define TS_USE_WIDE -#endif - -#ifdef TS_USE_WIDE -#endif /* TS_USE_WIDE */ - - -#define TOUCHAR(x) (*((unsigned char*)(x))) - -#ifdef TS_USE_WIDE -size_t char2wchar(wchar_t *to, const char *from, size_t len); - -#ifdef WIN32 - -size_t wchar2char(char *to, const wchar_t *from, size_t len); - -#else /* WIN32 */ - -/* correct wcstombs */ -#define wchar2char wcstombs - -#endif /* WIN32 */ - -#define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) -#define t_isspace(x) ( pg_mblen(x)==1 && isspace( TOUCHAR(x) ) ) -extern int _t_isalpha(const char *ptr); - -#define t_isalpha(x) ( (pg_mblen(x)==1) ? isalpha( TOUCHAR(x) ) : _t_isalpha(x) ) -extern int _t_isprint(const char *ptr); - -#define t_isprint(x) ( (pg_mblen(x)==1) ? isprint( TOUCHAR(x) ) : _t_isprint(x) ) -/* - * t_iseq() should be called only for ASCII symbols - */ -#define t_iseq(x,c) ( (pg_mblen(x)==1) ? 
( TOUCHAR(x) == ((unsigned char)(c)) ) : false ) - -#define COPYCHAR(d,s) do { \ - int lll = pg_mblen( s ); \ - \ - while( lll-- ) \ - TOUCHAR((d)+lll) = TOUCHAR((s)+lll); \ -} while(0) - -#else /* not def TS_USE_WIDE */ - -#define t_isdigit(x) isdigit( TOUCHAR(x) ) -#define t_isspace(x) isspace( TOUCHAR(x) ) -#define t_isalpha(x) isalpha( TOUCHAR(x) ) -#define t_isprint(x) isprint( TOUCHAR(x) ) -#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)) ) - -#define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s) -#endif - -char *lowerstr(char *str); - -#endif /* __TSLOCALE_H__ */ diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c deleted file mode 100644 index d728dd5796..0000000000 --- a/contrib/tsearch2/ts_stat.c +++ /dev/null @@ -1,567 +0,0 @@ -/* - * stat functions - */ - -#include "tsvector.h" -#include "ts_stat.h" -#include "funcapi.h" -#include "catalog/pg_type.h" -#include "executor/spi.h" -#include "common.h" -#include "ts_locale.h" - -PG_FUNCTION_INFO_V1(tsstat_in); -Datum tsstat_in(PG_FUNCTION_ARGS); -Datum -tsstat_in(PG_FUNCTION_ARGS) -{ - tsstat *stat = palloc(STATHDRSIZE); - - SET_VARSIZE(stat, STATHDRSIZE); - stat->size = 0; - stat->weight = 0; - PG_RETURN_POINTER(stat); -} - -PG_FUNCTION_INFO_V1(tsstat_out); -Datum tsstat_out(PG_FUNCTION_ARGS); -Datum -tsstat_out(PG_FUNCTION_ARGS) -{ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("tsstat_out not implemented"))); - PG_RETURN_NULL(); -} - -static int -check_weight(tsvector * txt, WordEntry * wptr, int8 weight) -{ - int len = POSDATALEN(txt, wptr); - int num = 0; - WordEntryPos *ptr = POSDATAPTR(txt, wptr); - - while (len--) - { - if (weight & (1 << WEP_GETWEIGHT(*ptr))) - num++; - ptr++; - } - return num; -} - -static WordEntry ** -SEI_realloc(WordEntry ** in, uint32 *len) -{ - if (*len == 0 || in == NULL) - { - *len = 8; - in = palloc(sizeof(WordEntry *) * (*len)); - } - else - { - *len *= 2; - in = repalloc(in, sizeof(WordEntry *) * (*len)); - } - return in; -} - 
-static int -compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, tsvector * txt) -{ - if (a->len == b->len) - return strncmp( - STATSTRPTR(stat) + a->pos, - STRPTR(txt) + b->pos, - a->len - ); - return (a->len > b->len) ? 1 : -1; -} - -static tsstat * -formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len) -{ - tsstat *newstat; - uint32 totallen, - nentry; - uint32 slen = 0; - WordEntry **ptr = entry; - char *curptr; - StatEntry *sptr, - *nptr; - - while (ptr - entry < len) - { - slen += (*ptr)->len; - ptr++; - } - - nentry = stat->size + len; - slen += STATSTRSIZE(stat); - totallen = CALCSTATSIZE(nentry, slen); - newstat = palloc(totallen); - SET_VARSIZE(newstat, totallen); - newstat->weight = stat->weight; - newstat->size = nentry; - - memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat)); - curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat); - - ptr = entry; - sptr = STATPTR(stat); - nptr = STATPTR(newstat); - - if (len == 1) - { - StatEntry *StopLow = STATPTR(stat); - StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); - - while (StopLow < StopHigh) - { - sptr = StopLow + (StopHigh - StopLow) / 2; - if (compareStatWord(sptr, *ptr, stat, txt) < 0) - StopLow = sptr + 1; - else - StopHigh = sptr; - } - nptr = STATPTR(newstat) + (StopLow - STATPTR(stat)); - memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat))); - if ((*ptr)->haspos) - nptr->nentry = (stat->weight) ? 
check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); - else - nptr->nentry = 1; - nptr->ndoc = 1; - nptr->len = (*ptr)->len; - memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); - nptr->pos = curptr - STATSTRPTR(newstat); - memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow)); - } - else - { - while (sptr - STATPTR(stat) < stat->size && ptr - entry < len) - { - if (compareStatWord(sptr, *ptr, stat, txt) < 0) - { - memcpy(nptr, sptr, sizeof(StatEntry)); - sptr++; - } - else - { - if ((*ptr)->haspos) - nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); - else - nptr->nentry = 1; - nptr->ndoc = 1; - nptr->len = (*ptr)->len; - memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); - nptr->pos = curptr - STATSTRPTR(newstat); - curptr += nptr->len; - ptr++; - } - nptr++; - } - - memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat)))); - - while (ptr - entry < len) - { - if ((*ptr)->haspos) - nptr->nentry = (stat->weight) ? 
check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); - else - nptr->nentry = 1; - nptr->ndoc = 1; - nptr->len = (*ptr)->len; - memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); - nptr->pos = curptr - STATSTRPTR(newstat); - curptr += nptr->len; - ptr++; - nptr++; - } - } - - return newstat; -} - -PG_FUNCTION_INFO_V1(ts_accum); -Datum ts_accum(PG_FUNCTION_ARGS); -Datum -ts_accum(PG_FUNCTION_ARGS) -{ - tsstat *newstat, - *stat = (tsstat *) PG_GETARG_POINTER(0); - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - WordEntry **newentry = NULL; - uint32 len = 0, - cur = 0; - StatEntry *sptr; - WordEntry *wptr; - int n = 0; - - if (stat == NULL || PG_ARGISNULL(0)) - { /* Init in first */ - stat = palloc(STATHDRSIZE); - SET_VARSIZE(stat, STATHDRSIZE); - stat->size = 0; - stat->weight = 0; - } - - /* simple check of correctness */ - if (txt == NULL || PG_ARGISNULL(1) || txt->size == 0) - { - PG_FREE_IF_COPY(txt, 1); - PG_RETURN_POINTER(stat); - } - - sptr = STATPTR(stat); - wptr = ARRPTR(txt); - - if (stat->size < 100 * txt->size) - { /* merge */ - while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size) - { - int cmp = compareStatWord(sptr, wptr, stat, txt); - - if (cmp < 0) - sptr++; - else if (cmp == 0) - { - if (stat->weight == 0) - { - sptr->ndoc++; - sptr->nentry += (wptr->haspos) ? 
POSDATALEN(txt, wptr) : 1; - } - else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) - { - sptr->ndoc++; - sptr->nentry += n; - } - sptr++; - wptr++; - } - else - { - if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) - { - if (cur == len) - newentry = SEI_realloc(newentry, &len); - newentry[cur] = wptr; - cur++; - } - wptr++; - } - } - - while (wptr - ARRPTR(txt) < txt->size) - { - if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) - { - if (cur == len) - newentry = SEI_realloc(newentry, &len); - newentry[cur] = wptr; - cur++; - } - wptr++; - } - } - else - { /* search */ - while (wptr - ARRPTR(txt) < txt->size) - { - StatEntry *StopLow = STATPTR(stat); - StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); - int cmp; - - while (StopLow < StopHigh) - { - sptr = StopLow + (StopHigh - StopLow) / 2; - cmp = compareStatWord(sptr, wptr, stat, txt); - if (cmp == 0) - { - if (stat->weight == 0) - { - sptr->ndoc++; - sptr->nentry += (wptr->haspos) ? 
POSDATALEN(txt, wptr) : 1; - } - else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) - { - sptr->ndoc++; - sptr->nentry += n; - } - break; - } - else if (cmp < 0) - StopLow = sptr + 1; - else - StopHigh = sptr; - } - - if (StopLow >= StopHigh) - { /* not found */ - if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) - { - if (cur == len) - newentry = SEI_realloc(newentry, &len); - newentry[cur] = wptr; - cur++; - } - } - wptr++; - } - } - - - if (cur == 0) - { /* no new words */ - PG_FREE_IF_COPY(txt, 1); - PG_RETURN_POINTER(stat); - } - - newstat = formstat(stat, txt, newentry, cur); - pfree(newentry); - PG_FREE_IF_COPY(txt, 1); - /* pfree(stat); */ - - PG_RETURN_POINTER(newstat); -} - -typedef struct -{ - uint32 cur; - tsvector *stat; -} StatStorage; - -static void -ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, - tsstat * stat) -{ - TupleDesc tupdesc; - MemoryContext oldcontext; - StatStorage *st; - - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - st = palloc(sizeof(StatStorage)); - st->cur = 0; - st->stat = palloc(VARSIZE(stat)); - memcpy(st->stat, stat, VARSIZE(stat)); - funcctx->user_fctx = (void *) st; - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - tupdesc = CreateTupleDescCopy(tupdesc); - funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); - MemoryContextSwitchTo(oldcontext); -} - - -static Datum -ts_process_call(FuncCallContext *funcctx) -{ - StatStorage *st; - - st = (StatStorage *) funcctx->user_fctx; - - if (st->cur < st->stat->size) - { - Datum result; - char *values[3]; - char ndoc[16]; - char nentry[16]; - StatEntry *entry = STATPTR(st->stat) + st->cur; - HeapTuple tuple; - - values[1] = ndoc; - sprintf(ndoc, "%d", entry->ndoc); - values[2] = nentry; - sprintf(nentry, "%d", entry->nentry); - values[0] = palloc(entry->len + 1); - memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, 
entry->len); - (values[0])[entry->len] = '\0'; - - tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); - result = HeapTupleGetDatum(tuple); - - pfree(values[0]); - st->cur++; - return result; - } - else - { - pfree(st->stat); - pfree(st); - } - - return (Datum) 0; -} - -PG_FUNCTION_INFO_V1(ts_accum_finish); -Datum ts_accum_finish(PG_FUNCTION_ARGS); -Datum -ts_accum_finish(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - if (SRF_IS_FIRSTCALL()) - { - funcctx = SRF_FIRSTCALL_INIT(); - ts_setup_firstcall(fcinfo, funcctx, (tsstat *) PG_GETARG_POINTER(0)); - } - - funcctx = SRF_PERCALL_SETUP(); - if ((result = ts_process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - -static Oid tiOid = InvalidOid; - -static void -get_ti_Oid(void) -{ - int ret; - bool isnull; - - if ((ret = SPI_exec("select oid from pg_type where typname='tsvector'", 1)) < 0) - /* internal error */ - elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret); - - if (SPI_processed < 1) - /* internal error */ - elog(ERROR, "there is no tsvector type"); - tiOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - if (tiOid == InvalidOid) - /* internal error */ - elog(ERROR, "tsvector type has InvalidOid"); -} - -static tsstat * -ts_stat_sql(text *txt, text *ws) -{ - char *query = text2char(txt); - int i; - tsstat *newstat, - *stat; - bool isnull; - Portal portal; - void *plan; - - if (tiOid == InvalidOid) - get_ti_Oid(); - - if ((plan = SPI_prepare(query, 0, NULL)) == NULL) - /* internal error */ - elog(ERROR, "SPI_prepare('%s') returns NULL", query); - - if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL) - /* internal error */ - elog(ERROR, "SPI_cursor_open('%s') returns NULL", query); - - SPI_cursor_fetch(portal, true, 100); - - if (SPI_tuptable->tupdesc->natts != 1) - /* internal error */ - elog(ERROR, "number of fields doesn't equal to 1"); - - if 
(SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid) - /* internal error */ - elog(ERROR, "column isn't of tsvector type"); - - stat = palloc(STATHDRSIZE); - SET_VARSIZE(stat, STATHDRSIZE); - stat->size = 0; - stat->weight = 0; - - if (ws) - { - char *buf; - - buf = VARDATA(ws); - while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ) - { - if (pg_mblen(buf) == 1) - { - switch (*buf) - { - case 'A': - case 'a': - stat->weight |= 1 << 3; - break; - case 'B': - case 'b': - stat->weight |= 1 << 2; - break; - case 'C': - case 'c': - stat->weight |= 1 << 1; - break; - case 'D': - case 'd': - stat->weight |= 1; - break; - default: - stat->weight |= 0; - } - } - buf += pg_mblen(buf); - } - } - - while (SPI_processed > 0) - { - for (i = 0; i < SPI_processed; i++) - { - Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); - - if (!isnull) - { - newstat = (tsstat *) DatumGetPointer(DirectFunctionCall2( - ts_accum, - PointerGetDatum(stat), - data - )); - if (stat != newstat && stat) - pfree(stat); - stat = newstat; - } - } - - SPI_freetuptable(SPI_tuptable); - SPI_cursor_fetch(portal, true, 100); - } - - SPI_freetuptable(SPI_tuptable); - SPI_cursor_close(portal); - SPI_freeplan(plan); - pfree(query); - - return stat; -} - -PG_FUNCTION_INFO_V1(ts_stat); -Datum ts_stat(PG_FUNCTION_ARGS); -Datum -ts_stat(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - if (SRF_IS_FIRSTCALL()) - { - tsstat *stat; - text *txt = PG_GETARG_TEXT_P(0); - text *ws = (PG_NARGS() > 1) ? 
PG_GETARG_TEXT_P(1) : NULL; - - funcctx = SRF_FIRSTCALL_INIT(); - SPI_connect(); - stat = ts_stat_sql(txt, ws); - PG_FREE_IF_COPY(txt, 0); - if (PG_NARGS() > 1) - PG_FREE_IF_COPY(ws, 1); - ts_setup_firstcall(fcinfo, funcctx, stat); - SPI_finish(); - } - - funcctx = SRF_PERCALL_SETUP(); - if ((result = ts_process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h deleted file mode 100644 index e1ba14b99b..0000000000 --- a/contrib/tsearch2/ts_stat.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __TXTIDX_STAT_H__ -#define __TXTIDX_STAT_H__ - -#include "postgres.h" - -#include "access/gist.h" -#include "access/itup.h" -#include "utils/builtins.h" -#include "storage/bufpage.h" -#include "tsvector.h" - -typedef struct -{ - uint32 len; - uint32 pos; - uint32 ndoc; - uint32 nentry; -} StatEntry; - -typedef struct -{ - int32 vl_len_; /* varlena header (do not touch directly!) */ - int4 size; - int4 weight; - char data[1]; -} tsstat; - -#define STATHDRSIZE (sizeof(int4) * 4) -#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) ) -#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) ) -#define STATSTRPTR(x) ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsvector*)(x))->size ) ) -#define STATSTRSIZE(x) ( VARSIZE((tsvector*)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((tsvector*)(x))->size ) ) - -#endif diff --git a/contrib/tsearch2/tsearch2.c b/contrib/tsearch2/tsearch2.c new file mode 100644 index 0000000000..d51ba7ede7 --- /dev/null +++ b/contrib/tsearch2/tsearch2.c @@ -0,0 +1,441 @@ +/*------------------------------------------------------------------------- + * + * tsearch2.c + * Backwards-compatibility package for old contrib/tsearch2 API + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.c,v 1.1 2007/11/13 21:02:29 tgl 
Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/namespace.h" +#include "commands/trigger.h" +#include "fmgr.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/syscache.h" + +PG_MODULE_MAGIC; + +static Oid current_dictionary_oid = InvalidOid; +static Oid current_parser_oid = InvalidOid; + +/* insert given value at argument position 0 */ +#define INSERT_ARGUMENT0(argument, isnull) \ + do { \ + int i; \ + for (i = fcinfo->nargs; i > 0; i--) \ + { \ + fcinfo->arg[i] = fcinfo->arg[i-1]; \ + fcinfo->argnull[i] = fcinfo->argnull[i-1]; \ + } \ + fcinfo->arg[0] = (argument); \ + fcinfo->argnull[0] = (isnull); \ + fcinfo->nargs++; \ + } while (0) + +#define TextPGetCString(t) \ + DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(t))) +#define CStringGetTextP(c) \ + DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(c))) + +#define TextGetObjectId(infunction, text) \ + DatumGetObjectId(DirectFunctionCall1(infunction, \ + DirectFunctionCall1(textout, PointerGetDatum(text)))) + +#define UNSUPPORTED_FUNCTION(name) \ + Datum name(PG_FUNCTION_ARGS); \ + Datum \ + name(PG_FUNCTION_ARGS) \ + { \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),\ + errmsg("function %s is no longer supported", \ + format_procedure(fcinfo->flinfo->fn_oid)), \ + errhint("Switch to new tsearch functionality."))); \ + /* keep compiler quiet */ \ + PG_RETURN_NULL(); \ + } \ + PG_FUNCTION_INFO_V1(name) + +static Oid GetCurrentDict(void); +static Oid GetCurrentParser(void); + +Datum tsa_lexize_byname(PG_FUNCTION_ARGS); +Datum tsa_lexize_bycurrent(PG_FUNCTION_ARGS); +Datum tsa_set_curdict(PG_FUNCTION_ARGS); +Datum tsa_set_curdict_byname(PG_FUNCTION_ARGS); +Datum tsa_token_type_current(PG_FUNCTION_ARGS); +Datum tsa_set_curprs(PG_FUNCTION_ARGS); +Datum tsa_set_curprs_byname(PG_FUNCTION_ARGS); +Datum tsa_parse_current(PG_FUNCTION_ARGS); +Datum 
tsa_set_curcfg(PG_FUNCTION_ARGS); +Datum tsa_set_curcfg_byname(PG_FUNCTION_ARGS); +Datum tsa_show_curcfg(PG_FUNCTION_ARGS); +Datum tsa_to_tsvector_name(PG_FUNCTION_ARGS); +Datum tsa_to_tsquery_name(PG_FUNCTION_ARGS); +Datum tsa_plainto_tsquery_name(PG_FUNCTION_ARGS); +Datum tsa_headline_byname(PG_FUNCTION_ARGS); +Datum tsa_ts_stat(PG_FUNCTION_ARGS); +Datum tsa_tsearch2(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsa_lexize_byname); +PG_FUNCTION_INFO_V1(tsa_lexize_bycurrent); +PG_FUNCTION_INFO_V1(tsa_set_curdict); +PG_FUNCTION_INFO_V1(tsa_set_curdict_byname); +PG_FUNCTION_INFO_V1(tsa_token_type_current); +PG_FUNCTION_INFO_V1(tsa_set_curprs); +PG_FUNCTION_INFO_V1(tsa_set_curprs_byname); +PG_FUNCTION_INFO_V1(tsa_parse_current); +PG_FUNCTION_INFO_V1(tsa_set_curcfg); +PG_FUNCTION_INFO_V1(tsa_set_curcfg_byname); +PG_FUNCTION_INFO_V1(tsa_show_curcfg); +PG_FUNCTION_INFO_V1(tsa_to_tsvector_name); +PG_FUNCTION_INFO_V1(tsa_to_tsquery_name); +PG_FUNCTION_INFO_V1(tsa_plainto_tsquery_name); +PG_FUNCTION_INFO_V1(tsa_headline_byname); +PG_FUNCTION_INFO_V1(tsa_ts_stat); +PG_FUNCTION_INFO_V1(tsa_tsearch2); + + +/* + * List of unsupported functions + * + * The parser and dictionary functions are defined only so that the former + * contents of pg_ts_parser and pg_ts_dict can be loaded into the system, + * for ease of reference while creating the new tsearch configuration. 
+ */ + +UNSUPPORTED_FUNCTION(tsa_dex_init); +UNSUPPORTED_FUNCTION(tsa_dex_lexize); + +UNSUPPORTED_FUNCTION(tsa_snb_en_init); +UNSUPPORTED_FUNCTION(tsa_snb_lexize); +UNSUPPORTED_FUNCTION(tsa_snb_ru_init_koi8); +UNSUPPORTED_FUNCTION(tsa_snb_ru_init_utf8); + +UNSUPPORTED_FUNCTION(tsa_spell_init); +UNSUPPORTED_FUNCTION(tsa_spell_lexize); + +UNSUPPORTED_FUNCTION(tsa_syn_init); +UNSUPPORTED_FUNCTION(tsa_syn_lexize); + +UNSUPPORTED_FUNCTION(tsa_thesaurus_init); +UNSUPPORTED_FUNCTION(tsa_thesaurus_lexize); + +UNSUPPORTED_FUNCTION(tsa_prsd_start); +UNSUPPORTED_FUNCTION(tsa_prsd_getlexeme); +UNSUPPORTED_FUNCTION(tsa_prsd_end); +UNSUPPORTED_FUNCTION(tsa_prsd_lextype); +UNSUPPORTED_FUNCTION(tsa_prsd_headline); + +UNSUPPORTED_FUNCTION(tsa_reset_tsearch); +UNSUPPORTED_FUNCTION(tsa_get_covers); + +UNSUPPORTED_FUNCTION(tsa_rewrite_accum); +UNSUPPORTED_FUNCTION(tsa_rewrite_finish); + + +/* + * list of redefined functions + */ + +/* lexize(text, text) */ +Datum +tsa_lexize_byname(PG_FUNCTION_ARGS) +{ + text *dictname = PG_GETARG_TEXT_P(0); + Datum arg1 = PG_GETARG_DATUM(1); + + return DirectFunctionCall2(ts_lexize, + ObjectIdGetDatum(TextGetObjectId(regdictionaryin, dictname)), + arg1); +} + +/* lexize(text) */ +Datum +tsa_lexize_bycurrent(PG_FUNCTION_ARGS) +{ + Datum arg0 = PG_GETARG_DATUM(0); + Oid id = GetCurrentDict(); + + return DirectFunctionCall2(ts_lexize, + ObjectIdGetDatum(id), + arg0); +} + +/* set_curdict(int) */ +Datum +tsa_set_curdict(PG_FUNCTION_ARGS) +{ + Oid dict_oid = PG_GETARG_OID(0); + + if (!SearchSysCacheExists(TSDICTOID, + ObjectIdGetDatum(dict_oid), + 0, 0, 0)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + dict_oid); + + current_dictionary_oid = dict_oid; + + PG_RETURN_VOID(); +} + +/* set_curdict(text) */ +Datum +tsa_set_curdict_byname(PG_FUNCTION_ARGS) +{ + text *name = PG_GETARG_TEXT_P(0); + Oid dict_oid; + + dict_oid = TSDictionaryGetDictid(stringToQualifiedNameList(TextPGetCString(name)), false); + + current_dictionary_oid = 
dict_oid; + + PG_RETURN_VOID(); +} + +/* token_type() */ +Datum +tsa_token_type_current(PG_FUNCTION_ARGS) +{ + INSERT_ARGUMENT0(ObjectIdGetDatum(GetCurrentParser()), false); + return ts_token_type_byid(fcinfo); +} + +/* set_curprs(int) */ +Datum +tsa_set_curprs(PG_FUNCTION_ARGS) +{ + Oid parser_oid = PG_GETARG_OID(0); + + if (!SearchSysCacheExists(TSPARSEROID, + ObjectIdGetDatum(parser_oid), + 0, 0, 0)) + elog(ERROR, "cache lookup failed for text search parser %u", + parser_oid); + + current_parser_oid = parser_oid; + + PG_RETURN_VOID(); +} + +/* set_curprs(text) */ +Datum +tsa_set_curprs_byname(PG_FUNCTION_ARGS) +{ + text *name = PG_GETARG_TEXT_P(0); + Oid parser_oid; + + parser_oid = TSParserGetPrsid(stringToQualifiedNameList(TextPGetCString(name)), false); + + current_parser_oid = parser_oid; + + PG_RETURN_VOID(); +} + +/* parse(text) */ +Datum +tsa_parse_current(PG_FUNCTION_ARGS) +{ + INSERT_ARGUMENT0(ObjectIdGetDatum(GetCurrentParser()), false); + return ts_parse_byid(fcinfo); +} + +/* set_curcfg(int) */ +Datum +tsa_set_curcfg(PG_FUNCTION_ARGS) +{ + Oid arg0 = PG_GETARG_OID(0); + char *name; + + name = DatumGetCString(DirectFunctionCall1(regconfigout, + ObjectIdGetDatum(arg0))); + + set_config_option("default_text_search_config", name, + PGC_USERSET, + PGC_S_SESSION, + GUC_ACTION_SET, + true); + + PG_RETURN_VOID(); +} + +/* set_curcfg(text) */ +Datum +tsa_set_curcfg_byname(PG_FUNCTION_ARGS) +{ + text *arg0 = PG_GETARG_TEXT_P(0); + char *name; + + name = TextPGetCString(arg0); + set_config_option("default_text_search_config", name, + PGC_USERSET, + PGC_S_SESSION, + GUC_ACTION_SET, + true); + + PG_RETURN_VOID(); +} + +/* show_curcfg() */ +Datum +tsa_show_curcfg(PG_FUNCTION_ARGS) +{ + char *cfgname; + Oid config_oid; + + cfgname = GetConfigOptionByName("default_text_search_config", NULL); + config_oid = DatumGetObjectId(DirectFunctionCall1(regconfigin, + CStringGetDatum(cfgname))); + + PG_RETURN_OID(config_oid); +} + +/* to_tsvector(text, text) */ +Datum 
+tsa_to_tsvector_name(PG_FUNCTION_ARGS) +{ + text *cfgname = PG_GETARG_TEXT_P(0); + Datum arg1 = PG_GETARG_DATUM(1); + Oid config_oid; + + config_oid = TextGetObjectId(regconfigin, cfgname); + + return DirectFunctionCall2(to_tsvector_byid, + ObjectIdGetDatum(config_oid), arg1); +} + +/* to_tsquery(text, text) */ +Datum +tsa_to_tsquery_name(PG_FUNCTION_ARGS) +{ + text *cfgname = PG_GETARG_TEXT_P(0); + Datum arg1 = PG_GETARG_DATUM(1); + Oid config_oid; + + config_oid = TextGetObjectId(regconfigin, cfgname); + + return DirectFunctionCall2(to_tsquery_byid, + ObjectIdGetDatum(config_oid), arg1); +} + + +/* plainto_tsquery(text, text) */ +Datum +tsa_plainto_tsquery_name(PG_FUNCTION_ARGS) +{ + text *cfgname = PG_GETARG_TEXT_P(0); + Datum arg1 = PG_GETARG_DATUM(1); + Oid config_oid; + + config_oid = TextGetObjectId(regconfigin, cfgname); + + return DirectFunctionCall2(plainto_tsquery_byid, + ObjectIdGetDatum(config_oid), arg1); +} + +/* headline(text, text, tsquery [,text]) */ +Datum +tsa_headline_byname(PG_FUNCTION_ARGS) +{ + Datum arg0 = PG_GETARG_DATUM(0); + Datum arg1 = PG_GETARG_DATUM(1); + Datum arg2 = PG_GETARG_DATUM(2); + Datum result; + Oid config_oid; + + /* first parameter has to be converted to oid */ + config_oid = DatumGetObjectId(DirectFunctionCall1(regconfigin, + DirectFunctionCall1(textout, arg0))); + + if (PG_NARGS() == 3) + result = DirectFunctionCall3(ts_headline_byid, + ObjectIdGetDatum(config_oid), arg1, arg2); + else + { + Datum arg3 = PG_GETARG_DATUM(3); + + result = DirectFunctionCall4(ts_headline_byid_opt, + ObjectIdGetDatum(config_oid), + arg1, arg2, arg3); + } + + return result; +} + +/* + * tsearch2 version of update trigger + * + * We pass this on to the core trigger after inserting the default text + * search configuration name as the second argument. Note that this isn't + * a complete implementation of the original functionality; tsearch2 allowed + * transformation function names to be included in the list. 
However, that + * is deliberately removed as being a security risk. + */ +Datum +tsa_tsearch2(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata; + Trigger *trigger; + char **tgargs; + int i; + + /* Check call context */ + if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */ + elog(ERROR, "tsvector_update_trigger: not fired by trigger manager"); + + trigdata = (TriggerData *) fcinfo->context; + trigger = trigdata->tg_trigger; + + if (trigger->tgnargs < 2) + elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)"); + + /* create space for configuration name */ + tgargs = (char **) palloc((trigger->tgnargs + 1) * sizeof(char *)); + tgargs[0] = trigger->tgargs[0]; + for (i = 1; i < trigger->tgnargs; i++) + tgargs[i+1] = trigger->tgargs[i]; + + tgargs[1] = pstrdup(GetConfigOptionByName("default_text_search_config", + NULL)); + trigger->tgargs = tgargs; + trigger->tgnargs++; + + return tsvector_update_trigger_byid(fcinfo); +} + +/* + * Get Oid of current dictionary + */ +static Oid +GetCurrentDict(void) +{ + if (current_dictionary_oid == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("no current dictionary"), + errhint("Execute SELECT set_curdict(...)."))); + + return current_dictionary_oid; +} + +/* + * Get Oid of current parser + * + * Here, it seems reasonable to select the "default" parser if none has been + * set. + */ +static Oid +GetCurrentParser(void) +{ + if (current_parser_oid == InvalidOid) + current_parser_oid = TSParserGetPrsid(stringToQualifiedNameList("pg_catalog.default"), false); + return current_parser_oid; +} diff --git a/contrib/tsearch2/tsearch2.sql.in b/contrib/tsearch2/tsearch2.sql.in new file mode 100644 index 0000000000..80d9cb3e42 --- /dev/null +++ b/contrib/tsearch2/tsearch2.sql.in @@ -0,0 +1,570 @@ +/* $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.sql.in,v 1.1 2007/11/13 21:02:29 tgl Exp $ */ + +-- Adjust this setting to control where the objects get created. 
+SET search_path = public; + +-- These domains are just to catch schema-qualified references to the +-- old data types. +CREATE DOMAIN tsvector AS pg_catalog.tsvector; +CREATE DOMAIN tsquery AS pg_catalog.tsquery; +CREATE DOMAIN gtsvector AS pg_catalog.gtsvector; +CREATE DOMAIN gtsq AS pg_catalog.text; + +--dict interface +CREATE FUNCTION lexize(oid, text) + RETURNS _text + as 'ts_lexize' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION lexize(text, text) + RETURNS _text + as 'MODULE_PATHNAME', 'tsa_lexize_byname' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION lexize(text) + RETURNS _text + as 'MODULE_PATHNAME', 'tsa_lexize_bycurrent' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION set_curdict(int) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_set_curdict' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION set_curdict(text) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_set_curdict_byname' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +--built-in dictionaries +CREATE FUNCTION dex_init(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_dex_init' + LANGUAGE C; + +CREATE FUNCTION dex_lexize(internal,internal,int4) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_dex_lexize' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION snb_en_init(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_snb_en_init' + LANGUAGE C; + +CREATE FUNCTION snb_lexize(internal,internal,int4) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_snb_lexize' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION snb_ru_init_koi8(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_snb_ru_init_koi8' + LANGUAGE C; + +CREATE FUNCTION snb_ru_init_utf8(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_snb_ru_init_utf8' + LANGUAGE C; + +CREATE FUNCTION spell_init(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_spell_init' + LANGUAGE C; + +CREATE FUNCTION spell_lexize(internal,internal,int4) + RETURNS 
internal + as 'MODULE_PATHNAME', 'tsa_spell_lexize' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION syn_init(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_syn_init' + LANGUAGE C; + +CREATE FUNCTION syn_lexize(internal,internal,int4) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_syn_lexize' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION thesaurus_init(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_thesaurus_init' + LANGUAGE C; + +CREATE FUNCTION thesaurus_lexize(internal,internal,int4,internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_thesaurus_lexize' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +--sql-level interface +CREATE TYPE tokentype + as (tokid int4, alias text, descr text); + +CREATE FUNCTION token_type(int4) + RETURNS setof tokentype + as 'ts_token_type_byid' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT + ROWS 16; + +CREATE FUNCTION token_type(text) + RETURNS setof tokentype + as 'ts_token_type_byname' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT + ROWS 16; + +CREATE FUNCTION token_type() + RETURNS setof tokentype + as 'MODULE_PATHNAME', 'tsa_token_type_current' + LANGUAGE C + RETURNS NULL ON NULL INPUT + ROWS 16; + +CREATE FUNCTION set_curprs(int) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_set_curprs' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION set_curprs(text) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_set_curprs_byname' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE TYPE tokenout + as (tokid int4, token text); + +CREATE FUNCTION parse(oid,text) + RETURNS setof tokenout + as 'ts_parse_byid' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION parse(text,text) + RETURNS setof tokenout + as 'ts_parse_byname' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION parse(text) + RETURNS setof tokenout + as 'MODULE_PATHNAME', 'tsa_parse_current' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +--default parser +CREATE FUNCTION 
prsd_start(internal,int4) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_prsd_start' + LANGUAGE C; + +CREATE FUNCTION prsd_getlexeme(internal,internal,internal) + RETURNS int4 + as 'MODULE_PATHNAME', 'tsa_prsd_getlexeme' + LANGUAGE C; + +CREATE FUNCTION prsd_end(internal) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_prsd_end' + LANGUAGE C; + +CREATE FUNCTION prsd_lextype(internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_prsd_lextype' + LANGUAGE C; + +CREATE FUNCTION prsd_headline(internal,internal,internal) + RETURNS internal + as 'MODULE_PATHNAME', 'tsa_prsd_headline' + LANGUAGE C; + +--tsearch config +CREATE FUNCTION set_curcfg(int) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_set_curcfg' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION set_curcfg(text) + RETURNS void + as 'MODULE_PATHNAME', 'tsa_set_curcfg_byname' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION show_curcfg() + RETURNS oid + as 'MODULE_PATHNAME', 'tsa_show_curcfg' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION length(tsvector) + RETURNS int4 + AS 'tsvector_length' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION to_tsvector(oid, text) + RETURNS tsvector + AS 'to_tsvector_byid' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION to_tsvector(text, text) + RETURNS tsvector + AS 'MODULE_PATHNAME', 'tsa_to_tsvector_name' + LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION to_tsvector(text) + RETURNS tsvector + AS 'to_tsvector' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION strip(tsvector) + RETURNS tsvector + AS 'tsvector_strip' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION setweight(tsvector,"char") + RETURNS tsvector + AS 'tsvector_setweight' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION concat(tsvector,tsvector) + RETURNS tsvector + AS 'tsvector_concat' + LANGUAGE INTERNAL + 
RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION querytree(tsquery) + RETURNS text + AS 'tsquerytree' + LANGUAGE INTERNAL RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION to_tsquery(oid, text) + RETURNS tsquery + AS 'to_tsquery_byid' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION to_tsquery(text, text) + RETURNS tsquery + AS 'MODULE_PATHNAME','tsa_to_tsquery_name' + LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION to_tsquery(text) + RETURNS tsquery + AS 'to_tsquery' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION plainto_tsquery(oid, text) + RETURNS tsquery + AS 'plainto_tsquery_byid' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION plainto_tsquery(text, text) + RETURNS tsquery + AS 'MODULE_PATHNAME','tsa_plainto_tsquery_name' + LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION plainto_tsquery(text) + RETURNS tsquery + AS 'plainto_tsquery' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +--Trigger +CREATE FUNCTION tsearch2() + RETURNS trigger + AS 'MODULE_PATHNAME', 'tsa_tsearch2' + LANGUAGE C; + +--Relevation +CREATE FUNCTION rank(float4[], tsvector, tsquery) + RETURNS float4 + AS 'ts_rank_wtt' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank(float4[], tsvector, tsquery, int4) + RETURNS float4 + AS 'ts_rank_wttf' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank(tsvector, tsquery) + RETURNS float4 + AS 'ts_rank_tt' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank(tsvector, tsquery, int4) + RETURNS float4 + AS 'ts_rank_ttf' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank_cd(float4[], tsvector, tsquery) + RETURNS float4 + AS 'ts_rankcd_wtt' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank_cd(float4[], tsvector, tsquery, int4) + RETURNS float4 + AS 
'ts_rankcd_wttf' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank_cd(tsvector, tsquery) + RETURNS float4 + AS 'ts_rankcd_tt' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION rank_cd(tsvector, tsquery, int4) + RETURNS float4 + AS 'ts_rankcd_ttf' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION headline(oid, text, tsquery, text) + RETURNS text + AS 'ts_headline_byid_opt' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION headline(oid, text, tsquery) + RETURNS text + AS 'ts_headline_byid' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION headline(text, text, tsquery, text) + RETURNS text + AS 'MODULE_PATHNAME', 'tsa_headline_byname' + LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION headline(text, text, tsquery) + RETURNS text + AS 'MODULE_PATHNAME', 'tsa_headline_byname' + LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION headline(text, tsquery, text) + RETURNS text + AS 'ts_headline_opt' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION headline(text, tsquery) + RETURNS text + AS 'ts_headline' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +-- CREATE the OPERATOR class +CREATE OPERATOR CLASS gist_tsvector_ops +FOR TYPE tsvector USING gist +AS + OPERATOR 1 @@ (tsvector, tsquery) RECHECK , + FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4), + FUNCTION 2 gtsvector_union (internal, internal), + FUNCTION 3 gtsvector_compress (internal), + FUNCTION 4 gtsvector_decompress (internal), + FUNCTION 5 gtsvector_penalty (internal, internal, internal), + FUNCTION 6 gtsvector_picksplit (internal, internal), + FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal), + STORAGE gtsvector; + +--stat info +CREATE TYPE statinfo + as (word text, ndoc int4, nentry int4); + +CREATE FUNCTION stat(text) + RETURNS setof statinfo + as 'ts_stat1' + 
LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT; + +CREATE FUNCTION stat(text,text) + RETURNS setof statinfo + as 'ts_stat2' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT; + +--reset - just for debuging +CREATE FUNCTION reset_tsearch() + RETURNS void + as 'MODULE_PATHNAME', 'tsa_reset_tsearch' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +--get cover (debug for rank_cd) +CREATE FUNCTION get_covers(tsvector,tsquery) + RETURNS text + as 'MODULE_PATHNAME', 'tsa_get_covers' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +--debug function +create type tsdebug as ( + ts_name text, + tok_type text, + description text, + token text, + dict_name text[], + "tsvector" tsvector +); + +CREATE or replace FUNCTION _get_parser_from_curcfg() +RETURNS text as +$$select prsname::text from pg_catalog.pg_ts_parser p join pg_ts_config c on cfgparser = p.oid where c.oid = show_curcfg();$$ +LANGUAGE SQL RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE FUNCTION ts_debug(text) +RETURNS setof tsdebug as $$ +select + (select c.cfgname::text from pg_catalog.pg_ts_config as c + where c.oid = show_curcfg()), + t.alias as tok_type, + t.descr as description, + p.token, + ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary::pg_catalog.text + FROM pg_catalog.pg_ts_config_map AS m + WHERE m.mapcfg = show_curcfg() AND m.maptokentype = p.tokid + ORDER BY m.mapseqno ) + AS dict_name, + strip(to_tsvector(p.token)) as tsvector +from + parse( _get_parser_from_curcfg(), $1 ) as p, + token_type() as t +where + t.tokid = p.tokid +$$ LANGUAGE SQL RETURNS NULL ON NULL INPUT; + +CREATE OR REPLACE FUNCTION numnode(tsquery) + RETURNS int4 + as 'tsquery_numnode' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE OR REPLACE FUNCTION tsquery_and(tsquery,tsquery) + RETURNS tsquery + as 'tsquery_and' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE OR REPLACE FUNCTION tsquery_or(tsquery,tsquery) + RETURNS tsquery + as 'tsquery_or' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT 
IMMUTABLE; + +CREATE OR REPLACE FUNCTION tsquery_not(tsquery) + RETURNS tsquery + as 'tsquery_not' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +--------------rewrite subsystem + +CREATE OR REPLACE FUNCTION rewrite(tsquery, text) + RETURNS tsquery + as 'tsquery_rewrite_query' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE OR REPLACE FUNCTION rewrite(tsquery, tsquery, tsquery) + RETURNS tsquery + as 'tsquery_rewrite' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE OR REPLACE FUNCTION rewrite_accum(tsquery,tsquery[]) + RETURNS tsquery + AS 'MODULE_PATHNAME', 'tsa_rewrite_accum' + LANGUAGE C; + +CREATE OR REPLACE FUNCTION rewrite_finish(tsquery) + RETURNS tsquery + as 'MODULE_PATHNAME', 'tsa_rewrite_finish' + LANGUAGE C; + +CREATE AGGREGATE rewrite ( + BASETYPE = tsquery[], + SFUNC = rewrite_accum, + STYPE = tsquery, + FINALFUNC = rewrite_finish +); + +CREATE OR REPLACE FUNCTION tsq_mcontains(tsquery, tsquery) + RETURNS bool + as 'tsq_mcontains' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE OR REPLACE FUNCTION tsq_mcontained(tsquery, tsquery) + RETURNS bool + as 'tsq_mcontained' + LANGUAGE INTERNAL + RETURNS NULL ON NULL INPUT IMMUTABLE; + +CREATE OPERATOR CLASS gist_tp_tsquery_ops +FOR TYPE tsquery USING gist +AS + OPERATOR 7 @> (tsquery, tsquery) RECHECK, + OPERATOR 8 <@ (tsquery, tsquery) RECHECK, + FUNCTION 1 gtsquery_consistent (bigint, internal, int4), + FUNCTION 2 gtsquery_union (internal, internal), + FUNCTION 3 gtsquery_compress (internal), + FUNCTION 4 gtsquery_decompress (internal), + FUNCTION 5 gtsquery_penalty (internal, internal, internal), + FUNCTION 6 gtsquery_picksplit (internal, internal), + FUNCTION 7 gtsquery_same (bigint, bigint, internal), + STORAGE bigint; + +CREATE OPERATOR CLASS gin_tsvector_ops +FOR TYPE tsvector USING gin +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 @@@ (tsvector, tsquery) RECHECK, + FUNCTION 1 bttextcmp(text, text), + FUNCTION 2 
gin_extract_tsvector(tsvector,internal), + FUNCTION 3 gin_extract_query(internal,internal,smallint), + FUNCTION 4 gin_ts_consistent(internal,smallint,internal), + STORAGE text; + +CREATE OPERATOR CLASS tsvector_ops +FOR TYPE tsvector USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 tsvector_cmp(tsvector, tsvector); + +CREATE OPERATOR CLASS tsquery_ops +FOR TYPE tsquery USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 tsquery_cmp(tsquery, tsquery); diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c deleted file mode 100644 index dfbdacd345..0000000000 --- a/contrib/tsearch2/tsvector.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* - * In/Out definitions for tsvector type - * Internal structure: - * string of values, array of position lexeme in string and it's length - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - - -#include "access/gist.h" -#include "access/itup.h" -#include "catalog/namespace.h" -#include "commands/trigger.h" -#include "executor/spi.h" -#include "nodes/pg_list.h" -#include "storage/bufpage.h" -#include "utils/builtins.h" -#include "utils/pg_locale.h" -#include "mb/pg_wchar.h" - -#include <ctype.h> -#include "tsvector.h" -#include "query.h" -#include "ts_cfg.h" -#include "common.h" - -PG_FUNCTION_INFO_V1(tsvector_in); -Datum tsvector_in(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsvector_out); -Datum tsvector_out(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsvector); -Datum to_tsvector(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsvector_current); -Datum to_tsvector_current(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsvector_name); -Datum to_tsvector_name(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsearch2); -Datum tsearch2(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsvector_length); -Datum tsvector_length(PG_FUNCTION_ARGS); - -/* - * in/out text index type - */ -static int 
-comparePos(const void *a, const void *b) -{ - if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b)) - return 0; - return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1; -} - -static int -uniquePos(WordEntryPos * a, int4 l) -{ - WordEntryPos *ptr, - *res; - - res = a; - if (l == 1) - return l; - - qsort((void *) a, l, sizeof(WordEntryPos), comparePos); - - ptr = a + 1; - while (ptr - a < l) - { - if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res)) - { - res++; - *res = *ptr; - if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1) - break; - } - else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res)) - WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr)); - ptr++; - } - return res + 1 - a; -} - -static int -compareentry(const void *a, const void *b, void *arg) -{ - char *BufferStr = (char *) arg; - - if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len) - { - return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos], - &BufferStr[((WordEntryIN *) b)->entry.pos], - ((WordEntryIN *) a)->entry.len); - } - return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 
1 : -1; -} - -static int -uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) -{ - WordEntryIN *ptr, - *res; - - res = a; - if (l == 1) - { - if (a->entry.haspos) - { - *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos)); - *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos); - } - return l; - } - - ptr = a + 1; - qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf); - - while (ptr - a < l) - { - if (!(ptr->entry.len == res->entry.len && - strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0)) - { - if (res->entry.haspos) - { - *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); - *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); - } - *outbuflen += SHORTALIGN(res->entry.len); - res++; - memcpy(res, ptr, sizeof(WordEntryIN)); - } - else if (ptr->entry.haspos) - { - if (res->entry.haspos) - { - int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos); - - res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos)); - memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]), - &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos)); - *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos); - pfree(ptr->pos); - } - else - { - res->entry.haspos = 1; - res->pos = ptr->pos; - } - } - ptr++; - } - if (res->entry.haspos) - { - *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); - *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); - } - *outbuflen += SHORTALIGN(res->entry.len); - - return res + 1 - a; -} - -#define WAITWORD 1 -#define WAITENDWORD 2 -#define WAITNEXTCHAR 3 -#define WAITENDCMPLX 4 -#define WAITPOSINFO 5 -#define INPOSINFO 6 -#define WAITPOSDELIM 7 -#define WAITCHARCMPLX 8 - -#define RESIZEPRSBUF \ -do { \ - if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \ - { \ - int4 clen = state->curpos - state->word; \ - state->len *= 2; \ - 
state->word = (char*)repalloc( (void*)state->word, state->len ); \ - state->curpos = state->word + clen; \ - } \ -} while (0) - - -int4 -gettoken_tsvector(TI_IN_STATE * state) -{ - int4 oldstate = 0; - - state->curpos = state->word; - state->state = WAITWORD; - state->alen = 0; - - while (1) - { - if (state->state == WAITWORD) - { - if (*(state->prsbuf) == '\0') - return 0; - else if (t_iseq(state->prsbuf, '\'')) - state->state = WAITENDCMPLX; - else if (t_iseq(state->prsbuf, '\\')) - { - state->state = WAITNEXTCHAR; - oldstate = WAITENDWORD; - } - else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - else if (!t_isspace(state->prsbuf)) - { - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - state->state = WAITENDWORD; - } - } - else if (state->state == WAITNEXTCHAR) - { - if (*(state->prsbuf) == '\0') - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("there is no escaped character"))); - else - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - state->state = oldstate; - } - } - else if (state->state == WAITENDWORD) - { - if (t_iseq(state->prsbuf, '\\')) - { - state->state = WAITNEXTCHAR; - oldstate = WAITENDWORD; - } - else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || - (state->oprisdelim && ISOPERATOR(state->prsbuf))) - { - RESIZEPRSBUF; - if (state->curpos == state->word) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - *(state->curpos) = '\0'; - return 1; - } - else if (t_iseq(state->prsbuf, ':')) - { - if (state->curpos == state->word) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - *(state->curpos) = '\0'; - if (state->oprisdelim) - return 1; - else - state->state = INPOSINFO; - } - else - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - } - 
} - else if (state->state == WAITENDCMPLX) - { - if (t_iseq(state->prsbuf, '\'')) - { - state->state = WAITCHARCMPLX; - } - else if (t_iseq(state->prsbuf, '\\')) - { - state->state = WAITNEXTCHAR; - oldstate = WAITENDCMPLX; - } - else if (*(state->prsbuf) == '\0') - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - else - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - } - } - else if (state->state == WAITCHARCMPLX) - { - if (t_iseq(state->prsbuf, '\'')) - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - state->state = WAITENDCMPLX; - } - else - { - RESIZEPRSBUF; - *(state->curpos) = '\0'; - if (state->curpos == state->word) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - if (state->oprisdelim) - { - /* state->prsbuf+=pg_mblen(state->prsbuf); */ - return 1; - } - else - state->state = WAITPOSINFO; - continue; /* recheck current character */ - } - } - else if (state->state == WAITPOSINFO) - { - if (t_iseq(state->prsbuf, ':')) - state->state = INPOSINFO; - else - return 1; - } - else if (state->state == INPOSINFO) - { - if (t_isdigit(state->prsbuf)) - { - if (state->alen == 0) - { - state->alen = 4; - state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen); - *(uint16 *) (state->pos) = 0; - } - else if (*(uint16 *) (state->pos) + 1 >= state->alen) - { - state->alen *= 2; - state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen); - } - (*(uint16 *) (state->pos))++; - WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf))); - if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("wrong position info"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0); - state->state = WAITPOSDELIM; - } - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - 
errmsg("syntax error"))); - } - else if (state->state == WAITPOSDELIM) - { - if (t_iseq(state->prsbuf, ',')) - state->state = INPOSINFO; - else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3); - } - else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2); - } - else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1); - } - else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0); - } - else if (t_isspace(state->prsbuf) || - *(state->prsbuf) == '\0') - return 1; - else if (!t_isdigit(state->prsbuf)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - } - else - /* internal error */ - elog(ERROR, "internal error"); - - /* get next char */ - state->prsbuf += pg_mblen(state->prsbuf); - } - - return 0; -} - -Datum -tsvector_in(PG_FUNCTION_ARGS) -{ - char *buf = PG_GETARG_CSTRING(0); - TI_IN_STATE state; - WordEntryIN *arr; - WordEntry *inarr; - int4 len = 0, - totallen = 64; - tsvector *in; - char *tmpbuf, - *cur; - int4 i, - buflen = 256; - - SET_FUNCOID(); - - pg_verifymbstr(buf, strlen(buf), false); - state.prsbuf = buf; - state.len = 32; - state.word = (char *) palloc(state.len); - 
state.oprisdelim = false; - - arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); - cur = tmpbuf = (char *) palloc(buflen); - while (gettoken_tsvector(&state)) - { - if (len >= totallen) - { - totallen *= 2; - arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); - } - while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) - { - int4 dist = cur - tmpbuf; - - buflen *= 2; - tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); - cur = tmpbuf + dist; - } - if (state.curpos - state.word >= MAXSTRLEN) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("word is too long"))); - arr[len].entry.len = state.curpos - state.word; - if (cur - tmpbuf > MAXSTRPOS) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("too long value"))); - arr[len].entry.pos = cur - tmpbuf; - memcpy((void *) cur, (void *) state.word, arr[len].entry.len); - cur += arr[len].entry.len; - if (state.alen) - { - arr[len].entry.haspos = 1; - arr[len].pos = state.pos; - } - else - arr[len].entry.haspos = 0; - len++; - } - pfree(state.word); - - if (len > 0) - len = uniqueentry(arr, len, tmpbuf, &buflen); - else - buflen = 0; - totallen = CALCDATASIZE(len, buflen); - in = (tsvector *) palloc0(totallen); - SET_VARSIZE(in, totallen); - in->size = len; - cur = STRPTR(in); - inarr = ARRPTR(in); - for (i = 0; i < len; i++) - { - memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); - arr[i].entry.pos = cur - STRPTR(in); - cur += SHORTALIGN(arr[i].entry.len); - if (arr[i].entry.haspos) - { - memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); - cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos); - pfree(arr[i].pos); - } - memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry)); - } - pfree(tmpbuf); - pfree(arr); - PG_RETURN_POINTER(in); -} - -Datum -tsvector_length(PG_FUNCTION_ARGS) -{ - tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - int4 ret = in->size; - - PG_FREE_IF_COPY(in, 
0); - PG_RETURN_INT32(ret); -} - -Datum -tsvector_out(PG_FUNCTION_ARGS) -{ - tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - char *outbuf; - int4 i, - lenbuf = 0, - pp; - WordEntry *ptr = ARRPTR(out); - char *curbegin, - *curin, - *curout; - - lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; - for (i = 0; i < out->size; i++) - { - lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ; - if (ptr[i].haspos) - lenbuf += 7 * POSDATALEN(out, &(ptr[i])); - } - - curout = outbuf = (char *) palloc(lenbuf); - for (i = 0; i < out->size; i++) - { - curbegin = curin = STRPTR(out) + ptr->pos; - if (i != 0) - *curout++ = ' '; - *curout++ = '\''; - while (curin - curbegin < ptr->len) - { - int len = pg_mblen(curin); - - if (t_iseq(curin, '\'')) - { - int4 pos = curout - outbuf; - - outbuf = (char *) repalloc((void *) outbuf, ++lenbuf); - curout = outbuf + pos; - *curout++ = '\''; - } - while (len--) - *curout++ = *curin++; - } - *curout++ = '\''; - if ((pp = POSDATALEN(out, ptr)) != 0) - { - WordEntryPos *wptr; - - *curout++ = ':'; - wptr = POSDATAPTR(out, ptr); - while (pp) - { - sprintf(curout, "%d", WEP_GETPOS(*wptr)); - curout = strchr(curout, '\0'); - switch (WEP_GETWEIGHT(*wptr)) - { - case 3: - *curout++ = 'A'; - break; - case 2: - *curout++ = 'B'; - break; - case 1: - *curout++ = 'C'; - break; - case 0: - default: - break; - } - if (pp > 1) - *curout++ = ','; - pp--; - wptr++; - } - } - ptr++; - } - *curout = '\0'; - outbuf[lenbuf - 1] = '\0'; - PG_FREE_IF_COPY(out, 0); - PG_RETURN_POINTER(outbuf); -} - -static int -compareWORD(const void *a, const void *b) -{ - if (((TSWORD *) a)->len == ((TSWORD *) b)->len) - { - int res = strncmp( - ((TSWORD *) a)->word, - ((TSWORD *) b)->word, - ((TSWORD *) b)->len); - - if (res == 0) - return (((TSWORD *) a)->pos.pos > ((TSWORD *) b)->pos.pos) ? 1 : -1; - return res; - } - return (((TSWORD *) a)->len > ((TSWORD *) b)->len) ? 
1 : -1; -} - -static int -uniqueWORD(TSWORD * a, int4 l) -{ - TSWORD *ptr, - *res; - int tmppos; - - if (l == 1) - { - tmppos = LIMITPOS(a->pos.pos); - a->alen = 2; - a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); - a->pos.apos[0] = 1; - a->pos.apos[1] = tmppos; - return l; - } - - res = a; - ptr = a + 1; - - qsort((void *) a, l, sizeof(TSWORD), compareWORD); - tmppos = LIMITPOS(a->pos.pos); - a->alen = 2; - a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); - a->pos.apos[0] = 1; - a->pos.apos[1] = tmppos; - - while (ptr - a < l) - { - if (!(ptr->len == res->len && - strncmp(ptr->word, res->word, res->len) == 0)) - { - res++; - res->len = ptr->len; - res->word = ptr->word; - tmppos = LIMITPOS(ptr->pos.pos); - res->alen = 2; - res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen); - res->pos.apos[0] = 1; - res->pos.apos[1] = tmppos; - } - else - { - pfree(ptr->word); - if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1) - { - if (res->pos.apos[0] + 1 >= res->alen) - { - res->alen *= 2; - res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen); - } - if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos)) - { - res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos); - res->pos.apos[0]++; - } - } - } - ptr++; - } - - return res + 1 - a; -} - -/* - * make value of tsvector - */ -static tsvector * -makevalue(PRSTEXT * prs) -{ - int4 i, - j, - lenstr = 0, - totallen; - tsvector *in; - WordEntry *ptr; - char *str, - *cur; - - prs->curwords = uniqueWORD(prs->words, prs->curwords); - for (i = 0; i < prs->curwords; i++) - { - lenstr += SHORTALIGN(prs->words[i].len); - - if (prs->words[i].alen) - lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); - } - - totallen = CALCDATASIZE(prs->curwords, lenstr); - in = (tsvector *) palloc0(totallen); - SET_VARSIZE(in, totallen); - in->size = prs->curwords; - - ptr = ARRPTR(in); - cur 
= str = STRPTR(in); - for (i = 0; i < prs->curwords; i++) - { - ptr->len = prs->words[i].len; - if (cur - str > MAXSTRPOS) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("value is too big"))); - ptr->pos = cur - str; - memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); - pfree(prs->words[i].word); - cur += SHORTALIGN(prs->words[i].len); - if (prs->words[i].alen) - { - WordEntryPos *wptr; - - ptr->haspos = 1; - *(uint16 *) cur = prs->words[i].pos.apos[0]; - wptr = POSDATAPTR(in, ptr); - for (j = 0; j < *(uint16 *) cur; j++) - { - WEP_SETWEIGHT(wptr[j], 0); - WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]); - } - cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); - pfree(prs->words[i].pos.apos); - } - else - ptr->haspos = 0; - ptr++; - } - pfree(prs->words); - return in; -} - - -Datum -to_tsvector(PG_FUNCTION_ARGS) -{ - text *in = PG_GETARG_TEXT_P(1); - PRSTEXT prs; - tsvector *out; - TSCfgInfo *cfg; - - SET_FUNCOID(); - cfg = findcfg(PG_GETARG_INT32(0)); - - prs.lenwords = 32; - prs.curwords = 0; - prs.pos = 0; - prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords); - - parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); - PG_FREE_IF_COPY(in, 1); - - if (prs.curwords) - out = makevalue(&prs); - else - { - pfree(prs.words); - out = palloc(CALCDATASIZE(0, 0)); - SET_VARSIZE(out, CALCDATASIZE(0, 0)); - out->size = 0; - } - PG_RETURN_POINTER(out); -} - -Datum -to_tsvector_name(PG_FUNCTION_ARGS) -{ - text *cfg = PG_GETARG_TEXT_P(0); - Datum res; - - SET_FUNCOID(); - res = DirectFunctionCall3( - to_tsvector, - Int32GetDatum(name2id_cfg(cfg)), - PG_GETARG_DATUM(1), - (Datum) 0 - ); - - PG_FREE_IF_COPY(cfg, 0); - PG_RETURN_DATUM(res); -} - -Datum -to_tsvector_current(PG_FUNCTION_ARGS) -{ - Datum res; - - SET_FUNCOID(); - res = DirectFunctionCall3( - to_tsvector, - Int32GetDatum(get_currcfg()), - PG_GETARG_DATUM(0), - (Datum) 0 - ); - - PG_RETURN_DATUM(res); -} - -static Oid -findFunc(char *fname) -{ 
- FuncCandidateList clist, - ptr; - Oid funcid = InvalidOid; - List *names = list_make1(makeString(fname)); - - ptr = clist = FuncnameGetCandidates(names, 1); - list_free(names); - - if (!ptr) - return funcid; - - while (ptr) - { - if (ptr->args[0] == TEXTOID && funcid == InvalidOid) - funcid = ptr->oid; - clist = ptr->next; - pfree(ptr); - ptr = clist; - } - - return funcid; -} - -/* - * Trigger - */ -Datum -tsearch2(PG_FUNCTION_ARGS) -{ - TriggerData *trigdata; - Trigger *trigger; - Relation rel; - HeapTuple rettuple = NULL; - int numidxattr, - i; - PRSTEXT prs; - Datum datum = (Datum) 0; - Oid funcoid = InvalidOid; - TSCfgInfo *cfg; - - SET_FUNCOID(); - cfg = findcfg(get_currcfg()); - - if (!CALLED_AS_TRIGGER(fcinfo)) - /* internal error */ - elog(ERROR, "TSearch: Not fired by trigger manager"); - - trigdata = (TriggerData *) fcinfo->context; - if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) - /* internal error */ - elog(ERROR, "TSearch: Cannot process STATEMENT events"); - if (TRIGGER_FIRED_AFTER(trigdata->tg_event)) - /* internal error */ - elog(ERROR, "TSearch: Must be fired BEFORE event"); - - if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) - rettuple = trigdata->tg_trigtuple; - else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) - rettuple = trigdata->tg_newtuple; - else - /* internal error */ - elog(ERROR, "TSearch: Unknown event"); - - trigger = trigdata->tg_trigger; - rel = trigdata->tg_relation; - - if (trigger->tgnargs < 2) - /* internal error */ - elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)"); - - numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); - if (numidxattr == SPI_ERROR_NOATTRIBUTE) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("tsvector column \"%s\" does not exist", - trigger->tgargs[0]))); - - prs.lenwords = 32; - prs.curwords = 0; - prs.pos = 0; - prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords); - - /* find all words in indexable column */ - for (i = 1; i < 
trigger->tgnargs; i++) - { - int numattr; - Oid oidtype; - Datum txt_toasted; - bool isnull; - text *txt; - - numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); - if (numattr == SPI_ERROR_NOATTRIBUTE) - { - funcoid = findFunc(trigger->tgargs[i]); - if (funcoid == InvalidOid) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("could not find function or field \"%s\"", - trigger->tgargs[i]))); - - continue; - } - oidtype = SPI_gettypeid(rel->rd_att, numattr); - /* We assume char() and varchar() are binary-equivalent to text */ - if (!(oidtype == TEXTOID || - oidtype == VARCHAROID || - oidtype == BPCHAROID)) - { - elog(WARNING, "TSearch: '%s' is not of character type", - trigger->tgargs[i]); - continue; - } - txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); - if (isnull) - continue; - - if (funcoid != InvalidOid) - { - text *txttmp = (text *) DatumGetPointer(OidFunctionCall1( - funcoid, - PointerGetDatum(txt_toasted) - )); - - txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp))); - if (txt == txttmp) - txt_toasted = PointerGetDatum(txt); - } - else - txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted))); - - parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); - if (txt != (text *) DatumGetPointer(txt_toasted)) - pfree(txt); - } - - /* make tsvector value */ - if (prs.curwords) - { - datum = PointerGetDatum(makevalue(&prs)); - rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, - &datum, NULL); - pfree(DatumGetPointer(datum)); - } - else - { - tsvector *out = palloc(CALCDATASIZE(0, 0)); - - SET_VARSIZE(out, CALCDATASIZE(0, 0)); - out->size = 0; - datum = PointerGetDatum(out); - pfree(prs.words); - rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, - &datum, NULL); - } - - if (rettuple == NULL) - /* internal error */ - elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result); - - return PointerGetDatum(rettuple); -} - -static int 
-silly_cmp_tsvector(const tsvector * a, const tsvector * b) -{ - if (VARSIZE(a) < VARSIZE(b)) - return -1; - else if (VARSIZE(a) > VARSIZE(b)) - return 1; - else if (a->size < b->size) - return -1; - else if (a->size > b->size) - return 1; - else - { - WordEntry *aptr = ARRPTR(a); - WordEntry *bptr = ARRPTR(b); - int i = 0; - int res; - - - for (i = 0; i < a->size; i++) - { - if (aptr->haspos != bptr->haspos) - { - return (aptr->haspos > bptr->haspos) ? -1 : 1; - } - else if (aptr->len != bptr->len) - { - return (aptr->len > bptr->len) ? -1 : 1; - } - else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0) - { - return res; - } - else if (aptr->haspos) - { - WordEntryPos *ap = POSDATAPTR(a, aptr); - WordEntryPos *bp = POSDATAPTR(b, bptr); - int j; - - if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr)) - return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1; - - for (j = 0; j < POSDATALEN(a, aptr); j++) - { - if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp)) - { - return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1; - } - else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp)) - { - return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? 
-1 : 1; - } - ap++, bp++; - } - } - - aptr++; - bptr++; - } - } - - return 0; -} - -PG_FUNCTION_INFO_V1(tsvector_cmp); -PG_FUNCTION_INFO_V1(tsvector_lt); -PG_FUNCTION_INFO_V1(tsvector_le); -PG_FUNCTION_INFO_V1(tsvector_eq); -PG_FUNCTION_INFO_V1(tsvector_ne); -PG_FUNCTION_INFO_V1(tsvector_ge); -PG_FUNCTION_INFO_V1(tsvector_gt); -Datum tsvector_cmp(PG_FUNCTION_ARGS); -Datum tsvector_lt(PG_FUNCTION_ARGS); -Datum tsvector_le(PG_FUNCTION_ARGS); -Datum tsvector_eq(PG_FUNCTION_ARGS); -Datum tsvector_ne(PG_FUNCTION_ARGS); -Datum tsvector_ge(PG_FUNCTION_ARGS); -Datum tsvector_gt(PG_FUNCTION_ARGS); - -#define RUNCMP \ -tsvector *a = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));\ -tsvector *b = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));\ -int res = silly_cmp_tsvector(a,b); \ -PG_FREE_IF_COPY(a,0); \ -PG_FREE_IF_COPY(b,1); \ - -Datum -tsvector_cmp(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_INT32(res); -} - -Datum -tsvector_lt(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res < 0) ? true : false); -} - -Datum -tsvector_le(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res <= 0) ? true : false); -} - -Datum -tsvector_eq(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res == 0) ? true : false); -} - -Datum -tsvector_ge(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res >= 0) ? true : false); -} - -Datum -tsvector_gt(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res > 0) ? true : false); -} - -Datum -tsvector_ne(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res != 0) ? 
true : false); -} diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h deleted file mode 100644 index e006a1dcbd..0000000000 --- a/contrib/tsearch2/tsvector.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef __TXTIDX_H__ -#define __TXTIDX_H__ - -/* -#define TXTIDX_DEBUG -*/ - -#include "postgres.h" - -#include "access/gist.h" -#include "access/itup.h" -#include "utils/builtins.h" -#include "storage/bufpage.h" - -typedef struct -{ - uint32 - haspos:1, - len:11, /* MAX 2Kb */ - pos:20; /* MAX 1Mb */ -} WordEntry; - -#define MAXSTRLEN ( 1<<11 ) -#define MAXSTRPOS ( 1<<20 ) - -/* -Equivalent to -typedef struct -{ - uint16 - weight:2, - pos:14; -} WordEntryPos; - -*/ - -typedef uint16 WordEntryPos; - -#define WEP_GETWEIGHT(x) ( (x) >> 14 ) -#define WEP_GETPOS(x) ( (x) & 0x3fff ) - -#define WEP_SETWEIGHT(x,v) (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) -#define WEP_SETPOS(x,v) (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) - - -#define MAXENTRYPOS (1<<14) -#define MAXNUMPOS 256 -#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) - -/* - * Structure of tsvector datatype: - * 1) standard varlena header - * 2) int4 size - number of lexemes or WordEntry array, which is the same - * 3) Array of WordEntry - sorted array, comparison based on word's length - * and strncmp(). WordEntry->pos points number of - * bytes from end of WordEntry array to start of - * corresponding lexeme. - * 4) Lexeme's storage: - * SHORTALIGNED(lexeme) and position information if it exists - * Position information: first int2 - is a number of positions and it - * follows array of WordEntryPos - */ - -typedef struct -{ - int32 vl_len_; /* varlena header (do not touch directly!) 
*/ - int4 size; - char data[1]; -} tsvector; - -#define DATAHDRSIZE (VARHDRSZ + sizeof(int4)) -#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) ) -#define ARRPTR(x) ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) ) -#define STRPTR(x) ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) ) -#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) ) -#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len)) -#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) -#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) ) - - -typedef struct -{ - WordEntry entry; - WordEntryPos *pos; -} WordEntryIN; - -typedef struct -{ - char *prsbuf; - char *word; - char *curpos; - int4 len; - int4 state; - int4 alen; - WordEntryPos *pos; - bool oprisdelim; -} TI_IN_STATE; - -int4 gettoken_tsvector(TI_IN_STATE * state); - -#endif diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c deleted file mode 100644 index e8d8e16db4..0000000000 --- a/contrib/tsearch2/tsvector_op.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Operations for tsvector type - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - - -#include "access/gist.h" -#include "access/itup.h" -#include "catalog/namespace.h" -#include "commands/trigger.h" -#include "executor/spi.h" -#include "nodes/pg_list.h" -#include "storage/bufpage.h" -#include "utils/builtins.h" -#include "utils/pg_locale.h" - -#include "tsvector.h" -#include "query.h" -#include "ts_cfg.h" -#include "common.h" - -PG_FUNCTION_INFO_V1(strip); -Datum strip(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(setweight); -Datum setweight(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(concat); -Datum concat(PG_FUNCTION_ARGS); - -Datum -strip(PG_FUNCTION_ARGS) -{ - tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - tsvector *out; - int 
i, - len = 0; - WordEntry *arrin = ARRPTR(in), - *arrout; - char *cur; - - for (i = 0; i < in->size; i++) - len += SHORTALIGN(arrin[i].len); - - len = CALCDATASIZE(in->size, len); - out = (tsvector *) palloc0(len); - SET_VARSIZE(out, len); - out->size = in->size; - arrout = ARRPTR(out); - cur = STRPTR(out); - for (i = 0; i < in->size; i++) - { - memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len); - arrout[i].haspos = 0; - arrout[i].len = arrin[i].len; - arrout[i].pos = cur - STRPTR(out); - cur += SHORTALIGN(arrout[i].len); - } - - PG_FREE_IF_COPY(in, 0); - PG_RETURN_POINTER(out); -} - -Datum -setweight(PG_FUNCTION_ARGS) -{ - tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - char cw = PG_GETARG_CHAR(1); - tsvector *out; - int i, - j; - WordEntry *entry; - WordEntryPos *p; - int w = 0; - - switch (cw) - { - case 'A': - case 'a': - w = 3; - break; - case 'B': - case 'b': - w = 2; - break; - case 'C': - case 'c': - w = 1; - break; - case 'D': - case 'd': - w = 0; - break; - /* internal error */ - default: - elog(ERROR, "unrecognized weight"); - } - - out = (tsvector *) palloc(VARSIZE(in)); - memcpy(out, in, VARSIZE(in)); - entry = ARRPTR(out); - i = out->size; - while (i--) - { - if ((j = POSDATALEN(out, entry)) != 0) - { - p = POSDATAPTR(out, entry); - while (j--) - { - WEP_SETWEIGHT(*p, w); - p++; - } - } - entry++; - } - - PG_FREE_IF_COPY(in, 0); - PG_RETURN_POINTER(out); -} - -static int -compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b) -{ - if (a->len == b->len) - { - return strncmp( - ptra + a->pos, - ptrb + b->pos, - a->len); - } - return (a->len > b->len) ? 
1 : -1; -} - -static int4 -add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos) -{ - uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr); - int i; - uint16 slen = POSDATALEN(src, srcptr), - startlen; - WordEntryPos *spos = POSDATAPTR(src, srcptr), - *dpos = POSDATAPTR(dest, destptr); - - if (!destptr->haspos) - *clen = 0; - - startlen = *clen; - for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++) - { - WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i])); - WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos)); - (*clen)++; - } - - if (*clen != startlen) - destptr->haspos = 1; - return *clen - startlen; -} - - -Datum -concat(PG_FUNCTION_ARGS) -{ - tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - tsvector *out; - WordEntry *ptr; - WordEntry *ptr1, - *ptr2; - WordEntryPos *p; - int maxpos = 0, - i, - j, - i1, - i2; - char *cur; - char *data, - *data1, - *data2; - - ptr = ARRPTR(in1); - i = in1->size; - while (i--) - { - if ((j = POSDATALEN(in1, ptr)) != 0) - { - p = POSDATAPTR(in1, ptr); - while (j--) - { - if (WEP_GETPOS(*p) > maxpos) - maxpos = WEP_GETPOS(*p); - p++; - } - } - ptr++; - } - - ptr1 = ARRPTR(in1); - ptr2 = ARRPTR(in2); - data1 = STRPTR(in1); - data2 = STRPTR(in2); - i1 = in1->size; - i2 = in2->size; - out = (tsvector *) palloc0(VARSIZE(in1) + VARSIZE(in2)); - SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2)); - out->size = in1->size + in2->size; - data = cur = STRPTR(out); - ptr = ARRPTR(out); - while (i1 && i2) - { - int cmp = compareEntry(data1, ptr1, data2, ptr2); - - if (cmp < 0) - { /* in1 first */ - ptr->haspos = ptr1->haspos; - ptr->len = ptr1->len; - memcpy(cur, data1 + ptr1->pos, ptr1->len); - ptr->pos = cur - data; - cur += SHORTALIGN(ptr1->len); - if (ptr->haspos) - { - memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * 
sizeof(WordEntryPos) + sizeof(uint16)); - cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); - } - ptr++; - ptr1++; - i1--; - } - else if (cmp > 0) - { /* in2 first */ - ptr->haspos = ptr2->haspos; - ptr->len = ptr2->len; - memcpy(cur, data2 + ptr2->pos, ptr2->len); - ptr->pos = cur - data; - cur += SHORTALIGN(ptr2->len); - if (ptr->haspos) - { - int addlen = add_pos(in2, ptr2, out, ptr, maxpos); - - if (addlen == 0) - ptr->haspos = 0; - else - cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); - } - ptr++; - ptr2++; - i2--; - } - else - { - ptr->haspos = ptr1->haspos | ptr2->haspos; - ptr->len = ptr1->len; - memcpy(cur, data1 + ptr1->pos, ptr1->len); - ptr->pos = cur - data; - cur += SHORTALIGN(ptr1->len); - if (ptr->haspos) - { - if (ptr1->haspos) - { - memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); - cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); - if (ptr2->haspos) - cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); - } - else if (ptr2->haspos) - { - int addlen = add_pos(in2, ptr2, out, ptr, maxpos); - - if (addlen == 0) - ptr->haspos = 0; - else - cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); - } - } - ptr++; - ptr1++; - ptr2++; - i1--; - i2--; - } - } - - while (i1) - { - ptr->haspos = ptr1->haspos; - ptr->len = ptr1->len; - memcpy(cur, data1 + ptr1->pos, ptr1->len); - ptr->pos = cur - data; - cur += SHORTALIGN(ptr1->len); - if (ptr->haspos) - { - memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); - cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); - } - ptr++; - ptr1++; - i1--; - } - - while (i2) - { - ptr->haspos = ptr2->haspos; - ptr->len = ptr2->len; - memcpy(cur, data2 + ptr2->pos, ptr2->len); - ptr->pos = cur - data; - cur += SHORTALIGN(ptr2->len); - if (ptr->haspos) - { - int addlen = add_pos(in2, ptr2, out, ptr, maxpos); - - if (addlen == 0) - ptr->haspos = 0; 
- else - cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); - } - ptr++; - ptr2++; - i2--; - } - - out->size = ptr - ARRPTR(out); - SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data)); - if (data != STRPTR(out)) - memmove(STRPTR(out), data, cur - data); - - PG_FREE_IF_COPY(in1, 0); - PG_FREE_IF_COPY(in2, 1); - PG_RETURN_POINTER(out); -} diff --git a/contrib/tsearch2/uninstall_tsearch2.sql b/contrib/tsearch2/uninstall_tsearch2.sql new file mode 100644 index 0000000000..8eaf4a54de --- /dev/null +++ b/contrib/tsearch2/uninstall_tsearch2.sql @@ -0,0 +1,95 @@ +/* $PostgreSQL: pgsql/contrib/tsearch2/uninstall_tsearch2.sql,v 1.1 2007/11/13 21:02:29 tgl Exp $ */ + +-- Adjust this setting to control where the objects get dropped. +SET search_path = public, pg_catalog; + +DROP DOMAIN tsvector CASCADE; +DROP DOMAIN tsquery CASCADE; +DROP DOMAIN gtsvector CASCADE; +DROP DOMAIN gtsq CASCADE; + +DROP TYPE tokentype CASCADE; +DROP TYPE tokenout CASCADE; +DROP TYPE statinfo CASCADE; +DROP TYPE tsdebug CASCADE; + +DROP OPERATOR CLASS tsquery_ops USING btree CASCADE; + +DROP OPERATOR CLASS tsvector_ops USING btree CASCADE; + +DROP OPERATOR CLASS gin_tsvector_ops USING gin CASCADE; + +DROP OPERATOR CLASS gist_tp_tsquery_ops USING gist CASCADE; + +DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE; + +DROP FUNCTION lexize(oid, text) ; +DROP FUNCTION lexize(text, text); +DROP FUNCTION lexize(text); +DROP FUNCTION set_curdict(int); +DROP FUNCTION set_curdict(text); +DROP FUNCTION dex_init(internal); +DROP FUNCTION dex_lexize(internal,internal,int4); +DROP FUNCTION snb_en_init(internal); +DROP FUNCTION snb_lexize(internal,internal,int4); +DROP FUNCTION snb_ru_init_koi8(internal); +DROP FUNCTION snb_ru_init_utf8(internal); +DROP FUNCTION spell_init(internal); +DROP FUNCTION spell_lexize(internal,internal,int4); +DROP FUNCTION syn_init(internal); +DROP FUNCTION syn_lexize(internal,internal,int4); +DROP FUNCTION thesaurus_init(internal); +DROP FUNCTION 
thesaurus_lexize(internal,internal,int4,internal); +DROP FUNCTION set_curprs(int); +DROP FUNCTION set_curprs(text); +DROP FUNCTION prsd_start(internal,int4); +DROP FUNCTION prsd_getlexeme(internal,internal,internal); +DROP FUNCTION prsd_end(internal); +DROP FUNCTION prsd_lextype(internal); +DROP FUNCTION prsd_headline(internal,internal,internal); +DROP FUNCTION set_curcfg(int); +DROP FUNCTION set_curcfg(text); +DROP FUNCTION show_curcfg(); +DROP FUNCTION length(tsvector); +DROP FUNCTION to_tsvector(oid, text); +DROP FUNCTION to_tsvector(text, text); +DROP FUNCTION to_tsvector(text); +DROP FUNCTION strip(tsvector); +DROP FUNCTION setweight(tsvector,"char"); +DROP FUNCTION concat(tsvector,tsvector); +DROP FUNCTION querytree(tsquery); +DROP FUNCTION to_tsquery(oid, text); +DROP FUNCTION to_tsquery(text, text); +DROP FUNCTION to_tsquery(text); +DROP FUNCTION plainto_tsquery(oid, text); +DROP FUNCTION plainto_tsquery(text, text); +DROP FUNCTION plainto_tsquery(text); +DROP FUNCTION tsearch2() CASCADE; +DROP FUNCTION rank(float4[], tsvector, tsquery); +DROP FUNCTION rank(float4[], tsvector, tsquery, int4); +DROP FUNCTION rank(tsvector, tsquery); +DROP FUNCTION rank(tsvector, tsquery, int4); +DROP FUNCTION rank_cd(float4[], tsvector, tsquery); +DROP FUNCTION rank_cd(float4[], tsvector, tsquery, int4); +DROP FUNCTION rank_cd(tsvector, tsquery); +DROP FUNCTION rank_cd(tsvector, tsquery, int4); +DROP FUNCTION headline(oid, text, tsquery, text); +DROP FUNCTION headline(oid, text, tsquery); +DROP FUNCTION headline(text, text, tsquery, text); +DROP FUNCTION headline(text, text, tsquery); +DROP FUNCTION headline(text, tsquery, text); +DROP FUNCTION headline(text, tsquery); +DROP FUNCTION get_covers(tsvector,tsquery); +DROP FUNCTION _get_parser_from_curcfg(); +DROP FUNCTION numnode(tsquery); +DROP FUNCTION tsquery_and(tsquery,tsquery); +DROP FUNCTION tsquery_or(tsquery,tsquery); +DROP FUNCTION tsquery_not(tsquery); +DROP FUNCTION rewrite(tsquery, text); +DROP FUNCTION 
rewrite(tsquery, tsquery, tsquery); +DROP AGGREGATE rewrite (tsquery[]); +DROP FUNCTION rewrite_accum(tsquery,tsquery[]); +DROP FUNCTION rewrite_finish(tsquery); +DROP FUNCTION tsq_mcontains(tsquery, tsquery); +DROP FUNCTION tsq_mcontained(tsquery, tsquery); +DROP FUNCTION reset_tsearch(); diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in deleted file mode 100644 index c6a69e019c..0000000000 --- a/contrib/tsearch2/untsearch.sql.in +++ /dev/null @@ -1,71 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/untsearch.sql.in,v 1.10 2007/11/13 04:24:29 momjian Exp $ */ - --- Adjust this setting to control where the objects get dropped. -SET search_path = public; - -DROP OPERATOR CLASS gin_tsvector_ops USING gin CASCADE; - -DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE; - - -DROP OPERATOR || (tsvector, tsvector); -DROP OPERATOR @@ (tsvector, tsquery); -DROP OPERATOR @@ (tsquery, tsvector); - ---DROP AGGREGATE stat(tsvector); - -DROP TABLE pg_ts_dict; -DROP TABLE pg_ts_parser; -DROP TABLE pg_ts_cfg; -DROP TABLE pg_ts_cfgmap; - -DROP TYPE tokentype CASCADE; -DROP TYPE tokenout CASCADE; -DROP TYPE tsvector CASCADE; -DROP TYPE tsquery CASCADE; -DROP TYPE gtsvector CASCADE; ---DROP TYPE tsstat CASCADE; -DROP TYPE statinfo CASCADE; -DROP TYPE tsdebug CASCADE; -DROP TYPE gtsq CASCADE; - -DROP FUNCTION lexize(oid, text) ; -DROP FUNCTION lexize(text, text); -DROP FUNCTION lexize(text); -DROP FUNCTION set_curdict(int); -DROP FUNCTION set_curdict(text); -DROP FUNCTION dex_init(internal); -DROP FUNCTION dex_lexize(internal,internal,int4); -DROP FUNCTION snb_en_init(internal); -DROP FUNCTION snb_lexize(internal,internal,int4); -DROP FUNCTION snb_ru_init_koi8(internal); -DROP FUNCTION snb_ru_init_utf8(internal); -DROP FUNCTION spell_init(internal); -DROP FUNCTION spell_lexize(internal,internal,int4); -DROP FUNCTION thesaurus_init(internal); -DROP FUNCTION thesaurus_lexize(internal,internal,int4,internal); -DROP FUNCTION syn_init(internal); -DROP 
FUNCTION syn_lexize(internal,internal,int4); -DROP FUNCTION set_curprs(int); -DROP FUNCTION set_curprs(text); -DROP FUNCTION prsd_start(internal,int4); -DROP FUNCTION prsd_getlexeme(internal,internal,internal); -DROP FUNCTION prsd_end(internal); -DROP FUNCTION prsd_lextype(internal); -DROP FUNCTION prsd_headline(internal,internal,internal); -DROP FUNCTION set_curcfg(int); -DROP FUNCTION set_curcfg(text); -DROP FUNCTION show_curcfg(); -DROP FUNCTION gtsvector_compress(internal); -DROP FUNCTION gtsvector_decompress(internal); -DROP FUNCTION gtsvector_penalty(internal,internal,internal); -DROP FUNCTION gtsvector_picksplit(internal, internal); -DROP FUNCTION gtsvector_union(internal, internal); -DROP FUNCTION gtsq_compress(internal); -DROP FUNCTION gtsq_decompress(internal); -DROP FUNCTION gtsq_penalty(internal,internal,internal); -DROP FUNCTION gtsq_picksplit(internal, internal); -DROP FUNCTION gtsq_union(bytea, internal); -DROP FUNCTION reset_tsearch(); -DROP FUNCTION tsearch2() CASCADE; -DROP FUNCTION _get_parser_from_curcfg(); diff --git a/contrib/tsearch2/wordparser/Makefile b/contrib/tsearch2/wordparser/Makefile deleted file mode 100644 index 2d1e739599..0000000000 --- a/contrib/tsearch2/wordparser/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.10 2007/06/26 22:05:03 tgl Exp $ - -SUBOBJS = parser.o deflex.o - -EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) - -PG_CPPFLAGS = -I$(srcdir)/.. - -ifdef USE_PGXS -PG_CONFIG = pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -include $(PGXS) -else -subdir = contrib/tsearch2/wordparser -top_builddir = ../../.. 
-include $(top_builddir)/src/Makefile.global -include $(top_srcdir)/contrib/contrib-global.mk -endif - -override CFLAGS += $(CFLAGS_SL) - -all: SUBSYS.o - -SUBSYS.o: $(SUBOBJS) - $(LD) $(LDREL) $(LDOUT) $@ $^ - - diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c deleted file mode 100644 index 004069f48e..0000000000 --- a/contrib/tsearch2/wordparser/deflex.c +++ /dev/null @@ -1,57 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/deflex.c,v 1.4 2006/03/11 04:38:30 momjian Exp $ */ - -#include "deflex.h" - -const char *lex_descr[] = { - "", - "Latin word", - "Non-latin word", - "Word", - "Email", - "URL", - "Host", - "Scientific notation", - "VERSION", - "Part of hyphenated word", - "Non-latin part of hyphenated word", - "Latin part of hyphenated word", - "Space symbols", - "HTML Tag", - "Protocol head", - "Hyphenated word", - "Latin hyphenated word", - "Non-latin hyphenated word", - "URI", - "File or path name", - "Decimal notation", - "Signed integer", - "Unsigned integer", - "HTML Entity" -}; - -const char *tok_alias[] = { - "", - "lword", - "nlword", - "word", - "email", - "url", - "host", - "sfloat", - "version", - "part_hword", - "nlpart_hword", - "lpart_hword", - "blank", - "tag", - "protocol", - "hword", - "lhword", - "nlhword", - "uri", - "file", - "float", - "int", - "uint", - "entity" -}; diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h deleted file mode 100644 index 72852e4485..0000000000 --- a/contrib/tsearch2/wordparser/deflex.h +++ /dev/null @@ -1,36 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/deflex.h,v 1.3 2006/03/11 04:38:30 momjian Exp $ */ - -#ifndef __DEFLEX_H__ -#define __DEFLEX_H__ - -/* rememder !!!! 
*/ -#define LASTNUM 23 - -#define LATWORD 1 -#define CYRWORD 2 -#define UWORD 3 -#define EMAIL 4 -#define FURL 5 -#define HOST 6 -#define SCIENTIFIC 7 -#define VERSIONNUMBER 8 -#define PARTHYPHENWORD 9 -#define CYRPARTHYPHENWORD 10 -#define LATPARTHYPHENWORD 11 -#define SPACE 12 -#define TAG 13 -#define PROTOCOL 14 -#define HYPHENWORD 15 -#define LATHYPHENWORD 16 -#define CYRHYPHENWORD 17 -#define URI 18 -#define FILEPATH 19 -#define DECIMAL 20 -#define SIGNEDINT 21 -#define UNSIGNEDINT 22 -#define HTMLENTITY 23 - -extern const char *lex_descr[]; -extern const char *tok_alias[]; - -#endif diff --git a/contrib/tsearch2/wordparser/parser.c b/contrib/tsearch2/wordparser/parser.c deleted file mode 100644 index c7b7459e59..0000000000 --- a/contrib/tsearch2/wordparser/parser.c +++ /dev/null @@ -1,1266 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.13 2007/03/22 15:58:24 teodor Exp $ */ - -#include "postgres.h" - -#include "utils/builtins.h" -#include "utils/pg_locale.h" -#include "mb/pg_wchar.h" - -#include "deflex.h" -#include "parser.h" -#include "ts_locale.h" - - -static TParserPosition * -newTParserPosition(TParserPosition * prev) -{ - TParserPosition *res = (TParserPosition *) palloc(sizeof(TParserPosition)); - - if (prev) - memcpy(res, prev, sizeof(TParserPosition)); - else - memset(res, 0, sizeof(TParserPosition)); - - res->prev = prev; - - res->pushedAtAction = NULL; - - return res; -} - -TParser * -TParserInit(char *str, int len) -{ - TParser *prs = (TParser *) palloc0(sizeof(TParser)); - - prs->charmaxlen = pg_database_encoding_max_length(); - prs->str = str; - prs->lenstr = len; - -#ifdef TS_USE_WIDE - - /* - * Use wide char code only when max encoding length > 1. 
- */ - - if (prs->charmaxlen > 1) - { - prs->usewide = true; - prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1)); - prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); - } - else -#endif - prs->usewide = false; - - prs->state = newTParserPosition(NULL); - prs->state->state = TPS_Base; - - return prs; -} - -void -TParserClose(TParser * prs) -{ - while (prs->state) - { - TParserPosition *ptr = prs->state->prev; - - pfree(prs->state); - prs->state = ptr; - } - -#ifdef TS_USE_WIDE - if (prs->wstr) - pfree(prs->wstr); -#endif - - pfree(prs); -} - -/* - * defining support function, equvalent is* macroses, but - * working with any possible encodings and locales. Note, - * that with multibyte encoding and C-locale isw* function may fail - * or give wrong result. Note 2: multibyte encoding and C-locale - * often are used for Asian languages. - */ - -#ifdef TS_USE_WIDE - -#define p_iswhat(type) \ -static int \ -p_is##type(TParser *prs) { \ - Assert( prs->state ); \ - if ( prs->usewide ) \ - { \ - if ( lc_ctype_is_c() ) \ - return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \ - \ - return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \ - } \ - \ - return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \ -} \ - \ -static int \ -p_isnot##type(TParser *prs) { \ - return !p_is##type(prs); \ -} - -static int -p_isalnum(TParser *prs) -{ - Assert( prs->state ); - - if (prs->usewide) - { - if (lc_ctype_is_c()) - { - unsigned int c = *(prs->wstr + prs->state->poschar); - - /* - * any non-ascii symbol with multibyte encoding - * with C-locale is an alpha character - */ - if ( c > 0x7f ) - return 1; - - return isalnum(0xff & c); - } - - return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar)); - } - - return isalnum( *(unsigned char*)( prs->str + prs->state->posbyte )); -} - -static int -p_isnotalnum(TParser *prs) -{ - return !p_isalnum(prs); -} - -static int -p_isalpha(TParser *prs) -{ - Assert( prs->state ); - - 
if (prs->usewide) - { - if (lc_ctype_is_c()) - { - unsigned int c = *(prs->wstr + prs->state->poschar); - - /* - * any non-ascii symbol with multibyte encoding - * with C-locale is an alpha character - */ - if ( c > 0x7f ) - return 1; - - return isalpha(0xff & c); - } - - return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar)); - } - - return isalpha( *(unsigned char*)( prs->str + prs->state->posbyte )); -} - -static int -p_isnotalpha(TParser *prs) -{ - return !p_isalpha(prs); -} - -/* p_iseq should be used only for ascii symbols */ - -static int -p_iseq(TParser * prs, char c) -{ - Assert(prs->state); - return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0; -} - -#else /* TS_USE_WIDE */ - -#define p_iswhat(type) \ -static int \ -p_is##type(TParser *prs) { \ - Assert( prs->state ); \ - return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \ -} \ - \ -static int \ -p_isnot##type(TParser *prs) { \ - return !p_is##type(prs); \ -} - - -static int -p_iseq(TParser * prs, char c) -{ - Assert(prs->state); - return (*(prs->str + prs->state->posbyte) == c) ? 1 : 0; -} - -p_iswhat(alnum) -p_iswhat(alpha) - -#endif /* TS_USE_WIDE */ - -p_iswhat(digit) -p_iswhat(lower) -p_iswhat(print) -p_iswhat(punct) -p_iswhat(space) -p_iswhat(upper) -p_iswhat(xdigit) - -static int -p_isEOF(TParser * prs) -{ - Assert(prs->state); - return (prs->state->posbyte == prs->lenstr || prs->state->charlen == 0) ? 1 : 0; -} - -static int -p_iseqC(TParser * prs) -{ - return p_iseq(prs, prs->c); -} - -static int -p_isneC(TParser * prs) -{ - return !p_iseq(prs, prs->c); -} - -static int -p_isascii(TParser * prs) -{ - return (prs->state->charlen == 1 && isascii((unsigned char) *(prs->str + prs->state->posbyte))) ? 1 : 0; -} - -static int -p_islatin(TParser * prs) -{ - return (p_isalpha(prs) && p_isascii(prs)) ? 1 : 0; -} - -static int -p_isnonlatin(TParser * prs) -{ - return (p_isalpha(prs) && !p_isascii(prs)) ? 
1 : 0; -} - -void _make_compiler_happy(void); -void -_make_compiler_happy(void) -{ - p_isalnum(NULL); - p_isnotalnum(NULL); - p_isalpha(NULL); - p_isnotalpha(NULL); - p_isdigit(NULL); - p_isnotdigit(NULL); - p_islower(NULL); - p_isnotlower(NULL); - p_isprint(NULL); - p_isnotprint(NULL); - p_ispunct(NULL); - p_isnotpunct(NULL); - p_isspace(NULL); - p_isnotspace(NULL); - p_isupper(NULL); - p_isnotupper(NULL); - p_isxdigit(NULL); - p_isnotxdigit(NULL); - p_isEOF(NULL); - p_iseqC(NULL); - p_isneC(NULL); -} - - -static void -SpecialTags(TParser * prs) -{ - switch (prs->state->lencharlexeme) - { - case 8: /* </script */ - if (pg_strncasecmp(prs->lexeme, "</script", 8) == 0) - prs->ignore = false; - break; - case 7: /* <script || </style */ - if (pg_strncasecmp(prs->lexeme, "</style", 7) == 0) - prs->ignore = false; - else if (pg_strncasecmp(prs->lexeme, "<script", 7) == 0) - prs->ignore = true; - break; - case 6: /* <style */ - if (pg_strncasecmp(prs->lexeme, "<style", 6) == 0) - prs->ignore = true; - break; - default: - break; - } -} - -static void -SpecialFURL(TParser * prs) -{ - prs->wanthost = true; - prs->state->posbyte -= prs->state->lenbytelexeme; - prs->state->poschar -= prs->state->lencharlexeme; -} - -static void -SpecialHyphen(TParser * prs) -{ - prs->state->posbyte -= prs->state->lenbytelexeme; - prs->state->poschar -= prs->state->lencharlexeme; -} - -static void -SpecialVerVersion(TParser * prs) -{ - prs->state->posbyte -= prs->state->lenbytelexeme; - prs->state->poschar -= prs->state->lencharlexeme; - prs->state->lenbytelexeme = 0; - prs->state->lencharlexeme = 0; -} - -static int -p_isstophost(TParser * prs) -{ - if (prs->wanthost) - { - prs->wanthost = false; - return 1; - } - return 0; -} - -static int -p_isignore(TParser * prs) -{ - return (prs->ignore) ? 
1 : 0; -} - -static int -p_ishost(TParser * prs) -{ - TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); - int res = 0; - - if (TParserGet(tmpprs) && tmpprs->type == HOST) - { - prs->state->posbyte += tmpprs->lenbytelexeme; - prs->state->poschar += tmpprs->lencharlexeme; - prs->state->lenbytelexeme += tmpprs->lenbytelexeme; - prs->state->lencharlexeme += tmpprs->lencharlexeme; - prs->state->charlen = tmpprs->state->charlen; - res = 1; - } - TParserClose(tmpprs); - - return res; -} - -static int -p_isURI(TParser * prs) -{ - TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); - int res = 0; - - tmpprs->state = newTParserPosition(tmpprs->state); - tmpprs->state->state = TPS_InFileFirst; - - if (TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH)) - { - prs->state->posbyte += tmpprs->lenbytelexeme; - prs->state->poschar += tmpprs->lencharlexeme; - prs->state->lenbytelexeme += tmpprs->lenbytelexeme; - prs->state->lencharlexeme += tmpprs->lencharlexeme; - prs->state->charlen = tmpprs->state->charlen; - res = 1; - } - TParserClose(tmpprs); - - return res; -} - -/* - * Table of state/action of parser - */ - -#define A_NEXT 0x0000 -#define A_BINGO 0x0001 -#define A_POP 0x0002 -#define A_PUSH 0x0004 -#define A_RERUN 0x0008 -#define A_CLEAR 0x0010 -#define A_MERGE 0x0020 -#define A_CLRALL 0x0040 - -static TParserStateActionItem actionTPS_Base[] = { - {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL}, - {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, - {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, - {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL}, - {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, 
- {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL}, - {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL} -}; - - -static TParserStateActionItem actionTPS_InUWord[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL}, - {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, - {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, UWORD, NULL} -}; - -static TParserStateActionItem actionTPS_InLatWord[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL}, - {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst, 0, NULL}, - {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, - {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, - {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL} -}; - -static TParserStateActionItem actionTPS_InCyrWord[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL} -}; - -static TParserStateActionItem actionTPS_InUnsignedInt[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, - {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL}, - {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL}, - 
{p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL} -}; - -static TParserStateActionItem actionTPS_InSignedIntFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InSignedInt[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}, - {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL}, - {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL} -}; - -static TParserStateActionItem actionTPS_InSpace[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL}, - {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL}, - {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL}, - {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL}, - {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL}, - {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL}, - {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL} -}; - -static TParserStateActionItem actionTPS_InUDecimalFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InUDecimal[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, - {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} -}; - -static TParserStateActionItem actionTPS_InDecimalFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL}, - {NULL, 0, A_POP, 
TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InDecimal[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL}, - {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} -}; - -static TParserStateActionItem actionTPS_InVerVersion[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InSVerVersion[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL}, - {NULL, 0, A_NEXT, TPS_Null, 0, NULL} -}; - - -static TParserStateActionItem actionTPS_InVersionFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InVersion[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL} -}; - -static TParserStateActionItem actionTPS_InMantissaFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, - {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InMantissaSign[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InMantissa[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL}, - {NULL, 0, 
A_BINGO, TPS_Base, SCIENTIFIC, NULL} -}; - -static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHTMLEntity[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, - {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHTMLEntityNum[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, - {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = { - {NULL, 0, A_BINGO | A_CLEAR, TPS_Base, HTMLENTITY, NULL} -}; - -static TParserStateActionItem actionTPS_InTagFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL}, - {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL}, - {p_iseqC, '?', A_PUSH, TPS_InXMLBegin, 0, NULL}, - {p_islatin, 0, A_PUSH, TPS_InTagName, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InXMLBegin[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - /* <?xml ... 
*/ - {p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL}, - {p_iseqC, 'X', A_NEXT, TPS_InTag, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagCloseFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InTagName, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagName[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - /* <br/> case */ - {p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL}, - {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags}, - {p_isspace, 0, A_NEXT, TPS_InTag, 0, SpecialTags}, - {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagBeginEnd[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTag[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags}, - {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL}, - {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '/', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL}, - {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagEscapeK[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, - {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, 
NULL}, - {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagEscapeKK[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, - {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL}, - {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagBackSleshed[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {NULL, 0, A_MERGE, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InTagEnd[] = { - {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG, NULL} -}; - -static TParserStateActionItem actionTPS_InCommentFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL}, - /* <!DOCTYPE ...> */ - {p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL}, - {p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InCommentLast[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InComment[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst, 0, NULL}, - {NULL, 0, A_NEXT, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InCloseCommentFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL}, - {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InCloseCommentLast[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, - {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL}, - {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InCommentEnd[] = { - {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG, NULL} -}; - -static TParserStateActionItem actionTPS_InHostFirstDomain[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, 
NULL}, - {p_islatin, 0, A_NEXT, TPS_InHostDomainSecond, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHostDomainSecond[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHostDomain, 0, NULL}, - {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, - {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHostDomain[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHostDomain, 0, NULL}, - {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, - {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, - {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, - {p_isdigit, 0, A_POP, TPS_Null, 0, NULL}, - {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL} -}; - -static TParserStateActionItem actionTPS_InPortFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InPort[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, - {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL} -}; - -static TParserStateActionItem actionTPS_InHostFirstAN[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static 
TParserStateActionItem actionTPS_InHost[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, - {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InEmail[] = { - {p_ishost, 0, A_BINGO | A_CLRALL, TPS_Base, EMAIL, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InFileFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL}, - {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, - {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InFileTwiddle[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InPathFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL}, - {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InPathFirstFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL}, - {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static 
TParserStateActionItem actionTPS_InPathSecond[] = { - {p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL}, - {p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL}, - {p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InFile[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL}, - {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, - {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL} -}; - -static TParserStateActionItem actionTPS_InFileNext[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL}, - {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InURIFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '"', A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL}, - {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL}, -}; - -static TParserStateActionItem actionTPS_InURIStart[] = { - {NULL, 0, A_NEXT, TPS_InURI, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InURI[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL}, - {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL}, - {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL}, - {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, URI, NULL} -}; - -static TParserStateActionItem actionTPS_InFURL[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isURI, 0, A_BINGO | A_CLRALL, TPS_Base, FURL, SpecialFURL}, - {NULL, 0, A_POP, 
TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InProtocolFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InProtocolSecond[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InProtocolEnd[] = { - {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenLatWord[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}, - {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen} -}; - -static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenCyrWord[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, - {p_islatin, 0, 
A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen} -}; - -static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, - {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenUWord[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, - {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} -}; - -static TParserStateActionItem actionTPS_InHyphenValueFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenValue[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, - {p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} -}; - -static TParserStateActionItem actionTPS_InHyphenValueExact[] = { - {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, - {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} -}; - -static TParserStateActionItem actionTPS_InParseHyphen[] = { - {p_isEOF, 0, A_RERUN, 
TPS_Base, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart, 0, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt, 0, NULL}, - {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen, 0, NULL}, - {NULL, 0, A_RERUN, TPS_Base, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart, 0, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, - {NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD, NULL}, - {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart, 0, NULL}, - {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, - {NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenUWordPart[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL}, - {p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, - {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL} -}; - -static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt, 0, NULL}, - {p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL} -}; - -static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, 
NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHDecimalPart[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL} -}; - -static TParserStateActionItem actionTPS_InHVersionPartFirst[] = { - {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL}, - {NULL, 0, A_POP, TPS_Null, 0, NULL} -}; - -static TParserStateActionItem actionTPS_InHVersionPart[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, - {p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst, 0, NULL}, - {NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL} -}; - -/* - * order should be the same as in typedef enum {} TParserState!! - */ - -static const TParserStateAction Actions[] = { - {TPS_Base, actionTPS_Base}, - {TPS_InUWord, actionTPS_InUWord}, - {TPS_InLatWord, actionTPS_InLatWord}, - {TPS_InCyrWord, actionTPS_InCyrWord}, - {TPS_InUnsignedInt, actionTPS_InUnsignedInt}, - {TPS_InSignedIntFirst, actionTPS_InSignedIntFirst}, - {TPS_InSignedInt, actionTPS_InSignedInt}, - {TPS_InSpace, actionTPS_InSpace}, - {TPS_InUDecimalFirst, actionTPS_InUDecimalFirst}, - {TPS_InUDecimal, actionTPS_InUDecimal}, - {TPS_InDecimalFirst, actionTPS_InDecimalFirst}, - {TPS_InDecimal, actionTPS_InDecimal}, - {TPS_InVerVersion, actionTPS_InVerVersion}, - {TPS_InSVerVersion, actionTPS_InSVerVersion}, - {TPS_InVersionFirst, actionTPS_InVersionFirst}, - {TPS_InVersion, actionTPS_InVersion}, - {TPS_InMantissaFirst, actionTPS_InMantissaFirst}, - {TPS_InMantissaSign, actionTPS_InMantissaSign}, - {TPS_InMantissa, actionTPS_InMantissa}, - {TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst}, - {TPS_InHTMLEntity, actionTPS_InHTMLEntity}, - {TPS_InHTMLEntityNumFirst, 
actionTPS_InHTMLEntityNumFirst}, - {TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum}, - {TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd}, - {TPS_InTagFirst, actionTPS_InTagFirst}, - {TPS_InXMLBegin, actionTPS_InXMLBegin}, - {TPS_InTagCloseFirst, actionTPS_InTagCloseFirst}, - {TPS_InTagName, actionTPS_InTagName}, - {TPS_InTagBeginEnd, actionTPS_InTagBeginEnd}, - {TPS_InTag, actionTPS_InTag}, - {TPS_InTagEscapeK, actionTPS_InTagEscapeK}, - {TPS_InTagEscapeKK, actionTPS_InTagEscapeKK}, - {TPS_InTagBackSleshed, actionTPS_InTagBackSleshed}, - {TPS_InTagEnd, actionTPS_InTagEnd}, - {TPS_InCommentFirst, actionTPS_InCommentFirst}, - {TPS_InCommentLast, actionTPS_InCommentLast}, - {TPS_InComment, actionTPS_InComment}, - {TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst}, - {TPS_InCloseCommentLast, actionTPS_InCloseCommentLast}, - {TPS_InCommentEnd, actionTPS_InCommentEnd}, - {TPS_InHostFirstDomain, actionTPS_InHostFirstDomain}, - {TPS_InHostDomainSecond, actionTPS_InHostDomainSecond}, - {TPS_InHostDomain, actionTPS_InHostDomain}, - {TPS_InPortFirst, actionTPS_InPortFirst}, - {TPS_InPort, actionTPS_InPort}, - {TPS_InHostFirstAN, actionTPS_InHostFirstAN}, - {TPS_InHost, actionTPS_InHost}, - {TPS_InEmail, actionTPS_InEmail}, - {TPS_InFileFirst, actionTPS_InFileFirst}, - {TPS_InFileTwiddle, actionTPS_InFileTwiddle}, - {TPS_InPathFirst, actionTPS_InPathFirst}, - {TPS_InPathFirstFirst, actionTPS_InPathFirstFirst}, - {TPS_InPathSecond, actionTPS_InPathSecond}, - {TPS_InFile, actionTPS_InFile}, - {TPS_InFileNext, actionTPS_InFileNext}, - {TPS_InURIFirst, actionTPS_InURIFirst}, - {TPS_InURIStart, actionTPS_InURIStart}, - {TPS_InURI, actionTPS_InURI}, - {TPS_InFURL, actionTPS_InFURL}, - {TPS_InProtocolFirst, actionTPS_InProtocolFirst}, - {TPS_InProtocolSecond, actionTPS_InProtocolSecond}, - {TPS_InProtocolEnd, actionTPS_InProtocolEnd}, - {TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst}, - {TPS_InHyphenLatWord, actionTPS_InHyphenLatWord}, - {TPS_InHyphenCyrWordFirst, 
actionTPS_InHyphenCyrWordFirst}, - {TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord}, - {TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst}, - {TPS_InHyphenUWord, actionTPS_InHyphenUWord}, - {TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst}, - {TPS_InHyphenValue, actionTPS_InHyphenValue}, - {TPS_InHyphenValueExact, actionTPS_InHyphenValueExact}, - {TPS_InParseHyphen, actionTPS_InParseHyphen}, - {TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen}, - {TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart}, - {TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart}, - {TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart}, - {TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt}, - {TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst}, - {TPS_InHDecimalPart, actionTPS_InHDecimalPart}, - {TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst}, - {TPS_InHVersionPart, actionTPS_InHVersionPart}, - {TPS_Null, NULL} -}; - - -bool -TParserGet(TParser * prs) -{ - TParserStateActionItem *item = NULL; - - if (prs->state->posbyte >= prs->lenstr) - return false; - - Assert(prs->state); - prs->lexeme = prs->str + prs->state->posbyte; - prs->state->pushedAtAction = NULL; - - /* look at string */ - while (prs->state->posbyte <= prs->lenstr) - { - if (prs->state->posbyte == prs->lenstr) - prs->state->charlen = 0; - else - prs->state->charlen = (prs->charmaxlen == 1) ? 
prs->charmaxlen : - pg_mblen(prs->str + prs->state->posbyte); - - Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr); - Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null); - Assert(Actions[prs->state->state].state == prs->state->state); - - item = Actions[prs->state->state].action; - Assert(item != NULL); - - if (item < prs->state->pushedAtAction) - item = prs->state->pushedAtAction; - - /* find action by character class */ - while (item->isclass) - { - prs->c = item->c; - if (item->isclass(prs) != 0) - { - if (item > prs->state->pushedAtAction) /* remember: after - * pushing we were by - * false way */ - break; - } - item++; - } - - prs->state->pushedAtAction = NULL; - - /* call special handler if exists */ - if (item->special) - item->special(prs); - - /* BINGO, lexeme is found */ - if (item->flags & A_BINGO) - { - Assert(item->type > 0); - prs->lenbytelexeme = prs->state->lenbytelexeme; - prs->lencharlexeme = prs->state->lencharlexeme; - prs->state->lenbytelexeme = prs->state->lencharlexeme = 0; - prs->type = item->type; - } - - /* do various actions by flags */ - if (item->flags & A_POP) - { /* pop stored state in stack */ - TParserPosition *ptr = prs->state->prev; - - pfree(prs->state); - prs->state = ptr; - Assert(prs->state); - } - else if (item->flags & A_PUSH) - { /* push (store) state in stack */ - prs->state->pushedAtAction = item; /* remember where we push */ - prs->state = newTParserPosition(prs->state); - } - else if (item->flags & A_CLEAR) - { /* clear previous pushed state */ - TParserPosition *ptr; - - Assert(prs->state->prev); - ptr = prs->state->prev->prev; - pfree(prs->state->prev); - prs->state->prev = ptr; - } - else if (item->flags & A_CLRALL) - { /* clear all previous pushed state */ - TParserPosition *ptr; - - while (prs->state->prev) - { - ptr = prs->state->prev->prev; - pfree(prs->state->prev); - prs->state->prev = ptr; - } - } - else if (item->flags & A_MERGE) - { /* merge posinfo with current and pushed 
state */ - TParserPosition *ptr = prs->state; - - Assert(prs->state->prev); - prs->state = prs->state->prev; - - prs->state->posbyte = ptr->posbyte; - prs->state->poschar = ptr->poschar; - prs->state->charlen = ptr->charlen; - prs->state->lenbytelexeme = ptr->lenbytelexeme; - prs->state->lencharlexeme = ptr->lencharlexeme; - pfree(ptr); - } - - /* set new state if pointed */ - if (item->tostate != TPS_Null) - prs->state->state = item->tostate; - - /* check for go away */ - if ((item->flags & A_BINGO) || (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN) == 0)) - break; - - /* go to begining of loop if we should rerun or we just restore state */ - if (item->flags & (A_RERUN | A_POP)) - continue; - - /* move forward */ - if (prs->state->charlen) - { - prs->state->posbyte += prs->state->charlen; - prs->state->lenbytelexeme += prs->state->charlen; - prs->state->poschar++; - prs->state->lencharlexeme++; - } - } - - return (item && (item->flags & A_BINGO)) ? true : false; -} diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h deleted file mode 100644 index c40717a80f..0000000000 --- a/contrib/tsearch2/wordparser/parser.h +++ /dev/null @@ -1,167 +0,0 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11 2006/03/11 04:38:30 momjian Exp $ */ - -#ifndef __PARSER_H__ -#define __PARSER_H__ - -#include <ctype.h> -#include <limits.h> -#include "ts_locale.h" - -typedef enum -{ - TPS_Base = 0, - TPS_InUWord, - TPS_InLatWord, - TPS_InCyrWord, - TPS_InUnsignedInt, - TPS_InSignedIntFirst, - TPS_InSignedInt, - TPS_InSpace, - TPS_InUDecimalFirst, - TPS_InUDecimal, - TPS_InDecimalFirst, - TPS_InDecimal, - TPS_InVerVersion, - TPS_InSVerVersion, - TPS_InVersionFirst, - TPS_InVersion, - TPS_InMantissaFirst, - TPS_InMantissaSign, - TPS_InMantissa, - TPS_InHTMLEntityFirst, - TPS_InHTMLEntity, - TPS_InHTMLEntityNumFirst, - TPS_InHTMLEntityNum, - TPS_InHTMLEntityEnd, - TPS_InTagFirst, - TPS_InXMLBegin, - TPS_InTagCloseFirst, - 
TPS_InTagName, - TPS_InTagBeginEnd, - TPS_InTag, - TPS_InTagEscapeK, - TPS_InTagEscapeKK, - TPS_InTagBackSleshed, - TPS_InTagEnd, - TPS_InCommentFirst, - TPS_InCommentLast, - TPS_InComment, - TPS_InCloseCommentFirst, - TPS_InCloseCommentLast, - TPS_InCommentEnd, - TPS_InHostFirstDomain, - TPS_InHostDomainSecond, - TPS_InHostDomain, - TPS_InPortFirst, - TPS_InPort, - TPS_InHostFirstAN, - TPS_InHost, - TPS_InEmail, - TPS_InFileFirst, - TPS_InFileTwiddle, - TPS_InPathFirst, - TPS_InPathFirstFirst, - TPS_InPathSecond, - TPS_InFile, - TPS_InFileNext, - TPS_InURIFirst, - TPS_InURIStart, - TPS_InURI, - TPS_InFURL, - TPS_InProtocolFirst, - TPS_InProtocolSecond, - TPS_InProtocolEnd, - TPS_InHyphenLatWordFirst, - TPS_InHyphenLatWord, - TPS_InHyphenCyrWordFirst, - TPS_InHyphenCyrWord, - TPS_InHyphenUWordFirst, - TPS_InHyphenUWord, - TPS_InHyphenValueFirst, - TPS_InHyphenValue, - TPS_InHyphenValueExact, - TPS_InParseHyphen, - TPS_InParseHyphenHyphen, - TPS_InHyphenCyrWordPart, - TPS_InHyphenLatWordPart, - TPS_InHyphenUWordPart, - TPS_InHyphenUnsignedInt, - TPS_InHDecimalPartFirst, - TPS_InHDecimalPart, - TPS_InHVersionPartFirst, - TPS_InHVersionPart, - TPS_Null /* last state (fake value) */ -} TParserState; - -/* forward declaration */ -struct TParser; - - -typedef int (*TParserCharTest) (struct TParser *); /* any p_is* functions - * except p_iseq */ -typedef void (*TParserSpecial) (struct TParser *); /* special handler for - * special cases... 
*/ - -typedef struct -{ - TParserCharTest isclass; - char c; - uint16 flags; - TParserState tostate; - int type; - TParserSpecial special; -} TParserStateActionItem; - -typedef struct -{ - TParserState state; - TParserStateActionItem *action; -} TParserStateAction; - -typedef struct TParserPosition -{ - int posbyte; /* position of parser in bytes */ - int poschar; /* osition of parser in characters */ - int charlen; /* length of current char */ - int lenbytelexeme; - int lencharlexeme; - TParserState state; - struct TParserPosition *prev; - int flags; - TParserStateActionItem *pushedAtAction; -} TParserPosition; - -typedef struct TParser -{ - /* string and position information */ - char *str; /* multibyte string */ - int lenstr; /* length of mbstring */ -#ifdef TS_USE_WIDE - wchar_t *wstr; /* wide character string */ - int lenwstr; /* length of wsting */ -#endif - - /* State of parse */ - int charmaxlen; - bool usewide; - TParserPosition *state; - bool ignore; - bool wanthost; - - /* silly char */ - char c; - - /* out */ - char *lexeme; - int lenbytelexeme; - int lencharlexeme; - int type; - -} TParser; - - -TParser *TParserInit(char *, int); -bool TParserGet(TParser *); -void TParserClose(TParser *); - -#endif diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c deleted file mode 100644 index 31148db62b..0000000000 --- a/contrib/tsearch2/wparser.c +++ /dev/null @@ -1,611 +0,0 @@ -/* - * interface functions to parser - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include <ctype.h> - -#include "catalog/pg_type.h" -#include "executor/spi.h" -#include "fmgr.h" -#include "funcapi.h" -#include "utils/array.h" -#include "utils/memutils.h" - -#include "wparser.h" -#include "ts_cfg.h" -#include "snmap.h" -#include "common.h" - -/*********top interface**********/ - -static Oid current_parser_id = InvalidOid; - -void -init_prs(Oid id, WParserInfo * prs) -{ - Oid arg[1]; - bool isnull; - Datum pars[1]; - int stat; - void *plan; - char 
buf[1024], - *nsp; - - arg[0] = OIDOID; - pars[0] = ObjectIdGetDatum(id); - - memset(prs, 0, sizeof(WParserInfo)); - SPI_connect(); - nsp = get_namespace(TSNSP_FunctionOid); - sprintf(buf, "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from %s.pg_ts_parser where oid = $1", nsp); - pfree(nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - ts_error(ERROR, "SPI_prepare() failed"); - - stat = SPI_execp(plan, pars, " ", 1); - if (stat < 0) - ts_error(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - { - Oid oid = InvalidOid; - - oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext); - oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull)); - fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext); - oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull)); - fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext); - prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull)); - oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull)); - fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext); - prs->prs_id = id; - } - else - ts_error(ERROR, "No parser with id %d", id); - SPI_freeplan(plan); - SPI_finish(); -} - -typedef struct -{ - WParserInfo *last_prs; - int len; - int reallen; - WParserInfo *list; - SNMap name2id_map; -} PrsList; - -static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}}; - -void -reset_prs(void) -{ - freeSNMap(&(PList.name2id_map)); - if (PList.list) - free(PList.list); - memset(&PList, 0, sizeof(PrsList)); -} - -static int -compareprs(const void *a, const void *b) -{ - if (((WParserInfo *) a)->prs_id == ((WParserInfo *) b)->prs_id) - return 0; - return (((WParserInfo *) a)->prs_id < ((WParserInfo *) b)->prs_id) ? 
-1 : 1; -} - -WParserInfo * -findprs(Oid id) -{ - /* last used prs */ - if (PList.last_prs && PList.last_prs->prs_id == id) - return PList.last_prs; - - /* already used prs */ - if (PList.len != 0) - { - WParserInfo key; - - key.prs_id = id; - PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs); - if (PList.last_prs != NULL) - return PList.last_prs; - } - - /* last chance */ - if (PList.len == PList.reallen) - { - WParserInfo *tmp; - int reallen = (PList.reallen) ? 2 * PList.reallen : 16; - - tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen); - if (!tmp) - ts_error(ERROR, "No memory"); - PList.reallen = reallen; - PList.list = tmp; - } - init_prs(id, &(PList.list[PList.len]) ); - PList.last_prs = &(PList.list[PList.len]); - PList.len++; - qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs); - return findprs(id); /* qsort changed order!! */ ; -} - -Oid -name2id_prs(text *name) -{ - Oid arg[1]; - bool isnull; - Datum pars[1]; - int stat; - Oid id = findSNMap_t(&(PList.name2id_map), name); - char buf[1024], - *nsp; - void *plan; - - arg[0] = TEXTOID; - pars[0] = PointerGetDatum(name); - - if (id) - return id; - - SPI_connect(); - nsp = get_namespace(TSNSP_FunctionOid); - sprintf(buf, "select oid from %s.pg_ts_parser where prs_name = $1", nsp); - pfree(nsp); - plan = SPI_prepare(buf, 1, arg); - if (!plan) - ts_error(ERROR, "SPI_prepare() failed"); - - stat = SPI_execp(plan, pars, " ", 1); - if (stat < 0) - ts_error(ERROR, "SPI_execp return %d", stat); - if (SPI_processed > 0) - id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); - else - ts_error(ERROR, "No parser '%s'", text2char(name)); - SPI_freeplan(plan); - SPI_finish(); - addSNMap_t(&(PList.name2id_map), name, id); - return id; -} - - -/******sql-level interface******/ -typedef struct -{ - int cur; - LexDescr *list; -} TypeStorage; - -static void -setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext 
*funcctx, Oid prsid) -{ - TupleDesc tupdesc; - MemoryContext oldcontext; - TypeStorage *st; - WParserInfo *prs = findprs(prsid); - - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - st = (TypeStorage *) palloc(sizeof(TypeStorage)); - st->cur = 0; - st->list = (LexDescr *) DatumGetPointer( - OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs)) - ); - funcctx->user_fctx = (void *) st; - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - tupdesc = CreateTupleDescCopy(tupdesc); - funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); - MemoryContextSwitchTo(oldcontext); -} - -static Datum -process_call(FuncCallContext *funcctx) -{ - TypeStorage *st; - - st = (TypeStorage *) funcctx->user_fctx; - if (st->list && st->list[st->cur].lexid) - { - Datum result; - char *values[3]; - char txtid[16]; - HeapTuple tuple; - - values[0] = txtid; - sprintf(txtid, "%d", st->list[st->cur].lexid); - values[1] = st->list[st->cur].alias; - values[2] = st->list[st->cur].descr; - - tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); - result = HeapTupleGetDatum(tuple); - - pfree(values[1]); - pfree(values[2]); - st->cur++; - return result; - } - else - { - if (st->list) - pfree(st->list); - pfree(st); - } - return (Datum) 0; -} - -PG_FUNCTION_INFO_V1(token_type); -Datum token_type(PG_FUNCTION_ARGS); - -Datum -token_type(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - SET_FUNCOID(); - if (SRF_IS_FIRSTCALL()) - { - funcctx = SRF_FIRSTCALL_INIT(); - setup_firstcall(fcinfo, funcctx, PG_GETARG_OID(0)); - } - - funcctx = SRF_PERCALL_SETUP(); - - if ((result = process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - -PG_FUNCTION_INFO_V1(token_type_byname); -Datum token_type_byname(PG_FUNCTION_ARGS); -Datum -token_type_byname(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - SET_FUNCOID(); - if 
(SRF_IS_FIRSTCALL()) - { - text *name = PG_GETARG_TEXT_P(0); - - funcctx = SRF_FIRSTCALL_INIT(); - setup_firstcall(fcinfo, funcctx, name2id_prs(name)); - PG_FREE_IF_COPY(name, 0); - } - - funcctx = SRF_PERCALL_SETUP(); - - if ((result = process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - -PG_FUNCTION_INFO_V1(token_type_current); -Datum token_type_current(PG_FUNCTION_ARGS); -Datum -token_type_current(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - SET_FUNCOID(); - if (SRF_IS_FIRSTCALL()) - { - funcctx = SRF_FIRSTCALL_INIT(); - if (current_parser_id == InvalidOid) - current_parser_id = name2id_prs(char2text("default")); - setup_firstcall(fcinfo, funcctx, current_parser_id); - } - - funcctx = SRF_PERCALL_SETUP(); - - if ((result = process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - - -PG_FUNCTION_INFO_V1(set_curprs); -Datum set_curprs(PG_FUNCTION_ARGS); -Datum -set_curprs(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - findprs(PG_GETARG_OID(0)); - current_parser_id = PG_GETARG_OID(0); - PG_RETURN_VOID(); -} - -PG_FUNCTION_INFO_V1(set_curprs_byname); -Datum set_curprs_byname(PG_FUNCTION_ARGS); -Datum -set_curprs_byname(PG_FUNCTION_ARGS) -{ - text *name = PG_GETARG_TEXT_P(0); - - SET_FUNCOID(); - DirectFunctionCall1( - set_curprs, - ObjectIdGetDatum(name2id_prs(name)) - ); - PG_FREE_IF_COPY(name, 0); - PG_RETURN_VOID(); -} - -typedef struct -{ - int type; - char *lexeme; -} LexemeEntry; - -typedef struct -{ - int cur; - int len; - LexemeEntry *list; -} PrsStorage; - - -static void -prs_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, - int prsid, text *txt) -{ - TupleDesc tupdesc; - MemoryContext oldcontext; - PrsStorage *st; - WParserInfo *prs = findprs(prsid); - char *lex = NULL; - int llen = 0, - type = 0; - - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - st = (PrsStorage *) palloc(sizeof(PrsStorage)); - 
st->cur = 0; - st->len = 16; - st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len); - - prs->prs = (void *) DatumGetPointer( - FunctionCall2( - &(prs->start_info), - PointerGetDatum(VARDATA(txt)), - Int32GetDatum(VARSIZE(txt) - VARHDRSZ) - ) - ); - - while ((type = DatumGetInt32(FunctionCall3( - &(prs->getlexeme_info), - PointerGetDatum(prs->prs), - PointerGetDatum(&lex), - PointerGetDatum(&llen)))) != 0) - { - - if (st->cur >= st->len) - { - st->len = 2 * st->len; - st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len); - } - st->list[st->cur].lexeme = palloc(llen + 1); - memcpy(st->list[st->cur].lexeme, lex, llen); - st->list[st->cur].lexeme[llen] = '\0'; - st->list[st->cur].type = type; - st->cur++; - } - - FunctionCall1( - &(prs->end_info), - PointerGetDatum(prs->prs) - ); - - st->len = st->cur; - st->cur = 0; - - funcctx->user_fctx = (void *) st; - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - tupdesc = CreateTupleDescCopy(tupdesc); - funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); - MemoryContextSwitchTo(oldcontext); -} - -static Datum -prs_process_call(FuncCallContext *funcctx) -{ - PrsStorage *st; - - st = (PrsStorage *) funcctx->user_fctx; - if (st->cur < st->len) - { - Datum result; - char *values[2]; - char tid[16]; - HeapTuple tuple; - - values[0] = tid; - sprintf(tid, "%d", st->list[st->cur].type); - values[1] = st->list[st->cur].lexeme; - tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); - result = HeapTupleGetDatum(tuple); - - pfree(values[1]); - st->cur++; - return result; - } - else - { - if (st->list) - pfree(st->list); - pfree(st); - } - return (Datum) 0; -} - - - -PG_FUNCTION_INFO_V1(parse); -Datum parse(PG_FUNCTION_ARGS); -Datum -parse(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - SET_FUNCOID(); - if (SRF_IS_FIRSTCALL()) - { - text *txt = PG_GETARG_TEXT_P(1); - - funcctx = 
SRF_FIRSTCALL_INIT(); - prs_setup_firstcall(fcinfo, funcctx, PG_GETARG_OID(0), txt); - PG_FREE_IF_COPY(txt, 1); - } - - funcctx = SRF_PERCALL_SETUP(); - - if ((result = prs_process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - -PG_FUNCTION_INFO_V1(parse_byname); -Datum parse_byname(PG_FUNCTION_ARGS); -Datum -parse_byname(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - SET_FUNCOID(); - if (SRF_IS_FIRSTCALL()) - { - text *name = PG_GETARG_TEXT_P(0); - text *txt = PG_GETARG_TEXT_P(1); - - funcctx = SRF_FIRSTCALL_INIT(); - prs_setup_firstcall(fcinfo, funcctx, name2id_prs(name), txt); - PG_FREE_IF_COPY(name, 0); - PG_FREE_IF_COPY(txt, 1); - } - - funcctx = SRF_PERCALL_SETUP(); - - if ((result = prs_process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - - -PG_FUNCTION_INFO_V1(parse_current); -Datum parse_current(PG_FUNCTION_ARGS); -Datum -parse_current(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - - SET_FUNCOID(); - if (SRF_IS_FIRSTCALL()) - { - text *txt = PG_GETARG_TEXT_P(0); - - funcctx = SRF_FIRSTCALL_INIT(); - if (current_parser_id == InvalidOid) - current_parser_id = name2id_prs(char2text("default")); - prs_setup_firstcall(fcinfo, funcctx, current_parser_id, txt); - PG_FREE_IF_COPY(txt, 0); - } - - funcctx = SRF_PERCALL_SETUP(); - - if ((result = prs_process_call(funcctx)) != (Datum) 0) - SRF_RETURN_NEXT(funcctx, result); - SRF_RETURN_DONE(funcctx); -} - -PG_FUNCTION_INFO_V1(headline); -Datum headline(PG_FUNCTION_ARGS); -Datum -headline(PG_FUNCTION_ARGS) -{ - text *in = PG_GETARG_TEXT_P(1); - QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2))); - text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? 
PG_GETARG_TEXT_P(3) : NULL; - HLPRSTEXT prs; - text *out; - TSCfgInfo *cfg; - WParserInfo *prsobj; - - SET_FUNCOID(); - cfg = findcfg(PG_GETARG_OID(0)); - prsobj = findprs(cfg->prs_id); - - memset(&prs, 0, sizeof(HLPRSTEXT)); - prs.lenwords = 32; - prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords); - hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ); - - - FunctionCall3( - &(prsobj->headline_info), - PointerGetDatum(&prs), - PointerGetDatum(opt), - PointerGetDatum(query) - ); - - out = genhl(&prs); - - PG_FREE_IF_COPY(in, 1); - PG_FREE_IF_COPY(query, 2); - if (opt) - PG_FREE_IF_COPY(opt, 3); - pfree(prs.words); - pfree(prs.startsel); - pfree(prs.stopsel); - - PG_RETURN_POINTER(out); -} - - -PG_FUNCTION_INFO_V1(headline_byname); -Datum headline_byname(PG_FUNCTION_ARGS); -Datum -headline_byname(PG_FUNCTION_ARGS) -{ - text *cfg = PG_GETARG_TEXT_P(0); - - Datum out; - - SET_FUNCOID(); - out = DirectFunctionCall4( - headline, - ObjectIdGetDatum(name2id_cfg(cfg)), - PG_GETARG_DATUM(1), - PG_GETARG_DATUM(2), - (PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL) - ); - - PG_FREE_IF_COPY(cfg, 0); - PG_RETURN_DATUM(out); -} - -PG_FUNCTION_INFO_V1(headline_current); -Datum headline_current(PG_FUNCTION_ARGS); -Datum -headline_current(PG_FUNCTION_ARGS) -{ - SET_FUNCOID(); - PG_RETURN_DATUM(DirectFunctionCall4( - headline, - ObjectIdGetDatum(get_currcfg()), - PG_GETARG_DATUM(0), - PG_GETARG_DATUM(1), - (PG_NARGS() > 2) ? 
PG_GETARG_DATUM(2) : PointerGetDatum(NULL) - )); -} diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h deleted file mode 100644 index c3c44151f0..0000000000 --- a/contrib/tsearch2/wparser.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __WPARSER_H__ -#define __WPARSER_H__ -#include "postgres.h" -#include "fmgr.h" - -typedef struct -{ - Oid prs_id; - FmgrInfo start_info; - FmgrInfo getlexeme_info; - FmgrInfo end_info; - FmgrInfo headline_info; - Oid lextype; - void *prs; -} WParserInfo; - -void init_prs(Oid id, WParserInfo * prs); -WParserInfo *findprs(Oid id); -Oid name2id_prs(text *name); -void reset_prs(void); - - -typedef struct -{ - int lexid; - char *alias; - char *descr; -} LexDescr; - -#endif diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c deleted file mode 100644 index b20909ce5b..0000000000 --- a/contrib/tsearch2/wparser_def.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * default word parser - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - -#include "utils/builtins.h" - -#include "dict.h" -#include "wparser.h" -#include "common.h" -#include "ts_cfg.h" -#include "wordparser/parser.h" -#include "wordparser/deflex.h" - -PG_FUNCTION_INFO_V1(prsd_lextype); -Datum prsd_lextype(PG_FUNCTION_ARGS); - -Datum -prsd_lextype(PG_FUNCTION_ARGS) -{ - LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1)); - int i; - - for (i = 1; i <= LASTNUM; i++) - { - descr[i - 1].lexid = i; - descr[i - 1].alias = pstrdup(tok_alias[i]); - descr[i - 1].descr = pstrdup(lex_descr[i]); - } - - descr[LASTNUM].lexid = 0; - - PG_RETURN_POINTER(descr); -} - -PG_FUNCTION_INFO_V1(prsd_start); -Datum prsd_start(PG_FUNCTION_ARGS); -Datum -prsd_start(PG_FUNCTION_ARGS) -{ - PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1))); -} - -PG_FUNCTION_INFO_V1(prsd_getlexeme); -Datum prsd_getlexeme(PG_FUNCTION_ARGS); -Datum -prsd_getlexeme(PG_FUNCTION_ARGS) -{ - TParser *p = (TParser *) PG_GETARG_POINTER(0); - 
char **t = (char **) PG_GETARG_POINTER(1); - int *tlen = (int *) PG_GETARG_POINTER(2); - - if (!TParserGet(p)) - PG_RETURN_INT32(0); - - *t = p->lexeme; - *tlen = p->lenbytelexeme; - - PG_RETURN_INT32(p->type); -} - -PG_FUNCTION_INFO_V1(prsd_end); -Datum prsd_end(PG_FUNCTION_ARGS); -Datum -prsd_end(PG_FUNCTION_ARGS) -{ - TParser *p = (TParser *) PG_GETARG_POINTER(0); - - TParserClose(p); - PG_RETURN_VOID(); -} - -#define LEAVETOKEN(x) ( (x)==12 ) -#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) -#define ENDPUNCTOKEN(x) ( (x)==12 ) - - -#define TS_IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) -#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) -#define HTMLHLIDIGNORE(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) -#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) ) -#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || TS_IDIGNORE(x) ) - -typedef struct -{ - HLWORD *words; - int len; -} hlCheck; - -static bool -checkcondition_HL(void *checkval, ITEM * val) -{ - int i; - - for (i = 0; i < ((hlCheck *) checkval)->len; i++) - { - if (((hlCheck *) checkval)->words[i].item == val) - return true; - } - return false; -} - - -static bool -hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q) -{ - int i, - j; - ITEM *item = GETQUERY(query); - int pos = *p; - - *q = 0; - *p = 0x7fffffff; - - for (j = 0; j < query->size; j++) - { - if (item->type != VAL) - { - item++; - continue; - } - for (i = pos; i < prs->curwords; i++) - { - if (prs->words[i].item == item) - { - if (i > *q) - *q = i; - break; - } - } - item++; - } - - if (*q == 0) - return false; - - item = GETQUERY(query); - for (j = 0; j < query->size; j++) - { - if (item->type != VAL) - { - item++; - continue; - } - for (i = *q; i >= pos; i--) - { - if (prs->words[i].item == item) - { - if (i < *p) - *p = i; - break; - } - } - item++; - } - - if (*p <= *q) - { - hlCheck ch; - - ch.words = &(prs->words[*p]); - ch.len 
= *q - *p + 1; - if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL)) - return true; - else - { - (*p)++; - return hlCover(prs, query, p, q); - } - } - - return false; -} - -PG_FUNCTION_INFO_V1(prsd_headline); -Datum prsd_headline(PG_FUNCTION_ARGS); -Datum -prsd_headline(PG_FUNCTION_ARGS) -{ - HLPRSTEXT *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0); - text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */ - QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(2); /* can't be toasted */ - - /* from opt + start and and tag */ - int min_words = 15; - int max_words = 35; - int shortword = 3; - - int p = 0, - q = 0; - int bestb = -1, - beste = -1; - int bestlen = -1; - int pose = 0, - posb, - poslen, - curlen; - - int i; - int highlight = 0; - - /* config */ - prs->startsel = NULL; - prs->stopsel = NULL; - if (opt) - { - Map *map, - *mptr; - - parse_cfgdict(opt, &map); - mptr = map; - - while (mptr && mptr->key) - { - if (pg_strcasecmp(mptr->key, "MaxWords") == 0) - max_words = pg_atoi(mptr->value, 4, 1); - else if (pg_strcasecmp(mptr->key, "MinWords") == 0) - min_words = pg_atoi(mptr->value, 4, 1); - else if (pg_strcasecmp(mptr->key, "ShortWord") == 0) - shortword = pg_atoi(mptr->value, 4, 1); - else if (pg_strcasecmp(mptr->key, "StartSel") == 0) - prs->startsel = pstrdup(mptr->value); - else if (pg_strcasecmp(mptr->key, "StopSel") == 0) - prs->stopsel = pstrdup(mptr->value); - else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0) - highlight = ( - pg_strcasecmp(mptr->value, "1") == 0 || - pg_strcasecmp(mptr->value, "on") == 0 || - pg_strcasecmp(mptr->value, "true") == 0 || - pg_strcasecmp(mptr->value, "t") == 0 || - pg_strcasecmp(mptr->value, "y") == 0 || - pg_strcasecmp(mptr->value, "yes") == 0) ? 
- 1 : 0; - - pfree(mptr->key); - pfree(mptr->value); - - mptr++; - } - pfree(map); - - if (highlight == 0) - { - if (min_words >= max_words) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("MinWords should be less than MaxWords"))); - if (min_words <= 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("MinWords should be positive"))); - if (shortword < 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("ShortWord should be >= 0"))); - } - } - - if (highlight == 0) - { - while (hlCover(prs, query, &p, &q)) - { - /* find cover len in words */ - curlen = 0; - poslen = 0; - for (i = p; i <= q && curlen < max_words; i++) - { - if (!NONWORDTOKEN(prs->words[i].type)) - curlen++; - if (prs->words[i].item && !prs->words[i].repeated) - poslen++; - pose = i; - } - - if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)) - { - /* best already finded, so try one more cover */ - p++; - continue; - } - - posb = p; - if (curlen < max_words) - { /* find good end */ - for (i = i - 1; i < prs->curwords && curlen < max_words; i++) - { - if (i != q) - { - if (!NONWORDTOKEN(prs->words[i].type)) - curlen++; - if (prs->words[i].item && !prs->words[i].repeated) - poslen++; - } - pose = i; - if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) - continue; - if (curlen >= min_words) - break; - } - if (curlen < min_words && i >= prs->curwords) - { /* got end of text and our cover is shoter - * than min_words */ - for (i = p; i >= 0; i--) - { - if (!NONWORDTOKEN(prs->words[i].type)) - curlen++; - if (prs->words[i].item && !prs->words[i].repeated) - poslen++; - if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) - continue; - if (curlen >= min_words) - break; - } - posb = (i >= 0) ? 
i : 0; - } - } - else - { /* shorter cover :((( */ - for (; curlen > min_words; i--) - { - if (!NONWORDTOKEN(prs->words[i].type)) - curlen--; - if (prs->words[i].item && !prs->words[i].repeated) - poslen--; - pose = i; - if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) - continue; - break; - } - } - - if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || - (bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && - (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))) - { - bestb = posb; - beste = pose; - bestlen = poslen; - } - - p++; - } - - if (bestlen < 0) - { - curlen = 0; - for (i = 0; i < prs->curwords && curlen < min_words; i++) - { - if (!NONWORDTOKEN(prs->words[i].type)) - curlen++; - pose = i; - } - bestb = 0; - beste = pose; - } - } - else - { - bestb = 0; - beste = prs->curwords - 1; - } - - for (i = bestb; i <= beste; i++) - { - if (prs->words[i].item) - prs->words[i].selected = 1; - if (highlight == 0) - { - if (HLIDIGNORE(prs->words[i].type)) - prs->words[i].replace = 1; - } - else - { - if (HTMLHLIDIGNORE(prs->words[i].type)) - prs->words[i].replace = 1; - } - - prs->words[i].in = (prs->words[i].repeated) ? 0 : 1; - } - - if (!prs->startsel) - prs->startsel = pstrdup("<b>"); - if (!prs->stopsel) - prs->stopsel = pstrdup("</b>"); - prs->startsellen = strlen(prs->startsel); - prs->stopsellen = strlen(prs->stopsel); - - PG_RETURN_POINTER(prs); -} |