diff options
author | Marc G. Fournier <scrappy@hub.org> | 1997-02-19 12:59:07 +0000 |
---|---|---|
committer | Marc G. Fournier <scrappy@hub.org> | 1997-02-19 12:59:07 +0000 |
commit | 29138eeb3ca299d0fdc3d4ea2cbe523b759c9db0 (patch) | |
tree | bac9efe5ffc3619cd09d8b920e31209a0d5f9a75 | |
parent | 34f35a4c19a92d7869e25cdd522366df74285729 (diff) | |
download | postgresql-29138eeb3ca299d0fdc3d4ea2cbe523b759c9db0.tar.gz |
Merge in GEQO Optimizer
From: "Martin S. Utesch" <utesch@aut.tu-freiberg.de>
29 files changed, 3823 insertions, 3 deletions
diff --git a/src/backend/optimizer/Makefile b/src/backend/optimizer/Makefile index 9530d17887..92d9ebf8a4 100644 --- a/src/backend/optimizer/Makefile +++ b/src/backend/optimizer/Makefile @@ -4,13 +4,13 @@ # Makefile for optimizer # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/Makefile,v 1.2 1996/11/10 03:12:38 bryanh Exp $ +# $Header: /cvsroot/pgsql/src/backend/optimizer/Makefile,v 1.3 1997/02/19 12:56:31 scrappy Exp $ # #------------------------------------------------------------------------- all: submake SUBSYS.o -OBJS = path/SUBSYS.o plan/SUBSYS.o prep/SUBSYS.o util/SUBSYS.o +OBJS = path/SUBSYS.o plan/SUBSYS.o prep/SUBSYS.o util/SUBSYS.o geqo/SUBSYS.o SUBSYS.o: $(OBJS) $(LD) -r -o SUBSYS.o $(OBJS) @@ -21,6 +21,7 @@ submake: $(MAKE) -C plan SUBSYS.o $(MAKE) -C prep SUBSYS.o $(MAKE) -C util SUBSYS.o + $(MAKE) -C geqo SUBSYS.o clean: rm -f SUBSYS.o @@ -28,9 +29,11 @@ clean: $(MAKE) -C plan clean $(MAKE) -C prep clean $(MAKE) -C util clean + $(MAKE) -C geqo clean .DEFAULT: $(MAKE) -C path $@ $(MAKE) -C plan $@ $(MAKE) -C prep $@ $(MAKE) -C util $@ + $(MAKE) -C geqo $@ diff --git a/src/backend/optimizer/geqo/Makefile b/src/backend/optimizer/geqo/Makefile new file mode 100644 index 0000000000..61e57f6ebd --- /dev/null +++ b/src/backend/optimizer/geqo/Makefile @@ -0,0 +1,43 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for the genetic query optimizer module +# +# Copyright (c) 1994, Regents of the University of California +# +# $Id: Makefile,v 1.1 1997/02/19 12:56:38 scrappy Exp $ +# +#------------------------------------------------------------------------- + +SRCDIR = ../../.. +include ../../../Makefile.global + +INCLUDE_OPT = -I../.. \ + -I../../port/$(PORTNAME) \ + -I../../../include + +CFLAGS+=$(INCLUDE_OPT) + +OBJS = geqo_copy.o geqo_eval.o geqo_main.o geqo_misc.o \ + geqo_params.o geqo_paths.o geqo_pool.o geqo_recombination.o \ + geqo_selection.o \ + geqo_erx.o geqo_pmx.o geqo_cx.o geqo_px.o + +# not ready yet: geqo_ox1.o geqo_ox2.o +# deprecated: minspantree.o + +all: SUBSYS.o + +SUBSYS.o: $(OBJS) + $(LD) -r -o SUBSYS.o $(OBJS) + +depend dep: + $(CC) -MM $(INCLUDE_OPT) *.c >depend + +clean: + rm -f SUBSYS.o $(OBJS) + +ifeq (depend,$(wildcard depend)) +include depend +endif + diff --git a/src/backend/optimizer/geqo/geqo_copy.c b/src/backend/optimizer/geqo/geqo_copy.c new file mode 100644 index 0000000000..3356f8d547 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_copy.c @@ -0,0 +1,67 @@ +/*------------------------------------------------------------------------ + * + * geqo_copy.c-- + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_copy.c,v 1.1 1997/02/19 12:56:40 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* this is adopted from D. Whitley's Genitor algorithm */ + +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo_copy.h" + +/* geqo_copy-- + * + * copies one gene to another + * + */ +void +geqo_copy (Chromosome *chromo1, Chromosome *chromo2, int string_length) +{ + int i; + + for (i=0; i<string_length; i++) + chromo1->string[i] = chromo2->string[i]; + + chromo1->worth = chromo2->worth; +} diff --git a/src/backend/optimizer/geqo/geqo_cx.c b/src/backend/optimizer/geqo/geqo_cx.c new file mode 100644 index 0000000000..a3aa6fce4f --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_cx.c @@ -0,0 +1,129 @@ +/*------------------------------------------------------------------------ +* +* geqo_cx.c-- +* +* cycle crossover [CX] routines; +* CX operator according to Oliver et al +* (Proc 2nd Int'l Conf on GA's) +* +* $Id: geqo_cx.c,v 1.1 1997/02/19 12:56:48 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the cx algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_random.h" + + +/* cx-- + * + * cycle crossover + */ +int +cx(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table) +{ + + int i, start_pos, curr_pos; + int count = 0; + int num_diffs = 0; + + /* initialize city table */ + for (i=1; i<=num_gene; i++) { + city_table[i].used = 0; + city_table[tour2[i-1]].tour2_position = i-1; + city_table[tour1[i-1]].tour1_position = i-1; + } + + /* choose random cycle starting position */ + start_pos = geqo_randint(num_gene - 1, 0); + + /* child inherits first city */ + offspring[start_pos] = tour1[start_pos]; + + /* begin cycle with tour1 */ + curr_pos = start_pos; + city_table[(int) tour1[start_pos]].used = 1; + + count++; + + /* cx main part */ + + +/* STEP 1 */ + + while (tour2[curr_pos] != tour1[start_pos]) { + city_table[(int) tour2[curr_pos]].used = 1; + curr_pos = city_table[(int) tour2[curr_pos]].tour1_position; + offspring[curr_pos] = tour1[curr_pos]; + count++; + } + + +/* STEP 2 */ + + /* failed to create a complete tour */ + if (count < num_gene) { + for (i=1; i<=num_gene; i++) { + if (!city_table[i].used) { + offspring[city_table[i].tour2_position] = + tour2[(int) city_table[i].tour2_position]; + count++; + } + } + } + + +/* STEP 3 */ + + /* still failed to create a complete tour */ + if (count < num_gene) { + + /* count the number of differences between mom and offspring */ + for (i=0; i<num_gene; i++) + if (tour1[i] != offspring[i]) num_diffs++; + + } + + return(num_diffs); + } + diff --git a/src/backend/optimizer/geqo/geqo_erx.c b/src/backend/optimizer/geqo/geqo_erx.c new file mode 100644 index 0000000000..f6d601d64d --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_erx.c @@ -0,0 +1,424 @@ +/*------------------------------------------------------------------------ +* +* geqo_erx.c-- +* edge recombination crossover [ER] +* +* $Id: geqo_erx.c,v 1.1 1997/02/19 12:56:55 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the edge recombination algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_random.h" + + +static int gimme_edge (Gene gene1, Gene gene2, Edge *edge_table); +static void remove_gene(Gene gene, Edge edge, Edge *edge_table); +static Gene gimme_gene(Edge edge, Edge *edge_table); + +static Gene edge_failure(Gene *gene, int index, Edge *edge_table, int num_gene); + + +/* alloc_edge_table-- + * + * allocate memory for edge table + * + */ + +Edge * +alloc_edge_table(int num_gene) +{ + Edge *edge_table; + + /* palloc one extra location so that nodes numbered + 1..n can be indexed directly; 0 will not be used */ + + edge_table = (Edge *) palloc ((num_gene+1)*sizeof(Edge)); + + return (edge_table); + } + +/* free_edge_table-- + * + * deallocate memory of edge table + * + */ + void + free_edge_table(Edge *edge_table) + { + pfree(edge_table); + } + +/* gimme_edge_table-- + * + * fills a data structure which represents the set of explicit + * edges between points in the (2) input genes + * + * assumes circular tours and bidirectional edges + * + * gimme_edge() will set "shared" edges to negative values + * + * returns average number edges/city in range 2.0 - 4.0 + * where 2.0=homogeneous; 4.0=diverse + * + */ +float +gimme_edge_table (Gene *tour1, Gene *tour2, int num_gene, Edge *edge_table) +{ + int i, index1, index2; + int edge_total; /* total number of unique edges in two genes */ + + /* at first clear the edge table's old data */ + for (i = 1; i <= num_gene; i++) { + edge_table[i].total_edges = 0; + edge_table[i].unused_edges = 0; + } + + /* fill edge table with new data */ + + edge_total = 0; + + for (index1 = 0; index1 < num_gene; index1++) { + + /* presume the tour is circular, i.e. 1->2, 2->3, 3->1 + this operaton maps n back to 1 */ + + index2 = (index1 + 1) % num_gene; + + /* edges are bidirectional, i.e. 1->2 is same as 2->1 + call gimme_edge twice per edge */ + + edge_total += gimme_edge(tour1[index1], tour1[index2], edge_table); + gimme_edge(tour1[index2], tour1[index1], edge_table); + + edge_total += gimme_edge(tour2[index1], tour2[index2], edge_table); + gimme_edge(tour2[index2], tour2[index1], edge_table); + } + + /* return average number of edges per index */ + return (((float) (edge_total * 2)/ (float) num_gene)); +} + +/* gimme_edge-- + * + * registers edge from city1 to city2 in input edge table + * + * no assumptions about directionality are made; + * therefor it is up to the calling routine to + * call gimme_edge twice to make a bi-directional edge + * between city1 and city2; + * uni-directional edges are possible as well (just call gimme_edge + * once with the direction from city1 to city2) + * + * returns 1 if edge was not already registered and was just added; + * 0 if edge was already registered and edge_table is unchanged + */ +static int +gimme_edge (Gene gene1, Gene gene2, Edge *edge_table) +{ + int i; + int edges; + int city1 = (int) gene1; + int city2 = (int) gene2; + + + /* check whether edge city1->city2 already exists */ + edges = edge_table[city1].total_edges; + + for (i=0; i<edges; i++) { + if ((Gene) Abs(edge_table[city1].edge_list[i]) == city2) { + + /* mark shared edges as negative */ + edge_table[city1].edge_list[i] = 0-city2; + + return (0); + } + } + + /* add city1->city2; */ + edge_table[city1].edge_list[edges] = city2; + + /* increment the number of edges from city1 */ + edge_table[city1].total_edges++; + edge_table[city1].unused_edges++; + + return (1); +} + +/* gimme_tour-- + * + * creates a new tour using edges from the edge table. + * priority is given to "shared" edges (i.e. edges which + * all parent genes possess and are marked as negative + * in the edge table.) + * + */ +int +gimme_tour (Edge *edge_table, Gene *new_gene, int num_gene) +{ + int i; + int edge_failures=0; + + new_gene[0] = (Gene) geqo_randint(num_gene, 1); /* choose int between 1 and num_gene */ + + for (i=1; i<num_gene; i++) { + + /* as each point is entered into the tour, + remove it from the edge table */ + + remove_gene(new_gene[i-1], edge_table[(int) new_gene[i-1]], edge_table); + + /* find destination for the newly entered point */ + + if (edge_table[new_gene[i-1]].unused_edges > 0) { + new_gene[i] = gimme_gene(edge_table[(int) new_gene[i-1]], edge_table); + } + + else { /* cope with fault */ + edge_failures++; + + new_gene[i] = edge_failure(new_gene, i-1, edge_table, num_gene); + } + + /* mark this node as incorporated */ + edge_table[(int) new_gene[i-1]].unused_edges = -1; + + } /* for (i=1; i<num_gene; i++) */ + +return(edge_failures); + +} + +/* remove_gene-- + * + * removes input gene from edge_table. + * input edge is used + * to identify deletion locations within edge table. + * + */ +static void +remove_gene (Gene gene, Edge edge, Edge *edge_table) +{ + int i,j; + int possess_edge; + int genes_remaining; + + /* do for every gene known to have an edge to input gene + (i.e. in edge_list for input edge) */ + + for (i=0; i<edge.unused_edges; i++) { + possess_edge = (int) Abs(edge.edge_list[i]); + genes_remaining = edge_table[possess_edge].unused_edges; + + /* find the input gene in all edge_lists and delete it */ + for (j=0; j<genes_remaining; j++) { + + if ( (Gene) Abs(edge_table[possess_edge].edge_list[j]) == gene) { + + edge_table[possess_edge].unused_edges--; + + edge_table[possess_edge].edge_list[j] = + edge_table[possess_edge].edge_list[genes_remaining-1]; + + break; + } + } + } +} + +/* gimme_gene-- + * + * priority is given to "shared" edges + * (i.e. edges which both genes possess) + * + */ +static Gene +gimme_gene (Edge edge, Edge *edge_table) +{ + int i; + Gene friend; + int minimum_edges; + int minimum_count; + int rand_decision; + + /* no point has edges to more than 4 other points + thus, this contrived minimum will be replaced */ + + minimum_edges = 5; + + /* consider candidate destination points in edge list */ + + for (i=0; i<edge.unused_edges; i++) { + friend = (Gene) edge.edge_list[i]; + + /* give priority to shared edges that are negative; + so return 'em */ + + /* negative values are caught here + so we need not worry about converting to absolute values */ + if (friend < 0) return ( (Gene) Abs(friend)); + + + /* give priority to candidates with fewest remaining unused edges; + find out what the minimum number of unused edges is (minimum_edges); + if there is more than one cadidate with the minimum number + of unused edges keep count of this number (minimum_count); */ + + if (edge_table[(int) friend].unused_edges < minimum_edges) { + minimum_edges = edge_table[(int) friend].unused_edges; + minimum_count = 1; + } + else + if (edge_table[(int) friend].unused_edges == minimum_edges) + minimum_count++; + + } /* for (i=0; i<edge.unused_edges; i++) */ + + + /* random decision of the possible candidates to use */ + rand_decision = (int) geqo_randint(minimum_count-1, 0); + + + for (i=0; i<edge.unused_edges; i++) { + friend = (Gene) edge.edge_list[i]; + + /* return the chosen candidate point */ + if (edge_table[(int) friend].unused_edges == minimum_edges) { + minimum_count--; + + if ( minimum_count == rand_decision ) return (friend); + } + } + + /* ... should never be reached */ + elog(WARN,"gimme_gene: neither shared nor minimum number nor random edge found"); +} + +/* edge_failure-- + * + * routine for handling edge failure + * + */ +static Gene +edge_failure (Gene *gene, int index, Edge *edge_table, int num_gene) +{ + int i; + Gene fail_gene = gene[index]; + int remaining_edges = 0; + int four_count = 0; + int rand_decision; + + + /* how many edges remain? + how many gene with four total (initial) edges remain? */ + + for (i=1; i<=num_gene; i++) { + if ( (edge_table[i].unused_edges != -1) && (i != (int) fail_gene) ) { + remaining_edges++; + + if (edge_table[i].total_edges == 4) four_count++; + } + } + + /* random decision of the gene + with remaining edges and whose total_edges == 4 */ + + if (four_count != 0 ) { + + rand_decision = (int) geqo_randint(four_count-1, 0); + + for (i=1; i<=num_gene; i++) { + + if ((Gene) i != fail_gene && + edge_table[i].unused_edges != -1 && + edge_table[i].total_edges==4) { + + four_count--; + + if (rand_decision == four_count) return ((Gene) i); + } + } + + elog(DEBUG,"edge_failure(1): no edge found via random decision and total_edges == 4"); + } + + else /* random decision of the gene with remaining edges */ + + if (remaining_edges != 0) { + + rand_decision = (int) geqo_randint(remaining_edges-1, 0); + + for (i=1; i<=num_gene; i++) { + + if ((Gene) i != fail_gene && + edge_table[i].unused_edges != -1) { + + remaining_edges--; + + if (rand_decision == remaining_edges) return (i); + } + } + + elog(DEBUG,"edge_failure(2): no edge found via random decision and remainig edges"); + } + + /* edge table seems to be empty; this happens sometimes on + the last point due to the fact that the first point is + removed from the table even though only one of its edges + has been determined */ + + else { /* occurs only at the last point in the tour; + simply look for the point which is not yet used */ + + for (i=1; i<=num_gene; i++) + if (edge_table[i].unused_edges >= 0) + return ((Gene) i); + + elog(DEBUG,"edge_failure(3): no edge found via looking for the last ununsed point"); + } + + +/* ... should never be reached */ + elog(WARN,"edge_failure: no edge detected"); +} + diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c new file mode 100644 index 0000000000..f6f1f4c03d --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -0,0 +1,667 @@ +/*------------------------------------------------------------------------ + * + * geqo_eval.c-- + * Routines to evaluate query trees + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_eval.c,v 1.1 1997/02/19 12:57:01 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#include <math.h> +#ifdef WIN32 +#include <float.h> +#include <limits.h> +#define MAXINT INT_MAX +#else +# if defined(PORTNAME_BSD44_derived) || \ + defined(PORTNAME_bsdi) || \ + defined(PORTNAME_bsdi_2_1) +# include <machine/limits.h> +# define MAXINT INT_MAX +# else +# include <values.h> +# endif /* !PORTNAME_BSD44_derived */ +#endif /* WIN32 */ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_paths.h" + + +static List *gimme_clause_joins(Query *root, Rel *outer_rel, Rel *inner_rel); +static Rel *gimme_clauseless_join(Rel *outer_rel, Rel *inner_rel); +static Rel *init_join_rel(Rel *outer_rel, Rel *inner_rel, JInfo *joininfo); +static List *new_join_tlist(List *tlist, List *other_relids, int first_resdomno); +static List *new_joininfo_list(List *joininfo_list, List *join_relids); +static void add_superrels(Rel *rel, Rel *super_rel); +static bool nonoverlap_rels(Rel *rel1, Rel *rel2); +static bool nonoverlap_sets(List *s1, List *s2); +static void geqo_joinrel_size(Rel *joinrel, Rel *outer_rel, Rel *inner_rel); + +static void geqo_add_new_joininfos(Query *root, List *joinrels, List *outerrels); +static List *geqo_final_join_rels(List *join_rel_list); + + +/* + * geqo_eval-- + * + * Returns cost of a query tree as an individual of the population. + */ +Cost +geqo_eval (Query *root, Gene *tour, int num_gene) +{ + Rel *joinrel; + Cost fitness; + List *temp; + + +/* remember root->join_relation_list_ ... */ +/* because root->join_relation_list_ will be changed during the following */ + temp = listCopy(root->join_relation_list_); + +/* joinrel is readily processed query tree -- left-sided ! */ + joinrel = gimme_tree(root, tour, 0, num_gene, NULL); + +/* compute fitness */ + fitness = (Cost) joinrel->cheapestpath->path_cost; + + root->join_relation_list_ = listCopy(temp); + + pfree(joinrel); + freeList(temp); + + return(fitness); + +} + +/* + * gimme-tree -- + * this program presumes that only LEFT-SIDED TREES are considered! + * + * 'outer_rel' is the preceeding join + * + * Returns a new join relation incorporating all joins in a left-sided tree. + */ +Rel * +gimme_tree (Query *root, Gene *tour, int rel_count, int num_gene, Rel *outer_rel) +{ + Rel *inner_rel; /* current relation */ + int relid; + + List *new_rels = NIL; + Rel *new_rel = NULL; + + if (rel_count < num_gene ) { /* tree not yet finished */ + + /* tour[0] = 3; tour[1] = 1; tour[2] = 2 */ + relid = (int) tour[rel_count]; + inner_rel = (Rel *) get_base_rel(root, relid); + + if (rel_count == 0) { /* processing first join with relid = (int) tour[0] */ + rel_count++; + return gimme_tree(root, tour, rel_count, num_gene, inner_rel); + } + else { /* tree main part */ + + if(!(new_rels = gimme_clause_joins(root, outer_rel,inner_rel))) { + if (BushyPlanFlag) { + new_rels = lcons(gimme_clauseless_join(outer_rel,outer_rel),NIL); /* ??? MAU */ + } + else { + new_rels = lcons(gimme_clauseless_join(outer_rel,inner_rel),NIL); + } + } + + /* process new_rel->pathlist */ + find_all_join_paths(root, new_rels); + + /* prune new_rels */ + /* MAU: is this necessary? */ + /* what's the matter if more than one new rel is left till now? */ + /* joinrels in newrels with different ordering of relids are not possible */ + if (length(new_rels) > 1) new_rels = geqo_prune_rels(new_rels); + + if (length(new_rels) > 1) { /* should never be reached ... */ + elog(DEBUG,"gimme_tree: still %d relations left", length(new_rels)); + } + + /* get essential new relation */ + new_rel = (Rel *) lfirst(new_rels); + rel_count++; + + /* process new_rel->cheapestpath, new_rel->unorderedpath */ + geqo_rel_paths(new_rel); + + /* processing of other new_rel attributes */ + new_rel->size = compute_rel_size(new_rel); + new_rel->width = compute_rel_width(new_rel); + + root->join_relation_list_ = lcons(new_rel, NIL); + + return gimme_tree(root, tour, rel_count, num_gene, new_rel); + } + + } + + return (outer_rel); /* tree finished ... */ +} + +/* + * gimme-clause-joins-- + * + * 'outer-rel' is the relation entry for the outer relation + * 'inner-rel' is the relation entry for the inner relation + * + * Returns a list of new join relations. + */ + +static List * +gimme_clause_joins(Query *root, Rel *outer_rel, Rel *inner_rel) +{ + List *join_list = NIL; + List *i = NIL; + List *joininfo_list = (List *) outer_rel->joininfo; + + foreach (i, joininfo_list) { + JInfo *joininfo = (JInfo*)lfirst(i); + Rel *rel = NULL; + + if(!joininfo->inactive) { + List *other_rels = (List *)joininfo->otherrels; + + if(other_rels != NIL) { + if( (length(other_rels) == 1) ) { + + if( same(other_rels, inner_rel->relids) ) { /* look if inner_rel is it...*/ + rel = init_join_rel(outer_rel, inner_rel, joininfo); + } + } + else if (BushyPlanFlag) { /* ?!? MAU */ + rel = init_join_rel(outer_rel, get_join_rel(root, other_rels), joininfo); + } + else { + rel = NULL; + } + + if (rel != NULL) + join_list = lappend(join_list, rel); + + } + } + } + + return(join_list); +} + +/* + * gimme-clauseless-join-- + * Given an outer relation 'outer-rel' and an inner relation + * 'inner-rel', create a join relation between 'outer-rel' and 'inner-rel' + * + * Returns a new join relation. + */ + +static Rel * +gimme_clauseless_join(Rel *outer_rel, Rel *inner_rel) +{ + return(init_join_rel(outer_rel, inner_rel, (JInfo*)NULL)); +} + +/* + * init-join-rel-- + * Creates and initializes a new join relation. + * + * 'outer-rel' and 'inner-rel' are relation nodes for the relations to be + * joined + * 'joininfo' is the joininfo node(join clause) containing both + * 'outer-rel' and 'inner-rel', if any exists + * + * Returns the new join relation node. + */ +static Rel * +init_join_rel(Rel *outer_rel, Rel *inner_rel, JInfo *joininfo) +{ + Rel *joinrel = makeNode(Rel); + List *joinrel_joininfo_list = NIL; + List *new_outer_tlist; + List *new_inner_tlist; + + /* + * Create a new tlist by removing irrelevant elements from both + * tlists of the outer and inner join relations and then merging + * the results together. + */ + new_outer_tlist = + new_join_tlist(outer_rel->targetlist, /* XXX 1-based attnos */ + inner_rel->relids, 1); + new_inner_tlist = + new_join_tlist(inner_rel->targetlist, /* XXX 1-based attnos */ + outer_rel->relids, + length(new_outer_tlist) + 1); + + joinrel->relids = NIL; + joinrel->indexed = false; + joinrel->pages = 0; + joinrel->tuples = 0; + joinrel->width = 0; +/* joinrel->targetlist = NIL;*/ + joinrel->pathlist = NIL; + joinrel->unorderedpath = (Path *)NULL; + joinrel->cheapestpath = (Path *)NULL; + joinrel->pruneable = true; + joinrel->classlist = NULL; + joinrel->relam = InvalidOid; + joinrel->ordering = NULL; + joinrel->clauseinfo = NIL; + joinrel->joininfo = NULL; + joinrel->innerjoin = NIL; + joinrel->superrels = NIL; + + joinrel->relids = lcons(outer_rel->relids, lcons(inner_rel->relids, NIL)); + + new_outer_tlist = nconc(new_outer_tlist,new_inner_tlist); + joinrel->targetlist = new_outer_tlist; + + if (joininfo) { + joinrel->clauseinfo = joininfo->jinfoclauseinfo; + if (BushyPlanFlag) joininfo->inactive = true; + } + + joinrel_joininfo_list = + new_joininfo_list(append(outer_rel->joininfo, inner_rel->joininfo), + intAppend(outer_rel->relids, inner_rel->relids)); + + joinrel->joininfo = joinrel_joininfo_list; + + geqo_joinrel_size(joinrel, outer_rel, inner_rel); + + return(joinrel); +} + +/* + * new-join-tlist-- + * Builds a join relations's target list by keeping those elements that + * will be in the final target list and any other elements that are still + * needed for future joins. For a target list entry to still be needed + * for future joins, its 'joinlist' field must not be empty after removal + * of all relids in 'other-relids'. + * + * 'tlist' is the target list of one of the join relations + * 'other-relids' is a list of relids contained within the other + * join relation + * 'first-resdomno' is the resdom number to use for the first created + * target list entry + * + * Returns the new target list. + */ +static List * +new_join_tlist(List *tlist, + List *other_relids, + int first_resdomno) +{ + int resdomno = first_resdomno - 1; + TargetEntry *xtl = NULL; + List *temp_node = NIL; + List *t_list = NIL; + List *i = NIL; + List *join_list = NIL; + bool in_final_tlist =false; + + + foreach(i,tlist) { + xtl= lfirst(i); + in_final_tlist = (join_list==NIL); + if( in_final_tlist) { + resdomno += 1; + temp_node = + lcons(create_tl_element(get_expr(xtl), + resdomno), + NIL); + t_list = nconc(t_list,temp_node); + } + } + + return(t_list); +} + +/* + * new-joininfo-list-- + * Builds a join relation's joininfo list by checking for join clauses + * which still need to used in future joins involving this relation. A + * join clause is still needed if there are still relations in the clause + * not contained in the list of relations comprising this join relation. + * New joininfo nodes are only created and added to + * 'current-joininfo-list' if a node for a particular join hasn't already + * been created. + * + * 'current-joininfo-list' contains a list of those joininfo nodes that + * have already been built + * 'joininfo-list' is the list of join clauses involving this relation + * 'join-relids' is a list of relids corresponding to the relations + * currently being joined + * + * Returns a list of joininfo nodes, new and old. + */ +static List * +new_joininfo_list(List *joininfo_list, List *join_relids) +{ + List *current_joininfo_list = NIL; + List *new_otherrels = NIL; + JInfo *other_joininfo = (JInfo*)NULL; + List *xjoininfo = NIL; + + foreach (xjoininfo, joininfo_list) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + + new_otherrels = joininfo->otherrels; + if (nonoverlap_sets(new_otherrels,join_relids)) { + other_joininfo = joininfo_member(new_otherrels, + current_joininfo_list); + if(other_joininfo) { + other_joininfo->jinfoclauseinfo = + (List*)LispUnion(joininfo->jinfoclauseinfo, + other_joininfo->jinfoclauseinfo); + }else { + other_joininfo = makeNode(JInfo); + + other_joininfo->otherrels = + joininfo->otherrels; + other_joininfo->jinfoclauseinfo = + joininfo->jinfoclauseinfo; + other_joininfo->mergesortable = + joininfo->mergesortable; + other_joininfo->hashjoinable = + joininfo->hashjoinable; + other_joininfo->inactive = false; + + current_joininfo_list = lcons(other_joininfo, + current_joininfo_list); + } + } + } + + return(current_joininfo_list); +} + +/* + * add-new-joininfos-- + * For each new join relation, create new joininfos that + * use the join relation as inner relation, and add + * the new joininfos to those rel nodes that still + * have joins with the join relation. + * + * 'joinrels' is a list of join relations. + * + * Modifies the joininfo field of appropriate rel nodes. + */ +static void +geqo_add_new_joininfos(Query *root, List *joinrels, List *outerrels) +{ + List *xjoinrel = NIL; + List *xrelid = NIL; + List *xrel = NIL; + List *xjoininfo = NIL; + + Rel *rel; + List *relids; + + List *super_rels; + List *xsuper_rel = NIL; + JInfo *new_joininfo; + + foreach(xjoinrel, joinrels) { + Rel *joinrel = (Rel *)lfirst(xjoinrel); + foreach(xrelid, joinrel->relids) { + /* length(joinrel->relids) should always be greater that 1, because of *JOIN* */ + /* ! BUG BUG ! + Relid relid = (Relid)lfirst(xrelid); + Rel *rel = get_join_rel(root, relid); + */ + + /* + if ( (root->join_relation_list_) != NIL ) { + rel = get_join_rel(root, xrelid); + } + else { + rel = get_base_rel(root, lfirsti(xrelid)); + } + */ + + /* NOTE: STILL BUGGY FOR CLAUSE-JOINS: */ + /* + relids = lconsi(lfirsti(xrelid), NIL); + rel = rel_member(relids, outerrels); + */ + + relids = lconsi(lfirsti(xrelid), NIL); + rel = rel_member(relids, root->base_relation_list_); + + add_superrels(rel,joinrel); + } + } + foreach(xjoinrel, joinrels) { + Rel *joinrel = (Rel *)lfirst(xjoinrel); + + foreach(xjoininfo, joinrel->joininfo) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + List *other_rels = joininfo->otherrels; + List *clause_info = joininfo->jinfoclauseinfo; + bool mergesortable = joininfo->mergesortable; + bool hashjoinable = joininfo->hashjoinable; + + foreach(xrelid, other_rels) { + /* ! BUG BUG ! + Relid relid = (Relid)lfirst(xrelid); + Rel *rel = get_join_rel(root, relid); + */ + + /* + if ( (root->join_relation_list_) != NIL ) { + rel = get_join_rel(root, xrelid); + } + else { + rel = get_base_rel(root, lfirsti(xrelid)); + } + */ + + /* NOTE: STILL BUGGY FOR CLAUSE-JOINS: */ + /* + relids = lconsi(lfirsti(xrelid), NIL); + rel = rel_member(relids, outerrels); + */ + + relids = lconsi(lfirsti(xrelid), NIL); + rel = rel_member(relids, root->base_relation_list_); + + super_rels = rel->superrels; + new_joininfo = makeNode(JInfo); + + new_joininfo->otherrels = joinrel->relids; + new_joininfo->jinfoclauseinfo = clause_info; + new_joininfo->mergesortable = mergesortable; + new_joininfo->hashjoinable = hashjoinable; + new_joininfo->inactive = false; + rel->joininfo = + lappend(rel->joininfo, new_joininfo); + + foreach(xsuper_rel, super_rels) { + Rel *super_rel = (Rel *)lfirst(xsuper_rel); + + if( nonoverlap_rels(super_rel,joinrel) ) { + List *new_relids = super_rel->relids; + JInfo *other_joininfo = + joininfo_member(new_relids, + joinrel->joininfo); + + if (other_joininfo) { + other_joininfo->jinfoclauseinfo = + (List*)LispUnion(clause_info, + other_joininfo->jinfoclauseinfo); + } else { + JInfo *new_joininfo = makeNode(JInfo); + + new_joininfo->otherrels = new_relids; + new_joininfo->jinfoclauseinfo = clause_info; + new_joininfo->mergesortable = mergesortable; + new_joininfo->hashjoinable = hashjoinable; + new_joininfo->inactive = false; + joinrel->joininfo = + lappend(joinrel->joininfo, + new_joininfo); + } + } + } + } + } + } + foreach(xrel, outerrels) { + rel = (Rel *)lfirst(xrel); + rel->superrels = NIL; + } +} + +/* + * final-join-rels-- + * Find the join relation that includes all the original + * relations, i.e. the final join result. + * + * 'join-rel-list' is a list of join relations. + * + * Returns the list of final join relations. + */ +static List * +geqo_final_join_rels(List *join_rel_list) +{ + List *xrel = NIL; + List *temp = NIL; + List *t_list = NIL; + + /* + * find the relations that has no further joins, + * i.e., its joininfos all have otherrels nil. + */ + foreach(xrel,join_rel_list) { + Rel *rel = (Rel *)lfirst(xrel); + List *xjoininfo = NIL; + bool final = true; + + foreach (xjoininfo, rel->joininfo) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + + if (joininfo->otherrels != NIL) { + final = false; + break; + } + } + if (final) { + temp = lcons(rel, NIL); + t_list = nconc(t_list, temp); + } + } + + return(t_list); +} + +/* + * add_superrels-- + * add rel to the temporary property list superrels. + * + * 'rel' a rel node + * 'super-rel' rel node of a join relation that includes rel + * + * Modifies the superrels field of rel + */ +static void +add_superrels(Rel *rel, Rel *super_rel) +{ + rel->superrels = lappend(rel->superrels, super_rel); +} + +/* + * nonoverlap-rels-- + * test if two join relations overlap, i.e., includes the same + * relation. + * + * 'rel1' and 'rel2' are two join relations + * + * Returns non-nil if rel1 and rel2 do not overlap. + */ +static bool +nonoverlap_rels(Rel *rel1, Rel *rel2) +{ + return(nonoverlap_sets(rel1->relids, rel2->relids)); +} + +static bool +nonoverlap_sets(List *s1, List *s2) +{ + List *x = NIL; + + foreach(x,s1) { + int e = lfirsti(x); + if(intMember(e,s2)) + return(false); + } + return(true); +} + +/* + * geqo_joinrel_size-- + * compute estimate for join relation tuples, even for + * long join queries; so get logarithm of size when MAXINT overflow; + */ +static void +geqo_joinrel_size(Rel *joinrel, Rel *outer_rel, Rel *inner_rel) +{ + Cost temp; + int ntuples; + + temp = (Cost) inner_rel->tuples * (Cost) outer_rel->tuples; /* cartesian product */ + + if (joinrel->clauseinfo) { + temp = temp * product_selec(joinrel->clauseinfo); + } + + if (temp >= (MAXINT -1)) { + ntuples = ceil( geqo_log((double)temp, (double) GEQO_LOG_BASE) ); + } + else { + ntuples = ceil((double)temp); + } + + if (ntuples < 1) ntuples = 1; /* make the best case 1 instead of 0 */ + + joinrel->tuples = ntuples; +} + +double +geqo_log(double x, double b) +{ + return(log(x)/log(b)); +} diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c new file mode 100644 index 0000000000..6dadbb1839 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -0,0 +1,271 @@ +/*------------------------------------------------------------------------ + * + * geqo_main.c-- + * solution of the query optimization problem + * by means of a Genetic Algorithm (GA) + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_main.c,v 1.1 1997/02/19 12:57:05 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/plannodes.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_pool.h" +#include "optimizer/geqo_selection.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_mutation.h" +#include "optimizer/geqo_misc.h" + + +/* define edge recombination crossover [ERX] per default */ +#if !defined(ERX) && \ + !defined(PMX) && \ + !defined(CX) && \ + !defined(PX) && \ + !defined(OX1) && \ + !defined(OX2) +#define ERX +#endif + + +/* + * geqo-- + * solution of the query optimization problem + * similar to a constrained Traveling Salesman Problem (TSP) + */ + +Rel * +geqo(Query *root) +{ + int i,j; + int generation; + Chromosome *momma; + Chromosome *daddy; + Chromosome *kid; + + Edge *edge_table; /* list of edges */ + int edge_failures=0; + float difference; + + City *city_table; /* list of cities */ + int cycle_diffs=0; + + int mutations=0; + + int number_of_rels; + List *r = NIL; + List *rel_list = (List *) root->base_relation_list_; + + Pool *pool; + int pool_size, number_generations, status_interval; + + Gene *best_tour; + Rel *best_rel; + Plan *best_plan; + + +/* set tour size */ + number_of_rels = length(root->base_relation_list_); + +/* set GA parameters */ + geqo_params(number_of_rels) ; /* out of "$PGDATA/pg_geqo" file */ + pool_size = PoolSize; + number_generations = Generations; + status_interval = 10; + +/* seed random number generator */ + srandom(RandomSeed); + +/* allocate genetic pool memory */ + pool = alloc_pool(pool_size, number_of_rels); + +/* random initialization of the pool */ + random_init_pool (root, pool, 0, pool->size); + +/* sort the pool according to cheapest path as fitness */ + sort_pool (pool); /* we have to do it only one time, since all kids replace the worst individuals in future (-> geqo_pool.c:spread_chromo ) */ + +/* allocate chromosome momma and daddy memory */ + momma = alloc_chromo(pool->string_length); + daddy = alloc_chromo(pool->string_length); + +#if defined (ERX) + elog(DEBUG,"geqo_main: using edge recombination crossover [ERX]"); +/* allocate edge table memory */ + edge_table = alloc_edge_table(pool->string_length); +#elif defined(PMX) + elog(DEBUG,"geqo_main: using partially matched crossover [PMX]"); +/* allocate chromosome kid memory */ + kid = alloc_chromo(pool->string_length); +#elif defined(CX) + elog(DEBUG,"geqo_main: using cycle crossover [CX]"); +/* allocate city table memory */ + kid = alloc_chromo(pool->string_length); + city_table = alloc_city_table(pool->string_length); +#elif defined(PX) + elog(DEBUG,"geqo_main: using position crossover [PX]"); +/* allocate city table memory */ + kid = alloc_chromo(pool->string_length); + city_table = alloc_city_table(pool->string_length); +#elif defined(OX1) + elog(DEBUG,"geqo_main: using order crossover [OX1]"); +/* allocate city table memory */ + kid = alloc_chromo(pool->string_length); + city_table = alloc_city_table(pool->string_length); +#elif defined(OX2) + elog(DEBUG,"geqo_main: using order crossover [OX2]"); +/* allocate city table memory */ + kid = alloc_chromo(pool->string_length); + city_table = alloc_city_table(pool->string_length); +#endif + + +/* my pain main part: */ +/* iterative optimization */ + + for (generation = 0; generation < number_generations; generation++) { + + /* SELECTION */ + geqo_selection(momma, daddy, pool, SelectionBias); /* using linear bias function */ + + + +#if defined (ERX) + /* EDGE RECOMBINATION CROSSOVER */ + difference = gimme_edge_table(momma->string, daddy->string, pool->string_length, edge_table); + + /* let the kid grow in momma's womb (storage) for nine months ;-) */ + /* sleep(23328000) -- har har har */ + kid = momma; + + /* are there any edge failures ? */ + edge_failures += gimme_tour(edge_table, kid->string, pool->string_length); +#elif defined(PMX) + /* PARTIALLY MATCHED CROSSOVER */ + pmx(momma->string, daddy->string, kid->string, pool->string_length); +#elif defined(CX) + /* CYCLE CROSSOVER */ + cycle_diffs = + cx(momma->string, daddy->string, kid->string, pool->string_length, city_table); + /* mutate the child */ + if (cycle_diffs == 0) { + mutations++; + geqo_mutation (kid->string, pool->string_length); + } +#elif defined(PX) + /* POSITION CROSSOVER */ + px(momma->string, daddy->string, kid->string, pool->string_length, city_table); +#elif defined(OX1) + /* ORDER CROSSOVER */ + ox1(momma->string, daddy->string, kid->string, pool->string_length, city_table); +#elif defined(OX2) + /* ORDER CROSSOVER */ + ox2(momma->string, daddy->string, kid->string, pool->string_length, city_table); +#endif + + + /* EVALUATE FITNESS */ + kid->worth = geqo_eval (root, kid->string, pool->string_length); + + /* push the kid into the wilderness of life according to its worth */ + spread_chromo (kid, pool); + + +#ifdef GEQO_DEBUG + if (status_interval && !(generation % status_interval)) + print_gen (stdout, pool, generation); +#endif + + } /* end of iterative optimization */ + + +#if defined(ERX) && defined(GEQO_DEBUG) +if (edge_failures != 0) + fprintf (stdout, "\nFailures: %d Avg: %d\n", edge_failures, (int) generation/edge_failures); + +else fprintf (stdout, "No edge failures detected.\n"); +#endif + + +#if defined(CX) && defined(GEQO_DEBUG) +if (mutations != 0) + fprintf (stdout, "\nMutations: %d Generations: %d\n", mutations, generation); + +else fprintf (stdout, "No mutations processed.\n"); +#endif + + +#ifdef GEQO_DEBUG +fprintf (stdout, "\n"); +print_pool (stdout, pool, 0, pool_size-1); +#endif + + +/* got the cheapest query tree processed by geqo; + first element of the population indicates the best query tree */ + +best_tour = (Gene *) pool->data[0].string; + +/* root->join_relation_list_ will be modified during this ! */ +best_rel = (Rel *) gimme_tree(root, best_tour, 0, pool->string_length, NULL); + +/* DBG: show the query plan +print_plan(best_plan, root); + DBG */ + +/* ... free memory stuff */ +free_chromo(momma); +free_chromo(daddy); + +#if defined (ERX) +free_edge_table(edge_table); +#elif defined(PMX) +free_chromo(kid); +#elif defined(CX) +free_chromo(kid); +free_city_table(city_table); +#elif defined(PX) +free_chromo(kid); +free_city_table(city_table); +#elif defined(OX1) +free_chromo(kid); +free_city_table(city_table); +#elif defined(OX2) +free_chromo(kid); +free_city_table(city_table); +#endif + +free_pool(pool); + +return(best_rel); +} + diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c new file mode 100644 index 0000000000..e5559e7a20 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -0,0 +1,248 @@ +/*------------------------------------------------------------------------ + * + * geqo_misc.c-- + * misc. printout and debug stuff + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_misc.c,v 1.1 1997/02/19 12:57:09 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + + +#include <stdio.h> + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo_pool.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_misc.h" + +static float avg_pool (Pool *pool); + +/* avg_pool-- + * + */ +static float +avg_pool (Pool *pool) +{ + int i; + double cumulative = 0.0; + + if (pool->size==0) + elog(WARN,"avg_pool: pool_size of zero"); + + for (i=0; i<pool->size; i++) + cumulative = cumulative + pool->data[i].worth; + + return ((float) cumulative/pool->size); +} + +/* print_pool-- + */ +void +print_pool (FILE *fp, Pool *pool, int start, int stop) +{ + int i, j; + + /* be extra careful that start and stop are valid inputs */ + + if (start < 0) start = 0; + if (stop > pool->size) stop = pool->size; + + if (start+stop > pool->size) { + start = 0; + stop = pool->size; + } + + for (i=start; i<stop; i++) { + fprintf (fp, "%d)\t", i); + for (j=0; j<pool->string_length; j++) + fprintf (fp, "%d ", pool->data[i].string[j]); + fprintf (fp, "%f\n", pool->data[i].worth); + } +} + +/* print_gen-- + * + * printout for chromosome: best, worst, mean, average + * + */ +void +print_gen(FILE *fp, Pool *pool, int generation) +{ + int lowest; + + /* Get index to lowest ranking gene in poplulation. */ + /* Use 2nd to last since last is buffer. */ + lowest = pool->size > 1 ? pool->size-2 : 0; + + fprintf (fp, + "%5d | Bst: %f Wst: %f Mean: %f Avg: %f\n", + generation, + pool->data[0].worth, + pool->data[lowest].worth, + pool->data[pool->size/2].worth, + avg_pool(pool)); +} + + +void +print_edge_table (FILE *fp, Edge *edge_table, int num_gene) +{ + int i,j; + + fprintf (fp, "\nEDGE TABLE\n"); + + for (i=1; i<=num_gene; i++) + { + fprintf (fp, "%d :", i); + for (j=0; j<edge_table[i].unused_edges; j++) + fprintf (fp, " %d", edge_table[i].edge_list[j]); + fprintf (fp, "\n"); + } + + fprintf (fp, "\n"); +} + +/************************************************************* + Debug output subroutines + *************************************************************/ + +void +geqo_print_joinclauses(Query *root, List *clauses) +{ + List *l; + extern void print_expr(Node *expr, List *rtable); /* in print.c */ + + foreach(l, clauses) { + CInfo *c = lfirst(l); + + print_expr((Node*)c->clause, root->rtable); + if (lnext(l)) printf(" "); + } +} + +void +geqo_print_path(Query *root, Path *path, int indent) +{ + char *ptype = NULL; + JoinPath *jp; + bool join; + int i; + + for(i=0; i < indent; i++) + printf("\t"); + + switch(nodeTag(path)) { + case T_Path: + ptype = "SeqScan"; join=false; break; + case T_IndexPath: + ptype = "IdxScan"; join=false; break; + case T_JoinPath: + ptype = "Nestloop"; join=true; break; + case T_MergePath: + ptype = "MergeJoin"; join=true; break; + case T_HashPath: + ptype = "HashJoin"; join=true; break; + default: + break; + } + if (join) { + int size = path->parent->size; + jp = (JoinPath*)path; + printf("%s size=%d cost=%f\n", ptype, size, path->path_cost); + switch(nodeTag(path)) { + case T_MergePath: + case T_HashPath: + for(i=0; i < indent+1; i++) + printf("\t"); + printf(" clauses=("); + geqo_print_joinclauses(root, + ((JoinPath*)path)->pathclauseinfo); + printf(")\n"); + + if (nodeTag(path)==T_MergePath) { + MergePath *mp = (MergePath*)path; + if (mp->outersortkeys || mp->innersortkeys) { + for(i=0; i < indent+1; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d\n", + ((mp->outersortkeys)?1:0), + ((mp->innersortkeys)?1:0)); + } + } + break; + default: + break; + } + geqo_print_path(root, jp->outerjoinpath, indent+1); + geqo_print_path(root, jp->innerjoinpath, indent+1); + } else { + int size = path->parent->size; + int relid = lfirsti(path->parent->relids); + printf("%s(%d) size=%d cost=%f", + ptype, relid, size, path->path_cost); + + if (nodeTag(path)==T_IndexPath) { + List *k, *l; + + printf(" keys="); + foreach (k, path->keys) { + printf("("); + foreach (l, lfirst(k)) { + Var *var = lfirst(l); + printf("%d.%d", var->varnoold, var->varoattno); + if (lnext(l)) printf(", "); + } + printf(")"); + if (lnext(k)) printf(", "); + } + } + printf("\n"); + } +} + +void +geqo_print_rel(Query *root, Rel *rel) +{ + List *l; + + printf("______________________________\n"); + printf("("); + foreach(l, rel->relids) { + printf("%d ", lfirsti(l)); + } + printf("): size=%d width=%d\n", rel->size, rel->width); + + printf("\tpath list:\n"); + foreach (l, rel->pathlist) { + geqo_print_path(root, lfirst(l), 1); + } + + printf("\tcheapest path:\n"); + geqo_print_path(root, rel->cheapestpath, 1); +} diff --git a/src/backend/optimizer/geqo/geqo_mutation.c b/src/backend/optimizer/geqo/geqo_mutation.c new file mode 100644 index 0000000000..9d54456421 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_mutation.c @@ -0,0 +1,76 @@ +/*------------------------------------------------------------------------ +* +* geqo_mutation.c-- +* +* TSP mutation routines +* +* $Id: geqo_mutation.c,v 1.1 1997/02/19 12:57:13 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* this is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_mutation.h" + + void + geqo_mutation (Gene *tour, int num_gene) + { + int swap1; + int swap2; + int num_swaps = geqo_randint (num_gene/3, 0); + Gene temp; + + + while (num_swaps > 0) { + swap1 = geqo_randint (num_gene-1, 0); + swap2 = geqo_randint (num_gene-1, 0); + + while (swap1 == swap2) + swap2 = geqo_randint (num_gene-1, 0); + + temp = tour[swap1]; + tour[swap1] = tour[swap2]; + tour[swap2] = temp; + + + num_swaps -= 1; + } +} diff --git a/src/backend/optimizer/geqo/geqo_params.c b/src/backend/optimizer/geqo/geqo_params.c new file mode 100644 index 0000000000..47c56ba333 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_params.c @@ -0,0 +1,323 @@ +/*------------------------------------------------------------------------ +* +* geqo_params.c-- +* routines for determining necessary genetic optimization parameters +* +* Copyright (c) 1994, Regents of the University of California +* +* $Id: geqo_params.c,v 1.1 1997/02/19 12:57:20 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#include <stdio.h> +#include <time.h> +#include <math.h> + +#include "postgres.h" +#include "miscadmin.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" + +#define POOL_TAG "Pool_Size" +#define TRIAL_TAG "Generations" +#define RAND_TAG "Random_Seed" +#define BIAS_TAG "Selection_Bias" + +#define EFFORT_TAG "Effort" /* optimization effort and */ +#define LOW "low" /* corresponding tags */ +#define MEDIUM "medium" +#define HIGH "high" + +#define MAX_TOKEN 80 /* Maximum size of one token in the * + * configuration file */ + +static int gimme_pool_size(int string_length); +static int gimme_number_generations(int pool_size, int effort); +static int next_token(FILE *, char *, int); + +/* + * geqo_param-- + * get ga parameters out of "$PGDATA/pg_geqo" file. + */ +void +geqo_params(int string_length) +{ + int i; + + char buf[MAX_TOKEN]; + FILE *file; + + char *conf_file; + +/* these static variables are used to signal that a value has been set */ + int pool_size = 0; + int number_trials = 0; + int random_seed = 0; + int selection_bias = 0; + int effort = 0; + + + /* put together the full pathname to the config file */ + conf_file = + (char *) palloc((strlen(DataDir)+strlen(GEQO_FILE)+2)*sizeof(char)); + + sprintf(conf_file, "%s/%s", DataDir, GEQO_FILE); + + /* open the config file */ + file = fopen(conf_file, "r"); + if (file) + { + /* + * empty and comment line stuff + */ + while ((i = next_token(file, buf, sizeof(buf))) != EOF) + { + /* If only token on the line, ignore */ + if (i == '\n') continue; + + /* Comment -- read until end of line then next line */ + if (buf[0] == '#') + { + while (next_token(file, buf, sizeof(buf)) == 0) ; + continue; + } + + /* + * get ga parameters by parsing + */ + + /*------------------------------------------------- pool size */ + if ( strcmp(buf, POOL_TAG) == 0 ) + { + i = next_token(file, buf, sizeof(buf)); /* get next token */ + + if (i != EOF) /* only ignore if we got no text at all */ + { + if (sscanf (buf, "%d", &PoolSize) == 1) pool_size = 1; + } + + } + + /*------------------------------------------------- number of trials */ + else if ( strcmp(buf, TRIAL_TAG) == 0 ) + { + i = next_token(file, buf, sizeof(buf)); + + if (i != EOF) + { + if (sscanf (buf, "%d", &Generations) == 1) number_trials = 1; + } + + } + + /*------------------------------------------------- optimization effort */ + else if ( strcmp(buf, EFFORT_TAG) == 0 ) + { + i = next_token(file, buf, sizeof(buf)); + + if (i != EOF) + { + if (strcmp (buf, LOW) == 0) effort = LOW_EFFORT; + else if (strcmp (buf, MEDIUM) == 0) effort = MEDIUM_EFFORT; + else if (strcmp (buf, HIGH) == 0) effort = HIGH_EFFORT; + } + + } + + /*------------------------------------------- random seed */ + else if ( strcmp(buf, RAND_TAG) == 0 ) + { + i = next_token(file, buf, sizeof(buf)); + + if (i != EOF) + { + if (sscanf (buf, "%ld", &RandomSeed) == 1) random_seed = 1; + } + + } + + /*------------------------------------------- selection bias */ + else if ( strcmp(buf, BIAS_TAG) == 0 ) + { + i = next_token(file, buf, sizeof(buf)); + + if (i != EOF) + { + if (sscanf (buf, "%f", &SelectionBias) == 1) selection_bias = 1; + } + + } + + /* unrecognized tags */ + else + { + if (i != EOF) + { + } + + elog(DEBUG,"geqo_params: unknown parameter type \"%s\"\nin file \'%s\'", buf, conf_file); + + /* if not at end-of-line, keep reading til we are */ + while (i == 0) i = next_token(file, buf, sizeof(buf)); + } + } + + fclose(file); + + pfree(conf_file); + } + + else + { + elog(DEBUG,"geqo_params: ga parameter file\n\'%s\'\ndoes not exist or permissions are not setup correctly", conf_file); + } + + /* + * parameter checkings follow + */ + + /**************** PoolSize: essential ****************/ + if ( !(pool_size) ) + { + PoolSize = gimme_pool_size(string_length); + + elog(DEBUG,"geqo_params: no pool size specified;\nusing computed value of %d", PoolSize); + } + + + /**************** Effort: essential ****************/ + if ( !(effort) ) + { + if (PoolSize == MAX_POOL) + effort = HIGH_EFFORT; + else + effort = MEDIUM_EFFORT; + + elog(DEBUG,"geqo_params: no optimization effort specified;\nusing value of %d", effort); + + } + + /**************** Generations: essential ****************/ + if ( !(number_trials) ) + { + Generations = gimme_number_generations(PoolSize, effort); + + elog(DEBUG,"geqo_params: no number of trials specified;\nusing computed value of %d", Generations); + + } + + /* RandomSeed: */ + if ( !(random_seed) ) + { + RandomSeed = (long) time(NULL); + + elog(DEBUG,"geqo_params: no random seed specified;\nusing computed value of %ld", RandomSeed); + } + + /* SelectionBias: */ + if ( !(selection_bias) ) + { + SelectionBias = SELECTION_BIAS; + + elog(DEBUG,"geqo_params: no selection bias specified;\nusing default value of %f", SelectionBias); + } + +} + + +/* + * Grab one token out of fp. Defined as the next string of non-whitespace + * in the file. After we get the token, continue reading until EOF, end of + * line or the next token. If it's the last token on the line, return '\n' + * for the value. If we get EOF before reading a token, return EOF. In all + * other cases return 0. + */ +static int +next_token(FILE *fp, char *buf, int bufsz) +{ + int c; + char *eb = buf+(bufsz-1); + + /* Discard inital whitespace */ + while (isspace(c = getc(fp))) ; + + /* EOF seen before any token so return EOF */ + if (c == EOF) return -1; + + /* Form a token in buf */ + do { + if (buf < eb) *buf++ = c; + c = getc(fp); + } while (!isspace(c) && c != EOF); + *buf = '\0'; + + /* Discard trailing tabs and spaces */ + while (c == ' ' || c == '\t') c = getc(fp); + + /* Put back the char that was non-whitespace (putting back EOF is ok) */ + (void) ungetc(c, fp); + + /* If we ended with a newline, return that, otherwise return 0 */ + return (c == '\n' ? '\n' : 0); +} + +/* gimme_pool_size-- + * compute good estimation for pool size + * according to number of involved rels in a query + */ +static int +gimme_pool_size(int string_length) +{ + double exponent; + double size; + + exponent = (double) string_length + 1.0; + + size = pow (2.0, exponent); + + if (size < MIN_POOL) { + return (MIN_POOL); + } + else if (size > MAX_POOL) { + return (MAX_POOL); + } + else + return ( (int) ceil(size) ); +} + +/* gimme_number_generations-- + * compute good estimation for number of generations size + * for convergence + */ +static int +gimme_number_generations(int pool_size, int effort) +{ + int number_gens; + + number_gens = (int) ceil ( geqo_log((double) pool_size, 2.0) ); + + return (effort * number_gens); +} diff --git a/src/backend/optimizer/geqo/geqo_paths.c b/src/backend/optimizer/geqo/geqo_paths.c new file mode 100644 index 0000000000..c6a7ce512b --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_paths.c @@ -0,0 +1,145 @@ +/*------------------------------------------------------------------------- + * + * geqo_paths.c-- + * Routines to process redundant paths and relations + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_paths.c,v 1.1 1997/02/19 12:57:25 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_paths.h" + + +static List *geqo_prune_rel(Rel *rel, List *other_rels); +static Path *set_paths(Rel *rel, Path *unorderedpath); + +/* + * geqo-prune-rels-- + * Removes any redundant relation entries from a list of rel nodes + * 'rel-list'. + * + * Returns the resulting list. + * + */ +List *geqo_prune_rels(List *rel_list) +{ + List *temp_list = NIL; + + if (rel_list != NIL) { + temp_list = lcons(lfirst(rel_list), + geqo_prune_rels(geqo_prune_rel((Rel*)lfirst(rel_list), + lnext(rel_list)))); + } + return(temp_list); +} + +/* + * geqo-prune-rel-- + * Prunes those relations from 'other-rels' that are redundant with + * 'rel'. A relation is redundant if it is built up of the same + * relations as 'rel'. Paths for the redundant relation are merged into + * the pathlist of 'rel'. + * + * Returns a list of non-redundant relations, and sets the pathlist field + * of 'rel' appropriately. + * + */ +static List * +geqo_prune_rel(Rel *rel, List *other_rels) +{ + List *i = NIL; + List *t_list = NIL; + List *temp_node = NIL; + Rel *other_rel = (Rel *)NULL; + + foreach(i, other_rels) { + other_rel = (Rel*)lfirst(i); + if(same(rel->relids, other_rel->relids)) { + rel->pathlist = add_pathlist(rel, + rel->pathlist, + other_rel->pathlist); + t_list = nconc(t_list, NIL); /* XXX is this right ? */ + } else { + temp_node = lcons(other_rel, NIL); + t_list = nconc(t_list,temp_node); + } + } + return(t_list); +} + +/* + * geqo-rel-paths-- + * For a relation 'rel' (which corresponds to a join + * relation), set pointers to the unordered path and cheapest paths + * (if the unordered path isn't the cheapest, it is pruned), and + * reset the relation's size field to reflect the join. + * + * Returns nothing of interest. + * + */ +void +geqo_rel_paths(Rel *rel) +{ + List *y = NIL; + Path *path; + JoinPath *cheapest = (JoinPath*)NULL; + + foreach(y, rel->pathlist) { + path = (Path*)lfirst(y); + + if(!path->p_ordering.ord.sortop) { + break; + } + } + + cheapest = (JoinPath*)set_paths(rel, path); + rel->size = compute_joinrel_size(cheapest); +} + + +/* + * set-path-- + * Compares the unordered path for a relation with the cheapest path. If + * the unordered path is not cheapest, it is pruned. + * + * Resets the pointers in 'rel' for unordered and cheapest paths. + * + * Returns the cheapest path. + * + */ +static Path * +set_paths(Rel *rel, Path *unorderedpath) +{ + Path *cheapest = set_cheapest(rel, rel->pathlist); + + /* don't prune if not pruneable -- JMH, 11/23/92 */ + if(unorderedpath != cheapest + && rel->pruneable) { + + rel->unorderedpath = (Path *)NULL; + rel->pathlist = lremove(unorderedpath, rel->pathlist); + } else { + rel->unorderedpath = (Path *)unorderedpath; + } + + return(cheapest); +} + diff --git a/src/backend/optimizer/geqo/geqo_pmx.c b/src/backend/optimizer/geqo/geqo_pmx.c new file mode 100644 index 0000000000..c6e699cfa9 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_pmx.c @@ -0,0 +1,210 @@ +/*------------------------------------------------------------------------ +* +* geqo_pmx.c-- +* +* partially matched crossover [PMX] routines; +* PMX operator according to Goldberg & Lingle +* (Proc Int'l Conf on GA's) +* +* $Id: geqo_pmx.c,v 1.1 1997/02/19 12:57:28 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the pmx algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_random.h" + + +/* pmx-- + * + * partially matched crossover + */ +void +pmx(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene) +{ + int *failed = (int *) palloc ((num_gene+1)*sizeof(int)); + int *from = (int *) palloc ((num_gene+1)*sizeof(int)); + int *indx = (int *) palloc ((num_gene+1)*sizeof(int)); + int *check_list = (int *) palloc ((num_gene+1)*sizeof(int)); + + int left, right, temp, i, j, k; + int mx_fail, found, mx_hold; + + +/* no mutation so start up the pmx replacement algorithm */ +/* initialize failed[], from[], check_list[] */ + for (k = 0; k < num_gene; k++) { + failed[k] = -1; + from[k] = -1; + check_list[k+1] = 0; + } + +/* locate crossover points */ + left = geqo_randint(num_gene-1, 0); + right = geqo_randint(num_gene-1, 0); + + if (left > right) { + temp = left; + left = right; + right = temp; + } + + +/* copy tour2 into offspring */ + for (k = 0; k < num_gene; k++) { + offspring[k] = tour2[k]; + from[k] = DAD; + check_list[tour2[k]]++; + } + +/* copy tour1 into offspring */ + for (k = left; k <= right; k++) { + check_list[offspring[k]]--; + offspring[k] = tour1[k]; + from[k] = MOM; + check_list[tour1[k]]++; + } + + +/* pmx main part */ + + mx_fail = 0; + +/* STEP 1 */ + + for (k = left; k <= right; k++) { /* for all elements in the tour1-2 */ + + if (tour1[k] == tour2[k]) found = 1; /* find match in tour2 */ + + else { + found = 0; /* substitute elements */ + + j = 0; + while ( !(found) && (j < num_gene) ) { + if ( (offspring[j] == tour1[k]) && (from[j] == DAD) ) { + + check_list[offspring[j]]--; + offspring[j] = tour2[k]; + found = 1; + check_list[tour2[k]]++; + } + + j++; + } + + } + + if ( !(found) ) { /* failed to replace gene */ + failed[mx_fail] = (int) tour1[k]; + indx[mx_fail] = k; + mx_fail++; + } + + } /* ... for */ + + +/* STEP 2 */ + + /* see if any genes could not be replaced */ + if (mx_fail > 0) { + mx_hold = mx_fail; + + for (k = 0; k < mx_hold; k++) { + found = 0; + + j = 0; + while ( !(found) && (j < num_gene) ) { + + if ( (failed[k] == (int) offspring[j]) && (from[j] == DAD) ) { + check_list[offspring[j]]--; + offspring[j] = tour2[indx[k]]; + check_list[tour2[indx[k]]]++; + + found = 1; + failed[k] = -1; + mx_fail--; + } + + j++; + } + + } /* ... for */ + + } /* ... if */ + + +/* STEP 3 */ + + for (k = 1; k <= num_gene; k++) { + + if (check_list[k] > 1) { + i = 0; + + while (i < num_gene) { + if ( (offspring[i] == (Gene) k) && (from[i] == DAD) ) { + j = 1; + + while (j <= num_gene) { + if (check_list[j] == 0) { + offspring[i] = (Gene) j; + check_list[k]--; + check_list[j]++; + i = num_gene + 1; + j = i; + } + + j++; + } + + } /* ... if */ + + i++; + } /* end while */ + + } + } /* ... for */ + + pfree(failed); + pfree(from); + pfree(indx); + pfree(check_list); +} diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c new file mode 100644 index 0000000000..98a1a6e2a0 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_pool.c @@ -0,0 +1,248 @@ +/*------------------------------------------------------------------------ + * + * geqo_pool.c-- + * Genetic Algorithm (GA) pool stuff + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_pool.c,v 1.1 1997/02/19 12:57:31 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "lib/qsort.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_pool.h" +#include "optimizer/geqo_copy.h" +#include "optimizer/geqo_recombination.h" + + +static int compare(void *arg1, void *arg2); + +/* + * alloc-pool-- + * allocates memory for GA pool + */ +Pool * +alloc_pool(int pool_size, int string_length) +{ + Pool *new_pool; + Chromosome *chromo; + int i; + + /* pool */ + new_pool = (Pool *) palloc (sizeof(Pool)); + new_pool->size = (int) pool_size; + new_pool->string_length = (int) string_length; + + /* all chromosome */ + new_pool->data = (Chromosome *) palloc (pool_size * sizeof(Chromosome)); + + /* all gene */ + chromo = (Chromosome *) new_pool->data; /* vector of all chromos */ + for (i=0; i<pool_size; i++) { + chromo[i].string = palloc((string_length+1)*sizeof(Gene)); + } + + return (new_pool); +} + +/* + * free-pool-- + * deallocates memory for GA pool + */ +void +free_pool (Pool *pool) +{ + Chromosome *chromo; + int i; + + /* all gene */ + chromo = (Chromosome *) pool->data; /* vector of all chromos */ + for (i=0; i<pool->size; i++) pfree(chromo[i].string); + + /* all chromosome */ + pfree (pool->data); + + /* pool */ + pfree (pool); +} + +/* + * random-init-pool-- + * initialize genetic pool + */ +void +random_init_pool (Query *root, Pool *pool, int strt, int stp) +{ + Chromosome *chromo = (Chromosome *) pool->data; + int i; + + for (i=strt; i<stp; i++) { + init_tour(chromo[i].string, pool->string_length); /* from "geqo_recombination.c" */ + + pool->data[i].worth = + geqo_eval(root, chromo[i].string, pool->string_length); /* "from geqo_eval.c" */ + } +} + +/* + * sort-pool-- + * sorts input pool according to worth, from smallest to largest + * + * maybe you have to change compare() for different ordering ... + */ +void +sort_pool(Pool *pool) +{ + pg_qsort(pool->data, pool->size, sizeof(Chromosome), compare); + +} + +/* + * compare-- + * static input function for pg_sort + * + * return values for sort from smallest to largest are prooved! + * don't change them! + */ +static int +compare(void *arg1, void *arg2) +{ + Chromosome chromo1 = *(Chromosome *) arg1; + Chromosome chromo2 = *(Chromosome *) arg2; + + if (chromo1.worth == chromo2.worth) + return(0); + else if (chromo1.worth > chromo2.worth) + return(1); + else + return(-1); +} + +/* alloc_chromo-- + * allocates a chromosome and string space + */ +Chromosome * +alloc_chromo (int string_length) +{ + Chromosome *chromo; + + chromo = (Chromosome *) palloc (sizeof(Chromosome)); + chromo->string = (Gene *) palloc ((string_length+1)*sizeof(Gene)); + + return (chromo); +} + +/* free_chromo-- + * deallocates a chromosome and string space + */ +void +free_chromo (Chromosome *chromo) +{ + pfree(chromo->string); + pfree(chromo); +} + +/* spread_chromo-- + * inserts a new chromosome into the pool, displacing worst gene in pool + * assumes best->worst = smallest->largest + */ +void +spread_chromo (Chromosome *chromo, Pool *pool) +{ + int top, mid, bot; + int i, index; + Chromosome swap_chromo, tmp_chromo; + + /* new chromo is so bad we can't use it */ + if (chromo->worth > pool->data[pool->size-1].worth) return; + + /* do a binary search to find the index of the new chromo */ + + top = 0; + mid = pool->size/2; + bot = pool->size-1; + index = -1; + + while (index == -1) { + /* these 4 cases find a new location */ + + if (chromo->worth <= pool->data[top].worth) + index = top; + else + if (chromo->worth == pool->data[mid].worth) + index = mid; + else + if (chromo->worth == pool->data[bot].worth) + index = bot; + else + if (bot-top <=1) + index = bot; + + + /* these 2 cases move the search indices since + a new location has not yet been found. */ + + else + if (chromo->worth < pool->data[mid].worth) { + bot = mid; + mid = top + ( (bot-top)/2 ); + } + else { /* (chromo->worth > pool->data[mid].worth) */ + top = mid; + mid = top + ( (bot-top)/2 ); + } + } /* ... while */ + + /* now we have index for chromo */ + + /* move every gene from index on down + one position to make room for chromo */ + + /* copy new gene into pool storage; + always replace worst gene in pool */ + + geqo_copy (&pool->data[pool->size-1], chromo, pool->string_length); + + swap_chromo.string = pool->data[pool->size-1].string; + swap_chromo.worth = pool->data[pool->size-1].worth; + + for (i=index; i<pool->size; i++) { + tmp_chromo.string = pool->data[i].string; + tmp_chromo.worth = pool->data[i].worth; + + pool->data[i].string = swap_chromo.string; + pool->data[i].worth = swap_chromo.worth; + + swap_chromo.string = tmp_chromo.string; + swap_chromo.worth = tmp_chromo.worth; + } +} diff --git a/src/backend/optimizer/geqo/geqo_px.c b/src/backend/optimizer/geqo/geqo_px.c new file mode 100644 index 0000000000..f060561b51 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_px.c @@ -0,0 +1,116 @@ +/*------------------------------------------------------------------------ +* +* geqo_px.c-- +* +* position crossover [PX] routines; +* PX operator according to Syswerda +* (The Genetic Algorithms Handbook, L Davis, ed) +* +* $Id: geqo_px.c,v 1.1 1997/02/19 12:57:37 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the px algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_random.h" + + +/* px-- + * + * position crossover + */ +void +px(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table) +{ + + int num_positions; + int i, pos, tour2_index, offspring_index; + + /* initialize city table */ + for (i=1; i<=num_gene; i++) { + city_table[i].used = 0; + } + + /* choose random positions that will be inherited directly from parent */ + num_positions = geqo_randint (2*num_gene/3, num_gene/3); + + /* choose random position */ + for (i=0; i<num_positions; i++) { + pos = geqo_randint (num_gene - 1, 0); + + offspring[pos] = tour1[pos]; /* transfer cities to child */ + city_table[(int) tour1[pos]].used = 1; /* mark city used */ + } + + tour2_index = 0; + offspring_index = 0; + + + /* px main part */ + + while (offspring_index < num_gene) { + + /* next position in offspring filled */ + if (!city_table[(int) tour1[offspring_index]].used) { + + /* next city in tour1 not used */ + if (!city_table[(int) tour2[tour2_index]].used) { + + /* inherit from tour1 */ + offspring[offspring_index] = tour2[tour2_index]; + + tour2_index++; + offspring_index++; + } + else { /* next city in tour2 has been used */ + tour2_index++; + } + + } + else { /* next position in offspring is filled */ + offspring_index++; + } + + } + + } + diff --git a/src/backend/optimizer/geqo/geqo_recombination.c b/src/backend/optimizer/geqo/geqo_recombination.c new file mode 100644 index 0000000000..df175dcb86 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_recombination.c @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------ +* +* geqo_recombination.c-- +* misc recombination procedures +* +* $Id: geqo_recombination.c,v 1.1 1997/02/19 12:57:42 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" +#include "optimizer/geqo_recombination.h" +#include "optimizer/geqo_random.h" + + +/* + * init_tour-- + * + * Randomly generates a legal "traveling salesman" tour + * (i.e. where each point is visited only once.) + * Essentially, this routine fills an array with all possible + * points on the tour and randomly chooses the 'next' city from + * this array. When a city is chosen, the array is shortened + * and the procedure repeated. + * + */ +void +init_tour(Gene *tour, int num_gene) +{ +Gene *tmp; +int remainder; +int next, i; + +tmp = (Gene *) palloc (num_gene*sizeof(Gene)); + +for(i = 0; i < num_gene; i++) { + tmp[i] = (Gene) i+1; /* builds tours "1 - 2 - 3" etc. */ + } + +remainder = num_gene - 1; + +for(i = 0; i < num_gene; i++) { + next = (int) geqo_randint(remainder, 0); /* choose city between 0 and remainder */ + tour[i] = tmp[next]; + tmp[next] = tmp[remainder]; + remainder--; + } + +pfree(tmp); +} + +/* alloc_city_table-- + * + * allocate memory for city table + * + */ +City * +alloc_city_table(int num_gene) +{ + City *city_table; + + /* palloc one extra location so that nodes numbered + 1..n can be indexed directly; 0 will not be used */ + + city_table = (City *) palloc ((num_gene+1)*sizeof(City)); + + return (city_table); + } + +/* free_city_table-- + * + * deallocate memory of city table + * + */ + void + free_city_table(City *city_table) + { + pfree(city_table); + } + diff --git a/src/backend/optimizer/geqo/geqo_selection.c b/src/backend/optimizer/geqo/geqo_selection.c new file mode 100644 index 0000000000..0c6502003b --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_selection.c @@ -0,0 +1,103 @@ +/*------------------------------------------------------------------------- + * + * geqo_selection.c-- + * linear selection scheme for the genetic query optimizer + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_selection.c,v 1.1 1997/02/19 12:57:46 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* this is adopted from D. Whitley's Genitor algorithm */ + +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include <math.h> + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" + +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo_selection.h" +#include "optimizer/geqo_copy.h" +#include "optimizer/geqo_random.h" + +static int linear(int max, double bias); + +/* geqo_selection-- + * + * according to bias described by input parameters, + * second genes are selected from the pool + */ +void +geqo_selection (Chromosome *momma, Chromosome *daddy, Pool *pool, double bias) +{ + int first, second; + + first = (int) linear(pool->size, bias); + second = (int) linear(pool->size, bias); + + if (pool->size > 1) { + while(first==second) + second = (int) linear(pool->size, bias); + } + + geqo_copy (momma, &pool->data[first], pool->string_length); + geqo_copy (daddy, &pool->data[second], pool->string_length); +} + +/* linear-- + * generates random integer between 0 and input max number + * using input linear bias + * + * probability distribution function is: f(x) = bias - 2(bias - 1)x + * bias = (prob of first rule) / (prob of middle rule) + * + */ + +static int +linear(int pool_size, double bias) /* bias is y-intercept of linear distribution */ +{ + double index; /* index between 0 and pop_size */ + double max = (double) pool_size; + + index = + max*( bias - sqrt ( (bias*bias) - 4.0*(bias-1.0)*geqo_rand() ) ) + / 2.0 / (bias-1.0); + + return((int) index); +} + diff --git a/src/backend/optimizer/geqo/minspantree.c b/src/backend/optimizer/geqo/minspantree.c new file mode 100644 index 0000000000..4e4c2ad11b --- /dev/null +++ b/src/backend/optimizer/geqo/minspantree.c @@ -0,0 +1,198 @@ +/*------------------------------------------------------------------------ +* +* minspantree.c-- +* routine to sort a join graph which is including cycles +* +* Copyright (c) 1994, Regents of the University of California +* +* +* IDENTIFICATION +* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/Attic/minspantree.c,v 1.1 1997/02/19 12:57:50 scrappy Exp $ +* +*------------------------------------------------------------------------- +*/ + +#include <values.h> + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "utils/palloc.h" + +#include "optimizer/cost.h" + +/* + include "optimizer/geqo/tsp.h" + */ + +#include "optimizer/geqo/geqo_gene.h" +#include "optimizer/geqo/geqo.h" + +/* + * minspantree-- + * The function minspantree computes the minimum spanning tree + * for a given number of nodes and a given distance function. + * For each pair of nodes found to be connected, a given + * function is called. Nodes are denoted by the integer numbers + * 1 .. number_of_joins, where number_of_joins is the number of nodes. +*/ + +void +minspantree(Query *root, List *join_rels, Rel *garel) +{ + int number_of_rels = length(root->base_relation_list_); + int number_of_joins = length(join_rels); + int *connectto; + /* connectto[i] = 0, if node i is already connected */ + /* to the tree, otherwise connectto[i] is the node */ + /* nearest to i, which is already connected. */ + + Cost *disttoconnect; /* disttoconnect[i]: distance between i and connectto[i] */ + + Cost dist, /* temporary */ + mindist; /* minimal distance between connected and unconnected node */ + + Cost mstlength = 0.0; /* the total length of the minimum spanning tree */ + + int count; + int n, /* newly attached node */ + nextn, /* next node to be attached */ + tempn; + + int i, id1, id2; + List *r = NIL; + Rel *joinrel = NULL; + Rel **tmprel_array; + + + /* allocate memory for matrix tmprel_array[x][y] */ + tmprel_array = (Rel **) palloc((number_of_rels+1)*sizeof(Rel *)); + for (i=0; i<=number_of_rels; i++) + (tmprel_array[i] = (Rel *) palloc ((number_of_rels+1)*sizeof(Rel))); + + /* read relations of join-relations into tmprel_array */ + + foreach(r, join_rels) { + joinrel = (Rel *)lfirst(r); + id1 = (int)lfirst(joinrel->relids); + id2 = (int)lsecond(joinrel->relids); + + if (id1 > id2) { + tmprel_array[id2][id1] = *(Rel *)joinrel; + } + else { + tmprel_array[id1][id2] = *(Rel *)joinrel; /* ever reached? */ + } + } + + /* Trivial special cases handled first */ + /* garel is global in "tsp.h" */ + + if (number_of_joins <= 2) + { + i=1; + foreach(r, join_rels) { + garel[i] = *(Rel *)lfirst(r); + i++; + } + } + + + else if (number_of_joins == 3) + { + Rel *rel12 = (Rel *) &tmprel_array[1][2]; + Rel *rel13 = (Rel *) &tmprel_array[1][3]; + Rel *rel23 = (Rel *) &tmprel_array[2][3]; + if (rel12->cheapestpath->path_cost > rel13->cheapestpath->path_cost) + { + garel[1] = tmprel_array[1][3]; + if (rel12->cheapestpath->path_cost > rel23->cheapestpath->path_cost) + { + garel[2] = tmprel_array[2][3]; + } + else + { + garel[2] = tmprel_array[1][2]; + } + } + else + { + garel[1] = tmprel_array[1][2]; + if (rel13->cheapestpath->path_cost > rel23->cheapestpath->path_cost) + { + garel[2] = tmprel_array[2][3]; + } + else + { + garel[2] = tmprel_array[1][3]; + } + } + } + + + /* now the general case */ + else + { + connectto = (int *) palloc((number_of_rels+1)*sizeof(int)); + disttoconnect = (Cost *) palloc((number_of_rels+1)*sizeof(Cost)); + + nextn = 2; + for (tempn = 2; tempn <= number_of_rels; tempn++ ) + { + connectto[tempn] = 1; + disttoconnect[tempn] = (Cost) MAXFLOAT; + } + + joinrel = NULL; + n = 1; + i = 1; + for (count = 2; count <= number_of_rels; count++ ) + { + connectto[n] = 0; + mindist = (Cost) MAXFLOAT; + for (tempn = 2; tempn <= number_of_rels; tempn++ ) + { + if (connectto[tempn] != 0) + { + if (n > tempn) { + joinrel = (Rel *) &tmprel_array[tempn][n]; + } + else { + joinrel = (Rel *) &tmprel_array[n][tempn]; + } + dist = joinrel->cheapestpath->path_cost; + + if (dist < disttoconnect[tempn]) + { + disttoconnect[tempn] = dist; + connectto[tempn] = n; + } + if (disttoconnect[tempn] < mindist) + { + mindist = disttoconnect[tempn]; + nextn = tempn; + } + } + } + n = nextn; + if (n > connectto[n]) { + garel[i] = tmprel_array[connectto[n]][n]; + } + else { + garel[i] = tmprel_array[n][connectto[n]]; + } + i++; + } + + pfree(connectto); + pfree(disttoconnect); + + } + + for (i=0; i<=number_of_rels; i++) pfree(tmprel_array[i]); + pfree(tmprel_array); +} + diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index dd0083edfb..920820302c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.4 1996/11/10 03:00:55 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.5 1997/02/19 12:58:01 scrappy Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,9 @@ #include "commands/creatinh.h" +#include "optimizer/geqo_gene.h" +#include "optimizer/geqo.h" + static void find_rel_paths(Query *root, List *rels); static List *find_join_paths(Query *root, List *outer_rels, int levels_left); @@ -158,6 +161,19 @@ find_join_paths(Query *root, List *outer_rels, int levels_left) List *new_rels; Rel *rel; + /******************************************* + * genetic query optimizer entry point * + * <utesch@aut.tu-freiberg.de> * + *******************************************/ + +#ifdef GEQO + return lcons(geqo(root), NIL); /* returns *one* Rel, so lcons it */ +#endif + + /******************************************* + * rest will be deprecated in case of GEQO * + *******************************************/ + /* * Determine all possible pairs of relations to be joined at this level. * Determine paths for joining these relation pairs and modify 'new-rels' diff --git a/src/include/config.h.in b/src/include/config.h.in index 2e1be342b1..495344b88d 100644 --- a/src/include/config.h.in +++ b/src/include/config.h.in @@ -256,6 +256,18 @@ /* #define OLD_REWRITE */ /* #define NOTYET */ +/* Genetic Query Optimization (GEQO): + * + * The GEQO module in PostgreSQL is intended for the solution of the + * query optimization problem by means of a Genetic Algorithm (GA). + * It allows the handling of large JOIN queries through non-exhaustive + * search. + * For further information see README.GEQO <utesch@aut.tu-freiberg.de>. + */ +#define GEQO /* backend/optimizer/path/allpaths.c */ + + + /* Undocumented "features"? */ #define FASTBUILD /* access/nbtree/nbtsort.c */ diff --git a/src/include/optimizer/geqo.h b/src/include/optimizer/geqo.h new file mode 100644 index 0000000000..b25d443be0 --- /dev/null +++ b/src/include/optimizer/geqo.h @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * geqo.h-- + * prototypes for various files in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo.h,v 1.1 1997/02/19 12:58:28 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#ifndef GEQO_H +#define GEQO_H + + +/* GEQO debug flag */ +/* + #define GEQO_DEBUG +*/ + +/* recombination mechanism */ +/* + #define ERX + #define PMX + #define CX + #define PX + #define OX1 + #define OX2 + */ +#define ERX + + +/* genetic algorithm parameters */ + +#define GEQO_FILE "pg_geqo" /* Name of the ga config file */ + +#define MIN_POOL 128 /* minimum number of individuals */ +#define MAX_POOL 1024 /* maximum number of individuals */ + +#define LOW_EFFORT 1 /* optimization effort values */ +#define MEDIUM_EFFORT 40 /* are multipliers for computed */ +#define HIGH_EFFORT 80 /* number of generations */ + +#define SELECTION_BIAS 2.0 /* selective pressure within population */ + /* should be 1.5 <= SELECTION_BIAS <= 2.0 */ + + int PoolSize; + int Generations; + + long RandomSeed; /* defaults to (long) time(NULL) in geqo_params.c */ + double SelectionBias; + +/* logarithmic base for rel->size decrease in case of long + queries that cause an integer overflow; used in geqo_eval.c */ + +#define GEQO_LOG_BASE 1.5 /* should be 1.0 < GEQO_LOG_BASE <= 2.0 */ + /* ^^^ */ + +/* geqo prototypes */ +extern Rel *geqo(Query *root); + +extern void geqo_params(int string_length); + +extern Cost geqo_eval (Query *root, Gene *tour, int num_gene); +double geqo_log(double x, double b); +extern Rel *gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, Rel *outer_rel); + + +#endif /* GEQO_H */ diff --git a/src/include/optimizer/geqo_copy.h b/src/include/optimizer/geqo_copy.h new file mode 100644 index 0000000000..fcc6179168 --- /dev/null +++ b/src/include/optimizer/geqo_copy.h @@ -0,0 +1,27 @@ +/*------------------------------------------------------------------------- + * + * geqo_copy.h-- + * prototypes for copy functions in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_copy.h,v 1.1 1997/02/19 12:58:32 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#ifndef GEQO_COPY_H +#define GEQO_COPY_H + + +extern void geqo_copy (Chromosome *chromo1, Chromosome *chromo2, int string_length); + +#endif /* GEQO_COPY_H */ diff --git a/src/include/optimizer/geqo_gene.h b/src/include/optimizer/geqo_gene.h new file mode 100644 index 0000000000..3d38bd26f8 --- /dev/null +++ b/src/include/optimizer/geqo_gene.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------- + * + * geqo_gene.h-- + * genome representation in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_gene.h,v 1.1 1997/02/19 12:58:37 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + + +#ifndef GEQO_GENE_H +#define GEQO_GENE_H + + +/* we presume that int instead of Relid + is o.k. for Gene; so don't change it! */ +typedef +int Gene; + +typedef struct Chromosome { + Gene *string; + Cost worth; +} Chromosome; + +typedef struct Pool { + Chromosome *data; + int size; + int string_length; +} Pool; + +#endif /* GEQO_GENE_H */ diff --git a/src/include/optimizer/geqo_misc.h b/src/include/optimizer/geqo_misc.h new file mode 100644 index 0000000000..904b85123e --- /dev/null +++ b/src/include/optimizer/geqo_misc.h @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * geqo_misc.h-- + * prototypes for printout routines in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_misc.h,v 1.1 1997/02/19 12:58:41 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#ifndef GEQO_MISC_H +#define GEQO_MISC_H + +#include <stdio.h> + +extern void print_pool (FILE *fp, Pool *pool, int start, int stop); +extern void print_gen(FILE *fp, Pool *pool, int generation); +extern void print_edge_table (FILE *fp, Edge *edge_table, int num_gene); + +extern void geqo_print_rel(Query *root, Rel *rel); +extern void geqo_print_path(Query *root, Path *path, int indent); +extern void geqo_print_joinclauses(Query *root, List *clauses); + +#endif /* GEQO_MISC_H */ diff --git a/src/include/optimizer/geqo_mutation.h b/src/include/optimizer/geqo_mutation.h new file mode 100644 index 0000000000..91e4b4b538 --- /dev/null +++ b/src/include/optimizer/geqo_mutation.h @@ -0,0 +1,27 @@ +/*------------------------------------------------------------------------- + * + * geqo_mutation.h-- + * prototypes for mutation functions in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_mutation.h,v 1.1 1997/02/19 12:58:49 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#ifndef GEQO_MUTATION_H +#define GEQO_MUTATION_H + + +extern void geqo_mutation (Gene *tour, int num_gene); + +#endif /* GEQO_MUTATION_H */ diff --git a/src/include/optimizer/geqo_paths.h b/src/include/optimizer/geqo_paths.h new file mode 100644 index 0000000000..c4b890a3db --- /dev/null +++ b/src/include/optimizer/geqo_paths.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * geqo_paths.h-- + * prototypes for various subroutines in geqo_path.c + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_paths.h,v 1.1 1997/02/19 12:58:55 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#ifndef GEQO_PATHS_H +#define GEQO_PATHS_H + + +extern List *geqo_prune_rels(List *rel_list); +extern void geqo_rel_paths(Rel *rel); + +#endif /* GEQO_PATHS_H */ diff --git a/src/include/optimizer/geqo_pool.h b/src/include/optimizer/geqo_pool.h new file mode 100644 index 0000000000..ba7cb52972 --- /dev/null +++ b/src/include/optimizer/geqo_pool.h @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * + * geqo_pool.h-- + * pool representation in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_pool.h,v 1.1 1997/02/19 12:58:59 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + + +#ifndef GEQO_POOL_H +#define GEQO_POOL_H + + +extern Pool *alloc_pool(int pool_size, int string_length); +extern void free_pool(Pool *pool); + +extern void random_init_pool(Query *root, Pool *pool, int strt, int stop); +extern Chromosome *alloc_chromo(int string_length); +extern void free_chromo(Chromosome *chromo); + +extern void spread_chromo(Chromosome *chromo, Pool *pool); + +extern void sort_pool (Pool *pool); + +#endif /* GEQO_POOL_H */ diff --git a/src/include/optimizer/geqo_random.h b/src/include/optimizer/geqo_random.h new file mode 100644 index 0000000000..9bb3affe72 --- /dev/null +++ b/src/include/optimizer/geqo_random.h @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * + * geqo_random.h-- + * random number generator + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_random.h,v 1.1 1997/02/19 12:59:02 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#ifndef GEQO_RANDOM_H +#define GEQO_RANDOM_H + +#include <math.h> + +#define MASK 2147483647 + +#define geqo_rand() ((double)random()/MASK) + +/* geqo_randint returns integer value + between lower and upper inclusive */ + +#define geqo_randint(upper,lower) ( (int) floor( geqo_rand()*((upper-lower)+0.999999) ) + lower ) + +#endif /* GEQO_RANDOM_H */ diff --git a/src/include/optimizer/geqo_recombination.h b/src/include/optimizer/geqo_recombination.h new file mode 100644 index 0000000000..baa7fc24fa --- /dev/null +++ b/src/include/optimizer/geqo_recombination.h @@ -0,0 +1,77 @@ +/*------------------------------------------------------------------------- + * + * geqo_recombination.h-- + * prototypes for recombination in the genetic query optimizer + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_recombination.h,v 1.1 1997/02/19 12:59:04 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#ifndef GEQO_RECOMBINATION_H +#define GEQO_RECOMBINATION_H + + +extern void init_tour(Gene *tour, int num_gene); + + +/* edge recombination crossover [ERX] */ + +typedef struct Edge { + Gene edge_list[4]; /* list of edges */ + int total_edges; + int unused_edges; +} Edge; + +extern Edge *alloc_edge_table(int num_gene); +extern void free_edge_table(Edge *edge_table); + +extern float gimme_edge_table (Gene *tour1, Gene *tour2, int num_gene, Edge *edge_table); + +extern int gimme_tour(Edge *edge_table, Gene *new_gene, int num_gene); + + +/* partially matched crossover [PMX] */ + +#define DAD 1 /* indicator for gene from dad */ +#define MOM 0 /* indicator for gene from mom */ + +extern void pmx(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene); + + +typedef struct City { + int tour2_position; + int tour1_position; + int used; + int select_list; +} City; + +extern City *alloc_city_table(int num_gene); +extern void free_city_table(City *city_table); + +/* cycle crossover [CX] */ +extern int cx(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table); + +/* position crossover [PX] */ +extern void px(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table); + +/* order crossover [OX1] according to Davis */ +extern void ox1(Gene *mom, Gene *dad, Gene *offspring, int num_gene, City *city_table); + +/* order crossover [OX2] according to Syswerda */ +extern void ox2(Gene *mom, Gene *dad, Gene *offspring, int num_gene, City *city_table); + + +#endif /* GEQO_RECOMBINATION_H */ diff --git a/src/include/optimizer/geqo_selection.h b/src/include/optimizer/geqo_selection.h new file mode 100644 index 0000000000..8f3c22170f --- /dev/null +++ b/src/include/optimizer/geqo_selection.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * geqo_selection.h-- + * prototypes for selection routines in optimizer/geqo + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: geqo_selection.h,v 1.1 1997/02/19 12:59:07 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + + +#ifndef GEQO_SELECTION_H +#define GEQO_SELECTION_H + + +extern void geqo_selection (Chromosome *momma, Chromosome *daddy, Pool *pool, double bias); + +#endif /* GEQO_SELECTION_H */ |