diff options
author | unknown <serg@sergbook.mysql.com> | 2003-03-29 20:40:03 +0300 |
---|---|---|
committer | unknown <serg@sergbook.mysql.com> | 2003-03-29 20:40:03 +0300 |
commit | d2117cbe2fd4b454a03e5588ba731be1fc0a138a (patch) | |
tree | d630bf005775ec8f06785632bfda2f75fa29bc16 /myisam/ftbench | |
parent | ba5065b80e1cdbbd59b1ac09c5840d903efbb6f1 (diff) | |
download | mariadb-git-d2117cbe2fd4b454a03e5588ba731be1fc0a138a.tar.gz |
ft benchmark suite: initial checkin
BitKeeper/etc/ignore:
Added myisam/ftbench/data myisam/ftbench/t to the ignore list
Diffstat (limited to 'myisam/ftbench')
-rwxr-xr-x | myisam/ftbench/Ecompare.pl | 96 | ||||
-rwxr-xr-x | myisam/ftbench/Ereport.pl | 53 | ||||
-rwxr-xr-x | myisam/ftbench/ft-test-run.sh | 81 |
3 files changed, 230 insertions, 0 deletions
diff --git a/myisam/ftbench/Ecompare.pl b/myisam/ftbench/Ecompare.pl
new file mode 100755
index 00000000000..4bcee1fb588
--- /dev/null
+++ b/myisam/ftbench/Ecompare.pl
@@ -0,0 +1,134 @@
+#!/usr/bin/perl
+
+# Compares out-files (as created by Ereport.pl) from dir1/*.out and
+# dir2/*.out.  For each effectiveness column computes the probability of
+# the hypothesis "Both files have the same effectiveness".
+
+# The sign test is used to verify that test results are statistically
+# significant to support the hypothesis. Function is computed on the fly.
+
+# Basic formula is \sum_{r=0}^R C_N^r 2^{-N}
+# As N can be big, we'll work with logarithms.
+
+use strict;
+use warnings;
+
+my $log2 = log(2);
+my $pi   = 4*atan2(1, 1);
+
+# log(N!) for N=0..20, exact values (used by logfac)
+my @logfactab = (
+0, 0, 0.693147180559945, 1.79175946922805, 3.17805383034795,
+4.78749174278205, 6.57925121201010, 8.52516136106541, 10.6046029027453,
+12.8018274800815, 15.1044125730755, 17.5023078458739, 19.9872144956619,
+22.5521638531234, 25.1912211827387, 27.8992713838409, 30.6718601060807,
+33.5050734501369, 36.3954452080331, 39.3398841871995, 42.3356164607535,
+);
+
+# probab(N, R) returns \sum_{r=0}^R C_N^r 2^{-N}
+sub probab {
+  my ($N, $R) = @_;
+
+  # these terms do not depend on r, compute them only once
+  my $logfac_N = logfac($N);
+  my $N_log2 = $N*$log2;
+  my $sum = 0;
+
+  for my $r (0..$R) {
+    $sum += exp($logfac_N-logfac($r)-logfac($N-$r)-$N_log2);
+  }
+  return $sum;
+}
+
+# log(N!)
+# for N<=20 exact value from the table (above) is taken
+# otherwise, Stirling approximation for N! is used (with the 1/(12N)
+# term of the Stirling series, to stay close to the exact table values)
+sub logfac {
+  my $n = shift;
+  die "n=$n<0" if $n<0;
+  return $logfactab[$n] if $n<=$#logfactab;
+  return $n*log($n)-$n+log(2*$pi*$n)/2+1/(12*$n);
+}
+
+############################# main () ###############################
+#$p=shift; $m=shift; $p-=$m;
+#if($p>$m) {
+#  print "1 > 2 [+$p-$m]: ", probab($p+$m, $m), "\n";
+#} elsif($p<$m) {
+#  print "1 < 2 [+$p-$m]: ", probab($p+$m, $p), "\n";
+#} else {
+#  print "1 = 2 [+$p-$m]: ", probab($p+$m, $m), "\n";
+#}
+#exit;
+
+die "Use: $0 dir1 dir2\n" unless @ARGV==2 && -d $ARGV[0] && -d $ARGV[1];
+my ($dir1, $dir2) = @ARGV;
+
+# Test names are taken from the *.out files of dir1.  The directory is read
+# directly: no shell is run on a user-supplied path.  Hidden files are
+# skipped, as a shell glob would do.
+opendir(my $dh, $dir1) || die "Cannot open directory $dir1: $!";
+my @tests = sort map { /^([^.].*)\.out$/ ? $1 : () } readdir($dh);
+closedir($dh);
+die "No *.out files in $dir1\n" unless @tests;
+
+my $total = "";    # results ("+", "-" or "=") of all per-file comparisons
+my $lastcol = -1;  # index of the last column of the line read last
+my (@pp, @mm);     # per column: lines where dir1 (dir2) is greater, all files
+
+for my $file (@tests) {
+  my $out1 = "$dir1/$file.out";
+  my $out2 = "$dir2/$file.out";
+  open(my $fh1, '<', $out1) || die "Cannot open $out1: $!";
+  open(my $fh2, '<', $out2) || die "Cannot open $out2: $!";
+
+  my (@p, @m);     # the same as @pp, @mm, but for this file only
+  while (!eof($fh1) || !eof($fh2)) {
+    my $line1 = <$fh1>;
+    my $line2 = <$fh2>;
+    die "Number of lines differ in $out1 and $out2\n"
+      unless defined $line1 && defined $line2;
+
+    # the first field is not an effectiveness column, drop it
+    my @l1 = split ' ', $line1; shift @l1;
+    my @l2 = split ' ', $line2; shift @l2;
+
+    die "Number of columns differ in line $.\n" unless @l1 == @l2;
+
+    $lastcol = $#l1;
+    for (0..$lastcol) {
+      $p[$_] += $l1[$_] > $l2[$_];
+      $m[$_] += $l1[$_] < $l2[$_];
+    }
+  }
+  close $fh1;
+  close $fh2;
+
+  for (0..$lastcol) {
+    $pp[$_] += $p[$_] || 0; $mm[$_] += $m[$_] || 0;
+    $total .= rep($file, ($lastcol ? $_ : undef), $p[$_], $m[$_]);
+  }
+}
+
+for (0..$lastcol) {
+  rep($total, ($lastcol ? $_ : undef), $pp[$_], $mm[$_]);
+}
+
+# rep(test, n, p, m): prints one comparison line, returns "+", "-" or "="
+# n is the column number, or undef when it should not be printed
+sub rep {
+  my ($test, $n, $p, $m) = @_;
+  my ($c, $r);
+
+  $p ||= 0; $m ||= 0;
+  if ($p>$m) { $c=">"; $r="+"; }
+  elsif($p<$m) { $c="<"; $r="-"; }
+  else { $c="="; $r="="; }
+  $n = defined $n ? " $n: " : "";
+  # directory names are passed as arguments, not put into the format:
+  # a "%" in a name must not be taken for a conversion
+  printf "%-8s %s %s %s %s [+%03d-%03d]: %16.15f\n",
+    $test, $n, $dir1, $c, $dir2, $p, $m, probab($p+$m, ($p>=$m ? $m : $p));
+  return $r;
+}
diff --git a/myisam/ftbench/Ereport.pl b/myisam/ftbench/Ereport.pl
new file mode 100755
index 00000000000..aac06503849
--- /dev/null
+++ b/myisam/ftbench/Ereport.pl
@@ -0,0 +1,74 @@
+#!/usr/bin/perl
+
+# Reads the results of a test run (eval_output) and the qrels_file, and
+# prints the query id and the Favg(0.5) measure for every query that has
+# at least one line in qrels_file.
+
+use strict;
+use warnings;
+
+die "Use: $0 eval_output qrels_file\n" unless @ARGV==2;
+
+my ($eout, $relj) = @ARGV;
+open(my $eout_fh, '<', $eout) || die "Cannot open $eout: $!";
+open(my $relj_fh, '<', $relj) || die "Cannot open $relj: $!";
+
+# the message ends with "\n" so that perl does not append the script
+# name and line number to it
+my $header = <$eout_fh>;
+die "$eout must start with a number!\n"
+  unless defined $header && $header =~ /^[1-9][0-9]*\n/;
+
+my ($Ravg, $Pavg);     # set for every query in the loop below, read by Favg
+
+# Favg measure = 1/(a/Pavg+(1-a)/Ravg)
+#   F0 : a=0 -- ignore precision
+#   F5 : a=0.5
+#   F1 : a=1 -- ignore recall
+sub Favg {
+  my $alpha = shift;
+  return $Pavg*$Ravg ? 1/($alpha/$Pavg+(1-$alpha)/$Ravg) : 0;
+}
+
+# true if the line starts with a query id greater than qid
+sub is_later_query {
+  my ($line, $qid) = @_;
+  return defined $line && $line =~ /^0*(\d+)\s/ && $1 > $qid;
+}
+
+my $qid = 0;
+my $relj_str = <$relj_fh>;
+my $eout_str = <$eout_fh>;
+
+# One line of each file is always read ahead.  Thus at the end of both files
+# the first line of a not yet reported query may still be pending; the loop
+# goes on in this case, too, otherwise this query would be lost.
+while (!eof($relj_fh) || !eof($eout_fh) ||
+       is_later_query($relj_str, $qid) || is_later_query($eout_str, $qid)) {
+  ++$qid;
+  my %dq;                       # ids listed in qrels_file for this query
+  my ($A, $B, $AB) = (0, 0, 0); # lines in qrels, in eval_output, in both
+  $Ravg = $Pavg = 0;
+
+  while (defined $relj_str && $relj_str =~ /^0*$qid\s+(\d+)/) {
+    ++$A;
+    $dq{$1+0} = 1;
+    $relj_str = <$relj_fh>;
+  }
+  while (defined $eout_str && $eout_str =~ /^$qid\s+(\d+)\s+(\d+\.\d+)/) {
+    $B++;
+    $AB++ if $dq{$1+0};
+    $Ravg += $AB;
+    $Pavg += $AB/$B;
+    $eout_str = <$eout_fh>;
+  }
+  next unless $A;
+
+  $Ravg /= $B*$A if $B;
+  $Pavg /= $B if $B;
+
+  # the same layout as the "@##### @#.#######" format picture
+  printf "%6.0f %10.7f\n", $qid, Favg(0.5);
+}
+
+exit 0;
diff --git a/myisam/ftbench/ft-test-run.sh b/myisam/ftbench/ft-test-run.sh
new file mode 100755
index 00000000000..4c81cac6d4f
--- /dev/null
+++ b/myisam/ftbench/ft-test-run.sh
@@ -0,0 +1,98 @@
+#!/bin/sh -x
+
+# For every batch directory t/* (t/BEST first) without *.out files:
+# links the batch's ftdefs.h to ../ftdefs.h, rebuilds mysqld, starts it with
+# the batch's my.cnf, runs all data/*.test through it and stores the
+# Ereport.pl results in $batch/*.out.  The comparison of every other batch
+# with t/BEST (Ecompare.pl) is appended to t/BEST/report.txt.
+
+if [ ! -x ./ft-test-run.sh ] ; then
+  echo "Usage: ./ft-test-run.sh"
+  exit 1
+fi
+
+BASE=`pwd`
+DATA=$BASE/var
+ROOT=`cd ../..; pwd`
+MYSQLD=$ROOT/sql/mysqld
+MYSQL=$ROOT/client/mysql
+MYSQLADMIN=$ROOT/client/mysqladmin
+SOCK=$DATA/mysql.sock
+PID=$DATA/mysql.pid
+H=../ftdefs.h
+OPTS="--no-defaults --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets"
+
+# --ft_min_word_len=#
+# --ft_max_word_len=#
+# --ft_max_word_len_for_sort=#
+# --ft_stopword_file=name
+# --key_buffer_size=#
+
+# Stops the server: asks it to shut down, then kills the process if the
+# pid file is still there, and kills it with -9 if it survives 15 seconds.
+stop_mysqld()
+{
+  [ -S $SOCK ] && $MYSQLADMIN $OPTS shutdown
+  [ -f $PID ] && kill `cat $PID` && sleep 15 && [ -f $PID ] && kill -9 `cat $PID`
+}
+
+if [ ! -d t/BEST ] ; then
+  echo "No ./t/BEST directory! Aborting..."
+  exit 1
+fi
+rm -f t/BEST/report.txt
+if [ -w $H ] ; then
+  echo "$H is writeable! Aborting..."
+  exit 1
+fi
+
+for batch in t/BEST t/* ; do
+  [ -d $batch ] || continue
+  # a batch that already has *.out files is done; no match is not an error
+  A=`ls $batch/*.out 2>/dev/null`
+  [ -n "$A" ] && continue
+  rm -f $H
+  ln -s $BASE/$batch/ftdefs.h $H
+  touch $H
+  OPTS="--defaults-file=$BASE/$batch/my.cnf --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets"
+  stop_mysqld
+  rm -f $MYSQLD
+  (cd $ROOT && gmake)
+
+  for prog in $MYSQLD $MYSQL $MYSQLADMIN ; do
+    if [ ! -x $prog ] ; then
+      echo "No $prog"
+      exit 1
+    fi
+  done
+
+  # ">/dev/null" must precede "2>&1" for stderr to be discarded as well
+  rm -rf var >/dev/null 2>&1
+  mkdir var
+  mkdir var/test
+
+  $MYSQLD $OPTS --basedir=$BASE --skip-bdb --pid-file=$PID \
+          --language=$ROOT/sql/share/english \
+          --skip-grant-tables --skip-innodb \
+          --skip-networking --tmpdir=$DATA &
+
+  $MYSQLADMIN $OPTS --connect_timeout=60 ping
+  if [ $? != 0 ] ; then
+    echo "$MYSQLD refused to start"
+    exit 1
+  fi
+  for f in data/*.test ; do
+    test=`basename $f .test`
+    $MYSQL $OPTS --skip-column-names test <data/$test.test >var/$test.eval
+    ./Ereport.pl var/$test.eval data/$test.relj > $batch/$test.out || {
+      # do not leave the server running, nor a partial result that would
+      # make this batch look done on the next run
+      rm -f $batch/$test.out
+      stop_mysqld
+      exit 1
+    }
+  done
+  stop_mysqld
+  rm -f $H
+  [ $batch -ef t/BEST ] || ./Ecompare.pl t/BEST $batch >> t/BEST/report.txt
+done