diff options
author | Monty <xiphmont@xiph.org> | 2000-04-26 07:10:16 +0000 |
---|---|---|
committer | Monty <xiphmont@xiph.org> | 2000-04-26 07:10:16 +0000 |
commit | de5ef5ea3d8ca2fffac1d247520aea82c7bf11f5 (patch) | |
tree | 116c839881d24235e1cafed6409a6afed123345b | |
parent | 097027d02df14cab9f0ffe712995aa7f7f16f392 (diff) | |
download | libvorbis-git-de5ef5ea3d8ca2fffac1d247520aea82c7bf11f5.tar.gz |
Incremental update. Continuing work on lattice codebooks.
svn path=/branches/new_acoustics_pending_merge_20000328/vorbis/; revision=345
-rw-r--r-- | include/vorbis/codebook.h | 5 | ||||
-rw-r--r-- | lib/sharedbook.c | 40 | ||||
-rw-r--r-- | vq/Makefile.in | 10 | ||||
-rw-r--r-- | vq/bookutil.c | 31 | ||||
-rw-r--r-- | vq/latticebuild.c | 56 | ||||
-rw-r--r-- | vq/latticepare.c | 392 | ||||
-rw-r--r-- | vq/metrics.c | 338 | ||||
-rw-r--r-- | vq/run.c | 7 |
8 files changed, 631 insertions, 248 deletions
diff --git a/include/vorbis/codebook.h b/include/vorbis/codebook.h index ee4b013e..8e45c635 100644 --- a/include/vorbis/codebook.h +++ b/include/vorbis/codebook.h @@ -12,7 +12,7 @@ ******************************************************************** function: codebook types - last mod: $Id: codebook.h,v 1.4.4.5 2000/04/21 16:35:38 xiphmont Exp $ + last mod: $Id: codebook.h,v 1.4.4.6 2000/04/26 07:10:15 xiphmont Exp $ ********************************************************************/ @@ -81,7 +81,8 @@ typedef struct encode_aux_nearestmatch{ typedef struct encode_aux_threshmatch{ double *quantthresh; long *quantmap; - int quantvals; + int quantvals; + int threshvals; } encode_aux_threshmatch; typedef struct decode_aux{ diff --git a/lib/sharedbook.c b/lib/sharedbook.c index 0c144bcc..bc3987d4 100644 --- a/lib/sharedbook.c +++ b/lib/sharedbook.c @@ -12,7 +12,7 @@ ******************************************************************** function: basic shared codebook operations - last mod: $Id: sharedbook.c,v 1.1.2.5 2000/04/21 16:35:40 xiphmont Exp $ + last mod: $Id: sharedbook.c,v 1.1.2.6 2000/04/26 07:10:15 xiphmont Exp $ ********************************************************************/ @@ -323,6 +323,27 @@ int _best(codebook *book, double *a, int step){ int dim=book->dim; int ptr=0,k,o; + /* we assume for now that a thresh tree is the only other possibility */ + if(tt){ + int index=0; + /* find the quant val of each scalar */ + for(k=0,o=step*(dim-1);k<dim;k++,o-=step){ + int i; + /* linear search the quant list for now; it's small and although + with > 8 entries, it would be faster to bisect, this would be + a misplaced optimization for now */ + for(i=0;i<tt->threshvals-1;i++) + if(a[o]<tt->quantthresh[i])break; + + index=(index*tt->quantvals)+tt->quantmap[i]; + } + /* regular lattices are easy :-) */ + if(book->c->lengthlist[index]>0) /* is this unused? If so, we'll + use a decision tree after all + and fall through*/ + return(index); + } + if(nt){ /* optimized using the decision tree */ while(1){ @@ -342,23 +363,6 @@ int _best(codebook *book, double *a, int step){ return(-ptr); } - /* we assume for now that a thresh tree is the only other possibility */ - if(tt){ - int index=0; - /* find the quant val of each scalar */ - for(k=0,o=step*(dim-1);k<dim;k++,o-=step){ - int i; - /* linear search the quant list for now; it's small and although - with > 8 entries, it would be faster to bisect, this would be - a misplaced optimization for now */ - for(i=0;i<tt->quantvals-1;i++) - if(a[o]<tt->quantthresh[i])break; - - index=(index*tt->quantvals)+tt->quantmap[i]; - } - /* regular lattices are easy :-) */ - return(index); - } return(-1); } diff --git a/vq/Makefile.in b/vq/Makefile.in index 34a20603..02a1199e 100644 --- a/vq/Makefile.in +++ b/vq/Makefile.in @@ -1,4 +1,4 @@ -# $Id: Makefile.in,v 1.10.4.2 2000/04/21 16:35:40 xiphmont Exp $ +# $Id: Makefile.in,v 1.10.4.3 2000/04/26 07:10:15 xiphmont Exp $ ############################################################################### # # @@ -29,7 +29,8 @@ HFILES = ../include/vorbis/codebook.h vqgen.h vqext.h bookutil.h OFILES = vqgen.o vqsplit.o bookutil.o ../lib/sharedbook.o ALLOFILES = $(OFILES) lspdata.o genericdata.o train.o build.o run.o\ - cascade.o partition.o metrics.o residuedata.o latticebuild.o + cascade.o partition.o metrics.o residuedata.o latticebuild.o\ + latticepare.o all: $(MAKE) target CFLAGS="$(OPT)" @@ -40,7 +41,7 @@ debug: profile: $(MAKE) target CFLAGS="$(PROFILE)" -target: lspvqtrain genericvqtrain residuevqtrain vqbuild vqcascade vqmetrics latticebuild +target: lspvqtrain genericvqtrain residuevqtrain vqbuild vqcascade vqmetrics latticebuild latticepare lspvqtrain: $(OFILES) lspdata.o train.o $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS) @@ -63,6 +64,9 @@ vqmetrics: $(OFILES) run.o metrics.o latticebuild: $(OFILES) latticebuild.o $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS) +latticepare: $(OFILES) latticepare.o + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS) + $(ALLOFILES): $(HFILES) diff --git a/vq/bookutil.c b/vq/bookutil.c index a1874859..e479f7fe 100644 --- a/vq/bookutil.c +++ b/vq/bookutil.c @@ -12,7 +12,7 @@ ******************************************************************** function: utility functions for loading .vqh and .vqd files - last mod: $Id: bookutil.c,v 1.12.4.3 2000/04/21 16:35:40 xiphmont Exp $ + last mod: $Id: bookutil.c,v 1.12.4.4 2000/04/26 07:10:15 xiphmont Exp $ ********************************************************************/ @@ -187,9 +187,6 @@ codebook *codebook_load(char *filename){ char *line; long i; - c->nearest_tree=a; - c->thresh_tree=t; - if(in==NULL){ fprintf(stderr,"Couldn't open codebook %s\n",filename); exit(1); @@ -217,7 +214,7 @@ codebook *codebook_load(char *filename){ /* find the auxiliary encode struct[s] (if any) */ if(find_seek_to(in,"static encode_aux_nearestmatch _vq_aux_")){ /* how big? */ - a=calloc(1,sizeof(encode_aux_nearestmatch)); + c->nearest_tree=a=calloc(1,sizeof(encode_aux_nearestmatch)); line=get_line(in); line=get_line(in); line=get_line(in); @@ -272,29 +269,35 @@ codebook *codebook_load(char *filename){ if(find_seek_to(in,"static encode_aux_threshmatch _vq_aux_")){ /* how big? */ - t=calloc(1,sizeof(encode_aux_threshmatch)); + c->thresh_tree=t=calloc(1,sizeof(encode_aux_threshmatch)); + line=get_line(in); line=get_line(in); line=get_line(in); if(sscanf(line,"%d",&(t->quantvals))!=1){ - fprintf(stderr,"2: syntax in %s in line:\t %s",filename,line); + fprintf(stderr,"3: syntax in %s in line:\t %s",filename,line); + exit(1); + } + line=get_line(in); + if(sscanf(line,"%d",&(t->threshvals))!=1){ + fprintf(stderr,"4: syntax in %s in line:\t %s",filename,line); exit(1); } /* load quantthresh */ - find_seek_to(in,"static long _vq_quantthresh_"); + find_seek_to(in,"static double _vq_quantthresh_"); reset_next_value(); - t->quantthresh=malloc(sizeof(long)*t->quantvals); - for(i=0;i<t->quantvals;i++) + t->quantthresh=malloc(sizeof(double)*t->threshvals); + for(i=0;i<t->threshvals-1;i++) if(get_next_value(in,t->quantthresh+i)){ - fprintf(stderr,"out of data while reading codebook %s\n",filename); + fprintf(stderr,"out of data 1 while reading codebook %s\n",filename); exit(1); } /* load quantmap */ find_seek_to(in,"static long _vq_quantmap_"); reset_next_value(); - t->quantmap=malloc(sizeof(long)*t->quantvals); - for(i=0;i<t->quantvals;i++) + t->quantmap=malloc(sizeof(long)*t->threshvals); + for(i=0;i<t->threshvals;i++) if(get_next_ivalue(in,t->quantmap+i)){ - fprintf(stderr,"out of data while reading codebook %s\n",filename); + fprintf(stderr,"out of data 2 while reading codebook %s\n",filename); exit(1); } } diff --git a/vq/latticebuild.c b/vq/latticebuild.c index 65aca502..9b0a22fa 100644 --- a/vq/latticebuild.c +++ b/vq/latticebuild.c @@ -12,7 +12,7 @@ ******************************************************************** function: utility main for building codebooks from lattice descriptions - last mod: $Id: latticebuild.c,v 1.1.2.1 2000/04/21 16:35:40 xiphmont Exp $ + last mod: $Id: latticebuild.c,v 1.1.2.2 2000/04/26 07:10:16 xiphmont Exp $ ********************************************************************/ @@ -31,12 +31,13 @@ command line: vqlattice description.vql datafile.vqd - the lattice description file contains four lines: + the lattice description file contains five lines: <n> <dim> <multiplicitavep> <value_0> <value_1> <value_2> ... <value_n-1> - <thresh_0> <thresh_1> <thresh_2> ... <thresh_n-2> - <map_0> <map_1> <map_2> ... <map_n-1> + <m> + <thresh_0> <thresh_1> <thresh_2> ... <thresh_m-2> + <map_0> <map_1> <map_2> ... <map_m-1> vqlattice sends residual data (for the next stage) to stdout, and produces description.vqh */ @@ -48,7 +49,7 @@ int main(int argc,char *argv[]){ double *quantlist; long *hits; - int entries=-1,dim=-1,quantvals=-1,addmul=-1; + int entries=-1,dim=-1,quantvals=-1,addmul=-1,threshvals=-1; FILE *out=NULL; FILE *in=NULL; char *line,*name; @@ -105,9 +106,6 @@ int main(int argc,char *argv[]){ c.maptype=1; c.q_sequencep=0; c.quantlist=calloc(quantvals,sizeof(long)); - t.quantthresh=calloc(quantvals-1,sizeof(double)); - t.quantmap=calloc(quantvals,sizeof(int)); - t.quantvals=quantvals; quantlist=malloc(sizeof(long)*c.dim*c.entries); hits=malloc(c.entries*sizeof(long)); @@ -122,18 +120,29 @@ int main(int argc,char *argv[]){ } } + line=setup_line(in); + if(sscanf(line,"%d",&threshvals)!=1){ + fprintf(stderr,"Syntax error reading book file (line 3)\n"); + exit(1); + } + + t.quantthresh=calloc(threshvals-1,sizeof(double)); + t.quantmap=calloc(threshvals,sizeof(int)); + t.threshvals=threshvals; + t.quantvals=quantvals; + setup_line(in); - for(j=0;j<quantvals-1;j++){ + for(j=0;j<threshvals-1;j++){ if(get_line_value(in,t.quantthresh+j)==-1){ - fprintf(stderr,"Ran out of data on line 3 of description file\n"); + fprintf(stderr,"Ran out of data on line 4 of description file\n"); exit(1); } } setup_line(in); - for(j=0;j<quantvals;j++){ + for(j=0;j<threshvals;j++){ if(get_next_ivalue(in,t.quantmap+j)==-1){ - fprintf(stderr,"Ran out of data on line 4 of description file\n"); + fprintf(stderr,"Ran out of data on line 5 of description file\n"); exit(1); } } @@ -141,12 +150,14 @@ int main(int argc,char *argv[]){ /* gen a real quant list from the more easily human-grokked input */ { double min=quantlist[0]; - double mindel=fabs(quantlist[0]-quantlist[1]); + double mindel=1; for(j=1;j<quantvals;j++){ if(quantlist[j]<min)min=quantlist[j]; for(k=0;k<j;k++){ - double del=fabs(quantlist[j]-quantlist[k]); - if(del<mindel)mindel=del; + double del=quantlist[k]-min; + /* really underpowered :-P know that this will only factor + powers of two (duh) */ + while((int)(del/mindel)+.01<del/mindel){mindel/=2;} } } c.q_min=_float32_pack(min); @@ -266,19 +277,19 @@ int main(int argc,char *argv[]){ /* quantthresh */ fprintf(out,"static double _vq_quantthresh_%s[] = {\n",name); - for(j=0;j<t.quantvals-1;){ + for(j=0;j<t.threshvals-1;){ fprintf(out,"\t"); - for(k=0;k<8 && j<t.quantvals-1;k++,j++) - fprintf(out,"%05g,",t.quantthresh[j]); + for(k=0;k<8 && j<t.threshvals-1;k++,j++) + fprintf(out,"%.5g, ",t.quantthresh[j]); fprintf(out,"\n"); } fprintf(out,"};\n\n"); /* quantmap */ fprintf(out,"static long _vq_quantmap_%s[] = {\n",name); - for(j=0;j<t.quantvals;){ + for(j=0;j<t.threshvals;){ fprintf(out,"\t"); - for(k=0;k<8 && j<t.quantvals;k++,j++) + for(k=0;k<8 && j<t.threshvals;k++,j++) fprintf(out,"%5ld,",t.quantmap[j]); fprintf(out,"\n"); } @@ -289,13 +300,14 @@ int main(int argc,char *argv[]){ fprintf(out,"static encode_aux_threshmatch _vq_aux_%s = {\n",name); fprintf(out,"\t_vq_quantthresh_%s,\n",name); fprintf(out,"\t_vq_quantmap_%s,\n",name); - fprintf(out,"\t%d\n};\n\n",t.quantvals); + fprintf(out,"\t%d,\n",t.quantvals); + fprintf(out,"\t%d\n};\n\n",t.threshvals); fprintf(out,"static static_codebook _vq_book_%s = {\n",name); fprintf(out,"\t%ld, %ld,\n",c.dim,c.entries); fprintf(out,"\t_vq_lengthlist_%s,\n",name); - fprintf(out,"\t2, %ld, %ld, %d, %d,\n", + fprintf(out,"\t1, %ld, %ld, %d, %d,\n", c.q_min,c.q_delta,c.q_quant,c.q_sequencep); fprintf(out,"\t_vq_quantlist_%s,\n",name); fprintf(out,"\tNULL,\n"); diff --git a/vq/latticepare.c b/vq/latticepare.c new file mode 100644 index 00000000..2b9b5086 --- /dev/null +++ b/vq/latticepare.c @@ -0,0 +1,392 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY * + * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. * + * PLEASE READ THESE TERMS DISTRIBUTING. * + * * + * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 * + * by Monty <monty@xiph.org> and The XIPHOPHORUS Company * + * http://www.xiph.org/ * + * * + ******************************************************************** + + function: utility for paring low hit count cells from lattice codebook + last mod: $Id: latticepare.c,v 1.1.2.1 2000/04/26 07:10:16 xiphmont Exp $ + + ********************************************************************/ + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <errno.h> +#include "vorbis/codebook.h" +#include "../lib/sharedbook.h" +#include "bookutil.h" + +/* Lattice codebooks have two strengths: important fetaures that are + poorly modelled by global error minimization training (eg, strong + peaks) are not neglected 2) compact quantized representation. + + A fully populated lattice codebook, however, swings point 1 too far + in the opposite direction; rare features need not be modelled quite + so religiously and as such, we waste bits unless we eliminate the + least common cells. The codebook rep supports unused cells, so we + need to tag such cells and build an auxiliary (non-thresh) search + mechanism to find the proper match quickly */ + +/* two basic steps; first is pare the cell for which dispersal creates + the least additional error. This will naturally choose + low-population cells and cells that have not taken on points from + neighboring paring (but does not result in the lattice collapsing + inward and leaving low population ares totally unmodelled). After + paring has removed the desired number of cells, we need to build an + auxiliary search for each culled point */ + +/* Although lattice books (due to threshhold-based matching) do not + actually use error to make cell selections (in fact, it need not + bear any relation), the 'secondbest' entry finder here is in fact + error/distance based, so latticepare is only useful on such books */ + +/* command line: + latticepare latticebook.vqh input_data.vqd <target_cells> + + produces a new output book on stdout +*/ + +void usage(){ + fprintf(stderr,"latticepare latticebook.vqh input_data.vqd <target_cells>\n" + "produces a new output book on stdout \n"); + exit(1); +} + +static double _dist(int el,double *a, double *b){ + int i; + double acc=0.; + for(i=0;i<el;i++){ + double val=(a[i]-b[i]); + acc+=val*val; + } + return(acc); +} + +static double *pointlist; +static long points=0; +static long allocated=0; + +void add_vector(codebook *b,double *vec,long n){ + int dim=b->dim,i,j; + for(i=0;i<n/dim;i++){ + for(j=i;j<n;j+=dim){ + if(points>=allocated){ + if(allocated){ + allocated*=2; + pointlist=realloc(pointlist,allocated*dim*sizeof(double)); + }else{ + allocated=1024*1024; + pointlist=malloc(allocated*dim*sizeof(double)); + } + } + + pointlist[points++]=vec[j]; + if(!(points&0xff))spinnit("loading... ",points); + } + } +} + +/* search neighboring [non-culled] cells for lowest distance. Spiral + out in the event culling is deep */ +static int secondbest(codebook *b,double *vec,int best){ + encode_aux_threshmatch *tt=b->c->thresh_tree; + int dim=b->dim; + int i,j,spiral=1; + int *index=alloca(dim*sizeof(int)*2); + int *mod=index+dim; + int entry=best; + + double bestmetric=0; + int bestentry=-1; + + /* decompose index */ + for(i=0;i<dim;i++){ + index[i]=best%tt->quantvals; + best/=tt->quantvals; + } + + /* hit one off on all sides of it; most likely we'll find a possible + match */ + for(i=0;i<dim;i++){ + /* one up */ + if(index[i]+1<tt->quantvals){ + int newentry=entry+rint(pow(tt->quantvals,i)); + if(b->c->lengthlist[newentry]>0){ + double thismetric=_dist(dim, vec, b->valuelist+newentry*dim); + + if(bestentry==-1 || bestmetric>thismetric){ + bestmetric=thismetric; + bestentry=newentry; + } + } + } + + /* one down */ + if(index[i]-1>=0){ + int newentry=entry-rint(pow(tt->quantvals,i)); + if(b->c->lengthlist[newentry]>0){ + double thismetric=_dist(dim, vec, b->valuelist+newentry*dim); + + if(bestentry==-1 || bestmetric>thismetric){ + bestmetric=thismetric; + bestentry=newentry; + } + } + } + } + + /* no match? search all cells, binary count, that are one away on + one or more axes. Then continue out until there's a match. + We'll find one eventually, it's relatively OK to be inefficient + as the attempt above will almost always succeed */ + while(bestentry==-1){ + for(i=0;i<dim;i++)mod[i]= -spiral; + while(1){ + int newentry=entry; + + /* update the mod */ + for(j=0;j<dim;j++){ + if(mod[j]<=spiral) + break; + else{ + if(j+1<dim){ + mod[j]= -spiral; + mod[j+1]++; + } + } + } + if(j==dim)break; + + /* reconstitute the entry */ + for(j=0;j<dim;j++){ + if(index[j]+mod[j]<0 || index[j]+mod[j]>=tt->quantvals){ + newentry=-1; + break; + } + newentry+=mod[j]*rint(pow(tt->quantvals,j)); + } + + if(newentry!=-1 && newentry!=entry) + if(b->c->lengthlist[newentry]>0){ + double thismetric=_dist(dim, vec, b->valuelist+newentry*dim); + + if(bestentry==-1 || bestmetric>thismetric){ + bestmetric=thismetric; + bestentry=newentry; + } + } + mod[0]++; + } + spiral++; + } + + return(bestentry); +} + + +int main(int argc,char *argv[]){ + char *basename; + codebook *b=NULL; + int input=0; + int entries; + int dim; + long i,j,target=-1; + argv++; + + if(*argv==NULL){ + usage(); + exit(1); + } + + /* yes, this is evil. However, it's very convenient to parse file + extentions */ + + while(*argv){ + if(*argv[0]=='-'){ + /* option */ + }else{ + /* input file. What kind? */ + char *dot; + char *ext=NULL; + char *name=strdup(*argv++); + dot=strrchr(name,'.'); + if(dot) + ext=dot+1; + else{ + ext=""; + target=atol(name); + } + + + /* codebook */ + if(!strcmp(ext,"vqh")){ + if(input){ + fprintf(stderr,"specify all input data (.vqd) files following\n" + "codebook header (.vqh) files\n"); + exit(1); + } + + basename=strrchr(name,'/'); + if(basename) + basename=strdup(basename)+1; + else + basename=strdup(name); + dot=strrchr(basename,'.'); + if(dot)*dot='\0'; + + b=codebook_load(name); + } + + /* data file; we do actually need to suck it into memory */ + /* we're dealing with just one book, so we can de-interleave */ + if(!strcmp(ext,"vqd")){ + int cols; + long lines=0; + char *line; + double *vec; + FILE *in=fopen(name,"r"); + if(!in){ + fprintf(stderr,"Could not open input file %s\n",name); + exit(1); + } + + reset_next_value(); + line=setup_line(in); + /* count cols before we start reading */ + { + char *temp=line; + while(*temp==' ')temp++; + for(cols=0;*temp;cols++){ + while(*temp>32)temp++; + while(*temp==' ')temp++; + } + } + vec=alloca(cols*sizeof(double)); + while(line){ + lines++; + for(j=0;j<cols;j++) + if(get_line_value(in,vec+j)){ + fprintf(stderr,"Too few columns on line %ld in data file\n",lines); + exit(1); + } + /* deinterleave, add to heap */ + add_vector(b,vec,cols); + + line=setup_line(in); + } + fclose(in); + } + } + } + dim=b->dim; + entries=b->entries; + + if(target==-1){ + fprintf(stderr,"Target number of cells required on command line\n"); + exit(1); + } + + /* set up auxiliary vectors for error tracking */ + { + long *membership=malloc(points*sizeof(long)); + long *cellhead=malloc(entries*sizeof(long)); + double *cellerror1=calloc(entries,sizeof(double)); /* error for + firstentries */ + double *cellerror2=calloc(entries,sizeof(double)); /* error for + secondentry */ + double globalerror=0.; + long cellsleft=entries; + for(i=0;i<points;i++)membership[i]=-1; + for(i=0;i<entries;i++)cellhead[i]=-1; + + for(i=0;i<points;i++){ + /* assign vectors to the nearest cell. Also keep track of second + nearest for error statistics */ + double *ppt=pointlist+i*dim; + int firstentry=_best(b,ppt,1); + int secondentry=secondbest(b,ppt,firstentry); + + double firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); + double secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); + + if(!(i&0xff))spinnit("initializing... ",points-i); + + membership[i]=cellhead[firstentry]; + cellhead[firstentry]=i; + + cellerror1[firstentry]+=firstmetric; + globalerror+=firstmetric; + cellerror2[firstentry]+=secondmetric; + + } + + while(cellsleft>target){ + int bestcell=-1; + double besterror=0; + long head=-1; + spinnit("cells left to eliminate... ",cellsleft-target); + + /* find the cell with lowest removal impact */ + for(i=0;i<entries;i++){ + if(b->c->lengthlist[i]>0){ + double thiserror=globalerror-cellerror1[i]+cellerror2[i]; + if(bestcell==-1 || besterror>thiserror){ + besterror=thiserror; + bestcell=i; + } + } + } + + /* disperse it. move each point out, adding it (properly) to + the second best */ + b->c->lengthlist[bestcell]=0; + head=cellhead[bestcell]; + cellhead[bestcell]=-1; + while(head!=-1){ + /* head is a point number */ + double *ppt=pointlist+head*dim; + int newentry=secondbest(b,ppt,bestcell); + int secondentry=secondbest(b,pointlist+head*dim,newentry); + double firstmetric=_dist(dim,b->valuelist+dim*newentry,ppt); + double secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); + long next=membership[head]; + cellerror1[newentry]+=firstmetric; + cellerror2[newentry]+=secondmetric; + + membership[head]=cellhead[newentry]; + cellhead[newentry]=head; + head=next; + } + + cellsleft--; + } + free(membership); + free(cellhead); + free(cellerror1); + free(cellerror2); + } + + for(i=0;i<entries;i++) + fprintf(stderr,"%ld, ",b->c->lengthlist[i]); + + /* paring is over. Build decision trees using points that now fall + through the thresh matcher */ + + + + fprintf(stderr,"\r \nDone."); + return(0); +} + + + + diff --git a/vq/metrics.c b/vq/metrics.c index 90e19ea9..201521f4 100644 --- a/vq/metrics.c +++ b/vq/metrics.c @@ -12,7 +12,7 @@ ******************************************************************** function: function calls to collect codebook metrics - last mod: $Id: metrics.c,v 1.6.4.2 2000/04/21 16:35:40 xiphmont Exp $ + last mod: $Id: metrics.c,v 1.6.4.3 2000/04/26 07:10:16 xiphmont Exp $ ********************************************************************/ @@ -24,57 +24,63 @@ #include "../lib/sharedbook.h" #include "bookutil.h" +/* collect the following metrics: + + mean and mean squared amplitude + mean and mean squared error + mean and mean squared error (per sample) by entry + worst case fit by entry + entry cell size + hits by entry + total bits + total samples + (average bits per sample)*/ + + /* set up metrics */ double meanamplitude_acc=0.; double meanamplitudesq_acc=0.; double meanerror_acc=0.; double meanerrorsq_acc=0.; -double meandev_acc=0.; - -double *histogram=NULL; -double *histogram_error=NULL; -double *histogram_errorsq=NULL; -double *histogram_distance=NULL; -double *histogram_hi=NULL; -double *histogram_lo=NULL; +double **histogram=NULL; +double **histogram_error=NULL; +double **histogram_errorsq=NULL; +double **histogram_hi=NULL; +double **histogram_lo=NULL; +double bits=0.; double count=0.; -int books=0; -int lastdim=-1; - static double *_now(codebook *c, int i){ return c->valuelist+i*c->c->dim; } -int histerrsort(const void *a, const void *b){ - double av=histogram_distance[*((long *)a)]; - double bv=histogram_distance[*((long *)b)]; - if(av<bv)return(-1); - return(1); -} +int books=0; void process_preprocess(codebook **bs,char *basename){ - while(bs[books]){ - codebook *b=bs[books]; - lastdim=b->c->dim; - books++; - } - - + int i; + while(bs[books])books++; + if(books){ - const static_codebook *b=bs[books-1]->c; - histogram=calloc(b->entries,sizeof(double)); - histogram_distance=calloc(b->entries,sizeof(double)); - histogram_errorsq=calloc(b->entries*lastdim,sizeof(double)); - histogram_error=calloc(b->entries*lastdim,sizeof(double)); - histogram_hi=calloc(b->entries*lastdim,sizeof(double)); - histogram_lo=calloc(b->entries*lastdim,sizeof(double)); + histogram=calloc(books,sizeof(double *)); + histogram_error=calloc(books,sizeof(double *)); + histogram_errorsq=calloc(books,sizeof(double *)); + histogram_hi=calloc(books,sizeof(double *)); + histogram_lo=calloc(books,sizeof(double *)); }else{ fprintf(stderr,"Specify at least one codebook\n"); exit(1); } + + for(i=0;i<books;i++){ + codebook *b=bs[i]; + histogram[i]=calloc(b->entries,sizeof(double)); + histogram_error[i]=calloc(b->entries*b->dim,sizeof(double)); + histogram_errorsq[i]=calloc(b->entries*b->dim,sizeof(double)); + histogram_hi[i]=calloc(b->entries*b->dim,sizeof(double)); + histogram_lo[i]=calloc(b->entries*b->dim,sizeof(double)); + } } static double _dist(int el,double *a, double *b){ @@ -87,219 +93,181 @@ static double _dist(int el,double *a, double *b){ return acc; } -void cell_spacing(codebook **b){ - int i,j,k; - for(i=0;i<books;i++){ - double min,max,mean=0.,meansq=0.; - codebook *c=b[i]; - - fprintf(stderr,"\nCell spacing for book %d:\n",i); - - /* minimum, maximum, mean, ms cell spacing */ - for(j=0;j<c->c->entries;j++){ - double localmin=-1.; - for(k=0;k<c->c->entries;k++){ - double this=_dist(c->c->dim,_now(c,j),_now(c,k)); - if(j!=k && - (localmin==-1 || this<localmin)) - localmin=this; - } - - if(j==0 || localmin<min)min=localmin; - if(j==0 || localmin>max)max=localmin; - mean+=sqrt(localmin); - meansq+=localmin; +void cell_spacing(codebook *c){ + int j,k; + double min,max,mean=0.,meansq=0.; + + /* minimum, maximum, mean, ms cell spacing */ + for(j=0;j<c->c->entries;j++){ + double localmin=-1.; + for(k=0;k<c->c->entries;k++){ + double this=_dist(c->c->dim,_now(c,j),_now(c,k)); + if(j!=k && + (localmin==-1 || this<localmin)) + localmin=this; } - - fprintf(stderr,"\tminimum cell spacing (closest side): %g\n",sqrt(min)); - fprintf(stderr,"\tmaximum cell spacing (closest side): %g\n",sqrt(max)); - fprintf(stderr,"\tmean closest side spacing: %g\n",mean/c->c->entries); - fprintf(stderr,"\tmean sq closest side spacing: %g\n",sqrt(meansq/c->c->entries)); + + if(j==0 || localmin<min)min=localmin; + if(j==0 || localmin>max)max=localmin; + mean+=sqrt(localmin); + meansq+=localmin; } + + fprintf(stderr,"\tminimum cell spacing (closest side): %g\n",sqrt(min)); + fprintf(stderr,"\tmaximum cell spacing (closest side): %g\n",sqrt(max)); + fprintf(stderr,"\tmean closest side spacing: %g\n",mean/c->c->entries); + fprintf(stderr,"\tmean sq closest side spacing: %g\n",sqrt(meansq/c->c->entries)); } -void process_postprocess(codebook **b,char *basename){ - int i,j,k; +void process_postprocess(codebook **bs,char *basename){ + int i,k,book; char *buffer=alloca(strlen(basename)+80); - codebook *bb=b[books-1]; - fprintf(stderr,"Done. Processed %ld data points:\n", + fprintf(stderr,"Done. Processed %ld data points:\n\n", (long)count); - fprintf(stderr,"\tglobal mean amplitude: %g\n", - meanamplitude_acc/(count*lastdim)); - fprintf(stderr,"\tglobal mean squared amplitude: %g\n", - sqrt(meanamplitudesq_acc/(count*lastdim))); - fprintf(stderr,"\tglobal mean error: %g\n", - meanerror_acc/(count*lastdim)); - fprintf(stderr,"\tglobal mean squared error: %g\n", - sqrt(meanerrorsq_acc/(count*lastdim))); - fprintf(stderr,"\tglobal mean deviation: %g\n", - meandev_acc/(count*lastdim)); + fprintf(stderr,"Global statistics:******************\n\n"); + + fprintf(stderr,"\ttotal samples: %ld\n",(long)count); + fprintf(stderr,"\ttotal bits required to code: %ld\n",(long)bits); + fprintf(stderr,"\taverage bits per sample: %g\n\n",bits/count); - cell_spacing(b); + fprintf(stderr,"\tmean sample amplitude: %g\n", + meanamplitude_acc/count); + fprintf(stderr,"\tmean squared sample amplitude: %g\n\n", + sqrt(meanamplitudesq_acc/count)); - { + fprintf(stderr,"\tmean code error: %g\n", + meanerror_acc/count); + fprintf(stderr,"\tmean squared code error: %g\n\n", + sqrt(meanerrorsq_acc/count)); + + for(book=0;book<books;book++){ FILE *out; + codebook *b=bs[book]; + int n=b->c->entries; + int dim=b->c->dim; + + fprintf(stderr,"Book %d statistics:------------------\n",book); - sprintf(buffer,"%s-mse.m",basename); + cell_spacing(b); + + sprintf(buffer,"%s-%d-mse.m",basename,book); out=fopen(buffer,"w"); if(!out){ fprintf(stderr,"Could not open file %s for writing\n",buffer); exit(1); } - - for(i=0;i<bb->c->entries;i++){ - for(k=0;k<bb->c->dim;k++){ - fprintf(out,"%ld, %g, %g\n", - i*bb->c->dim+k,(bb->valuelist+i*bb->c->dim)[k], - sqrt((histogram_errorsq+i*bb->c->dim)[k]/histogram[i])); + + for(i=0;i<n;i++){ + for(k=0;k<dim;k++){ + fprintf(out,"%d, %g, %g\n", + i*dim+k,(b->valuelist+i*dim)[k], + sqrt((histogram_errorsq[book]+i*dim)[k]/histogram[book][i])); } } fclose(out); - - sprintf(buffer,"%s-me.m",basename); + + sprintf(buffer,"%s-%d-me.m",basename,book); out=fopen(buffer,"w"); if(!out){ fprintf(stderr,"Could not open file %s for writing\n",buffer); exit(1); } - - for(i=0;i<bb->c->entries;i++){ - for(k=0;k<bb->c->dim;k++){ - fprintf(out,"%ld, %g, %g\n", - i*bb->c->dim+k,(bb->valuelist+i*bb->c->dim)[k], - (histogram_error+i*bb->c->dim)[k]/histogram[i]); + + for(i=0;i<n;i++){ + for(k=0;k<dim;k++){ + fprintf(out,"%d, %g, %g\n", + i*dim+k,(b->valuelist+i*dim)[k], + (histogram_error[book]+i*dim)[k]/histogram[book][i]); } } fclose(out); - sprintf(buffer,"%s-worst.m",basename); + sprintf(buffer,"%s-%d-worst.m",basename,book); out=fopen(buffer,"w"); if(!out){ fprintf(stderr,"Could not open file %s for writing\n",buffer); exit(1); } - - for(i=0;i<bb->c->entries;i++){ - for(k=0;k<bb->c->dim;k++){ - fprintf(out,"%ld, %g, %g, %g\n", - i*bb->c->dim+k,(bb->valuelist+i*bb->c->dim)[k], - (bb->valuelist+i*bb->c->dim)[k]+(histogram_lo+i*bb->c->dim)[k], - (bb->valuelist+i*bb->c->dim)[k]+(histogram_hi+i*bb->c->dim)[k]); + + for(i=0;i<n;i++){ + for(k=0;k<dim;k++){ + fprintf(out,"%d, %g, %g, %g\n", + i*dim+k,(b->valuelist+i*dim)[k], + (b->valuelist+i*dim)[k]+(histogram_lo[book]+i*dim)[k], + (b->valuelist+i*dim)[k]+(histogram_hi[book]+i*dim)[k]); } } fclose(out); } +} - { - FILE *out; - long *index=alloca(sizeof(long)*bb->c->entries); - sprintf(buffer,"%s-distance.m",basename); - out=fopen(buffer,"w"); - if(!out){ - fprintf(stderr,"Could not open file %s for writing\n",buffer); - exit(1); - } - for(j=0;j<bb->c->entries;j++){ - if(histogram[j])histogram_distance[j]/=histogram[j]; - index[j]=j; +void process_one(codebook *b,int book,double *a,int dim,int step,int addmul){ + int j,entry; + double base=0.; + double amplitude=0.; + + if(book==0) + for(j=0;j<dim;j++){ + if(b->c->q_sequencep){ + amplitude=a[j*step]-base; + base=a[j*step]; + }else + amplitude=a[j*step]; + meanamplitude_acc+=fabs(amplitude); + meanamplitudesq_acc+=amplitude*amplitude; + count++; } - qsort(index,bb->c->entries,sizeof(long),histerrsort); + entry=vorbis_book_besterror(b,a,step,addmul); + + histogram[book][entry]++; + bits+=vorbis_book_codelen(b,entry); + + for(j=0;j<dim;j++){ + double error=a[j*step]; - for(j=0;j<bb->c->entries;j++) - for(k=0;k<histogram[index[j]];k++) - fprintf(out,"%g,\n",histogram_distance[index[j]]); - fclose(out); - + if(book==books-1){ + meanerror_acc+=fabs(error); + meanerrorsq_acc+=error*error; + } + histogram_errorsq[book][entry*dim+j]+=error*error; + histogram_error[book][entry*dim+j]+=fabs(error); + if(histogram[book][entry]==0 || histogram_hi[book][entry*dim+j]<error) + histogram_hi[book][entry*dim+j]=error; + if(histogram[book][entry]==0 || histogram_lo[book][entry*dim+j]>error) + histogram_lo[book][entry*dim+j]=error; } } + void process_vector(codebook **bs,int *addmul,int inter,double *a,int n){ int bi; - int i,j; - double base=0.; - double *work=alloca(sizeof(double)*n); - double amplitude=0.; - - memcpy(work,a,sizeof(double)*n); + int i; for(bi=0;bi<books;bi++){ codebook *b=bs[bi]; int dim=b->dim; - + if(inter){ - for(i=0;i<n/dim;i++){ - int entry=vorbis_book_besterror(b,work+i,n/dim,addmul[bi]); - - if(bi==books-1){ - double distance=0; - histogram[entry]++; - - for(j=0;j<dim;j++){ - double error=work[i+j*(n/dim)]; - histogram_errorsq[entry*dim+j]+=error*error; - histogram_error[entry*dim+j]+=fabs(error); - if(histogram[entry]==0 || histogram_hi[entry*dim+j]<error) - histogram_hi[entry*dim+j]=error; - if(histogram[entry]==0 || histogram_lo[entry*dim+j]>error) - histogram_lo[entry*dim+j]=error; - distance+=error*error; - } - histogram_distance[entry]+=sqrt(distance); - } - } + for(i=0;i<n/dim;i++) + process_one(b,bi,a+i,dim,n/dim,addmul[bi]); }else{ - for(i=0;i<=n-dim;i+=dim){ - int entry=vorbis_book_besterror(b,work+i,1,addmul[bi]); - if(bi==books-1){ - double distance=0; - histogram[entry]++; - - for(j=0;j<dim;j++){ - double error=work[i+j]; - histogram_errorsq[entry*dim+j]+=error*error; - histogram_error[entry*dim+j]+=fabs(error); - if(histogram[entry]==0 || histogram_hi[entry*dim+j]<error) - histogram_hi[entry*dim+j]=error; - if(histogram[entry]==0 || histogram_lo[entry*dim+j]>error) - histogram_lo[entry*dim+j]=error; - distance+=error*error; - } - histogram_distance[entry]+=sqrt(distance); - } - } + for(i=0;i<=n-dim;i+=dim) + process_one(b,bi,a+i,dim,1,addmul[bi]); } } - - for(i=0;i<n;i++){ - double error=work[i]; - if(bs[0]->c->q_sequencep){ - amplitude=a[i]-base; - base=a[i]; - }else - amplitude=a[i]; - - meanamplitude_acc+=fabs(amplitude); - meanamplitudesq_acc+=amplitude*amplitude; - meanerror_acc+=fabs(error); - meanerrorsq_acc+=error*error; - - if(amplitude) - meandev_acc+=fabs(error/amplitude); - else - meandev_acc+=fabs(error); /* yeah, yeah */ - } - - if((long)(count++)%100)spinnit("working.... lines: ",count); + + if((long)(count)%100)spinnit("working.... samples: ",count); } void process_usage(void){ fprintf(stderr, - "usage: vqmetrics +|*<codebook>.vqh [ +|*<codebook.vqh> ]... \n" + "usage: vqmetrics [-i] +|*<codebook>.vqh [ +|*<codebook.vqh> ]... \n" " datafile.vqd [datafile.vqd]...\n\n" - " data can be taken on stdin. Output goes to output files:\n" + " data can be taken on stdin. -i indicates interleaved coding.\n" + " Output goes to output files:\n" " basename-me.m: gnuplot: mean error by entry value\n" " basename-mse.m: gnuplot: mean square error by entry value\n" " basename-worst.m: gnuplot: worst error by entry value\n" @@ -12,7 +12,7 @@ ******************************************************************** function: utility main for loading and operating on codebooks - last mod: $Id: run.c,v 1.9.4.1 2000/04/21 16:35:40 xiphmont Exp $ + last mod: $Id: run.c,v 1.9.4.2 2000/04/26 07:10:16 xiphmont Exp $ ********************************************************************/ @@ -140,7 +140,7 @@ int main(int argc,char *argv[]){ } reset_next_value(); - line=get_line(in); + line=setup_line(in); /* count cols before we start reading */ { char *temp=line; @@ -161,8 +161,7 @@ int main(int argc,char *argv[]){ /* ignores -s for now */ process_vector(b,addmul,interleave,vec,cols); - reset_next_value(); - line=get_line(in); + line=setup_line(in); } fclose(in); } |