summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMonty <xiphmont@xiph.org>2000-04-26 07:10:16 +0000
committerMonty <xiphmont@xiph.org>2000-04-26 07:10:16 +0000
commitde5ef5ea3d8ca2fffac1d247520aea82c7bf11f5 (patch)
tree116c839881d24235e1cafed6409a6afed123345b
parent097027d02df14cab9f0ffe712995aa7f7f16f392 (diff)
downloadlibvorbis-git-de5ef5ea3d8ca2fffac1d247520aea82c7bf11f5.tar.gz
Incremental update. Continuing work on lattice codebooks.
svn path=/branches/new_acoustics_pending_merge_20000328/vorbis/; revision=345
-rw-r--r--include/vorbis/codebook.h5
-rw-r--r--lib/sharedbook.c40
-rw-r--r--vq/Makefile.in10
-rw-r--r--vq/bookutil.c31
-rw-r--r--vq/latticebuild.c56
-rw-r--r--vq/latticepare.c392
-rw-r--r--vq/metrics.c338
-rw-r--r--vq/run.c7
8 files changed, 631 insertions, 248 deletions
diff --git a/include/vorbis/codebook.h b/include/vorbis/codebook.h
index ee4b013e..8e45c635 100644
--- a/include/vorbis/codebook.h
+++ b/include/vorbis/codebook.h
@@ -12,7 +12,7 @@
********************************************************************
function: codebook types
- last mod: $Id: codebook.h,v 1.4.4.5 2000/04/21 16:35:38 xiphmont Exp $
+ last mod: $Id: codebook.h,v 1.4.4.6 2000/04/26 07:10:15 xiphmont Exp $
********************************************************************/
@@ -81,7 +81,8 @@ typedef struct encode_aux_nearestmatch{
typedef struct encode_aux_threshmatch{
double *quantthresh;
long *quantmap;
- int quantvals;
+ int quantvals;
+ int threshvals;
} encode_aux_threshmatch;
typedef struct decode_aux{
diff --git a/lib/sharedbook.c b/lib/sharedbook.c
index 0c144bcc..bc3987d4 100644
--- a/lib/sharedbook.c
+++ b/lib/sharedbook.c
@@ -12,7 +12,7 @@
********************************************************************
function: basic shared codebook operations
- last mod: $Id: sharedbook.c,v 1.1.2.5 2000/04/21 16:35:40 xiphmont Exp $
+ last mod: $Id: sharedbook.c,v 1.1.2.6 2000/04/26 07:10:15 xiphmont Exp $
********************************************************************/
@@ -323,6 +323,27 @@ int _best(codebook *book, double *a, int step){
int dim=book->dim;
int ptr=0,k,o;
+ /* we assume for now that a thresh tree is the only other possibility */
+ if(tt){
+ int index=0;
+ /* find the quant val of each scalar */
+ for(k=0,o=step*(dim-1);k<dim;k++,o-=step){
+ int i;
+ /* linear search the quant list for now; it's small and although
+ with > 8 entries, it would be faster to bisect, this would be
+ a misplaced optimization for now */
+ for(i=0;i<tt->threshvals-1;i++)
+ if(a[o]<tt->quantthresh[i])break;
+
+ index=(index*tt->quantvals)+tt->quantmap[i];
+ }
+ /* regular lattices are easy :-) */
+ if(book->c->lengthlist[index]>0) /* is this unused? If so, we'll
+ use a decision tree after all
+ and fall through*/
+ return(index);
+ }
+
if(nt){
/* optimized using the decision tree */
while(1){
@@ -342,23 +363,6 @@ int _best(codebook *book, double *a, int step){
return(-ptr);
}
- /* we assume for now that a thresh tree is the only other possibility */
- if(tt){
- int index=0;
- /* find the quant val of each scalar */
- for(k=0,o=step*(dim-1);k<dim;k++,o-=step){
- int i;
- /* linear search the quant list for now; it's small and although
- with > 8 entries, it would be faster to bisect, this would be
- a misplaced optimization for now */
- for(i=0;i<tt->quantvals-1;i++)
- if(a[o]<tt->quantthresh[i])break;
-
- index=(index*tt->quantvals)+tt->quantmap[i];
- }
- /* regular lattices are easy :-) */
- return(index);
- }
return(-1);
}
diff --git a/vq/Makefile.in b/vq/Makefile.in
index 34a20603..02a1199e 100644
--- a/vq/Makefile.in
+++ b/vq/Makefile.in
@@ -1,4 +1,4 @@
-# $Id: Makefile.in,v 1.10.4.2 2000/04/21 16:35:40 xiphmont Exp $
+# $Id: Makefile.in,v 1.10.4.3 2000/04/26 07:10:15 xiphmont Exp $
###############################################################################
# #
@@ -29,7 +29,8 @@ HFILES = ../include/vorbis/codebook.h vqgen.h vqext.h bookutil.h
OFILES = vqgen.o vqsplit.o bookutil.o ../lib/sharedbook.o
ALLOFILES = $(OFILES) lspdata.o genericdata.o train.o build.o run.o\
- cascade.o partition.o metrics.o residuedata.o latticebuild.o
+ cascade.o partition.o metrics.o residuedata.o latticebuild.o\
+ latticepare.o
all:
$(MAKE) target CFLAGS="$(OPT)"
@@ -40,7 +41,7 @@ debug:
profile:
$(MAKE) target CFLAGS="$(PROFILE)"
-target: lspvqtrain genericvqtrain residuevqtrain vqbuild vqcascade vqmetrics latticebuild
+target: lspvqtrain genericvqtrain residuevqtrain vqbuild vqcascade vqmetrics latticebuild latticepare
lspvqtrain: $(OFILES) lspdata.o train.o
$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS)
@@ -63,6 +64,9 @@ vqmetrics: $(OFILES) run.o metrics.o
latticebuild: $(OFILES) latticebuild.o
$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS)
+latticepare: $(OFILES) latticepare.o
+ $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS)
+
$(ALLOFILES): $(HFILES)
diff --git a/vq/bookutil.c b/vq/bookutil.c
index a1874859..e479f7fe 100644
--- a/vq/bookutil.c
+++ b/vq/bookutil.c
@@ -12,7 +12,7 @@
********************************************************************
function: utility functions for loading .vqh and .vqd files
- last mod: $Id: bookutil.c,v 1.12.4.3 2000/04/21 16:35:40 xiphmont Exp $
+ last mod: $Id: bookutil.c,v 1.12.4.4 2000/04/26 07:10:15 xiphmont Exp $
********************************************************************/
@@ -187,9 +187,6 @@ codebook *codebook_load(char *filename){
char *line;
long i;
- c->nearest_tree=a;
- c->thresh_tree=t;
-
if(in==NULL){
fprintf(stderr,"Couldn't open codebook %s\n",filename);
exit(1);
@@ -217,7 +214,7 @@ codebook *codebook_load(char *filename){
/* find the auxiliary encode struct[s] (if any) */
if(find_seek_to(in,"static encode_aux_nearestmatch _vq_aux_")){
/* how big? */
- a=calloc(1,sizeof(encode_aux_nearestmatch));
+ c->nearest_tree=a=calloc(1,sizeof(encode_aux_nearestmatch));
line=get_line(in);
line=get_line(in);
line=get_line(in);
@@ -272,29 +269,35 @@ codebook *codebook_load(char *filename){
if(find_seek_to(in,"static encode_aux_threshmatch _vq_aux_")){
/* how big? */
- t=calloc(1,sizeof(encode_aux_threshmatch));
+ c->thresh_tree=t=calloc(1,sizeof(encode_aux_threshmatch));
+ line=get_line(in);
line=get_line(in);
line=get_line(in);
if(sscanf(line,"%d",&(t->quantvals))!=1){
- fprintf(stderr,"2: syntax in %s in line:\t %s",filename,line);
+ fprintf(stderr,"3: syntax in %s in line:\t %s",filename,line);
+ exit(1);
+ }
+ line=get_line(in);
+ if(sscanf(line,"%d",&(t->threshvals))!=1){
+ fprintf(stderr,"4: syntax in %s in line:\t %s",filename,line);
exit(1);
}
/* load quantthresh */
- find_seek_to(in,"static long _vq_quantthresh_");
+ find_seek_to(in,"static double _vq_quantthresh_");
reset_next_value();
- t->quantthresh=malloc(sizeof(long)*t->quantvals);
- for(i=0;i<t->quantvals;i++)
+ t->quantthresh=malloc(sizeof(double)*t->threshvals);
+ for(i=0;i<t->threshvals-1;i++)
if(get_next_value(in,t->quantthresh+i)){
- fprintf(stderr,"out of data while reading codebook %s\n",filename);
+ fprintf(stderr,"out of data 1 while reading codebook %s\n",filename);
exit(1);
}
/* load quantmap */
find_seek_to(in,"static long _vq_quantmap_");
reset_next_value();
- t->quantmap=malloc(sizeof(long)*t->quantvals);
- for(i=0;i<t->quantvals;i++)
+ t->quantmap=malloc(sizeof(long)*t->threshvals);
+ for(i=0;i<t->threshvals;i++)
if(get_next_ivalue(in,t->quantmap+i)){
- fprintf(stderr,"out of data while reading codebook %s\n",filename);
+ fprintf(stderr,"out of data 2 while reading codebook %s\n",filename);
exit(1);
}
}
diff --git a/vq/latticebuild.c b/vq/latticebuild.c
index 65aca502..9b0a22fa 100644
--- a/vq/latticebuild.c
+++ b/vq/latticebuild.c
@@ -12,7 +12,7 @@
********************************************************************
function: utility main for building codebooks from lattice descriptions
- last mod: $Id: latticebuild.c,v 1.1.2.1 2000/04/21 16:35:40 xiphmont Exp $
+ last mod: $Id: latticebuild.c,v 1.1.2.2 2000/04/26 07:10:16 xiphmont Exp $
********************************************************************/
@@ -31,12 +31,13 @@
command line:
vqlattice description.vql datafile.vqd
- the lattice description file contains four lines:
+ the lattice description file contains five lines:
<n> <dim> <multiplicitavep>
<value_0> <value_1> <value_2> ... <value_n-1>
- <thresh_0> <thresh_1> <thresh_2> ... <thresh_n-2>
- <map_0> <map_1> <map_2> ... <map_n-1>
+ <m>
+ <thresh_0> <thresh_1> <thresh_2> ... <thresh_m-2>
+ <map_0> <map_1> <map_2> ... <map_m-1>
vqlattice sends residual data (for the next stage) to stdout, and
produces description.vqh */
@@ -48,7 +49,7 @@ int main(int argc,char *argv[]){
double *quantlist;
long *hits;
- int entries=-1,dim=-1,quantvals=-1,addmul=-1;
+ int entries=-1,dim=-1,quantvals=-1,addmul=-1,threshvals=-1;
FILE *out=NULL;
FILE *in=NULL;
char *line,*name;
@@ -105,9 +106,6 @@ int main(int argc,char *argv[]){
c.maptype=1;
c.q_sequencep=0;
c.quantlist=calloc(quantvals,sizeof(long));
- t.quantthresh=calloc(quantvals-1,sizeof(double));
- t.quantmap=calloc(quantvals,sizeof(int));
- t.quantvals=quantvals;
quantlist=malloc(sizeof(long)*c.dim*c.entries);
hits=malloc(c.entries*sizeof(long));
@@ -122,18 +120,29 @@ int main(int argc,char *argv[]){
}
}
+ line=setup_line(in);
+ if(sscanf(line,"%d",&threshvals)!=1){
+ fprintf(stderr,"Syntax error reading book file (line 3)\n");
+ exit(1);
+ }
+
+ t.quantthresh=calloc(threshvals-1,sizeof(double));
+ t.quantmap=calloc(threshvals,sizeof(int));
+ t.threshvals=threshvals;
+ t.quantvals=quantvals;
+
setup_line(in);
- for(j=0;j<quantvals-1;j++){
+ for(j=0;j<threshvals-1;j++){
if(get_line_value(in,t.quantthresh+j)==-1){
- fprintf(stderr,"Ran out of data on line 3 of description file\n");
+ fprintf(stderr,"Ran out of data on line 4 of description file\n");
exit(1);
}
}
setup_line(in);
- for(j=0;j<quantvals;j++){
+ for(j=0;j<threshvals;j++){
if(get_next_ivalue(in,t.quantmap+j)==-1){
- fprintf(stderr,"Ran out of data on line 4 of description file\n");
+ fprintf(stderr,"Ran out of data on line 5 of description file\n");
exit(1);
}
}
@@ -141,12 +150,14 @@ int main(int argc,char *argv[]){
/* gen a real quant list from the more easily human-grokked input */
{
double min=quantlist[0];
- double mindel=fabs(quantlist[0]-quantlist[1]);
+ double mindel=1;
for(j=1;j<quantvals;j++){
if(quantlist[j]<min)min=quantlist[j];
for(k=0;k<j;k++){
- double del=fabs(quantlist[j]-quantlist[k]);
- if(del<mindel)mindel=del;
+ double del=quantlist[k]-min;
+ /* really underpowered :-P know that this will only factor
+ powers of two (duh) */
+ while((int)(del/mindel)+.01<del/mindel){mindel/=2;}
}
}
c.q_min=_float32_pack(min);
@@ -266,19 +277,19 @@ int main(int argc,char *argv[]){
/* quantthresh */
fprintf(out,"static double _vq_quantthresh_%s[] = {\n",name);
- for(j=0;j<t.quantvals-1;){
+ for(j=0;j<t.threshvals-1;){
fprintf(out,"\t");
- for(k=0;k<8 && j<t.quantvals-1;k++,j++)
- fprintf(out,"%05g,",t.quantthresh[j]);
+ for(k=0;k<8 && j<t.threshvals-1;k++,j++)
+ fprintf(out,"%.5g, ",t.quantthresh[j]);
fprintf(out,"\n");
}
fprintf(out,"};\n\n");
/* quantmap */
fprintf(out,"static long _vq_quantmap_%s[] = {\n",name);
- for(j=0;j<t.quantvals;){
+ for(j=0;j<t.threshvals;){
fprintf(out,"\t");
- for(k=0;k<8 && j<t.quantvals;k++,j++)
+ for(k=0;k<8 && j<t.threshvals;k++,j++)
fprintf(out,"%5ld,",t.quantmap[j]);
fprintf(out,"\n");
}
@@ -289,13 +300,14 @@ int main(int argc,char *argv[]){
fprintf(out,"static encode_aux_threshmatch _vq_aux_%s = {\n",name);
fprintf(out,"\t_vq_quantthresh_%s,\n",name);
fprintf(out,"\t_vq_quantmap_%s,\n",name);
- fprintf(out,"\t%d\n};\n\n",t.quantvals);
+ fprintf(out,"\t%d,\n",t.quantvals);
+ fprintf(out,"\t%d\n};\n\n",t.threshvals);
fprintf(out,"static static_codebook _vq_book_%s = {\n",name);
fprintf(out,"\t%ld, %ld,\n",c.dim,c.entries);
fprintf(out,"\t_vq_lengthlist_%s,\n",name);
- fprintf(out,"\t2, %ld, %ld, %d, %d,\n",
+ fprintf(out,"\t1, %ld, %ld, %d, %d,\n",
c.q_min,c.q_delta,c.q_quant,c.q_sequencep);
fprintf(out,"\t_vq_quantlist_%s,\n",name);
fprintf(out,"\tNULL,\n");
diff --git a/vq/latticepare.c b/vq/latticepare.c
new file mode 100644
index 00000000..2b9b5086
--- /dev/null
+++ b/vq/latticepare.c
@@ -0,0 +1,392 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
+ * PLEASE READ THESE TERMS DISTRIBUTING. *
+ * *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
+ * http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: utility for paring low hit count cells from lattice codebook
+ last mod: $Id: latticepare.c,v 1.1.2.1 2000/04/26 07:10:16 xiphmont Exp $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <errno.h>
+#include "vorbis/codebook.h"
+#include "../lib/sharedbook.h"
+#include "bookutil.h"
+
+/* Lattice codebooks have two strengths: important fetaures that are
+ poorly modelled by global error minimization training (eg, strong
+ peaks) are not neglected 2) compact quantized representation.
+
+ A fully populated lattice codebook, however, swings point 1 too far
+ in the opposite direction; rare features need not be modelled quite
+ so religiously and as such, we waste bits unless we eliminate the
+ least common cells. The codebook rep supports unused cells, so we
+ need to tag such cells and build an auxiliary (non-thresh) search
+ mechanism to find the proper match quickly */
+
+/* two basic steps; first is pare the cell for which dispersal creates
+ the least additional error. This will naturally choose
+ low-population cells and cells that have not taken on points from
+ neighboring paring (but does not result in the lattice collapsing
+ inward and leaving low population ares totally unmodelled). After
+ paring has removed the desired number of cells, we need to build an
+ auxiliary search for each culled point */
+
+/* Although lattice books (due to threshhold-based matching) do not
+ actually use error to make cell selections (in fact, it need not
+ bear any relation), the 'secondbest' entry finder here is in fact
+ error/distance based, so latticepare is only useful on such books */
+
+/* command line:
+ latticepare latticebook.vqh input_data.vqd <target_cells>
+
+ produces a new output book on stdout
+*/
+
+void usage(){
+ fprintf(stderr,"latticepare latticebook.vqh input_data.vqd <target_cells>\n"
+ "produces a new output book on stdout \n");
+ exit(1);
+}
+
+static double _dist(int el,double *a, double *b){
+ int i;
+ double acc=0.;
+ for(i=0;i<el;i++){
+ double val=(a[i]-b[i]);
+ acc+=val*val;
+ }
+ return(acc);
+}
+
+static double *pointlist;
+static long points=0;
+static long allocated=0;
+
+void add_vector(codebook *b,double *vec,long n){
+ int dim=b->dim,i,j;
+ for(i=0;i<n/dim;i++){
+ for(j=i;j<n;j+=dim){
+ if(points>=allocated){
+ if(allocated){
+ allocated*=2;
+ pointlist=realloc(pointlist,allocated*dim*sizeof(double));
+ }else{
+ allocated=1024*1024;
+ pointlist=malloc(allocated*dim*sizeof(double));
+ }
+ }
+
+ pointlist[points++]=vec[j];
+ if(!(points&0xff))spinnit("loading... ",points);
+ }
+ }
+}
+
+/* search neighboring [non-culled] cells for lowest distance. Spiral
+ out in the event culling is deep */
+static int secondbest(codebook *b,double *vec,int best){
+ encode_aux_threshmatch *tt=b->c->thresh_tree;
+ int dim=b->dim;
+ int i,j,spiral=1;
+ int *index=alloca(dim*sizeof(int)*2);
+ int *mod=index+dim;
+ int entry=best;
+
+ double bestmetric=0;
+ int bestentry=-1;
+
+ /* decompose index */
+ for(i=0;i<dim;i++){
+ index[i]=best%tt->quantvals;
+ best/=tt->quantvals;
+ }
+
+ /* hit one off on all sides of it; most likely we'll find a possible
+ match */
+ for(i=0;i<dim;i++){
+ /* one up */
+ if(index[i]+1<tt->quantvals){
+ int newentry=entry+rint(pow(tt->quantvals,i));
+ if(b->c->lengthlist[newentry]>0){
+ double thismetric=_dist(dim, vec, b->valuelist+newentry*dim);
+
+ if(bestentry==-1 || bestmetric>thismetric){
+ bestmetric=thismetric;
+ bestentry=newentry;
+ }
+ }
+ }
+
+ /* one down */
+ if(index[i]-1>=0){
+ int newentry=entry-rint(pow(tt->quantvals,i));
+ if(b->c->lengthlist[newentry]>0){
+ double thismetric=_dist(dim, vec, b->valuelist+newentry*dim);
+
+ if(bestentry==-1 || bestmetric>thismetric){
+ bestmetric=thismetric;
+ bestentry=newentry;
+ }
+ }
+ }
+ }
+
+ /* no match? search all cells, binary count, that are one away on
+ one or more axes. Then continue out until there's a match.
+ We'll find one eventually, it's relatively OK to be inefficient
+ as the attempt above will almost always succeed */
+ while(bestentry==-1){
+ for(i=0;i<dim;i++)mod[i]= -spiral;
+ while(1){
+ int newentry=entry;
+
+ /* update the mod */
+ for(j=0;j<dim;j++){
+ if(mod[j]<=spiral)
+ break;
+ else{
+ if(j+1<dim){
+ mod[j]= -spiral;
+ mod[j+1]++;
+ }
+ }
+ }
+ if(j==dim)break;
+
+ /* reconstitute the entry */
+ for(j=0;j<dim;j++){
+ if(index[j]+mod[j]<0 || index[j]+mod[j]>=tt->quantvals){
+ newentry=-1;
+ break;
+ }
+ newentry+=mod[j]*rint(pow(tt->quantvals,j));
+ }
+
+ if(newentry!=-1 && newentry!=entry)
+ if(b->c->lengthlist[newentry]>0){
+ double thismetric=_dist(dim, vec, b->valuelist+newentry*dim);
+
+ if(bestentry==-1 || bestmetric>thismetric){
+ bestmetric=thismetric;
+ bestentry=newentry;
+ }
+ }
+ mod[0]++;
+ }
+ spiral++;
+ }
+
+ return(bestentry);
+}
+
+
+int main(int argc,char *argv[]){
+ char *basename;
+ codebook *b=NULL;
+ int input=0;
+ int entries;
+ int dim;
+ long i,j,target=-1;
+ argv++;
+
+ if(*argv==NULL){
+ usage();
+ exit(1);
+ }
+
+ /* yes, this is evil. However, it's very convenient to parse file
+ extentions */
+
+ while(*argv){
+ if(*argv[0]=='-'){
+ /* option */
+ }else{
+ /* input file. What kind? */
+ char *dot;
+ char *ext=NULL;
+ char *name=strdup(*argv++);
+ dot=strrchr(name,'.');
+ if(dot)
+ ext=dot+1;
+ else{
+ ext="";
+ target=atol(name);
+ }
+
+
+ /* codebook */
+ if(!strcmp(ext,"vqh")){
+ if(input){
+ fprintf(stderr,"specify all input data (.vqd) files following\n"
+ "codebook header (.vqh) files\n");
+ exit(1);
+ }
+
+ basename=strrchr(name,'/');
+ if(basename)
+ basename=strdup(basename)+1;
+ else
+ basename=strdup(name);
+ dot=strrchr(basename,'.');
+ if(dot)*dot='\0';
+
+ b=codebook_load(name);
+ }
+
+ /* data file; we do actually need to suck it into memory */
+ /* we're dealing with just one book, so we can de-interleave */
+ if(!strcmp(ext,"vqd")){
+ int cols;
+ long lines=0;
+ char *line;
+ double *vec;
+ FILE *in=fopen(name,"r");
+ if(!in){
+ fprintf(stderr,"Could not open input file %s\n",name);
+ exit(1);
+ }
+
+ reset_next_value();
+ line=setup_line(in);
+ /* count cols before we start reading */
+ {
+ char *temp=line;
+ while(*temp==' ')temp++;
+ for(cols=0;*temp;cols++){
+ while(*temp>32)temp++;
+ while(*temp==' ')temp++;
+ }
+ }
+ vec=alloca(cols*sizeof(double));
+ while(line){
+ lines++;
+ for(j=0;j<cols;j++)
+ if(get_line_value(in,vec+j)){
+ fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
+ exit(1);
+ }
+ /* deinterleave, add to heap */
+ add_vector(b,vec,cols);
+
+ line=setup_line(in);
+ }
+ fclose(in);
+ }
+ }
+ }
+ dim=b->dim;
+ entries=b->entries;
+
+ if(target==-1){
+ fprintf(stderr,"Target number of cells required on command line\n");
+ exit(1);
+ }
+
+ /* set up auxiliary vectors for error tracking */
+ {
+ long *membership=malloc(points*sizeof(long));
+ long *cellhead=malloc(entries*sizeof(long));
+ double *cellerror1=calloc(entries,sizeof(double)); /* error for
+ firstentries */
+ double *cellerror2=calloc(entries,sizeof(double)); /* error for
+ secondentry */
+ double globalerror=0.;
+ long cellsleft=entries;
+ for(i=0;i<points;i++)membership[i]=-1;
+ for(i=0;i<entries;i++)cellhead[i]=-1;
+
+ for(i=0;i<points;i++){
+ /* assign vectors to the nearest cell. Also keep track of second
+ nearest for error statistics */
+ double *ppt=pointlist+i*dim;
+ int firstentry=_best(b,ppt,1);
+ int secondentry=secondbest(b,ppt,firstentry);
+
+ double firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
+ double secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
+
+ if(!(i&0xff))spinnit("initializing... ",points-i);
+
+ membership[i]=cellhead[firstentry];
+ cellhead[firstentry]=i;
+
+ cellerror1[firstentry]+=firstmetric;
+ globalerror+=firstmetric;
+ cellerror2[firstentry]+=secondmetric;
+
+ }
+
+ while(cellsleft>target){
+ int bestcell=-1;
+ double besterror=0;
+ long head=-1;
+ spinnit("cells left to eliminate... ",cellsleft-target);
+
+ /* find the cell with lowest removal impact */
+ for(i=0;i<entries;i++){
+ if(b->c->lengthlist[i]>0){
+ double thiserror=globalerror-cellerror1[i]+cellerror2[i];
+ if(bestcell==-1 || besterror>thiserror){
+ besterror=thiserror;
+ bestcell=i;
+ }
+ }
+ }
+
+ /* disperse it. move each point out, adding it (properly) to
+ the second best */
+ b->c->lengthlist[bestcell]=0;
+ head=cellhead[bestcell];
+ cellhead[bestcell]=-1;
+ while(head!=-1){
+ /* head is a point number */
+ double *ppt=pointlist+head*dim;
+ int newentry=secondbest(b,ppt,bestcell);
+ int secondentry=secondbest(b,pointlist+head*dim,newentry);
+ double firstmetric=_dist(dim,b->valuelist+dim*newentry,ppt);
+ double secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
+ long next=membership[head];
+ cellerror1[newentry]+=firstmetric;
+ cellerror2[newentry]+=secondmetric;
+
+ membership[head]=cellhead[newentry];
+ cellhead[newentry]=head;
+ head=next;
+ }
+
+ cellsleft--;
+ }
+ free(membership);
+ free(cellhead);
+ free(cellerror1);
+ free(cellerror2);
+ }
+
+ for(i=0;i<entries;i++)
+ fprintf(stderr,"%ld, ",b->c->lengthlist[i]);
+
+ /* paring is over. Build decision trees using points that now fall
+ through the thresh matcher */
+
+
+
+ fprintf(stderr,"\r \nDone.");
+ return(0);
+}
+
+
+
+
diff --git a/vq/metrics.c b/vq/metrics.c
index 90e19ea9..201521f4 100644
--- a/vq/metrics.c
+++ b/vq/metrics.c
@@ -12,7 +12,7 @@
********************************************************************
function: function calls to collect codebook metrics
- last mod: $Id: metrics.c,v 1.6.4.2 2000/04/21 16:35:40 xiphmont Exp $
+ last mod: $Id: metrics.c,v 1.6.4.3 2000/04/26 07:10:16 xiphmont Exp $
********************************************************************/
@@ -24,57 +24,63 @@
#include "../lib/sharedbook.h"
#include "bookutil.h"
+/* collect the following metrics:
+
+ mean and mean squared amplitude
+ mean and mean squared error
+ mean and mean squared error (per sample) by entry
+ worst case fit by entry
+ entry cell size
+ hits by entry
+ total bits
+ total samples
+ (average bits per sample)*/
+
+
/* set up metrics */
double meanamplitude_acc=0.;
double meanamplitudesq_acc=0.;
double meanerror_acc=0.;
double meanerrorsq_acc=0.;
-double meandev_acc=0.;
-
-double *histogram=NULL;
-double *histogram_error=NULL;
-double *histogram_errorsq=NULL;
-double *histogram_distance=NULL;
-double *histogram_hi=NULL;
-double *histogram_lo=NULL;
+double **histogram=NULL;
+double **histogram_error=NULL;
+double **histogram_errorsq=NULL;
+double **histogram_hi=NULL;
+double **histogram_lo=NULL;
+double bits=0.;
double count=0.;
-int books=0;
-int lastdim=-1;
-
static double *_now(codebook *c, int i){
return c->valuelist+i*c->c->dim;
}
-int histerrsort(const void *a, const void *b){
- double av=histogram_distance[*((long *)a)];
- double bv=histogram_distance[*((long *)b)];
- if(av<bv)return(-1);
- return(1);
-}
+int books=0;
void process_preprocess(codebook **bs,char *basename){
- while(bs[books]){
- codebook *b=bs[books];
- lastdim=b->c->dim;
- books++;
- }
-
-
+ int i;
+ while(bs[books])books++;
+
if(books){
- const static_codebook *b=bs[books-1]->c;
- histogram=calloc(b->entries,sizeof(double));
- histogram_distance=calloc(b->entries,sizeof(double));
- histogram_errorsq=calloc(b->entries*lastdim,sizeof(double));
- histogram_error=calloc(b->entries*lastdim,sizeof(double));
- histogram_hi=calloc(b->entries*lastdim,sizeof(double));
- histogram_lo=calloc(b->entries*lastdim,sizeof(double));
+ histogram=calloc(books,sizeof(double *));
+ histogram_error=calloc(books,sizeof(double *));
+ histogram_errorsq=calloc(books,sizeof(double *));
+ histogram_hi=calloc(books,sizeof(double *));
+ histogram_lo=calloc(books,sizeof(double *));
}else{
fprintf(stderr,"Specify at least one codebook\n");
exit(1);
}
+
+ for(i=0;i<books;i++){
+ codebook *b=bs[i];
+ histogram[i]=calloc(b->entries,sizeof(double));
+ histogram_error[i]=calloc(b->entries*b->dim,sizeof(double));
+ histogram_errorsq[i]=calloc(b->entries*b->dim,sizeof(double));
+ histogram_hi[i]=calloc(b->entries*b->dim,sizeof(double));
+ histogram_lo[i]=calloc(b->entries*b->dim,sizeof(double));
+ }
}
static double _dist(int el,double *a, double *b){
@@ -87,219 +93,181 @@ static double _dist(int el,double *a, double *b){
return acc;
}
-void cell_spacing(codebook **b){
- int i,j,k;
- for(i=0;i<books;i++){
- double min,max,mean=0.,meansq=0.;
- codebook *c=b[i];
-
- fprintf(stderr,"\nCell spacing for book %d:\n",i);
-
- /* minimum, maximum, mean, ms cell spacing */
- for(j=0;j<c->c->entries;j++){
- double localmin=-1.;
- for(k=0;k<c->c->entries;k++){
- double this=_dist(c->c->dim,_now(c,j),_now(c,k));
- if(j!=k &&
- (localmin==-1 || this<localmin))
- localmin=this;
- }
-
- if(j==0 || localmin<min)min=localmin;
- if(j==0 || localmin>max)max=localmin;
- mean+=sqrt(localmin);
- meansq+=localmin;
+void cell_spacing(codebook *c){
+ int j,k;
+ double min,max,mean=0.,meansq=0.;
+
+ /* minimum, maximum, mean, ms cell spacing */
+ for(j=0;j<c->c->entries;j++){
+ double localmin=-1.;
+ for(k=0;k<c->c->entries;k++){
+ double this=_dist(c->c->dim,_now(c,j),_now(c,k));
+ if(j!=k &&
+ (localmin==-1 || this<localmin))
+ localmin=this;
}
-
- fprintf(stderr,"\tminimum cell spacing (closest side): %g\n",sqrt(min));
- fprintf(stderr,"\tmaximum cell spacing (closest side): %g\n",sqrt(max));
- fprintf(stderr,"\tmean closest side spacing: %g\n",mean/c->c->entries);
- fprintf(stderr,"\tmean sq closest side spacing: %g\n",sqrt(meansq/c->c->entries));
+
+ if(j==0 || localmin<min)min=localmin;
+ if(j==0 || localmin>max)max=localmin;
+ mean+=sqrt(localmin);
+ meansq+=localmin;
}
+
+ fprintf(stderr,"\tminimum cell spacing (closest side): %g\n",sqrt(min));
+ fprintf(stderr,"\tmaximum cell spacing (closest side): %g\n",sqrt(max));
+ fprintf(stderr,"\tmean closest side spacing: %g\n",mean/c->c->entries);
+ fprintf(stderr,"\tmean sq closest side spacing: %g\n",sqrt(meansq/c->c->entries));
}
-void process_postprocess(codebook **b,char *basename){
- int i,j,k;
+void process_postprocess(codebook **bs,char *basename){
+ int i,k,book;
char *buffer=alloca(strlen(basename)+80);
- codebook *bb=b[books-1];
- fprintf(stderr,"Done. Processed %ld data points:\n",
+ fprintf(stderr,"Done. Processed %ld data points:\n\n",
(long)count);
- fprintf(stderr,"\tglobal mean amplitude: %g\n",
- meanamplitude_acc/(count*lastdim));
- fprintf(stderr,"\tglobal mean squared amplitude: %g\n",
- sqrt(meanamplitudesq_acc/(count*lastdim)));
- fprintf(stderr,"\tglobal mean error: %g\n",
- meanerror_acc/(count*lastdim));
- fprintf(stderr,"\tglobal mean squared error: %g\n",
- sqrt(meanerrorsq_acc/(count*lastdim)));
- fprintf(stderr,"\tglobal mean deviation: %g\n",
- meandev_acc/(count*lastdim));
+ fprintf(stderr,"Global statistics:******************\n\n");
+
+ fprintf(stderr,"\ttotal samples: %ld\n",(long)count);
+ fprintf(stderr,"\ttotal bits required to code: %ld\n",(long)bits);
+ fprintf(stderr,"\taverage bits per sample: %g\n\n",bits/count);
- cell_spacing(b);
+ fprintf(stderr,"\tmean sample amplitude: %g\n",
+ meanamplitude_acc/count);
+ fprintf(stderr,"\tmean squared sample amplitude: %g\n\n",
+ sqrt(meanamplitudesq_acc/count));
- {
+ fprintf(stderr,"\tmean code error: %g\n",
+ meanerror_acc/count);
+ fprintf(stderr,"\tmean squared code error: %g\n\n",
+ sqrt(meanerrorsq_acc/count));
+
+ for(book=0;book<books;book++){
FILE *out;
+ codebook *b=bs[book];
+ int n=b->c->entries;
+ int dim=b->c->dim;
+
+ fprintf(stderr,"Book %d statistics:------------------\n",book);
- sprintf(buffer,"%s-mse.m",basename);
+ cell_spacing(b);
+
+ sprintf(buffer,"%s-%d-mse.m",basename,book);
out=fopen(buffer,"w");
if(!out){
fprintf(stderr,"Could not open file %s for writing\n",buffer);
exit(1);
}
-
- for(i=0;i<bb->c->entries;i++){
- for(k=0;k<bb->c->dim;k++){
- fprintf(out,"%ld, %g, %g\n",
- i*bb->c->dim+k,(bb->valuelist+i*bb->c->dim)[k],
- sqrt((histogram_errorsq+i*bb->c->dim)[k]/histogram[i]));
+
+ for(i=0;i<n;i++){
+ for(k=0;k<dim;k++){
+ fprintf(out,"%d, %g, %g\n",
+ i*dim+k,(b->valuelist+i*dim)[k],
+ sqrt((histogram_errorsq[book]+i*dim)[k]/histogram[book][i]));
}
}
fclose(out);
-
- sprintf(buffer,"%s-me.m",basename);
+
+ sprintf(buffer,"%s-%d-me.m",basename,book);
out=fopen(buffer,"w");
if(!out){
fprintf(stderr,"Could not open file %s for writing\n",buffer);
exit(1);
}
-
- for(i=0;i<bb->c->entries;i++){
- for(k=0;k<bb->c->dim;k++){
- fprintf(out,"%ld, %g, %g\n",
- i*bb->c->dim+k,(bb->valuelist+i*bb->c->dim)[k],
- (histogram_error+i*bb->c->dim)[k]/histogram[i]);
+
+ for(i=0;i<n;i++){
+ for(k=0;k<dim;k++){
+ fprintf(out,"%d, %g, %g\n",
+ i*dim+k,(b->valuelist+i*dim)[k],
+ (histogram_error[book]+i*dim)[k]/histogram[book][i]);
}
}
fclose(out);
- sprintf(buffer,"%s-worst.m",basename);
+ sprintf(buffer,"%s-%d-worst.m",basename,book);
out=fopen(buffer,"w");
if(!out){
fprintf(stderr,"Could not open file %s for writing\n",buffer);
exit(1);
}
-
- for(i=0;i<bb->c->entries;i++){
- for(k=0;k<bb->c->dim;k++){
- fprintf(out,"%ld, %g, %g, %g\n",
- i*bb->c->dim+k,(bb->valuelist+i*bb->c->dim)[k],
- (bb->valuelist+i*bb->c->dim)[k]+(histogram_lo+i*bb->c->dim)[k],
- (bb->valuelist+i*bb->c->dim)[k]+(histogram_hi+i*bb->c->dim)[k]);
+
+ for(i=0;i<n;i++){
+ for(k=0;k<dim;k++){
+ fprintf(out,"%d, %g, %g, %g\n",
+ i*dim+k,(b->valuelist+i*dim)[k],
+ (b->valuelist+i*dim)[k]+(histogram_lo[book]+i*dim)[k],
+ (b->valuelist+i*dim)[k]+(histogram_hi[book]+i*dim)[k]);
}
}
fclose(out);
}
+}
- {
- FILE *out;
- long *index=alloca(sizeof(long)*bb->c->entries);
- sprintf(buffer,"%s-distance.m",basename);
- out=fopen(buffer,"w");
- if(!out){
- fprintf(stderr,"Could not open file %s for writing\n",buffer);
- exit(1);
- }
- for(j=0;j<bb->c->entries;j++){
- if(histogram[j])histogram_distance[j]/=histogram[j];
- index[j]=j;
+void process_one(codebook *b,int book,double *a,int dim,int step,int addmul){
+ int j,entry;
+ double base=0.;
+ double amplitude=0.;
+
+ if(book==0)
+ for(j=0;j<dim;j++){
+ if(b->c->q_sequencep){
+ amplitude=a[j*step]-base;
+ base=a[j*step];
+ }else
+ amplitude=a[j*step];
+ meanamplitude_acc+=fabs(amplitude);
+ meanamplitudesq_acc+=amplitude*amplitude;
+ count++;
}
- qsort(index,bb->c->entries,sizeof(long),histerrsort);
+ entry=vorbis_book_besterror(b,a,step,addmul);
+
+ histogram[book][entry]++;
+ bits+=vorbis_book_codelen(b,entry);
+
+ for(j=0;j<dim;j++){
+ double error=a[j*step];
- for(j=0;j<bb->c->entries;j++)
- for(k=0;k<histogram[index[j]];k++)
- fprintf(out,"%g,\n",histogram_distance[index[j]]);
- fclose(out);
-
+ if(book==books-1){
+ meanerror_acc+=fabs(error);
+ meanerrorsq_acc+=error*error;
+ }
+ histogram_errorsq[book][entry*dim+j]+=error*error;
+ histogram_error[book][entry*dim+j]+=fabs(error);
+ if(histogram[book][entry]==0 || histogram_hi[book][entry*dim+j]<error)
+ histogram_hi[book][entry*dim+j]=error;
+ if(histogram[book][entry]==0 || histogram_lo[book][entry*dim+j]>error)
+ histogram_lo[book][entry*dim+j]=error;
}
}
+
void process_vector(codebook **bs,int *addmul,int inter,double *a,int n){
int bi;
- int i,j;
- double base=0.;
- double *work=alloca(sizeof(double)*n);
- double amplitude=0.;
-
- memcpy(work,a,sizeof(double)*n);
+ int i;
for(bi=0;bi<books;bi++){
codebook *b=bs[bi];
int dim=b->dim;
-
+
if(inter){
- for(i=0;i<n/dim;i++){
- int entry=vorbis_book_besterror(b,work+i,n/dim,addmul[bi]);
-
- if(bi==books-1){
- double distance=0;
- histogram[entry]++;
-
- for(j=0;j<dim;j++){
- double error=work[i+j*(n/dim)];
- histogram_errorsq[entry*dim+j]+=error*error;
- histogram_error[entry*dim+j]+=fabs(error);
- if(histogram[entry]==0 || histogram_hi[entry*dim+j]<error)
- histogram_hi[entry*dim+j]=error;
- if(histogram[entry]==0 || histogram_lo[entry*dim+j]>error)
- histogram_lo[entry*dim+j]=error;
- distance+=error*error;
- }
- histogram_distance[entry]+=sqrt(distance);
- }
- }
+ for(i=0;i<n/dim;i++)
+ process_one(b,bi,a+i,dim,n/dim,addmul[bi]);
}else{
- for(i=0;i<=n-dim;i+=dim){
- int entry=vorbis_book_besterror(b,work+i,1,addmul[bi]);
- if(bi==books-1){
- double distance=0;
- histogram[entry]++;
-
- for(j=0;j<dim;j++){
- double error=work[i+j];
- histogram_errorsq[entry*dim+j]+=error*error;
- histogram_error[entry*dim+j]+=fabs(error);
- if(histogram[entry]==0 || histogram_hi[entry*dim+j]<error)
- histogram_hi[entry*dim+j]=error;
- if(histogram[entry]==0 || histogram_lo[entry*dim+j]>error)
- histogram_lo[entry*dim+j]=error;
- distance+=error*error;
- }
- histogram_distance[entry]+=sqrt(distance);
- }
- }
+ for(i=0;i<=n-dim;i+=dim)
+ process_one(b,bi,a+i,dim,1,addmul[bi]);
}
}
-
- for(i=0;i<n;i++){
- double error=work[i];
- if(bs[0]->c->q_sequencep){
- amplitude=a[i]-base;
- base=a[i];
- }else
- amplitude=a[i];
-
- meanamplitude_acc+=fabs(amplitude);
- meanamplitudesq_acc+=amplitude*amplitude;
- meanerror_acc+=fabs(error);
- meanerrorsq_acc+=error*error;
-
- if(amplitude)
- meandev_acc+=fabs(error/amplitude);
- else
- meandev_acc+=fabs(error); /* yeah, yeah */
- }
-
- if((long)(count++)%100)spinnit("working.... lines: ",count);
+
+ if((long)(count)%100)spinnit("working.... samples: ",count);
}
void process_usage(void){
fprintf(stderr,
- "usage: vqmetrics +|*<codebook>.vqh [ +|*<codebook.vqh> ]... \n"
+ "usage: vqmetrics [-i] +|*<codebook>.vqh [ +|*<codebook.vqh> ]... \n"
" datafile.vqd [datafile.vqd]...\n\n"
- " data can be taken on stdin. Output goes to output files:\n"
+ " data can be taken on stdin. -i indicates interleaved coding.\n"
+ " Output goes to output files:\n"
" basename-me.m: gnuplot: mean error by entry value\n"
" basename-mse.m: gnuplot: mean square error by entry value\n"
" basename-worst.m: gnuplot: worst error by entry value\n"
diff --git a/vq/run.c b/vq/run.c
index d0fc7339..afe1ebf8 100644
--- a/vq/run.c
+++ b/vq/run.c
@@ -12,7 +12,7 @@
********************************************************************
function: utility main for loading and operating on codebooks
- last mod: $Id: run.c,v 1.9.4.1 2000/04/21 16:35:40 xiphmont Exp $
+ last mod: $Id: run.c,v 1.9.4.2 2000/04/26 07:10:16 xiphmont Exp $
********************************************************************/
@@ -140,7 +140,7 @@ int main(int argc,char *argv[]){
}
reset_next_value();
- line=get_line(in);
+ line=setup_line(in);
/* count cols before we start reading */
{
char *temp=line;
@@ -161,8 +161,7 @@ int main(int argc,char *argv[]){
/* ignores -s for now */
process_vector(b,addmul,interleave,vec,cols);
- reset_next_value();
- line=get_line(in);
+ line=setup_line(in);
}
fclose(in);
}