summaryrefslogtreecommitdiff
path: root/vq/train.c
diff options
context:
space:
mode:
Diffstat (limited to 'vq/train.c')
-rw-r--r--vq/train.c362
1 files changed, 362 insertions, 0 deletions
diff --git a/vq/train.c b/vq/train.c
new file mode 100644
index 00000000..833a12cb
--- /dev/null
+++ b/vq/train.c
@@ -0,0 +1,362 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
+ * PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
+ * http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: utility main for training codebooks
+ last mod: $Id: train.c,v 1.18.6.1 2000/08/31 09:00:03 xiphmont Exp $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include "vqgen.h"
+#include "vqext.h"
+#include "bookutil.h"
+
+/* Return the next line from 'in' that does not begin with '#'.
+   Lines that do begin with '#' are consumed here; when 'pass' is
+   nonzero each of them is also written to 'out' followed by a newline.
+   Whatever get_line() hands back for a non-'#' line is returned as is,
+   including NULL. */
+static char *rline(FILE *in,FILE *out,int pass){
+  char *text;
+
+  for(text=get_line(in); text!=NULL && text[0]=='#'; text=get_line(in)){
+    if(pass)
+      fprintf(out,"%s\n",text);
+  }
+  return(text);
+}
+
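+/* command line:
+ vqtrain vqfile [options] trainfile [trainfile]
+
+ options: -params entries,dim,quant
+ -subvector start[,num]
+ -error desired_error
+ -iterations iterations
+ -distance mesh_spacing
+ -b dummy (eliminate cell size biasing)
+ -c dummy (use centroid midpoints)
+*/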
+
+/* Print the banner, command synopsis, option list and worked examples
+   to stderr.  vqext_booktype supplies the %s in the banner line.  The
+   blank line separating the option list from the examples comes after
+   the last option (-c), so that all options print as one group. */
+static void usage(void){
+  fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
+          "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
+          "options: -p[arams] <entries,dim,quant>\n"
+          " -s[ubvector] <start[,num]>\n"
+          " -e[rror] <desired_error>\n"
+          " -i[terations] <maxiterations>\n"
+          " -d[istance] quantization mesh spacing for density limitation\n"
+          " -b <dummy> eliminate cell size biasing; use normal LBG\n"
+          " -c <dummy> Use centroid (not median) midpoints\n\n"
+          "examples:\n"
+          " train a new codebook to 1%% tolerance on datafile 'foo':\n"
+          " xxxvqtrain book -p 256,6,8 -e .01 foo\n"
+          " (produces a trained set in book-0.vqi)\n\n"
+          " continue training 'book-0.vqi' (produces book-1.vqi):\n"
+          " xxxvqtrain book-0.vqi\n\n"
+          " add subvector from element 1 to <dimension> from files\n"
+          " data*.m to the training in progress, producing book-1.vqi:\n"
+          " xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
+}
+
+/* Stop request flag.  Written by setexit() from signal context and
+   polled by the training loop in main(), so it is declared
+   volatile sig_atomic_t: that is the object type the C standard allows
+   a signal handler to store to, and volatile keeps the loop from
+   caching the value. */
+volatile sig_atomic_t exiting=0;
+
+/* Signal handler installed for SIGTERM and SIGINT: announce the
+   pending shutdown and raise the flag so training stops once the
+   current iteration is done.  The signal number is not used.
+   NOTE(review): fprintf is not async-signal-safe; it is kept so the
+   user still gets the notice, but the flag store is the only part of
+   this handler that is strictly well-defined. */
+void setexit(int dummy){
+  (void)dummy;
+  fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
+  exiting=1;
+}
+
+/* Read the next non-comment line of a book file through rline().  A
+   NULL result means the book ended early; report it and exit rather
+   than hand NULL to strcmp()/sscanf(). */
+static char *book_line(FILE *in,FILE *out,int pass){
+  char *line=rline(in,out,pass);
+  if(!line){
+    fprintf(stderr,"Premature end of file reading book file\n");
+    exit(1);
+  }
+  return(line);
+}
+
+/* Step over separator characters: any non-NUL byte at or below ASCII
+   space, so tabs and carriage returns separate columns as well. */
+static char *skip_blank(char *s){
+  while(*s && (unsigned char)*s<=32)s++;
+  return(s);
+}
+
+/* Step over one column: a run of bytes above ASCII space. */
+static char *skip_field(char *s){
+  while((unsigned char)*s>32)s++;
+  return(s);
+}
+
+/* Entry point of the codebook trainer.
+
+   argv[1] names the book.  If that file can be opened it is read back
+   in (header, quantized entries, biases, training points) and training
+   continues from it; either way the output goes to a new file whose
+   name is derived from argv[1] ("-0.vqi" appended, or the number after
+   the last '-' incremented).  The remaining arguments are options
+   (each followed by one argument) and training data files.  After
+   loading, the book is iterated until the error drops below 'desired',
+   'iter' iterations have run, or SIGINT/SIGTERM sets 'exiting'; the
+   result is then written to the output file.
+
+   Exits with status 0 on success (and when run with no arguments,
+   after printing usage) and 1 on any error.
+
+   'q' is not declared in this file; presumably it is the quantization
+   metadata object exported through vqext.h -- confirm there. */
+int main(int argc,char *argv[]){
+  vqgen v;
+
+  int entries=-1,dim=-1;
+  int start=0,num=-1;
+  float desired=.05,mindist=0.;
+  int iter=1000;
+  int biasp=1;
+  int centroid=0;
+
+  FILE *out=NULL;
+  char *line;
+  long i,j,k;
+  int init=0;
+  q.quant=-1;
+
+  argv++;
+  if(!*argv){
+    usage();
+    exit(0);
+  }
+
+  /* get the book name, a preexisting book to continue training */
+  {
+    FILE *in=NULL;
+    /* +30 leaves room for the "-0.vqi" / "<n>.vqi" suffix */
+    char *filename=alloca(strlen(*argv)+30),*ptr;
+
+    strcpy(filename,*argv);
+    in=fopen(filename,"r");
+    ptr=strrchr(filename,'-');
+    if(ptr){
+      int serial;
+      ptr++;
+      serial=atoi(ptr);
+      sprintf(ptr,"%d.vqi",serial+1);
+    }else
+      strcat(filename,"-0.vqi");
+
+    out=fopen(filename,"w");
+    if(out==NULL){
+      fprintf(stderr,"Unable to open %s for writing\n",filename);
+      exit(1);
+    }
+
+    if(in){
+      /* we wish to suck in a preexisting book and continue to train it */
+      float a;
+
+      line=book_line(in,out,1);
+      if(strcmp(line,vqext_booktype)){
+        fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
+        exit(1);
+      }
+
+      line=book_line(in,out,1);
+      if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
+        fprintf(stderr,"Syntax error reading book file\n");
+        exit(1);
+      }
+
+      vqgen_init(&v,dim,vqext_aux,entries,mindist,
+                 vqext_metric,vqext_weight,centroid);
+      init=1;
+
+      /* quant setup */
+      line=book_line(in,out,1);
+      if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
+                &q.quant,&q.sequencep)!=4){
+        fprintf(stderr,"Syntax error reading book file\n");
+        exit(1);
+      }
+
+      /* quantized entries; 'a' is a float, so the conversion must be
+         %f -- %lf would store a double through a float pointer */
+      i=0;
+      for(j=0;j<entries;j++){
+        for(k=0;k<dim;k++){
+          line=book_line(in,out,0);
+          if(sscanf(line,"%f",&a)!=1){
+            fprintf(stderr,"Syntax error reading book file\n");
+            exit(1);
+          }
+          v.entrylist[i++]=a;
+        }
+      }
+      vqgen_unquantize(&v,&q);
+
+      /* bias */
+      i=0;
+      for(j=0;j<entries;j++){
+        line=book_line(in,out,0);
+        if(sscanf(line,"%f",&a)!=1){
+          fprintf(stderr,"Syntax error reading book file\n");
+          exit(1);
+        }
+        v.bias[i++]=a;
+      }
+
+      v.seeded=1;
+      {
+        /* training points: dim+vqext_aux values each, one per line,
+           until the file runs out.  A point is only added once all of
+           its values were read; a trailing partial point is dropped. */
+        float *b=alloca((dim+vqext_aux)*sizeof(float));
+        int done=0;
+        while(!done){
+          for(k=0;k<dim+vqext_aux;k++){
+            line=rline(in,out,0);
+            if(!line){
+              done=1;
+              break;
+            }
+            if(sscanf(line,"%f",b+k)!=1){
+              fprintf(stderr,"Syntax error reading book file\n");
+              exit(1);
+            }
+          }
+          if(!done)vqgen_addpoint(&v,b,b+dim);
+        }
+      }
+
+      fclose(in);
+    }
+  }
+
+  /* get the rest... (step past the book name) */
+  argv++;
+  while(*argv){
+    if(argv[0][0]=='-'){
+      /* it's an option; every option, -b and -c included, consumes
+         one following argument */
+      if(!argv[1]){
+        fprintf(stderr,"Option %s missing argument.\n",argv[0]);
+        exit(1);
+      }
+      switch(argv[0][1]){
+      case 'p':
+        if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
+          goto syner;
+        break;
+      case 's':
+        if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
+          num= -1;
+          if(sscanf(argv[1],"%d",&start)!=1)
+            goto syner;
+        }
+        break;
+      case 'e':
+        if(sscanf(argv[1],"%f",&desired)!=1)
+          goto syner;
+        break;
+      case 'd':
+        if(sscanf(argv[1],"%f",&mindist)!=1)
+          goto syner;
+        if(init)v.mindist=mindist;
+        break;
+      case 'i':
+        if(sscanf(argv[1],"%d",&iter)!=1)
+          goto syner;
+        break;
+      case 'b':
+        biasp=0;
+        break;
+      case 'c':
+        centroid=1;
+        break;
+      default:
+        fprintf(stderr,"Unknown option %s\n",argv[0]);
+        exit(1);
+      }
+      argv+=2;
+    }else{
+      /* it's an input file */
+      char *file=*argv++;
+      FILE *in;
+      int cols=-1;
+
+      if(!init){
+        if(dim==-1 || entries==-1 || q.quant==-1){
+          fprintf(stderr,"-p required when training a new set\n");
+          exit(1);
+        }
+        vqgen_init(&v,dim,vqext_aux,entries,mindist,
+                   vqext_metric,vqext_weight,centroid);
+        init=1;
+      }
+
+      in=fopen(file,"r");
+      if(in==NULL){
+        fprintf(stderr,"Could not open input file %s\n",file);
+        exit(1);
+      }
+      fprintf(out,"# training file entry: %s\n",file);
+
+      while((line=rline(in,out,0))){
+        if(cols==-1){
+          /* the first line that holds data fixes the column count */
+          char *scan=skip_blank(line);
+          int count=0;
+          while(*scan){
+            scan=skip_field(scan);
+            scan=skip_blank(scan);
+            count++;
+          }
+          /* a blank line says nothing about the layout; keep looking
+             (also keeps the array below from having zero length) */
+          if(count==0)continue;
+          cols=count;
+        }
+        {
+          int c;
+          float b[cols];
+          if(start+num*dim>cols){
+            fprintf(stderr,"ran out of columns reading %s\n",file);
+            exit(1);
+          }
+          line=skip_blank(line);
+          for(c=0;c<cols;c++){
+
+            /* static length buffer bug workaround: terminate the
+               field in place so atof() sees exactly one column */
+            char *end=skip_field(line);
+            char old=end[0];
+
+            end[0]='\0';
+            b[c]=atof(line);
+            end[0]=old;
+
+            line=skip_blank(end);
+          }
+          /* no explicit subvector count: use as many as fit */
+          if(num<=0)num=(cols-start)/dim;
+          for(c=0;c<num;c++)
+            vqext_addpoint_adj(&v,b,start+c*dim,dim,cols,num);
+
+        }
+      }
+      fclose(in);
+    }
+  }
+
+  if(!init){
+    fprintf(stderr,"No input files!\n");
+    exit(1);
+  }
+
+  vqext_preprocess(&v);
+
+  /* train the book */
+  signal(SIGTERM,setexit);
+  signal(SIGINT,setexit);
+
+  for(i=0;i<iter && !exiting;i++){
+    float result;
+    if(i!=0){
+      vqgen_unquantize(&v,&q);
+      vqgen_cellmetric(&v);
+    }
+    result=vqgen_iterate(&v,biasp);
+    vqext_quantize(&v,&q);
+    if(result<desired)break;
+  }
+
+  /* save the book */
+
+  fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
+  fprintf(out,"%s\n",vqext_booktype);
+  fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
+  fprintf(out,"%ld %ld %d %d\n",
+          q.min,q.delta,q.quant,q.sequencep);
+
+  /* quantized entries */
+  fprintf(out,"# quantized entries---\n");
+  i=0;
+  for(j=0;j<entries;j++)
+    for(k=0;k<dim;k++)
+      fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
+
+  fprintf(out,"# biases---\n");
+  i=0;
+  for(j=0;j<entries;j++)
+    fprintf(out,"%f\n",v.bias[i++]);
+
+  /* we may have done the density limiting mesh trick; refetch the
+     training points from the temp file */
+
+  rewind(v.asciipoints);
+  fprintf(out,"# points---\n");
+  {
+    /* copy in chunks; a short fwrite means a write error, so stop
+       instead of retrying the same bytes forever */
+    char buff[4096];
+    size_t got;
+    while((got=fread(buff,1,sizeof(buff),v.asciipoints))>0){
+      if(fwrite(buff,1,got,out)!=got){
+        fprintf(stderr,"Error writing training points to output file\n");
+        exit(1);
+      }
+    }
+  }
+
+  /* fclose flushes; a failure here means the book is incomplete */
+  if(fclose(out)){
+    fprintf(stderr,"Error closing output file\n");
+    exit(1);
+  }
+  fclose(v.asciipoints);
+
+  vqgen_unquantize(&v,&q);
+  vqgen_cellmetric(&v);
+  exit(0);
+
+ syner:
+  fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
+  exit(1);
+}