From 653c6faab591f3c5f290a18a7f58c29cb1a3b0a7 Mon Sep 17 00:00:00 2001
From: wlemb <wlemb>
Date: Sun, 6 Feb 2000 09:34:01 +0000
Subject: Initial revision

---
 src/utils/indxbib/Makefile.sub |  30 ++
 src/utils/indxbib/dirnamemax.c |  49 +++
 src/utils/indxbib/eign         | 133 ++++++++
 src/utils/indxbib/indxbib.cc   | 744 +++++++++++++++++++++++++++++++++++++++++
 src/utils/indxbib/indxbib.man  | 204 +++++++++++
 src/utils/indxbib/signal.c     |  63 ++++
 6 files changed, 1223 insertions(+)
 create mode 100644 src/utils/indxbib/Makefile.sub
 create mode 100755 src/utils/indxbib/dirnamemax.c
 create mode 100644 src/utils/indxbib/eign
 create mode 100644 src/utils/indxbib/indxbib.cc
 create mode 100644 src/utils/indxbib/indxbib.man
 create mode 100644 src/utils/indxbib/signal.c

(limited to 'src/utils/indxbib')
diff --git a/src/utils/indxbib/Makefile.sub b/src/utils/indxbib/Makefile.sub
new file mode 100644
index 00000000..2c50e659
--- /dev/null
+++ b/src/utils/indxbib/Makefile.sub
@@ -0,0 +1,30 @@
+PROG=indxbib
+MAN1=indxbib.n
+XLIBS=$(LIBBIB) $(LIBGROFF)
+MLIB=$(LIBM)
+OBJS=\
+  indxbib.o \
+  dirnamemax.o \
+  signal.o
+CCSRCS=\
+  $(srcdir)/indxbib.cc
+CSRCS=\
+  $(srcdir)/dirnamemax.c \
+  $(srcdir)/signal.c
+NAMEPREFIX=$(g)
+
+install_data: eign # use the system's /usr/lib/eign if present, else install ours
+	-test -d $(datadir) || $(mkinstalldirs) $(datadir)
+	-test -d $(datasubdir) || $(mkinstalldirs) $(datasubdir)
+	if test -f /usr/lib/eign; then \
+	  rm -f $(common_words_file); \
+	  ln -s /usr/lib/eign $(common_words_file) 2>/dev/null \
+	  || ln /usr/lib/eign $(common_words_file) 2>/dev/null \
+	  || cp /usr/lib/eign $(common_words_file); \
+	else \
+	  rm -f $(common_words_file); \
+	  $(INSTALL_DATA) $(srcdir)/eign $(common_words_file); \
+	fi
+
+uninstall_sub:
+	-rm -f $(common_words_file)
diff --git a/src/utils/indxbib/dirnamemax.c b/src/utils/indxbib/dirnamemax.c
new file mode 100755
index 00000000..a8cd9923
--- /dev/null
+++ b/src/utils/indxbib/dirnamemax.c
@@ -0,0 +1,49 @@
+/* dir_name_max(dir) does the same as pathconf(dir, _PC_NAME_MAX) */
+
+#include <sys/types.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#ifdef _POSIX_VERSION
+
+/* Return pathconf()'s name-length limit for DIR (-1: error or no limit). */
+long dir_name_max(const char *dir)
+{
+  return pathconf(dir, _PC_NAME_MAX);
+}
+
+#else /* not _POSIX_VERSION */
+
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif /* HAVE_LIMITS_H */
+
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#else /* not HAVE_DIRENT_H */
+#ifdef HAVE_SYS_DIR_H
+#include <sys/dir.h>
+#endif /* HAVE_SYS_DIR_H */
+#endif /* not HAVE_DIRENT_H */
+
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else /* !MAXNAMLEN */
+#ifdef MAXNAMELEN
+#define NAME_MAX MAXNAMELEN
+#else /* !MAXNAMELEN */
+#define NAME_MAX 14
+#endif /* !MAXNAMELEN */
+#endif /* !MAXNAMLEN */
+#endif /* !NAME_MAX */
+
+/* No pathconf() here: every directory gets the compile-time NAME_MAX. */
+long dir_name_max(const char *dir)
+{
+  return NAME_MAX;
+}
+
+#endif /* not _POSIX_VERSION */
diff --git a/src/utils/indxbib/eign b/src/utils/indxbib/eign
new file mode 100644
index 00000000..7718c8b1
--- /dev/null
+++ b/src/utils/indxbib/eign
@@ -0,0 +1,133 @@
+a
+i
+the
+to
+of
+and
+in
+is
+it
+for
+that
+if
+you
+this
+be
+on
+with
+not
+have
+are
+or
+as
+from
+can
+but
+by
+at
+an
+will
+no
+all
+was
+do
+there
+my
+one
+so
+we
+they
+what
+would
+any
+which
+about
+get
+your
+use
+some
+me
+then
+name
+like
+out
+when
+up
+time
+other
+more
+only
+just
+end
+also
+know
+how
+new
+should
+been
+than
+them
+he
+who
+make
+may
+people
+these
+now
+their
+here
+into
+first
+could
+way
+had
+see
+work
+well
+were
+two
+very
+where
+while
+us
+because
+good
+same
+even
+much
+most
+many
+such
+long
+his
+over
+last
+since
+right
+before
+our
+without
+too
+those
+why
+must
+part
+being
+current
+back
+still
+go
+point
+value
+each
+did
+both
+true
+off
+say
+another
+state
+might
+under
+start
+try
diff --git a/src/utils/indxbib/indxbib.cc b/src/utils/indxbib/indxbib.cc
new file mode 100644
index 00000000..c22190f5
--- /dev/null
+++ b/src/utils/indxbib/indxbib.cc
@@ -0,0 +1,744 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING.  If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "posix.h"
+#include "lib.h"
+#include "errarg.h"
+#include "error.h"
+#include "stringclass.h"
+#include "cset.h"
+#include "cmap.h"
+
+#include "defs.h"
+#include "index.h"
+
+extern "C" {
+  // Sun's stdlib.h fails to declare this.
+  char *mktemp(char *);
+}
+
+#define DEFAULT_HASH_TABLE_SIZE 997
+#define TEMP_INDEX_TEMPLATE "indxbibXXXXXX"
+
+// (2^n - MALLOC_OVERHEAD) should be a good argument for malloc().
+
+#define MALLOC_OVERHEAD 16
+
+#ifdef BLOCK_SIZE
+#undef BLOCK_SIZE
+#endif
+
+const int BLOCK_SIZE = ((1024 - MALLOC_OVERHEAD - sizeof(struct block *)
+			 - sizeof(int)) / sizeof(int)); // block is ~1024-overhead bytes
+struct block {			// one chunk of a bucket's list of tag numbers
+  block *next;			// chunk that was in use before this one, or 0
+  int used;			// number of valid entries in v
+  int v[BLOCK_SIZE];		// tag numbers, in the order they were stored
+  // A new chunk is linked in front of the existing chain P.
+  block(block *p = 0) : next(p), used(0) { }
+};
+
+struct block;
+
+union table_entry {		// one hash bucket
+  block *ptr;			// while indexing: newest chunk of the list, or 0
+  int count;			// while writing: start of the list in ints, or -1
+};
+
+struct word_list {		// chain of common words that share a hash value
+  word_list *next;
+  char *str;			// LEN bytes copied by the constructor, no NUL
+  int len;
+  word_list(const char *, int, word_list *);
+};
+
+table_entry *hash_table;
+int hash_table_size = DEFAULT_HASH_TABLE_SIZE;
+// We make this the same size as hash_table so we only have to do one
+// mod per key.
+static word_list **common_words_table = 0;
+char *key_buffer;
+
+FILE *indxfp;
+int ntags = 0;
+string filenames;
+char *temp_index_file = 0;
+
+const char *ignore_fields = "XYZ";
+const char *common_words_file = COMMON_WORDS_FILE;
+int n_ignore_words = 100;
+int truncate_len = 6;
+int shortest_len = 3;
+int max_keys_per_item = 100;
+
+static void usage();
+static void write_hash_table();
+static void init_hash_table();
+static void read_common_words_file();
+static int store_key(char *s, int len);
+static void possibly_store_key(char *s, int len);
+static int do_whole_file(const char *filename);
+static int do_file(const char *filename);
+static void store_reference(int filename_index, int pos, int len);
+static void check_integer_arg(char opt, const char *arg, int min, int *res);
+static void store_filename(const char *);
+static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp);
+static char *get_cwd();
+
+extern "C" {
+  void cleanup();
+  long dir_name_max(const char *);
+  void catch_fatal_signals();
+  void ignore_fatal_signals();
+}
+
+int main(int argc, char **argv)	// returns 0 if every input was indexed, else 1
+{
+  program_name = argv[0];
+  static char stderr_buf[BUFSIZ];
+  setbuf(stderr, stderr_buf);
+  
+  const char *basename = 0;
+  typedef int (*parser_t)(const char *);
+  parser_t parser = do_file;		// replaced by do_whole_file for -w
+  const char *directory = 0;
+  const char *foption = 0;
+  int opt;
+  while ((opt = getopt(argc, argv, "c:o:h:i:k:l:t:n:d:f:vw")) != EOF)
+    switch (opt) {
+    case 'c':
+      common_words_file = optarg;
+      break;
+    case 'd':
+      directory = optarg;
+      break;
+    case 'f':
+      foption = optarg;
+      break;
+    case 'h':
+      check_integer_arg('h', optarg, 1, &hash_table_size);
+      if (!is_prime(hash_table_size)) {
+	while (!is_prime(++hash_table_size))	// round up to the next prime
+	  ;
+	warning("%1 not prime: using %2 instead", optarg, hash_table_size);
+      }
+      break;
+    case 'i':
+      ignore_fields = optarg;
+      break;
+    case 'k':
+      check_integer_arg('k', optarg, 1, &max_keys_per_item);
+      break;
+    case 'l':
+      check_integer_arg('l', optarg, 0, &shortest_len);
+      break;
+    case 'n':
+      check_integer_arg('n', optarg, 0, &n_ignore_words);
+      break;
+    case 'o':
+      basename = optarg;
+      break;
+    case 't':
+      check_integer_arg('t', optarg, 1, &truncate_len);
+      break;
+    case 'w':
+      parser = do_whole_file;
+      break;
+    case 'v':
+      {
+	extern const char *Version_string;
+	fprintf(stderr, "GNU indxbib version %s\n", Version_string);
+	fflush(stderr);
+	break;
+      }
+    case '?':
+      usage();
+      break;
+    default:
+      assert(0);
+      break;
+    }
+  if (optind >= argc && foption == 0)
+    fatal("no files and no -f option");
+  // The string pool starts with: directory, common words file, ignored fields.
+  if (!directory) {
+    char *path = get_cwd();
+    store_filename(path);
+    a_delete path;
+  }
+  else
+    store_filename(directory);
+  init_hash_table();
+  store_filename(common_words_file);
+  store_filename(ignore_fields);
+  key_buffer = new char[truncate_len];	// keys are cut at truncate_len bytes
+  read_common_words_file();
+  if (!basename)	// no -o: name the index after the first file, if any
+    basename = optind < argc ? argv[optind] : DEFAULT_INDEX_NAME;
+  const char *p = strrchr(basename, '/');
+  long name_max;
+  if (p) {
+    char *dir = strsave(basename);
+    dir[p - basename] = '\0';		// keep only the directory part
+    name_max = dir_name_max(dir);
+    a_delete dir;
+  }
+  else
+    name_max = dir_name_max(".");
+  const char *filename = p ? p + 1 : basename;
+  if (name_max >= 0 && strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max)
+    fatal("`%1%2' is too long for a filename", filename, INDEX_SUFFIX);
+  if (p) {	// the temporary file goes into the same directory as the index
+    p++;
+    temp_index_file = new char[p - basename + sizeof(TEMP_INDEX_TEMPLATE)];
+    memcpy(temp_index_file, basename, p - basename);
+    strcpy(temp_index_file + (p - basename), TEMP_INDEX_TEMPLATE);
+  }
+  else {
+    temp_index_file = strsave(TEMP_INDEX_TEMPLATE);
+  }
+  if (!mktemp(temp_index_file) || !temp_index_file[0])
+    fatal("cannot create file name for temporary file");
+  catch_fatal_signals();	// from here on a signal removes the temp file
+  int fd = creat(temp_index_file, S_IRUSR|S_IRGRP|S_IROTH);
+  if (fd < 0)
+    fatal("can't create temporary index file: %1", strerror(errno));
+  indxfp = fdopen(fd, "w");
+  if (indxfp == 0)
+    fatal("fdopen failed");
+  // Leave room for the header; write_hash_table() seeks back and fills it in.
+  if (fseek(indxfp, sizeof(index_header), 0) < 0)
+    fatal("can't seek past index header: %1", strerror(errno));
+  int failed = 0;
+  if (foption) {	// -f: one pathname per line, `-' meaning standard input
+    FILE *fp = stdin;
+    if (strcmp(foption, "-") != 0) {
+      errno = 0;
+      fp = fopen(foption, "r");
+      if (!fp)
+	fatal("can't open `%1': %2", foption, strerror(errno));
+    }
+    string path;
+    int lineno = 1;
+    for (;;) {
+      int c;
+      for (c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) {
+	if (c == '\0')
+	  error_with_file_and_line(foption, lineno,
+				   "nul character in pathname ignored");
+	else
+	  path += c;
+      }
+      if (path.length() > 0) {		// empty lines are skipped
+	path += '\0';
+	if (!(*parser)(path.contents()))
+	  failed = 1;
+	path.clear();
+      }
+      if (c == EOF)
+	break;
+      lineno++;
+    }
+    if (fp != stdin)
+      fclose(fp);
+  }
+  for (int i = optind; i < argc; i++)
+    if (!(*parser)(argv[i]))
+      failed = 1;
+  write_hash_table();
+  if (fclose(indxfp) < 0)
+    fatal("error closing temporary index file: %1", strerror(errno));
+  char *index_file = new char[strlen(basename) + sizeof(INDEX_SUFFIX)];
+  strcpy(index_file, basename);
+  strcat(index_file, INDEX_SUFFIX);
+#ifdef HAVE_RENAME
+  if (rename(temp_index_file, index_file) < 0)
+    fatal("can't rename temporary index file: %1", strerror(errno));
+#else /* not HAVE_RENAME */
+  ignore_fatal_signals();	// don't get interrupted between unlink and link
+  if (unlink(index_file) < 0) {
+    if (errno != ENOENT)
+      fatal("can't unlink `%1': %2", index_file, strerror(errno));
+  }
+  if (link(temp_index_file, index_file) < 0)
+    fatal("can't link temporary index file: %1", strerror(errno));
+  if (unlink(temp_index_file) < 0)
+    fatal("can't unlink temporary index file: %1", strerror(errno));
+#endif /* not HAVE_RENAME */
+  temp_index_file = 0;		// nothing left for cleanup() to remove
+  return failed;
+}
+
+static void usage()		// print the synopsis on stderr and exit with status 1
+{
+  fprintf(stderr,
+"usage: %s [-vw] [-c file] [-d dir] [-f file] [-h n] [-i XYZ] [-k n]\n"
+"       [-l n] [-n n] [-o base] [-t n] [files...]\n",
+	  program_name);
+  exit(1);
+}
+
+static void check_integer_arg(char opt, const char *arg, int min, int *res)
+{ // Parse ARG for option -OPT; *RES is set only if the value is in [MIN, INT_MAX].
+  char *ptr;			// first character strtol() did not consume
+  long n = strtol(arg, &ptr, 10);
+  if (n == 0 && ptr == arg)
+    error("argument to -%1 not an integer", opt);
+  else if (n < min)
+    error("argument to -%1 must not be less than %2", opt, min);
+  else if (n > INT_MAX)		// out of range: leave *res untouched
+    error("argument to -%1 greater than maximum integer", opt);
+  else {
+    if (*ptr != '\0')		// trailing junk is reported, the number is kept
+      error("junk after integer argument to -%1", opt);
+    *res = int(n);
+  }
+}
+
+static char *get_cwd()
+{
+  // Return the current directory in a new[]-allocated buffer owned by the
+  // caller.  The buffer is doubled (capped at INT_MAX) for as long as
+  // getcwd() fails with ERANGE; any other failure is fatal.
+  int capacity = 12;
+  for (;;) {
+    char *cwd = new char[capacity];
+    if (getcwd(cwd, capacity) != 0)
+      return cwd;
+    if (errno != ERANGE)
+      fatal("cannot get current working directory: %1", strerror(errno));
+    a_delete cwd;
+    if (capacity == INT_MAX)
+      fatal("current working directory longer than INT_MAX");
+    if (capacity <= INT_MAX/2)
+      capacity *= 2;
+    else
+      capacity = INT_MAX;
+  }
+}
+
+word_list::word_list(const char *s, int n, word_list *p)
+: next(p), str(new char[n]), len(n)	// same order as the member declarations
+{
+  // Keep a private copy of the N bytes at S; no terminator is stored.
+  memcpy(str, s, n);
+}
+
+static void read_common_words_file()	// load the words store_key() must reject
+{
+  if (n_ignore_words <= 0)		// -n 0: no common words at all
+    return;
+  errno = 0;
+  FILE *fp = fopen(common_words_file, "r");
+  if (!fp)
+    fatal("can't open `%1': %2", common_words_file, strerror(errno));
+  common_words_table = new word_list * [hash_table_size];
+  for (int i = 0; i < hash_table_size; i++)
+    common_words_table[i] = 0;
+  int count = 0;			// words read, stored or not
+  int key_len = 0;
+  for (;;) {
+    int c = getc(fp);
+    while (c != EOF && !csalnum(c))	// skip the separators before a word
+      c = getc(fp);
+    if (c == EOF)
+      break;
+    do {				// lower-case and truncate like a key
+      if (key_len < truncate_len)
+	key_buffer[key_len++] = cmlower(c);
+      c = getc(fp);
+    } while (c != EOF && csalnum(c));
+    if (key_len >= shortest_len) {	// shorter words are never keys anyway
+      int h = hash(key_buffer, key_len) % hash_table_size;
+      common_words_table[h] = new word_list(key_buffer, key_len,
+					    common_words_table[h]);
+    }
+    if (++count >= n_ignore_words)	// only the first n_ignore_words count
+      break;
+    key_len = 0;
+    if (c == EOF)
+      break;
+  }
+  n_ignore_words = count;		// the file may hold fewer than requested
+  fclose(fp);
+}
+
+static int do_whole_file(const char *filename)
+{
+  // Index FILENAME as a single record whose keys are its alphanumeric runs.
+  // Returns 0 if the file cannot be opened, 1 otherwise.
+  errno = 0;
+  FILE *in = fopen(filename, "r");
+  if (in == 0) {
+    error("can't open `%1': %2", filename, strerror(errno));
+    return 0;
+  }
+  int nstored = 0;
+  int ch = getc(in);
+  while (ch != EOF) {
+    if (!csalnum(ch)) {
+      ch = getc(in);
+      continue;
+    }
+    // Gather one word, keeping only its first truncate_len characters;
+    // CH ends up holding whatever terminated the word (possibly EOF).
+    int n = 0;
+    key_buffer[n++] = ch;
+    while ((ch = getc(in)) != EOF && csalnum(ch))
+      if (n < truncate_len)
+	key_buffer[n++] = ch;
+    if (store_key(key_buffer, n) && ++nstored >= max_keys_per_item)
+      break;
+  }
+  // The tag refers to the file name about to be appended at the current
+  // end of the string pool; its start and length are both 0.
+  store_reference(filenames.length(), 0, 0);
+  store_filename(filename);
+  fclose(in);
+  return 1;
+}
+
+static int do_file(const char *filename)	// 1 on success, 0 if unopenable
+{
+  errno = 0;
+  FILE *fp = fopen(filename, "r");
+  if (fp == 0) {
+    error("can't open `%1': %2", filename, strerror(errno));
+    return 0;
+  }
+  int filename_index = filenames.length();	// offset of the name in the pool
+  store_filename(filename);
+
+  enum {
+    START,	// at the start of the file; also in between references
+    BOL,	// in the middle of a reference, at the beginning of the line
+    PERCENT,	// seen a percent at the beginning of the line
+    IGNORE,	// ignoring a field
+    IGNORE_BOL,	// at the beginning of a line ignoring a field
+    KEY,	// in the middle of a key
+    DISCARD,	// after truncate_len bytes of a key
+    MIDDLE	// in between keys
+  } state = START;
+  
+  // In states START, BOL, IGNORE_BOL, space_count how many spaces at
+  // the beginning have been seen.  In states PERCENT, IGNORE, KEY,
+  // MIDDLE space_count must be 0.
+  int space_count = 0;
+  int byte_count = 0;		// bytes read
+  int key_len = 0;
+  int ref_start = -1;		// position of start of current reference
+  for (;;) {
+    int c = getc(fp);
+    if (c == EOF)
+      break;
+    byte_count++;
+    switch (state) {
+    case START:
+      if (c == ' ' || c == '\t') {
+	space_count++;
+	break;
+      }
+      if (c == '\n') {
+	space_count = 0;
+	break;
+      }
+      ref_start = byte_count - space_count - 1;	// include leading blanks
+      space_count = 0;
+      if (c == '%')
+	state = PERCENT;
+      else if (csalnum(c)) {
+	state = KEY;
+	key_buffer[0] = c;
+	key_len = 1;
+      }
+      else
+	state = MIDDLE;
+      break;
+    case BOL:
+      switch (c) {
+      case '%':
+	if (space_count > 0) {	// an indented % does not start a field
+	  space_count = 0;
+	  state = MIDDLE;
+	}
+	else
+	  state = PERCENT;
+	break;
+      case ' ':
+      case '\t':
+	space_count++;
+	break;
+      case '\n':		// blank line: the reference ends here
+	store_reference(filename_index, ref_start,
+			byte_count - 1 - space_count - ref_start);
+	state = START;
+	space_count = 0;
+	break;
+      default:
+	space_count = 0;
+	if (csalnum(c)) {
+	  state = KEY;
+	  key_buffer[0] = c;
+	  key_len = 1;
+	}
+	else
+	  state = MIDDLE;
+      }
+      break;
+    case PERCENT:
+      // strchr() would also match the terminator, so exclude a NUL byte.
+      if (c != '\0' && strchr(ignore_fields, c) != 0)
+	state = IGNORE;
+      else if (c == '\n')
+	state = BOL;
+      else
+	state = MIDDLE;
+      break;
+    case IGNORE:
+      if (c == '\n')
+	state = IGNORE_BOL;
+      break;
+    case IGNORE_BOL:
+      switch (c) {
+      case '%':
+	if (space_count > 0) {
+	  state = IGNORE;
+	  space_count = 0;
+	}
+	else
+	  state = PERCENT;
+	break;
+      case ' ':
+      case '\t':
+	space_count++;
+	break;
+      case '\n':		// blank line: the reference ends here
+	store_reference(filename_index, ref_start,
+			byte_count - 1 - space_count - ref_start);
+	state = START;
+	space_count = 0;
+	break;
+      default:
+	space_count = 0;
+	state = IGNORE;
+      }
+      break;
+    case KEY:
+      if (csalnum(c)) {
+	if (key_len < truncate_len)
+	  key_buffer[key_len++] = c;
+	else
+	  state = DISCARD;
+      }
+      else {
+	possibly_store_key(key_buffer, key_len);
+	key_len = 0;
+	if (c == '\n')
+	  state = BOL;
+	else
+	  state = MIDDLE;
+      }
+      break;
+    case DISCARD:
+      if (!csalnum(c)) {
+	possibly_store_key(key_buffer, key_len);
+	key_len = 0;
+	if (c == '\n')
+	  state = BOL;
+	else
+	  state = MIDDLE;
+      }
+      break;
+    case MIDDLE:
+      if (csalnum(c)) {
+	state = KEY;
+	key_buffer[0] = c;
+	key_len = 1;
+      }
+      else if (c == '\n')
+	state = BOL;
+      break;
+    default:
+      assert(0);
+    }
+  }
+  switch (state) {		// EOF: finish whatever reference was open
+  case START:
+    break;
+  case DISCARD:
+  case KEY:
+    possibly_store_key(key_buffer, key_len);
+    // fall through
+  case BOL:
+  case PERCENT:
+  case IGNORE_BOL:
+  case IGNORE:
+  case MIDDLE:
+    store_reference(filename_index, ref_start,
+		    byte_count - ref_start - space_count);
+    break;
+  default:
+    assert(0);
+  }
+  fclose(fp);
+  return 1;
+}
+
+static void store_reference(int filename_index, int pos, int len)
+{
+  // Append one tag record to the index file and count it.  Keys gathered
+  // before this call were filed under the value ntags had until now.
+  tag rec;
+  rec.length = len, rec.start = pos, rec.filename_index = filename_index;
+  fwrite_or_die(&rec, sizeof rec, 1, indxfp);
+  ++ntags;
+}
+
+static void store_filename(const char *fn)	// append FN to the string pool
+{
+  filenames += fn;
+  filenames += '\0';		// entries in the pool are NUL-terminated
+}
+
+static void init_hash_table()
+{
+  table_entry *slot = hash_table = new table_entry[hash_table_size];
+  for (int left = hash_table_size; left > 0; left--)
+    (slot++)->ptr = 0;		// a null ptr marks a bucket with no keys yet
+}
+
+static void possibly_store_key(char *s, int len)
+{
+  // Enforce max_keys_per_item per record; the tally restarts when ntags moves.
+  static int quota_tag = -1;
+  static int used = 0;
+  if (quota_tag != ntags) {
+    quota_tag = ntags;
+    used = 0;
+  }
+  if (used >= max_keys_per_item)
+    return;
+  used += store_key(s, len) ? 1 : 0;
+}
+
+static int store_key(char *s, int len)	// 1 if the key is indexed, 0 if rejected
+{
+  if (len < shortest_len)
+    return 0;
+  int is_number = 1;
+  for (int i = 0; i < len; i++)
+    if (!csdigit(s[i])) {
+      is_number = 0;
+      s[i] = cmlower(s[i]);	// keys are folded to lower case in place
+    }
+  if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9'))	// only 19xx kept
+    return 0;
+  int h = hash(s, len) % hash_table_size;
+  if (common_words_table) {	// reject words loaded from the common words file
+    for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next)
+      if (len == ptr->len && memcmp(s, ptr->str, len) == 0)
+	return 0;
+  }
+  table_entry *pp =  hash_table + h;
+  if (!pp->ptr)
+    pp->ptr = new block;
+  else if (pp->ptr->v[pp->ptr->used - 1] == ntags)	// already listed for this tag
+    return 1;
+  else if (pp->ptr->used >= BLOCK_SIZE)	// chunk full: chain a fresh one in front
+    pp->ptr = new block(pp->ptr);
+  pp->ptr->v[(pp->ptr->used)++] = ntags;
+  return 1;
+}
+
+static void write_hash_table()	// write lists, table, string pool, then header
+{
+  const int minus_one = -1;	// written after each bucket's list
+  int li = 0;			// ints written to the lists section so far
+  for (int i = 0; i < hash_table_size; i++) {
+    block *ptr = hash_table[i].ptr;
+    if (!ptr)
+      hash_table[i].count = -1;	// empty bucket
+    else {
+      hash_table[i].count = li;	// where this bucket's list starts
+      block *rev = 0;
+      while (ptr) {		// reverse the chain so the oldest chunk is first
+	block *tem = ptr;
+	ptr = ptr->next;
+	tem->next = rev;
+	rev = tem;
+      }
+      while (rev) {		// write each chunk and free it
+	fwrite_or_die(rev->v, sizeof(int), rev->used, indxfp);
+	li += rev->used;
+	block *tem = rev;
+	rev = rev->next;
+	delete tem;
+      }
+      fwrite_or_die(&minus_one, sizeof(int), 1, indxfp);
+      li += 1;
+    }
+  }
+  if (sizeof(table_entry) == sizeof(int))	// union is int-sized: one write
+    fwrite_or_die(hash_table, sizeof(int), hash_table_size, indxfp);
+  else {
+    // write it out word by word
+    for (int i = 0; i < hash_table_size; i++)
+      fwrite_or_die(&hash_table[i].count, sizeof(int), 1, indxfp);
+  }
+  fwrite_or_die(filenames.contents(), 1, filenames.length(), indxfp);
+  if (fseek(indxfp, 0, 0) < 0)	// back to the space main() left for the header
+    fatal("error seeking on index file: %1", strerror(errno));
+  index_header h;
+  h.magic = INDEX_MAGIC;
+  h.version = INDEX_VERSION;
+  h.tags_size = ntags;
+  h.lists_size = li;
+  h.table_size = hash_table_size;
+  h.strings_size = filenames.length();
+  h.truncate = truncate_len;
+  h.shortest = shortest_len;
+  h.common = n_ignore_words;
+  fwrite_or_die(&h, sizeof(h), 1, indxfp);
+}
+
+static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp)
+{ // Write NITEMS objects of SIZE bytes from PTR to FP; a short write is fatal.
+  if (fwrite(ptr, size_t(size), size_t(nitems), fp) != size_t(nitems))
+    fatal("fwrite failed: %1", strerror(errno));
+}
+
+void fatal_error_exit()	// presumably the exit hook used by fatal() -- confirm
+{
+  cleanup();			// remove the unfinished temporary index, if any
+  exit(3);
+}
+
+extern "C" {
+
+void cleanup()			// also called from the C signal handler
+{
+  if (temp_index_file)		// main() zeroes this once the index is in place
+    unlink(temp_index_file);
+}
+
+}
diff --git a/src/utils/indxbib/indxbib.man b/src/utils/indxbib/indxbib.man
new file mode 100644
index 00000000..914cb698
--- /dev/null
+++ b/src/utils/indxbib/indxbib.man
@@ -0,0 +1,204 @@
+.ig \"-*- nroff -*-
+Copyright (C) 1989-1995 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the
+entire resulting derived work is distributed under the terms of a
+permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this
+manual into another language, under the above conditions for modified
+versions, except that this permission notice may be included in
+translations approved by the Free Software Foundation instead of in
+the original English.
+..
+.TH @G@INDXBIB @MAN1EXT@ "@MDATE@" "Groff Version @VERSION@"
+.SH NAME
+@g@indxbib \- make inverted index for bibliographic databases
+.SH SYNOPSIS
+.nr a \n(.j
+.ad l
+.nr i \n(.i
+.in +\w'\fB@g@indxbib 'u
+.ti \niu
+.B @g@indxbib
+.de OP
+.ie \\n(.$-1 .RI "[\ \fB\\$1\fP" "\\$2" "\ ]"
+.el .RB "[\ " "\\$1" "\ ]"
+..
+.OP \-vw
+.OP \-c file
+.OP \-d dir
+.OP \-f file
+.OP \-h n
+.OP \-i string
+.OP \-k n
+.OP \-l n
+.OP \-n n
+.OP \-o file
+.OP \-t n
+.RI [\  filename \|.\|.\|.\ ]
+.ad \na
+.SH DESCRIPTION
+.B @g@indxbib
+makes an inverted index for the bibliographic databases in
+.IR filename \|.\|.\|.
+for use with
+.BR @g@refer (@MAN1EXT@),
+.BR @g@lookbib (@MAN1EXT@),
+and
+.BR lkbib (@MAN1EXT@).
+The index will be named
+.IB filename @INDEX_SUFFIX@\fR;
+the index is written to a temporary file which is then renamed to this.
+If no filenames are given on the command line because the
+.B \-f
+option has been used, and no
+.B \-o
+option is given, the index will be named
+.BR @DEFAULT_INDEX_NAME@@INDEX_SUFFIX@ .
+.LP
+Bibliographic databases are divided into records by blank lines.
+Within a record, each field starts with a
+.B %
+character at the beginning of a line.
+Fields have a one letter name which follows the
+.B %
+character.
+.LP
+The values set by the
+.BR \-c ,
+.BR \-n ,
+.BR \-l
+and
+.B \-t
+options are stored in the index;
+when the index is searched, keys will be discarded and truncated in a
+manner appropriate to these options;
+the original keys will be used for verifying that any record
+found using the index actually contains the keys.
+This means that a user of an index need not know whether these
+options were used in the creation of the index,
+provided that not all the keys to be searched for
+would have been discarded during indexing
+and that the user supplies at least the part of each key
+that would have remained after being truncated during indexing.
+The value set by the
+.B \-i
+option is also stored in the index
+and will be used in verifying records found using the index.
+.SH OPTIONS
+.TP
+.B \-v
+Print the version number.
+.TP
+.B \-w
+Index whole files.
+Each file is a separate record.
+.TP
+.BI \-c file
+Read the list of common words from
+.I file
+instead of
+.BR @COMMON_WORDS_FILE@ .
+.TP
+.BI \-d dir
+Use
+.I dir
+as the pathname of the current working directory to store in the index,
+instead of the path printed by
+.BR pwd (1).
+Usually
+.I dir
+will be a symbolic link that points to the directory printed by
+.BR pwd (1).
+.TP
+.BI \-f file
+Read the files to be indexed from
+.IR file .
+If
+.I file
+is
+.BR \- ,
+files will be read from the standard input.
+The
+.B \-f
+option can be given at most once.
+.TP
+.BI \-i string
+Don't index the contents of fields whose names are in
+.IR string .
+Initially
+.I string
+is
+.BR XYZ .
+.TP
+.BI \-h n
+Use the first prime greater than or equal to
+.I n
+for the size of the hash table.
+Larger values of
+.I n
+will usually make searching faster,
+but will make the index larger
+and
+.B @g@indxbib
+use more memory.
+Initially
+.I n
+is 997.
+.TP
+.BI \-k n
+Use at most
+.I n
+keys per input record.
+Initially
+.I n
+is 100.
+.TP
+.BI \-l n
+Discard keys that are shorter than
+.IR n .
+Initially
+.I n
+is 3.
+.TP
+.BI \-n n
+Discard the
+.I n
+most common words.
+Initially
+.I n
+is 100.
+.TP
+.BI \-o basename
+The index should be named
+.IB basename @INDEX_SUFFIX@\fR.
+.TP
+.BI \-t n
+Truncate keys to
+.IR n .
+Initially
+.I n
+is 6.
+.SH FILES
+.TP \w'\fBindxbib\fIXXXXXX'u+2n
+.IB filename @INDEX_SUFFIX@
+Index.
+.TP
+.B @DEFAULT_INDEX_NAME@@INDEX_SUFFIX@
+Default index name.
+.TP
+.B @COMMON_WORDS_FILE@
+List of common words.
+.TP
+.BI indxbib XXXXXX
+Temporary file.
+.SH "SEE ALSO"
+.BR @g@refer (@MAN1EXT@),
+.BR lkbib (@MAN1EXT@),
+.BR @g@lookbib (@MAN1EXT@)
diff --git a/src/utils/indxbib/signal.c b/src/utils/indxbib/signal.c
new file mode 100644
index 00000000..8078472f
--- /dev/null
+++ b/src/utils/indxbib/signal.c
@@ -0,0 +1,63 @@
+/* Copyright (C) 1992 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING.  If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Unfortunately vendors seem to have problems writing a <signal.h>
+that is correct for C++, so we implement all signal handling in C. */
+
+#include <sys/types.h>
+#include <signal.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifndef RETSIGTYPE
+#define RETSIGTYPE void
+#endif
+
+extern void cleanup();
+
+/* Run cleanup(), then re-raise SIGNUM with its default action restored. */
+static RETSIGTYPE handle_fatal_signal(int signum)
+{
+  signal(signum, SIG_DFL);
+  cleanup();
+  kill(getpid(), signum);
+}
+
+void catch_fatal_signals()	/* clean up on SIGHUP (if defined), SIGINT, SIGTERM */
+{
+#ifdef SIGHUP
+  signal(SIGHUP, handle_fatal_signal);
+#endif
+  signal(SIGINT, handle_fatal_signal);
+  signal(SIGTERM, handle_fatal_signal);
+}
+
+#ifndef HAVE_RENAME
+
+void ignore_fatal_signals()	/* ignore the signals catch_fatal_signals() traps */
+{
+#ifdef SIGHUP
+  signal(SIGHUP, SIG_IGN);
+#endif
+  signal(SIGINT, SIG_IGN);
+  signal(SIGTERM, SIG_IGN);
+}
+
+#endif /* not HAVE_RENAME */
-- 
cgit v1.2.1