summaryrefslogtreecommitdiff
path: root/src/preproc/refer
diff options
context:
space:
mode:
Diffstat (limited to 'src/preproc/refer')
-rw-r--r--src/preproc/refer/Makefile.sub23
-rw-r--r--src/preproc/refer/TODO124
-rw-r--r--src/preproc/refer/command.cc807
-rw-r--r--src/preproc/refer/command.h36
-rwxr-xr-xsrc/preproc/refer/label.cc1602
-rw-r--r--src/preproc/refer/label.y1177
-rw-r--r--src/preproc/refer/ref.cc1160
-rw-r--r--src/preproc/refer/ref.h120
-rw-r--r--src/preproc/refer/refer.cc1228
-rw-r--r--src/preproc/refer/refer.h78
-rw-r--r--src/preproc/refer/refer.man1299
-rw-r--r--src/preproc/refer/token.cc378
-rw-r--r--src/preproc/refer/token.h88
13 files changed, 8120 insertions, 0 deletions
diff --git a/src/preproc/refer/Makefile.sub b/src/preproc/refer/Makefile.sub
new file mode 100644
index 00000000..1631b5e3
--- /dev/null
+++ b/src/preproc/refer/Makefile.sub
@@ -0,0 +1,23 @@
+PROG=refer
+MAN1=refer.n
+XLIBS=$(LIBBIB) $(LIBGROFF)
+MLIB=$(LIBM)
+OBJS=\
+ command.o \
+ label.o \
+ ref.o \
+ refer.o \
+ token.o
+CCSRCS=\
+ $(srcdir)/command.cc \
+ $(srcdir)/ref.cc \
+ $(srcdir)/refer.cc \
+ $(srcdir)/token.cc
+HDRS=\
+ $(srcdir)/refer.h \
+ $(srcdir)/token.h \
+ $(srcdir)/command.h \
+ $(srcdir)/ref.h
+GRAM=$(srcdir)/label.y
+YTABC=$(srcdir)/label.cc
+NAMEPREFIX=$(g)
diff --git a/src/preproc/refer/TODO b/src/preproc/refer/TODO
new file mode 100644
index 00000000..5bbd9bff
--- /dev/null
+++ b/src/preproc/refer/TODO
@@ -0,0 +1,124 @@
+inline references
+
+Some sort of macro/subroutine that can cover several references.
+
+move-punctuation should ignore multiple punctuation characters.
+
+Make the index files machine independent.
+
+Allow search keys to be negated (with !) to indicate that the
+reference should not contain the key. Ignore negated keys during
+indexed searching.
+
+Provide an option with lkbib and lookbib that prints the location
+(filename, position) of each reference. Need to map filename_id's
+back to filenames.
+
+Rename join-authors to join-fields. Have a separate label-join-fields
+command used by @ and #.
+
+Have some sort of quantifier: eg $.n#A means execute `$.n' for each
+instance of an A field, setting $ to that field, and then join the
+results using the join-authors command.
+
+no-text-in-bracket command which says not to allow post_text and
+pre_text when the [] flags has been given. Useful for superscripted
+footnotes.
+
+Make it possible to translate - to \(en in page ranges.
+
+Trim eign a bit.
+
+In indexed searching discard all numeric keys except dates.
+
+Allow `\ ' to separate article from first word.
+
+%also
+
+Option automatically to supply [] flags in every reference.
+
+See if we can avoid requiring a comma before jr. and so on
+in find_last_name().
+
+Cache sortified authors in authors string during tentative evaluation of
+label specification.
+
+Possibly don't allow * and % expressions in the first part of ?:, | or
+& expressions.
+
+Handle better the case where <> occurs inside functions and in the
+first operand of ~. Or perhaps implement <> using some magic character
+in the string.
+
+Should special treatment be given to lines beginning with . in
+references? (Unix refer seems to treat them like `%').
+
+Add global flag to control whether all files should be stat-ed after
+loading, and whether they should be stat-ed before each search.
+Perhaps make this dependent on the number of files there are.
+
+Option to truncate keys to truncate_len in linear searching.
+
+Allow multiple -f options in indxbib.
+
+In indxbib, possibly store common words rather than common words
+filename. In this case store only words that are actually present in
+the file.
+
+Perhaps we should put out an obnoxious copyright message when lookbib
+starts up.
+
+Provide an option that writes a file containing just the references
+actually used. Useful if you want to distribute a document.
+
+Have a magic token such that
+%A <sort stuff><magic token><print stuff>
+will print as though it were
+%A <print stuff>
+but sort as though it were
+%A <sort stuff>
+Do we need this if we can specify author alternatives for sorting?
+No, provided we have separate alternatives for @.
+
+In consider_authors when last names are ambiguous we might be able to
+use just the first name and not Jr. bit. Or we might be able to
+abbreviate the author.
+
+It ought to be possible to specify an alternative field to sort on
+instead of date. (ie if there's a field giving the type of document --
+these references should sort after any years)
+
+Provide a way to execute a command using a command-line option.
+
+Option to set the label-spec as a command-line option (-L).
+
+Command to to specify which fields can occur multiple times:
+multiple AE
+
+Command to specify how various fields sort:
+aort-as-name A
+sort-as-date D
+sort-as-title T
+sort-as-other O
+
+Command to specify which fields are author fields:
+# if we don't have A use field Q
+author-fields AQ
+
+Commands to set properties of tokens.
+sortify-token \(ae ae
+uppercase-token \[ae] \[AE]
+
+Command to set the names of months:
+months january february march april may ...
+
+Perhaps provide some sort of macro capability:
+# perhaps a macro capability
+defmacro foo
+annotation-field $1
+endef
+
+Command to control strings used in capitalization
+capitalize-start \s+2
+capitalize-end \s-2
+(perhaps make these arguments to the capitalize command.)
diff --git a/src/preproc/refer/command.cc b/src/preproc/refer/command.cc
new file mode 100644
index 00000000..004189ee
--- /dev/null
+++ b/src/preproc/refer/command.cc
@@ -0,0 +1,807 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "refer.h"
+#include "refid.h"
+#include "search.h"
+#include "command.h"
+
+cset cs_field_name = csalpha;
+
+class input_item {
+ input_item *next;
+ char *filename;
+ int first_lineno;
+ string buffer;
+ const char *ptr;
+ const char *end;
+public:
+ input_item(string &, const char *, int = 1);
+ ~input_item();
+ int get_char();
+ int peek_char();
+ void skip_char();
+ int get_location(const char **, int *);
+
+ friend class input_stack;
+};
+
+input_item::input_item(string &s, const char *fn, int ln)
+: filename(strsave(fn)), first_lineno(ln)
+{
+ buffer.move(s);
+ ptr = buffer.contents();
+ end = ptr + buffer.length();
+}
+
+input_item::~input_item()
+{
+ a_delete filename;
+}
+
+inline int input_item::peek_char()
+{
+ if (ptr >= end)
+ return EOF;
+ else
+ return (unsigned char)*ptr;
+}
+
+inline int input_item::get_char()
+{
+ if (ptr >= end)
+ return EOF;
+ else
+ return (unsigned char)*ptr++;
+}
+
+inline void input_item::skip_char()
+{
+ ptr++;
+}
+
+int input_item::get_location(const char **filenamep, int *linenop)
+{
+ *filenamep = filename;
+ if (ptr == buffer.contents())
+ *linenop = first_lineno;
+ else {
+ int ln = first_lineno;
+ const char *e = ptr - 1;
+ for (const char *p = buffer.contents(); p < e; p++)
+ if (*p == '\n')
+ ln++;
+ *linenop = ln;
+ }
+ return 1;
+}
+
+class input_stack {
+ static input_item *top;
+public:
+ static void init();
+ static int get_char();
+ static int peek_char();
+ static void skip_char() { top->skip_char(); }
+ static void push_file(const char *);
+ static void push_string(string &, const char *, int);
+ static void error(const char *format,
+ const errarg &arg1 = empty_errarg,
+ const errarg &arg2 = empty_errarg,
+ const errarg &arg3 = empty_errarg);
+};
+
+input_item *input_stack::top = 0;
+
+void input_stack::init()
+{
+ while (top) {
+ input_item *tem = top;
+ top = top->next;
+ delete tem;
+ }
+}
+
+int input_stack::get_char()
+{
+ while (top) {
+ int c = top->get_char();
+ if (c >= 0)
+ return c;
+ input_item *tem = top;
+ top = top->next;
+ delete tem;
+ }
+ return -1;
+}
+
+int input_stack::peek_char()
+{
+ while (top) {
+ int c = top->peek_char();
+ if (c >= 0)
+ return c;
+ input_item *tem = top;
+ top = top->next;
+ delete tem;
+ }
+ return -1;
+}
+
+void input_stack::push_file(const char *fn)
+{
+ FILE *fp;
+ if (strcmp(fn, "-") == 0) {
+ fp = stdin;
+ fn = "<standard input>";
+ }
+ else {
+ errno = 0;
+ fp = fopen(fn, "r");
+ if (fp == 0) {
+ error("can't open `%1': %2", fn, strerror(errno));
+ return;
+ }
+ }
+ string buf;
+ int bol = 1;
+ int lineno = 1;
+ for (;;) {
+ int c = getc(fp);
+ if (bol && c == '.') {
+ // replace lines beginning with .R1 or .R2 with a blank line
+ c = getc(fp);
+ if (c == 'R') {
+ c = getc(fp);
+ if (c == '1' || c == '2') {
+ int cc = c;
+ c = getc(fp);
+ if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
+ while (c != '\n' && c != EOF)
+ c = getc(fp);
+ }
+ else {
+ buf += '.';
+ buf += 'R';
+ buf += cc;
+ }
+ }
+ else {
+ buf += '.';
+ buf += 'R';
+ }
+ }
+ else
+ buf += '.';
+ }
+ if (c == EOF)
+ break;
+ if (illegal_input_char(c))
+ error_with_file_and_line(fn, lineno,
+ "illegal input character code %1", int(c));
+ else {
+ buf += c;
+ if (c == '\n') {
+ bol = 1;
+ lineno++;
+ }
+ else
+ bol = 0;
+ }
+ }
+ if (fp != stdin)
+ fclose(fp);
+ if (buf.length() > 0 && buf[buf.length() - 1] != '\n')
+ buf += '\n';
+ input_item *it = new input_item(buf, fn);
+ it->next = top;
+ top = it;
+}
+
+void input_stack::push_string(string &s, const char *filename, int lineno)
+{
+ input_item *it = new input_item(s, filename, lineno);
+ it->next = top;
+ top = it;
+}
+
+void input_stack::error(const char *format, const errarg &arg1,
+ const errarg &arg2, const errarg &arg3)
+{
+ const char *filename;
+ int lineno;
+ for (input_item *it = top; it; it = it->next)
+ if (it->get_location(&filename, &lineno)) {
+ error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3);
+ return;
+ }
+ ::error(format, arg1, arg2, arg3);
+}
+
+void command_error(const char *format, const errarg &arg1,
+ const errarg &arg2, const errarg &arg3)
+{
+ input_stack::error(format, arg1, arg2, arg3);
+}
+
+// # not recognized in ""
+// \<newline> is recognized in ""
+// # does not conceal newline
+// if missing closing quote, word extends to end of line
+// no special treatment of \ other than before newline
+// \<newline> not recognized after #
+// ; allowed as alternative to newline
+// ; not recognized in ""
+// don't clear word_buffer; just append on
+// return -1 for EOF, 0 for newline, 1 for word
+
+int get_word(string &word_buffer)
+{
+ int c = input_stack::get_char();
+ for (;;) {
+ if (c == '#') {
+ do {
+ c = input_stack::get_char();
+ } while (c != '\n' && c != EOF);
+ break;
+ }
+ if (c == '\\' && input_stack::peek_char() == '\n')
+ input_stack::skip_char();
+ else if (c != ' ' && c != '\t')
+ break;
+ c = input_stack::get_char();
+ }
+ if (c == EOF)
+ return -1;
+ if (c == '\n' || c == ';')
+ return 0;
+ if (c == '"') {
+ for (;;) {
+ c = input_stack::peek_char();
+ if (c == EOF || c == '\n')
+ break;
+ input_stack::skip_char();
+ if (c == '"') {
+ int d = input_stack::peek_char();
+ if (d == '"')
+ input_stack::skip_char();
+ else
+ break;
+ }
+ else if (c == '\\') {
+ int d = input_stack::peek_char();
+ if (d == '\n')
+ input_stack::skip_char();
+ else
+ word_buffer += '\\';
+ }
+ else
+ word_buffer += c;
+ }
+ return 1;
+ }
+ word_buffer += c;
+ for (;;) {
+ c = input_stack::peek_char();
+ if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';')
+ break;
+ input_stack::skip_char();
+ if (c == '\\') {
+ int d = input_stack::peek_char();
+ if (d == '\n')
+ input_stack::skip_char();
+ else
+ word_buffer += '\\';
+ }
+ else
+ word_buffer += c;
+ }
+ return 1;
+}
+
+union argument {
+ const char *s;
+ int n;
+};
+
+// This is for debugging.
+
+static void echo_command(int argc, argument *argv)
+{
+ for (int i = 0; i < argc; i++)
+ fprintf(stderr, "%s\n", argv[i].s);
+}
+
+static void include_command(int argc, argument *argv)
+{
+ assert(argc == 1);
+ input_stack::push_file(argv[0].s);
+}
+
+static void capitalize_command(int argc, argument *argv)
+{
+ if (argc > 0)
+ capitalize_fields = argv[0].s;
+ else
+ capitalize_fields.clear();
+}
+
+static void accumulate_command(int, argument *)
+{
+ accumulate = 1;
+}
+
+static void no_accumulate_command(int, argument *)
+{
+ accumulate = 0;
+}
+
+static void move_punctuation_command(int, argument *)
+{
+ move_punctuation = 1;
+}
+
+static void no_move_punctuation_command(int, argument *)
+{
+ move_punctuation = 0;
+}
+
+static void sort_command(int argc, argument *argv)
+{
+ if (argc == 0)
+ sort_fields = "AD";
+ else
+ sort_fields = argv[0].s;
+ accumulate = 1;
+}
+
+static void no_sort_command(int, argument *)
+{
+ sort_fields.clear();
+}
+
+static void articles_command(int argc, argument *argv)
+{
+ articles.clear();
+ int i;
+ for (i = 0; i < argc; i++) {
+ articles += argv[i].s;
+ articles += '\0';
+ }
+ int len = articles.length();
+ for (i = 0; i < len; i++)
+ articles[i] = cmlower(articles[i]);
+}
+
+static void database_command(int argc, argument *argv)
+{
+ for (int i = 0; i < argc; i++)
+ database_list.add_file(argv[i].s);
+}
+
+static void default_database_command(int, argument *)
+{
+ search_default = 1;
+}
+
+static void no_default_database_command(int, argument *)
+{
+ search_default = 0;
+}
+
+static void bibliography_command(int argc, argument *argv)
+{
+ const char *saved_filename = current_filename;
+ int saved_lineno = current_lineno;
+ int saved_label_in_text = label_in_text;
+ label_in_text = 0;
+ if (!accumulate)
+ fputs(".]<\n", stdout);
+ for (int i = 0; i < argc; i++)
+ do_bib(argv[i].s);
+ if (accumulate)
+ output_references();
+ else
+ fputs(".]>\n", stdout);
+ current_filename = saved_filename;
+ current_lineno = saved_lineno;
+ label_in_text = saved_label_in_text;
+}
+
+static void annotate_command(int argc, argument *argv)
+{
+ if (argc > 0)
+ annotation_field = argv[0].s[0];
+ else
+ annotation_field = 'X';
+ if (argc == 2)
+ annotation_macro = argv[1].s;
+ else
+ annotation_macro = "AP";
+}
+
+static void no_annotate_command(int, argument *)
+{
+ annotation_macro.clear();
+ annotation_field = -1;
+}
+
+static void reverse_command(int, argument *argv)
+{
+ reverse_fields = argv[0].s;
+}
+
+static void no_reverse_command(int, argument *)
+{
+ reverse_fields.clear();
+}
+
+static void abbreviate_command(int argc, argument *argv)
+{
+ abbreviate_fields = argv[0].s;
+ period_before_initial = argc > 1 ? argv[1].s : ". ";
+ period_before_last_name = argc > 2 ? argv[2].s : ". ";
+ period_before_other = argc > 3 ? argv[3].s : ". ";
+ period_before_hyphen = argc > 4 ? argv[4].s : ".";
+}
+
+static void no_abbreviate_command(int, argument *)
+{
+ abbreviate_fields.clear();
+}
+
+string search_ignore_fields;
+
+static void search_ignore_command(int argc, argument *argv)
+{
+ if (argc > 0)
+ search_ignore_fields = argv[0].s;
+ else
+ search_ignore_fields = "XYZ";
+ search_ignore_fields += '\0';
+ linear_ignore_fields = search_ignore_fields.contents();
+}
+
+static void no_search_ignore_command(int, argument *)
+{
+ linear_ignore_fields = "";
+}
+
+static void search_truncate_command(int argc, argument *argv)
+{
+ if (argc > 0)
+ linear_truncate_len = argv[0].n;
+ else
+ linear_truncate_len = 6;
+}
+
+static void no_search_truncate_command(int, argument *)
+{
+ linear_truncate_len = -1;
+}
+
+static void discard_command(int argc, argument *argv)
+{
+ if (argc == 0)
+ discard_fields = "XYZ";
+ else
+ discard_fields = argv[0].s;
+ accumulate = 1;
+}
+
+static void no_discard_command(int, argument *)
+{
+ discard_fields.clear();
+}
+
+static void label_command(int, argument *argv)
+{
+ set_label_spec(argv[0].s);
+}
+
+static void abbreviate_label_ranges_command(int argc, argument *argv)
+{
+ abbreviate_label_ranges = 1;
+ label_range_indicator = argc > 0 ? argv[0].s : "-";
+}
+
+static void no_abbreviate_label_ranges_command(int, argument *)
+{
+ abbreviate_label_ranges = 0;
+}
+
+static void label_in_reference_command(int, argument *)
+{
+ label_in_reference = 1;
+}
+
+static void no_label_in_reference_command(int, argument *)
+{
+ label_in_reference = 0;
+}
+
+static void label_in_text_command(int, argument *)
+{
+ label_in_text = 1;
+}
+
+static void no_label_in_text_command(int, argument *)
+{
+ label_in_text = 0;
+}
+
+static void sort_adjacent_labels_command(int, argument *)
+{
+ sort_adjacent_labels = 1;
+}
+
+static void no_sort_adjacent_labels_command(int, argument *)
+{
+ sort_adjacent_labels = 0;
+}
+
+static void date_as_label_command(int argc, argument *argv)
+{
+ if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*"))
+ date_as_label = 1;
+}
+
+static void no_date_as_label_command(int, argument *)
+{
+ date_as_label = 0;
+}
+
+static void short_label_command(int, argument *argv)
+{
+ if (set_short_label_spec(argv[0].s))
+ short_label_flag = 1;
+}
+
+static void no_short_label_command(int, argument *)
+{
+ short_label_flag = 0;
+}
+
+static void compatible_command(int, argument *)
+{
+ compatible_flag = 1;
+}
+
+static void no_compatible_command(int, argument *)
+{
+ compatible_flag = 0;
+}
+
+static void join_authors_command(int argc, argument *argv)
+{
+ join_authors_exactly_two = argv[0].s;
+ join_authors_default = argc > 1 ? argv[1].s : argv[0].s;
+ join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s;
+}
+
+static void bracket_label_command(int, argument *argv)
+{
+ pre_label = argv[0].s;
+ post_label = argv[1].s;
+ sep_label = argv[2].s;
+}
+
+static void separate_label_second_parts_command(int, argument *argv)
+{
+ separate_label_second_parts = argv[0].s;
+}
+
+static void et_al_command(int argc, argument *argv)
+{
+ et_al = argv[0].s;
+ et_al_min_elide = argv[1].n;
+ if (et_al_min_elide < 1)
+ et_al_min_elide = 1;
+ et_al_min_total = argc >= 3 ? argv[2].n : 0;
+}
+
+static void no_et_al_command(int, argument *)
+{
+ et_al.clear();
+ et_al_min_elide = 0;
+}
+
+typedef void (*command_t)(int, argument *);
+
+/* arg_types is a string describing the numbers and types of arguments.
+s means a string, i means an integer, f is a list of fields, F is
+a single field,
+? means that the previous argument is optional, * means that the
+previous argument can occur any number of times. */
+
+struct {
+ const char *name;
+ command_t func;
+ const char *arg_types;
+} command_table[] = {
+ { "include", include_command, "s" },
+ { "echo", echo_command, "s*" },
+ { "capitalize", capitalize_command, "f?" },
+ { "accumulate", accumulate_command, "" },
+ { "no-accumulate", no_accumulate_command, "" },
+ { "move-punctuation", move_punctuation_command, "" },
+ { "no-move-punctuation", no_move_punctuation_command, "" },
+ { "sort", sort_command, "s?" },
+ { "no-sort", no_sort_command, "" },
+ { "articles", articles_command, "s*" },
+ { "database", database_command, "ss*" },
+ { "default-database", default_database_command, "" },
+ { "no-default-database", no_default_database_command, "" },
+ { "bibliography", bibliography_command, "ss*" },
+ { "annotate", annotate_command, "F?s?" },
+ { "no-annotate", no_annotate_command, "" },
+ { "reverse", reverse_command, "s" },
+ { "no-reverse", no_reverse_command, "" },
+ { "abbreviate", abbreviate_command, "ss?s?s?s?" },
+ { "no-abbreviate", no_abbreviate_command, "" },
+ { "search-ignore", search_ignore_command, "f?" },
+ { "no-search-ignore", no_search_ignore_command, "" },
+ { "search-truncate", search_truncate_command, "i?" },
+ { "no-search-truncate", no_search_truncate_command, "" },
+ { "discard", discard_command, "f?" },
+ { "no-discard", no_discard_command, "" },
+ { "label", label_command, "s" },
+ { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" },
+ { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" },
+ { "label-in-reference", label_in_reference_command, "" },
+ { "no-label-in-reference", no_label_in_reference_command, "" },
+ { "label-in-text", label_in_text_command, "" },
+ { "no-label-in-text", no_label_in_text_command, "" },
+ { "sort-adjacent-labels", sort_adjacent_labels_command, "" },
+ { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" },
+ { "date-as-label", date_as_label_command, "s?" },
+ { "no-date-as-label", no_date_as_label_command, "" },
+ { "short-label", short_label_command, "s" },
+ { "no-short-label", no_short_label_command, "" },
+ { "compatible", compatible_command, "" },
+ { "no-compatible", no_compatible_command, "" },
+ { "join-authors", join_authors_command, "sss?" },
+ { "bracket-label", bracket_label_command, "sss" },
+ { "separate-label-second-parts", separate_label_second_parts_command, "s" },
+ { "et-al", et_al_command, "sii?" },
+ { "no-et-al", no_et_al_command, "" },
+};
+
+static int check_args(const char *types, const char *name,
+ int argc, argument *argv)
+{
+ int argno = 0;
+ while (*types) {
+ if (argc == 0) {
+ if (types[1] == '?')
+ break;
+ else if (types[1] == '*') {
+ assert(types[2] == '\0');
+ break;
+ }
+ else {
+ input_stack::error("missing argument for command `%1'", name);
+ return 0;
+ }
+ }
+ switch (*types) {
+ case 's':
+ break;
+ case 'i':
+ {
+ char *ptr;
+ long n = strtol(argv->s, &ptr, 10);
+ if ((n == 0 && ptr == argv->s)
+ || *ptr != '\0') {
+ input_stack::error("argument %1 for command `%2' must be an integer",
+ argno + 1, name);
+ return 0;
+ }
+ argv->n = (int)n;
+ break;
+ }
+ case 'f':
+ {
+ for (const char *ptr = argv->s; *ptr != '\0'; ptr++)
+ if (!cs_field_name(*ptr)) {
+ input_stack::error("argument %1 for command `%2' must be a list of fields",
+ argno + 1, name);
+ return 0;
+ }
+ break;
+ }
+ case 'F':
+ if (argv->s[0] == '\0' || argv->s[1] != '\0'
+ || !cs_field_name(argv->s[0])) {
+ input_stack::error("argument %1 for command `%2' must be a field name",
+ argno + 1, name);
+ return 0;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ if (types[1] == '?')
+ types += 2;
+ else if (types[1] != '*')
+ types += 1;
+ --argc;
+ ++argv;
+ ++argno;
+ }
+ if (argc > 0) {
+ input_stack::error("too many arguments for command `%1'", name);
+ return 0;
+ }
+ return 1;
+}
+
+static void execute_command(const char *name, int argc, argument *argv)
+{
+ for (int i = 0; i < sizeof(command_table)/sizeof(command_table[0]); i++)
+ if (strcmp(name, command_table[i].name) == 0) {
+ if (check_args(command_table[i].arg_types, name, argc, argv))
+ (*command_table[i].func)(argc, argv);
+ return;
+ }
+ input_stack::error("unknown command `%1'", name);
+}
+
+static void command_loop()
+{
+ string command;
+ for (;;) {
+ command.clear();
+ int res = get_word(command);
+ if (res != 1) {
+ if (res == 0)
+ continue;
+ break;
+ }
+ int argc = 0;
+ command += '\0';
+ while ((res = get_word(command)) == 1) {
+ argc++;
+ command += '\0';
+ }
+ argument *argv = new argument[argc];
+ const char *ptr = command.contents();
+ for (int i = 0; i < argc; i++)
+ argv[i].s = ptr = strchr(ptr, '\0') + 1;
+ execute_command(command.contents(), argc, argv);
+ a_delete argv;
+ if (res == -1)
+ break;
+ }
+}
+
+void process_commands(const char *file)
+{
+ input_stack::init();
+ input_stack::push_file(file);
+ command_loop();
+}
+
+void process_commands(string &s, const char *file, int lineno)
+{
+ input_stack::init();
+ input_stack::push_string(s, file, lineno);
+ command_loop();
+}
diff --git a/src/preproc/refer/command.h b/src/preproc/refer/command.h
new file mode 100644
index 00000000..c7085db6
--- /dev/null
+++ b/src/preproc/refer/command.h
@@ -0,0 +1,36 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+void process_commands(const char *file);
+void process_commands(string &s, const char *file, int lineno);
+
+extern int accumulate;
+extern int move_punctuation;
+extern int search_default;
+extern search_list database_list;
+extern int label_in_text;
+extern int label_in_reference;
+extern int sort_adjacent_labels;
+extern string pre_label;
+extern string post_label;
+extern string sep_label;
+
+extern void do_bib(const char *);
+extern void output_references();
diff --git a/src/preproc/refer/label.cc b/src/preproc/refer/label.cc
new file mode 100755
index 00000000..206980f1
--- /dev/null
+++ b/src/preproc/refer/label.cc
@@ -0,0 +1,1602 @@
+#ifndef lint
+/*static char yysccsid[] = "from: @(#)yaccpar 1.9 (Berkeley) 02/21/93";*/
+static char yyrcsid[] = "$Id: label.cc,v 1.1 2000/02/06 09:38:34 wlemb Exp $";
+#endif
+#define YYBYACC 1
+#define YYMAJOR 1
+#define YYMINOR 9
+#define yyclearin (yychar=(-1))
+#define yyerrok (yyerrflag=0)
+#define YYRECOVERING (yyerrflag!=0)
+#define YYPREFIX "yy"
+#line 22 "label.y"
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+
+int yylex();
+void yyerror(const char *);
+int yyparse();
+
+static const char *format_serial(char c, int n);
+
+struct label_info {
+ int start;
+ int length;
+ int count;
+ int total;
+ label_info(const string &);
+};
+
+label_info *lookup_label(const string &label);
+
+struct expression {
+ enum {
+ /* Does the tentative label depend on the reference?*/
+ CONTAINS_VARIABLE = 01,
+ CONTAINS_STAR = 02,
+ CONTAINS_FORMAT = 04,
+ CONTAINS_AT = 010
+ };
+ virtual ~expression() { }
+ virtual void evaluate(int, const reference &, string &,
+ substring_position &) = 0;
+ virtual unsigned analyze() { return 0; }
+};
+
+class at_expr : public expression {
+public:
+ at_expr() { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
+};
+
+class format_expr : public expression {
+ char type;
+ int width;
+ int first_number;
+public:
+ format_expr(char c, int w = 0, int f = 1)
+ : type(c), width(w), first_number(f) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return CONTAINS_FORMAT; }
+};
+
+class field_expr : public expression {
+ int number;
+ char name;
+public:
+ field_expr(char nm, int num) : number(num), name(nm) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return CONTAINS_VARIABLE; }
+};
+
+class literal_expr : public expression {
+ string s;
+public:
+ literal_expr(const char *ptr, int len) : s(ptr, len) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class unary_expr : public expression {
+protected:
+ expression *expr;
+public:
+ unary_expr(expression *e) : expr(e) { }
+ ~unary_expr() { delete expr; }
+ void evaluate(int, const reference &, string &, substring_position &) = 0;
+ unsigned analyze() { return expr ? expr->analyze() : 0; }
+};
+
+/* This caches the analysis of an expression.*/
+
+class analyzed_expr : public unary_expr {
+ unsigned flags;
+public:
+ analyzed_expr(expression *);
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return flags; }
+};
+
+class star_expr : public unary_expr {
+public:
+ star_expr(expression *e) : unary_expr(e) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() {
+ return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
+ | CONTAINS_STAR);
+ }
+};
+
+typedef void map_func(const char *, const char *, string &);
+
+class map_expr : public unary_expr {
+ map_func *func;
+public:
+ map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+typedef const char *extractor_func(const char *, const char *, const char **);
+
+class extractor_expr : public unary_expr {
+ int part;
+ extractor_func *func;
+public:
+ enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
+ extractor_expr(expression *e, extractor_func *f, int pt)
+ : unary_expr(e), part(pt), func(f) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class truncate_expr : public unary_expr {
+ int n;
+public:
+ truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class separator_expr : public unary_expr {
+public:
+ separator_expr(expression *e) : unary_expr(e) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class binary_expr : public expression {
+protected:
+ expression *expr1;
+ expression *expr2;
+public:
+ binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
+ ~binary_expr() { delete expr1; delete expr2; }
+ void evaluate(int, const reference &, string &, substring_position &) = 0;
+ unsigned analyze() {
+ return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
+ }
+};
+
+class alternative_expr : public binary_expr {
+public:
+ alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class list_expr : public binary_expr {
+public:
+ list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class substitute_expr : public binary_expr {
+public:
+ substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class ternary_expr : public expression {
+protected:
+ expression *expr1;
+ expression *expr2;
+ expression *expr3;
+public:
+ ternary_expr(expression *e1, expression *e2, expression *e3)
+ : expr1(e1), expr2(e2), expr3(e3) { }
+ ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
+ void evaluate(int, const reference &, string &, substring_position &) = 0;
+ unsigned analyze() {
+ return ((expr1 ? expr1->analyze() : 0)
+ | (expr2 ? expr2->analyze() : 0)
+ | (expr3 ? expr3->analyze() : 0));
+ }
+};
+
+class conditional_expr : public ternary_expr {
+public:
+ conditional_expr(expression *e1, expression *e2, expression *e3)
+ : ternary_expr(e1, e2, e3) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+static expression *parsed_label = 0;
+static expression *parsed_date_label = 0;
+static expression *parsed_short_label = 0;
+
+static expression *parse_result;
+
+string literals;
+
+#line 221 "label.y"
+typedef union {
+ int num;
+ expression *expr;
+ struct { int ndigits; int val; } dig;
+ struct { int start; int len; } str;
+} YYSTYPE;
+#line 218 "y.tab.c"
+#define TOKEN_LETTER 257
+#define TOKEN_LITERAL 258
+#define TOKEN_DIGIT 259
+#define YYERRCODE 256
+short yylhs[] = { -1,
+ 0, 1, 1, 6, 6, 2, 2, 2, 3, 3,
+ 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 9, 9, 7, 7, 8, 8,
+ 10, 10, 10,
+};
+short yylen[] = { 2,
+ 1, 1, 5, 0, 1, 1, 3, 3, 1, 2,
+ 1, 3, 1, 1, 1, 2, 2, 2, 5, 3,
+ 3, 2, 3, 3, 0, 1, 1, 2, 1, 2,
+ 0, 1, 1,
+};
+short yydefred[] = { 0,
+ 0, 14, 13, 0, 0, 0, 0, 5, 0, 0,
+ 0, 0, 1, 27, 0, 17, 29, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 22, 0, 28,
+ 30, 23, 24, 0, 0, 0, 32, 33, 0, 0,
+ 0, 0, 0, 0, 3, 0, 19,
+};
+short yydgoto[] = { 7,
+ 8, 9, 10, 11, 12, 13, 15, 18, 47, 39,
+};
+short yysindex[] = { -32,
+ -257, 0, 0, -240, -32, -32, 0, 0, -18, -32,
+ -36, -114, 0, 0, -246, 0, 0, -241, -14, -39,
+ -32, -32, -32, -114, -21, -257, -257, 0, -32, 0,
+ 0, 0, 0, -25, -32, -32, 0, 0, -223, -246,
+ -246, -36, -32, -257, 0, -246, 0,
+};
+short yyrindex[] = { 35,
+ 1, 0, 0, 0, -5, -4, 0, 0, 14, 208,
+ 159, 224, 0, 0, 11, 0, 0, 40, 0, 0,
+ 2, 0, 0, 253, -220, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 263, 281, 0, 0, 0, 50,
+ 105, 214, 0, 115, 0, 149, 0,
+};
+short yygindex[] = { 0,
+ 19, 0, 7, 37, -10, 10, -23, 0, 0, 0,
+};
+#define YYTABLESIZE 511
+short yytable[] = { 24,
+ 15, 14, 40, 41, 4, 28, 26, 5, 27, 25,
+ 16, 29, 30, 2, 19, 20, 16, 31, 17, 23,
+ 46, 37, 33, 38, 24, 24, 32, 6, 35, 36,
+ 34, 3, 43, 44, 4, 4, 31, 15, 15, 18,
+ 15, 15, 15, 15, 21, 15, 15, 16, 16, 20,
+ 16, 16, 16, 16, 2, 16, 16, 4, 15, 4,
+ 15, 45, 15, 15, 15, 42, 0, 0, 16, 0,
+ 16, 2, 16, 16, 16, 2, 18, 18, 0, 18,
+ 18, 18, 18, 0, 18, 18, 20, 20, 0, 20,
+ 20, 20, 20, 0, 20, 20, 0, 18, 0, 18,
+ 0, 18, 18, 18, 21, 22, 0, 20, 0, 20,
+ 0, 20, 20, 20, 25, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 15, 0, 15, 0, 0, 0,
+ 0, 0, 0, 0, 16, 0, 16, 0, 0, 0,
+ 0, 21, 21, 0, 21, 21, 21, 21, 26, 21,
+ 21, 25, 25, 0, 25, 25, 25, 25, 11, 25,
+ 25, 0, 21, 18, 21, 18, 21, 21, 21, 0,
+ 0, 0, 25, 20, 25, 20, 25, 25, 25, 0,
+ 0, 0, 0, 0, 0, 26, 26, 0, 26, 26,
+ 26, 26, 0, 26, 26, 11, 11, 0, 11, 11,
+ 0, 0, 0, 0, 0, 0, 26, 6, 26, 0,
+ 26, 26, 26, 12, 0, 0, 11, 0, 11, 0,
+ 11, 11, 11, 9, 1, 2, 0, 0, 21, 0,
+ 21, 0, 0, 0, 0, 0, 0, 0, 25, 0,
+ 25, 0, 0, 0, 0, 6, 0, 0, 6, 0,
+ 12, 12, 10, 12, 12, 0, 0, 15, 15, 0,
+ 9, 9, 7, 9, 9, 6, 0, 16, 16, 6,
+ 6, 12, 26, 12, 26, 12, 12, 12, 0, 0,
+ 8, 9, 11, 9, 11, 9, 9, 9, 0, 10,
+ 10, 0, 10, 10, 0, 0, 18, 18, 0, 0,
+ 7, 0, 0, 7, 0, 0, 20, 20, 0, 0,
+ 10, 0, 10, 0, 10, 10, 10, 0, 8, 0,
+ 7, 8, 0, 0, 7, 7, 0, 0, 0, 0,
+ 0, 6, 0, 0, 0, 0, 0, 12, 8, 12,
+ 0, 0, 8, 8, 0, 0, 0, 9, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 21, 21, 0, 0, 0, 0, 0, 0, 0,
+ 0, 25, 25, 0, 0, 0, 10, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 7, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 8, 26, 26, 0, 0, 0,
+ 0, 0, 0, 0, 0, 11, 11, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 12, 12, 0, 0, 0, 0, 0, 0, 0, 0,
+ 9, 9, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 10,
+};
+short yycheck[] = { 10,
+ 0, 259, 26, 27, 37, 42, 43, 40, 45, 46,
+ 0, 126, 259, 0, 5, 6, 257, 259, 259, 38,
+ 44, 43, 62, 45, 35, 36, 41, 60, 22, 23,
+ 21, 64, 58, 257, 0, 41, 257, 37, 38, 0,
+ 40, 41, 42, 43, 63, 45, 46, 37, 38, 0,
+ 40, 41, 42, 43, 41, 45, 46, 62, 58, 58,
+ 60, 43, 62, 63, 64, 29, -1, -1, 58, -1,
+ 60, 58, 62, 63, 64, 62, 37, 38, -1, 40,
+ 41, 42, 43, -1, 45, 46, 37, 38, -1, 40,
+ 41, 42, 43, -1, 45, 46, -1, 58, -1, 60,
+ -1, 62, 63, 64, 0, 124, -1, 58, -1, 60,
+ -1, 62, 63, 64, 0, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 124, -1, 126, -1, -1, -1,
+ -1, -1, -1, -1, 124, -1, 126, -1, -1, -1,
+ -1, 37, 38, -1, 40, 41, 42, 43, 0, 45,
+ 46, 37, 38, -1, 40, 41, 42, 43, 0, 45,
+ 46, -1, 58, 124, 60, 126, 62, 63, 64, -1,
+ -1, -1, 58, 124, 60, 126, 62, 63, 64, -1,
+ -1, -1, -1, -1, -1, 37, 38, -1, 40, 41,
+ 42, 43, -1, 45, 46, 37, 38, -1, 40, 41,
+ -1, -1, -1, -1, -1, -1, 58, 0, 60, -1,
+ 62, 63, 64, 0, -1, -1, 58, -1, 60, -1,
+ 62, 63, 64, 0, 257, 258, -1, -1, 124, -1,
+ 126, -1, -1, -1, -1, -1, -1, -1, 124, -1,
+ 126, -1, -1, -1, -1, 38, -1, -1, 41, -1,
+ 37, 38, 0, 40, 41, -1, -1, 257, 258, -1,
+ 37, 38, 0, 40, 41, 58, -1, 257, 258, 62,
+ 63, 58, 124, 60, 126, 62, 63, 64, -1, -1,
+ 0, 58, 124, 60, 126, 62, 63, 64, -1, 37,
+ 38, -1, 40, 41, -1, -1, 257, 258, -1, -1,
+ 38, -1, -1, 41, -1, -1, 257, 258, -1, -1,
+ 58, -1, 60, -1, 62, 63, 64, -1, 38, -1,
+ 58, 41, -1, -1, 62, 63, -1, -1, -1, -1,
+ -1, 124, -1, -1, -1, -1, -1, 124, 58, 126,
+ -1, -1, 62, 63, -1, -1, -1, 124, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 257, 258, -1, -1, -1, -1, -1, -1, -1,
+ -1, 257, 258, -1, -1, -1, 124, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 124, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 124, 257, 258, -1, -1, -1,
+ -1, -1, -1, -1, -1, 257, 258, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 257, 258, -1, -1, -1, -1, -1, -1, -1, -1,
+ 257, 258, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 257,
+ 258,
+};
+#define YYFINAL 7
+#ifndef YYDEBUG
+#define YYDEBUG 0
+#endif
+#define YYMAXTOKEN 259
+#if YYDEBUG
+char *yyname[] = {
+"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,"'%'","'&'",0,"'('","')'","'*'","'+'",0,"'-'","'.'",0,0,0,0,0,0,0,0,0,0,0,
+"':'",0,"'<'",0,"'>'","'?'","'@'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,
+"'~'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,"TOKEN_LETTER","TOKEN_LITERAL","TOKEN_DIGIT",
+};
+char *yyrule[] = {
+"$accept : expr",
+"expr : optional_conditional",
+"conditional : alternative",
+"conditional : alternative '?' optional_conditional ':' conditional",
+"optional_conditional :",
+"optional_conditional : conditional",
+"alternative : list",
+"alternative : alternative '|' list",
+"alternative : alternative '&' list",
+"list : substitute",
+"list : list substitute",
+"substitute : string",
+"substitute : substitute '~' string",
+"string : '@'",
+"string : TOKEN_LITERAL",
+"string : TOKEN_LETTER",
+"string : TOKEN_LETTER number",
+"string : '%' TOKEN_LETTER",
+"string : '%' digits",
+"string : string '.' flag TOKEN_LETTER optional_number",
+"string : string '+' number",
+"string : string '-' number",
+"string : string '*'",
+"string : '(' optional_conditional ')'",
+"string : '<' optional_conditional '>'",
+"optional_number :",
+"optional_number : number",
+"number : TOKEN_DIGIT",
+"number : number TOKEN_DIGIT",
+"digits : TOKEN_DIGIT",
+"digits : digits TOKEN_DIGIT",
+"flag :",
+"flag : '+'",
+"flag : '-'",
+};
+#endif
+#ifdef YYSTACKSIZE
+#undef YYMAXDEPTH
+#define YYMAXDEPTH YYSTACKSIZE
+#else
+#ifdef YYMAXDEPTH
+#define YYSTACKSIZE YYMAXDEPTH
+#else
+#define YYSTACKSIZE 500
+#define YYMAXDEPTH 500
+#endif
+#endif
+int yydebug;
+int yynerrs;
+int yyerrflag;
+int yychar;
+short *yyssp;
+YYSTYPE *yyvsp;
+YYSTYPE yyval;
+YYSTYPE yylval;
+short yyss[YYSTACKSIZE];
+YYSTYPE yyvs[YYSTACKSIZE];
+#define yystacksize YYSTACKSIZE
+#line 397 "label.y"
+
+/* bison defines const to be empty unless __STDC__ is defined, which it
+isn't under cfront */
+
+#ifdef const
+#undef const
+#endif
+
+const char *spec_ptr;
+const char *spec_end;
+const char *spec_cur;
+
+int yylex()
+{
+ while (spec_ptr < spec_end && csspace(*spec_ptr))
+ spec_ptr++;
+ spec_cur = spec_ptr;
+ if (spec_ptr >= spec_end)
+ return 0;
+ unsigned char c = *spec_ptr++;
+ if (csalpha(c)) {
+ yylval.num = c;
+ return TOKEN_LETTER;
+ }
+ if (csdigit(c)) {
+ yylval.num = c - '0';
+ return TOKEN_DIGIT;
+ }
+ if (c == '\'') {
+ yylval.str.start = literals.length();
+ for (; spec_ptr < spec_end; spec_ptr++) {
+ if (*spec_ptr == '\'') {
+ if (++spec_ptr < spec_end && *spec_ptr == '\'')
+ literals += '\'';
+ else {
+ yylval.str.len = literals.length() - yylval.str.start;
+ return TOKEN_LITERAL;
+ }
+ }
+ else
+ literals += *spec_ptr;
+ }
+ yylval.str.len = literals.length() - yylval.str.start;
+ return TOKEN_LITERAL;
+ }
+ return c;
+}
+
+int set_label_spec(const char *label_spec)
+{
+ spec_cur = spec_ptr = label_spec;
+ spec_end = strchr(label_spec, '\0');
+ literals.clear();
+ if (yyparse())
+ return 0;
+ delete parsed_label;
+ parsed_label = parse_result;
+ return 1;
+}
+
+int set_date_label_spec(const char *label_spec)
+{
+ spec_cur = spec_ptr = label_spec;
+ spec_end = strchr(label_spec, '\0');
+ literals.clear();
+ if (yyparse())
+ return 0;
+ delete parsed_date_label;
+ parsed_date_label = parse_result;
+ return 1;
+}
+
+int set_short_label_spec(const char *label_spec)
+{
+ spec_cur = spec_ptr = label_spec;
+ spec_end = strchr(label_spec, '\0');
+ literals.clear();
+ if (yyparse())
+ return 0;
+ delete parsed_short_label;
+ parsed_short_label = parse_result;
+ return 1;
+}
+
+void yyerror(const char *message)
+{
+ if (spec_cur < spec_end)
+ command_error("label specification %1 before `%2'", message, spec_cur);
+ else
+ command_error("label specification %1 at end of string",
+ message, spec_cur);
+}
+
+void at_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (tentative)
+ ref.canonicalize_authors(result);
+ else {
+ const char *end, *start = ref.get_authors(&end);
+ if (start)
+ result.append(start, end - start);
+ }
+}
+
+void format_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (tentative)
+ return;
+ const label_info *lp = ref.get_label_ptr();
+ int num = lp == 0 ? ref.get_number() : lp->count;
+ if (type != '0')
+ result += format_serial(type, num + 1);
+ else {
+ const char *ptr = itoa(num + first_number);
+ int pad = width - strlen(ptr);
+ while (--pad >= 0)
+ result += '0';
+ result += ptr;
+ }
+}
+
+static const char *format_serial(char c, int n)
+{
+ assert(n > 0);
+ static char buf[128]; // more than enough.
+ switch (c) {
+ case 'i':
+ case 'I':
+ {
+ char *p = buf;
+ // troff uses z and w to represent 10000 and 5000 in Roman
+ // numerals; I can find no historical basis for this usage
+ const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
+ if (n >= 40000)
+ return itoa(n);
+ while (n >= 10000) {
+ *p++ = s[0];
+ n -= 10000;
+ }
+ for (int i = 1000; i > 0; i /= 10, s += 2) {
+ int m = n/i;
+ n -= m*i;
+ switch (m) {
+ case 3:
+ *p++ = s[2];
+ /* falls through */
+ case 2:
+ *p++ = s[2];
+ /* falls through */
+ case 1:
+ *p++ = s[2];
+ break;
+ case 4:
+ *p++ = s[2];
+ *p++ = s[1];
+ break;
+ case 8:
+ *p++ = s[1];
+ *p++ = s[2];
+ *p++ = s[2];
+ *p++ = s[2];
+ break;
+ case 7:
+ *p++ = s[1];
+ *p++ = s[2];
+ *p++ = s[2];
+ break;
+ case 6:
+ *p++ = s[1];
+ *p++ = s[2];
+ break;
+ case 5:
+ *p++ = s[1];
+ break;
+ case 9:
+ *p++ = s[2];
+ *p++ = s[0];
+ }
+ }
+ *p = 0;
+ break;
+ }
+ case 'a':
+ case 'A':
+ {
+ char *p = buf;
+ // this is derived from troff/reg.c
+ while (n > 0) {
+ int d = n % 26;
+ if (d == 0)
+ d = 26;
+ n -= d;
+ n /= 26;
+ *p++ = c + d - 1; // ASCII dependent
+ }
+ *p-- = 0;
+ // Reverse it.
+ char *q = buf;
+ while (q < p) {
+ char temp = *q;
+ *q = *p;
+ *p = temp;
+ --p;
+ ++q;
+ }
+ break;
+ }
+ default:
+ assert(0);
+ }
+ return buf;
+}
+
+void field_expr::evaluate(int, const reference &ref,
+ string &result, substring_position &)
+{
+ const char *end;
+ const char *start = ref.get_field(name, &end);
+ if (start) {
+ start = nth_field(number, start, &end);
+ if (start)
+ result.append(start, end - start);
+ }
+}
+
+void literal_expr::evaluate(int, const reference &,
+ string &result, substring_position &)
+{
+ result += s;
+}
+
+analyzed_expr::analyzed_expr(expression *e)
+: unary_expr(e), flags(e ? e->analyze() : 0)
+{
+}
+
+void analyzed_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ if (expr)
+ expr->evaluate(tentative, ref, result, pos);
+}
+
+void star_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ const label_info *lp = ref.get_label_ptr();
+ if (!tentative
+ && (lp == 0 || lp->total > 1)
+ && expr)
+ expr->evaluate(tentative, ref, result, pos);
+}
+
+void separator_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ int start_length = result.length();
+ int is_first = pos.start < 0;
+ if (expr)
+ expr->evaluate(tentative, ref, result, pos);
+ if (is_first) {
+ pos.start = start_length;
+ pos.length = result.length() - start_length;
+ }
+}
+
+void map_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (expr) {
+ string temp;
+ substring_position temp_pos;
+ expr->evaluate(tentative, ref, temp, temp_pos);
+ (*func)(temp.contents(), temp.contents() + temp.length(), result);
+ }
+}
+
+void extractor_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (expr) {
+ string temp;
+ substring_position temp_pos;
+ expr->evaluate(tentative, ref, temp, temp_pos);
+ const char *end, *start = (*func)(temp.contents(),
+ temp.contents() + temp.length(),
+ &end);
+ switch (part) {
+ case BEFORE:
+ if (start)
+ result.append(temp.contents(), start - temp.contents());
+ else
+ result += temp;
+ break;
+ case MATCH:
+ if (start)
+ result.append(start, end - start);
+ break;
+ case AFTER:
+ if (start)
+ result.append(end, temp.contents() + temp.length() - end);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+static void first_part(int len, const char *ptr, const char *end,
+ string &result)
+{
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ const token_info *ti = lookup_token(token_start, ptr);
+ int counts = ti->sortify_non_empty(token_start, ptr);
+ if (counts && --len < 0)
+ break;
+ if (counts || ti->is_accent())
+ result.append(token_start, ptr - token_start);
+ }
+}
+
+static void last_part(int len, const char *ptr, const char *end,
+ string &result)
+{
+ const char *start = ptr;
+ int count = 0;
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ const token_info *ti = lookup_token(token_start, ptr);
+ if (ti->sortify_non_empty(token_start, ptr))
+ count++;
+ }
+ ptr = start;
+ int skip = count - len;
+ if (skip > 0) {
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ assert(0);
+ const token_info *ti = lookup_token(token_start, ptr);
+ if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
+ ptr = token_start;
+ break;
+ }
+ }
+ }
+ first_part(len, ptr, end, result);
+}
+
+void truncate_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (expr) {
+ string temp;
+ substring_position temp_pos;
+ expr->evaluate(tentative, ref, temp, temp_pos);
+ const char *start = temp.contents();
+ const char *end = start + temp.length();
+ if (n > 0)
+ first_part(n, start, end, result);
+ else if (n < 0)
+ last_part(-n, start, end, result);
+ }
+}
+
+void alternative_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ int start_length = result.length();
+ if (expr1)
+ expr1->evaluate(tentative, ref, result, pos);
+ if (result.length() == start_length && expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+}
+
+void list_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ if (expr1)
+ expr1->evaluate(tentative, ref, result, pos);
+ if (expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+}
+
+void substitute_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ int start_length = result.length();
+ if (expr1)
+ expr1->evaluate(tentative, ref, result, pos);
+ if (result.length() > start_length && result[result.length() - 1] == '-') {
+ // ought to see if pos covers the -
+ result.set_length(result.length() - 1);
+ if (expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+ }
+}
+
+void conditional_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ string temp;
+ substring_position temp_pos;
+ if (expr1)
+ expr1->evaluate(tentative, ref, temp, temp_pos);
+ if (temp.length() > 0) {
+ if (expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+ }
+ else {
+ if (expr3)
+ expr3->evaluate(tentative, ref, result, pos);
+ }
+}
+
+void reference::pre_compute_label()
+{
+ if (parsed_label != 0
+ && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
+ label.clear();
+ substring_position temp_pos;
+ parsed_label->evaluate(1, *this, label, temp_pos);
+ label_ptr = lookup_label(label);
+ }
+}
+
+void reference::compute_label()
+{
+ label.clear();
+ if (parsed_label)
+ parsed_label->evaluate(0, *this, label, separator_pos);
+ if (short_label_flag && parsed_short_label)
+ parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
+ if (date_as_label) {
+ string new_date;
+ if (parsed_date_label) {
+ substring_position temp_pos;
+ parsed_date_label->evaluate(0, *this, new_date, temp_pos);
+ }
+ set_date(new_date);
+ }
+ if (label_ptr)
+ label_ptr->count += 1;
+}
+
+void reference::immediate_compute_label()
+{
+ if (label_ptr)
+ label_ptr->total = 2; // force use of disambiguator
+ compute_label();
+}
+
+int reference::merge_labels(reference **v, int n, label_type type,
+ string &result)
+{
+ if (abbreviate_label_ranges)
+ return merge_labels_by_number(v, n, type, result);
+ else
+ return merge_labels_by_parts(v, n, type, result);
+}
+
+int reference::merge_labels_by_number(reference **v, int n, label_type type,
+ string &result)
+{
+ if (n <= 1)
+ return 0;
+ int num = get_number();
+ // Only merge three or more labels.
+ if (v[0]->get_number() != num + 1
+ || v[1]->get_number() != num + 2)
+ return 0;
+ int i;
+ for (i = 2; i < n; i++)
+ if (v[i]->get_number() != num + i + 1)
+ break;
+ result = get_label(type);
+ result += label_range_indicator;
+ result += v[i - 1]->get_label(type);
+ return i;
+}
+
+const substring_position &reference::get_separator_pos(label_type type) const
+{
+ if (type == SHORT_LABEL && short_label_flag)
+ return short_separator_pos;
+ else
+ return separator_pos;
+}
+
+const string &reference::get_label(label_type type) const
+{
+ if (type == SHORT_LABEL && short_label_flag)
+ return short_label;
+ else
+ return label;
+}
+
+int reference::merge_labels_by_parts(reference **v, int n, label_type type,
+ string &result)
+{
+ if (n <= 0)
+ return 0;
+ const string &lb = get_label(type);
+ const substring_position &sp = get_separator_pos(type);
+ if (sp.start < 0
+ || sp.start != v[0]->get_separator_pos(type).start
+ || memcmp(lb.contents(), v[0]->get_label(type).contents(),
+ sp.start) != 0)
+ return 0;
+ result = lb;
+ int i = 0;
+ do {
+ result += separate_label_second_parts;
+ const substring_position &s = v[i]->get_separator_pos(type);
+ int sep_end_pos = s.start + s.length;
+ result.append(v[i]->get_label(type).contents() + sep_end_pos,
+ v[i]->get_label(type).length() - sep_end_pos);
+ } while (++i < n
+ && sp.start == v[i]->get_separator_pos(type).start
+ && memcmp(lb.contents(), v[i]->get_label(type).contents(),
+ sp.start) == 0);
+ return i;
+}
+
+string label_pool;
+
+label_info::label_info(const string &s)
+: start(label_pool.length()), length(s.length()), count(0), total(1)
+{
+ label_pool += s;
+}
+
+static label_info **label_table = 0;
+static int label_table_size = 0;
+static int label_table_used = 0;
+
+label_info *lookup_label(const string &label)
+{
+ if (label_table == 0) {
+ label_table = new label_info *[17];
+ label_table_size = 17;
+ for (int i = 0; i < 17; i++)
+ label_table[i] = 0;
+ }
+ unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
+ label_info **ptr;
+ for (ptr = label_table + h;
+ *ptr != 0;
+ (ptr == label_table)
+ ? (ptr = label_table + label_table_size - 1)
+ : ptr--)
+ if ((*ptr)->length == label.length()
+ && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
+ label.length()) == 0) {
+ (*ptr)->total += 1;
+ return *ptr;
+ }
+ label_info *result = *ptr = new label_info(label);
+ if (++label_table_used * 2 > label_table_size) {
+ // Rehash the table.
+ label_info **old_table = label_table;
+ int old_size = label_table_size;
+ label_table_size = next_size(label_table_size);
+ label_table = new label_info *[label_table_size];
+ int i;
+ for (i = 0; i < label_table_size; i++)
+ label_table[i] = 0;
+ for (i = 0; i < old_size; i++)
+ if (old_table[i]) {
+ unsigned h = hash_string(label_pool.contents() + old_table[i]->start,
+ old_table[i]->length);
+ label_info **p;
+ for (p = label_table + (h % label_table_size);
+ *p != 0;
+ (p == label_table)
+ ? (p = label_table + label_table_size - 1)
+ : --p)
+ ;
+ *p = old_table[i];
+ }
+ a_delete old_table;
+ }
+ return result;
+}
+
+void clear_labels()
+{
+ for (int i = 0; i < label_table_size; i++) {
+ delete label_table[i];
+ label_table[i] = 0;
+ }
+ label_table_used = 0;
+ label_pool.clear();
+}
+
+static void consider_authors(reference **start, reference **end, int i);
+
+void compute_labels(reference **v, int n)
+{
+ if (parsed_label
+ && (parsed_label->analyze() & expression::CONTAINS_AT)
+ && sort_fields.length() >= 2
+ && sort_fields[0] == 'A'
+ && sort_fields[1] == '+')
+ consider_authors(v, v + n, 0);
+ for (int i = 0; i < n; i++)
+ v[i]->compute_label();
+}
+
+
+/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
+where 0 <= i <= N if there exists a reference with a list of authors
+<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
+and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0,
+A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
+<B0,B1,...,BM>. If a reference needs author i we only have to call
+need_author(j) for some j >= i such that the reference also needs
+author j. */
+
+/* This function handles 2 tasks:
+determine which authors are needed (cannot be elided with et al.);
+determine which authors can have only last names in the labels.
+
+References >= start and < end have the same first i author names.
+Also they're sorted by A+. */
+
+static void consider_authors(reference **start, reference **end, int i)
+{
+ if (start >= end)
+ return;
+ reference **p = start;
+ if (i >= (*p)->get_nauthors()) {
+ for (++p; p < end && i >= (*p)->get_nauthors(); p++)
+ ;
+ if (p < end && i > 0) {
+ // If we have an author list <A B C> and an author list <A B C D>,
+ // then both lists need C.
+ for (reference **q = start; q < end; q++)
+ (*q)->need_author(i - 1);
+ }
+ start = p;
+ }
+ while (p < end) {
+ reference **last_name_start = p;
+ reference **name_start = p;
+ for (++p;
+ p < end && i < (*p)->get_nauthors()
+ && same_author_last_name(**last_name_start, **p, i);
+ p++) {
+ if (!same_author_name(**name_start, **p, i)) {
+ consider_authors(name_start, p, i + 1);
+ name_start = p;
+ }
+ }
+ consider_authors(name_start, p, i + 1);
+ if (last_name_start == name_start) {
+ for (reference **q = last_name_start; q < p; q++)
+ (*q)->set_last_name_unambiguous(i);
+ }
+ // If we have an author list <A B C D> and <A B C E>, then the lists
+ // need author D and E respectively.
+ if (name_start > start || p < end) {
+ for (reference **q = last_name_start; q < p; q++)
+ (*q)->need_author(i);
+ }
+ }
+}
+
+int same_author_last_name(const reference &r1, const reference &r2, int n)
+{
+ const char *ae1;
+ const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
+ assert(as1 != 0);
+ const char *ae2;
+ const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
+ assert(as2 != 0);
+ return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
+}
+
+int same_author_name(const reference &r1, const reference &r2, int n)
+{
+ const char *ae1;
+ const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
+ assert(as1 != 0);
+ const char *ae2;
+ const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
+ assert(as2 != 0);
+ return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
+}
+
+
+void int_set::set(int i)
+{
+ assert(i >= 0);
+ int bytei = i >> 3;
+ if (bytei >= v.length()) {
+ int old_length = v.length();
+ v.set_length(bytei + 1);
+ for (int j = old_length; j <= bytei; j++)
+ v[j] = 0;
+ }
+ v[bytei] |= 1 << (i & 7);
+}
+
+int int_set::get(int i) const
+{
+ assert(i >= 0);
+ int bytei = i >> 3;
+ return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
+}
+
+void reference::set_last_name_unambiguous(int i)
+{
+ last_name_unambiguous.set(i);
+}
+
+void reference::need_author(int n)
+{
+ if (n > last_needed_author)
+ last_needed_author = n;
+}
+
+const char *reference::get_authors(const char **end) const
+{
+ if (!computed_authors) {
+ ((reference *)this)->computed_authors = 1;
+ string &result = ((reference *)this)->authors;
+ int na = get_nauthors();
+ result.clear();
+ for (int i = 0; i < na; i++) {
+ if (last_name_unambiguous.get(i)) {
+ const char *e, *start = get_author_last_name(i, &e);
+ assert(start != 0);
+ result.append(start, e - start);
+ }
+ else {
+ const char *e, *start = get_author(i, &e);
+ assert(start != 0);
+ result.append(start, e - start);
+ }
+ if (i == last_needed_author
+ && et_al.length() > 0
+ && et_al_min_elide > 0
+ && last_needed_author + et_al_min_elide < na
+ && na >= et_al_min_total) {
+ result += et_al;
+ break;
+ }
+ if (i < na - 1) {
+ if (na == 2)
+ result += join_authors_exactly_two;
+ else if (i < na - 2)
+ result += join_authors_default;
+ else
+ result += join_authors_last_two;
+ }
+ }
+ }
+ const char *start = authors.contents();
+ *end = start + authors.length();
+ return start;
+}
+
+int reference::get_nauthors() const
+{
+ if (nauthors < 0) {
+ const char *dummy;
+ int na;
+ for (na = 0; get_author(na, &dummy) != 0; na++)
+ ;
+ ((reference *)this)->nauthors = na;
+ }
+ return nauthors;
+}
+#line 1228 "y.tab.c"
+#define YYABORT goto yyabort
+#define YYREJECT goto yyabort
+#define YYACCEPT goto yyaccept
+#define YYERROR goto yyerrlab
+int
+#if defined(__STDC__)
+yyparse(void)
+#else
+yyparse()
+#endif
+{
+ register int yym, yyn, yystate;
+#if YYDEBUG
+ register char *yys;
+ extern char *getenv();
+
+ if (yys = getenv("YYDEBUG"))
+ {
+ yyn = *yys;
+ if (yyn >= '0' && yyn <= '9')
+ yydebug = yyn - '0';
+ }
+#endif
+
+ yynerrs = 0;
+ yyerrflag = 0;
+ yychar = (-1);
+
+ yyssp = yyss;
+ yyvsp = yyvs;
+ *yyssp = yystate = 0;
+
+yyloop:
+ if ((yyn = yydefred[yystate]) != 0) goto yyreduce;
+ if (yychar < 0)
+ {
+ if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, reading %d (%s)\n",
+ YYPREFIX, yystate, yychar, yys);
+ }
+#endif
+ }
+ if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, shifting to state %d\n",
+ YYPREFIX, yystate, yytable[yyn]);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate = yytable[yyn];
+ *++yyvsp = yylval;
+ yychar = (-1);
+ if (yyerrflag > 0) --yyerrflag;
+ goto yyloop;
+ }
+ if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+ {
+ yyn = yytable[yyn];
+ goto yyreduce;
+ }
+ if (yyerrflag) goto yyinrecovery;
+ yyerror("syntax error");
+#ifdef lint
+ goto yyerrlab;
+#endif
+yyerrlab:
+ ++yynerrs;
+yyinrecovery:
+ if (yyerrflag < 3)
+ {
+ yyerrflag = 3;
+ for (;;)
+ {
+ if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, error recovery shifting\
+ to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate = yytable[yyn];
+ *++yyvsp = yylval;
+ goto yyloop;
+ }
+ else
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: error recovery discarding state %d\n",
+ YYPREFIX, *yyssp);
+#endif
+ if (yyssp <= yyss) goto yyabort;
+ --yyssp;
+ --yyvsp;
+ }
+ }
+ }
+ else
+ {
+ if (yychar == 0) goto yyabort;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
+ YYPREFIX, yystate, yychar, yys);
+ }
+#endif
+ yychar = (-1);
+ goto yyloop;
+ }
+yyreduce:
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, reducing by rule %d (%s)\n",
+ YYPREFIX, yystate, yyn, yyrule[yyn]);
+#endif
+ yym = yylen[yyn];
+ yyval = yyvsp[1-yym];
+ switch (yyn)
+ {
+case 1:
+#line 250 "label.y"
+{ parse_result = (yyvsp[0].expr ? new analyzed_expr(yyvsp[0].expr) : 0); }
+break;
+case 2:
+#line 255 "label.y"
+{ yyval.expr = yyvsp[0].expr; }
+break;
+case 3:
+#line 257 "label.y"
+{ yyval.expr = new conditional_expr(yyvsp[-4].expr, yyvsp[-2].expr, yyvsp[0].expr); }
+break;
+case 4:
+#line 262 "label.y"
+{ yyval.expr = 0; }
+break;
+case 5:
+#line 264 "label.y"
+{ yyval.expr = yyvsp[0].expr; }
+break;
+case 6:
+#line 269 "label.y"
+{ yyval.expr = yyvsp[0].expr; }
+break;
+case 7:
+#line 271 "label.y"
+{ yyval.expr = new alternative_expr(yyvsp[-2].expr, yyvsp[0].expr); }
+break;
+case 8:
+#line 273 "label.y"
+{ yyval.expr = new conditional_expr(yyvsp[-2].expr, yyvsp[0].expr, 0); }
+break;
+case 9:
+#line 278 "label.y"
+{ yyval.expr = yyvsp[0].expr; }
+break;
+case 10:
+#line 280 "label.y"
+{ yyval.expr = new list_expr(yyvsp[-1].expr, yyvsp[0].expr); }
+break;
+case 11:
+#line 285 "label.y"
+{ yyval.expr = yyvsp[0].expr; }
+break;
+case 12:
+#line 287 "label.y"
+{ yyval.expr = new substitute_expr(yyvsp[-2].expr, yyvsp[0].expr); }
+break;
+case 13:
+#line 292 "label.y"
+{ yyval.expr = new at_expr; }
+break;
+case 14:
+#line 294 "label.y"
+{
+ yyval.expr = new literal_expr(literals.contents() + yyvsp[0].str.start,
+ yyvsp[0].str.len);
+ }
+break;
+case 15:
+#line 299 "label.y"
+{ yyval.expr = new field_expr(yyvsp[0].num, 0); }
+break;
+case 16:
+#line 301 "label.y"
+{ yyval.expr = new field_expr(yyvsp[-1].num, yyvsp[0].num - 1); }
+break;
+case 17:
+#line 303 "label.y"
+{
+ switch (yyvsp[0].num) {
+ case 'I':
+ case 'i':
+ case 'A':
+ case 'a':
+ yyval.expr = new format_expr(yyvsp[0].num);
+ break;
+ default:
+ command_error("unrecognized format `%1'", char(yyvsp[0].num));
+ yyval.expr = new format_expr('a');
+ break;
+ }
+ }
+break;
+case 18:
+#line 319 "label.y"
+{
+ yyval.expr = new format_expr('0', yyvsp[0].dig.ndigits, yyvsp[0].dig.val);
+ }
+break;
+case 19:
+#line 323 "label.y"
+{
+ switch (yyvsp[-1].num) {
+ case 'l':
+ yyval.expr = new map_expr(yyvsp[-4].expr, lowercase);
+ break;
+ case 'u':
+ yyval.expr = new map_expr(yyvsp[-4].expr, uppercase);
+ break;
+ case 'c':
+ yyval.expr = new map_expr(yyvsp[-4].expr, capitalize);
+ break;
+ case 'r':
+ yyval.expr = new map_expr(yyvsp[-4].expr, reverse_name);
+ break;
+ case 'a':
+ yyval.expr = new map_expr(yyvsp[-4].expr, abbreviate_name);
+ break;
+ case 'y':
+ yyval.expr = new extractor_expr(yyvsp[-4].expr, find_year, yyvsp[-2].num);
+ break;
+ case 'n':
+ yyval.expr = new extractor_expr(yyvsp[-4].expr, find_last_name, yyvsp[-2].num);
+ break;
+ default:
+ yyval.expr = yyvsp[-4].expr;
+ command_error("unknown function `%1'", char(yyvsp[-1].num));
+ break;
+ }
+ }
+break;
+case 20:
+#line 354 "label.y"
+{ yyval.expr = new truncate_expr(yyvsp[-2].expr, yyvsp[0].num); }
+break;
+case 21:
+#line 356 "label.y"
+{ yyval.expr = new truncate_expr(yyvsp[-2].expr, -yyvsp[0].num); }
+break;
+case 22:
+#line 358 "label.y"
+{ yyval.expr = new star_expr(yyvsp[-1].expr); }
+break;
+case 23:
+#line 360 "label.y"
+{ yyval.expr = yyvsp[-1].expr; }
+break;
+case 24:
+#line 362 "label.y"
+{ yyval.expr = new separator_expr(yyvsp[-1].expr); }
+break;
+case 25:
+#line 367 "label.y"
+{ yyval.num = -1; }
+break;
+case 26:
+#line 369 "label.y"
+{ yyval.num = yyvsp[0].num; }
+break;
+case 27:
+#line 374 "label.y"
+{ yyval.num = yyvsp[0].num; }
+break;
+case 28:
+#line 376 "label.y"
+{ yyval.num = yyvsp[-1].num*10 + yyvsp[0].num; }
+break;
+case 29:
+#line 381 "label.y"
+{ yyval.dig.ndigits = 1; yyval.dig.val = yyvsp[0].num; }
+break;
+case 30:
+#line 383 "label.y"
+{ yyval.dig.ndigits = yyvsp[-1].dig.ndigits + 1; yyval.dig.val = yyvsp[-1].dig.val*10 + yyvsp[0].num; }
+break;
+case 31:
+#line 389 "label.y"
+{ yyval.num = 0; }
+break;
+case 32:
+#line 391 "label.y"
+{ yyval.num = 1; }
+break;
+case 33:
+#line 393 "label.y"
+{ yyval.num = -1; }
+break;
+#line 1547 "y.tab.c"
+ }
+ yyssp -= yym;
+ yystate = *yyssp;
+ yyvsp -= yym;
+ yym = yylhs[yyn];
+ if (yystate == 0 && yym == 0)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: after reduction, shifting from state 0 to\
+ state %d\n", YYPREFIX, YYFINAL);
+#endif
+ yystate = YYFINAL;
+ *++yyssp = YYFINAL;
+ *++yyvsp = yyval;
+ if (yychar < 0)
+ {
+ if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, reading %d (%s)\n",
+ YYPREFIX, YYFINAL, yychar, yys);
+ }
+#endif
+ }
+ if (yychar == 0) goto yyaccept;
+ goto yyloop;
+ }
+ if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
+ yystate = yytable[yyn];
+ else
+ yystate = yydgoto[yym];
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: after reduction, shifting from state %d \
+to state %d\n", YYPREFIX, *yyssp, yystate);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate;
+ *++yyvsp = yyval;
+ goto yyloop;
+yyoverflow:
+ yyerror("yacc stack overflow");
+yyabort:
+ return (1);
+yyaccept:
+ return (0);
+}
diff --git a/src/preproc/refer/label.y b/src/preproc/refer/label.y
new file mode 100644
index 00000000..6bc12c18
--- /dev/null
+++ b/src/preproc/refer/label.y
@@ -0,0 +1,1177 @@
+/* -*- C++ -*-
+ Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+%{
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+
+int yylex();
+void yyerror(const char *);
+int yyparse();
+
+static const char *format_serial(char c, int n);
+
+struct label_info {
+ int start;
+ int length;
+ int count;
+ int total;
+ label_info(const string &);
+};
+
+label_info *lookup_label(const string &label);
+
+struct expression {
+ enum {
+ // Does the tentative label depend on the reference?
+ CONTAINS_VARIABLE = 01,
+ CONTAINS_STAR = 02,
+ CONTAINS_FORMAT = 04,
+ CONTAINS_AT = 010
+ };
+ virtual ~expression() { }
+ virtual void evaluate(int, const reference &, string &,
+ substring_position &) = 0;
+ virtual unsigned analyze() { return 0; }
+};
+
+class at_expr : public expression {
+public:
+ at_expr() { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
+};
+
+class format_expr : public expression {
+ char type;
+ int width;
+ int first_number;
+public:
+ format_expr(char c, int w = 0, int f = 1)
+ : type(c), width(w), first_number(f) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return CONTAINS_FORMAT; }
+};
+
+class field_expr : public expression {
+ int number;
+ char name;
+public:
+ field_expr(char nm, int num) : number(num), name(nm) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return CONTAINS_VARIABLE; }
+};
+
+class literal_expr : public expression {
+ string s;
+public:
+ literal_expr(const char *ptr, int len) : s(ptr, len) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class unary_expr : public expression {
+protected:
+ expression *expr;
+public:
+ unary_expr(expression *e) : expr(e) { }
+ ~unary_expr() { delete expr; }
+ void evaluate(int, const reference &, string &, substring_position &) = 0;
+ unsigned analyze() { return expr ? expr->analyze() : 0; }
+};
+
+// This caches the analysis of an expression.
+
+class analyzed_expr : public unary_expr {
+ unsigned flags;
+public:
+ analyzed_expr(expression *);
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() { return flags; }
+};
+
+class star_expr : public unary_expr {
+public:
+ star_expr(expression *e) : unary_expr(e) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+ unsigned analyze() {
+ return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
+ | CONTAINS_STAR);
+ }
+};
+
+typedef void map_func(const char *, const char *, string &);
+
+class map_expr : public unary_expr {
+ map_func *func;
+public:
+ map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+typedef const char *extractor_func(const char *, const char *, const char **);
+
+class extractor_expr : public unary_expr {
+ int part;
+ extractor_func *func;
+public:
+ enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
+ extractor_expr(expression *e, extractor_func *f, int pt)
+ : unary_expr(e), part(pt), func(f) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class truncate_expr : public unary_expr {
+ int n;
+public:
+ truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class separator_expr : public unary_expr {
+public:
+ separator_expr(expression *e) : unary_expr(e) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class binary_expr : public expression {
+protected:
+ expression *expr1;
+ expression *expr2;
+public:
+ binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
+ ~binary_expr() { delete expr1; delete expr2; }
+ void evaluate(int, const reference &, string &, substring_position &) = 0;
+ unsigned analyze() {
+ return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
+ }
+};
+
+class alternative_expr : public binary_expr {
+public:
+ alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class list_expr : public binary_expr {
+public:
+ list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class substitute_expr : public binary_expr {
+public:
+ substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class ternary_expr : public expression {
+protected:
+ expression *expr1;
+ expression *expr2;
+ expression *expr3;
+public:
+ ternary_expr(expression *e1, expression *e2, expression *e3)
+ : expr1(e1), expr2(e2), expr3(e3) { }
+ ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
+ void evaluate(int, const reference &, string &, substring_position &) = 0;
+ unsigned analyze() {
+ return ((expr1 ? expr1->analyze() : 0)
+ | (expr2 ? expr2->analyze() : 0)
+ | (expr3 ? expr3->analyze() : 0));
+ }
+};
+
+class conditional_expr : public ternary_expr {
+public:
+ conditional_expr(expression *e1, expression *e2, expression *e3)
+ : ternary_expr(e1, e2, e3) { }
+ void evaluate(int, const reference &, string &, substring_position &);
+};
+
+static expression *parsed_label = 0;
+static expression *parsed_date_label = 0;
+static expression *parsed_short_label = 0;
+
+static expression *parse_result;
+
+string literals;
+
+%}
+
+%union {
+ int num;
+ expression *expr;
+ struct { int ndigits; int val; } dig;
+ struct { int start; int len; } str;
+}
+
+/* uppercase or lowercase letter */
+%token <num> TOKEN_LETTER
+/* literal characters */
+%token <str> TOKEN_LITERAL
+/* digit */
+%token <num> TOKEN_DIGIT
+
+%type <expr> conditional
+%type <expr> alternative
+%type <expr> list
+%type <expr> string
+%type <expr> substitute
+%type <expr> optional_conditional
+%type <num> number
+%type <dig> digits
+%type <num> optional_number
+%type <num> flag
+
+%%
+
+expr:
+ optional_conditional
+ { parse_result = ($1 ? new analyzed_expr($1) : 0); }
+ ;
+
+conditional:
+ alternative
+ { $$ = $1; }
+ | alternative '?' optional_conditional ':' conditional
+ { $$ = new conditional_expr($1, $3, $5); }
+ ;
+
+optional_conditional:
+ /* empty */
+ { $$ = 0; }
+ | conditional
+ { $$ = $1; }
+ ;
+
+alternative:
+ list
+ { $$ = $1; }
+ | alternative '|' list
+ { $$ = new alternative_expr($1, $3); }
+ | alternative '&' list
+ { $$ = new conditional_expr($1, $3, 0); }
+ ;
+
+list:
+ substitute
+ { $$ = $1; }
+ | list substitute
+ { $$ = new list_expr($1, $2); }
+ ;
+
+substitute:
+ string
+ { $$ = $1; }
+ | substitute '~' string
+ { $$ = new substitute_expr($1, $3); }
+ ;
+
+string:
+ '@'
+ { $$ = new at_expr; }
+ | TOKEN_LITERAL
+ {
+ $$ = new literal_expr(literals.contents() + $1.start,
+ $1.len);
+ }
+ | TOKEN_LETTER
+ { $$ = new field_expr($1, 0); }
+ | TOKEN_LETTER number
+ { $$ = new field_expr($1, $2 - 1); }
+ | '%' TOKEN_LETTER
+ {
+ switch ($2) {
+ case 'I':
+ case 'i':
+ case 'A':
+ case 'a':
+ $$ = new format_expr($2);
+ break;
+ default:
+ command_error("unrecognized format `%1'", char($2));
+ $$ = new format_expr('a');
+ break;
+ }
+ }
+
+ | '%' digits
+ {
+ $$ = new format_expr('0', $2.ndigits, $2.val);
+ }
+ | string '.' flag TOKEN_LETTER optional_number
+ {
+ switch ($4) {
+ case 'l':
+ $$ = new map_expr($1, lowercase);
+ break;
+ case 'u':
+ $$ = new map_expr($1, uppercase);
+ break;
+ case 'c':
+ $$ = new map_expr($1, capitalize);
+ break;
+ case 'r':
+ $$ = new map_expr($1, reverse_name);
+ break;
+ case 'a':
+ $$ = new map_expr($1, abbreviate_name);
+ break;
+ case 'y':
+ $$ = new extractor_expr($1, find_year, $3);
+ break;
+ case 'n':
+ $$ = new extractor_expr($1, find_last_name, $3);
+ break;
+ default:
+ $$ = $1;
+ command_error("unknown function `%1'", char($4));
+ break;
+ }
+ }
+
+ | string '+' number
+ { $$ = new truncate_expr($1, $3); }
+ | string '-' number
+ { $$ = new truncate_expr($1, -$3); }
+ | string '*'
+ { $$ = new star_expr($1); }
+ | '(' optional_conditional ')'
+ { $$ = $2; }
+ | '<' optional_conditional '>'
+ { $$ = new separator_expr($2); }
+ ;
+
+optional_number:
+ /* empty */
+ { $$ = -1; }
+ | number
+ { $$ = $1; }
+ ;
+
+number:
+ TOKEN_DIGIT
+ { $$ = $1; }
+ | number TOKEN_DIGIT
+ { $$ = $1*10 + $2; }
+ ;
+
+digits:
+ TOKEN_DIGIT
+ { $$.ndigits = 1; $$.val = $1; }
+ | digits TOKEN_DIGIT
+ { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
+ ;
+
+
+flag:
+ /* empty */
+ { $$ = 0; }
+ | '+'
+ { $$ = 1; }
+ | '-'
+ { $$ = -1; }
+ ;
+
+%%
+
+/* bison defines const to be empty unless __STDC__ is defined, which it
+isn't under cfront */
+
+#ifdef const
+#undef const
+#endif
+
+const char *spec_ptr;
+const char *spec_end;
+const char *spec_cur;
+
+int yylex()
+{
+ while (spec_ptr < spec_end && csspace(*spec_ptr))
+ spec_ptr++;
+ spec_cur = spec_ptr;
+ if (spec_ptr >= spec_end)
+ return 0;
+ unsigned char c = *spec_ptr++;
+ if (csalpha(c)) {
+ yylval.num = c;
+ return TOKEN_LETTER;
+ }
+ if (csdigit(c)) {
+ yylval.num = c - '0';
+ return TOKEN_DIGIT;
+ }
+ if (c == '\'') {
+ yylval.str.start = literals.length();
+ for (; spec_ptr < spec_end; spec_ptr++) {
+ if (*spec_ptr == '\'') {
+ if (++spec_ptr < spec_end && *spec_ptr == '\'')
+ literals += '\'';
+ else {
+ yylval.str.len = literals.length() - yylval.str.start;
+ return TOKEN_LITERAL;
+ }
+ }
+ else
+ literals += *spec_ptr;
+ }
+ yylval.str.len = literals.length() - yylval.str.start;
+ return TOKEN_LITERAL;
+ }
+ return c;
+}
+
+int set_label_spec(const char *label_spec)
+{
+ spec_cur = spec_ptr = label_spec;
+ spec_end = strchr(label_spec, '\0');
+ literals.clear();
+ if (yyparse())
+ return 0;
+ delete parsed_label;
+ parsed_label = parse_result;
+ return 1;
+}
+
+int set_date_label_spec(const char *label_spec)
+{
+ spec_cur = spec_ptr = label_spec;
+ spec_end = strchr(label_spec, '\0');
+ literals.clear();
+ if (yyparse())
+ return 0;
+ delete parsed_date_label;
+ parsed_date_label = parse_result;
+ return 1;
+}
+
+int set_short_label_spec(const char *label_spec)
+{
+ spec_cur = spec_ptr = label_spec;
+ spec_end = strchr(label_spec, '\0');
+ literals.clear();
+ if (yyparse())
+ return 0;
+ delete parsed_short_label;
+ parsed_short_label = parse_result;
+ return 1;
+}
+
+void yyerror(const char *message)
+{
+ if (spec_cur < spec_end)
+ command_error("label specification %1 before `%2'", message, spec_cur);
+ else
+ command_error("label specification %1 at end of string",
+ message, spec_cur);
+}
+
+void at_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (tentative)
+ ref.canonicalize_authors(result);
+ else {
+ const char *end, *start = ref.get_authors(&end);
+ if (start)
+ result.append(start, end - start);
+ }
+}
+
+void format_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (tentative)
+ return;
+ const label_info *lp = ref.get_label_ptr();
+ int num = lp == 0 ? ref.get_number() : lp->count;
+ if (type != '0')
+ result += format_serial(type, num + 1);
+ else {
+ const char *ptr = itoa(num + first_number);
+ int pad = width - strlen(ptr);
+ while (--pad >= 0)
+ result += '0';
+ result += ptr;
+ }
+}
+
+static const char *format_serial(char c, int n)
+{
+ assert(n > 0);
+ static char buf[128]; // more than enough.
+ switch (c) {
+ case 'i':
+ case 'I':
+ {
+ char *p = buf;
+ // troff uses z and w to represent 10000 and 5000 in Roman
+ // numerals; I can find no historical basis for this usage
+ const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
+ if (n >= 40000)
+ return itoa(n);
+ while (n >= 10000) {
+ *p++ = s[0];
+ n -= 10000;
+ }
+ for (int i = 1000; i > 0; i /= 10, s += 2) {
+ int m = n/i;
+ n -= m*i;
+ switch (m) {
+ case 3:
+ *p++ = s[2];
+ /* falls through */
+ case 2:
+ *p++ = s[2];
+ /* falls through */
+ case 1:
+ *p++ = s[2];
+ break;
+ case 4:
+ *p++ = s[2];
+ *p++ = s[1];
+ break;
+ case 8:
+ *p++ = s[1];
+ *p++ = s[2];
+ *p++ = s[2];
+ *p++ = s[2];
+ break;
+ case 7:
+ *p++ = s[1];
+ *p++ = s[2];
+ *p++ = s[2];
+ break;
+ case 6:
+ *p++ = s[1];
+ *p++ = s[2];
+ break;
+ case 5:
+ *p++ = s[1];
+ break;
+ case 9:
+ *p++ = s[2];
+ *p++ = s[0];
+ }
+ }
+ *p = 0;
+ break;
+ }
+ case 'a':
+ case 'A':
+ {
+ char *p = buf;
+ // this is derived from troff/reg.c
+ while (n > 0) {
+ int d = n % 26;
+ if (d == 0)
+ d = 26;
+ n -= d;
+ n /= 26;
+ *p++ = c + d - 1; // ASCII dependent
+ }
+ *p-- = 0;
+ // Reverse it.
+ char *q = buf;
+ while (q < p) {
+ char temp = *q;
+ *q = *p;
+ *p = temp;
+ --p;
+ ++q;
+ }
+ break;
+ }
+ default:
+ assert(0);
+ }
+ return buf;
+}
+
+void field_expr::evaluate(int, const reference &ref,
+ string &result, substring_position &)
+{
+ const char *end;
+ const char *start = ref.get_field(name, &end);
+ if (start) {
+ start = nth_field(number, start, &end);
+ if (start)
+ result.append(start, end - start);
+ }
+}
+
+void literal_expr::evaluate(int, const reference &,
+ string &result, substring_position &)
+{
+ result += s;
+}
+
+analyzed_expr::analyzed_expr(expression *e)
+: unary_expr(e), flags(e ? e->analyze() : 0)
+{
+}
+
+void analyzed_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ if (expr)
+ expr->evaluate(tentative, ref, result, pos);
+}
+
+void star_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ const label_info *lp = ref.get_label_ptr();
+ if (!tentative
+ && (lp == 0 || lp->total > 1)
+ && expr)
+ expr->evaluate(tentative, ref, result, pos);
+}
+
+void separator_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ int start_length = result.length();
+ int is_first = pos.start < 0;
+ if (expr)
+ expr->evaluate(tentative, ref, result, pos);
+ if (is_first) {
+ pos.start = start_length;
+ pos.length = result.length() - start_length;
+ }
+}
+
+void map_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (expr) {
+ string temp;
+ substring_position temp_pos;
+ expr->evaluate(tentative, ref, temp, temp_pos);
+ (*func)(temp.contents(), temp.contents() + temp.length(), result);
+ }
+}
+
+void extractor_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (expr) {
+ string temp;
+ substring_position temp_pos;
+ expr->evaluate(tentative, ref, temp, temp_pos);
+ const char *end, *start = (*func)(temp.contents(),
+ temp.contents() + temp.length(),
+ &end);
+ switch (part) {
+ case BEFORE:
+ if (start)
+ result.append(temp.contents(), start - temp.contents());
+ else
+ result += temp;
+ break;
+ case MATCH:
+ if (start)
+ result.append(start, end - start);
+ break;
+ case AFTER:
+ if (start)
+ result.append(end, temp.contents() + temp.length() - end);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+static void first_part(int len, const char *ptr, const char *end,
+ string &result)
+{
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ const token_info *ti = lookup_token(token_start, ptr);
+ int counts = ti->sortify_non_empty(token_start, ptr);
+ if (counts && --len < 0)
+ break;
+ if (counts || ti->is_accent())
+ result.append(token_start, ptr - token_start);
+ }
+}
+
+static void last_part(int len, const char *ptr, const char *end,
+ string &result)
+{
+ const char *start = ptr;
+ int count = 0;
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ const token_info *ti = lookup_token(token_start, ptr);
+ if (ti->sortify_non_empty(token_start, ptr))
+ count++;
+ }
+ ptr = start;
+ int skip = count - len;
+ if (skip > 0) {
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ assert(0);
+ const token_info *ti = lookup_token(token_start, ptr);
+ if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
+ ptr = token_start;
+ break;
+ }
+ }
+ }
+ first_part(len, ptr, end, result);
+}
+
+void truncate_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &)
+{
+ if (expr) {
+ string temp;
+ substring_position temp_pos;
+ expr->evaluate(tentative, ref, temp, temp_pos);
+ const char *start = temp.contents();
+ const char *end = start + temp.length();
+ if (n > 0)
+ first_part(n, start, end, result);
+ else if (n < 0)
+ last_part(-n, start, end, result);
+ }
+}
+
+void alternative_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ int start_length = result.length();
+ if (expr1)
+ expr1->evaluate(tentative, ref, result, pos);
+ if (result.length() == start_length && expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+}
+
+void list_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ if (expr1)
+ expr1->evaluate(tentative, ref, result, pos);
+ if (expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+}
+
+void substitute_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ int start_length = result.length();
+ if (expr1)
+ expr1->evaluate(tentative, ref, result, pos);
+ if (result.length() > start_length && result[result.length() - 1] == '-') {
+ // ought to see if pos covers the -
+ result.set_length(result.length() - 1);
+ if (expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+ }
+}
+
+void conditional_expr::evaluate(int tentative, const reference &ref,
+ string &result, substring_position &pos)
+{
+ string temp;
+ substring_position temp_pos;
+ if (expr1)
+ expr1->evaluate(tentative, ref, temp, temp_pos);
+ if (temp.length() > 0) {
+ if (expr2)
+ expr2->evaluate(tentative, ref, result, pos);
+ }
+ else {
+ if (expr3)
+ expr3->evaluate(tentative, ref, result, pos);
+ }
+}
+
+void reference::pre_compute_label()
+{
+ if (parsed_label != 0
+ && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
+ label.clear();
+ substring_position temp_pos;
+ parsed_label->evaluate(1, *this, label, temp_pos);
+ label_ptr = lookup_label(label);
+ }
+}
+
+void reference::compute_label()
+{
+ label.clear();
+ if (parsed_label)
+ parsed_label->evaluate(0, *this, label, separator_pos);
+ if (short_label_flag && parsed_short_label)
+ parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
+ if (date_as_label) {
+ string new_date;
+ if (parsed_date_label) {
+ substring_position temp_pos;
+ parsed_date_label->evaluate(0, *this, new_date, temp_pos);
+ }
+ set_date(new_date);
+ }
+ if (label_ptr)
+ label_ptr->count += 1;
+}
+
+void reference::immediate_compute_label()
+{
+ if (label_ptr)
+ label_ptr->total = 2; // force use of disambiguator
+ compute_label();
+}
+
+int reference::merge_labels(reference **v, int n, label_type type,
+ string &result)
+{
+ if (abbreviate_label_ranges)
+ return merge_labels_by_number(v, n, type, result);
+ else
+ return merge_labels_by_parts(v, n, type, result);
+}
+
+int reference::merge_labels_by_number(reference **v, int n, label_type type,
+ string &result)
+{
+ if (n <= 1)
+ return 0;
+ int num = get_number();
+ // Only merge three or more labels.
+ if (v[0]->get_number() != num + 1
+ || v[1]->get_number() != num + 2)
+ return 0;
+ int i;
+ for (i = 2; i < n; i++)
+ if (v[i]->get_number() != num + i + 1)
+ break;
+ result = get_label(type);
+ result += label_range_indicator;
+ result += v[i - 1]->get_label(type);
+ return i;
+}
+
+const substring_position &reference::get_separator_pos(label_type type) const
+{
+ if (type == SHORT_LABEL && short_label_flag)
+ return short_separator_pos;
+ else
+ return separator_pos;
+}
+
+const string &reference::get_label(label_type type) const
+{
+ if (type == SHORT_LABEL && short_label_flag)
+ return short_label;
+ else
+ return label;
+}
+
+int reference::merge_labels_by_parts(reference **v, int n, label_type type,
+ string &result)
+{
+ if (n <= 0)
+ return 0;
+ const string &lb = get_label(type);
+ const substring_position &sp = get_separator_pos(type);
+ if (sp.start < 0
+ || sp.start != v[0]->get_separator_pos(type).start
+ || memcmp(lb.contents(), v[0]->get_label(type).contents(),
+ sp.start) != 0)
+ return 0;
+ result = lb;
+ int i = 0;
+ do {
+ result += separate_label_second_parts;
+ const substring_position &s = v[i]->get_separator_pos(type);
+ int sep_end_pos = s.start + s.length;
+ result.append(v[i]->get_label(type).contents() + sep_end_pos,
+ v[i]->get_label(type).length() - sep_end_pos);
+ } while (++i < n
+ && sp.start == v[i]->get_separator_pos(type).start
+ && memcmp(lb.contents(), v[i]->get_label(type).contents(),
+ sp.start) == 0);
+ return i;
+}
+
+string label_pool;
+
+label_info::label_info(const string &s)
+: start(label_pool.length()), length(s.length()), count(0), total(1)
+{
+ label_pool += s;
+}
+
+static label_info **label_table = 0;
+static int label_table_size = 0;
+static int label_table_used = 0;
+
+label_info *lookup_label(const string &label)
+{
+ if (label_table == 0) {
+ label_table = new label_info *[17];
+ label_table_size = 17;
+ for (int i = 0; i < 17; i++)
+ label_table[i] = 0;
+ }
+ unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
+ label_info **ptr;
+ for (ptr = label_table + h;
+ *ptr != 0;
+ (ptr == label_table)
+ ? (ptr = label_table + label_table_size - 1)
+ : ptr--)
+ if ((*ptr)->length == label.length()
+ && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
+ label.length()) == 0) {
+ (*ptr)->total += 1;
+ return *ptr;
+ }
+ label_info *result = *ptr = new label_info(label);
+ if (++label_table_used * 2 > label_table_size) {
+ // Rehash the table.
+ label_info **old_table = label_table;
+ int old_size = label_table_size;
+ label_table_size = next_size(label_table_size);
+ label_table = new label_info *[label_table_size];
+ int i;
+ for (i = 0; i < label_table_size; i++)
+ label_table[i] = 0;
+ for (i = 0; i < old_size; i++)
+ if (old_table[i]) {
+ unsigned h = hash_string(label_pool.contents() + old_table[i]->start,
+ old_table[i]->length);
+ label_info **p;
+ for (p = label_table + (h % label_table_size);
+ *p != 0;
+ (p == label_table)
+ ? (p = label_table + label_table_size - 1)
+ : --p)
+ ;
+ *p = old_table[i];
+ }
+ a_delete old_table;
+ }
+ return result;
+}
+
+void clear_labels()
+{
+ for (int i = 0; i < label_table_size; i++) {
+ delete label_table[i];
+ label_table[i] = 0;
+ }
+ label_table_used = 0;
+ label_pool.clear();
+}
+
+static void consider_authors(reference **start, reference **end, int i);
+
+void compute_labels(reference **v, int n)
+{
+ if (parsed_label
+ && (parsed_label->analyze() & expression::CONTAINS_AT)
+ && sort_fields.length() >= 2
+ && sort_fields[0] == 'A'
+ && sort_fields[1] == '+')
+ consider_authors(v, v + n, 0);
+ for (int i = 0; i < n; i++)
+ v[i]->compute_label();
+}
+
+
+/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
+where 0 <= i <= N if there exists a reference with a list of authors
+<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
+and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0,
+A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
+<B0,B1,...,BM>. If a reference needs author i we only have to call
+need_author(j) for some j >= i such that the reference also needs
+author j. */
+
+/* This function handles 2 tasks:
+determine which authors are needed (cannot be elided with et al.);
+determine which authors can have only last names in the labels.
+
+References >= start and < end have the same first i author names.
+Also they're sorted by A+. */
+
+static void consider_authors(reference **start, reference **end, int i)
+{
+ if (start >= end)
+ return;
+ reference **p = start;
+ if (i >= (*p)->get_nauthors()) {
+ for (++p; p < end && i >= (*p)->get_nauthors(); p++)
+ ;
+ if (p < end && i > 0) {
+ // If we have an author list <A B C> and an author list <A B C D>,
+ // then both lists need C.
+ for (reference **q = start; q < end; q++)
+ (*q)->need_author(i - 1);
+ }
+ start = p;
+ }
+ while (p < end) {
+ reference **last_name_start = p;
+ reference **name_start = p;
+ for (++p;
+ p < end && i < (*p)->get_nauthors()
+ && same_author_last_name(**last_name_start, **p, i);
+ p++) {
+ if (!same_author_name(**name_start, **p, i)) {
+ consider_authors(name_start, p, i + 1);
+ name_start = p;
+ }
+ }
+ consider_authors(name_start, p, i + 1);
+ if (last_name_start == name_start) {
+ for (reference **q = last_name_start; q < p; q++)
+ (*q)->set_last_name_unambiguous(i);
+ }
+ // If we have an author list <A B C D> and <A B C E>, then the lists
+ // need author D and E respectively.
+ if (name_start > start || p < end) {
+ for (reference **q = last_name_start; q < p; q++)
+ (*q)->need_author(i);
+ }
+ }
+}
+
+int same_author_last_name(const reference &r1, const reference &r2, int n)
+{
+ const char *ae1;
+ const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
+ assert(as1 != 0);
+ const char *ae2;
+ const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
+ assert(as2 != 0);
+ return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
+}
+
+int same_author_name(const reference &r1, const reference &r2, int n)
+{
+ const char *ae1;
+ const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
+ assert(as1 != 0);
+ const char *ae2;
+ const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
+ assert(as2 != 0);
+ return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
+}
+
+
+void int_set::set(int i)
+{
+ assert(i >= 0);
+ int bytei = i >> 3;
+ if (bytei >= v.length()) {
+ int old_length = v.length();
+ v.set_length(bytei + 1);
+ for (int j = old_length; j <= bytei; j++)
+ v[j] = 0;
+ }
+ v[bytei] |= 1 << (i & 7);
+}
+
+int int_set::get(int i) const
+{
+ assert(i >= 0);
+ int bytei = i >> 3;
+ return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
+}
+
+void reference::set_last_name_unambiguous(int i)
+{
+ last_name_unambiguous.set(i);
+}
+
+void reference::need_author(int n)
+{
+ if (n > last_needed_author)
+ last_needed_author = n;
+}
+
+const char *reference::get_authors(const char **end) const
+{
+ if (!computed_authors) {
+ ((reference *)this)->computed_authors = 1;
+ string &result = ((reference *)this)->authors;
+ int na = get_nauthors();
+ result.clear();
+ for (int i = 0; i < na; i++) {
+ if (last_name_unambiguous.get(i)) {
+ const char *e, *start = get_author_last_name(i, &e);
+ assert(start != 0);
+ result.append(start, e - start);
+ }
+ else {
+ const char *e, *start = get_author(i, &e);
+ assert(start != 0);
+ result.append(start, e - start);
+ }
+ if (i == last_needed_author
+ && et_al.length() > 0
+ && et_al_min_elide > 0
+ && last_needed_author + et_al_min_elide < na
+ && na >= et_al_min_total) {
+ result += et_al;
+ break;
+ }
+ if (i < na - 1) {
+ if (na == 2)
+ result += join_authors_exactly_two;
+ else if (i < na - 2)
+ result += join_authors_default;
+ else
+ result += join_authors_last_two;
+ }
+ }
+ }
+ const char *start = authors.contents();
+ *end = start + authors.length();
+ return start;
+}
+
+int reference::get_nauthors() const
+{
+ if (nauthors < 0) {
+ const char *dummy;
+ int na;
+ for (na = 0; get_author(na, &dummy) != 0; na++)
+ ;
+ ((reference *)this)->nauthors = na;
+ }
+ return nauthors;
+}
diff --git a/src/preproc/refer/ref.cc b/src/preproc/refer/ref.cc
new file mode 100644
index 00000000..c3517b19
--- /dev/null
+++ b/src/preproc/refer/ref.cc
@@ -0,0 +1,1160 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+
+static const char *find_day(const char *, const char *, const char **);
+static int find_month(const char *start, const char *end);
+static void abbreviate_names(string &);
+
+#define DEFAULT_ARTICLES "the\000a\000an"
+
+string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES));
+
+// Multiple occurrences of fields are separated by FIELD_SEPARATOR.
+const char FIELD_SEPARATOR = '\0';
+
+const char MULTI_FIELD_NAMES[] = "AE";
+const char *AUTHOR_FIELDS = "AQ";
+
+enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM };
+
+const char *reference_types[] = {
+ "other",
+ "journal-article",
+ "book",
+ "article-in-book",
+ "tech-report",
+ "bell-tm",
+};
+
+static string temp_fields[256];
+
+reference::reference(const char *start, int len, reference_id *ridp)
+: h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0),
+ computed_authors(0), last_needed_author(-1), nauthors(-1)
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ field_index[i] = NULL_FIELD_INDEX;
+ if (ridp)
+ rid = *ridp;
+ if (start == 0)
+ return;
+ if (len <= 0)
+ return;
+ const char *end = start + len;
+ const char *ptr = start;
+ assert(*ptr == '%');
+ while (ptr < end) {
+ if (ptr + 1 < end && ptr[1] != '\0'
+ && ((ptr[1] != '%' && ptr[1] == annotation_field)
+ || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0'
+ && discard_fields.search(ptr[2]) < 0))) {
+ if (ptr[1] == '%')
+ ptr++;
+ string &f = temp_fields[(unsigned char)ptr[1]];
+ ptr += 2;
+ while (ptr < end && csspace(*ptr))
+ ptr++;
+ for (;;) {
+ for (;;) {
+ if (ptr >= end) {
+ f += '\n';
+ break;
+ }
+ f += *ptr;
+ if (*ptr++ == '\n')
+ break;
+ }
+ if (ptr >= end || *ptr == '%')
+ break;
+ }
+ }
+ else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%'
+ && discard_fields.search(ptr[1]) < 0) {
+ string &f = temp_fields[(unsigned char)ptr[1]];
+ if (f.length() > 0) {
+ if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0)
+ f += FIELD_SEPARATOR;
+ else
+ f.clear();
+ }
+ ptr += 2;
+ if (ptr < end) {
+ if (*ptr == ' ')
+ ptr++;
+ for (;;) {
+ const char *p = ptr;
+ while (ptr < end && *ptr != '\n')
+ ptr++;
+ // strip trailing white space
+ const char *q = ptr;
+ while (q > p && q[-1] != '\n' && csspace(q[-1]))
+ q--;
+ while (p < q)
+ f += *p++;
+ if (ptr >= end)
+ break;
+ ptr++;
+ if (ptr >= end)
+ break;
+ if (*ptr == '%')
+ break;
+ f += ' ';
+ }
+ }
+ }
+ else {
+ // skip this field
+ for (;;) {
+ while (ptr < end && *ptr++ != '\n')
+ ;
+ if (ptr >= end || *ptr == '%')
+ break;
+ }
+ }
+ }
+ for (i = 0; i < 256; i++)
+ if (temp_fields[i].length() > 0)
+ nfields++;
+ field = new string[nfields];
+ int j = 0;
+ for (i = 0; i < 256; i++)
+ if (temp_fields[i].length() > 0) {
+ field[j].move(temp_fields[i]);
+ if (abbreviate_fields.search(i) >= 0)
+ abbreviate_names(field[j]);
+ field_index[i] = j;
+ j++;
+ }
+}
+
+reference::~reference()
+{
+ if (nfields > 0)
+ ad_delete(nfields) field;
+}
+
+// ref is the inline, this is the database ref
+
+void reference::merge(reference &ref)
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (field_index[i] != NULL_FIELD_INDEX)
+ temp_fields[i].move(field[field_index[i]]);
+ for (i = 0; i < 256; i++)
+ if (ref.field_index[i] != NULL_FIELD_INDEX)
+ temp_fields[i].move(ref.field[ref.field_index[i]]);
+ for (i = 0; i < 256; i++)
+ field_index[i] = NULL_FIELD_INDEX;
+ int old_nfields = nfields;
+ nfields = 0;
+ for (i = 0; i < 256; i++)
+ if (temp_fields[i].length() > 0)
+ nfields++;
+ if (nfields != old_nfields) {
+ if (old_nfields > 0)
+ ad_delete(old_nfields) field;
+ field = new string[nfields];
+ }
+ int j = 0;
+ for (i = 0; i < 256; i++)
+ if (temp_fields[i].length() > 0) {
+ field[j].move(temp_fields[i]);
+ field_index[i] = j;
+ j++;
+ }
+ merged = 1;
+}
+
+void reference::insert_field(unsigned char c, string &s)
+{
+ assert(s.length() > 0);
+ if (field_index[c] != NULL_FIELD_INDEX) {
+ field[field_index[c]].move(s);
+ return;
+ }
+ assert(field_index[c] == NULL_FIELD_INDEX);
+ string *old_field = field;
+ field = new string[nfields + 1];
+ int pos = 0;
+ int i;
+ for (i = 0; i < int(c); i++)
+ if (field_index[i] != NULL_FIELD_INDEX)
+ pos++;
+ for (i = 0; i < pos; i++)
+ field[i].move(old_field[i]);
+ field[pos].move(s);
+ for (i = pos; i < nfields; i++)
+ field[i + 1].move(old_field[i]);
+ if (nfields > 0)
+ ad_delete(nfields) old_field;
+ nfields++;
+ field_index[c] = pos;
+ for (i = c + 1; i < 256; i++)
+ if (field_index[i] != NULL_FIELD_INDEX)
+ field_index[i] += 1;
+}
+
+void reference::delete_field(unsigned char c)
+{
+ if (field_index[c] == NULL_FIELD_INDEX)
+ return;
+ string *old_field = field;
+ field = new string[nfields - 1];
+ int i;
+ for (i = 0; i < int(field_index[c]); i++)
+ field[i].move(old_field[i]);
+ for (i = field_index[c]; i < nfields - 1; i++)
+ field[i].move(old_field[i + 1]);
+ if (nfields > 0)
+ ad_delete(nfields) old_field;
+ nfields--;
+ field_index[c] = NULL_FIELD_INDEX;
+ for (i = c + 1; i < 256; i++)
+ if (field_index[i] != NULL_FIELD_INDEX)
+ field_index[i] -= 1;
+}
+
+void reference::compute_hash_code()
+{
+ if (!rid.is_null())
+ h = rid.hash();
+ else {
+ h = 0;
+ for (int i = 0; i < nfields; i++)
+ if (field[i].length() > 0) {
+ h <<= 4;
+ h ^= hash_string(field[i].contents(), field[i].length());
+ }
+ }
+}
+
+void reference::set_number(int n)
+{
+ no = n;
+}
+
+const char SORT_SEP = '\001';
+const char SORT_SUB_SEP = '\002';
+const char SORT_SUB_SUB_SEP = '\003';
+
+// sep specifies additional word separators
+
+void sortify_words(const char *s, const char *end, const char *sep,
+ string &result)
+{
+ int non_empty = 0;
+ int need_separator = 0;
+ for (;;) {
+ const char *token_start = s;
+ if (!get_token(&s, end))
+ break;
+ if ((s - token_start == 1
+ && (*token_start == ' '
+ || *token_start == '\n'
+ || (sep && *token_start != '\0'
+ && strchr(sep, *token_start) != 0)))
+ || (s - token_start == 2
+ && token_start[0] == '\\' && token_start[1] == ' ')) {
+ if (non_empty)
+ need_separator = 1;
+ }
+ else {
+ const token_info *ti = lookup_token(token_start, s);
+ if (ti->sortify_non_empty(token_start, s)) {
+ if (need_separator) {
+ result += ' ';
+ need_separator = 0;
+ }
+ ti->sortify(token_start, s, result);
+ non_empty = 1;
+ }
+ }
+ }
+}
+
+void sortify_word(const char *s, const char *end, string &result)
+{
+ for (;;) {
+ const char *token_start = s;
+ if (!get_token(&s, end))
+ break;
+ const token_info *ti = lookup_token(token_start, s);
+ ti->sortify(token_start, s, result);
+ }
+}
+
+void sortify_other(const char *s, int len, string &key)
+{
+ sortify_words(s, s + len, 0, key);
+}
+
+void sortify_title(const char *s, int len, string &key)
+{
+ const char *end = s + len;
+ for (; s < end && (*s == ' ' || *s == '\n'); s++)
+ ;
+ const char *ptr = s;
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ if (ptr - token_start == 1
+ && (*token_start == ' ' || *token_start == '\n'))
+ break;
+ }
+ if (ptr < end) {
+ int first_word_len = ptr - s - 1;
+ const char *ae = articles.contents() + articles.length();
+ for (const char *a = articles.contents();
+ a < ae;
+ a = strchr(a, '\0') + 1)
+ if (first_word_len == strlen(a)) {
+ int j;
+ for (j = 0; j < first_word_len; j++)
+ if (a[j] != cmlower(s[j]))
+ break;
+ if (j >= first_word_len) {
+ s = ptr;
+ for (; s < end && (*s == ' ' || *s == '\n'); s++)
+ ;
+ break;
+ }
+ }
+ }
+ sortify_words(s, end, 0, key);
+}
+
+void sortify_name(const char *s, int len, string &key)
+{
+ const char *last_name_end;
+ const char *last_name = find_last_name(s, s + len, &last_name_end);
+ sortify_word(last_name, last_name_end, key);
+ key += SORT_SUB_SUB_SEP;
+ if (last_name > s)
+ sortify_words(s, last_name, ".", key);
+ key += SORT_SUB_SUB_SEP;
+ if (last_name_end < s + len)
+ sortify_words(last_name_end, s + len, ".,", key);
+}
+
+void sortify_date(const char *s, int len, string &key)
+{
+ const char *year_end;
+ const char *year_start = find_year(s, s + len, &year_end);
+ if (!year_start) {
+ // Things without years are often `forthcoming', so it makes sense
+ // that they sort after things with explicit years.
+ key += 'A';
+ sortify_words(s, s + len, 0, key);
+ return;
+ }
+ int n = year_end - year_start;
+ while (n < 4) {
+ key += '0';
+ n++;
+ }
+ while (year_start < year_end)
+ key += *year_start++;
+ int m = find_month(s, s + len);
+ if (m < 0)
+ return;
+ key += 'A' + m;
+ const char *day_end;
+ const char *day_start = find_day(s, s + len, &day_end);
+ if (!day_start)
+ return;
+ if (day_end - day_start == 1)
+ key += '0';
+ while (day_start < day_end)
+ key += *day_start++;
+}
+
+// SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification.
+
+void sortify_label(const char *s, int len, string &key)
+{
+ const char *end = s + len;
+ for (;;) {
+ const char *ptr;
+ for (ptr = s;
+ ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP;
+ ptr++)
+ ;
+ if (ptr > s)
+ sortify_words(s, ptr, 0, key);
+ s = ptr;
+ if (s >= end)
+ break;
+ key += *s++;
+ }
+}
+
+void reference::compute_sort_key()
+{
+ if (sort_fields.length() == 0)
+ return;
+ sort_fields += '\0';
+ const char *sf = sort_fields.contents();
+ while (*sf != '\0') {
+ if (sf > sort_fields)
+ sort_key += SORT_SEP;
+ char f = *sf++;
+ int n = 1;
+ if (*sf == '+') {
+ n = INT_MAX;
+ sf++;
+ }
+ else if (csdigit(*sf)) {
+ char *ptr;
+ long l = strtol(sf, &ptr, 10);
+ if (l == 0 && ptr == sf)
+ ;
+ else {
+ sf = ptr;
+ if (l < 0) {
+ n = 1;
+ }
+ else {
+ n = int(l);
+ }
+ }
+ }
+ if (f == '.')
+ sortify_label(label.contents(), label.length(), sort_key);
+ else if (f == AUTHOR_FIELDS[0])
+ sortify_authors(n, sort_key);
+ else
+ sortify_field(f, n, sort_key);
+ }
+ sort_fields.set_length(sort_fields.length() - 1);
+}
+
+void reference::sortify_authors(int n, string &result) const
+{
+ for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++)
+ if (contains_field(*p)) {
+ sortify_field(*p, n, result);
+ return;
+ }
+ sortify_field(AUTHOR_FIELDS[0], n, result);
+}
+
+void reference::canonicalize_authors(string &result) const
+{
+ int len = result.length();
+ sortify_authors(INT_MAX, result);
+ if (result.length() > len)
+ result += SORT_SUB_SEP;
+}
+
+void reference::sortify_field(unsigned char f, int n, string &result) const
+{
+ typedef void (*sortify_t)(const char *, int, string &);
+ sortify_t sortifier = sortify_other;
+ switch (f) {
+ case 'A':
+ case 'E':
+ sortifier = sortify_name;
+ break;
+ case 'D':
+ sortifier = sortify_date;
+ break;
+ case 'B':
+ case 'J':
+ case 'T':
+ sortifier = sortify_title;
+ break;
+ }
+ int fi = field_index[(unsigned char)f];
+ if (fi != NULL_FIELD_INDEX) {
+ string &str = field[fi];
+ const char *start = str.contents();
+ const char *end = start + str.length();
+ for (int i = 0; i < n && start < end; i++) {
+ const char *p = start;
+ while (start < end && *start != FIELD_SEPARATOR)
+ start++;
+ if (i > 0)
+ result += SORT_SUB_SEP;
+ (*sortifier)(p, start - p, result);
+ if (start < end)
+ start++;
+ }
+ }
+}
+
+int compare_reference(const reference &r1, const reference &r2)
+{
+ assert(r1.no >= 0);
+ assert(r2.no >= 0);
+ const char *s1 = r1.sort_key.contents();
+ int n1 = r1.sort_key.length();
+ const char *s2 = r2.sort_key.contents();
+ int n2 = r2.sort_key.length();
+ for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2)
+ if (*s1 != *s2)
+ return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
+ if (n2 > 0)
+ return -1;
+ if (n1 > 0)
+ return 1;
+ return r1.no - r2.no;
+}
+
+int same_reference(const reference &r1, const reference &r2)
+{
+ if (!r1.rid.is_null() && r1.rid == r2.rid)
+ return 1;
+ if (r1.h != r2.h)
+ return 0;
+ if (r1.nfields != r2.nfields)
+ return 0;
+ int i = 0;
+ for (i = 0; i < 256; i++)
+ if (r1.field_index != r2.field_index)
+ return 0;
+ for (i = 0; i < r1.nfields; i++)
+ if (r1.field[i] != r2.field[i])
+ return 0;
+ return 1;
+}
+
+const char *find_last_name(const char *start, const char *end,
+ const char **endp)
+{
+ const char *ptr = start;
+ const char *last_word = start;
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ if (ptr - token_start == 1) {
+ if (*token_start == ',') {
+ *endp = token_start;
+ return last_word;
+ }
+ else if (*token_start == ' ' || *token_start == '\n') {
+ if (ptr < end && *ptr != ' ' && *ptr != '\n')
+ last_word = ptr;
+ }
+ }
+ }
+ *endp = end;
+ return last_word;
+}
+
+void abbreviate_name(const char *ptr, const char *end, string &result)
+{
+ const char *last_name_end;
+ const char *last_name_start = find_last_name(ptr, end, &last_name_end);
+ int need_period = 0;
+ for (;;) {
+ const char *token_start = ptr;
+ if (!get_token(&ptr, last_name_start))
+ break;
+ const token_info *ti = lookup_token(token_start, ptr);
+ if (need_period) {
+ if ((ptr - token_start == 1 && *token_start == ' ')
+ || (ptr - token_start == 2 && token_start[0] == '\\'
+ && token_start[1] == ' '))
+ continue;
+ if (ti->is_upper())
+ result += period_before_initial;
+ else
+ result += period_before_other;
+ need_period = 0;
+ }
+ result.append(token_start, ptr - token_start);
+ if (ti->is_upper()) {
+ const char *lower_ptr = ptr;
+ int first_token = 1;
+ for (;;) {
+ token_start = ptr;
+ if (!get_token(&ptr, last_name_start))
+ break;
+ if ((ptr - token_start == 1 && *token_start == ' ')
+ || (ptr - token_start == 2 && token_start[0] == '\\'
+ && token_start[1] == ' '))
+ break;
+ ti = lookup_token(token_start, ptr);
+ if (ti->is_hyphen()) {
+ const char *ptr1 = ptr;
+ if (get_token(&ptr1, last_name_start)) {
+ ti = lookup_token(ptr, ptr1);
+ if (ti->is_upper()) {
+ result += period_before_hyphen;
+ result.append(token_start, ptr1 - token_start);
+ ptr = ptr1;
+ }
+ }
+ }
+ else if (ti->is_upper()) {
+ // MacDougal -> MacD.
+ result.append(lower_ptr, ptr - lower_ptr);
+ lower_ptr = ptr;
+ first_token = 1;
+ }
+ else if (first_token && ti->is_accent()) {
+ result.append(token_start, ptr - token_start);
+ lower_ptr = ptr;
+ }
+ first_token = 0;
+ }
+ need_period = 1;
+ }
+ }
+ if (need_period)
+ result += period_before_last_name;
+ result.append(last_name_start, end - last_name_start);
+}
+
+static void abbreviate_names(string &result)
+{
+ string str;
+ str.move(result);
+ const char *ptr = str.contents();
+ const char *end = ptr + str.length();
+ while (ptr < end) {
+ const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
+ if (name_end == 0)
+ name_end = end;
+ abbreviate_name(ptr, name_end, result);
+ if (name_end >= end)
+ break;
+ ptr = name_end + 1;
+ result += FIELD_SEPARATOR;
+ }
+}
+
+void reverse_name(const char *ptr, const char *name_end, string &result)
+{
+ const char *last_name_end;
+ const char *last_name_start = find_last_name(ptr, name_end, &last_name_end);
+ result.append(last_name_start, last_name_end - last_name_start);
+ while (last_name_start > ptr
+ && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n'))
+ last_name_start--;
+ if (last_name_start > ptr) {
+ result += ", ";
+ result.append(ptr, last_name_start - ptr);
+ }
+ if (last_name_end < name_end)
+ result.append(last_name_end, name_end - last_name_end);
+}
+
+void reverse_names(string &result, int n)
+{
+ if (n <= 0)
+ return;
+ string str;
+ str.move(result);
+ const char *ptr = str.contents();
+ const char *end = ptr + str.length();
+ while (ptr < end) {
+ if (--n < 0) {
+ result.append(ptr, end - ptr);
+ break;
+ }
+ const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
+ if (name_end == 0)
+ name_end = end;
+ reverse_name(ptr, name_end, result);
+ if (name_end >= end)
+ break;
+ ptr = name_end + 1;
+ result += FIELD_SEPARATOR;
+ }
+}
+
+// Return number of field separators.
+
+int join_fields(string &f)
+{
+ const char *ptr = f.contents();
+ int len = f.length();
+ int nfield_seps = 0;
+ int j;
+ for (j = 0; j < len; j++)
+ if (ptr[j] == FIELD_SEPARATOR)
+ nfield_seps++;
+ if (nfield_seps == 0)
+ return 0;
+ string temp;
+ int field_seps_left = nfield_seps;
+ for (j = 0; j < len; j++) {
+ if (ptr[j] == FIELD_SEPARATOR) {
+ if (nfield_seps == 1)
+ temp += join_authors_exactly_two;
+ else if (--field_seps_left == 0)
+ temp += join_authors_last_two;
+ else
+ temp += join_authors_default;
+ }
+ else
+ temp += ptr[j];
+ }
+ f = temp;
+ return nfield_seps;
+}
+
+void uppercase(const char *start, const char *end, string &result)
+{
+ for (;;) {
+ const char *token_start = start;
+ if (!get_token(&start, end))
+ break;
+ const token_info *ti = lookup_token(token_start, start);
+ ti->upper_case(token_start, start, result);
+ }
+}
+
+void lowercase(const char *start, const char *end, string &result)
+{
+ for (;;) {
+ const char *token_start = start;
+ if (!get_token(&start, end))
+ break;
+ const token_info *ti = lookup_token(token_start, start);
+ ti->lower_case(token_start, start, result);
+ }
+}
+
+void capitalize(const char *ptr, const char *end, string &result)
+{
+ int in_small_point_size = 0;
+ for (;;) {
+ const char *start = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ const token_info *ti = lookup_token(start, ptr);
+ const char *char_end = ptr;
+ int is_lower = ti->is_lower();
+ if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) {
+ const token_info *ti2 = lookup_token(char_end, ptr);
+ if (!ti2->is_accent())
+ ptr = char_end;
+ }
+ if (is_lower) {
+ if (!in_small_point_size) {
+ result += "\\s-2";
+ in_small_point_size = 1;
+ }
+ ti->upper_case(start, char_end, result);
+ result.append(char_end, ptr - char_end);
+ }
+ else {
+ if (in_small_point_size) {
+ result += "\\s+2";
+ in_small_point_size = 0;
+ }
+ result.append(start, ptr - start);
+ }
+ }
+ if (in_small_point_size)
+ result += "\\s+2";
+}
+
+void capitalize_field(string &str)
+{
+ string temp;
+ capitalize(str.contents(), str.contents() + str.length(), temp);
+ str.move(temp);
+}
+
+int is_terminated(const char *ptr, const char *end)
+{
+ const char *last_token = end;
+ for (;;) {
+ const char *p = ptr;
+ if (!get_token(&ptr, end))
+ break;
+ last_token = p;
+ }
+ return end - last_token == 1
+ && (*last_token == '.' || *last_token == '!' || *last_token == '?');
+}
+
+void reference::output(FILE *fp)
+{
+ fputs(".]-\n", fp);
+ for (int i = 0; i < 256; i++)
+ if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) {
+ string &f = field[field_index[i]];
+ if (!csdigit(i)) {
+ int j = reverse_fields.search(i);
+ if (j >= 0) {
+ int n;
+ int len = reverse_fields.length();
+ if (++j < len && csdigit(reverse_fields[j])) {
+ n = reverse_fields[j] - '0';
+ for (++j; j < len && csdigit(reverse_fields[j]); j++)
+ // should check for overflow
+ n = n*10 + reverse_fields[j] - '0';
+ }
+ else
+ n = INT_MAX;
+ reverse_names(f, n);
+ }
+ }
+ int is_multiple = join_fields(f) > 0;
+ if (capitalize_fields.search(i) >= 0)
+ capitalize_field(f);
+ if (memchr(f.contents(), '\n', f.length()) == 0) {
+ fprintf(fp, ".ds [%c ", i);
+ if (f[0] == ' ' || f[0] == '\\' || f[0] == '"')
+ putc('"', fp);
+ put_string(f, fp);
+ putc('\n', fp);
+ }
+ else {
+ fprintf(fp, ".de [%c\n", i);
+ put_string(f, fp);
+ fputs("..\n", fp);
+ }
+ if (i == 'P') {
+ int multiple_pages = 0;
+ const char *s = f.contents();
+ const char *end = f.contents() + f.length();
+ for (;;) {
+ const char *token_start = s;
+ if (!get_token(&s, end))
+ break;
+ const token_info *ti = lookup_token(token_start, s);
+ if (ti->is_hyphen() || ti->is_range_sep()) {
+ multiple_pages = 1;
+ break;
+ }
+ }
+ fprintf(fp, ".nr [P %d\n", multiple_pages);
+ }
+ else if (i == 'E')
+ fprintf(fp, ".nr [E %d\n", is_multiple);
+ }
+ for (const char *p = "TAO"; *p; p++) {
+ int fi = field_index[(unsigned char)*p];
+ if (fi != NULL_FIELD_INDEX) {
+ string &f = field[fi];
+ fprintf(fp, ".nr [%c %d\n", *p,
+ is_terminated(f.contents(), f.contents() + f.length()));
+ }
+ }
+ int t = classify();
+ fprintf(fp, ".][ %d %s\n", t, reference_types[t]);
+ if (annotation_macro.length() > 0 && annotation_field >= 0
+ && field_index[annotation_field] != NULL_FIELD_INDEX) {
+ putc('.', fp);
+ put_string(annotation_macro, fp);
+ putc('\n', fp);
+ put_string(field[field_index[annotation_field]], fp);
+ }
+}
+
+void reference::print_sort_key_comment(FILE *fp)
+{
+ fputs(".\\\"", fp);
+ put_string(sort_key, fp);
+ putc('\n', fp);
+}
+
+const char *find_year(const char *start, const char *end, const char **endp)
+{
+ for (;;) {
+ while (start < end && !csdigit(*start))
+ start++;
+ const char *ptr = start;
+ if (start == end)
+ break;
+ while (ptr < end && csdigit(*ptr))
+ ptr++;
+ if (ptr - start == 4 || ptr - start == 3
+ || (ptr - start == 2
+ && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) {
+ *endp = ptr;
+ return start;
+ }
+ start = ptr;
+ }
+ return 0;
+}
+
+static const char *find_day(const char *start, const char *end,
+ const char **endp)
+{
+ for (;;) {
+ while (start < end && !csdigit(*start))
+ start++;
+ const char *ptr = start;
+ if (start == end)
+ break;
+ while (ptr < end && csdigit(*ptr))
+ ptr++;
+ if ((ptr - start == 1 && start[0] != '0')
+ || (ptr - start == 2 &&
+ (start[0] == '1'
+ || start[0] == '2'
+ || (start[0] == '3' && start[1] <= '1')
+ || (start[0] == '0' && start[1] != '0')))) {
+ *endp = ptr;
+ return start;
+ }
+ start = ptr;
+ }
+ return 0;
+}
+
+static int find_month(const char *start, const char *end)
+{
+ static const char *months[] = {
+ "january",
+ "february",
+ "march",
+ "april",
+ "may",
+ "june",
+ "july",
+ "august",
+ "september",
+ "october",
+ "november",
+ "december",
+ };
+ for (;;) {
+ while (start < end && !csalpha(*start))
+ start++;
+ const char *ptr = start;
+ if (start == end)
+ break;
+ while (ptr < end && csalpha(*ptr))
+ ptr++;
+ if (ptr - start >= 3) {
+ for (int i = 0; i < sizeof(months)/sizeof(months[0]); i++) {
+ const char *q = months[i];
+ const char *p = start;
+ for (; p < ptr; p++, q++)
+ if (cmlower(*p) != *q)
+ break;
+ if (p >= ptr)
+ return i;
+ }
+ }
+ start = ptr;
+ }
+ return -1;
+}
+
+int reference::contains_field(char c) const
+{
+ return field_index[(unsigned char)c] != NULL_FIELD_INDEX;
+}
+
+int reference::classify()
+{
+ if (contains_field('J'))
+ return JOURNAL_ARTICLE;
+ if (contains_field('B'))
+ return ARTICLE_IN_BOOK;
+ if (contains_field('G'))
+ return TECH_REPORT;
+ if (contains_field('R'))
+ return TECH_REPORT;
+ if (contains_field('I'))
+ return BOOK;
+ if (contains_field('M'))
+ return BELL_TM;
+ return OTHER;
+}
+
+const char *reference::get_year(const char **endp) const
+{
+ if (field_index['D'] != NULL_FIELD_INDEX) {
+ string &date = field[field_index['D']];
+ const char *start = date.contents();
+ const char *end = start + date.length();
+ return find_year(start, end, endp);
+ }
+ else
+ return 0;
+}
+
+const char *reference::get_field(unsigned char c, const char **endp) const
+{
+ if (field_index[c] != NULL_FIELD_INDEX) {
+ string &f = field[field_index[c]];
+ const char *start = f.contents();
+ *endp = start + f.length();
+ return start;
+ }
+ else
+ return 0;
+}
+
+const char *reference::get_date(const char **endp) const
+{
+ return get_field('D', endp);
+}
+
+const char *nth_field(int i, const char *start, const char **endp)
+{
+ while (--i >= 0) {
+ start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
+ if (!start)
+ return 0;
+ start++;
+ }
+ const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
+ if (e)
+ *endp = e;
+ return start;
+}
+
+const char *reference::get_author(int i, const char **endp) const
+{
+ for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
+ const char *start = get_field(*f, endp);
+ if (start) {
+ if (strchr(MULTI_FIELD_NAMES, *f) != 0)
+ return nth_field(i, start, endp);
+ else if (i == 0)
+ return start;
+ else
+ return 0;
+ }
+ }
+ return 0;
+}
+
+const char *reference::get_author_last_name(int i, const char **endp) const
+{
+ for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
+ const char *start = get_field(*f, endp);
+ if (start) {
+ if (strchr(MULTI_FIELD_NAMES, *f) != 0) {
+ start = nth_field(i, start, endp);
+ if (!start)
+ return 0;
+ }
+ if (*f == 'A')
+ return find_last_name(start, *endp, endp);
+ else
+ return start;
+ }
+ }
+ return 0;
+}
+
+void reference::set_date(string &d)
+{
+ if (d.length() == 0)
+ delete_field('D');
+ else
+ insert_field('D', d);
+}
+
+int same_year(const reference &r1, const reference &r2)
+{
+ const char *ye1;
+ const char *ys1 = r1.get_year(&ye1);
+ const char *ye2;
+ const char *ys2 = r2.get_year(&ye2);
+ if (ys1 == 0) {
+ if (ys2 == 0)
+ return same_date(r1, r2);
+ else
+ return 0;
+ }
+ else if (ys2 == 0)
+ return 0;
+ else if (ye1 - ys1 != ye2 - ys2)
+ return 0;
+ else
+ return memcmp(ys1, ys2, ye1 - ys1) == 0;
+}
+
+int same_date(const reference &r1, const reference &r2)
+{
+ const char *e1;
+ const char *s1 = r1.get_date(&e1);
+ const char *e2;
+ const char *s2 = r2.get_date(&e2);
+ if (s1 == 0)
+ return s2 == 0;
+ else if (s2 == 0)
+ return 0;
+ else if (e1 - s1 != e2 - s2)
+ return 0;
+ else
+ return memcmp(s1, s2, e1 - s1) == 0;
+}
+
+const char *reference::get_sort_field(int i, int si, int ssi,
+ const char **endp) const
+{
+ const char *start = sort_key.contents();
+ const char *end = start + sort_key.length();
+ if (i < 0) {
+ *endp = end;
+ return start;
+ }
+ while (--i >= 0) {
+ start = (char *)memchr(start, SORT_SEP, end - start);
+ if (!start)
+ return 0;
+ start++;
+ }
+ const char *e = (char *)memchr(start, SORT_SEP, end - start);
+ if (e)
+ end = e;
+ if (si < 0) {
+ *endp = end;
+ return start;
+ }
+ while (--si >= 0) {
+ start = (char *)memchr(start, SORT_SUB_SEP, end - start);
+ if (!start)
+ return 0;
+ start++;
+ }
+ e = (char *)memchr(start, SORT_SUB_SEP, end - start);
+ if (e)
+ end = e;
+ if (ssi < 0) {
+ *endp = end;
+ return start;
+ }
+ while (--ssi >= 0) {
+ start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
+ if (!start)
+ return 0;
+ start++;
+ }
+ e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
+ if (e)
+ end = e;
+ *endp = end;
+ return start;
+}
+
diff --git a/src/preproc/refer/ref.h b/src/preproc/refer/ref.h
new file mode 100644
index 00000000..13a984a4
--- /dev/null
+++ b/src/preproc/refer/ref.h
@@ -0,0 +1,120 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+struct label_info;
+
+enum label_type { NORMAL_LABEL, SHORT_LABEL };
+const int N_LABEL_TYPES = 2;
+
+struct substring_position {
+ int start;
+ int length;
+ substring_position() : start(-1) { }
+};
+
+class int_set {
+ string v;
+public:
+ int_set() { }
+ void set(int i);
+ int get(int i) const;
+};
+
+class reference {
+private:
+ unsigned h;
+ reference_id rid;
+ int merged;
+ string sort_key;
+ int no;
+ string *field;
+ int nfields;
+ unsigned char field_index[256];
+ enum { NULL_FIELD_INDEX = 255 };
+ string label;
+ substring_position separator_pos;
+ string short_label;
+ substring_position short_separator_pos;
+ label_info *label_ptr;
+ string authors;
+ int computed_authors;
+ int last_needed_author;
+ int nauthors;
+ int_set last_name_unambiguous;
+
+ int contains_field(char) const;
+ void insert_field(unsigned char, string &s);
+ void delete_field(unsigned char);
+ void set_date(string &);
+ const char *get_sort_field(int i, int si, int ssi, const char **endp) const;
+ int merge_labels_by_parts(reference **, int, label_type, string &);
+ int merge_labels_by_number(reference **, int, label_type, string &);
+public:
+ reference(const char * = 0, int = -1, reference_id * = 0);
+ ~reference();
+ void output(FILE *);
+ void print_sort_key_comment(FILE *);
+ void set_number(int);
+ int get_number() const { return no; }
+ unsigned hash() const { return h; }
+ const string &get_label(label_type type) const;
+ const substring_position &get_separator_pos(label_type) const;
+ int is_merged() const { return merged; }
+ void compute_sort_key();
+ void compute_hash_code();
+ void pre_compute_label();
+ void compute_label();
+ void immediate_compute_label();
+ int classify();
+ void merge(reference &);
+ int merge_labels(reference **, int, label_type, string &);
+ int get_nauthors() const;
+ void need_author(int);
+ void set_last_name_unambiguous(int);
+ void sortify_authors(int, string &) const;
+ void canonicalize_authors(string &) const;
+ void sortify_field(unsigned char, int, string &) const;
+ const char *get_author(int, const char **) const;
+ const char *get_author_last_name(int, const char **) const;
+ const char *get_date(const char **) const;
+ const char *get_year(const char **) const;
+ const char *get_field(unsigned char, const char **) const;
+ const label_info *get_label_ptr() const { return label_ptr; }
+ const char *get_authors(const char **) const;
+ // for sorting
+ friend int compare_reference(const reference &r1, const reference &r2);
+ // for merging
+ friend int same_reference(const reference &, const reference &);
+ friend int same_year(const reference &, const reference &);
+ friend int same_date(const reference &, const reference &);
+ friend int same_author_last_name(const reference &, const reference &, int);
+ friend int same_author_name(const reference &, const reference &, int);
+};
+
+const char *find_year(const char *, const char *, const char **);
+const char *find_last_name(const char *, const char *, const char **);
+
+const char *nth_field(int i, const char *start, const char **endp);
+
+void capitalize(const char *ptr, const char *end, string &result);
+void reverse_name(const char *ptr, const char *end, string &result);
+void uppercase(const char *ptr, const char *end, string &result);
+void lowercase(const char *ptr, const char *end, string &result);
+void abbreviate_name(const char *ptr, const char *end, string &result);
diff --git a/src/preproc/refer/refer.cc b/src/preproc/refer/refer.cc
new file mode 100644
index 00000000..cc8febe4
--- /dev/null
+++ b/src/preproc/refer/refer.cc
@@ -0,0 +1,1228 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+#include "search.h"
+#include "command.h"
+
+const char PRE_LABEL_MARKER = '\013';
+const char POST_LABEL_MARKER = '\014';
+const char LABEL_MARKER = '\015'; // label_type is added on
+
+#define FORCE_LEFT_BRACKET 04
+#define FORCE_RIGHT_BRACKET 010
+
+static FILE *outfp = stdout;
+
+string capitalize_fields;
+string reverse_fields;
+string abbreviate_fields;
+string period_before_last_name = ". ";
+string period_before_initial = ".";
+string period_before_hyphen = "";
+string period_before_other = ". ";
+string sort_fields;
+int annotation_field = -1;
+string annotation_macro;
+string discard_fields = "XYZ";
+string pre_label = "\\*([.";
+string post_label = "\\*(.]";
+string sep_label = ", ";
+int accumulate = 0;
+int move_punctuation = 0;
+int abbreviate_label_ranges = 0;
+string label_range_indicator;
+int label_in_text = 1;
+int label_in_reference = 1;
+int date_as_label = 0;
+int sort_adjacent_labels = 0;
+// Join exactly two authors with this.
+string join_authors_exactly_two = " and ";
+// When there are more than two authors join the last two with this.
+string join_authors_last_two = ", and ";
+// Otherwise join authors with this.
+string join_authors_default = ", ";
+string separate_label_second_parts = ", ";
+// Use this string to represent that there are other authors.
+string et_al = " et al";
+// Use et al only if it can replace at least this many authors.
+int et_al_min_elide = 2;
+// Use et al only if the total number of authors is at least this.
+int et_al_min_total = 3;
+
+
+int compatible_flag = 0;
+
+int short_label_flag = 0;
+
+static int recognize_R1_R2 = 1;
+
+search_list database_list;
+int search_default = 1;
+static int default_database_loaded = 0;
+
+static reference **citation = 0;
+static int ncitations = 0;
+static int citation_max = 0;
+
+static reference **reference_hash_table = 0;
+static int hash_table_size;
+static int nreferences = 0;
+
+static int need_syncing = 0;
+string pending_line;
+string pending_lf_lines;
+
+static void output_pending_line();
+static unsigned immediately_handle_reference(const string &);
+static void immediately_output_references();
+static unsigned store_reference(const string &);
+static void divert_to_temporary_file();
+static reference *make_reference(const string &, unsigned *);
+static void usage();
+static void do_file(const char *);
+static void split_punct(string &line, string &punct);
+static void output_citation_group(reference **v, int n, label_type, FILE *fp);
+static void possibly_load_default_database();
+
+int main(int argc, char **argv)
+{
+ program_name = argv[0];
+ static char stderr_buf[BUFSIZ];
+ setbuf(stderr, stderr_buf);
+ outfp = stdout;
+ int finished_options = 0;
+ int bib_flag = 0;
+ int done_spec = 0;
+
+ for (--argc, ++argv;
+ !finished_options && argc > 0 && argv[0][0] == '-'
+ && argv[0][1] != '\0';
+ argv++, argc--) {
+ const char *opt = argv[0] + 1;
+ while (opt != 0 && *opt != '\0') {
+ switch (*opt) {
+ case 'C':
+ compatible_flag = 1;
+ opt++;
+ break;
+ case 'B':
+ bib_flag = 1;
+ label_in_reference = 0;
+ label_in_text = 0;
+ ++opt;
+ if (*opt == '\0') {
+ annotation_field = 'X';
+ annotation_macro = "AP";
+ }
+ else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
+ annotation_field = opt[0];
+ annotation_macro = opt + 2;
+ }
+ opt = 0;
+ break;
+ case 'P':
+ move_punctuation = 1;
+ opt++;
+ break;
+ case 'R':
+ recognize_R1_R2 = 0;
+ opt++;
+ break;
+ case 'S':
+ // Not a very useful spec.
+ set_label_spec("(A.n|Q)', '(D.y|D)");
+ done_spec = 1;
+ pre_label = " (";
+ post_label = ")";
+ sep_label = "; ";
+ opt++;
+ break;
+ case 'V':
+ verify_flag = 1;
+ opt++;
+ break;
+ case 'f':
+ {
+ const char *num = 0;
+ if (*++opt == '\0') {
+ if (argc > 1) {
+ num = *++argv;
+ --argc;
+ }
+ else {
+ error("option `f' requires an argument");
+ usage();
+ }
+ }
+ else {
+ num = opt;
+ opt = 0;
+ }
+ const char *ptr;
+ for (ptr = num; *ptr; ptr++)
+ if (!csdigit(*ptr)) {
+ error("bad character `%1' in argument to -f option", *ptr);
+ break;
+ }
+ if (*ptr == '\0') {
+ string spec;
+ spec = '%';
+ spec += num;
+ spec += '\0';
+ set_label_spec(spec.contents());
+ done_spec = 1;
+ }
+ break;
+ }
+ case 'b':
+ label_in_text = 0;
+ label_in_reference = 0;
+ opt++;
+ break;
+ case 'e':
+ accumulate = 1;
+ opt++;
+ break;
+ case 'c':
+ capitalize_fields = ++opt;
+ opt = 0;
+ break;
+ case 'k':
+ {
+ char buf[5];
+ if (csalpha(*++opt))
+ buf[0] = *opt++;
+ else {
+ if (*opt != '\0')
+ error("bad field name `%1'", *opt++);
+ buf[0] = 'L';
+ }
+ buf[1] = '~';
+ buf[2] = '%';
+ buf[3] = 'a';
+ buf[4] = '\0';
+ set_label_spec(buf);
+ done_spec = 1;
+ }
+ break;
+ case 'a':
+ {
+ const char *ptr;
+ for (ptr = ++opt; *ptr; ptr++)
+ if (!csdigit(*ptr)) {
+ error("argument to `a' option not a number");
+ break;
+ }
+ if (*ptr == '\0') {
+ reverse_fields = 'A';
+ reverse_fields += opt;
+ }
+ opt = 0;
+ }
+ break;
+ case 'i':
+ linear_ignore_fields = ++opt;
+ opt = 0;
+ break;
+ case 'l':
+ {
+ char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
+ strcpy(buf, "A.n");
+ if (*++opt != '\0' && *opt != ',') {
+ char *ptr;
+ long n = strtol(opt, &ptr, 10);
+ if (n == 0 && ptr == opt) {
+ error("bad integer `%1' in `l' option", opt);
+ opt = 0;
+ break;
+ }
+ if (n < 0)
+ n = 0;
+ opt = ptr;
+ sprintf(strchr(buf, '\0'), "+%ld", n);
+ }
+ strcat(buf, "D.y");
+ if (*opt == ',')
+ opt++;
+ if (*opt != '\0') {
+ char *ptr;
+ long n = strtol(opt, &ptr, 10);
+ if (n == 0 && ptr == opt) {
+ error("bad integer `%1' in `l' option", opt);
+ opt = 0;
+ break;
+ }
+ if (n < 0)
+ n = 0;
+ sprintf(strchr(buf, '\0'), "-%ld", n);
+ opt = ptr;
+ if (*opt != '\0')
+ error("argument to `l' option not of form `m,n'");
+ }
+ strcat(buf, "%a");
+ if (!set_label_spec(buf))
+ assert(0);
+ done_spec = 1;
+ }
+ break;
+ case 'n':
+ search_default = 0;
+ opt++;
+ break;
+ case 'p':
+ {
+ const char *filename = 0;
+ if (*++opt == '\0') {
+ if (argc > 1) {
+ filename = *++argv;
+ argc--;
+ }
+ else {
+ error("option `p' requires an argument");
+ usage();
+ }
+ }
+ else {
+ filename = opt;
+ opt = 0;
+ }
+ database_list.add_file(filename);
+ }
+ break;
+ case 's':
+ if (*++opt == '\0')
+ sort_fields = "AD";
+ else {
+ sort_fields = opt;
+ opt = 0;
+ }
+ accumulate = 1;
+ break;
+ case 't':
+ {
+ char *ptr;
+ long n = strtol(opt, &ptr, 10);
+ if (n == 0 && ptr == opt) {
+ error("bad integer `%1' in `t' option", opt);
+ opt = 0;
+ break;
+ }
+ if (n < 1)
+ n = 1;
+ linear_truncate_len = int(n);
+ opt = ptr;
+ break;
+ }
+ case 'v':
+ {
+ extern const char *Version_string;
+ fprintf(stderr, "GNU refer version %s\n", Version_string);
+ fflush(stderr);
+ opt++;
+ break;
+ }
+ case '-':
+ if (opt[1] == '\0') {
+ finished_options = 1;
+ opt++;
+ break;
+ }
+ // fall through
+ default:
+ error("unrecognized option `%1'", *opt);
+ usage();
+ break;
+ }
+ }
+ }
+ if (!done_spec)
+ set_label_spec("%1");
+ if (argc <= 0) {
+ if (bib_flag)
+ do_bib("-");
+ else
+ do_file("-");
+ }
+ else {
+ for (int i = 0; i < argc; i++) {
+ if (bib_flag)
+ do_bib(argv[i]);
+ else
+ do_file(argv[i]);
+ }
+ }
+ if (accumulate)
+ output_references();
+ if (fflush(stdout) < 0)
+ fatal("output error");
+ return 0;
+}
+
+static void usage()
+{
+ fprintf(stderr,
+"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
+" [-sXYZ] [-tN] [-BL.M] [files ...]\n",
+ program_name);
+ exit(1);
+}
+
+static void possibly_load_default_database()
+{
+ if (search_default && !default_database_loaded) {
+ char *filename = getenv("REFER");
+ if (filename)
+ database_list.add_file(filename);
+ else
+ database_list.add_file(DEFAULT_INDEX, 1);
+ default_database_loaded = 1;
+ }
+}
+
+static int is_list(const string &str)
+{
+ const char *start = str.contents();
+ const char *end = start + str.length();
+ while (end > start && csspace(end[-1]))
+ end--;
+ while (start < end && csspace(*start))
+ start++;
+ return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
+}
+
+static void do_file(const char *filename)
+{
+ FILE *fp;
+ if (strcmp(filename, "-") == 0) {
+ fp = stdin;
+ }
+ else {
+ errno = 0;
+ fp = fopen(filename, "r");
+ if (fp == 0) {
+ error("can't open `%1': %2", filename, strerror(errno));
+ return;
+ }
+ }
+ current_filename = filename;
+ fprintf(outfp, ".lf 1 %s\n", filename);
+ string line;
+ current_lineno = 0;
+ for (;;) {
+ line.clear();
+ for (;;) {
+ int c = getc(fp);
+ if (c == EOF) {
+ if (line.length() > 0)
+ line += '\n';
+ break;
+ }
+ if (illegal_input_char(c))
+ error("illegal input character code %1", c);
+ else {
+ line += c;
+ if (c == '\n')
+ break;
+ }
+ }
+ int len = line.length();
+ if (len == 0)
+ break;
+ current_lineno++;
+ if (len >= 2 && line[0] == '.' && line[1] == '[') {
+ int start_lineno = current_lineno;
+ int start_of_line = 1;
+ string str;
+ string post;
+ string pre(line.contents() + 2, line.length() - 3);
+ for (;;) {
+ int c = getc(fp);
+ if (c == EOF) {
+ error_with_file_and_line(current_filename, start_lineno,
+ "missing `.]' line");
+ break;
+ }
+ if (start_of_line)
+ current_lineno++;
+ if (start_of_line && c == '.') {
+ int d = getc(fp);
+ if (d == ']') {
+ while ((d = getc(fp)) != '\n' && d != EOF) {
+ if (illegal_input_char(d))
+ error("illegal input character code %1", d);
+ else
+ post += d;
+ }
+ break;
+ }
+ if (d != EOF)
+ ungetc(d, fp);
+ }
+ if (illegal_input_char(c))
+ error("illegal input character code %1", c);
+ else
+ str += c;
+ start_of_line = (c == '\n');
+ }
+ if (is_list(str)) {
+ output_pending_line();
+ if (accumulate)
+ output_references();
+ else
+ error("found `$LIST$' but not accumulating references");
+ }
+ else {
+ unsigned flags = (accumulate
+ ? store_reference(str)
+ : immediately_handle_reference(str));
+ if (label_in_text) {
+ if (accumulate && outfp == stdout)
+ divert_to_temporary_file();
+ if (pending_line.length() == 0) {
+ warning("can't attach citation to previous line");
+ }
+ else
+ pending_line.set_length(pending_line.length() - 1);
+ string punct;
+ if (move_punctuation)
+ split_punct(pending_line, punct);
+ int have_text = pre.length() > 0 || post.length() > 0;
+ label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
+ |FORCE_RIGHT_BRACKET));
+ if ((flags & FORCE_LEFT_BRACKET) || !have_text)
+ pending_line += PRE_LABEL_MARKER;
+ pending_line += pre;
+ char lm = LABEL_MARKER + lt;
+ pending_line += lm;
+ pending_line += post;
+ if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
+ pending_line += POST_LABEL_MARKER;
+ pending_line += punct;
+ pending_line += '\n';
+ }
+ }
+ need_syncing = 1;
+ }
+ else if (len >= 4
+ && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
+ && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
+ pending_lf_lines += line;
+ line += '\0';
+ if (interpret_lf_args(line.contents() + 3))
+ current_lineno--;
+ }
+ else if (recognize_R1_R2
+ && len >= 4
+ && line[0] == '.' && line[1] == 'R' && line[2] == '1'
+ && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
+ line.clear();
+ int start_of_line = 1;
+ int start_lineno = current_lineno;
+ for (;;) {
+ int c = getc(fp);
+ if (c != EOF && start_of_line)
+ current_lineno++;
+ if (start_of_line && c == '.') {
+ c = getc(fp);
+ if (c == 'R') {
+ c = getc(fp);
+ if (c == '2') {
+ c = getc(fp);
+ if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
+ while (c != EOF && c != '\n')
+ c = getc(fp);
+ break;
+ }
+ else {
+ line += '.';
+ line += 'R';
+ line += '2';
+ }
+ }
+ else {
+ line += '.';
+ line += 'R';
+ }
+ }
+ else
+ line += '.';
+ }
+ if (c == EOF) {
+ error_with_file_and_line(current_filename, start_lineno,
+ "missing `.R2' line");
+ break;
+ }
+ if (illegal_input_char(c))
+ error("illegal input character code %1", int(c));
+ else {
+ line += c;
+ start_of_line = c == '\n';
+ }
+ }
+ output_pending_line();
+ if (accumulate)
+ output_references();
+ else
+ nreferences = 0;
+ process_commands(line, current_filename, start_lineno + 1);
+ need_syncing = 1;
+ }
+ else {
+ output_pending_line();
+ pending_line = line;
+ }
+ }
+ need_syncing = 0;
+ output_pending_line();
+ if (fp != stdin)
+ fclose(fp);
+}
+
+class label_processing_state {
+ enum {
+ NORMAL,
+ PENDING_LABEL,
+ PENDING_LABEL_POST,
+ PENDING_LABEL_POST_PRE,
+ PENDING_POST
+ } state;
+ label_type type; // type of pending labels
+ int count; // number of pending labels
+ reference **rptr; // pointer to next reference
+ int rcount; // number of references left
+ FILE *fp;
+ int handle_pending(int c);
+public:
+ label_processing_state(reference **, int, FILE *);
+ ~label_processing_state();
+ void process(int c);
+};
+
+static void output_pending_line()
+{
+ if (label_in_text && !accumulate && ncitations > 0) {
+ label_processing_state state(citation, ncitations, outfp);
+ int len = pending_line.length();
+ for (int i = 0; i < len; i++)
+ state.process((unsigned char)(pending_line[i]));
+ }
+ else
+ put_string(pending_line, outfp);
+ pending_line.clear();
+ if (pending_lf_lines.length() > 0) {
+ put_string(pending_lf_lines, outfp);
+ pending_lf_lines.clear();
+ }
+ if (!accumulate)
+ immediately_output_references();
+ if (need_syncing) {
+ fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
+ need_syncing = 0;
+ }
+}
+
+static void split_punct(string &line, string &punct)
+{
+ const char *start = line.contents();
+ const char *end = start + line.length();
+ const char *ptr = start;
+ const char *last_token_start = 0;
+ for (;;) {
+ if (ptr >= end)
+ break;
+ last_token_start = ptr;
+ if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
+ || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
+ ptr++;
+ else if (!get_token(&ptr, end))
+ break;
+ }
+ if (last_token_start) {
+ const token_info *ti = lookup_token(last_token_start, end);
+ if (ti->is_punct()) {
+ punct.append(last_token_start, end - last_token_start);
+ line.set_length(last_token_start - start);
+ }
+ }
+}
+
+static void divert_to_temporary_file()
+{
+ outfp = xtmpfile();
+}
+
+static void store_citation(reference *ref)
+{
+ if (ncitations >= citation_max) {
+ if (citation == 0)
+ citation = new reference*[citation_max = 100];
+ else {
+ reference **old_citation = citation;
+ citation_max *= 2;
+ citation = new reference *[citation_max];
+ memcpy(citation, old_citation, ncitations*sizeof(reference *));
+ a_delete old_citation;
+ }
+ }
+ citation[ncitations++] = ref;
+}
+
+static unsigned store_reference(const string &str)
+{
+ if (reference_hash_table == 0) {
+ reference_hash_table = new reference *[17];
+ hash_table_size = 17;
+ for (int i = 0; i < hash_table_size; i++)
+ reference_hash_table[i] = 0;
+ }
+ unsigned flags;
+ reference *ref = make_reference(str, &flags);
+ ref->compute_hash_code();
+ unsigned h = ref->hash();
+ reference **ptr;
+ for (ptr = reference_hash_table + (h % hash_table_size);
+ *ptr != 0;
+ ((ptr == reference_hash_table)
+ ? (ptr = reference_hash_table + hash_table_size - 1)
+ : --ptr))
+ if (same_reference(**ptr, *ref))
+ break;
+ if (*ptr != 0) {
+ if (ref->is_merged())
+ warning("fields ignored because reference already used");
+ delete ref;
+ ref = *ptr;
+ }
+ else {
+ *ptr = ref;
+ ref->set_number(nreferences);
+ nreferences++;
+ ref->pre_compute_label();
+ ref->compute_sort_key();
+ if (nreferences*2 >= hash_table_size) {
+ // Rehash it.
+ reference **old_table = reference_hash_table;
+ int old_size = hash_table_size;
+ hash_table_size = next_size(hash_table_size);
+ reference_hash_table = new reference*[hash_table_size];
+ int i;
+ for (i = 0; i < hash_table_size; i++)
+ reference_hash_table[i] = 0;
+ for (i = 0; i < old_size; i++)
+ if (old_table[i]) {
+ reference **p;
+ for (p = (reference_hash_table
+ + (old_table[i]->hash() % hash_table_size));
+ *p;
+ ((p == reference_hash_table)
+ ? (p = reference_hash_table + hash_table_size - 1)
+ : --p))
+ ;
+ *p = old_table[i];
+ }
+ a_delete old_table;
+ }
+ }
+ if (label_in_text)
+ store_citation(ref);
+ return flags;
+}
+
+unsigned immediately_handle_reference(const string &str)
+{
+ unsigned flags;
+ reference *ref = make_reference(str, &flags);
+ ref->set_number(nreferences);
+ if (label_in_text || label_in_reference) {
+ ref->pre_compute_label();
+ ref->immediate_compute_label();
+ }
+ nreferences++;
+ store_citation(ref);
+ return flags;
+}
+
+static void immediately_output_references()
+{
+ for (int i = 0; i < ncitations; i++) {
+ reference *ref = citation[i];
+ if (label_in_reference) {
+ fputs(".ds [F ", outfp);
+ const string &label = ref->get_label(NORMAL_LABEL);
+ if (label.length() > 0
+ && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
+ putc('"', outfp);
+ put_string(label, outfp);
+ putc('\n', outfp);
+ }
+ ref->output(outfp);
+ delete ref;
+ }
+ ncitations = 0;
+}
+
+static void output_citation_group(reference **v, int n, label_type type,
+ FILE *fp)
+{
+ if (sort_adjacent_labels) {
+ // Do an insertion sort. Usually n will be very small.
+ for (int i = 1; i < n; i++) {
+ int num = v[i]->get_number();
+ reference *temp = v[i];
+ int j;
+ for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
+ v[j + 1] = v[j];
+ v[j + 1] = temp;
+ }
+ }
+ // This messes up if !accumulate.
+ if (accumulate && n > 1) {
+ // remove duplicates
+ int j = 1;
+ for (int i = 1; i < n; i++)
+ if (v[i]->get_label(type) != v[i - 1]->get_label(type))
+ v[j++] = v[i];
+ n = j;
+ }
+ string merged_label;
+ for (int i = 0; i < n; i++) {
+ int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
+ if (nmerged > 0) {
+ put_string(merged_label, fp);
+ i += nmerged;
+ }
+ else
+ put_string(v[i]->get_label(type), fp);
+ if (i < n - 1)
+ put_string(sep_label, fp);
+ }
+}
+
+
+label_processing_state::label_processing_state(reference **p, int n, FILE *f)
+: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
+{
+}
+
+label_processing_state::~label_processing_state()
+{
+ int handled = handle_pending(EOF);
+ assert(!handled);
+ assert(rcount == 0);
+}
+
+int label_processing_state::handle_pending(int c)
+{
+ switch (state) {
+ case NORMAL:
+ break;
+ case PENDING_LABEL:
+ if (c == POST_LABEL_MARKER) {
+ state = PENDING_LABEL_POST;
+ return 1;
+ }
+ else {
+ output_citation_group(rptr, count, type, fp);
+ rptr += count ;
+ rcount -= count;
+ state = NORMAL;
+ }
+ break;
+ case PENDING_LABEL_POST:
+ if (c == PRE_LABEL_MARKER) {
+ state = PENDING_LABEL_POST_PRE;
+ return 1;
+ }
+ else {
+ output_citation_group(rptr, count, type, fp);
+ rptr += count;
+ rcount -= count;
+ put_string(post_label, fp);
+ state = NORMAL;
+ }
+ break;
+ case PENDING_LABEL_POST_PRE:
+ if (c >= LABEL_MARKER
+ && c < LABEL_MARKER + N_LABEL_TYPES
+ && c - LABEL_MARKER == type) {
+ count += 1;
+ state = PENDING_LABEL;
+ return 1;
+ }
+ else {
+ output_citation_group(rptr, count, type, fp);
+ rptr += count;
+ rcount -= count;
+ put_string(sep_label, fp);
+ state = NORMAL;
+ }
+ break;
+ case PENDING_POST:
+ if (c == PRE_LABEL_MARKER) {
+ put_string(sep_label, fp);
+ state = NORMAL;
+ return 1;
+ }
+ else {
+ put_string(post_label, fp);
+ state = NORMAL;
+ }
+ break;
+ }
+ return 0;
+}
+
+void label_processing_state::process(int c)
+{
+ if (handle_pending(c))
+ return;
+ assert(state == NORMAL);
+ switch (c) {
+ case PRE_LABEL_MARKER:
+ put_string(pre_label, fp);
+ state = NORMAL;
+ break;
+ case POST_LABEL_MARKER:
+ state = PENDING_POST;
+ break;
+ case LABEL_MARKER:
+ case LABEL_MARKER + 1:
+ count = 1;
+ state = PENDING_LABEL;
+ type = label_type(c - LABEL_MARKER);
+ break;
+ default:
+ state = NORMAL;
+ putc(c, fp);
+ break;
+ }
+}
+
+extern "C" {
+
+static int rcompare(const void *p1, const void *p2)
+{
+ return compare_reference(**(reference **)p1, **(reference **)p2);
+}
+
+}
+
+void output_references()
+{
+ assert(accumulate);
+ if (nreferences > 0) {
+ int j = 0;
+ int i;
+ for (i = 0; i < hash_table_size; i++)
+ if (reference_hash_table[i] != 0)
+ reference_hash_table[j++] = reference_hash_table[i];
+ assert(j == nreferences);
+ for (; j < hash_table_size; j++)
+ reference_hash_table[j] = 0;
+ qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
+ for (i = 0; i < nreferences; i++)
+ reference_hash_table[i]->set_number(i);
+ compute_labels(reference_hash_table, nreferences);
+ }
+ if (outfp != stdout) {
+ rewind(outfp);
+ {
+ label_processing_state state(citation, ncitations, stdout);
+ int c;
+ while ((c = getc(outfp)) != EOF)
+ state.process(c);
+ }
+ ncitations = 0;
+ fclose(outfp);
+ outfp = stdout;
+ }
+ if (nreferences > 0) {
+ fputs(".]<\n", outfp);
+ for (int i = 0; i < nreferences; i++) {
+ if (sort_fields.length() > 0)
+ reference_hash_table[i]->print_sort_key_comment(outfp);
+ if (label_in_reference) {
+ fputs(".ds [F ", outfp);
+ const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
+ if (label.length() > 0
+ && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
+ putc('"', outfp);
+ put_string(label, outfp);
+ putc('\n', outfp);
+ }
+ reference_hash_table[i]->output(outfp);
+ delete reference_hash_table[i];
+ reference_hash_table[i] = 0;
+ }
+ fputs(".]>\n", outfp);
+ nreferences = 0;
+ }
+ clear_labels();
+}
+
+static reference *find_reference(const char *query, int query_len)
+{
+ // This is so that error messages look better.
+ while (query_len > 0 && csspace(query[query_len - 1]))
+ query_len--;
+ string str;
+ for (int i = 0; i < query_len; i++)
+ str += query[i] == '\n' ? ' ' : query[i];
+ str += '\0';
+ possibly_load_default_database();
+ search_list_iterator iter(&database_list, str.contents());
+ reference_id rid;
+ const char *start;
+ int len;
+ if (!iter.next(&start, &len, &rid)) {
+ error("no matches for `%1'", str.contents());
+ return 0;
+ }
+ const char *end = start + len;
+ while (start < end) {
+ if (*start == '%')
+ break;
+ while (start < end && *start++ != '\n')
+ ;
+ }
+ if (start >= end) {
+ error("found a reference for `%1' but it didn't contain any fields",
+ str.contents());
+ return 0;
+ }
+ reference *result = new reference(start, end - start, &rid);
+ if (iter.next(&start, &len, &rid))
+ warning("multiple matches for `%1'", str.contents());
+ return result;
+}
+
+static reference *make_reference(const string &str, unsigned *flagsp)
+{
+ const char *start = str.contents();
+ const char *end = start + str.length();
+ const char *ptr = start;
+ while (ptr < end) {
+ if (*ptr == '%')
+ break;
+ while (ptr < end && *ptr++ != '\n')
+ ;
+ }
+ *flagsp = 0;
+ for (; start < ptr; start++) {
+ if (*start == '#')
+ *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
+ | FORCE_LEFT_BRACKET)));
+ else if (*start == '[')
+ *flagsp |= FORCE_LEFT_BRACKET;
+ else if (*start == ']')
+ *flagsp |= FORCE_RIGHT_BRACKET;
+ else if (!csspace(*start))
+ break;
+ }
+ if (start >= end) {
+ error("empty reference");
+ return new reference;
+ }
+ reference *database_ref = 0;
+ if (start < ptr)
+ database_ref = find_reference(start, ptr - start);
+ reference *inline_ref = 0;
+ if (ptr < end)
+ inline_ref = new reference(ptr, end - ptr);
+ if (inline_ref) {
+ if (database_ref) {
+ database_ref->merge(*inline_ref);
+ delete inline_ref;
+ return database_ref;
+ }
+ else
+ return inline_ref;
+ }
+ else if (database_ref)
+ return database_ref;
+ else
+ return new reference;
+}
+
+static void do_ref(const string &str)
+{
+ if (accumulate)
+ (void)store_reference(str);
+ else {
+ (void)immediately_handle_reference(str);
+ immediately_output_references();
+ }
+}
+
+static void trim_blanks(string &str)
+{
+ const char *start = str.contents();
+ const char *end = start + str.length();
+ while (end > start && end[-1] != '\n' && csspace(end[-1]))
+ --end;
+ str.set_length(end - start);
+}
+
+void do_bib(const char *filename)
+{
+ FILE *fp;
+ if (strcmp(filename, "-") == 0)
+ fp = stdin;
+ else {
+ errno = 0;
+ fp = fopen(filename, "r");
+ if (fp == 0) {
+ error("can't open `%1': %2", filename, strerror(errno));
+ return;
+ }
+ current_filename = filename;
+ }
+ enum {
+ START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
+ } state = START;
+ string body;
+ for (;;) {
+ int c = getc(fp);
+ if (c == EOF)
+ break;
+ if (illegal_input_char(c)) {
+ error("illegal input character code %1", c);
+ continue;
+ }
+ switch (state) {
+ case START:
+ if (c == '%') {
+ body = c;
+ state = BODY;
+ }
+ else if (c != '\n')
+ state = MIDDLE;
+ break;
+ case MIDDLE:
+ if (c == '\n')
+ state = START;
+ break;
+ case BODY:
+ body += c;
+ if (c == '\n')
+ state = BODY_START;
+ break;
+ case BODY_START:
+ if (c == '\n') {
+ do_ref(body);
+ state = START;
+ }
+ else if (c == '.')
+ state = BODY_DOT;
+ else if (csspace(c)) {
+ state = BODY_BLANK;
+ body += c;
+ }
+ else {
+ body += c;
+ state = BODY;
+ }
+ break;
+ case BODY_BLANK:
+ if (c == '\n') {
+ trim_blanks(body);
+ do_ref(body);
+ state = START;
+ }
+ else if (csspace(c))
+ body += c;
+ else {
+ body += c;
+ state = BODY;
+ }
+ break;
+ case BODY_DOT:
+ if (c == ']') {
+ do_ref(body);
+ state = MIDDLE;
+ }
+ else {
+ body += '.';
+ body += c;
+ state = c == '\n' ? BODY_START : BODY;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ if (c == '\n')
+ current_lineno++;
+ }
+ switch (state) {
+ case START:
+ case MIDDLE:
+ break;
+ case BODY:
+ body += '\n';
+ do_ref(body);
+ break;
+ case BODY_DOT:
+ case BODY_START:
+ do_ref(body);
+ break;
+ case BODY_BLANK:
+ trim_blanks(body);
+ do_ref(body);
+ break;
+ }
+ fclose(fp);
+}
+
+// from the Dragon Book
+
+unsigned hash_string(const char *s, int len)
+{
+ const char *end = s + len;
+ unsigned h = 0, g;
+ while (s < end) {
+ h <<= 4;
+ h += *s++;
+ if ((g = h & 0xf0000000) != 0) {
+ h ^= g >> 24;
+ h ^= g;
+ }
+ }
+ return h;
+}
+
+int next_size(int n)
+{
+ static const int table_sizes[] = {
+ 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
+ 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
+ 16000057, 32000011, 64000031, 128000003, 0
+ };
+
+ const int *p;
+ for (p = table_sizes; *p <= n && *p != 0; p++)
+ ;
+ assert(*p != 0);
+ return *p;
+}
+
diff --git a/src/preproc/refer/refer.h b/src/preproc/refer/refer.h
new file mode 100644
index 00000000..f0ab3cd7
--- /dev/null
+++ b/src/preproc/refer/refer.h
@@ -0,0 +1,78 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <errno.h>
+
+#include "errarg.h"
+#include "error.h"
+#include "lib.h"
+#include "stringclass.h"
+#include "cset.h"
+#include "cmap.h"
+
+#include "defs.h"
+
+unsigned hash_string(const char *, int);
+int next_size(int);
+
+extern string capitalize_fields;
+extern string reverse_fields;
+extern string abbreviate_fields;
+extern string period_before_last_name;
+extern string period_before_initial;
+extern string period_before_hyphen;
+extern string period_before_other;
+extern string sort_fields;
+extern int annotation_field;
+extern string annotation_macro;
+extern string discard_fields;
+extern string articles;
+extern int abbreviate_label_ranges;
+extern string label_range_indicator;
+extern int date_as_label;
+extern string join_authors_exactly_two;
+extern string join_authors_last_two;
+extern string join_authors_default;
+extern string separate_label_second_parts;
+extern string et_al;
+extern int et_al_min_elide;
+extern int et_al_min_total;
+
+extern int compatible_flag;
+
+extern int set_label_spec(const char *);
+extern int set_date_label_spec(const char *);
+extern int set_short_label_spec(const char *);
+
+extern int short_label_flag;
+
+void clear_labels();
+void command_error(const char *,
+ const errarg &arg1 = empty_errarg,
+ const errarg &arg2 = empty_errarg,
+ const errarg &arg3 = empty_errarg);
+
+struct reference;
+
+void compute_labels(reference **, int);
diff --git a/src/preproc/refer/refer.man b/src/preproc/refer/refer.man
new file mode 100644
index 00000000..6b09c973
--- /dev/null
+++ b/src/preproc/refer/refer.man
@@ -0,0 +1,1299 @@
+.ig \"-*- nroff -*-
+Copyright (C) 1989-1995 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the
+entire resulting derived work is distributed under the terms of a
+permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this
+manual into another language, under the above conditions for modified
+versions, except that this permission notice may be included in
+translations approved by the Free Software Foundation instead of in
+the original English.
+..
+.de TQ
+.br
+.ns
+.TP \\$1
+..
+.\" Like TP, but if specified indent is more than half
+.\" the current line-length - indent, use the default indent.
+.de Tp
+.ie \\n(.$=0:((0\\$1)*2u>(\\n(.lu-\\n(.iu)) .TP
+.el .TP "\\$1"
+..
+.\" The BSD man macros can't handle " in arguments to font change macros,
+.\" so use \(ts instead of ".
+.tr \(ts"
+.TH @G@REFER @MAN1EXT@ "@MDATE@" "Groff Version @VERSION@"
+.SH NAME
+@g@refer \- preprocess bibliographic references for groff
+.SH SYNOPSIS
+.nr a \n(.j
+.ad l
+.nr i \n(.i
+.in +\w'\fB@g@refer 'u
+.ti \niu
+.B @g@refer
+.de OP
+.ie \\n(.$-1 .RI "[\ \fB\\$1\fP" "\\$2" "\ ]"
+.el .RB "[\ " "\\$1" "\ ]"
+..
+.OP \-benvCPRS
+.OP \-a n
+.OP \-c fields
+.OP \-f n
+.OP \-i fields
+.OP \-k field
+.OP \-l m,n
+.OP \-p filename
+.OP \-s fields
+.OP \-t n
+.OP \-B field.macro
+.RI [\ filename \|.\|.\|.\ ]
+.br
+.ad \na
+.SH DESCRIPTION
+This file documents the GNU version of
+.BR refer ,
+which is part of the groff document formatting system.
+.B refer
+copies the contents of
+.IR filename \|.\|.\|.
+to the standard output,
+except that lines between
+.B .[
+and
+.B .]
+are interpreted as citations,
+and lines between
+.B .R1
+and
+.B .R2
+are interpreted as commands about how citations are to be processed.
+.LP
+Each citation specifies a reference.
+The citation can specify a reference that is contained in
+a bibliographic database by giving a set of keywords
+that only that reference contains.
+Alternatively it can specify a reference by supplying a database
+record in the citation.
+A combination of these alternatives is also possible.
+.LP
+For each citation,
+.B refer
+can produce a mark in the text.
+This mark consists of some label which can be separated from
+the text and from other labels in various ways.
+For each reference it also outputs
+.B groff
+commands that can be used by a macro package to produce a formatted
+reference for each citation.
+The output of
+.B refer
+must therefore be processed using a suitable macro package.
+The
+.B \-ms
+and
+.B \-me
+macros are both suitable.
+The commands to format a citation's reference can be output immediately after
+the citation,
+or the references may be accumulated,
+and the commands output at some later point.
+If the references are accumulated, then multiple citations of the same
+reference will produce a single formatted reference.
+.LP
+The interpretation of lines between
+.B .R1
+and
+.B .R2
+as commands is a new feature of GNU refer.
+Documents making use of this feature can still be processed by
+Unix refer just by adding the lines
+.RS
+.LP
+.nf
+.ft B
+\&.de R1
+\&.ig R2
+\&..
+.ft
+.fi
+.RE
+to the beginning of the document.
+This will cause
+.B troff
+to ignore everything between
+.B .R1
+and
+.BR .R2 .
+The effect of some commands can also be achieved by options.
+These options are supported mainly for compatibility with Unix refer.
+It is usually more convenient to use commands.
+.LP
+.B refer
+generates
+.B .lf
+lines so that filenames and line numbers in messages produced
+by commands that read
+.B refer
+output will be correct;
+it also interprets lines beginning with
+.B .lf
+so that filenames and line numbers in the messages and
+.B .lf
+lines that it produces will be accurate even if the input has been
+preprocessed by a command such as
+.BR @g@soelim (@MAN1EXT@).
+.SH OPTIONS
+.LP
+Most options are equivalent to commands
+(for a description of these commands see the
+.B Commands
+subsection):
+.TP
+.B \-b
+.B
+no-label-in-text; no-label-in-reference
+.TP
+.B \-e
+.B accumulate
+.TP
+.B \-n
+.B no-default-database
+.TP
+.B \-C
+.B compatible
+.TP
+.B \-P
+.B move-punctuation
+.TP
+.B \-S
+.B
+label "(A.n|Q) ', ' (D.y|D)"; bracket-label " (" ) "; "
+.TP
+.BI \-a n
+.B reverse
+.BI A n
+.TP
+.BI \-c fields
+.B capitalize
+.I fields
+.TP
+.BI \-f n
+.B label
+.BI % n
+.TP
+.BI \-i fields
+.B search-ignore
+.I fields
+.TP
+.B \-k
+.B label
+.B L\(ti%a
+.TP
+.BI \-k field
+.B label
+.IB field \(ti%a
+.TP
+.B \-l
+.B label
+.BI A.nD.y%a
+.TP
+.BI \-l m
+.B label
+.BI A.n+ m D.y%a
+.TP
+.BI \-l, n
+.B label
+.BI A.nD.y\- n %a
+.TP
+.BI \-l m , n
+.B label
+.BI A.n+ m D.y\- n %a
+.TP
+.BI \-p filename
+.B database
+.I filename
+.TP
+.BI \-s spec
+.B sort
+.I spec
+.TP
+.BI \-t n
+.B search-truncate
+.I n
+.LP
+These options are equivalent to the following commands with the
+addition that the filenames specified on the command line are
+processed as if they were arguments to the
+.B bibliography
+command instead of in the normal way:
+.TP
+.B \-B
+.B
+annotate X AP; no-label-in-reference
+.TP
+.BI \-B field . macro
+.B annotate
+.I field
+.IB macro ;
+.B no-label-in-reference
+.LP
+The following options have no equivalent commands:
+.TP
+.B \-v
+Print the version number.
+.TP
+.B \-R
+Don't recognize lines beginning with
+.BR .R1 / .R2 .
+.SH USAGE
+.SS Bibliographic databases
+The bibliographic database is a text file consisting of records
+separated by one or more blank lines.
+Within each record fields start with a
+.B %
+at the beginning of a line.
+Each field has a one character name that immediately follows the
+.BR % .
+It is best to use only upper and lower case letters for the names
+of fields.
+The name of the field should be followed by exactly one space,
+and then by the contents of the field.
+Empty fields are ignored.
+The conventional meaning of each field is as follows:
+.TP
+.B A
+The name of an author.
+If the name contains a title such as
+.B Jr.
+at the end,
+it should be separated from the last name by a comma.
+There can be multiple occurrences of the
+.B A
+field.
+The order is significant.
+It is a good idea always to supply an
+.B A
+field or a
+.B Q
+field.
+.TP
+.B B
+For an article that is part of a book, the title of the book
+.TP
+.B C
+The place (city) of publication.
+.TP
+.B D
+The date of publication.
+The year should be specified in full.
+If the month is specified, the name rather than the number of the month
+should be used, but only the first three letters are required.
+It is a good idea always to supply a
+.B D
+field;
+if the date is unknown, a value such as
+.B in press
+or
+.B unknown
+can be used.
+.TP
+.B E
+For an article that is part of a book, the name of an editor of the book.
+Where the work has editors and no authors,
+the names of the editors should be given as
+.B A
+fields and
+.B ,\ (ed)
+or
+.B ,\ (eds)
+should be appended to the last author.
+.TP
+.B G
+US Government ordering number.
+.TP
+.B I
+The publisher (issuer).
+.TP
+.B J
+For an article in a journal, the name of the journal.
+.TP
+.B K
+Keywords to be used for searching.
+.TP
+.B L
+Label.
+.TP
+.B N
+Journal issue number.
+.TP
+.B O
+Other information.
+This is usually printed at the end of the reference.
+.TP
+.B P
+Page number.
+A range of pages can be specified as
+.IB m \- n\fR.
+.TP
+.B Q
+The name of the author, if the author is not a person.
+This will only be used if there are no
+.B A
+fields.
+There can only be one
+.B Q
+field.
+.TP
+.B R
+Technical report number.
+.TP
+.B S
+Series name.
+.TP
+.B T
+Title.
+For an article in a book or journal,
+this should be the title of the article.
+.TP
+.B V
+Volume number of the journal or book.
+.TP
+.B X
+Annotation.
+.LP
+For all fields except
+.B A
+and
+.BR E ,
+if there is more than one occurrence of a particular field in a record,
+only the last such field will be used.
+.LP
+If accent strings are used, they should follow the character to be accented.
+This means that the
+.B AM
+macro must be used with the
+.B \-ms
+macros.
+Accent strings should not be quoted:
+use one
+.B \e
+rather than two.
+.SS Citations
+The format of a citation is
+.RS
+.BI .[ opening-text
+.br
+.I
+flags keywords
+.br
+.I fields
+.br
+.BI .] closing-text
+.RE
+.LP
+The
+.IR opening-text ,
+.IR closing-text
+and
+.I flags
+components are optional.
+Only one of the
+.I keywords
+and
+.I fields
+components need be specified.
+.LP
+The
+.I keywords
+component says to search the bibliographic databases for a reference
+that contains all the words in
+.IR keywords .
+It is an error if more than one reference if found.
+.LP
+The
+.I fields
+components specifies additional fields to replace or supplement
+those specified in the reference.
+When references are being accumulated and the
+.I keywords
+component is non-empty,
+then additional fields should be specified only on the first
+occasion that a particular reference is cited,
+and will apply to all citations of that reference.
+.LP
+The
+.I opening-text
+and
+.I closing-text
+component specifies strings to be used to bracket the label instead
+of the strings specified in the
+.B bracket-label
+command.
+If either of these components is non-empty,
+the strings specified in the
+.B bracket-label
+command will not be used;
+this behaviour can be altered using the
+.B [
+and
+.B ]
+flags.
+Note that leading and trailing spaces are significant for these components.
+.LP
+The
+.I flags
+component is a list of
+non-alphanumeric characters each of which modifies the treatment
+of this particular citation.
+Unix refer will treat these flags as part of the keywords and
+so will ignore them since they are non-alphanumeric.
+The following flags are currently recognized:
+.TP
+.B #
+This says to use the label specified by the
+.B short-label
+command,
+instead of that specified by the
+.B label
+command.
+If no short label has been specified, the normal label will be used.
+Typically the short label is used with author-date labels
+and consists of only the date and possibly a disambiguating letter;
+the
+.B #
+is supposed to be suggestive of a numeric type of label.
+.TP
+.B [
+Precede
+.I opening-text
+with the first string specified in the
+.B bracket-label
+command.
+.TP
+.B ]
+Follow
+.I closing-text
+with the second string specified in the
+.B bracket-label
+command.
+.LP
+One advantages of using the
+.B [
+and
+.B ]
+flags rather than including the brackets in
+.I opening-text
+and
+.I closing-text
+is that
+you can change the style of bracket used in the document just by changing the
+.B bracket-label
+command.
+Another advantage is that sorting and merging of citations
+will not necessarily be inhibited if the flags are used.
+.LP
+If a label is to be inserted into the text,
+it will be attached to the line preceding the
+.B .[
+line.
+If there is no such line, then an extra line will be inserted before the
+.B .[
+line and a warning will be given.
+.LP
+There is no special notation for making a citation to multiple references.
+Just use a sequence of citations, one for each reference.
+Don't put anything between the citations.
+The labels for all the citations will be attached to the line preceding
+the first citation.
+The labels may also be sorted or merged.
+See the description of the
+.B <>
+label expression, and of the
+.B sort-adjacent-labels
+and
+.B abbreviate-label-ranges
+command.
+A label will not be merged if its citation has a non-empty
+.I opening-text
+or
+.IR closing-text .
+However, the labels for a citation using the
+.B ]
+flag and without any
+.I closing-text
+immediately followed by a citation using the
+.B [
+flag and without any
+.I opening-text
+may be sorted and merged
+even though the first citation's
+.I opening-text
+or the second citation's
+.I closing-text
+is non-empty.
+(If you wish to prevent this just make the first citation's
+.I closing-text
+.BR \e& .)
+.SS Commands
+Commands are contained between lines starting with
+.B .R1
+and
+.BR .R2 .
+Recognition of these lines can be prevented by the
+.B \-R
+option.
+When a
+.B .R1
+line is recognized any accumulated references are flushed out.
+Neither
+.B .R1
+nor
+.B .R2
+lines,
+nor anything between them
+is output.
+.LP
+Commands are separated by newlines or
+.BR ; s.
+.B #
+introduces a comment that extends to the end of the line
+(but does not conceal the newline).
+Each command is broken up into words.
+Words are separated by spaces or tabs.
+A word that begins with
+.B \(ts
+extends to the next
+.B \(ts
+that is not followed by another
+.BR \(ts .
+If there is no such
+.B \(ts
+the word extends to the end of the line.
+Pairs of
+.B \(ts
+in a word beginning with
+.B \(ts
+collapse to a single
+.BR \(ts .
+Neither
+.B #
+nor
+.B ;
+are recognized inside
+.BR \(ts s.
+A line can be continued by ending it with
+.BR \e ;
+this works everywhere except after a
+.BR # .
+.LP
+.ds n \fR*
+Each command
+.I name
+that is marked with \*n has an associated negative command
+.BI no- name
+that undoes the effect of
+.IR name .
+For example, the
+.B no-sort
+command specifies that references should not be sorted.
+The negative commands take no arguments.
+.LP
+In the following description each argument must be a single word;
+.I field
+is used for a single upper or lower case letter naming a field;
+.I fields
+is used for a sequence of such letters;
+.I m
+and
+.I n
+are used for a non-negative numbers;
+.I string
+is used for an arbitrary string;
+.I filename
+is used for the name of a file.
+.Tp \w'\fBabbreviate-label-ranges'u+2n
+.BI abbreviate\*n\ fields\ string1\ string2\ string3\ string4
+Abbreviate the first names of
+.IR fields .
+An initial letter will be separated from another initial letter by
+.IR string1 ,
+from the last name by
+.IR string2 ,
+and from anything else
+(such as a
+.B von
+or
+.BR de )
+by
+.IR string3 .
+These default to a period followed by a space.
+In a hyphenated first name,
+the initial of the first part of the name will be separated from the hyphen by
+.IR string4 ;
+this defaults to a period.
+No attempt is made to handle any ambiguities that might
+result from abbreviation.
+Names are abbreviated before sorting and before
+label construction.
+.TP
+.BI abbreviate-label-ranges\*n\ string
+Three or more adjacent labels that refer to consecutive references
+will be abbreviated to a label consisting
+of the first label, followed by
+.I string
+followed by the last label.
+This is mainly useful with numeric labels.
+If
+.I string
+is omitted it defaults to
+.BR \- .
+.TP
+.B accumulate\*n
+Accumulate references instead of writing out each reference
+as it is encountered.
+Accumulated references will be written out whenever a reference
+of the form
+.RS
+.IP
+.B .[
+.br
+.B $LIST$
+.br
+.B .]
+.LP
+is encountered,
+after all input files hve been processed,
+and whenever
+.B .R1
+line is recognized.
+.RE
+.TP
+.BI annotate\*n\ field\ string
+.I field
+is an annotation;
+print it at the end of the reference as a paragraph preceded by the line
+.RS
+.IP
+.BI . string
+.LP
+If
+.I macro
+is omitted it will default to
+.BR AP ;
+if
+.I field
+is also omitted it will default to
+.BR X .
+Only one field can be an annotation.
+.RE
+.TP
+.BI articles\ string \fR\|.\|.\|.
+.IR string \|.\|.\|.
+are definite or indefinite articles, and should be ignored at the beginning of
+.B T
+fields when sorting.
+Initially,
+.BR the ,
+.B a
+and
+.B an
+are recognized as articles.
+.TP
+.BI bibliography\ filename \fR\|.\|.\|.
+Write out all the references contained in the bibliographic databases
+.IR filename \|.\|.\|.
+.TP
+.BI bracket-label\ string1\ string2\ string3
+In the text, bracket each label
+with
+.I string1
+and
+.IR string2 .
+An occurrence of
+.I string2
+immediately followed by
+.I string1
+will be turned into
+.IR string3 .
+The default behaviour is
+.RS
+.IP
+.B
+bracket-label \e*([. \e*(.] ", "
+.RE
+.TP
+.BI capitalize\ fields
+Convert
+.I fields
+to caps and small caps.
+.TP
+.B compatible\*n
+Recognize
+.B .R1
+and
+.B .R2
+even when followed by a character other than space or newline.
+.TP
+.BI database\ filename \fR\|.\|.\|.
+Search the bibliographic databases
+.IR filename \|.\|.\|.
+For each
+.I filename
+if an index
+.IB filename @INDEX_SUFFIX@
+created by
+.BR @g@indxbib (@MAN1EXT@)
+exists, then it will be searched instead;
+each index can cover multiple databases.
+.TP
+.BI date-as-label\*n\ string
+.I string
+is a label expression that specifies a string with which to replace the
+.B D
+field after constructing the label.
+See the
+.B "Label expressions"
+subsection for a description of label expressions.
+This command is useful if you do not want explicit labels in the
+reference list, but instead want to handle any necessary
+disambiguation by qualifying the date in some way.
+The label used in the text would typically be some combination of the
+author and date.
+In most cases you should also use the
+.B no-label-in-reference
+command.
+For example,
+.RS
+.IP
+.B
+date-as-label D.+yD.y%a*D.-y
+.LP
+would attach a disambiguating letter to the year part of the
+.B D
+field in the reference.
+.RE
+.TP
+.B default-database\*n
+The default database should be searched.
+This is the default behaviour, so the negative version of
+this command is more useful.
+refer determines whether the default database should be searched
+on the first occasion that it needs to do a search.
+Thus a
+.B no-default-database
+command must be given before then,
+in order to be effective.
+.TP
+.BI discard\*n\ fields
+When the reference is read,
+.I fields
+should be discarded;
+no string definitions for
+.I fields
+will be output.
+Initially,
+.I fields
+are
+.BR XYZ .
+.TP
+.BI et-al\*n\ string\ m\ n
+Control use of
+.B
+et al
+in the evaluation of
+.B @
+expressions in label expressions.
+If the number of authors needed to make the author sequence
+unambiguous is
+.I u
+and the total number of authors is
+.I t
+then the last
+.IR t \|\-\| u
+authors will be replaced by
+.I string
+provided that
+.IR t \|\-\| u
+is not less than
+.I m
+and
+.I t
+is not less than
+.IR n .
+The default behaviour is
+.RS
+.IP
+.B
+et-al " et al" 2 3
+.RE
+.TP
+.BI include\ filename
+Include
+.I filename
+and interpret the contents as commands.
+.TP
+.BI join-authors\ string1\ string2\ string3
+This says how authors should be joined together.
+When there are exactly two authors, they will be joined with
+.IR string1 .
+When there are more than two authors, all but the last two will
+be joined with
+.IR string2 ,
+and the last two authors will be joined with
+.IR string3 .
+If
+.I string3
+is omitted,
+it will default to
+.IR string1 ;
+if
+.I string2
+is also omitted it will also default to
+.IR string1 .
+For example,
+.RS
+.IP
+.B
+join-authors " and " ", " ", and "
+.LP
+will restore the default method for joining authors.
+.RE
+.TP
+.B label-in-reference\*n
+When outputting the reference,
+define the string
+.B [F
+to be the reference's label.
+This is the default behaviour; so the negative version
+of this command is more useful.
+.TP
+.B label-in-text\*n
+For each reference output a label in the text.
+The label will be separated from the surrounding text as described in the
+.B bracket-label
+command.
+This is the default behaviour; so the negative version
+of this command is more useful.
+.TP
+.BI label\ string
+.I string
+is a label expression describing how to label each reference.
+.TP
+.BI separate-label-second-parts\ string
+When merging two-part labels, separate the second part of the second
+label from the first label with
+.IR string .
+See the description of the
+.B <>
+label expression.
+.TP
+.B move-punctuation\*n
+In the text, move any punctuation at the end of line past the label.
+It is usually a good idea to give this command unless you are using
+superscripted numbers as labels.
+.TP
+.BI reverse\*n\ string
+Reverse the fields whose names
+are in
+.IR string .
+Each field name can be followed by a number which says
+how many such fields should be reversed.
+If no number is given for a field, all such fields will be reversed.
+.TP
+.BI search-ignore\*n\ fields
+While searching for keys in databases for which no index exists,
+ignore the contents of
+.IR fields .
+Initially, fields
+.B XYZ
+are ignored.
+.TP
+.BI search-truncate\*n\ n
+Only require the first
+.I n
+characters of keys to be given.
+In effect when searching for a given key
+words in the database are truncated to the maximum of
+.I n
+and the length of the key.
+Initially
+.I n
+is 6.
+.TP
+.BI short-label\*n\ string
+.I string
+is a label expression that specifies an alternative (usually shorter)
+style of label.
+This is used when the
+.B #
+flag is given in the citation.
+When using author-date style labels, the identity of the author
+or authors is sometimes clear from the context, and so it
+may be desirable to omit the author or authors from the label.
+The
+.B short-label
+command will typically be used to specify a label containing just
+a date and possibly a disambiguating letter.
+.TP
+.BI sort\*n\ string
+Sort references according to
+.BR string .
+References will automatically be accumulated.
+.I string
+should be a list of field names, each followed by a number,
+indicating how many fields with the name should be used for sorting.
+.B +
+can be used to indicate that all the fields with the name should be used.
+Also
+.B .
+can be used to indicate the references should be sorted using the
+(tentative) label.
+(The
+.B
+Label expressions
+subsection describes the concept of a tentative label.)
+.TP
+.B sort-adjacent-labels\*n
+Sort labels that are adjacent in the text according to their
+position in the reference list.
+This command should usually be given if the
+.B abbreviate-label-ranges
+command has been given,
+or if the label expression contains a
+.B <>
+expression.
+This will have no effect unless references are being accumulated.
+.SS Label expressions
+.LP
+Label expressions can be evaluated both normally and tentatively.
+The result of normal evaluation is used for output.
+The result of tentative evaluation, called the
+.I
+tentative label,
+is used to gather the information
+that normal evaluation needs to disambiguate the label.
+Label expressions specified by the
+.B date-as-label
+and
+.B short-label
+commands are not evaluated tentatively.
+Normal and tentative evaluation are the same for all types
+of expression other than
+.BR @ ,
+.BR * ,
+and
+.B %
+expressions.
+The description below applies to normal evaluation,
+except where otherwise specified.
+.TP
+.I field
+.TQ
+.I field\ n
+The
+.IR n -th
+part of
+.IR field .
+If
+.I n
+is omitted, it defaults to 1.
+.TP
+.BI ' string '
+The characters in
+.I string
+literally.
+.TP
+.B @
+All the authors joined as specified by the
+.B join-authors
+command.
+The whole of each author's name will be used.
+However, if the references are sorted by author
+(that is the sort specification starts with
+.BR A+ ),
+then authors' last names will be used instead, provided that this does
+not introduce ambiguity,
+and also an initial subsequence of the authors may be used
+instead of all the authors, again provided that this does not
+introduce ambiguity.
+The use of only the last name for the
+.IR i -th
+author of some reference
+is considered to be ambiguous if
+there is some other reference,
+such that the first
+.IR i \|-\|1
+authors of the references are the same,
+the
+.IR i -th
+authors are not the same,
+but the
+.IR i -th
+authors' last names are the same.
+A proper initial subsequence of the sequence
+of authors for some reference is considered to be ambiguous if there is
+a reference with some other sequence of authors which also has
+that subsequence as a proper initial subsequence.
+When an initial subsequence of authors is used, the remaining
+authors are replaced by the string specified by the
+.B et-al
+command;
+this command may also specify additional requirements that must be
+met before an initial subsequence can be used.
+.B @
+tentatively evaluates to a canonical representation of the authors,
+such that authors that compare equally for sorting purpose
+will have the same representation.
+.TP
+.BI % n
+.TQ
+.B %a
+.TQ
+.B %A
+.TQ
+.B %i
+.TQ
+.B %I
+The serial number of the reference formatted according to the character
+following the
+.BR % .
+The serial number of a reference is 1 plus the number of earlier references
+with same tentative label as this reference.
+These expressions tentatively evaluate to an empty string.
+.TP
+.IB expr *
+If there is another reference with the same tentative label as
+this reference, then
+.IR expr ,
+otherwise an empty string.
+It tentatively evaluates to an empty string.
+.TP
+.IB expr + n
+.TQ
+.IB expr \- n
+The first
+.RB ( + )
+or last
+.RB ( \- )
+.I n
+upper or lower case letters or digits of
+.IR expr .
+Troff special characters (such as
+.BR \e('a )
+count as a single letter.
+Accent strings are retained but do not count towards the total.
+.TP
+.IB expr .l
+.I expr
+converted to lowercase.
+.TP
+.IB expr .u
+.I expr
+converted to uppercase.
+.TP
+.IB expr .c
+.I expr
+converted to caps and small caps.
+.TP
+.IB expr .r
+.I expr
+reversed so that the last name is first.
+.TP
+.IB expr .a
+.I expr
+with first names abbreviated.
+Note that fields specified in the
+.B abbreviate
+command are abbreviated before any labels are evaluated.
+Thus
+.B .a
+is useful only when you want a field to be abbreviated in a label
+but not in a reference.
+.TP
+.IB expr .y
+The year part of
+.IR expr .
+.TP
+.IB expr .+y
+The part of
+.I expr
+before the year, or the whole of
+.I expr
+if it does not contain a year.
+.TP
+.IB expr .\-y
+The part of
+.I expr
+after the year, or an empty string if
+.I expr
+does not contain a year.
+.TP
+.IB expr .n
+The last name part of
+.IR expr .
+.TP
+.IB expr1 \(ti expr2
+.I expr1
+except that if the last character of
+.I expr1
+is
+.B \-
+then it will be replaced by
+.IR expr2 .
+.TP
+.I expr1\ expr2
+The concatenation of
+.I expr1
+and
+.IR expr2 .
+.TP
+.IB expr1 | expr2
+If
+.I expr1
+is non-empty then
+.I expr1
+otherwise
+.IR expr2 .
+.TP
+.IB expr1 & expr2
+If
+.I expr1
+is non-empty
+then
+.I expr2
+otherwise an empty string.
+.TP
+.IB expr1 ? expr2 : expr3
+If
+.I expr1
+is non-empty
+then
+.I expr2
+otherwise
+.IR expr3 .
+.TP
+.BI < expr >
+The label is in two parts, which are separated by
+.IR expr .
+Two adjacent two-part labels which have the same first part will be
+merged by appending the second part of the second label onto the first
+label separated by the string specified in the
+.B separate-label-second-parts
+command (initially, a comma followed by a space); the resulting label
+will also be a two-part label with the same first part as before
+merging, and so additional labels can be merged into it.
+Note that it is permissible for the first part to be empty;
+this maybe desirable for expressions used in the
+.B short-label
+command.
+.TP
+.BI ( expr )
+The same as
+.IR expr .
+Used for grouping.
+.LP
+The above expressions are listed in order of precedence
+(highest first);
+.B &
+and
+.B |
+have the same precedence.
+.SS Macro interface
+Each reference starts with a call to the macro
+.BR ]- .
+The string
+.B [F
+will be defined to be the label for this reference,
+unless the
+.B no-label-in-reference
+command has been given.
+There then follows a series of string definitions,
+one for each field:
+string
+.BI [ X
+corresponds to field
+.IR X .
+The number register
+.B [P
+is set to 1 if the
+.B P
+field contains a range of pages.
+The
+.BR [T ,
+.B [A
+and
+.B [O
+number registers are set to 1 according as the
+.BR T ,
+.B A
+and
+.B O
+fields end with one of the characters
+.BR .?! .
+The
+.B [E
+number register will be set to 1 if the
+.B [E
+string contains more than one name.
+The reference is followed by a call to the
+.B ][
+macro.
+The first argument to this macro gives a number representing
+the type of the reference.
+If a reference contains a
+.B J
+field, it will be classified as type 1,
+otherwise if it contains a
+.B B
+field, it will type 3,
+otherwise if it contains a
+.B G
+or
+.B R
+field it will be type 4,
+otherwise if contains a
+.B I
+field it will be type 2,
+otherwise it will be type 0.
+The second argument is a symbolic name for the type:
+.BR other ,
+.BR journal-article ,
+.BR book ,
+.B article-in-book
+or
+.BR tech-report .
+Groups of references that have been accumulated
+or are produced by the
+.B bibliography
+command are preceded by a call to the
+.B ]<
+macro and followed by a call to the
+.B ]>
+macro.
+.SH FILES
+.Tp \w'\fB@DEFAULT_INDEX@'u+2n
+.B @DEFAULT_INDEX@
+Default database.
+.TP
+.IB file @INDEX_SUFFIX@
+Index files.
+.SH "SEE ALSO"
+.BR @g@indxbib (@MAN1EXT@),
+.BR @g@lookbib (@MAN1EXT@),
+.BR lkbib (@MAN1EXT@)
+.br
+.SH BUGS
+In label expressions,
+.B <>
+expressions are ignored inside
+.BI . char
+expressions.
diff --git a/src/preproc/refer/token.cc b/src/preproc/refer/token.cc
new file mode 100644
index 00000000..1cf6890f
--- /dev/null
+++ b/src/preproc/refer/token.cc
@@ -0,0 +1,378 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "refer.h"
+#include "token.h"
+
+#define TOKEN_TABLE_SIZE 1009
+// I believe in Icelandic thorn sorts after z.
+#define THORN_SORT_KEY "{"
+
+struct token_table_entry {
+ const char *tok;
+ token_info ti;
+ token_table_entry();
+};
+
+token_table_entry token_table[TOKEN_TABLE_SIZE];
+int ntokens = 0;
+
+static void skip_name(const char **ptr, const char *end)
+{
+ if (*ptr < end) {
+ switch (*(*ptr)++) {
+ case '(':
+ if (*ptr < end) {
+ *ptr += 1;
+ if (*ptr < end)
+ *ptr += 1;
+ }
+ break;
+ case '[':
+ while (*ptr < end)
+ if (*(*ptr)++ == ']')
+ break;
+ break;
+ }
+ }
+}
+
+int get_token(const char **ptr, const char *end)
+{
+ if (*ptr >= end)
+ return 0;
+ char c = *(*ptr)++;
+ if (c == '\\' && *ptr < end) {
+ switch (**ptr) {
+ default:
+ *ptr += 1;
+ break;
+ case '(':
+ case '[':
+ skip_name(ptr, end);
+ break;
+ case '*':
+ case 'f':
+ *ptr += 1;
+ skip_name(ptr, end);
+ break;
+ }
+ }
+ return 1;
+}
+
+token_info::token_info()
+: type(TOKEN_OTHER), sort_key(0), other_case(0)
+{
+}
+
+void token_info::set(token_type t, const char *sk, const char *oc)
+{
+ assert(oc == 0 || t == TOKEN_UPPER || t == TOKEN_LOWER);
+ type = t;
+ sort_key = sk;
+ other_case = oc;
+}
+
+void token_info::sortify(const char *start, const char *end, string &result)
+ const
+{
+ if (sort_key)
+ result += sort_key;
+ else if (type == TOKEN_UPPER || type == TOKEN_LOWER) {
+ for (; start < end; start++)
+ if (csalpha(*start))
+ result += cmlower(*start);
+ }
+}
+
+int token_info::sortify_non_empty(const char *start, const char *end) const
+{
+ if (sort_key)
+ return *sort_key != '\0';
+ if (type != TOKEN_UPPER && type != TOKEN_LOWER)
+ return 0;
+ for (; start < end; start++)
+ if (csalpha(*start))
+ return 1;
+ return 0;
+}
+
+
+void token_info::lower_case(const char *start, const char *end,
+ string &result) const
+{
+ if (type != TOKEN_UPPER) {
+ while (start < end)
+ result += *start++;
+ }
+ else if (other_case)
+ result += other_case;
+ else {
+ while (start < end)
+ result += cmlower(*start++);
+ }
+}
+
+void token_info::upper_case(const char *start, const char *end,
+ string &result) const
+{
+ if (type != TOKEN_LOWER) {
+ while (start < end)
+ result += *start++;
+ }
+ else if (other_case)
+ result += other_case;
+ else {
+ while (start < end)
+ result += cmupper(*start++);
+ }
+}
+
+token_table_entry::token_table_entry()
+: tok(0)
+{
+}
+
+static void store_token(const char *tok, token_type typ,
+ const char *sk = 0, const char *oc = 0)
+{
+ unsigned n = hash_string(tok, strlen(tok)) % TOKEN_TABLE_SIZE;
+ for (;;) {
+ if (token_table[n].tok == 0) {
+ if (++ntokens == TOKEN_TABLE_SIZE)
+ assert(0);
+ token_table[n].tok = tok;
+ break;
+ }
+ if (strcmp(tok, token_table[n].tok) == 0)
+ break;
+ if (n == 0)
+ n = TOKEN_TABLE_SIZE - 1;
+ else
+ --n;
+ }
+ token_table[n].ti.set(typ, sk, oc);
+}
+
+
+token_info default_token_info;
+
+const token_info *lookup_token(const char *start, const char *end)
+{
+ unsigned n = hash_string(start, end - start) % TOKEN_TABLE_SIZE;
+ for (;;) {
+ if (token_table[n].tok == 0)
+ break;
+ if (strlen(token_table[n].tok) == end - start
+ && memcmp(token_table[n].tok, start, end - start) == 0)
+ return &(token_table[n].ti);
+ if (n == 0)
+ n = TOKEN_TABLE_SIZE - 1;
+ else
+ --n;
+ }
+ return &default_token_info;
+}
+
+static void init_ascii()
+{
+ const char *p;
+ for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++) {
+ char buf[2];
+ buf[0] = *p;
+ buf[1] = '\0';
+ store_token(strsave(buf), TOKEN_LOWER);
+ buf[0] = cmupper(buf[0]);
+ store_token(strsave(buf), TOKEN_UPPER);
+ }
+ for (p = "0123456789"; *p; p++) {
+ char buf[2];
+ buf[0] = *p;
+ buf[1] = '\0';
+ const char *s = strsave(buf);
+ store_token(s, TOKEN_OTHER, s);
+ }
+ for (p = ".,:;?!"; *p; p++) {
+ char buf[2];
+ buf[0] = *p;
+ buf[1] = '\0';
+ store_token(strsave(buf), TOKEN_PUNCT);
+ }
+ store_token("-", TOKEN_HYPHEN);
+}
+
+static void store_letter(const char *lower, const char *upper,
+ const char *sort_key = 0)
+{
+ store_token(lower, TOKEN_LOWER, sort_key, upper);
+ store_token(upper, TOKEN_UPPER, sort_key, lower);
+}
+
+static void init_letter(unsigned char uc_code, unsigned char lc_code,
+ const char *sort_key)
+{
+ char lbuf[2];
+ lbuf[0] = lc_code;
+ lbuf[1] = 0;
+ char ubuf[2];
+ ubuf[0] = uc_code;
+ ubuf[1] = 0;
+ store_letter(strsave(lbuf), strsave(ubuf), sort_key);
+}
+
+static void init_latin1()
+{
+ init_letter(0xc0, 0xe0, "a");
+ init_letter(0xc1, 0xe1, "a");
+ init_letter(0xc2, 0xe2, "a");
+ init_letter(0xc3, 0xe3, "a");
+ init_letter(0xc4, 0xe4, "a");
+ init_letter(0xc5, 0xe5, "a");
+ init_letter(0xc6, 0xe6, "ae");
+ init_letter(0xc7, 0xe7, "c");
+ init_letter(0xc8, 0xe8, "e");
+ init_letter(0xc9, 0xe9, "e");
+ init_letter(0xca, 0xea, "e");
+ init_letter(0xcb, 0xeb, "e");
+ init_letter(0xcc, 0xec, "i");
+ init_letter(0xcd, 0xed, "i");
+ init_letter(0xce, 0xee, "i");
+ init_letter(0xcf, 0xef, "i");
+
+ init_letter(0xd0, 0xf0, "d");
+ init_letter(0xd1, 0xf1, "n");
+ init_letter(0xd2, 0xf2, "o");
+ init_letter(0xd3, 0xf3, "o");
+ init_letter(0xd4, 0xf4, "o");
+ init_letter(0xd5, 0xf5, "o");
+ init_letter(0xd6, 0xf6, "o");
+ init_letter(0xd8, 0xf8, "o");
+ init_letter(0xd9, 0xf9, "u");
+ init_letter(0xda, 0xfa, "u");
+ init_letter(0xdb, 0xfb, "u");
+ init_letter(0xdc, 0xfc, "u");
+ init_letter(0xdd, 0xfd, "y");
+ init_letter(0xde, 0xfe, THORN_SORT_KEY);
+
+ store_token("\337", TOKEN_LOWER, "ss", "SS");
+ store_token("\377", TOKEN_LOWER, "y", "Y");
+}
+
+static void init_two_char_letter(char l1, char l2, char u1, char u2,
+ const char *sk = 0)
+{
+ char buf[6];
+ buf[0] = '\\';
+ buf[1] = '(';
+ buf[2] = l1;
+ buf[3] = l2;
+ buf[4] = '\0';
+ const char *p = strsave(buf);
+ buf[2] = u1;
+ buf[3] = u2;
+ store_letter(p, strsave(buf), sk);
+ buf[1] = '[';
+ buf[4] = ']';
+ buf[5] = '\0';
+ p = strsave(buf);
+ buf[2] = l1;
+ buf[3] = l2;
+ store_letter(strsave(buf), p, sk);
+
+}
+
+static void init_special_chars()
+{
+ const char *p;
+ for (p = "':^`~"; *p; p++)
+ for (const char *q = "aeiouy"; *q; q++) {
+ // Use a variable to work around bug in gcc 2.0
+ char c = cmupper(*q);
+ init_two_char_letter(*p, *q, *p, c);
+ }
+ for (p = "/l/o~n,coeaeij"; *p; p += 2) {
+ // Use variables to work around bug in gcc 2.0
+ char c0 = cmupper(p[0]);
+ char c1 = cmupper(p[1]);
+ init_two_char_letter(p[0], p[1], c0, c1);
+ }
+ init_two_char_letter('v', 's', 'v', 'S', "s");
+ init_two_char_letter('v', 'z', 'v', 'Z', "z");
+ init_two_char_letter('o', 'a', 'o', 'A', "a");
+ init_two_char_letter('T', 'p', 'T', 'P', THORN_SORT_KEY);
+ init_two_char_letter('-', 'd', '-', 'D');
+
+ store_token("\\(ss", TOKEN_LOWER, 0, "SS");
+ store_token("\\[ss]", TOKEN_LOWER, 0, "SS");
+
+ store_token("\\(Sd", TOKEN_LOWER, "d", "\\(-D");
+ store_token("\\[Sd]", TOKEN_LOWER, "d", "\\[-D]");
+ store_token("\\(hy", TOKEN_HYPHEN);
+ store_token("\\[hy]", TOKEN_HYPHEN);
+ store_token("\\(en", TOKEN_RANGE_SEP);
+ store_token("\\[en]", TOKEN_RANGE_SEP);
+}
+
+static void init_strings()
+{
+ char buf[6];
+ buf[0] = '\\';
+ buf[1] = '*';
+ for (const char *p = "'`^^,:~v_o./;"; *p; p++) {
+ buf[2] = *p;
+ buf[3] = '\0';
+ store_token(strsave(buf), TOKEN_ACCENT);
+ buf[2] = '[';
+ buf[3] = *p;
+ buf[4] = ']';
+ buf[5] = '\0';
+ store_token(strsave(buf), TOKEN_ACCENT);
+ }
+
+ // -ms special letters
+ store_letter("\\*(th", "\\*(Th", THORN_SORT_KEY);
+ store_letter("\\*[th]", "\\*[Th]", THORN_SORT_KEY);
+ store_letter("\\*(d-", "\\*(D-");
+ store_letter("\\*[d-]", "\\*[D-]");
+ store_letter("\\*(ae", "\\*(Ae", "ae");
+ store_letter("\\*[ae]", "\\*[Ae]", "ae");
+ store_letter("\\*(oe", "\\*(Oe", "oe");
+ store_letter("\\*[oe]", "\\*[Oe]", "oe");
+
+ store_token("\\*3", TOKEN_LOWER, "y", "Y");
+ store_token("\\*8", TOKEN_LOWER, "ss", "SS");
+ store_token("\\*q", TOKEN_LOWER, "o", "O");
+}
+
+struct token_initer {
+ token_initer();
+};
+
+static token_initer the_token_initer;
+
+token_initer::token_initer()
+{
+ init_ascii();
+ init_latin1();
+ init_special_chars();
+ init_strings();
+ default_token_info.set(TOKEN_OTHER);
+}
diff --git a/src/preproc/refer/token.h b/src/preproc/refer/token.h
new file mode 100644
index 00000000..6da430d6
--- /dev/null
+++ b/src/preproc/refer/token.h
@@ -0,0 +1,88 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+enum token_type {
+ TOKEN_OTHER,
+ TOKEN_UPPER,
+ TOKEN_LOWER,
+ TOKEN_ACCENT,
+ TOKEN_PUNCT,
+ TOKEN_HYPHEN,
+ TOKEN_RANGE_SEP
+};
+
+class token_info {
+private:
+ token_type type;
+ const char *sort_key;
+ const char *other_case;
+public:
+ token_info();
+ void set(token_type, const char *sk = 0, const char *oc = 0);
+ void lower_case(const char *start, const char *end, string &result) const;
+ void upper_case(const char *start, const char *end, string &result) const;
+ void sortify(const char *start, const char *end, string &result) const;
+ int sortify_non_empty(const char *start, const char *end) const;
+ int is_upper() const;
+ int is_lower() const;
+ int is_accent() const;
+ int is_other() const;
+ int is_punct() const;
+ int is_hyphen() const;
+ int is_range_sep() const;
+};
+
+inline int token_info::is_upper() const
+{
+ return type == TOKEN_UPPER;
+}
+
+inline int token_info::is_lower() const
+{
+ return type == TOKEN_LOWER;
+}
+
+inline int token_info::is_accent() const
+{
+ return type == TOKEN_ACCENT;
+}
+
+inline int token_info::is_other() const
+{
+ return type == TOKEN_OTHER;
+}
+
+inline int token_info::is_punct() const
+{
+ return type == TOKEN_PUNCT;
+}
+
+inline int token_info::is_hyphen() const
+{
+ return type == TOKEN_HYPHEN;
+}
+
+inline int token_info::is_range_sep() const
+{
+ return type == TOKEN_RANGE_SEP;
+}
+
+int get_token(const char **ptr, const char *end);
+const token_info *lookup_token(const char *start, const char *end);