summaryrefslogtreecommitdiff
path: root/utils/unlit
diff options
context:
space:
mode:
authorSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
committerSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
commit0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree8e2afe0ab48ee33cf95009809d67c9649573ef92 /utils/unlit
parent28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
downloadhaskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes. The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project, it is just the GHC build system. No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'utils/unlit')
-rw-r--r--utils/unlit/Makefile16
-rw-r--r--utils/unlit/README8
-rw-r--r--utils/unlit/unlit.c401
3 files changed, 425 insertions, 0 deletions
diff --git a/utils/unlit/Makefile b/utils/unlit/Makefile
new file mode 100644
index 0000000000..15e7fc4252
--- /dev/null
+++ b/utils/unlit/Makefile
@@ -0,0 +1,16 @@
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+
+C_SRCS=unlit.c
+C_PROG=unlit
+SRC_CC_OPTS += -O
+
+# Get it over with!
+boot :: all
+
+#
+# Install unlit in lib/
+#
+INSTALL_LIBEXECS += $(C_PROG)
+
+include $(TOP)/mk/target.mk
diff --git a/utils/unlit/README b/utils/unlit/README
new file mode 100644
index 0000000000..4dd2ef5132
--- /dev/null
+++ b/utils/unlit/README
@@ -0,0 +1,8 @@
+This "unlit" program, used by the GHC driver, is originally by Mark
+Jones (then at Oxford). It is taken in its present form *directly*
+from the LML/HBC distribution (from Chalmers).
+
+We are grateful for this piece of shared code.
+
+For more "powerful" swizzling of literate scripts, please see the
+"literate" stuff from Glasgow.
diff --git a/utils/unlit/unlit.c b/utils/unlit/unlit.c
new file mode 100644
index 0000000000..366302156a
--- /dev/null
+++ b/utils/unlit/unlit.c
@@ -0,0 +1,401 @@
+/* unlit.c Wed Dec 5 17:16:24 GMT 1990
+ *
+ * Literate script filter. In contrast with the format used by most
+ * programming languages, a literate script is a program in which
+ * comments are given the leading role, whilst program text must be
+ * explicitly flagged as such by placing a `>' character in the first
+ * column on each line. It is hoped that this style of programming will
+ * encourage the writing of accurate and clearly documented programs
+ * in which the writer may include motivating arguments, examples
+ * and explanations.
+ *
+ * Unlit is a filter that can be used to strip all of the comment lines
+ * out of a literate script file. The command format for unlit is:
+ * unlit [-n] [-q] ifile ofile
+ * where ifile and ofile are the names of the input (literate script) and
+ * output (raw program) files respectively. Either of these names may
+ * be `-' representing the standard input or the standard output resp.
+ * A number of rules are used in an attempt to guard against the most
+ * common errors that are made when writing literate scripts:
+ * 1) Empty script files are not permitted. A file in which no lines
+ * begin with `>' usually indicates a file in which the programmer
+ * has forgotten about the literate script convention.
+ * 2) A line containing part of program definition (i.e. preceeded by `>')
+ * cannot be used immediately before or after a comment line unless
+ * the comment line is blank. This error usually indicates that
+ * the `>' character has been omitted from a line in a section of
+ * program spread over a number of lines.
+ * Using the -q (quiet) flag suppresses the signalling of these error
+ * conditions. The default behaviour can be selected explicitly using
+ * the -n (noisy) option so that any potential errors in the script file
+ * are reported.
+ *
+ * The original idea for the use of literate scripts is due to Richard
+ * Bird of the programming Research Group, Oxford and was initially
+ * adopted for use in the implementation of the functional programming
+ * language Orwell used for teaching in Oxford. This idea has subsequently
+ * been borrowed in a number of other language implementations.
+ *
+ * Modified to understand \begin{code} ... \end{code} used in Glasgow. -- LA
+ * And \begin{pseudocode} ... \end{pseudocode}. -- LA
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#define NULLSTR ((char *)0)
+#define DEFNCHAR '>'
+#define MISSINGBLANK "unlit: Program line next to comment"
+#define EMPTYSCRIPT "unlit: No definitions in file (perhaps you forgot the '>'s?)"
+#define USAGE "usage: unlit [-q] [-n] [-c] [-#] [-P] [-h label] file1 file2\n"
+#define CANNOTOPEN "unlit: cannot open \"%s\"\n"
+#define CANNOTWRITE "unlit: error writing \"%s\"\n"
+#define CANNOTWRITESTDOUT "unlit: error writing standard output\n"
+#define DISTINCTNAMES "unlit: input and output filenames must differ\n"
+#define MISSINGENDCODE "unlit: missing \\end{code}\n"
+
+#define BEGINCODE "\\begin{code}"
+#define LENBEGINCODE 12
+#define ENDCODE "\\end{code}"
+#define LENENDCODE 10
+#ifdef PSEUDOCODE
+/* According to Will Partain, the inventor of pseudocode, this gone now. */
+#define MISSINGENDPSEUDOCODE "unlit: missing \\end{pseudocode}\n"
+#define BEGINPSEUDOCODE "\\begin{pseudocode}"
+#define LENBEGINPSEUDOCODE 18
+#define ENDPSEUDOCODE "\\end{pseudocode}"
+#define LENENDPSEUDOCODE 16
+#endif
+
+typedef enum { START, BLANK, TEXT, DEFN, BEGIN, /*PSEUDO,*/ END, HASH, SHEBANG } line;
+#define isWhitespace(c) (c==' ' || c=='\t' || c=='\r')
+#define isLineTerm(c) (c=='\n' || c==EOF)
+
+static int noisy = 1; /* 0 => keep quiet about errors, 1 => report errors */
+static int errors = 0; /* count the number of errors reported */
+static int crunchnl = 0; /* don't print \n for removed lines */
+static int leavecpp = 1; /* leave preprocessor lines */
+static int ignore_shebang = 1; /* Leave out shebang (#!) lines */
+static int no_line_pragma = 0; /* Leave out initial line pragma */
+
+static char* prefix_str = NULL; /* Prefix output with a string */
+
+static char *ofilename = NULL;
+
+/* complain(file,line,what)
+ *
+ * print error message `what' for `file' at `line'. The error is suppressed
+ * if noisy is not set.
+ */
+
+complain(file, lin, what)
+char *file;
+char *what;
+int lin; {
+ if (noisy) {
+ if (file)
+ fprintf(stderr, "%s ", file);
+ fprintf(stderr,"line %d: %s\n",lin,what);
+ errors++;
+ }
+}
+
+writeerror()
+{
+ if (!strcmp(ofilename,"-")) {
+ fprintf(stderr, CANNOTWRITESTDOUT);
+ } else {
+ fprintf(stderr, CANNOTWRITE, ofilename);
+ }
+ exit(1);
+}
+
+myputc(c, ostream)
+char c;
+FILE *ostream; {
+ if (putc(c,ostream) == EOF) {
+ writeerror();
+ }
+}
+
+#define TABPOS 8
+
+/* As getc, but does TAB expansion */
+int
+egetc(istream)
+FILE *istream;
+{
+ static int spleft = 0;
+ static int linepos = 0;
+ int c;
+
+ if (spleft > 0) {
+ spleft--;
+ linepos++;
+ return ' ';
+ }
+ c = getc(istream);
+ if (c == EOF)
+ return c;
+ else if (c == '\n' || c == '\f') {
+ linepos = 0;
+ return c;
+ } else if (c == '\t') {
+ spleft = TABPOS - linepos % TABPOS;
+ spleft--;
+ linepos++;
+ return ' ';
+ } else {
+ linepos++;
+ return c;
+ }
+
+}
+
+/* readline(istream, ostream)
+ *
+ * Read a line from the input stream `istream', and return a value
+ * indicating whether that line was:
+ * BLANK (whitespace only),
+ * DEFN (first character is DEFNCHAR),
+ * TEXT (a line of text)
+ * BEGIN (a \begin{code} line)
+ * PSEUDO (a \begin{pseodocode} line)
+ * HASH (a preprocessor line)
+ * or END (indicating an EOF).
+ * Lines of type DEFN are copied to the output stream `ostream'
+ * (without the leading DEFNCHAR). BLANK and TEXT lines are
+ * replaced by empty (i.e. blank lines) in the output stream, so
+ * that error messages refering to line numbers in the output file
+ * can also be used to locate the corresponding line in the input
+ * stream.
+ */
+
+line readline(istream,ostream)
+FILE *istream, *ostream; {
+ int c, c1;
+ char buf[100];
+ int i;
+
+ c = egetc(istream);
+
+ if (c==EOF)
+ return END;
+
+ if ( c == '#' ) {
+ if ( ignore_shebang ) {
+ c1 = egetc(istream);
+ if ( c1 == '!' ) {
+ while (c=egetc(istream), !isLineTerm(c)) ;
+ return SHEBANG;
+ }
+ myputc(c, ostream);
+ c=c1;
+ }
+ if ( leavecpp ) {
+ myputc(c, ostream);
+ while (c=egetc(istream), !isLineTerm(c))
+ myputc(c,ostream);
+ myputc('\n',ostream);
+ return HASH;
+ }
+ }
+
+ if (c==DEFNCHAR) {
+/* myputc(' ',ostream);*/
+ while (c=egetc(istream), !isLineTerm(c))
+ myputc(c,ostream);
+ myputc('\n',ostream);
+ return DEFN;
+ }
+
+ if (!crunchnl)
+ myputc('\n',ostream);
+
+ while (isWhitespace(c))
+ c=egetc(istream);
+ if (isLineTerm(c))
+ return BLANK;
+
+ i = 0;
+ buf[i++] = c;
+ while (c=egetc(istream), !isLineTerm(c))
+ if (i < sizeof buf - 1)
+ buf[i++] = c;
+ while(i > 0 && isspace(buf[i-1]))
+ i--;
+ buf[i] = 0;
+ if (strcmp(buf, BEGINCODE) == 0)
+ return BEGIN;
+#ifdef PSEUDOCODE
+ else if (strcmp(buf, BEGINPSEUDOCODE) == 0)
+ return PSEUDO;
+#endif
+ else
+ return TEXT;
+}
+
+
+/* unlit(file,istream,ostream)
+ *
+ * Copy the file named `file', accessed using the input stream `istream'
+ * to the output stream `ostream', removing any comments and checking
+ * for bad use of literate script features:
+ * - there should be at least one BLANK line between a DEFN and TEXT
+ * - there should be at least one DEFN line in a script.
+ */
+
+unlit(file, istream, ostream)
+char *file;
+FILE *istream;
+FILE *ostream; {
+ line last, this=START;
+ int linesread=0;
+ int defnsread=0;
+
+ do {
+ last = this;
+ this = readline(istream, ostream);
+ linesread++;
+ if (this==DEFN)
+ defnsread++;
+ if (last==DEFN && this==TEXT)
+ complain(file, linesread-1, MISSINGBLANK);
+ if (last==TEXT && this==DEFN)
+ complain(file, linesread, MISSINGBLANK);
+ if (this == BEGIN) {
+ /* start of code, copy to end */
+ char lineb[1000];
+ for(;;) {
+ if (fgets(lineb, sizeof lineb, istream) == NULL) {
+ complain(file, linesread, MISSINGENDCODE);
+ exit(1);
+ }
+ linesread++;
+ if (strncmp(lineb,ENDCODE,LENENDCODE) == 0) {
+ myputc('\n', ostream);
+ break;
+ }
+ fputs(lineb, ostream);
+ }
+ defnsread++;
+ }
+#ifdef PSEUDOCODE
+ if (this == PSEUDO) {
+ char lineb[1000];
+ for(;;) {
+ if (fgets(lineb, sizeof lineb, istream) == NULL) {
+ complain(file, linesread, MISSINGENDPSEUDOCODE);
+ exit(1);
+ }
+ linesread++;
+ myputc('\n', ostream);
+ if (strncmp(lineb,ENDPSEUDOCODE,LENENDPSEUDOCODE) == 0) {
+ break;
+ }
+ }
+ }
+#endif
+ } while(this!=END);
+
+ if (defnsread==0)
+ complain(file,linesread,EMPTYSCRIPT);
+}
+
+/* main(argc, argv)
+ *
+ * Main program. Processes command line arguments, looking for leading:
+ * -q quiet mode - do not complain about bad literate script files
+ * -n noisy mode - complain about bad literate script files.
+ * -r remove cpp droppings in output.
+ * -P don't output any CPP line pragmas.
+ * Expects two additional arguments, a file name for the input and a file
+ * name for the output file. These two names must normally be distinct.
+ * An exception is made for the special name "-" which can be used in either
+ * position to specify the standard input or the standard output respectively.
+ */
+
+main(argc,argv)
+int argc;
+char **argv; {
+ FILE *istream, *ostream;
+ char *file;
+
+ for (argc--, argv++; argc > 0; argc--, argv++)
+ if (strcmp(*argv,"-n")==0)
+ noisy = 1;
+ else if (strcmp(*argv,"-q")==0)
+ noisy = 0;
+ else if (strcmp(*argv,"-c")==0)
+ crunchnl = 1;
+ else if (strcmp(*argv,"-P")==0)
+ no_line_pragma = 1;
+ else if (strcmp(*argv,"-h")==0) {
+ if (argc > 1) {
+ argc--; argv++;
+ if (prefix_str)
+ free(prefix_str);
+ prefix_str = (char*)malloc(sizeof(char)*(1+strlen(*argv)));
+ if (prefix_str)
+ strcpy(prefix_str, *argv);
+ }
+ } else if (strcmp(*argv,"-#")==0)
+ ignore_shebang = 0;
+ else
+ break;
+
+ if (argc!=2) {
+ fprintf(stderr, USAGE);
+ exit(1);
+ }
+
+ if (strcmp(argv[0],argv[1])==0 && strcmp(argv[0],"-")!=0) {
+ fprintf(stderr, DISTINCTNAMES);
+ exit(1);
+ }
+
+ file = argv[0];
+ if (strcmp(argv[0], "-")==0) {
+ istream = stdin;
+ file = "stdin";
+ }
+ else
+ if ((istream=fopen(argv[0], "r")) == NULL) {
+ fprintf(stderr, CANNOTOPEN, argv[0]);
+ exit(1);
+ }
+
+ ofilename=argv[1];
+ if (strcmp(argv[1], "-")==0)
+ ostream = stdout;
+ else
+ if ((ostream=fopen(argv[1], "w")) == NULL) {
+ fprintf(stderr, CANNOTOPEN, argv[1]);
+ exit(1);
+ }
+
+ /* Prefix the output with line pragmas */
+ if (!no_line_pragma && prefix_str) {
+ /* Both GHC and CPP understand the #line pragma.
+ * We used to throw in both a #line and a {-# LINE #-} pragma
+ * here, but CPP doesn't understand {-# LINE #-} so it thought
+ * the line numbers were off by one. We could put the {-# LINE
+ * #-} before the #line, but there's no point since GHC
+ * understands #line anyhow. --SDM 8/2003
+ */
+ fprintf(ostream, "#line 1 \"%s\"\n", prefix_str);
+ }
+
+ unlit(file, istream, ostream);
+
+ if (istream != stdin) fclose(istream);
+ if (ostream != stdout) {
+ if (fclose(ostream) == EOF) {
+ writeerror();
+ }
+ }
+
+ exit(errors==0 ? 0 : 1);
+}