diff options
author | rhlabs <rhlabs> | 1998-02-23 19:01:36 +0000 |
---|---|---|
committer | rhlabs <rhlabs> | 1998-02-23 19:01:36 +0000 |
commit | 98d20420baa8233fb86bdb8d7d787bf55ee5fc5e (patch) | |
tree | 899284166dc64a07dc3fdcd8afd604d0199da904 /src | |
parent | 9aaae47386589dab4bb6e6dd581207c325437f3c (diff) | |
download | yelp-98d20420baa8233fb86bdb8d7d787bf55ee5fc5e.tar.gz |
GNU info to html converter
Dr Mike <msf@redhat.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/info2html/Makefile.am | 19 | ||||
-rw-r--r-- | src/info2html/data.h | 62 | ||||
-rw-r--r-- | src/info2html/html.c | 835 | ||||
-rw-r--r-- | src/info2html/html.h | 22 | ||||
-rw-r--r-- | src/info2html/main.c | 108 | ||||
-rw-r--r-- | src/info2html/parse.c | 513 | ||||
-rw-r--r-- | src/info2html/parse.h | 25 | ||||
-rw-r--r-- | src/info2html/utils.c | 273 | ||||
-rw-r--r-- | src/info2html/utils.h | 17 | ||||
-rw-r--r-- | src/info2html/version.h | 6 |
10 files changed, 1880 insertions, 0 deletions
diff --git a/src/info2html/Makefile.am b/src/info2html/Makefile.am new file mode 100644 index 00000000..d6ecd854 --- /dev/null +++ b/src/info2html/Makefile.am @@ -0,0 +1,19 @@ +## Process this file with automake to produce Makefile.in. + +INCLUDES = \ + -I$(includedir) \ + -I$(top_srcdir)/intl + + +LDADD = \ + $(GNOME_LIBDIR) \ + $(GNOMEUI_LIBS) \ + $(INTLLIBS) + +bin_PROGRAMS = gnome-info2html + +gnome_info2html_SOURCES = main.c parse.c utils.c html.c + +CFLAGS += -Wall + + diff --git a/src/info2html/data.h b/src/info2html/data.h new file mode 100644 index 00000000..581e4ca5 --- /dev/null +++ b/src/info2html/data.h @@ -0,0 +1,62 @@ +#ifndef DATA_H +#define DATA_H + +/* data.h - first cut at data structures for info2html filter */ +/* many of these are motivated by the source code to the 'info' program */ + +/* be quiet or not? */ +static int be_quiet=1; + +/* line_number we're on */ +static int work_line_number; + +/* file we're working on */ +char work_filename[1024]; + +/* node we're working on */ +char work_node[1024]; + +/* some type's we'll want to use below */ +typedef struct info_menu_entry MENU_ENTRY; + +/* the basic component of an info file is a Node */ +/* a node is described by (FILENAME)NODENAME */ +/* .next and .prev are normally along the same branch as current node */ +/* .up is normally 'one branch' up the tree above current branch. */ +/* All can be arbitrary links however */ +/* menu entry is just a linked list of references */ + +typedef struct { + char *filename; /* file in which this node exists */ + char *nodename; /* name of this node */ + char *contents; /* text within this node */ + int contlen; /* length of contents */ + char *next; /* node which follows this one */ + char *prev; /* node previous to this one */ + char *up; /* node above this one */ + MENU_ENTRY *menu; /* linked list of refs from this node */ + char *menu_start; /* ptr to start of menu text in contents */ +} NODE; + +/* a reference is a link to a node */ +typedef struct { + char *refname; /* menu name for reference */ + NODE *node; /* descriptor of node we point at */ +} REFERENCE; + + +struct info_menu_entry{ + char *header; /* header to go before menu */ + REFERENCE *ref; + struct info_menu_entry *next; +}; + +#define INFO_FF '\014' +#define INFO_COOKIE '\037' + + +#define MENU_START "* Menu:" +#define MENU_ENTRY "* " + + +#endif /* DATA_H */ diff --git a/src/info2html/html.c b/src/info2html/html.c new file mode 100644 index 00000000..6e12b8ff --- /dev/null +++ b/src/info2html/html.c @@ -0,0 +1,835 @@ +/* handles all html operations */ + +#include <stdio.h> +#include <unistd.h> +#include <ctype.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> + +#include <glib.h> + +#include "data.h" +#include "html.h" +#include "parse.h" +#include "utils.h" + +#define USE_FILE_URLS + +/* print out the url for a info file */ +char *form_info_tag_href( char *nodefile, char *nodename ) +{ + char tmp[1024]; + char *escaped_nodename; + + escaped_nodename = escape_html_chars( nodename ); + +#if 0 +#ifdef USE_FILE_URLS + snprintf(tmp,sizeof(tmp), + "HREF=\"../%s/%s.html\"", nodefile, escaped_nodename ); +#else + snprintf(tmp,sizeof(tmp), + "HREF=\"/cgi-bin/grab-info-file?doctype=info&" + "docname=%s&doctag=%s\"", nodefile, escaped_nodename ); +#endif +#endif +/* snprintf(tmp,sizeof(tmp),"HREF=\"info:%s#%s\"", nodefile, escaped_nodename ); */ + + snprintf(tmp,sizeof(tmp),"HREF=\"info:%s#%s\"", nodefile, escaped_nodename ); + + if (escaped_nodename) + g_free(escaped_nodename); + return g_strdup(tmp); +} + + +/* returns zero if success making link from destfile -> index.html in */ +/* specified directory. If it already exists, just returns with success */ +int make_Top_link( char *destdir, char *destfile ) +{ + struct stat filestat; + char *indexlink; + + indexlink = (char *) g_malloc( strlen(destdir) + 20); + strcpy(indexlink, destdir); + strcat(indexlink, "/index.html"); + + if (lstat(indexlink, &filestat)) + { + if (errno == ENOENT) + { + if (symlink(destfile, indexlink)) + { + fprintf(stderr,"Error creating link to %s\n",indexlink); + perror("Error was"); + exit(1); + } + } + else + { + fprintf(stderr,"Error stat'ing file %s\n",indexlink); + perror("Error was"); + exit(1); + } + } + else if (!S_ISLNK(filestat.st_mode)) + { + fprintf(stderr, "file %s exists and isnt a link\n",indexlink); + fprintf(stderr, "FIX ME!!!\n"); + g_free(indexlink); + return -1; + } + return 0; +} + +/* return non-zero if error with making directory */ +int make_info_dir( char *destdir ) +{ + struct stat filestat; + + if (stat(destdir, &filestat)) + { + if (errno == ENOENT) + { + if (mkdir(destdir, 01777)) + { + fprintf(stderr,"Error creating directory %s\n",destdir); + perror("Error was"); + exit(1); + } + } + else + { + fprintf(stderr,"Error stat'ing directory %s\n",destdir); + perror("Error was"); + exit(1); + } + } + else if (!S_ISDIR(filestat.st_mode)) + { + fprintf(stderr, "Info dir %s exists and isnt a directory!\n",destdir); + fprintf(stderr, "FIX ME!!!\n"); + return -1; + } + return 0; +} + +/* write a link to another document */ +void write_node_link_html( FILE *f, char *nodefile, char *refname, char *ref ) +{ + char *converted_nodename; + char *href; + + if (ref) + { + if (strcasecmp(ref, "(dir)")) + { + converted_nodename = g_strdup( ref ); + map_spaces_to_underscores( converted_nodename ); + href = form_info_tag_href(nodefile, converted_nodename); + fprintf(f,"<A %s>%s%s</A>\n", href, refname, ref); + g_free(href); +#if 0 + fprintf(f,"<A HREF=\"../%s/%s.html\">%s%s</A>\n", + nodefile, converted_nodename, refname, ref); +#endif + g_free(converted_nodename); + } + else + { +#if 0 + fprintf(f,"<A HREF=\"../dir/Top.html\">%s(dir)</A>\n",refname); +#endif + href = form_info_tag_href("dir", "Top"); + fprintf(f,"<A %s>%s(dir)</A>\n", href, refname); + g_free(href); + + } + } +} + +/* write out top of a new html file */ +void write_html_header( FILE *f, char *filename, char *nodename) +{ + fprintf(f,"<!DOCTYPE HTML PUBLIC \"-//W3C/DTD HTML 3.2//EN\">\n"); + fprintf(f,"<HTML>\n"); + fprintf(f,"<HEAD>\n"); + fprintf(f,"<TITLE>Info Node: (%s)%s</TITLE>\n",filename,nodename); + fprintf(f,"<META NAME=\"GENERATOR\" CONTENT=\"info2html\">\n"); + fprintf(f,"</HEAD>\n"); + fprintf(f,"<!-- conversion of file \"%s\", node \"%s\" -->\n",work_filename, work_node); +} + +/* start of everything after html header */ +void start_html_content( FILE *f ) +{ + fprintf(f,"<BODY>\n"); +} + +/* we want to put links to next, prev, and up nodes */ +void make_nav_links( FILE *f, NODE *node ) +{ + fprintf(f,"<PRE>\n"); + write_node_link_html( f, node->filename, "Next:", node->next ); + write_node_link_html( f, node->filename, "Prev:", node->prev ); + write_node_link_html( f, node->filename, "Up:", node->up ); + fprintf(f,"</PRE>\n"); +} + +/* s is error message */ +/* p is start of offending line */ +/* q is end of offending line */ +void html_error( char *s, char *p, char *q ) +{ + fprintf(stderr, "%s:%s\n",work_filename, work_node); + fprintf(stderr, "\t%s\n",s); + fprintf(stderr, "\tOffending line is:\n\t|"); + fwrite(p, 1, q-p, stderr); + fprintf(stderr, "|\n"); +} + +/******************************************************************** + * here is what we expect in contents of a node: + * + * headers: These are identified as a line of text + * followed by a row of '---' or '###' normally. + * These get mapped to <H2> </H2> for now. + * + * body text: Format this between <PRE> </PRE> statements. + * Catch any *Note and *note and make into + * links to other documents. Also try to catch + * URLs as well. + * + * menus: Starts with a '* Menu' line. Goes until the + * end of the node, or until the next line which + * starts with something other than a '* ' or '\n'. + * + * end of node: The INFO_FF and INFO_COOKIE mark the end of a node. + * Hitting EOF also marks the end of a node. + ********************************************************************/ + +void dump_html_for_node( NODE *node ) +{ + char *destdir; + char *destfile; + char *escaped_nodename; + char *converted_nodename; + char *contents_start, *contents_end; + char *header_name; + char *p, *q, *r, *skippnt; + char *end_menu_entry; + + + int menu_open, body_open; + + int seen_menu; + + int prev_was_blank, next_is_blank, current_is_blank; + + int seen_first_header; + + int last_output_was_header; + + int nskip; + + int we_are_in_dir_node; + + int i; + + FILE *f; + +/* msf - used to write each node to a separate file - now we're going */ +/* to just output HTML to stdout. */ +/* Each node will just be concantentated to previous */ +#if 0 + destdir = (char *) g_malloc ( strlen(node->filename) + + strlen(HTML_ROOT) + + strlen(node->filename) + 2); + strcpy(destdir, HTML_ROOT); + strcat(destdir, "/"); + strcat(destdir, node->filename); + strcat(destdir, "/"); + + /* check that the dir for info file exists */ + make_info_dir( destdir ); + + /* ok, we made the dir, lets go */ + destfile = (char *) g_malloc( strlen(destdir) + strlen(node->nodename) + 10); + strcpy(destfile, destdir); + converted_nodename = g_strdup( node->nodename ); + map_spaces_to_underscores( converted_nodename ); + strcat(destfile, converted_nodename); + strcat(destfile, ".html"); + g_free(converted_nodename); + + if (!(f=fopen(destfile, "w"))) + { + fprintf(f,"Couldnt create node html file %s\n",destfile); + perror("Error was"); + exit(1); + } +#endif + + f = stdout; + + /* see if this is THE dir node */ + we_are_in_dir_node = !strcmp("Top", node->nodename) && !strcmp("dir", node->filename); + +#if 0 + /* try and make a link between 'index.html' and 'Top.html' */ + if (!strcmp("Top", node->nodename)) + make_Top_link( destdir, destfile ); +#endif + + /* do the html header first */ + write_html_header( f, node->filename, node->nodename ); + + /* now for the body */ + start_html_content( f ); + + /* make an anchor */ + escaped_nodename = escape_html_chars( node->nodename ); + map_spaces_to_underscores( escaped_nodename ); + fprintf(f, "<A name=\"%s\">\n",escaped_nodename); + g_free(escaped_nodename); + + /* links to other immediate nodes */ + make_nav_links( f, node ); + + /* setup pointers to textual content of current node */ + contents_start = node->contents; + contents_end = node->contents+node->contlen; + + /* scan through all of contents and generate html on the fly */ + /* p points at start of current line */ + /* q points at the end of current line (at '\n' actually) */ + /* r points at the start of next line */ + /* we do this to catch headers */ + /* scan for a header at the top of the contents */ + /* if we see a '\n***'3 '*' in a row i */ + /* then take previous line as a header */ + header_name = NULL; + p = contents_start = node->contents; + q = memchr(p, '\n', contents_end - p); + r=q+1; + + /* we have several states we could be in */ + next_is_blank = 0; + prev_was_blank = 0; + current_is_blank = 0; + seen_first_header = 0; + + seen_menu = 0; + menu_open = 0; + body_open = 0; + + last_output_was_header = 0; + for (; q && r <= contents_end; ) + { + nskip = 1; + skippnt = NULL; + next_is_blank = (*r == '\n'); + current_is_blank = (*p == '\n'); + + /* test some easy things first */ + if (!strncmp(p, MENU_START, strlen(MENU_START))) + { + if (we_are_in_dir_node && !seen_menu) + { + if (body_open) + { + close_body_text_html(f); + body_open = 0; + } + + fprintf(f,"<H1> Main Info File Directory </H1>\n"); + + open_body_text_html(f); + body_open = 1; + + fprintf(f,"This is the main directory of available info files.\n"); + } + + if (body_open) + { + close_body_text_html(f); + body_open = 0; + } + else if (seen_menu) + html_error("Warning:saw new menu start and already in menu!", p, q); + + if (menu_open) + close_menu_html( f ); + + if (last_output_was_header) + open_menu_html( f, "" ); + else + open_menu_html( f, "Contents" ); + + seen_menu = 1; + menu_open = 1; + last_output_was_header = 0; + } + else if (we_are_in_dir_node && !seen_menu) + { + /* do nothing */ + } + else if (seen_menu) + { + /* if current line is blank ignore it */ + if (current_is_blank) + { + /* do nothing */ + } + /* first see if its a menu line */ + else if (!strncmp(p, MENU_ENTRY, strlen(MENU_ENTRY))) + { + if (!seen_menu) + html_error("Have seen menu start and hit a menu line!", p, q); + else + { + if (body_open) + { + if (menu_open) + html_error("Hit a menu line, and body and menu are opened!", p, q); + close_body_text_html( f ); + body_open = 0; + open_menu_html( f, "" ); + menu_open = 1; + } + if (!menu_open) + { + open_menu_html( f, "" ); + menu_open = 1; + } + write_menu_entry_html( f, p, node->filename, &end_menu_entry ); + if (end_menu_entry != NULL) + skippnt = end_menu_entry; + last_output_was_header = 0; + } + } + /* maybe its a header line */ + /* man this is getting ridiculous, its like teaching a child */ + /* to read! */ + else if (is_a_hdr_line(r) || + (*p != '*' && *r == '*' && *(r+1) == ' ') || + (*p != '*' && seen_menu && (*p != ' ' && *(p+1) != ' ') && + !current_is_blank && prev_was_blank && next_is_blank)) + { + header_name = (char *) g_malloc( q-p+2 ); + memcpy(header_name, p, q-p); + *(header_name + (q - p) ) = '\000'; + + /* if we were writing a particular component, close it */ + if (menu_open) + { + close_menu_html( f ); + menu_open = 0; + } + + if (body_open) + { + close_body_text_html( f ); + body_open = 0; + } + + if (seen_first_header) + write_header_html( f, header_name, HEADER_SIZE_2 ); + else + { + seen_first_header = 1; + write_header_html( f, header_name, HEADER_SIZE_1 ); + } + + g_free(header_name); + + /* jump ahead another line */ + if (!(*r == '*' && *(r+1) == ' ') && !next_is_blank) + nskip++; + + last_output_was_header = 1; + } + /* well, has to be body text then */ + else + { + if (menu_open) + { + close_menu_html( f ); + menu_open = 0; + + write_html_horiz_rule ( f ); + } + + if (!body_open) + { + open_body_text_html( f ); + body_open = 1; + } + + if (*p != '\n' && !last_output_was_header) + { + skippnt=write_body_text_html( f, p, q, node->filename ); + last_output_was_header = 0; + } + } + } + /* otherwise, no menu seen so things are easier */ + else + { + if (is_a_hdr_line(r)) + { + header_name = (char *) g_malloc( q-p+2 ); + memcpy(header_name, p, q-p); + *(header_name + (q - p) ) = '\000'; + + /* if we were writing a particular component, close it */ + if (body_open) + { + close_body_text_html( f ); + body_open = 0; + } + + if (seen_first_header) + write_header_html( f, header_name, HEADER_SIZE_2 ); + else + { + seen_first_header = 1; + write_header_html( f, header_name, HEADER_SIZE_1 ); + } + + g_free(header_name); + + /* jump ahead another line */ + if (!(*r == '*' && *(r+1) == ' ') && !next_is_blank) + nskip++; + + last_output_was_header = 1; + } + /* well, has to be body text then */ + else + { + if (!body_open) + { + open_body_text_html( f ); + body_open = 1; + } + + if (!(*p == '\n' && last_output_was_header)) + { + skippnt=write_body_text_html( f, p, q, node->filename ); + last_output_was_header = 0; + } + } + } + + /* end of cases, move to next line in contents */ + prev_was_blank = (*p == '\n'); + if (skippnt) + { + p = skippnt; + q = memchr(p, '\n', contents_end - p); + r = q+1; + skippnt = NULL; + } + else + for (i=0; i< nskip; i++) + { + p = r; + q = memchr(p, '\n', contents_end - p); + r = q+1; + } + } + + /* thats all folks */ + if (menu_open) + close_menu_html( f ); + else if (body_open) + close_body_text_html( f ); + + fprintf(f,"</BODY>\n</HTML>\n"); + + /* clean up */ +#if 0 + g_free(destdir); + g_free(destfile); +#endif +} + + +void write_header_html( FILE *f, char *p, char *hdr ) +{ + fprintf(f,"<%s> %s </%s>\n",hdr,p,hdr); +} + + +void open_body_text_html( FILE *f ) +{ + fprintf(f, "<PRE>\n"); +} + +void close_body_text_html( FILE *f ) +{ + fprintf(f, "</PRE>\n"); +} + +/* we have to handle '*note' and '*Note' links in body text */ +/* p is ptr to start of current line */ +/* q is ptr to '\n' at end of current line */ +char *write_body_text_html( FILE *f, char *p, char *q, char *nodefile ) +{ + int curlen; + int ref_exists; + char *tmp; + char *ptr; + char *match1; + char *note_ptr; + char *converted_nodename; + char *escaped_refname; + char *escaped_refnode; + char *refname, *reffile, *refnode, *end; + char *href; + + curlen = q - p; + tmp = (char *) g_malloc( curlen + 1 ); + memcpy( tmp, p, curlen ); + *(tmp+curlen) = '\000'; + + /* see if there is a reference in current line */ + /* and make sure this isnt a '*Note*' instead ! */ + ref_exists = 0; + if ((note_ptr=strstr(tmp, "*Note")) || (note_ptr=strstr(tmp, "*note"))) + if (*(note_ptr+6) != '*') + ref_exists = 1; + + if (ref_exists) + { + /* find the start of the link */ + note_ptr = (note_ptr - tmp) + p; + match1 = note_ptr + 4; + + /* not needed any more */ + g_free(tmp); + + for (; 1; ) + if (*(match1+1) == ' ' || *(match1+1) == '\n') + match1++; + else + break; + + /* find end of the link */ + if (parse_note_ref( match1, &refname, &reffile, &refnode, &end, 1)) + { + html_error( "Corrupt *Note link found!", p, q ); + return NULL; + } + + /* now we assume that parse_note_ref left control chars in ref* */ + /* if both null, we had a '::' and have to set both */ + if (reffile == NULL && refnode == NULL) + { + reffile = g_strdup(nodefile); + refnode = g_strdup(refname); + } + /* otherwise we had (file)., and we set node to 'Top' */ + else if (refnode == NULL) + refnode = g_strdup("Top"); + /* otherwise we had :nodename., and we set node to 'Top' */ + else if (reffile == NULL) + reffile = g_strdup(nodefile); + + /* write out stuff up to Note */ + fwrite(p, 1, note_ptr - p, f); + fprintf(f, "<STRONG>"); + fwrite(note_ptr, 1, match1 - note_ptr, f); + fprintf(f, " </STRONG>"); + + /* we need a nice nodename -> filename translation */ + /* so we convert newlines to spaces */ + converted_nodename = g_strdup( refnode ); + convert_newlines_to_spaces( converted_nodename ); + + /* we don't want two spaces in a row */ + strip_dupspaces( converted_nodename ); + map_spaces_to_underscores( converted_nodename ); + + /* escape HTML chars */ + escaped_refname = escape_html_chars( refname ); + escaped_refnode = escape_html_chars( refnode ); + + /* now output the link to html doc */ +#if 0 + fprintf(f,"<A HREF=\"../%s/%s.html\">", reffile, converted_nodename); +#endif + href = form_info_tag_href(reffile, converted_nodename); + fprintf(f,"<A %s>", href); + for (ptr=escaped_refname; *ptr; ptr++) + if (*ptr == '\n') + { + fprintf(f,"</A>\n"); + fprintf(f,"<A %s>", href); + } + else + fprintf(f,"%c", *ptr); + + if (strcmp(refname, refnode)) + { + fprintf(f,": "); + for (ptr=escaped_refnode; *ptr; ptr++) + if (*ptr == '\n') + { + fprintf(f,"</A>\n"); + fprintf(f,"<A %s>", href); + } + else + fprintf(f,"%c", *ptr); + + fprintf(f,"</A>"); + if (end > q && !(strchr(refnode, '\n'))) + fprintf(f,"\n"); + } + else + fprintf(f,"</A>"); + + if (href) + g_free(href); + if (escaped_refnode) + g_free(escaped_refnode); + if (escaped_refname) + g_free(escaped_refname); + if (converted_nodename) + g_free(converted_nodename); + + g_free(refname); + g_free(reffile); + g_free(refnode); + + /* write out stuff at end */ + if (end < q) + { + fwrite(end+1, 1, q - end, f); + return NULL; + } + else + return end+1; + } + else + { + fwrite(p, 1, q-p+1, f); + /* not needed any more */ + g_free(tmp); + return NULL; + } +} + +void open_menu_html( FILE *f, char *p ) +{ + if (*p != '\000') + fprintf(f, "<H2>%s</H2>\n",p); + /* fprintf(f, "<UL>\n"); */ + fprintf(f, "<dl>\n"); +} + +void close_menu_html( FILE *f ) +{ + /* fprintf(f, "</UL>\n"); */ + fprintf(f, "</dl>\n"); +} + +/* writes menu entry contained in string p */ +/* nodename and nodefile apply to the node which menu entry is in */ +void write_menu_entry_html( FILE *f, char *p, char *nodefile, char **menu_end ) +{ + char *refname; + char *reffile; + char *refnode; + char *end; + char *realend; + char *converted_nodename; + char *escaped_refnode; + char *escaped_refname; + char *href; + + int i, done; + + /* skip over the '* ' at the start of the line */ + if (parse_menu_line( p+2, &refname, &reffile, &refnode, &end, 0 )) + { + html_error("Error parsing menu", p, memchr(p, '\n', 80)); + return; + } + + /* if both null, we had a '::' and have to set both */ + if (reffile == NULL && refnode == NULL) + { + reffile = g_strdup(nodefile); + refnode = g_strdup(refname); + } + /* otherwise we had (file)., and we set node to 'Top' */ + else if (refnode == NULL) + refnode = g_strdup("Top"); + else if (reffile == NULL) + reffile = g_strdup(nodefile); + + /* now find the end of the right hand text for this menu line */ + /* it can continue for several lines */ + done = 0; + for (realend = end+1; !done; realend++) + { + if (*realend == '\n') + { + if (*(realend+1) == '\n') + { + done = 1; + continue; + } + + for (i=1; i<4; i++) + if (!isblank(*(realend+i)) && *(realend+i) != '\n') + { + done = 1; + break; + } + } + } + *menu_end = realend; + + + converted_nodename = g_strdup( refnode ); + map_spaces_to_underscores( converted_nodename ); + + escaped_refnode = escape_html_chars( refnode ); + escaped_refname = escape_html_chars( refname ); + +#if 0 + fprintf(f,"<dt><A HREF=\"../%s/%s.html\">%s</A>\n", + reffile, converted_nodename, escaped_refname); +#endif + href = form_info_tag_href( reffile, converted_nodename ); + fprintf(f,"<dt><A %s>%s</A>\n", href, escaped_refname ); + fprintf(f,"<dd>"); + if (*end == '.' && *(end+1) == '\n') + fprintf(f,"%s.\n",escaped_refnode); + else + fwrite(end+1, 1, *menu_end - end - 1, f); + + if (href) + g_free(href); + if (escaped_refname) + g_free(escaped_refname); + if (escaped_refnode) + g_free(escaped_refnode); + if (converted_nodename) + g_free(converted_nodename); + g_free(refname); + g_free(reffile); + g_free(refnode); +} + +void write_html_horiz_rule( FILE *f ) +{ + fprintf(f, "<HR>\n"); +} + diff --git a/src/info2html/html.h b/src/info2html/html.h new file mode 100644 index 00000000..8a0f5694 --- /dev/null +++ b/src/info2html/html.h @@ -0,0 +1,22 @@ +#ifndef HTML_H +#define HTML_H + +#define HTML_ROOT "./htmltest" + +#define HEADER_SIZE_1 "H1" +#define HEADER_SIZE_2 "H2" + +void dump_html_for_node( NODE *node ); + +void open_body_text_html( FILE *f ); +void close_body_text_html( FILE *f ); +char *write_body_text_html( FILE *f, char *p, char *q, char *nodefile ); + +void open_menu_html( FILE *f, char *p ); +void close_menu_html( FILE *f ); +void write_menu_entry_html( FILE *f, char *p, char *nodefile,char **menu_end ); + +void write_header_html( FILE *f, char *p, char *hdr ); + +void write_html_horiz_rule( FILE *f ); +#endif diff --git a/src/info2html/main.c b/src/info2html/main.c new file mode 100644 index 00000000..8b9c9b90 --- /dev/null +++ b/src/info2html/main.c @@ -0,0 +1,108 @@ +/* little test main() to see how we're doing */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "data.h" +#include "html.h" +#include "parse.h" +#include "utils.h" +#include "version.h" + +int +main(argc, argv) +int argc; +char **argv; +{ + FILE *f; + char line[250]; + + int result; + + NODE *node; + +if (!be_quiet) + printf("info2html Version %s\n",INFO2HTML_VERSION); + + if (argc == 1) + { + f = stdin; + strcpy(work_filename, "STDIN"); + } + else + { + if ((f=fopen(argv[1], "r"))==NULL) { + fprintf(stderr, "File %s not found.\n",argv[1]); + exit(1); + } + strcpy(work_filename, argv[1]); + } + + work_line_number = 0; + + + /* scan for start of real data */ + for (;1;) { + fgets(line,250,f); + if (feof(f)) + { + fprintf(stderr,"Info file had no contents\n"); + exit(1); + } + + work_line_number++; + if (*line == INFO_COOKIE) + break; + + } + + /* big loop to identify sections of info files */ + /* NEW PLAN - format on the fly */ + /* No need to store all nodes, etc since we let web server */ + /* handle resolving tags! */ + for (;1;) { + fgets(line,250,f); + if (feof(f)) + break; + + work_line_number++; + + /* found a node definition line */ + if (!strncmp(line, "File:", 5)) + { + node = alloc_node(); + result=read_node( f, line, node ); + if ( result == READ_ERR ) + { + fprintf(stderr, "Error reading the node contents\n"); + fprintf(stderr, "line was |%s|\n",line); + continue; + } + + strcpy(work_node,node->nodename); + +#ifdef DEBUG + printf("NEW NODE\n"); + printf("\tFile:|%s|\n\tNode:|%s|\n\tNext:|%s|\n", + node->filename, node->nodename,node->next); + printf("\tPrev:|%s|\n\tUp:|%s|\n\n", node->prev, node->up); + printf("------------------------------------------------------\n"); +#endif + /* now lets make some html */ + /* first make sure the subdir for this info file exists */ + dump_html_for_node( node ); + /* free_node(node); */ + if (node) + { + if ( node->contents ) + free(node->contents); + + free(node); + } + } + else + continue; + } + return 0; +} diff --git a/src/info2html/parse.c b/src/info2html/parse.c new file mode 100644 index 00000000..fe960e76 --- /dev/null +++ b/src/info2html/parse.c @@ -0,0 +1,513 @@ +/* simple info file parser. */ +/* currently only finds nodes and contructs a tree */ +/* partially motivated by source code of the 'info' program */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include "parse.h" +#include "data.h" +#include "utils.h" + +/* main routine to read in a node once we found its start in file f */ +/* assumes NODE has already been allocated */ +int read_node (FILE *f, char *line, NODE *node) +{ + /* we found a node line */ + if (!parse_node_line( node, line )) + return (READ_ERR); + + /* read in the contents, they go until a '\014', or a EOF */ + return (read_node_contents( f, node )); +} + +/* take a node definition and parse components */ +/* assumes that NODE has already been allocated */ +/* returns pointer to node if ok, or NULL on failure */ +NODE *parse_node_line( NODE *node, char * line ) +{ + char *result; + char *temp; + + /* fill in rest soon */ + node->filename = NULL; + node->nodename = NULL; + node->contents = NULL; + node->next = NULL; + node->prev = NULL; + node->up = NULL; + + temp = line; + + if (!(result=parse_node_label( &temp, "File:", 0))) + return NULL; + node->filename = result; + + if (!(result=parse_node_label( &temp, "Node:", 1))) + return NULL; + node->nodename = result; + + /* not clear any of the rest are actually necessary */ + /* keep eye out for hitting end of line */ + if ((result=parse_node_label( &temp, "Next:", 1))) + node->next = result; + + if ((result=parse_node_label( &temp, "Prev:", 1))) + node->prev = result; + + if ((result=parse_node_label( &temp, "Up:", 1))) + node->up = result; + + /* cleanup node filename */ + fixup_info_filename( node->filename ); + + return node; +} + +/* grab node name from the label 'label' */ +/* NULL means it doesn't exist */ +/* strdup's a string and returns it, which can be freed */ +char *parse_node_label( char **line, char *label, int allow_eof ) +{ + char *start, *end; + char *temp; + + temp = strdup( *line ); + + start = strstr( temp, label ); + if (start == NULL) + return NULL; + + start += strlen(label); + if (allow_eof) + { + end = strstr( start, "," ); + if (end == NULL) + end = strstr( start, "\n" ); + } + else + end = strstr( start, "," ); + + if (end == NULL) + return NULL; + + *end = '\000'; + strip_spaces( start ); + return start; +} + + +/* from current position in file f, read till we hit EOF or */ +/* a end of node marker. Allocate space and set ptr to point at it */ +/* contents of node->contents is a mirror image of what was in */ +/* the node (menus and all */ +/* assumes that NODE is already allocated */ +#define SEARCH_BUF_SIZE 1024 +#define CONTENTS_BUF_INCR 1024 + +int read_node_contents( FILE *f, NODE *node ) +{ + int nread; + int found; + char *searchbuf; + char *ptr; + + char *tmpcontents; + int tmpcontlen; + int linelen; + + char *status; + + searchbuf = (char *) malloc( SEARCH_BUF_SIZE ); + tmpcontents = (char *) malloc( CONTENTS_BUF_INCR ); + tmpcontlen = CONTENTS_BUF_INCR; + nread = 0; + + /* we read until we hit a '\014' or the end of file */ + /* since its coming form a pipe we read up to EOL */ + /* and save contents as we go along */ + for ( found=0 ; !found ; ) + { + status=fgets( searchbuf, SEARCH_BUF_SIZE, f); + linelen = strlen( searchbuf ); + for (found=0, ptr = searchbuf; *ptr && !found; ptr++) + if (*ptr == INFO_FF || *ptr == INFO_COOKIE) + found=1; + + /* if we didn't find the magic character, but hit eof, same deal */ + if (!found && feof(f)) + { + found = 1; + continue; + } + + if ((nread+linelen+2) > tmpcontlen) + { + tmpcontlen += CONTENTS_BUF_INCR; + tmpcontents = realloc( tmpcontents, tmpcontlen ); + } + + memcpy(tmpcontents+nread, searchbuf, linelen); + nread += linelen; + if (!feof(f)) + *(tmpcontents+nread) = '\14'; + } + +/*tmpcontents=realloc(tmpcontents, nread); */ + node->contlen = nread; + node->contents = tmpcontents; + if (searchbuf) + free(searchbuf); + else + fprintf(stderr, "For some reason searchbuf is NULL\n"); + + return READ_OK; +} + +/* given pointer to a string, tells us if its the line following */ +/* a info header line */ +/* ex. */ +/* This is a header */ +/* ================ */ +/* r points at the row of '='*/ +int is_a_hdr_line ( char *r ) +{ + return (!strncmp(r, "***", 3) || + !strncmp(r, "===", 3) || + !strncmp(r, "---", 3) || + !strncmp(r, "...", 3)); +} + + +/* returns 0 if good line found, non-0 otherwise */ +int parse_menu_line( char *line, char **refname, char **reffile, + char **refnode, char **end_of_link, + int span_lines) +{ + char *match; + char *match2; + char *start; + char *end_of_line; + char *end_refnode; + + + start = line; + end_of_line = strchr( line, '\n' ); + *end_of_link = NULL; + + /* ok, we found a menu line, have to convert to a reference */ + /* four types we worry about */ + /* only care about stuff up to '.' OR second ':' */ + /* 1) 'Link:: This is a link' */ + /* 2) 'Link: (file). This is another link' */ + /* 3) 'Link: (file)node. This is yet another' */ + /* 4) 'Link: node.' */ + + /* found a variation on #4 in amdref.info-2 ! */ + /* 5) 'Link: node: */ + + /* find the refname */ + if (!(match=copy_to_anychar( start, ":", refname ))) + return -1; + strip_spaces( *refname ); + strip_dupspaces( *refname ); + match++; + + /* simple case is case #1 above */ + if (*match == ':') + { + *reffile = NULL; + *refnode = NULL; + *end_of_link = match; + return 0; + } + else + { + /* look for parentheses */ + match2 = strchr( match, '(' ); + + /* this means it must be form 4 */ + /* but dont look too far away */ + /* this is a real hack - should do something more intelligent that 10 chars */ + if (!match2 || (match2 - match) > 10) + { + /* look for a ':' or '.' ending node */ +#if 0 + if (!(end_refnode=copy_to_anychar( match, ".", refnode ))) + if (!(end_refnode=copy_to_anychar( match, ":", refnode ))) + return -1; + /* but it cant be past end of the menu line */ + if (end_refnode > end_of_line && !span_lines) + return -1; +#endif + /* span_lines is ignored now we have parse_note_ref() */ + if (!(end_refnode=copy_to_anychar( match, "\n,.", refnode ))) + return -1; + *end_of_link = end_refnode; + strip_spaces( *refnode ); + strip_dupspaces( *refnode ); + if ( *refnode == '\000') + { + free(refnode); + refnode = NULL; + } + *reffile = NULL; + return 0; + } + else + { + match2++; + if (!(match=copy_to_anychar (match2, ")", reffile))) + return -1; + strip_spaces( *reffile ); + fixup_info_filename( *reffile ); + match++; + /* unsure about having '\n' here */ + if (!(match=copy_to_anychar (match, "\n.,", refnode))) + return -1; + *end_of_link = match; + strip_spaces( *refnode ); + strip_dupspaces( *refnode ); + strip_dupspaces( *refname ); + if (!(**refnode)) + *refnode = NULL; + + return 0; + } + } +} + +/* used for *note and *Note refs */ +/* returns 0 if good line found, non-0 otherwise */ +int parse_note_ref( char *line, char **refname, char **reffile, + char **refnode, char **end_of_link, + int span_lines) +{ + char *match; + char *match2; + char *start; + char *end_of_line; + char *end_refnode; + + + start = line; + end_of_line = strchr( line, '\n' ); + *end_of_link = NULL; + + /* ok, we found a menu line, have to convert to a reference */ + /* four types we worry about */ + /* only care about stuff up to '.' OR second ':' */ + /* 1) 'Link:: This is a link' */ + /* 2) 'Link: (file). This is another link' */ + /* 3) 'Link: (file)node. This is yet another' */ + /* 4) 'Link: node.' */ + + /* found a variation on #4 in amdref.info-2 ! */ + /* 5) 'Link: node: */ + + /* find the refname */ + if (!(match=copy_to_anychar( start, ":", refname ))) + return -1; + strip_spaces( *refname ); + strip_dupspaces( *refname ); + match++; + + /* simple case is case #1 above */ + if (*match == ':') + { + *reffile = NULL; + *refnode = NULL; + *end_of_link = match; + return 0; + } + else + { + /* look for parentheses */ + match2 = strchr( match, '(' ); + + /* this means it must be form 4 */ + /* but dont look too far away */ + /* this is a real hack - should do something more intelligent that 10 chars */ + if (!match2 || (match2 - match) > 10) + { + /* look for a ',', ':' or '.' ending node */ +#if 0 + if (!(end_refnode=copy_to_anychar( match, ",", refnode ))) + if (!(end_refnode=copy_to_anychar( match, ".", refnode ))) + if (!(end_refnode=copy_to_anychar( match, ":", refnode ))) +#endif + if (!(end_refnode=copy_to_anychar( match, ",.:", refnode ))) + return -1; + + /* but it cant be past end of the menu line */ + if (end_refnode > end_of_line && !span_lines) + return -1; + + *end_of_link = end_refnode; + strip_spaces( *refnode ); + strip_dupspaces( *refnode ); + *reffile = NULL; + return 0; + } + else + { + match2++; + if (!(match=copy_to_anychar (match2, ")", reffile))) + return -1; + strip_spaces( *reffile ); + fixup_info_filename( *reffile ); + match++; + if (!(match=copy_to_anychar (match, ".,", refnode))) + return -1; + *end_of_link = match; + strip_spaces( *refnode ); + strip_dupspaces( *refnode ); + strip_dupspaces( *refname ); + if (!(**refnode)) + *refnode = NULL; + + return 0; + } + } +} + + +/* old version 1.0 stuff */ +#if 0 + + + +void scan_for_menu( NODE *node ) +{ + char *ptr; + char *match; + char *line; + char *buf; + char *refname, *reffile, *refnode; + char *junk; + + MENU_ENTRY *head; + + int size; + int found; + + NODE *newnode; + REFERENCE *newref; + MENU_ENTRY *newentry; + char menu_hdr[8192]; + char *menu_hdr_ptr; + + /* search for start of a menu */ + size = node->contlen; + found = 0; + for (ptr = node->contents ; ptr < (node->contents+node->contlen) && !found;) + { + line = get_line_from_contents( ptr, size ); + match = strstr(line, MENU_START); + if (match) + { + node->menu_start=ptr; + found = 1; + } + size = size - strlen(line) - 1; + ptr += strlen(line) + 1; + free(line); + } + + if (!found) + return; + + /* we found a menu start, lets read menu in now */ + /* keep looking for entries till we hit a end-of-node condition */ + head = NULL; + menu_hdr_ptr = menu_hdr; + for ( ; ptr < (node->contents+node->contlen); ) + { + buf = get_line_from_contents( ptr, size ); + line = buf; + size = size - strlen(line) - 1; + ptr += strlen(line) + 1; + + if (*line == '\000') + { + free(buf); + continue; + } + + if (*line == INFO_FF || *line == INFO_COOKIE) + { + free(buf); + break; + } + + /* see if its a new menu entry or not */ + if (*line != '*') + { +#if 0 + free(buf); + break; +#endif + if ( (*line != '=') && (*line != '#') ) + { + memcpy(menu_hdr_ptr, line, strlen(line)); + menu_hdr_ptr += strlen(line); + *menu_hdr_ptr = '\n'; + menu_hdr_ptr++; + *menu_hdr_ptr = '\0'; + } + free(buf); + continue; + } + else + { + line += 2; + + if (parse_menu_line( line, &refname, &reffile, &refnode, &junk, 0 )) + { + free(buf); + continue; + } + + /* found the end of nodename, so make a new reference */ + newref = (REFERENCE *) malloc( sizeof(REFERENCE) ); + newref->refname = refname; + + newentry = (MENU_ENTRY *) malloc( sizeof(MENU_ENTRY) ); + newentry->ref = newref; + if (menu_hdr_ptr != menu_hdr) + { + newentry->header = strdup(menu_hdr); + menu_hdr_ptr = menu_hdr; + } + else + newentry->header = NULL; + newentry->next = NULL; + + newnode = (NODE *) malloc( sizeof(NODE) ); + newref->node = newnode; + newnode->next = newnode->prev = newnode->up = NULL; + if (refnode) + newnode->nodename = refnode; + else + newnode->nodename = strdup(refname); + + if (reffile) + newnode->filename = reffile; + else + newnode->filename = strdup(node->filename); + + + if (head == NULL) + node->menu = newentry; + else + head->next = newentry; + head = newentry; + } + free(buf); + } +} + + + +#endif diff --git a/src/info2html/parse.h b/src/info2html/parse.h new file mode 100644 index 00000000..10119f00 --- /dev/null +++ b/src/info2html/parse.h @@ -0,0 +1,25 @@ +#ifndef PARSE_H +#define PARSE_H + +#include "data.h" + +#define READ_OK 1 +#define READ_EOF 2 +#define READ_ERR 0 + +NODE *parse_node_line( NODE *node, char * line ); +char *parse_node_label( char **line, char *label, int allow_eof ); +int parse_menu_line( char *line, char **refname, char **reffile, + char **refnode, char **end_of_link, + int span_lines); + +int parse_note_ref( char *line, char **refname, char **reffile, + char **refnode, char **end_of_link, + int span_lines); + +int read_node_contents( FILE *f, NODE *node ); +int read_node (FILE *f, char *line, NODE *node); + +int is_a_hdr_line (char *r); + +#endif /* PARSE_H */ diff --git a/src/info2html/utils.c b/src/info2html/utils.c new file mode 100644 index 00000000..db0fd0b5 --- /dev/null +++ b/src/info2html/utils.c @@ -0,0 +1,273 @@ +/* various utility functions */ + +#include <stdio.h> +#include <string.h> +#include <regex.h> + +#include <glib.h> + +#include "data.h" +#include "utils.h" + +void strip_spaces( char *str ) +{ + int len; + + len = strlen( str ); + if (len < 1) + return; + + while (*str == ' ') + { + len = strlen( str ); + memmove(str, str+1, len); + } + + len = strlen( str ); + while (*(str+len) == '\n') + { + *(str+len) = '\000'; + len = strlen( str ); + } + +#if 0 + q = str + strlen( str ); + for (p=str; *p == ' ' || *p == '\t'; p++); + for (r=q-1; *r == ' ' || *r == '\t'; r--); + len = r - p + 1; + memmove( str, p, len ); + *(str+len)='\000'; +#endif +} + +void strip_newlines( char *str ) +{ + char *p; + int len; + + len = strlen( str ); + if (len < 2) + return; + + for (p=str; len > 0; len--) + if ( *p == '\n' ) + { + if (len > 1) + memmove(p , p+1, len ); + else + *p = '\000'; + } + else + p++; +} + +void strip_dupspaces( char *str ) +{ + char *p; + int len; + + len = strlen( str ) - 1; + if (len < 2) + return; + + for (p=str+1; len > 0; len--) + if ( *p == ' ' && *(p-1) == ' ') + { + if (len > 1) + memmove(p , p+1, len ); + else + *p = '\000'; + } + else + p++; +} + +void convert_newlines_to_spaces( char *str ) +{ + char *p; + int len; + + /* trim head and trailing newlines */ + while (*str == '\n') + { + len = strlen( str ); + memmove(str, str+1, len); + } + + len = strlen( str ); + while (*(str+len) == '\n') + { + *(str+len) = '\000'; + len = strlen( str ); + } + + len = strlen( str ); + for (p=str; len > 0; len--, p++) + if ( *p == '\n' ) + *p = ' '; +} + +char * escape_html_chars( char *str ) +{ + char *p; + char *q; + char tmp[2]; + int len; + + if (!str) + return NULL; + + len = strlen( str ); +#if 0 + if (len <2) + fprintf(stderr,"escaped less < 2 chars= |%s|\n",str); + +#endif + + q = (char *) g_malloc( len * 5 + 4); + *q = '\000'; + *(tmp+1) = '\000'; + for (p=str; *p; p++) + if ( *p == '>' ) + strcat(q, ">"); + else if ( *p == '<' ) + strcat(q, "<"); + else if ( *p == '&' ) + strcat(q, "&"); + else + { + *tmp = *p; + strcat(q, tmp); + } + return q; +} + + +char *get_line_from_contents( char *ptr, int size ) +{ + char *eoln; + char *line; + int linelen; + + eoln = memchr(ptr, '\n', size); + if (eoln == NULL) + return NULL; + + linelen = (eoln-ptr); + line = (char *) g_malloc( linelen+1 ); + memcpy(line, ptr, linelen); + *(line+linelen) = '\000'; + return line; +} + +/* copies from string str up to any character in chr */ +/* if chr doesn't exist, return NULL */ +/* otherwise return location of match */ +/* allocates a new string if anything copied */ +char *copy_to_anychar( char *str, char *chr, char **copy ) +{ + int len; + char *match; + + match = strpbrk(str, chr); + if (!match) + return NULL; + + len = match - str; + *copy = (char *) g_malloc( len+1 ); + *(*copy+len) = '\000'; + memcpy(*copy, str, len ); + return match; +} + +/* allocates a new node */ +NODE *alloc_node() +{ + NODE * tmp; + + tmp = (NODE *) g_malloc( sizeof(NODE) ); + + if (tmp) + { + tmp->nodename=NULL; + tmp->filename=NULL; + tmp->contents=NULL; + tmp->contlen=0; + tmp->next=NULL; + tmp->prev=NULL; + tmp->up=NULL; + tmp->filename=NULL; + tmp->menu=NULL; + tmp->menu_start=NULL; + } + return tmp; +} + +/* deallocates a new node */ +void free_node(NODE *tmp) +{ + if (tmp) + { + if (tmp->nodename) + g_free(tmp->nodename); + if (tmp->filename) + g_free(tmp->filename); + if (tmp->contents) + g_free(tmp->contents); + } + g_free(tmp); +} + +#if 0 +void map_spaces_to_underscores( char *str ) +{ + char *p; + + for (p=str; *p; p++) + if (*p == ' ') + *p = '_'; +} +#endif + +void map_spaces_to_underscores( char *str ) +{ + char *p; + + for (p=str; *p; p++) + switch (*p) + { + case ' ': + case '\n': + case '\t': + case '`': + case '\'': + case '/': + case '\\': + case '"': + case '.': + case '!': + *p = '_'; + break; + } +} + + + +/* reduce infofile filename to basename alone */ +void fixup_info_filename( char *file ) +{ + char *ptr1; + char tail[] = ".info"; + + if (strlen(file) < 6) + return; + + ptr1 = strrchr( file, '.' ); + if (!ptr1) + return; + + if (!strncmp(ptr1, tail, strlen(tail))) + *ptr1 = '\000'; + +} + diff --git a/src/info2html/utils.h b/src/info2html/utils.h new file mode 100644 index 00000000..25a964c8 --- /dev/null +++ b/src/info2html/utils.h @@ -0,0 +1,17 @@ +#ifndef UTILS_H +#define UTILS_H + +#include "data.h" + +void strip_spaces( char *str ); +void strip_newlines( char *str ); +void strip_dupspaces( char *str ); +void convert_newlines_to_spaces( char *str ); +char *get_line_from_contents( char *ptr, int size ); +char *copy_to_anychar( char *str, char *chr, char **copy ); +NODE *alloc_node( void ); +void free_node( NODE * ); +void map_spaces_to_underscores( char *str ); +void fixup_info_filename( char *file ); +char *escape_html_chars( char *str ); +#endif diff --git a/src/info2html/version.h b/src/info2html/version.h new file mode 100644 index 00000000..f1155c36 --- /dev/null +++ b/src/info2html/version.h @@ -0,0 +1,6 @@ +#ifndef VERSION_H +#define VERSION_H + +#define INFO2HTML_VERSION "2.1" + +#endif |