summaryrefslogtreecommitdiff
path: root/src/info2html/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/info2html/parse.c')
-rw-r--r--src/info2html/parse.c513
1 files changed, 513 insertions, 0 deletions
diff --git a/src/info2html/parse.c b/src/info2html/parse.c
new file mode 100644
index 00000000..fe960e76
--- /dev/null
+++ b/src/info2html/parse.c
@@ -0,0 +1,513 @@
+/* simple info file parser. */
+/* currently only finds nodes and contructs a tree */
+/* partially motivated by source code of the 'info' program */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "parse.h"
+#include "data.h"
+#include "utils.h"
+
+/* main routine to read in a node once we found its start in file f */
+/* assumes NODE has already been allocated */
+int read_node (FILE *f, char *line, NODE *node)
+{
+ /* we found a node line */
+ if (!parse_node_line( node, line ))
+ return (READ_ERR);
+
+ /* read in the contents, they go until a '\014', or a EOF */
+ return (read_node_contents( f, node ));
+}
+
+/* take a node definition and parse components */
+/* assumes that NODE has already been allocated */
+/* returns pointer to node if ok, or NULL on failure */
+NODE *parse_node_line( NODE *node, char * line )
+{
+ char *result;
+ char *temp;
+
+ /* fill in rest soon */
+ node->filename = NULL;
+ node->nodename = NULL;
+ node->contents = NULL;
+ node->next = NULL;
+ node->prev = NULL;
+ node->up = NULL;
+
+ temp = line;
+
+ if (!(result=parse_node_label( &temp, "File:", 0)))
+ return NULL;
+ node->filename = result;
+
+ if (!(result=parse_node_label( &temp, "Node:", 1)))
+ return NULL;
+ node->nodename = result;
+
+ /* not clear any of the rest are actually necessary */
+ /* keep eye out for hitting end of line */
+ if ((result=parse_node_label( &temp, "Next:", 1)))
+ node->next = result;
+
+ if ((result=parse_node_label( &temp, "Prev:", 1)))
+ node->prev = result;
+
+ if ((result=parse_node_label( &temp, "Up:", 1)))
+ node->up = result;
+
+ /* cleanup node filename */
+ fixup_info_filename( node->filename );
+
+ return node;
+}
+
+/* grab node name from the label 'label' */
+/* NULL means it doesn't exist */
+/* strdup's a string and returns it, which can be freed */
+char *parse_node_label( char **line, char *label, int allow_eof )
+{
+ char *start, *end;
+ char *temp;
+
+ temp = strdup( *line );
+
+ start = strstr( temp, label );
+ if (start == NULL)
+ return NULL;
+
+ start += strlen(label);
+ if (allow_eof)
+ {
+ end = strstr( start, "," );
+ if (end == NULL)
+ end = strstr( start, "\n" );
+ }
+ else
+ end = strstr( start, "," );
+
+ if (end == NULL)
+ return NULL;
+
+ *end = '\000';
+ strip_spaces( start );
+ return start;
+}
+
+
+/* from current position in file f, read till we hit EOF or */
+/* a end of node marker. Allocate space and set ptr to point at it */
+/* contents of node->contents is a mirror image of what was in */
+/* the node (menus and all */
+/* assumes that NODE is already allocated */
+#define SEARCH_BUF_SIZE 1024
+#define CONTENTS_BUF_INCR 1024
+
+int read_node_contents( FILE *f, NODE *node )
+{
+ int nread;
+ int found;
+ char *searchbuf;
+ char *ptr;
+
+ char *tmpcontents;
+ int tmpcontlen;
+ int linelen;
+
+ char *status;
+
+ searchbuf = (char *) malloc( SEARCH_BUF_SIZE );
+ tmpcontents = (char *) malloc( CONTENTS_BUF_INCR );
+ tmpcontlen = CONTENTS_BUF_INCR;
+ nread = 0;
+
+ /* we read until we hit a '\014' or the end of file */
+ /* since its coming form a pipe we read up to EOL */
+ /* and save contents as we go along */
+ for ( found=0 ; !found ; )
+ {
+ status=fgets( searchbuf, SEARCH_BUF_SIZE, f);
+ linelen = strlen( searchbuf );
+ for (found=0, ptr = searchbuf; *ptr && !found; ptr++)
+ if (*ptr == INFO_FF || *ptr == INFO_COOKIE)
+ found=1;
+
+ /* if we didn't find the magic character, but hit eof, same deal */
+ if (!found && feof(f))
+ {
+ found = 1;
+ continue;
+ }
+
+ if ((nread+linelen+2) > tmpcontlen)
+ {
+ tmpcontlen += CONTENTS_BUF_INCR;
+ tmpcontents = realloc( tmpcontents, tmpcontlen );
+ }
+
+ memcpy(tmpcontents+nread, searchbuf, linelen);
+ nread += linelen;
+ if (!feof(f))
+ *(tmpcontents+nread) = '\14';
+ }
+
+/*tmpcontents=realloc(tmpcontents, nread); */
+ node->contlen = nread;
+ node->contents = tmpcontents;
+ if (searchbuf)
+ free(searchbuf);
+ else
+ fprintf(stderr, "For some reason searchbuf is NULL\n");
+
+ return READ_OK;
+}
+
+/* given pointer to a string, tells us if its the line following */
+/* a info header line */
+/* ex. */
+/* This is a header */
+/* ================ */
+/* r points at the row of '='*/
+int is_a_hdr_line ( char *r )
+{
+ return (!strncmp(r, "***", 3) ||
+ !strncmp(r, "===", 3) ||
+ !strncmp(r, "---", 3) ||
+ !strncmp(r, "...", 3));
+}
+
+
+/* returns 0 if good line found, non-0 otherwise */
+int parse_menu_line( char *line, char **refname, char **reffile,
+ char **refnode, char **end_of_link,
+ int span_lines)
+{
+ char *match;
+ char *match2;
+ char *start;
+ char *end_of_line;
+ char *end_refnode;
+
+
+ start = line;
+ end_of_line = strchr( line, '\n' );
+ *end_of_link = NULL;
+
+ /* ok, we found a menu line, have to convert to a reference */
+ /* four types we worry about */
+ /* only care about stuff up to '.' OR second ':' */
+ /* 1) 'Link:: This is a link' */
+ /* 2) 'Link: (file). This is another link' */
+ /* 3) 'Link: (file)node. This is yet another' */
+ /* 4) 'Link: node.' */
+
+ /* found a variation on #4 in amdref.info-2 ! */
+ /* 5) 'Link: node: */
+
+ /* find the refname */
+ if (!(match=copy_to_anychar( start, ":", refname )))
+ return -1;
+ strip_spaces( *refname );
+ strip_dupspaces( *refname );
+ match++;
+
+ /* simple case is case #1 above */
+ if (*match == ':')
+ {
+ *reffile = NULL;
+ *refnode = NULL;
+ *end_of_link = match;
+ return 0;
+ }
+ else
+ {
+ /* look for parentheses */
+ match2 = strchr( match, '(' );
+
+ /* this means it must be form 4 */
+ /* but dont look too far away */
+ /* this is a real hack - should do something more intelligent that 10 chars */
+ if (!match2 || (match2 - match) > 10)
+ {
+ /* look for a ':' or '.' ending node */
+#if 0
+ if (!(end_refnode=copy_to_anychar( match, ".", refnode )))
+ if (!(end_refnode=copy_to_anychar( match, ":", refnode )))
+ return -1;
+ /* but it cant be past end of the menu line */
+ if (end_refnode > end_of_line && !span_lines)
+ return -1;
+#endif
+ /* span_lines is ignored now we have parse_note_ref() */
+ if (!(end_refnode=copy_to_anychar( match, "\n,.", refnode )))
+ return -1;
+ *end_of_link = end_refnode;
+ strip_spaces( *refnode );
+ strip_dupspaces( *refnode );
+ if ( *refnode == '\000')
+ {
+ free(refnode);
+ refnode = NULL;
+ }
+ *reffile = NULL;
+ return 0;
+ }
+ else
+ {
+ match2++;
+ if (!(match=copy_to_anychar (match2, ")", reffile)))
+ return -1;
+ strip_spaces( *reffile );
+ fixup_info_filename( *reffile );
+ match++;
+ /* unsure about having '\n' here */
+ if (!(match=copy_to_anychar (match, "\n.,", refnode)))
+ return -1;
+ *end_of_link = match;
+ strip_spaces( *refnode );
+ strip_dupspaces( *refnode );
+ strip_dupspaces( *refname );
+ if (!(**refnode))
+ *refnode = NULL;
+
+ return 0;
+ }
+ }
+}
+
+/* used for *note and *Note refs */
+/* returns 0 if good line found, non-0 otherwise */
+int parse_note_ref( char *line, char **refname, char **reffile,
+ char **refnode, char **end_of_link,
+ int span_lines)
+{
+ char *match;
+ char *match2;
+ char *start;
+ char *end_of_line;
+ char *end_refnode;
+
+
+ start = line;
+ end_of_line = strchr( line, '\n' );
+ *end_of_link = NULL;
+
+ /* ok, we found a menu line, have to convert to a reference */
+ /* four types we worry about */
+ /* only care about stuff up to '.' OR second ':' */
+ /* 1) 'Link:: This is a link' */
+ /* 2) 'Link: (file). This is another link' */
+ /* 3) 'Link: (file)node. This is yet another' */
+ /* 4) 'Link: node.' */
+
+ /* found a variation on #4 in amdref.info-2 ! */
+ /* 5) 'Link: node: */
+
+ /* find the refname */
+ if (!(match=copy_to_anychar( start, ":", refname )))
+ return -1;
+ strip_spaces( *refname );
+ strip_dupspaces( *refname );
+ match++;
+
+ /* simple case is case #1 above */
+ if (*match == ':')
+ {
+ *reffile = NULL;
+ *refnode = NULL;
+ *end_of_link = match;
+ return 0;
+ }
+ else
+ {
+ /* look for parentheses */
+ match2 = strchr( match, '(' );
+
+ /* this means it must be form 4 */
+ /* but dont look too far away */
+ /* this is a real hack - should do something more intelligent that 10 chars */
+ if (!match2 || (match2 - match) > 10)
+ {
+ /* look for a ',', ':' or '.' ending node */
+#if 0
+ if (!(end_refnode=copy_to_anychar( match, ",", refnode )))
+ if (!(end_refnode=copy_to_anychar( match, ".", refnode )))
+ if (!(end_refnode=copy_to_anychar( match, ":", refnode )))
+#endif
+ if (!(end_refnode=copy_to_anychar( match, ",.:", refnode )))
+ return -1;
+
+ /* but it cant be past end of the menu line */
+ if (end_refnode > end_of_line && !span_lines)
+ return -1;
+
+ *end_of_link = end_refnode;
+ strip_spaces( *refnode );
+ strip_dupspaces( *refnode );
+ *reffile = NULL;
+ return 0;
+ }
+ else
+ {
+ match2++;
+ if (!(match=copy_to_anychar (match2, ")", reffile)))
+ return -1;
+ strip_spaces( *reffile );
+ fixup_info_filename( *reffile );
+ match++;
+ if (!(match=copy_to_anychar (match, ".,", refnode)))
+ return -1;
+ *end_of_link = match;
+ strip_spaces( *refnode );
+ strip_dupspaces( *refnode );
+ strip_dupspaces( *refname );
+ if (!(**refnode))
+ *refnode = NULL;
+
+ return 0;
+ }
+ }
+}
+
+
+/* old version 1.0 stuff */
+#if 0
+
+
+
+void scan_for_menu( NODE *node )
+{
+ char *ptr;
+ char *match;
+ char *line;
+ char *buf;
+ char *refname, *reffile, *refnode;
+ char *junk;
+
+ MENU_ENTRY *head;
+
+ int size;
+ int found;
+
+ NODE *newnode;
+ REFERENCE *newref;
+ MENU_ENTRY *newentry;
+ char menu_hdr[8192];
+ char *menu_hdr_ptr;
+
+ /* search for start of a menu */
+ size = node->contlen;
+ found = 0;
+ for (ptr = node->contents ; ptr < (node->contents+node->contlen) && !found;)
+ {
+ line = get_line_from_contents( ptr, size );
+ match = strstr(line, MENU_START);
+ if (match)
+ {
+ node->menu_start=ptr;
+ found = 1;
+ }
+ size = size - strlen(line) - 1;
+ ptr += strlen(line) + 1;
+ free(line);
+ }
+
+ if (!found)
+ return;
+
+ /* we found a menu start, lets read menu in now */
+ /* keep looking for entries till we hit a end-of-node condition */
+ head = NULL;
+ menu_hdr_ptr = menu_hdr;
+ for ( ; ptr < (node->contents+node->contlen); )
+ {
+ buf = get_line_from_contents( ptr, size );
+ line = buf;
+ size = size - strlen(line) - 1;
+ ptr += strlen(line) + 1;
+
+ if (*line == '\000')
+ {
+ free(buf);
+ continue;
+ }
+
+ if (*line == INFO_FF || *line == INFO_COOKIE)
+ {
+ free(buf);
+ break;
+ }
+
+ /* see if its a new menu entry or not */
+ if (*line != '*')
+ {
+#if 0
+ free(buf);
+ break;
+#endif
+ if ( (*line != '=') && (*line != '#') )
+ {
+ memcpy(menu_hdr_ptr, line, strlen(line));
+ menu_hdr_ptr += strlen(line);
+ *menu_hdr_ptr = '\n';
+ menu_hdr_ptr++;
+ *menu_hdr_ptr = '\0';
+ }
+ free(buf);
+ continue;
+ }
+ else
+ {
+ line += 2;
+
+ if (parse_menu_line( line, &refname, &reffile, &refnode, &junk, 0 ))
+ {
+ free(buf);
+ continue;
+ }
+
+ /* found the end of nodename, so make a new reference */
+ newref = (REFERENCE *) malloc( sizeof(REFERENCE) );
+ newref->refname = refname;
+
+ newentry = (MENU_ENTRY *) malloc( sizeof(MENU_ENTRY) );
+ newentry->ref = newref;
+ if (menu_hdr_ptr != menu_hdr)
+ {
+ newentry->header = strdup(menu_hdr);
+ menu_hdr_ptr = menu_hdr;
+ }
+ else
+ newentry->header = NULL;
+ newentry->next = NULL;
+
+ newnode = (NODE *) malloc( sizeof(NODE) );
+ newref->node = newnode;
+ newnode->next = newnode->prev = newnode->up = NULL;
+ if (refnode)
+ newnode->nodename = refnode;
+ else
+ newnode->nodename = strdup(refname);
+
+ if (reffile)
+ newnode->filename = reffile;
+ else
+ newnode->filename = strdup(node->filename);
+
+
+ if (head == NULL)
+ node->menu = newentry;
+ else
+ head->next = newentry;
+ head = newentry;
+ }
+ free(buf);
+ }
+}
+
+
+
+#endif