;;; read-scheme-source --- Read a file, recognizing scheme forms and comments ;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc. ;; ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public License ;; as published by the Free Software Foundation; either version 3, or ;; (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; Lesser General Public License for more details. ;; ;; You should have received a copy of the GNU Lesser General Public ;; License along with this software; see the file COPYING.LESSER. If ;; not, write to the Free Software Foundation, Inc., 51 Franklin ;; Street, Fifth Floor, Boston, MA 02110-1301 USA ;;; Author: Thien-Thi Nguyen ;;; Commentary: ;; Usage: read-scheme-source FILE1 FILE2 ... ;; ;; This program parses each FILE and writes to stdout sexps that describe the ;; top-level structures of the file: scheme forms, single-line comments, and ;; hash-bang comments. You can further process these (to associate comments ;; w/ scheme forms as a kind of documentation, for example). ;; ;; The output sexps have one of these forms: ;; ;; (quote (filename FILENAME)) ;; ;; (quote (comment :leading-semicolons N ;; :text LINE)) ;; ;; (quote (whitespace :text LINE)) ;; ;; (quote (hash-bang-comment :line LINUM ;; :line-count N ;; :text-list (LINE1 LINE2 ...))) ;; ;; (quote (following-form-properties :line LINUM ;; :line-count N) ;; :type TYPE ;; :signature SIGNATURE ;; :std-int-doc DOCSTRING)) ;; ;; SEXP ;; ;; The first four are straightforward (both FILENAME and LINE are strings sans ;; newline, while LINUM and N are integers). The last two always go together, ;; in that order. SEXP is scheme code processed only by `read' and then ;; `write'. ;; ;; The :type field may be omitted if the form is not recognized. Otherwise, ;; TYPE may be one of: procedure, alias, define-module, variable. ;; ;; The :signature field may be omitted if the form is not a procedure. ;; Otherwise, SIGNATURE is a list showing the procedure's signature. ;; ;; If the type is `procedure' and the form has a standard internal docstring ;; (first body form a string), that is extracted in full -- including any ;; embedded newlines -- and recorded by field :std-int-doc. ;; ;; ;; Usage from a program: The output list of sexps can be retrieved by scheme ;; programs w/o having to capture stdout, like so: ;; ;; (use-modules (scripts read-scheme-source)) ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...)) ;; ;; There are also two convenience procs exported for use by Scheme programs: ;; ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that ;; have the same number of leading semicolons. ;; ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse ;; the ":tags", and return alist of (TAG . VAL) elems. ;; ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles. ;; Make `annotate!' extensible. ;;; Code: (define-module (scripts read-scheme-source) :use-module (ice-9 rdelim) :export (read-scheme-source read-scheme-source-silently quoted? clump)) (define %include-in-guild-list #f) (define %summary "Print a parsed representation of a Scheme file.") ;; Try to figure out what FORM is and its various attributes. ;; Call proc NOTE! with key (a symbol) and value. ;; (define (annotate! form note!) (cond ((and (list? form) (< 2 (length form)) (eq? 'define (car form)) (pair? (cadr form)) (symbol? (caadr form))) (note! ':type 'procedure) (note! ':signature (cadr form)) (and (< 3 (length form)) (string? (caddr form)) (note! ':std-int-doc (caddr form)))) ((and (list? form) (< 2 (length form)) (eq? 'define (car form)) (symbol? (cadr form)) (list? (caddr form)) (< 3 (length (caddr form))) (eq? 'lambda (car (caddr form))) (string? (caddr (caddr form)))) (note! ':type 'procedure) (note! ':signature (cons (cadr form) (cadr (caddr form)))) (note! ':std-int-doc (caddr (caddr form)))) ((and (list? form) (= 3 (length form)) (eq? 'define (car form)) (symbol? (cadr form)) (symbol? (caddr form))) (note! ':type 'alias)) ((and (list? form) (eq? 'define-module (car form))) (note! ':type 'define-module)) ;; Add other types here. (else (note! ':type 'variable)))) ;; Process FILE, calling NB! on parsed top-level elements. ;; Recognized: #!-!# and regular comments in addition to normal forms. ;; (define (process file nb!) (nb! `'(filename ,file)) (let ((hash-bang-rx (make-regexp "^#!")) (bang-hash-rx (make-regexp "^!#")) (all-comment-rx (make-regexp "^[ \t]*(;+)")) (all-whitespace-rx (make-regexp "^[ \t]*$")) (p (open-input-file file))) (let loop ((n (1+ (port-line p))) (line (read-line p))) (or (not n) (eof-object? line) (begin (cond ((regexp-exec hash-bang-rx line) (let loop ((line (read-line p)) (text (list line))) (if (or (eof-object? line) (regexp-exec bang-hash-rx line)) (nb! `'(hash-bang-comment :line ,n :line-count ,(1+ (length text)) :text-list ,(reverse (cons line text)))) (loop (read-line p) (cons line text))))) ((regexp-exec all-whitespace-rx line) (nb! `'(whitespace :text ,line))) ((regexp-exec all-comment-rx line) => (lambda (m) (nb! `'(comment :leading-semicolons ,(let ((m1 (vector-ref m 1))) (- (cdr m1) (car m1))) :text ,line)))) (else (unread-string line p) (let* ((form (read p)) (count (- (port-line p) n)) (props (let* ((props '()) (prop+ (lambda args (set! props (append props args))))) (annotate! form prop+) props))) (or (= count 1) ; ugh (begin (read-line p) (set! count (1+ count)))) (nb! `'(following-form-properties :line ,n :line-count ,count ,@props)) (nb! form)))) (loop (1+ (port-line p)) (read-line p))))))) ;;; entry points (define (read-scheme-source-silently . files) "See commentary in module (scripts read-scheme-source)." (let* ((res '())) (for-each (lambda (file) (process file (lambda (e) (set! res (cons e res))))) files) (reverse res))) (define (read-scheme-source . files) "See commentary in module (scripts read-scheme-source)." (for-each (lambda (file) (process file (lambda (e) (write e) (newline)))) files)) ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...)) ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...) ;; where the tags are symbols. ;; (define (quoted? sym form) (and (list? form) (= 2 (length form)) (eq? 'quote (car form)) (let ((inside (cadr form))) (and (list? inside) (< 0 (length inside)) (eq? sym (car inside)) (let loop ((ls (cdr inside)) (alist '())) (if (null? ls) alist ; retval (let ((first (car ls))) (or (symbol? first) (error "bad list!")) (loop (cddr ls) (acons (string->symbol (substring (symbol->string first) 1)) (cadr ls) alist))))))))) ;; Filter FORMS, combining contiguous comment forms that have the same number ;; of leading semicolons. Do not include in them whitespace lines. ;; Whitespace lines outside of such comment groupings are ignored, as are ;; hash-bang comments. All other forms are passed through unchanged. ;; (define (clump forms) (let loop ((forms forms) (acc '()) (pass-this-one-through? #f)) (if (null? forms) (reverse acc) ; retval (let ((form (car forms))) (cond (pass-this-one-through? (loop (cdr forms) (cons form acc) #f)) ((quoted? 'following-form-properties form) (loop (cdr forms) (cons form acc) #t)) ((quoted? 'whitespace form) ;;; ignore (loop (cdr forms) acc #f)) ((quoted? 'hash-bang-comment form) ;;; ignore for now (loop (cdr forms) acc #f)) ((quoted? 'comment form) => (lambda (alist) (let cloop ((inner-forms (cdr forms)) (level (assq-ref alist 'leading-semicolons)) (text (list (assq-ref alist 'text)))) (let ((up (lambda () (loop inner-forms (cons (cons level (reverse text)) acc) #f)))) (if (null? inner-forms) (up) (let ((inner-form (car inner-forms))) (cond ((quoted? 'comment inner-form) => (lambda (inner-alist) (let ((new-level (assq-ref inner-alist 'leading-semicolons))) (if (= new-level level) (cloop (cdr inner-forms) level (cons (assq-ref inner-alist 'text) text)) (up))))) (else (up))))))))) (else (loop (cdr forms) (cons form acc) #f))))))) ;;; script entry point (define main read-scheme-source) ;;; read-scheme-source ends here