diff options
-rw-r--r-- | byterun/Makefile | 2 | ||||
-rw-r--r-- | stdlib/format.ml | 100 | ||||
-rw-r--r-- | stdlib/format.mli | 13 | ||||
-rw-r--r-- | stdlib/pervasives.mli | 41 | ||||
-rw-r--r-- | stdlib/scanf.mli | 78 |
5 files changed, 163 insertions, 71 deletions
diff --git a/byterun/Makefile b/byterun/Makefile index 316f69e5c6..0299515fb9 100644 --- a/byterun/Makefile +++ b/byterun/Makefile @@ -15,7 +15,7 @@ include Makefile.common -CFLAGS=-DCAML_NAME_SPACE -O $(BYTECCCOMPOPTS) $(IFLEXDIR) +CFLAGS=-DCAML_NAME_SPACE -O $(BYTECCCOMPOPTS) -fPIC $(IFLEXDIR) DFLAGS=-DCAML_NAME_SPACE -g -DDEBUG $(BYTECCCOMPOPTS) $(IFLEXDIR) OBJS=$(COMMONOBJS) unix.o main.o diff --git a/stdlib/format.ml b/stdlib/format.ml index 28bb5f1c59..445e824206 100644 --- a/stdlib/format.ml +++ b/stdlib/format.ml @@ -43,7 +43,7 @@ type pp_token = | Pp_newline (* to force a newline inside a block *) | Pp_if_newline (* to do something only if this very line has been broken *) -| Pp_open_tag of string (* opening a tag name *) +| Pp_open_tag of tag (* opening a tag name *) | Pp_close_tag (* closing the most recently opened tag *) and tag = string @@ -147,13 +147,13 @@ type formatter = { (* Ellipsis string. *) mutable pp_ellipsis : string; (* Output function. *) - mutable pp_output_function : string -> int -> int -> unit; + mutable pp_out_string : string -> int -> int -> unit; (* Flushing function. *) - mutable pp_flush_function : unit -> unit; + mutable pp_out_flush : unit -> unit; (* Output of new lines. *) - mutable pp_output_newline : unit -> unit; + mutable pp_out_newline : unit -> unit; (* Output of indentation spaces. *) - mutable pp_output_spaces : int -> unit; + mutable pp_out_spaces : int -> unit; (* Are tags printed ? *) mutable pp_print_tags : bool; (* Are tags marked ? *) @@ -240,9 +240,9 @@ let pp_clear_queue state = let pp_infinity = 1000000010;; (* Output functions for the formatter. 
*) -let pp_output_string state s = state.pp_output_function s 0 (String.length s) -and pp_output_newline state = state.pp_output_newline () -and pp_display_blanks state n = state.pp_output_spaces n +let pp_output_string state s = state.pp_out_string s 0 (String.length s) +and pp_output_newline state = state.pp_out_newline () +and pp_output_spaces state n = state.pp_out_spaces n ;; (* To format a break, indenting a new line. *) @@ -254,7 +254,7 @@ let break_new_line state offset width = let real_indent = min state.pp_max_indent indent in state.pp_current_indent <- real_indent; state.pp_space_left <- state.pp_margin - state.pp_current_indent; - pp_display_blanks state state.pp_current_indent + pp_output_spaces state state.pp_current_indent ;; (* To force a line break inside a block: no offset is added. *) @@ -263,7 +263,7 @@ let break_line state width = break_new_line state 0 width;; (* To format a break that fits on the current line. *) let break_same_line state width = state.pp_space_left <- state.pp_space_left - width; - pp_display_blanks state width + pp_output_spaces state width ;; (* To indent no more than pp_max_indent, if one tries to open a block @@ -675,9 +675,9 @@ and pp_open_box state indent = pp_open_box_gen state indent Pp_box;; (* Print a new line after printing all queued text (same for print_flush but without a newline). *) let pp_print_newline state () = - pp_flush_queue state true; state.pp_flush_function () + pp_flush_queue state true; state.pp_out_flush () and pp_print_flush state () = - pp_flush_queue state false; state.pp_flush_function ();; + pp_flush_queue state false; state.pp_out_flush ();; (* To get a newline when one does not want to close the current block. 
*) let pp_force_newline state () = @@ -808,42 +808,70 @@ let pp_set_margin state n = let pp_get_margin state () = state.pp_margin;; +type formatter_out_functions = { + out_string : string -> int -> int -> unit; + out_flush : unit -> unit; + out_newline : unit -> unit; + out_spaces : int -> unit; +} +;; + +let pp_set_formatter_out_functions state { + out_string = f; + out_flush = g; + out_newline = h; + out_spaces = i; + } = + state.pp_out_string <- f; + state.pp_out_flush <- g; + state.pp_out_newline <- h; + state.pp_out_spaces <- i; +;; + +let pp_get_formatter_out_functions state () = { + out_string = state.pp_out_string; + out_flush = state.pp_out_flush; + out_newline = state.pp_out_newline; + out_spaces = state.pp_out_spaces; +} +;; + let pp_set_formatter_output_functions state f g = - state.pp_output_function <- f; state.pp_flush_function <- g;; + state.pp_out_string <- f; state.pp_out_flush <- g;; let pp_get_formatter_output_functions state () = - (state.pp_output_function, state.pp_flush_function) + (state.pp_out_string, state.pp_out_flush) ;; let pp_set_all_formatter_output_functions state ~out:f ~flush:g ~newline:h ~spaces:i = pp_set_formatter_output_functions state f g; - state.pp_output_newline <- h; - state.pp_output_spaces <- i; + state.pp_out_newline <- h; + state.pp_out_spaces <- i; ;; let pp_get_all_formatter_output_functions state () = - (state.pp_output_function, state.pp_flush_function, - state.pp_output_newline, state.pp_output_spaces) + (state.pp_out_string, state.pp_out_flush, + state.pp_out_newline, state.pp_out_spaces) ;; (* Default function to output new lines. *) -let display_newline state () = state.pp_output_function "\n" 0 1;; +let display_newline state () = state.pp_out_string "\n" 0 1;; (* Default function to output spaces. 
*) let blank_line = String.make 80 ' ';; let rec display_blanks state n = if n > 0 then - if n <= 80 then state.pp_output_function blank_line 0 n else + if n <= 80 then state.pp_out_string blank_line 0 n else begin - state.pp_output_function blank_line 0 80; + state.pp_out_string blank_line 0 80; display_blanks state (n - 80) end ;; let pp_set_formatter_out_channel state os = - state.pp_output_function <- output os; - state.pp_flush_function <- (fun () -> flush os); - state.pp_output_newline <- display_newline state; - state.pp_output_spaces <- display_blanks state; + state.pp_out_string <- output os; + state.pp_out_flush <- (fun () -> flush os); + state.pp_out_newline <- display_newline state; + state.pp_out_spaces <- display_blanks state; ;; (************************************************************** @@ -855,8 +883,8 @@ let pp_set_formatter_out_channel state os = let default_pp_mark_open_tag s = "<" ^ s ^ ">";; let default_pp_mark_close_tag s = "</" ^ s ^ ">";; -let default_pp_print_open_tag _ = ();; -let default_pp_print_close_tag = default_pp_print_open_tag;; +let default_pp_print_open_tag = ignore;; +let default_pp_print_close_tag = ignore;; let pp_make_formatter f g h i = (* The initial state of the formatter contains a dummy box. *) @@ -883,10 +911,10 @@ let pp_make_formatter f g h i = pp_curr_depth = 1; pp_max_boxes = max_int; pp_ellipsis = "."; - pp_output_function = f; - pp_flush_function = g; - pp_output_newline = h; - pp_output_spaces = i; + pp_out_string = f; + pp_out_flush = g; + pp_out_newline = h; + pp_out_spaces = i; pp_print_tags = false; pp_mark_tags = false; pp_mark_open_tag = default_pp_mark_open_tag; @@ -900,8 +928,8 @@ let pp_make_formatter f g h i = (* Make a formatter with default functions to output spaces and new lines. 
*) let make_formatter output flush = let ppf = pp_make_formatter output flush ignore ignore in - ppf.pp_output_newline <- display_newline ppf; - ppf.pp_output_spaces <- display_blanks ppf; + ppf.pp_out_newline <- display_newline ppf; + ppf.pp_out_spaces <- display_blanks ppf; ppf ;; @@ -979,6 +1007,11 @@ and get_ellipsis_text = pp_get_ellipsis_text std_formatter and set_formatter_out_channel = pp_set_formatter_out_channel std_formatter +and set_formatter_out_functions = + pp_set_formatter_out_functions std_formatter +and get_formatter_out_functions = + pp_get_formatter_out_functions std_formatter + and set_formatter_output_functions = pp_set_formatter_output_functions std_formatter and get_formatter_output_functions = @@ -1347,5 +1380,6 @@ let bprintf b = (* Deprecated alias for ksprintf. *) let kprintf = ksprintf;; +(* Output everything left in the pretty printer queue at end of execution. *) at_exit print_flush ;; diff --git a/stdlib/format.mli b/stdlib/format.mli index 4831fe0202..9fcb8f08ce 100644 --- a/stdlib/format.mli +++ b/stdlib/format.mli @@ -282,7 +282,7 @@ type tag = string;; those strings is considered as zero for line breaking). Thus, tag handling is in some sense transparent to pretty-printing - and does not interfere with usual pretty-printing. Hence, a single + and does not interfere with usual indentation. Hence, a single pretty printing routine can output both simple ``verbatim'' material or richer decorated output depending on the treatment of tags. 
By default, tags are not active, hence the output is not @@ -367,6 +367,17 @@ val get_formatter_output_functions : (** {6:meaning Changing the meaning of standard formatter pretty printing} *) +type formatter_out_functions = { + out_string : string -> int -> int -> unit; + out_flush : unit -> unit; + out_newline : unit -> unit; + out_spaces : int -> unit; +} +;; + +val set_formatter_out_functions: formatter_out_functions -> unit;; +val get_formatter_out_functions: unit -> formatter_out_functions;; + (** The [Format] module is versatile enough to let you completely redefine the meaning of pretty printing: you may provide your own functions to define how to handle indentation, line breaking, and even printing of all the diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli index bf19b2aeac..1d8fd70e3e 100644 --- a/stdlib/pervasives.mli +++ b/stdlib/pervasives.mli @@ -866,9 +866,33 @@ external decr : int ref -> unit = "%decr" (** {6 Operations on format strings} *) -(** Format strings are used to read and print data using formatted input - functions in module {!Scanf} and formatted output in modules {!Printf} and - {!Format}. *) +(** Format strings are character strings with special lexical conventions + that define the functionality of formatted input/output functions. Format + strings are used to read data with formatted input functions from module + {!Scanf} and to print data with formatted output functions from modules + {!Printf} and {!Format}. + + Format strings are made of three kinds of entities: + - {e conversion specifications}, introduced by the special character ['%'] + followed by one or more characters specifying what kind of argument to + read or print, + - {e formatting indications}, introduced by the special character ['@'] + followed by one or more characters specifying how to read or print the + argument, + - {e plain characters} that are regular characters with usual lexical + conventions.
Plain characters specify string literals to be read in the + input or printed in the output. + + There is an additional lexical rule to escape the special characters in + format strings: if a special character follows a ['%'] character, it is + treated as a plain character. In other words, ["%%"] is considered as a + plain ['%'] and ["%@"] as a plain ['@']. + + For more information about conversion specifications and formatting + indications available, read the documentation of modules {!Scanf}, + {!Printf} and {!Format}. + +*) (** Format strings have a general and highly polymorphic type [('a, 'b, 'c, 'd, 'e, 'f) format6]. Type [format6] is built in. @@ -883,7 +907,7 @@ external decr : int ref -> unit = "%decr" ['d] is the result type for the [scanf]-style functions, ['e] is the type of the receiver function for the [scanf]-style functions, ['f] is the result type for the [printf]-style function. - *) +*) type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6 type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4 @@ -895,14 +919,17 @@ external format_of_string : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity" (** [format_of_string s] returns a format string read from the string - literal [s]. *) + literal [s]. + Note: [format_of_string] cannot convert a string argument that is not a + literal. If you need this functionality, use the more general + {!Scanf.format_from_string} function. *) val ( ^^ ) : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> ('f, 'b, 'c, 'e, 'g, 'h) format6 -> ('a, 'b, 'c, 'd, 'g, 'h) format6 -(** [f1 ^^ f2] catenates formats [f1] and [f2]. The result is a format - that accepts arguments from [f1], then arguments from [f2]. *) +(** [f1 ^^ f2] catenates format strings [f1] and [f2]. The result is a + format string that accepts arguments from [f1], then arguments from [f2].
*) (** {6 Program termination} *) diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index c147f7a04b..bb68d40fc6 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -45,7 +45,8 @@ material with module {!Printf} or {!Format}), - [f] is a function that has as many arguments as the number of values to - read in the input. *) + read in the input. +*) (** {7 A simple example} *) @@ -62,7 +63,8 @@ then [bscanf Scanning.stdin "%d" f] reads an integer [n] from the standard input and returns [f n] (that is [n + 1]). Thus, if we evaluate [bscanf stdin "%d" f], and then enter [41] at the - keyboard, we get [42] as the final result. *) + keyboard, we get [42] as the final result. +*) (** {7 Formatted input as a functional feature} *) @@ -75,8 +77,9 @@ useful additions to easily define complex tokens; as expected within a functional programming language, the formatted input functions also support polymorphism, in particular arbitrary interaction with - polymorphic user-defined scanners. Furthermore, the OCaml formatted input - facility is fully type-checked at compile time. *) + polymorphic user-defined scanners. Furthermore, the OCaml formatted input + facility is fully type-checked at compile time. +*) (** {6 Formatted input channel} *) @@ -101,7 +104,8 @@ type scanbuf = in_channel;; Note: a scanning action may often require to examine one character in advance; when this ``lookahead'' character does not belong to the token read, it is stored back in the scanning buffer and becomes the next - character yet to be read. *) + character yet to be read. +*) val stdin : in_channel;; (** The standard input notion for the [Scanf] module. @@ -154,7 +158,8 @@ val from_string : string -> in_channel;; (** [Scanning.from_string s] returns a formatted input channel which reads from the given string. Reading starts from the first character in the string. - The end-of-input condition is set when the end of the string is reached. 
*) + The end-of-input condition is set when the end of the string is reached. +*) val from_function : (unit -> char) -> in_channel;; (** [Scanning.from_function f] returns a formatted input channel with the @@ -163,20 +168,24 @@ val from_function : (unit -> char) -> in_channel;; When scanning needs one more character, the given function is called. When the function has no more character to provide, it {e must} signal an - end-of-input condition by raising the exception [End_of_file]. *) + end-of-input condition by raising the exception [End_of_file]. +*) val from_channel : Pervasives.in_channel -> in_channel;; (** [Scanning.from_channel ic] returns a formatted input channel which reads from the regular input channel [ic] argument, starting at the current - reading position. *) + reading position. +*) val end_of_input : in_channel -> bool;; (** [Scanning.end_of_input ic] tests the end-of-input condition of the given - formatted input channel. *) + formatted input channel. +*) val beginning_of_input : in_channel -> bool;; (** [Scanning.beginning_of_input ic] tests the beginning of input condition of - the given formatted input channel. *) + the given formatted input channel. +*) val name_of_input : in_channel -> string;; (** [Scanning.name_of_input ic] returns the name of the character source @@ -186,7 +195,8 @@ val name_of_input : in_channel -> string;; val stdib : in_channel;; (** A deprecated alias for [Scanning.stdin], the scanning buffer reading from - [Pervasives.stdin]. *) + [Pervasives.stdin]. +*) end;; @@ -216,8 +226,9 @@ type ('a, 'b, 'c, 'd) scanner = *) exception Scan_failure of string;; -(** The exception that formatted input functions raise when the input cannot be - read according to the given format. *) +(** The exception that formatted input functions raise when the input cannot + be read according to the given format. 
+*) (** {6 The general formatted input function} *) @@ -231,18 +242,21 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; [Scanf.sscanf "x= 1" "%s = %i" f] returns [2]. Arguments [r1] to [rN] are user-defined input functions that read the - argument corresponding to a [%r] conversion. *) + argument corresponding to the [%r] conversions specified in the format + string. +*) (** {6 Format string description} *) -(** The format is a character string which contains three types of +(** The format string is a character string which contains three types of objects: - plain characters, which are simply matched with the characters of the input (with a special case for space and line feed, see {!Scanf.space}), - conversion specifications, each of which causes reading and conversion of one argument for the function [f] (see {!Scanf.conversion}), - scanning indications to specify boundaries of tokens - (see scanning {!Scanf.indication}). *) + (see scanning {!Scanf.indication}). +*) (** {7:space The space character in format strings} *) @@ -261,7 +275,8 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; also matches no amount of whitespace at all; hence, the call [bscanf ib "Price = %d $" (fun p -> p)] succeeds and returns [1] when reading an input with various whitespace in it, such as [Price = 1 $], - [Price = 1 $], or even [Price=1$]. *) + [Price = 1 $], or even [Price=1$]. +*) (** {7:conversion Conversion specifications in format strings} *) @@ -385,7 +400,8 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; analysis and parsing. If it appears not expressive enough for your needs, several alternative exists: regular expressions (module [Str]), stream parsers, [ocamllex]-generated lexers, - [ocamlyacc]-generated parsers. *) + [ocamlyacc]-generated parsers. 
+*) (** {7:indication Scanning indications in format strings} *) @@ -401,10 +417,10 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; Note: - - As usual in format strings, [%] characters must be escaped using [%%] - and [%\@] is equivalent to [\@]; this rule still holds within range - specifications and scanning indications. - For instance, ["%s@%%"] reads a string up to the next [%] character. + - As usual in format strings, [%] and [\@] characters must be escaped + using [%%] and [%\@]; this rule still holds within range specifications + and scanning indications. + For instance, ["%s@%%"] reads a string up to the next [%] character. - The scanning indications introduce slight differences in the syntax of [Scanf] format strings, compared to those used for the [Printf] module. However, the scanning indications are similar to those used in @@ -412,7 +428,8 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; by [!Scanf.bscanf], it is wise to use printing functions from the [Format] module (or, if you need to use functions from [Printf], banish or carefully double check the format strings that contain ['\@'] - characters). *) + characters). +*) (** {7 Exceptions during scanning} *) @@ -433,7 +450,7 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; - as a consequence, scanning a [%s] conversion never raises exception [End_of_file]: if the end of input is reached the conversion succeeds and simply returns the characters read so far, or [""] if none were ever read. - *) +*) (** {6 Specialised formatted input functions} *) @@ -448,14 +465,16 @@ val fscanf : Pervasives.in_channel -> ('a, 'b, 'c, 'd) scanner;; position, and so on). As a consequence, never mix direct low level reading and high level - scanning from the same regular input channel. *) + scanning from the same regular input channel. +*) val sscanf : string -> ('a, 'b, 'c, 'd) scanner;; (** Same as {!Scanf.bscanf}, but reads from the given string. 
*) val scanf : ('a, 'b, 'c, 'd) scanner;; (** Same as {!Scanf.bscanf}, but reads from the predefined formatted input - channel {!Scanf.Scanning.stdin} that is connected to [Pervasives.stdin]. *) + channel {!Scanf.Scanning.stdin} that is connected to [Pervasives.stdin]. +*) val kscanf : Scanning.in_channel -> (Scanning.in_channel -> exn -> 'd) -> @@ -464,7 +483,8 @@ val kscanf : [ef] that is called in case of error: if the scanning process or some conversion fails, the scanning function aborts and calls the error handling function [ef] with the formatted input channel and the - exception that aborted the scanning process as arguments. *) + exception that aborted the scanning process as arguments. +*) (** {6 Reading format strings from input} *) @@ -496,10 +516,10 @@ val format_from_string : @since 3.10.0 *) -val unescaped : string -> string +val unescaped : string -> string;; (** Return a copy of the argument with escape sequences, following the lexical conventions of OCaml, replaced by their corresponding - special characters. If there is no escape sequence in the + special characters. If there is no escape sequence in the argument, still return a copy, contrary to String.escaped. @since 4.00.0 *) |