summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Gran <spk121@yahoo.com>2010-06-05 14:19:37 -0700
committerMichael Gran <spk121@yahoo.com>2010-06-05 14:28:07 -0700
commit62abf139798af3f35ca3c89200389dc75d51a159 (patch)
treee4936cbbf31ba4edb98a10a23f3620ef7de205d2
parent44c705d173d52cb485d761a51425b4d8e310a67a (diff)
downloadguile-mike-port-encodings.tar.gz
open-file should handle binary mode and coding declarationsmike-port-encodings
The open-file port should use the binary-friendly ISO-8859-1 encoding when a file is opened using mode "b". Also, it should honor a "coding:" declaration at the top of a file when reading files where it is present. * libguile/fports.c (scm_open_file): modified * test-suite/tests/ports.test: more tests for open-file * doc/ref/api-io.texi (File Ports): more documentation for open-file
-rw-r--r--doc/ref/api-io.texi14
-rw-r--r--libguile/fports.c33
-rw-r--r--test-suite/tests/ports.test55
3 files changed, 96 insertions, 6 deletions
diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi
index f09ecfbb4..207a07fe7 100644
--- a/doc/ref/api-io.texi
+++ b/doc/ref/api-io.texi
@@ -130,8 +130,8 @@ this fluid otherwise.
@deffn {Scheme Procedure} port-encoding port
@deffnx {C Function} scm_port_encoding
-Returns, as a string, the character encoding that @var{port} uses to
-interpret its input and output.
+Returns, as a string, the character encoding that @var{port} uses to interpret
+its input and output. The value @code{#f} is equivalent to @code{"ISO-8859-1"}.
@end deffn
@deffn {Scheme Procedure} set-port-conversion-strategy! port sym
@@ -856,11 +856,21 @@ systems, but has no effect on Unix-like systems.
(For reference, Guile leaves text versus binary up to the C library,
@code{b} here just adds @code{O_BINARY} to the underlying @code{open}
call, when that flag is available.)
+
+Also, open the file using the binary-compatible character encoding
+"ISO-8859-1", ignoring any coding declaration or port encoding.
@end table
If a file cannot be opened with the access
requested, @code{open-file} throws an exception.
+When the file is opened, this procedure will scan for a coding
+declaration (@pxref{Character Encoding of Source Files}). If present
+will use that encoding for interpreting the file. Otherwise, the
+port's encoding will be used. To supress this behavior, open
+the file in binary mode and then set the port encoding explicitly
+using @code{set-port-encoding!}.
+
In theory we could create read/write ports which were buffered
in one direction only. However this isn't included in the
current interfaces.
diff --git a/libguile/fports.c b/libguile/fports.c
index 04f3815d0..4a24dc63f 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -315,7 +315,7 @@ fport_canonicalize_filename (SCM filename)
* Return the new port.
*/
SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
- (SCM filename, SCM mode),
+ (SCM filename, SCM mode),
"Open the file whose name is @var{filename}, and return a port\n"
"representing that file. The attributes of the port are\n"
"determined by the @var{mode} string. The way in which this is\n"
@@ -336,7 +336,10 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
"The following additional characters can be appended:\n"
"@table @samp\n"
"@item b\n"
- "Open the underlying file in binary mode, if supported by the operating system. "
+ "Open the underlying file in binary mode, if supported by the system.\n"
+ "Also, open the file using the binary-compatible character encoding\n"
+ "\"ISO-8859-1\", ignoring the port's encoding and the coding declaration\n"
+ "at the top of the input file, if any.\n"
"@item +\n"
"Open the port for both input and output. E.g., @code{r+}: open\n"
"an existing file for both input and output.\n"
@@ -351,6 +354,11 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
"Add line-buffering to the port. The port output buffer will be\n"
"automatically flushed whenever a newline character is written.\n"
"@end table\n"
+ "When the file is opened, this procedure will scan for a coding\n"
+ "declaration@pxref{Character Encoding of Source Files}. If present\n"
+ "will use that encoding for interpreting the file. Otherwise, the\n"
+ "port's encoding will be used.\n"
+ "\n"
"In theory we could create read/write ports which were buffered\n"
"in one direction only. However this isn't included in the\n"
"current interfaces. If a file cannot be opened with the access\n"
@@ -358,7 +366,7 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
#define FUNC_NAME s_scm_open_file
{
SCM port;
- int fdes, flags = 0;
+ int fdes, flags = 0, use_encoding = 1;
unsigned int retries;
char *file, *md, *ptr;
@@ -393,6 +401,7 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR;
break;
case 'b':
+ use_encoding = 0;
#if defined (O_BINARY)
flags |= O_BINARY;
#endif
@@ -426,9 +435,27 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
}
}
+ /* Create a port from this file descriptor. The port's encoding is initially
+ %default-port-encoding. */
port = scm_i_fdes_to_port (fdes, scm_i_mode_bits (mode),
fport_canonicalize_filename (filename));
+ if (use_encoding)
+ {
+ /* If this file has a coding declaration, use that as the port
+ encoding. */
+ if (SCM_INPUT_PORT_P (port))
+ {
+ char *enc = scm_i_scan_for_encoding (port);
+ if (enc != NULL)
+ scm_i_set_port_encoding_x (port, enc);
+ }
+ }
+ else
+ /* If this is a binary file, use the binary-friendly ISO-8859-1
+ encoding. */
+ scm_i_set_port_encoding_x (port, NULL);
+
scm_dynwind_end ();
return port;
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index 1d8bd5052..c0e0cd700 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -21,7 +21,8 @@
:use-module (test-suite lib)
:use-module (test-suite guile-test)
:use-module (ice-9 popen)
- :use-module (ice-9 rdelim))
+ :use-module (ice-9 rdelim)
+ :use-module (rnrs bytevector))
(define (display-line . args)
(for-each display args)
@@ -182,6 +183,58 @@
(string=? line test-string)))
(delete-file filename))
+;;; binary mode ignores port encoding
+(with-fluids ((%default-port-encoding "UTF-8"))
+ (let* ((filename (test-file))
+ (port (open-file filename "w"))
+ (test-string "一二三")
+ (binary-test-string
+ (apply string
+ (map integer->char
+ (uniform-vector->list (string->utf8 test-string))))))
+ (write-line test-string port)
+ (close-port port)
+ (let* ((in-port (open-file filename "rb"))
+ (line (read-line in-port)))
+ (close-port in-port)
+ (pass-if "file: binary mode ignores port encoding"
+ (string=? line binary-test-string)))))
+
+;;; binary mode ignores file coding declaration
+(with-fluids ((%default-port-encoding "UTF-8"))
+ (let* ((filename (test-file))
+ (port (open-file filename "w"))
+ (test-string "一二三")
+ (binary-test-string
+ (apply string
+ (map integer->char
+ (uniform-vector->list (string->utf8 test-string))))))
+ (write-line ";; coding: utf-8" port)
+ (write-line test-string port)
+ (close-port port)
+ (let* ((in-port (open-file filename "rb"))
+ (line1 (read-line in-port))
+ (line2 (read-line in-port)))
+ (close-port in-port)
+ (pass-if "file: binary mode ignores file coding declaration"
+ (string=? line2 binary-test-string)))))
+
+;; open-file honors file coding declarations
+(with-fluids ((%default-port-encoding "UTF-8"))
+ (let* ((filename (test-file))
+ (port (open-output-file filename))
+ (test-string "€100"))
+ (set-port-encoding! port "ISO-8859-15")
+ (write-line ";; coding: iso-8859-15" port)
+ (write-line test-string port)
+ (close-port port)
+ (let* ((in-port (open-input-file filename))
+ (line1 (read-line in-port))
+ (line2 (read-line in-port)))
+ (close-port in-port)
+ (pass-if "file: open-file honors coding declarations"
+ (string=? line2 test-string)))))
+
;;; ungetting characters and strings.
(with-input-from-string "walk on the moon\nmoon"
(lambda ()