summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt12
-rw-r--r--Makefile.in1
-rw-r--r--charconv.c214
-rw-r--r--charconv.h44
-rw-r--r--cmakeconfig.h.in6
-rw-r--r--config.h.in6
-rwxr-xr-xconfigure10
-rw-r--r--configure.ac4
-rw-r--r--fmtutils.c266
-rw-r--r--fmtutils.h2
-rw-r--r--pcap-int.h45
-rw-r--r--pcap-npf.c18
-rw-r--r--pcap-rpcap.c17
-rw-r--r--pcap.3pcap.in46
-rw-r--r--pcap.c203
-rw-r--r--pcap/pcap.h20
-rw-r--r--pcap_init.3pcap89
-rw-r--r--pcap_lookupdev.3pcap8
-rw-r--r--rpcapd/CMakeLists.txt1
-rw-r--r--rpcapd/rpcapd.c10
-rw-r--r--savefile.c109
-rw-r--r--sf-pcap.c4
-rw-r--r--sockutils.c4
23 files changed, 1080 insertions, 59 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb531994..c6f5f1cb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,9 +427,9 @@ main(void)
endif(NOT HAVE_GNU_STRERROR_R)
else(HAVE_STRERROR_R)
#
- # We don't have strerror_r; do we have strerror_s?
+ # We don't have strerror_r; do we have _wcserror_s?
#
- check_function_exists(strerror_s HAVE_STRERROR_S)
+ check_function_exists(_wcserror_s HAVE__WCSERROR_S)
endif(HAVE_STRERROR_R)
#
@@ -1027,11 +1027,14 @@ set(PROJECT_SOURCE_LIST_C
if(WIN32)
#
+ # We add the character set conversion routines; they're Windows-only
+ # for now.
+ #
# We assume we don't have asprintf(), and provide an implementation
# that uses _vscprintf() to determine how big the string needs to be.
#
set(PROJECT_SOURCE_LIST_C ${PROJECT_SOURCE_LIST_C}
- missing/win_asprintf.c)
+ charconv.c missing/win_asprintf.c)
else()
if(NOT HAVE_ASPRINTF)
set(PROJECT_SOURCE_LIST_C ${PROJECT_SOURCE_LIST_C} missing/asprintf.c)
@@ -1204,7 +1207,7 @@ message(STATUS "Packet capture mechanism type: ${PCAP_TYPE}")
if(WIN32)
if(PCAP_TYPE STREQUAL "npf")
#
- # Link with packet.dll before WinSock2.
+ # Link with packet.dll before Winsock2.
#
set(PCAP_LINK_LIBRARIES ${PACKET_LIBRARIES} ${PCAP_LINK_LIBRARIES})
elseif(PCAP_TYPE STREQUAL "null")
@@ -2486,6 +2489,7 @@ set(MAN3PCAP_NOEXPAND
pcap_get_required_select_timeout.3pcap
pcap_get_selectable_fd.3pcap
pcap_geterr.3pcap
+ pcap_init.3pcap
pcap_inject.3pcap
pcap_is_swapped.3pcap
pcap_lib_version.3pcap
diff --git a/Makefile.in b/Makefile.in
index 9826db20..a67d6943 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -203,6 +203,7 @@ MAN3PCAP_NOEXPAND = \
pcap_get_required_select_timeout.3pcap \
pcap_get_selectable_fd.3pcap \
pcap_geterr.3pcap \
+ pcap_init.3pcap \
pcap_inject.3pcap \
pcap_is_swapped.3pcap \
pcap_lib_version.3pcap \
diff --git a/charconv.c b/charconv.c
new file mode 100644
index 00000000..ac6ddf13
--- /dev/null
+++ b/charconv.c
@@ -0,0 +1,214 @@
+/* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
+/*
+ * Copyright (c) 1993, 1994, 1995, 1996, 1997
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _WIN32
+#include <windows.h>
+
+#include "charconv.h"
+
+/*
+ * Convert a null-terminated string in the given code page to a newly
+ * allocated, null-terminated UTF-16LE string.
+ *
+ * "flags" is handed unchanged to MultiByteToWideChar().
+ *
+ * Returns the converted string, which the caller must free(), or NULL
+ * on failure.  errno is set to EINVAL if the conversion itself fails;
+ * if the allocation fails, errno is whatever malloc() left there.
+ */
+wchar_t *
+cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
+{
+	wchar_t *wide;
+	int nchars;
+
+	/*
+	 * Sizing pass: with no output buffer, MultiByteToWideChar()
+	 * just reports how many wchar_t's the result needs.  An input
+	 * length of -1 says that cp_string is null-terminated.
+	 */
+	nchars = MultiByteToWideChar(codepage, flags, cp_string, -1,
+	    NULL, 0);
+	if (nchars != 0) {
+		wide = malloc(nchars * sizeof (wchar_t));
+		if (wide == NULL) {
+			/*
+			 * Out of memory; assume malloc() has set errno.
+			 */
+			return (NULL);
+		}
+
+		/*
+		 * Conversion pass, into the buffer we just sized.
+		 */
+		if (MultiByteToWideChar(codepage, flags, cp_string, -1,
+		    wide, nchars) != 0)
+			return (wide);
+
+		/*
+		 * XXX - should this ever happen, given that the
+		 * sizing pass already accepted the same string?
+		 */
+		free(wide);
+	}
+
+	/*
+	 * Either pass reported an error; fail with EINVAL.
+	 */
+	errno = EINVAL;
+	return (NULL);
+}
+
+/*
+ * Convert a null-terminated UTF-16LE string to a newly allocated,
+ * null-terminated string in the given code page.
+ *
+ * Returns the converted string, which the caller must free(), or NULL
+ * on failure.  errno is set to EINVAL if the conversion itself fails;
+ * if the allocation fails, errno is whatever malloc() left there.
+ */
+char *
+utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
+{
+	DWORD flags;
+	int cp_len;
+	char *cp_string;
+
+	/*
+	 * Map from UTF-16LE to the specified code page.
+	 * First, find out how big a buffer we'll need.
+	 * We convert composite characters to precomposed characters,
+	 * as that's what Windows expects.
+	 */
+	flags = WC_COMPOSITECHECK;
+	cp_len = WideCharToMultiByte(codepage, flags,
+	    utf16le_string, -1, NULL, 0, NULL, NULL);
+	if (cp_len == 0 && GetLastError() == ERROR_INVALID_FLAGS) {
+		/*
+		 * Some code pages (CP_UTF8 among them) don't accept
+		 * WC_COMPOSITECHECK; WideCharToMultiByte() rejects
+		 * the call outright rather than ignoring the flag.
+		 * Retry with no flags, and use the same flags for
+		 * the real conversion below so that the two passes
+		 * agree on the length.
+		 */
+		flags = 0;
+		cp_len = WideCharToMultiByte(codepage, flags,
+		    utf16le_string, -1, NULL, 0, NULL, NULL);
+	}
+	if (cp_len == 0) {
+		/*
+		 * Error.  Fail with EINVAL.
+		 */
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	/*
+	 * Now attempt to allocate a buffer for that.
+	 */
+	cp_string = malloc(cp_len * sizeof (char));
+	if (cp_string == NULL) {
+		/*
+		 * Not enough memory; assume errno has been
+		 * set, and fail.
+		 */
+		return (NULL);
+	}
+
+	/*
+	 * Now convert.
+	 */
+	cp_len = WideCharToMultiByte(codepage, flags,
+	    utf16le_string, -1, cp_string, cp_len, NULL, NULL);
+	if (cp_len == 0) {
+		/*
+		 * Error.  Fail with EINVAL.
+		 * XXX - should this ever happen, given that
+		 * we already ran the string through
+		 * WideCharToMultiByte() to find out how big
+		 * a buffer we needed?
+		 */
+		free(cp_string);
+		errno = EINVAL;
+		return (NULL);
+	}
+	return (cp_string);
+}
+
+/*
+ * Rewrite, in place, a UTF-8 error message as a string in the current
+ * thread's code page, as best we can.
+ *
+ * errbuf is taken to be PCAP_ERRBUF_SIZE bytes long.  If the message
+ * can't be converted, or the converted message doesn't fit, errbuf is
+ * overwritten with a fixed message saying so.
+ */
+void
+utf_8_to_acp_truncated(char *errbuf)
+{
+	wchar_t *wide_msg;
+	int nbytes;
+	DWORD winerr;
+
+	/*
+	 * Go by way of UTF-16LE.  That lets Microsoft's conversion
+	 * routines do the work, so we don't have to understand every
+	 * code page ourselves, and, as the intermediate string lives
+	 * in its own buffer, the result can go straight back into
+	 * errbuf.
+	 */
+	wide_msg = cp_to_utf_16le(CP_UTF8, errbuf, 0);
+	if (wide_msg == NULL) {
+		/*
+		 * Couldn't even get to UTF-16LE.  Give up.
+		 */
+		snprintf(errbuf, PCAP_ERRBUF_SIZE,
+		    "Can't convert error string to the local code page");
+		return;
+	}
+
+	/*
+	 * Now go from UTF-16LE to the current thread's code page,
+	 * with no flags.
+	 *
+	 * XXX - it would be nice to truncate cleanly, the way
+	 * utf_16le_to_utf_8_truncated() does, when the buffer is too
+	 * small, but that takes knowledge of how characters are
+	 * formed in every possible target code page, so that a
+	 * multi-byte character isn't cut into pieces.  Converting to
+	 * an un-truncated string first and then copying has the same
+	 * problem.
+	 */
+	nbytes = WideCharToMultiByte(CP_THREAD_ACP, 0, wide_msg, -1,
+	    errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
+	if (nbytes != 0) {
+		/*
+		 * Converted; errbuf now holds the result.
+		 */
+		free(wide_msg);
+		return;
+	}
+
+	/*
+	 * Failed.  Fetch the error code before calling anything else.
+	 */
+	winerr = GetLastError();
+	free(wide_msg);
+	if (winerr != ERROR_INSUFFICIENT_BUFFER) {
+		snprintf(errbuf, PCAP_ERRBUF_SIZE,
+		    "Can't convert error string to the local code page");
+	} else {
+		snprintf(errbuf, PCAP_ERRBUF_SIZE,
+		    "The error string, in the local code page, didn't fit in the buffer");
+	}
+}
+#endif
diff --git a/charconv.h b/charconv.h
new file mode 100644
index 00000000..a37d424b
--- /dev/null
+++ b/charconv.h
@@ -0,0 +1,44 @@
+/* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
+/*
+ * Copyright (c) 1993, 1994, 1995, 1996, 1997
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef charconv_h
+#define charconv_h
+
+#ifdef _WIN32
+/*
+ * Windows-only character set conversion routines.
+ *
+ * NOTE(review): these prototypes use UINT, DWORD and wchar_t, so
+ * <windows.h> presumably has to be included before this header -
+ * confirm against the files that include it.
+ */
+
+/*
+ * Convert a null-terminated string in the given code page to a
+ * malloc()ed UTF-16LE string; "flags" is passed on to
+ * MultiByteToWideChar().  Returns NULL on failure.
+ */
+extern wchar_t *cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags);
+
+/*
+ * Convert a null-terminated UTF-16LE string to a malloc()ed string in
+ * the given code page.  Returns NULL on failure.
+ */
+extern char *utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string);
+
+/*
+ * Convert, in place, a UTF-8 error message in a PCAP_ERRBUF_SIZE-byte
+ * buffer to the current thread's code page.
+ */
+extern void utf_8_to_acp_truncated(char *);
+#endif
+
+#endif /* charconv_h */
diff --git a/cmakeconfig.h.in b/cmakeconfig.h.in
index 73be7aa4..84fb42ae 100644
--- a/cmakeconfig.h.in
+++ b/cmakeconfig.h.in
@@ -180,9 +180,6 @@
/* Define to 1 if you have the `strerror' function. */
#cmakedefine HAVE_STRERROR 1
-/* Define to 1 if you have the `strerror_s' function. */
-#cmakedefine HAVE_STRERROR_S 1
-
/* Define to 1 if you have the <strings.h> header file. */
#cmakedefine HAVE_STRINGS_H 1
@@ -265,6 +262,9 @@
/* Define to 1 if you have the `vsyslog' function. */
#cmakedefine HAVE_VSYSLOG 1
+/* Define to 1 if you have the `_wcserror_s' function. */
+#cmakedefine HAVE__WCSERROR_S 1
+
/* Define to 1 if you have the `PacketIsLoopbackAdapter' function. */
#cmakedefine HAVE_PACKET_IS_LOOPBACK_ADAPTER 1
diff --git a/config.h.in b/config.h.in
index cbec4928..93be6269 100644
--- a/config.h.in
+++ b/config.h.in
@@ -192,9 +192,6 @@
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
-/* Define to 1 if you have the `strerror_s' function. */
-#undef HAVE_STRERROR_S
-
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
@@ -274,6 +271,9 @@
/* Define to 1 if you have the `vsyslog' function. */
#undef HAVE_VSYSLOG
+/* Define to 1 if you have the `_wcserror_s' function. */
+#undef HAVE__WCSERROR_S
+
/* IPv6 */
#undef INET6
diff --git a/configure b/configure
index a5b1084c..1cb1df1f 100755
--- a/configure
+++ b/configure
@@ -5358,14 +5358,14 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
else
#
- # We don't have strerror_r; do we have strerror_s?
+ # We don't have strerror_r; do we have _wcserror_s?
#
- for ac_func in strerror_s
+ for ac_func in _wcserror_s
do :
- ac_fn_c_check_func "$LINENO" "strerror_s" "ac_cv_func_strerror_s"
-if test "x$ac_cv_func_strerror_s" = xyes; then :
+ ac_fn_c_check_func "$LINENO" "_wcserror_s" "ac_cv_func__wcserror_s"
+if test "x$ac_cv_func__wcserror_s" = xyes; then :
cat >>confdefs.h <<_ACEOF
-#define HAVE_STRERROR_S 1
+#define HAVE__WCSERROR_S 1
_ACEOF
fi
diff --git a/configure.ac b/configure.ac
index d63ccc7a..a396263c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -133,9 +133,9 @@ main(void)
],
[
#
- # We don't have strerror_r; do we have strerror_s?
+ # We don't have strerror_r; do we have _wcserror_s?
#
- AC_CHECK_FUNCS(strerror_s)
+ AC_CHECK_FUNCS(_wcserror_s)
])
#
diff --git a/fmtutils.c b/fmtutils.c
index a4f59c2b..8f6921fb 100644
--- a/fmtutils.c
+++ b/fmtutils.c
@@ -47,12 +47,219 @@
#include <string.h>
#include <errno.h>
-#include <pcap/pcap.h>
+#include "pcap-int.h"
#include "portability.h"
#include "fmtutils.h"
+#ifdef _WIN32
+#include "charconv.h"
+#endif
+
+/*
+ * Set the encoding used for the error messages generated in this file.
+ */
+#ifndef _WIN32
+void
+pcap_fmt_set_encoding(unsigned int opts _U_)
+{
+	/*
+	 * Not Windows; there is no state to record.
+	 */
+}
+#else
+/*
+ * Nonzero once UTF-8 has been requested.  It is never cleared again;
+ * while it is zero, error messages are converted to the local code
+ * page.
+ */
+static int use_utf_8;
+
+void
+pcap_fmt_set_encoding(unsigned int opts)
+{
+	/*
+	 * Anything other than a request for UTF-8 leaves the
+	 * current setting alone.
+	 */
+	if (opts != PCAP_CHAR_ENC_UTF_8)
+		return;
+	use_utf_8 = 1;
+}
+#endif
+
+#ifdef _WIN32
+/*
+ * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
+ * a buffer starting at the specified location and stopping before we
+ * go past the specified size.  This will only put out complete UTF-8
+ * sequences, and always null-terminates the output unless the buffer
+ * size is 0.
+ *
+ * We do this ourselves because Microsoft doesn't offer a "convert and
+ * stop at a UTF-8 character boundary if we run out of space" routine.
+ */
+#define IS_LEADING_SURROGATE(c) \
+	((c) >= 0xd800 && (c) < 0xdc00)
+#define IS_TRAILING_SURROGATE(c) \
+	((c) >= 0xdc00 && (c) < 0xe000)
+#define SURROGATE_VALUE(leading, trailing) \
+	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
+#define REPLACEMENT_CHARACTER	0x0FFFD
+
+/*
+ * utf_16 is the null-terminated input, utf_8 is the output buffer,
+ * and utf_8_len is the size of that buffer in bytes, including room
+ * for the terminating '\0'.
+ *
+ * Unpaired surrogates are replaced by REPLACEMENT CHARACTER.
+ *
+ * Returns a pointer to the terminating '\0' written to the buffer
+ * or, if utf_8_len is 0, utf_8 itself with nothing written.
+ */
+static char *
+utf_16le_to_utf_8_truncated(wchar_t *utf_16, char *utf_8, size_t utf_8_len)
+{
+	wchar_t c, c2;
+	uint32_t uc;
+	size_t nbytes;
+
+	if (utf_8_len == 0) {
+		/*
+		 * Not even enough room for a trailing '\0'.
+		 * Don't put anything into the buffer.
+		 */
+		return (utf_8);
+	}
+
+	while ((c = *utf_16++) != '\0') {
+		if (IS_LEADING_SURROGATE(c)) {
+			/*
+			 * Leading surrogate.  Must be followed by
+			 * a trailing surrogate.
+			 */
+			c2 = *utf_16;
+			if (c2 == '\0') {
+				/*
+				 * Oops, string ends with a lead
+				 * surrogate.  Try to drop in
+				 * a REPLACEMENT CHARACTER, and
+				 * don't move the string pointer,
+				 * so on the next trip through
+				 * the loop we grab the terminating
+				 * '\0' and quit.
+				 */
+				uc = REPLACEMENT_CHARACTER;
+			} else {
+				/*
+				 * OK, we can consume this 2-octet
+				 * value.
+				 */
+				utf_16++;
+				if (IS_TRAILING_SURROGATE(c2)) {
+					/*
+					 * Trailing surrogate.
+					 * This calculation will,
+					 * for c being a leading
+					 * surrogate and c2 being
+					 * a trailing surrogate,
+					 * produce a value between
+					 * 0x10000 and 0x10ffff,
+					 * so it's always going to be
+					 * a valid Unicode code point.
+					 */
+					uc = SURROGATE_VALUE(c, c2);
+				} else {
+					/*
+					 * Not a trailing surrogate;
+					 * try to drop in a
+					 * REPLACEMENT CHARACTER.
+					 */
+					uc = REPLACEMENT_CHARACTER;
+				}
+			}
+		} else {
+			/*
+			 * Not a leading surrogate.
+			 */
+			if (IS_TRAILING_SURROGATE(c)) {
+				/*
+				 * Trailing surrogate without
+				 * a preceding leading surrogate.
+				 * Try to drop in a REPLACEMENT
+				 * CHARACTER.
+				 */
+				uc = REPLACEMENT_CHARACTER;
+			} else {
+				/*
+				 * This is a valid BMP character;
+				 * drop it in.
+				 */
+				uc = c;
+			}
+		}
+
+		/*
+		 * OK, uc is a valid Unicode character; how
+		 * many bytes worth of UTF-8 does it require?
+		 */
+		if (uc < 0x0080)
+			nbytes = 1;
+		else if (uc < 0x0800)
+			nbytes = 2;
+		else if (uc < 0x010000)
+			nbytes = 3;
+		else
+			nbytes = 4;
+
+		if (utf_8_len < nbytes + 1) {
+			/*
+			 * Not enough room for those bytes plus a
+			 * trailing '\0'; stop here, on a character
+			 * boundary.
+			 */
+			break;
+		}
+
+		switch (nbytes) {
+
+		case 1:
+			*utf_8++ = (char)uc;
+			break;
+
+		case 2:
+			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
+			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+			break;
+
+		case 3:
+			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
+			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
+			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+			break;
+
+		default:
+			/*
+			 * The lead byte of a 4-byte sequence carries
+			 * 3 bits of the code point, and the top one
+			 * is set for U+100000 through U+10FFFF, so
+			 * the mask has to be 0x07.
+			 */
+			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
+			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
+			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
+			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+			break;
+		}
+
+		/*
+		 * Charge the buffer for exactly what we wrote, so
+		 * that the room check above stays accurate.
+		 */
+		utf_8_len -= nbytes;
+	}
+
+	/*
+	 * OK, we have enough room for (at least) a trailing '\0'.
+	 * (We started out with enough room, thanks to the test
+	 * for a zero-length buffer at the beginning, and if
+	 * there wasn't enough room for any character we wanted
+	 * to put into the buffer *plus* a trailing '\0',
+	 * we'd have quit before putting it into the buffer,
+	 * and thus would have left enough room for the trailing
+	 * '\0'.)
+	 *
+	 * Drop it in.
+	 */
+	*utf_8 = '\0';
+
+	/*
+	 * Return a pointer to the terminating '\0', in case we
+	 * want to drop something in after that.
+	 */
+	return (utf_8);
+}
+#endif /* _WIN32 */
+
/*
* Generate an error message based on a format, arguments, and an
* errno, with a message for the errno after the formatted output.
@@ -89,18 +296,35 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
/*
* Now append the string for the error code.
*/
-#if defined(HAVE_STRERROR_S)
+#if defined(HAVE__WCSERROR_S)
/*
- * We have a Windows-style strerror_s().
+ * We have a Windows-style _wcserror_s().
+ * Generate a UTF-16LE error message.
*/
- errno_t err = strerror_s(p, errbuflen_remaining, errnum);
+ wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+ errno_t err = _wcserror_s(utf_16_errbuf, sizeof (utf_16_errbuf), errnum);
if (err != 0) {
/*
* It doesn't appear to be documented anywhere obvious
- * what the error returns from strerror_s().
+ * what the error returns from _wcserror_s().
*/
snprintf(p, errbuflen_remaining, "Error %d", errnum);
+ return;
}
+
+ /*
+ * Now convert it from UTF-16LE to UTF-8, dropping it in the
+ * remaining space in the buffer, and truncating it - cleanly,
+ * on a UTF-8 character boundary - if it doesn't fit.
+ */
+ utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+ /*
+ * Now, if we're not in UTF-8 mode, convert errbuf to the
+ * local code page.
+ */
+ if (!use_utf_8)
+ utf_8_to_acp_truncated(errbuf);
#elif defined(HAVE_GNU_STRERROR_R)
/*
* We have a GNU-style strerror_r(), which is *not* guaranteed to
@@ -136,7 +360,7 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
}
#else
/*
- * We have neither strerror_s() nor strerror_r(), so we're
+ * We have neither _wcserror_s() nor strerror_r(), so we're
* stuck with using pcap_strerror().
*/
snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
@@ -157,7 +381,8 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
char *p;
size_t errbuflen_remaining;
DWORD retval;
- char win32_errbuf[PCAP_ERRBUF_SIZE+1];
+ wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+ size_t utf_8_len;
va_start(ap, fmt);
vsnprintf(errbuf, errbuflen, fmt, ap);
@@ -196,9 +421,9 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
* get the message translated if it's in a language they don't
* happen to understand.
*/
- retval = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
- win32_errbuf, PCAP_ERRBUF_SIZE, NULL);
+ utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
if (retval == 0) {
/*
* Failed.
@@ -208,6 +433,27 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
return;
}
- snprintf(p, errbuflen_remaining, "%s (%lu)", win32_errbuf, errnum);
+ /*
+ * Now convert it from UTF-16LE to UTF-8.
+ */
+ p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+ /*
+ * Now append the error number, if it fits.
+ */
+ utf_8_len = p - errbuf;
+ errbuflen_remaining -= utf_8_len;
+ if (utf_8_len == 0) {
+ /* The message was empty. */
+ snprintf(p, errbuflen_remaining, "(%lu)", errnum);
+ } else
+ snprintf(p, errbuflen_remaining, " (%lu)", errnum);
+
+ /*
+ * Now, if we're not in UTF-8 mode, convert errbuf to the
+ * local code page.
+ */
+ if (!use_utf_8)
+ utf_8_to_acp_truncated(errbuf);
}
#endif
diff --git a/fmtutils.h b/fmtutils.h
index 838948bc..ba0f66ca 100644
--- a/fmtutils.h
+++ b/fmtutils.h
@@ -40,6 +40,8 @@
extern "C" {
#endif
+void pcap_fmt_set_encoding(unsigned int);
+
void pcap_fmt_errmsg_for_errno(char *, size_t, int,
PCAP_FORMAT_STRING(const char *), ...) PCAP_PRINTFLIKE(4, 5);
diff --git a/pcap-int.h b/pcap-int.h
index 43c948fd..99ede45b 100644
--- a/pcap-int.h
+++ b/pcap-int.h
@@ -51,6 +51,33 @@
extern "C" {
#endif
+/*
+ * If pcap_new_api is set, we disable pcap_lookupdev(), because:
+ *
+ * it's not thread-safe, and is marked as deprecated, on all
+ * platforms;
+ *
+ * on Windows, it may return UTF-16LE strings, which the program
+ * might then pass to pcap_create() (or to pcap_open_live(), which
+ * then passes them to pcap_create()), requiring pcap_create() to
+ * check for UTF-16LE strings using a hack, and that hack 1)
+ * *cannot* be 100% reliable and 2) runs the risk of going past the
+ * end of the string.
+ *
+ * We keep it around in legacy mode for compatibility.
+ *
+ * We also disable the aforementioned hack in pcap_create().
+ */
+extern int pcap_new_api;
+
+/*
+ * If pcap_utf_8_mode is set, on Windows we treat strings as UTF-8.
+ *
+ * On UN*Xes, we assume all strings are and should be in UTF-8, regardless
+ * of the setting of this flag.
+ */
+extern int pcap_utf_8_mode;
+
#ifdef MSDOS
#include <fcntl.h>
#include <io.h>
@@ -260,6 +287,9 @@ struct pcap {
struct bpf_program fcode;
char errbuf[PCAP_ERRBUF_SIZE + 1];
+#ifdef _WIN32
+ char acp_errbuf[PCAP_ERRBUF_SIZE + 1]; /* buffer for local code page error strings */
+#endif
int dlt_count;
u_int *dlt_list;
int tstamp_type_count;
@@ -486,7 +516,8 @@ int add_addr_to_if(pcap_if_list_t *, const char *, bpf_u_int32,
#endif
/*
- * Internal interfaces for "pcap_open_offline()".
+ * Internal interfaces for "pcap_open_offline()" and other savefile
+ * I/O routines.
*
* "pcap_open_offline_common()" allocates and fills in a pcap_t, for use
* by pcap_open_offline routines.
@@ -497,10 +528,22 @@ int add_addr_to_if(pcap_if_list_t *, const char *, bpf_u_int32,
* "sf_cleanup()" closes the file handle associated with a pcap_t, if
* appropriate, and frees all data common to all modules for handling
* savefile types.
+ *
+ * "charset_fopen()", on Windows in UTF-8 mode, does an fopen() that
+ * treats the pathname as being in UTF-8 rather than in the local
+ * code page.
*/
pcap_t *pcap_open_offline_common(char *ebuf, size_t size);
bpf_u_int32 pcap_adjust_snapshot(bpf_u_int32 linktype, bpf_u_int32 snaplen);
void sf_cleanup(pcap_t *p);
+#ifdef _WIN32
+FILE *charset_fopen(const char *path, const char *mode);
+#else
+/*
+ * On UN*X, just use Boring Old fopen().
+ */
+#define charset_fopen(path, mode) fopen((path), (mode))
+#endif
/*
* Internal interfaces for doing user-mode filtering of packets and
diff --git a/pcap-npf.c b/pcap-npf.c
index 1c615c04..0cc4ac8a 100644
--- a/pcap-npf.c
+++ b/pcap-npf.c
@@ -937,7 +937,7 @@ pcap_activate_npf(pcap_t *p)
}
}
- /* Init WinSock */
+ /* Init Winsock if it hasn't already been initialized */
pcap_wsockinit();
pw->adapter = PacketOpenAdapter(p->opt.device);
@@ -1898,6 +1898,22 @@ pcap_lookupdev(char *errbuf)
DWORD dwVersion;
DWORD dwWindowsMajorVersion;
+ /*
+ * We disable this in "new API" mode, because 1) in WinPcap/Npcap,
+ * it may return UTF-16 strings, for backwards-compatibility
+ * reasons, and we're also disabling the hack to make that work,
+ * for not-going-past-the-end-of-a-string reasons, and 2) we
+ * want its behavior to be consistent.
+ *
+ * In addition, it's not thread-safe, so we've marked it as
+ * deprecated.
+ */
+ if (pcap_new_api) {
+ snprintf(errbuf, PCAP_ERRBUF_SIZE,
+ "pcap_lookupdev() is deprecated and is not supported in programs calling pcap_init()");
+ return (NULL);
+ }
+
/* disable MSVC's GetVersion() deprecated warning here */
DIAG_OFF_DEPRECATION
dwVersion = GetVersion(); /* get the OS version */
diff --git a/pcap-rpcap.c b/pcap-rpcap.c
index f7999fe0..836681a1 100644
--- a/pcap-rpcap.c
+++ b/pcap-rpcap.c
@@ -46,6 +46,10 @@
#include "rpcap-protocol.h"
#include "pcap-rpcap.h"
+#ifdef _WIN32
+#include "charconv.h" /* for utf_8_to_acp_truncated() */
+#endif
+
#ifdef HAVE_OPENSSL
#include "sslutils.h"
#endif
@@ -2161,7 +2165,7 @@ rpcap_setup_session(const char *source, struct pcap_rmtauth *auth,
}
/* Warning: this call can be the first one called by the user. */
- /* For this reason, we have to initialize the WinSock support. */
+ /* For this reason, we have to initialize the Winsock support. */
if (sock_init(errbuf, PCAP_ERRBUF_SIZE) == -1)
return -1;
@@ -2797,7 +2801,7 @@ SOCKET pcap_remoteact_accept_ex(const char *address, const char *port, const cha
hints.ai_socktype = SOCK_STREAM;
/* Warning: this call can be the first one called by the user. */
- /* For this reason, we have to initialize the WinSock support. */
+ /* For this reason, we have to initialize the Winsock support. */
if (sock_init(errbuf, PCAP_ERRBUF_SIZE) == -1)
return (SOCKET)-1;
@@ -3353,6 +3357,15 @@ static void rpcap_msg_err(SOCKET sockctrl, SSL *ssl, uint32 plen, char *remote_e
*/
remote_errbuf[PCAP_ERRBUF_SIZE - 1] = '\0';
+#ifdef _WIN32
+ /*
+ * If we're not in UTF-8 mode, convert it to the local
+ * code page.
+ */
+ if (!pcap_utf_8_mode)
+ utf_8_to_acp_truncated(remote_errbuf);
+#endif
+
/*
* Throw away the rest.
*/
diff --git a/pcap.3pcap.in b/pcap.3pcap.in
index cfe2288a..4dff262f 100644
--- a/pcap.3pcap.in
+++ b/pcap.3pcap.in
@@ -35,6 +35,52 @@ on the network, even those destined for other hosts, are accessible
through this mechanism.
It also supports saving captured packets to a ``savefile'', and reading
packets from a ``savefile''.
+.SS Initializing
+.BR pcap_init ()
+initializes the library. It takes an argument giving options;
+currently, the options are:
+.TP
+.B PCAP_CHAR_ENC_LOCAL
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in the local character encoding.
+.TP
+.B PCAP_CHAR_ENC_UTF_8
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in UTF-8.
+.PP
+On UNIX-like systems, the local character encoding is assumed to be
+UTF-8, so no character encoding transformations are done.
+.PP
+On Windows, the local character encoding is the local ANSI code page.
+.PP
+If
+.BR pcap_init ()
+is called, the deprecated
+.BR pcap_lookupdev ()
+routine always fails, so it should not be used, and, on Windows,
+.BR pcap_create ()
+does not attempt to handle UTF-16LE strings.
+.PP
+If
+.BR pcap_init ()
+is not called, strings are treated as being in the local ANSI code page
+on Windows,
+.BR pcap_lookupdev ()
+will succeed if there is a device on which to capture, and
+.BR pcap_create ()
+makes an attempt to check whether the string passed as an argument is a
+UTF-16LE string - note that this attempt is unsafe, as it may run past
+the end of the string - to handle
+.BR pcap_lookupdev ()
+returning a UTF-16LE string. Programs that don't call
+.BR pcap_init ()
+should, on Windows, call
+.BR pcap_wsockinit ()
+to initialize Winsock; this is not necessary if
+.BR pcap_init ()
+is called, as
+.BR pcap_init ()
+will initialize Winsock itself on Windows.
.SS Opening a capture handle for reading
To open a handle for a live capture, given the name of the network or
other interface on which the capture should be done, call
diff --git a/pcap.c b/pcap.c
index 1d684cc3..19670ab0 100644
--- a/pcap.c
+++ b/pcap.c
@@ -126,6 +126,23 @@ struct rtentry; /* declarations in <net/if.h> */
#ifdef _WIN32
/*
* DllMain(), required when built as a Windows DLL.
+ *
+ * To quote the WSAStartup() documentation:
+ *
+ * The WSAStartup function typically leads to protocol-specific helper
+ * DLLs being loaded. As a result, the WSAStartup function should not
+ * be called from the DllMain function in a application DLL. This can
+ * potentially cause deadlocks.
+ *
+ * and the WSACleanup() documentation:
+ *
+ * The WSACleanup function typically leads to protocol-specific helper
+ * DLLs being unloaded. As a result, the WSACleanup function should not
+ * be called from the DllMain function in a application DLL. This can
+ * potentially cause deadlocks.
+ *
+ * So we don't actually do anything here. pcap_init() should be called
+ * to initialize pcap on both UN*X and Windows.
*/
BOOL WINAPI DllMain(
HANDLE hinstDLL _U_,
@@ -137,42 +154,160 @@ BOOL WINAPI DllMain(
}
/*
- * Start WinSock.
- * Exported in case some applications using WinPcap/Npcap called it,
- * even though it wasn't exported.
+ * Start Winsock.
+ * Internal routine.
*/
-int
-wsockinit(void)
+static int
+internal_wsockinit(char *errbuf)
{
WORD wVersionRequested;
WSADATA wsaData;
static int err = -1;
static int done = 0;
+ int status;
if (done)
return (err);
- wVersionRequested = MAKEWORD( 1, 1);
- err = WSAStartup( wVersionRequested, &wsaData );
- atexit ((void(*)(void))WSACleanup);
+ /*
+ * Versions of Windows that don't support Winsock 2.2 are
+ * too old for us.
+ */
+ wVersionRequested = MAKEWORD(2, 2);
+ status = WSAStartup(wVersionRequested, &wsaData);
done = 1;
-
- if ( err != 0 )
- err = -1;
+ if (status != 0) {
+ if (errbuf != NULL) {
+ pcap_fmt_errmsg_for_win32_err(errbuf, PCAP_ERRBUF_SIZE,
+ status, "WSAStartup() failed");
+ }
+ return (err);
+ }
+ atexit ((void(*)(void))WSACleanup);
+ err = 0;
return (err);
}
/*
+ * Exported in case some applications using WinPcap/Npcap called it,
+ * even though it wasn't exported.
+ */
+int
+wsockinit(void)
+{
+ return (internal_wsockinit(NULL));
+}
+
+/*
* This is the exported function; new programs should call this.
+ * *Newer* programs should call pcap_init().
*/
int
pcap_wsockinit(void)
{
- return (wsockinit());
+ return (internal_wsockinit(NULL));
}
#endif /* _WIN32 */
/*
+ * Do whatever initialization is needed for libpcap.
+ *
+ * The argument specifies whether we use the local code page or UTF-8
+ * for strings; on UN*X, we just assume UTF-8 in places where the encoding
+ * would matter, whereas, on Windows, we use the local code page for
+ * PCAP_CHAR_ENC_LOCAL and UTF-8 for PCAP_CHAR_ENC_UTF_8.
+ *
+ * On Windows, we also disable the hack in pcap_create() to deal with
+ * being handed UTF-16 strings, because if the user calls this they're
+ * explicitly declaring that they will either be passing local code
+ * page strings or UTF-8 strings, so we don't need to allow UTF-16LE
+ * strings to be passed. For good measure, on Windows *and* UN*X,
+ * we disable pcap_lookupdev(), to prevent anybody from even
+ * *trying* to pass the result of pcap_lookupdev() - which might be
+ * UTF-16LE on Windows, for ugly compatibility reasons - to pcap_create()
+ * or pcap_open_live() or pcap_open().
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int pcap_new_api; /* pcap_lookupdev() always fails */
+int pcap_utf_8_mode; /* Strings should be in UTF-8. */
+
+int
+pcap_init(unsigned int opts, char *errbuf)
+{
+ static int initialized;
+
+ /*
+ * Don't allow multiple calls that set different modes; that
+ * may mean a library is initializing pcap in one mode and
+ * a program using that library, or another library used by
+ * that program, is initializing it in another mode.
+ */
+ switch (opts) {
+
+ case PCAP_CHAR_ENC_LOCAL:
+ /* Leave "UTF-8 mode" off. */
+ if (initialized) {
+ if (pcap_utf_8_mode) {
+ snprintf(errbuf, PCAP_ERRBUF_SIZE,
+ "Multiple pcap_init calls with different character encodings");
+ return (-1);
+ }
+ }
+ break;
+
+ case PCAP_CHAR_ENC_UTF_8:
+ /* Turn on "UTF-8 mode". */
+ if (initialized) {
+ if (!pcap_utf_8_mode) {
+ snprintf(errbuf, PCAP_ERRBUF_SIZE,
+ "Multiple pcap_init calls with different character encodings");
+ return (-1);
+ }
+ }
+ pcap_utf_8_mode = 1;
+ break;
+
+ default:
+ snprintf(errbuf, PCAP_ERRBUF_SIZE, "Unknown options specified");
+ return (-1);
+ }
+
+ /*
+ * Turn the appropriate mode on for error messages; those routines
+ * are also used in rpcapd, which has no access to pcap's internal
+ * UTF-8 mode flag, so we have to call a routine to set its
+ * UTF-8 mode flag.
+ */
+ pcap_fmt_set_encoding(opts);
+
+ if (initialized) {
+ /*
+ * Nothing more to do; for example, on Windows, we've
+ * already initialized Winsock.
+ */
+ return (0);
+ }
+
+#ifdef _WIN32
+ /*
+ * Now set up Winsock.
+ */
+ if (internal_wsockinit(errbuf) == -1) {
+ /* Failed. */
+ return (-1);
+ }
+#endif
+
+ /*
+ * We're done.
+ */
+ initialized = 1;
+ pcap_new_api = 1;
+ return (0);
+}
+
+/*
* String containing the library version.
* Not explicitly exported via a header file - the right API to use
* is pcap_lib_version() - but some programs included it, so we
@@ -1365,6 +1500,22 @@ pcap_lookupdev(char *errbuf)
static char device[IF_NAMESIZE + 1];
char *ret;
+ /*
+ * We disable this in "new API" mode, because 1) in WinPcap/Npcap,
+ * it may return UTF-16 strings, for backwards-compatibility
+ * reasons, and we're also disabling the hack to make that work,
+ * for not-going-past-the-end-of-a-string reasons, and 2) we
+ * want its behavior to be consistent.
+ *
+ * In addition, it's not thread-safe, so we've marked it as
+ * deprecated.
+ */
+ if (pcap_new_api) {
+ snprintf(errbuf, PCAP_ERRBUF_SIZE,
+ "pcap_lookupdev() is deprecated and is not supported in programs calling pcap_init()");
+ return (NULL);
+ }
+
if (pcap_findalldevs(&alldevs, errbuf) == -1)
return (NULL);
@@ -2121,15 +2272,27 @@ pcap_create(const char *device, char *errbuf)
* so, convert it back to the local code page's
* extended ASCII.
*
- * XXX - you *cannot* reliably detect whether a
- * string is UTF-16LE or not; "a" could either
- * be a one-character ASCII string or the first
- * character of a UTF-16LE string. This particular
- * version of this heuristic dates back to WinPcap
- * 4.1.1; PacketOpenAdapter() does uses the same
- * heuristic, with the exact same vulnerability.
+ * We disable that check in "new API" mode, because:
+ *
+ * 1) You *cannot* reliably detect whether a
+ * string is UTF-16LE or not; "a" could either
+ * be a one-character ASCII string or the first
+ * character of a UTF-16LE string.
+ *
+ * 2) Doing that test can run past the end of
+ * the string, if it's a 1-character ASCII
+ * string.
+ *
+ * This particular version of this heuristic dates
+ * back to WinPcap 4.1.1; PacketOpenAdapter()
+ * uses the same heuristic, with the exact same
+ * vulnerability.
+ *
+ * That's why we disable this in "new API" mode.
+ * We keep it around in legacy mode for backwards
+ * compatibility.
*/
- if (device[0] != '\0' && device[1] == '\0') {
+ if (!pcap_new_api && device[0] != '\0' && device[1] == '\0') {
size_t length;
length = wcslen((wchar_t *)device);
diff --git a/pcap/pcap.h b/pcap/pcap.h
index 60f3dbef..c38a303c 100644
--- a/pcap/pcap.h
+++ b/pcap/pcap.h
@@ -365,6 +365,26 @@ typedef void (*pcap_handler)(u_char *, const struct pcap_pkthdr *,
#define PCAP_NETMASK_UNKNOWN 0xffffffff
/*
+ * Initialize pcap. If this isn't called, pcap is initialized to
+ * a mode source-compatible and binary-compatible with older versions
+ * that lack this routine.
+ */
+
+/*
+ * Initialization options.
+ * All bits not listed here are reserved for expansion.
+ *
+ * On UNIX-like systems, the local character encoding is assumed to be
+ * UTF-8, so no character encoding transformations are done.
+ *
+ * On Windows, the local character encoding is the local ANSI code page.
+ */
+#define PCAP_CHAR_ENC_LOCAL 0x00000000U /* strings are in the local character encoding */
+#define PCAP_CHAR_ENC_UTF_8 0x00000001U /* strings are in UTF-8 */
+
+PCAP_API int pcap_init(unsigned int, char *);
+
+/*
* We're deprecating pcap_lookupdev() for various reasons (not
* thread-safe, can behave weirdly with WinPcap). Callers
* should use pcap_findalldevs() and use the first device.
diff --git a/pcap_init.3pcap b/pcap_init.3pcap
new file mode 100644
index 00000000..05fbbd71
--- /dev/null
+++ b/pcap_init.3pcap
@@ -0,0 +1,89 @@
+.\" Copyright (c) 1994, 1996, 1997
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that: (1) source code distributions
+.\" retain the above copyright notice and this paragraph in its entirety, (2)
+.\" distributions including binary code include the above copyright notice and
+.\" this paragraph in its entirety in the documentation or other materials
+.\" provided with the distribution, and (3) all advertising materials mentioning
+.\" features or use of this software display the following acknowledgement:
+.\" ``This product includes software developed by the University of California,
+.\" Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
+.\" the University nor the names of its contributors may be used to endorse
+.\" or promote products derived from this software without specific prior
+.\" written permission.
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+.\" WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+.\"
+.TH PCAP_INIT 3PCAP "11 April 2020"
+.SH NAME
+pcap_init \- initialize the library
+.SH SYNOPSIS
+.nf
+.ft B
+#include <pcap/pcap.h>
+.ft
+.LP
+.nf
+.ft B
+char errbuf[PCAP_ERRBUF_SIZE];
+.ft
+.LP
+.ft B
+int pcap_init(unsigned int opts, char *errbuf);
+.ft
+.fi
+.SH DESCRIPTION
+.BR pcap_init ()
+is used to initialize the Packet Capture library.
+.I opts
+specifies options for the library;
+currently, the options are:
+.TP
+.B PCAP_CHAR_ENC_LOCAL
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in the local character encoding.
+.TP
+.B PCAP_CHAR_ENC_UTF_8
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in UTF-8.
+.PP
+On UNIX-like systems, the local character encoding is assumed to be
+UTF-8, so no character encoding transformations are done.
+.PP
+On Windows, the local character encoding is the local ANSI code page.
+.PP
+If
+.BR pcap_init ()
+is not called, strings are treated as being in the local ANSI code page
+on Windows,
+.BR pcap_lookupdev (3PCAP)
+will succeed if there is a device on which to capture, and
+.BR pcap_create (3PCAP)
+makes an attempt to check whether the string passed as an argument is a
+UTF-16LE string - note that this attempt is unsafe, as it may run past
+the end of the string - to handle
+.BR pcap_lookupdev ()
+returning a UTF-16LE string. Programs that don't call
+.BR pcap_init ()
+should, on Windows, call
+.BR pcap_wsockinit ()
+to initialize Winsock; this is not necessary if
+.BR pcap_init ()
+is called, as
+.BR pcap_init ()
+will initialize Winsock itself on Windows.
+.SH RETURN VALUE
+.BR pcap_init ()
+returns 0 on success and \-1 on failure.
+If \-1 is returned,
+.I errbuf
+is filled in with an appropriate error message.
+.I errbuf
+is assumed to be able to hold at least
+.B PCAP_ERRBUF_SIZE
+chars.
+.SH SEE ALSO
+.BR pcap (3PCAP)
diff --git a/pcap_lookupdev.3pcap b/pcap_lookupdev.3pcap
index a3807de6..57b76585 100644
--- a/pcap_lookupdev.3pcap
+++ b/pcap_lookupdev.3pcap
@@ -44,6 +44,11 @@ and, if the list it returns is not empty, use the first device in the
list. (If the list is empty, there are no devices on which capture is
possible.)
.LP
+.B If
+.BR pcap_init (3PCAP)
+.B has been called, this interface always returns
+.BR NULL .
+.LP
.BR pcap_lookupdev ()
returns a pointer to a string giving the name of a network device
suitable for use with
@@ -55,6 +60,9 @@ or with
and with
.BR pcap_lookupnet (3PCAP).
If there is an error,
+or if
+.BR pcap_init (3PCAP)
+has been called,
.B NULL
is returned and
.I errbuf
diff --git a/rpcapd/CMakeLists.txt b/rpcapd/CMakeLists.txt
index 6b4a7d9b..1ee14bbe 100644
--- a/rpcapd/CMakeLists.txt
+++ b/rpcapd/CMakeLists.txt
@@ -50,6 +50,7 @@ if(WIN32 OR ((CMAKE_USE_PTHREADS_INIT OR PTHREADS_FOUND) AND HAVE_CRYPT))
if(WIN32)
set(RPCAPD_EXTRA_SOURCES
win32-svc.c
+ ${pcap_SOURCE_DIR}/charconv.c
${pcap_SOURCE_DIR}/missing/getopt.c
rpcapd.rc)
include_directories(${pcap_SOURCE_DIR}/rpcapd ${pcap_SOURCE_DIR}/missing)
diff --git a/rpcapd/rpcapd.c b/rpcapd/rpcapd.c
index bfb6e6a0..64f91ead 100644
--- a/rpcapd/rpcapd.c
+++ b/rpcapd/rpcapd.c
@@ -325,6 +325,16 @@ int main(int argc, char *argv[])
}
#endif
+ //
+ // We want UTF-8 error messages.
+ //
+ if (pcap_init(PCAP_CHAR_ENC_UTF_8, errbuf) == -1)
+ {
+ rpcapd_log(LOGPRIO_ERROR, "%s", errbuf);
+ exit(-1);
+ }
+ pcap_fmt_set_encoding(PCAP_CHAR_ENC_UTF_8);
+
if (sock_init(errbuf, PCAP_ERRBUF_SIZE) == -1)
{
rpcapd_log(LOGPRIO_ERROR, "%s", errbuf);
diff --git a/savefile.c b/savefile.c
index aef9fb14..aa35bd3c 100644
--- a/savefile.c
+++ b/savefile.c
@@ -54,6 +54,7 @@
#include "sf-pcap.h"
#include "sf-pcapng.h"
#include "pcap-common.h"
+#include "charconv.h"
#ifdef _WIN32
/*
@@ -246,6 +247,102 @@ sf_cleanup(pcap_t *p)
pcap_freecode(&p->fcode);
}
+#ifdef _WIN32
+/*
+ * Wrapper for fopen() and _wfopen().
+ *
+ * If we're in UTF-8 mode, map the pathname from UTF-8 to UTF-16LE and
+ * call _wfopen().
+ *
+ * If we're not, just use fopen(); that'll treat it as being in the
+ * local code page.
+ */
+FILE *
+charset_fopen(const char *path, const char *mode)
+{
+ wchar_t *utf16_path;
+#define MAX_MODE_LEN 16
+ wchar_t utf16_mode[MAX_MODE_LEN+1];
+ int i;
+ char c;
+ FILE *fp;
+ int save_errno;
+
+ if (pcap_utf_8_mode) {
+ /*
+ * Map from UTF-8 to UTF-16LE.
+ * Fail if there are invalid characters in the input
+ * string, rather than converting them to REPLACEMENT
+ * CHARACTER; the latter is appropriate for strings
+ * to be displayed to the user, but for file names
+ * you just want the attempt to open the file to fail.
+ */
+ utf16_path = cp_to_utf_16le(CP_UTF8, path,
+ MB_ERR_INVALID_CHARS);
+ if (utf16_path == NULL) {
+ /*
+ * Error. Assume errno has been set.
+ *
+ * XXX - what about Windows errors?
+ */
+ return (NULL);
+ }
+
+ /*
+ * Now convert the mode to UTF-16LE as well.
+ * We assume the mode is ASCII, and that
+ * it's short, so that's easy.
+ */
+ for (i = 0; (c = *mode) != '\0'; i++, mode++) {
+ if ((unsigned char)c > 0x7F) {
+ /* Not ASCII (cast: char may be signed); fail with EINVAL. */
+ free(utf16_path);
+ errno = EINVAL;
+ return (NULL);
+ }
+ if (i >= MAX_MODE_LEN) {
+ /* The mode string is longer than we allow. */
+ free(utf16_path);
+ errno = EINVAL;
+ return (NULL);
+ }
+ utf16_mode[i] = c;
+ }
+ utf16_mode[i] = '\0';
+
+ /*
+ * OK, we have UTF-16LE strings; hand them to
+ * _wfopen().
+ */
+ fp = _wfopen(utf16_path, utf16_mode);
+
+ /*
+ * Make sure freeing the UTF-16LE string doesn't
+ * overwrite the error code we got from _wfopen().
+ */
+ save_errno = errno;
+ free(utf16_path);
+ errno = save_errno;
+
+ return (fp);
+ } else {
+ /*
+ * This takes strings in the local code page as an
+ * argument.
+ */
+ return (fopen(path, mode));
+ }
+}
+#else
+/*
+ * On other OSes, just use Boring Old fopen().
+ *
+ * "b" is supported as of C90, so *all* UN*Xes should support it, even
+ * though it does nothing. For MS-DOS, we again need it.
+ */
+#define charset_fopen_read(path, mode) fopen((path), (mode))
+#endif
+
pcap_t *
pcap_open_offline_with_tstamp_precision(const char *fname, u_int precision,
char *errbuf)
@@ -276,12 +373,16 @@ pcap_open_offline_with_tstamp_precision(const char *fname, u_int precision,
}
else {
/*
+ * Use charset_fopen(); on Windows, it tests whether we're
+ * in "local code page" or "UTF-8" mode, and treats the
+ * pathname appropriately, and on other platforms, it just
+ * wraps fopen().
+ *
* "b" is supported as of C90, so *all* UN*Xes should
- * support it, even though it does nothing. It's
- * required on Windows, as the file is a binary file
- * and must be read in binary mode.
+ * support it, even though it does nothing. It's required on
+ * Windows and MS-DOS, where the file must be opened in binary mode.
*/
- fp = fopen(fname, "rb");
+ fp = charset_fopen(fname, "rb");
if (fp == NULL) {
pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE,
errno, "%s", fname);
diff --git a/sf-pcap.c b/sf-pcap.c
index ab51d7ca..5c416b0c 100644
--- a/sf-pcap.c
+++ b/sf-pcap.c
@@ -831,7 +831,7 @@ pcap_dump_open(pcap_t *p, const char *fname)
* required on Windows, as the file is a binary file
* and must be written in binary mode.
*/
- f = fopen(fname, "wb");
+ f = charset_fopen(fname, "wb");
if (f == NULL) {
pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
errno, "%s", fname);
@@ -931,7 +931,7 @@ pcap_dump_open_append(pcap_t *p, const char *fname)
* even though it does nothing. It's required on Windows, as the
* file is a binary file and must be read in binary mode.
*/
- f = fopen(fname, "ab+");
+ f = charset_fopen(fname, "ab+");
if (f == NULL) {
pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
errno, "%s", fname);
diff --git a/sockutils.c b/sockutils.c
index bd3d6cc2..629e46e8 100644
--- a/sockutils.c
+++ b/sockutils.c
@@ -67,7 +67,7 @@
/*
* Winsock initialization.
*
- * Ask for WinSock 2.2.
+ * Ask for Winsock 2.2.
*/
#define WINSOCK_MAJOR_VERSION 2
#define WINSOCK_MINOR_VERSION 2
@@ -121,7 +121,7 @@ static int sock_ismcastaddr(const struct sockaddr *saddr);
****************************************************/
/*
- * Format an error message given an errno value (UN*X) or a WinSock error
+ * Format an error message given an errno value (UN*X) or a Winsock error
* (Windows).
*/
void sock_fmterror(const char *caller, int errcode, char *errbuf, int errbuflen)