Imported from /home/lorry/working-area/delta_ntp/ntp-dev-4.2.7p482.tar.gz.ntp-dev-4.2.7p482

author: Lorry Tar Creator <lorry-tar-importer@baserock.org> 2014-12-02 09:01:21 +0000
committer: <> 2014-12-04 16:11:25 +0000
commit: bdab5265fcbf3f472545073a23f8999749a9f2b9 (patch)
tree: c6018dd03dea906f8f1fb5f105f05b71a7dc250a /ports/winnt/ntpd
download: ntp-bdab5265fcbf3f472545073a23f8999749a9f2b9.tar.gz
4 files changed, 4038 insertions, 0 deletions
diff --git a/ports/winnt/ntpd/hopf_PCI_io.c b/ports/winnt/ntpd/hopf_PCI_io.c
new file mode 100644
index 0000000..a1ba200
--- /dev/null
+++ b/ports/winnt/ntpd/hopf_PCI_io.c
@@ -0,0 +1,335 @@
+/* 
+ * hopf_PCI_io.c
+ * Read data from a hopf PCI clock using the ATLSoft WinNT driver.
+ *
+ * Date: 21.03.2000 Revision: 01.10 
+ *
+ * Copyright (C) 1999, 2000 by Bernd Altmeier altmeier@ATLSoft.de
+ * 
+ */
+
+/*
+ * Ignore nonstandard extension warning.
+ * This happens when including winioctl.h
+ */
+#pragma warning(disable: 4201)
+#define _FILESYSTEMFSCTL_
+
+#include <config.h>
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <winioctl.h>
+
+#include "ntp_stdlib.h"
+#include "hopf_PCI_io.h"
+
+
+#define ATL_PASSTHROUGH_READ_TOSIZE	(3 * sizeof(ULONG))
+#define ATL_PASSTHROUGH_READ_FROMSIZE	0
+#define IOCTL_ATLSOFT_PASSTHROUGH_READ	CTL_CODE(			\
+						FILE_DEVICE_UNKNOWN,	\
+						0x805,			\
+						METHOD_BUFFERED,	\
+						FILE_ANY_ACCESS)
+
+
+HANDLE	hDevice = NULL;	// this is the handle to the PCI Device
+
+HANDLE		hRdEvent;
+OVERLAPPED	Rdoverlapped;
+OVERLAPPED *	pRdOverlapped;
+
+ULONG		iobuffer[256];
+DWORD		cbReturned;
+BOOL		HaveBoard = FALSE;
+
+struct {
+	ULONG	region;
+	ULONG	offset;
+	ULONG	count;
+} io_params;
+
+
+BOOL
+OpenHopfDevice(void)
+{
+	OSVERSIONINFO	VersionInfo;
+	ULONG		deviceNumber;
+	CHAR		deviceName[255];
+			
+	VersionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+	GetVersionEx(&VersionInfo);
+	switch (VersionInfo.dwPlatformId) {
+
+	case VER_PLATFORM_WIN32_WINDOWS:	// Win95/98
+		return FALSE;	// "NTP does not support Win 95-98."
+		break;
+
+	case VER_PLATFORM_WIN32_NT:	// WinNT
+		deviceNumber = 0;
+		snprintf(deviceName, sizeof(deviceName),
+			 "\\\\.\\hclk6039%d", deviceNumber + 1);
+		hDevice = CreateFile(
+			deviceName,
+			GENERIC_WRITE | GENERIC_READ,
+			FILE_SHARE_WRITE | FILE_SHARE_READ,
+			NULL,
+			OPEN_EXISTING,
+			FILE_FLAG_DELETE_ON_CLOSE | FILE_FLAG_OVERLAPPED,
+			NULL);
+		break;
+
+	default:
+		hDevice = INVALID_HANDLE_VALUE;
+		break;
+	} // end switch
+
+	if (INVALID_HANDLE_VALUE == hDevice) // the system didn't return a handle
+		return FALSE;  //"A handle to the driver could not be obtained properly"
+
+	// an event to be used for async transfers
+	hRdEvent = CreateEvent(
+		NULL,
+		TRUE,
+		FALSE,
+		NULL);	
+
+	if (INVALID_HANDLE_VALUE == hRdEvent) 
+		return FALSE;  // the system didn't return a handle
+
+	pRdOverlapped = &Rdoverlapped;
+	pRdOverlapped->hEvent = hRdEvent;
+
+	HaveBoard = TRUE; // board installed and we have access
+
+	return TRUE;
+} // end of OpenHopfDevice()
+
+
+BOOL
+CloseHopfDevice(void)
+{
+	CloseHandle(hRdEvent);// When done, close the handle to the driver
+
+	return CloseHandle(hDevice);
+} // end of CloseHopfDevice()
+
+
+void
+ReadHopfDevice(void)
+{
+	if (!HaveBoard)
+		return;
+
+	DeviceIoControl(
+		hDevice,
+		IOCTL_ATLSOFT_PASSTHROUGH_READ,
+		&io_params,
+		ATL_PASSTHROUGH_READ_TOSIZE,
+		iobuffer,
+		ATL_PASSTHROUGH_READ_FROMSIZE
+		 + io_params.count * sizeof(ULONG),
+		&cbReturned, 
+		pRdOverlapped
+		);
+}
+
+
+#ifdef NOTUSED
+void
+GetHardwareData(
+	LPDWORD	Data32,
+	WORD	Ofs
+	)
+{
+	io_params.region = 1;
+	io_params.offset = Ofs;
+	io_params.count = 1;
+	ReadHopfDevice();
+	*Data32 = iobuffer[0];
+}
+#endif	/* NOTUSED */
+
+
+void
+GetHopfTime(
+	LPHOPFTIME	Data,
+	DWORD		Offset
+	)
+{
+	io_params.region = 1;
+	io_params.offset = Offset;
+	io_params.count = 4;
+
+	ReadHopfDevice();
+
+	Data->wHour = 0;
+	Data->wMinute = 0;
+	Data->wSecond = 0;
+	while (iobuffer[0] >= 60 * 60 * 1000) {
+		iobuffer[0] = iobuffer[0] - 60 * 60 * 1000;
+		Data->wHour++;
+	}
+	while (iobuffer[0] >= 60 * 1000) {
+		iobuffer[0] = iobuffer[0] - 60 * 1000;
+		Data->wMinute++;
+	}
+	while (iobuffer[0] >= 1000) {
+		iobuffer[0] = iobuffer[0] - 1000;
+		Data->wSecond++;
+	}
+	Data->wMilliseconds = LOWORD(iobuffer[0]);
+	Data->wDay = HIBYTE(HIWORD(iobuffer[1]));
+	Data->wMonth = LOBYTE(HIWORD(iobuffer[1]));
+	Data->wYear = LOWORD(iobuffer[1]);
+	Data->wDayOfWeek = HIBYTE(HIWORD(iobuffer[2]));
+	if (Data->wDayOfWeek == 7) // Dow Korrektur
+		Data->wDayOfWeek = 0;
+	
+ 	io_params.region = 1;
+	io_params.offset += 0x08;
+	io_params.count = 1;
+
+	ReadHopfDevice();
+
+	Data->wStatus = LOBYTE(HIWORD(iobuffer[0]));
+}
+
+
+#ifdef NOTUSED
+void
+GetHopfLocalTime(
+	LPHOPFTIME Data
+	)
+{
+	DWORD Offset = 0;
+
+	GetHopfTime(Data, Offset);
+}
+#endif	/* NOTUSED */
+
+
+void
+GetHopfSystemTime(
+	LPHOPFTIME Data
+	)
+{
+	DWORD Offset = 0x10;
+
+	GetHopfTime(Data,Offset);
+}
+
+
+#ifdef NOTUSED
+void
+GetSatData(
+	LPSATSTAT Data
+	)
+{
+	io_params.region = 1;
+	io_params.offset = 0xb0;
+	io_params.count = 5;
+
+	ReadHopfDevice();
+				
+	Data->wVisible	= HIBYTE(HIWORD(iobuffer[0]));
+	Data->wMode	= LOBYTE(LOWORD(iobuffer[0]));
+	Data->wSat0	= HIBYTE(HIWORD(iobuffer[1]));
+	Data->wRat0	= LOBYTE(HIWORD(iobuffer[1]));
+	Data->wSat1	= HIBYTE(LOWORD(iobuffer[1]));
+	Data->wRat1	= LOBYTE(LOWORD(iobuffer[1]));
+	Data->wSat2	= HIBYTE(HIWORD(iobuffer[2]));
+	Data->wRat2	= LOBYTE(HIWORD(iobuffer[2]));
+	Data->wSat3	= HIBYTE(LOWORD(iobuffer[2]));
+	Data->wRat3	= LOBYTE(LOWORD(iobuffer[2]));
+	Data->wSat4	= HIBYTE(HIWORD(iobuffer[3]));
+	Data->wRat4	= LOBYTE(HIWORD(iobuffer[3]));
+	Data->wSat5	= HIBYTE(LOWORD(iobuffer[3]));
+	Data->wRat5	= LOBYTE(LOWORD(iobuffer[3]));
+	Data->wSat6	= HIBYTE(HIWORD(iobuffer[4]));
+	Data->wRat6	= LOBYTE(HIWORD(iobuffer[4]));
+	Data->wSat7	= HIBYTE(LOWORD(iobuffer[4]));
+	Data->wRat7	= LOBYTE(LOWORD(iobuffer[4]));
+}
+
+
+void
+GetDiffTime(
+	LPLONG Data
+	)
+{
+	io_params.region = 1;
+	io_params.offset = 0x0c;
+	io_params.count = 1;
+
+	ReadHopfDevice();
+
+	*Data = iobuffer[0];
+}
+
+
+void
+GetPosition(
+	LPGPSPOS Data
+	)
+{
+	io_params.region = 1;
+	io_params.offset = 0x90; // Positionsdaten L�nge
+	io_params.count  = 1;
+
+	ReadHopfDevice();
+
+	Data->wLongitude = iobuffer[0]; //in Millisekunden
+	io_params.region = 1;
+	io_params.offset = 0xa0; // Positionsdaten Breite
+	io_params.count  = 1;
+
+	ReadHopfDevice();
+
+	Data->wLatitude	= iobuffer[0];
+	Data->wAltitude	= 0;
+}
+
+
+void
+GetHardwareVersion(
+	LPCLOCKVER Data
+	)
+{
+	int i;
+
+	io_params.region = 1;
+	io_params.offset = 0x50;
+	io_params.count = 12;
+
+	ReadHopfDevice();
+				
+	Data->cVersion[0] = '\0';
+	iobuffer[13] = 0;
+	for (i = 0; i < 13; i++) {
+		Data->cVersion[i * 4    ] = HIBYTE(HIWORD(iobuffer[i]));
+		Data->cVersion[i * 4 + 1] = LOBYTE(HIWORD(iobuffer[i]));
+		Data->cVersion[i * 4 + 2] = HIBYTE(LOWORD(iobuffer[i]));
+		Data->cVersion[i * 4 + 3] = LOBYTE(LOWORD(iobuffer[i]));
+	}
+}
+
+
+void
+GetDCFAntenne(
+	LPDCFANTENNE Data
+	)
+{
+	io_params.region = 1;
+	io_params.offset = 0xcc;
+	io_params.count = 1;
+
+	ReadHopfDevice();
+	Data->bStatus1	= HIBYTE(HIWORD(iobuffer[0]));
+	Data->bStatus	= LOBYTE(HIWORD(iobuffer[0]));
+	Data->wAntValue	= LOWORD(iobuffer[0]);
+}
+#endif	/* NOTUSED */
+
diff --git a/ports/winnt/ntpd/nt_clockstuff.c b/ports/winnt/ntpd/nt_clockstuff.c
new file mode 100644
index 0000000..052bfcd
--- /dev/null
+++ b/ports/winnt/ntpd/nt_clockstuff.c
@@ -0,0 +1,1743 @@
+/* Windows NT Clock Routines
+ *
+ * Created by Sven Dietrich  sven@inter-yacht.com
+ *
+ * New interpolation scheme by Dave Hart <davehart@davehart.com> in
+ * February 2009 overcomes 500us-1ms inherent jitter with the older
+ * scheme, first identified by Peter Rosin (nee Ekberg)
+ * <peda@lysator.liu.se> in 2003 [Bug 216].
+ *
+ * Note:  The Windows port of ntpd uses the C99-snprintf replacement for
+ * (v)snprintf(), also used by msyslog(), which does not understand the
+ * printf format specifier %I64d, only the more common %lld.  With the
+ * minimum supported compiler raised to Visual C++ 2005 in ntp-dev in
+ * August 2011, all MS C runtime routines also understand %lld and %llu.
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/resource.h>	/* our private version */
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400	/* VS 2005 */
+#include <intrin.h>				/* for __rdtsc() */
+#endif
+
+#ifdef HAVE_PPSAPI
+#include <timepps.h>
+/*
+ * ports/winnt/include/timepps.h defines EOPNOTSUPP for compatibility
+ * with PPSAPI on other platforms.  ports/winnt/include/isc/net.h has
+ * #define EOPNOTSUPP WSAEOPNOTSUPP, so to avoid a macro redefinition
+ * warning undefine it.
+ */
+#undef EOPNOTSUPP
+#endif	/* HAVE_PPSAPI */
+
+#include "ntp_stdlib.h"
+#include "ntp_unixtime.h"
+#include "ntp_timer.h"
+#include "ntp_assert.h"
+#include "ntp_leapsec.h"
+#include "clockstuff.h"
+#include "ntservice.h"
+#include "ntpd.h"
+#include "ntpd-opts.h"
+
+extern double sys_residual;	/* residual from previous adjustment */
+
+/*
+ * Include code to possibly modify the MM timer while the service is active. 
+ */
+
+/*
+ * Whether or not MM timer modifications takes place is still controlled 
+ * by the variable below which is initialized by a default value but 
+ * might be changed depending on a command line switch.
+ */
+static int modify_mm_timer = MM_TIMER_LORES;
+
+#define MM_TIMER_INTV   1  /* the interval we'd want to set the MM timer to [ms] */
+
+static UINT wTimerRes;
+
+BOOL init_randfile();
+
+static long last_Adj = 0;
+
+#define LS_CORR_INTV_SECS  2   /* seconds to apply leap second correction */
+#define LS_CORR_INTV   ( (LONGLONG) HECTONANOSECONDS * LS_CORR_INTV_SECS )  
+#define LS_CORR_LIMIT  ( (LONGLONG) HECTONANOSECONDS / 2 )  // half a second
+
+typedef union ft_ull {
+	FILETIME ft;
+	ULONGLONG ull;
+	LONGLONG ll;
+	LARGE_INTEGER li;
+} FT_ULL;
+
+/* leap second stuff */
+static FT_ULL ls_ft;
+static DWORD ls_time_adjustment;
+static ULONGLONG ls_ref_perf_cnt;
+static LONGLONG ls_elapsed;
+
+static BOOL winnt_time_initialized = FALSE;
+static BOOL winnt_use_interpolation = FALSE;
+static unsigned clock_thread_id;
+
+
+void WINAPI GetInterpTimeAsFileTime(LPFILETIME pft);
+static void StartClockThread(void);
+static void tune_ctr_freq(LONGLONG, LONGLONG);
+void StopClockThread(void);
+void atexit_revert_mm_timer(void);
+void win_time_stepped(void);
+
+static HANDLE clock_thread = NULL;
+static HANDLE TimerThreadExitRequest = NULL;
+
+/*
+ * interp_time estimates time in 100ns units
+ * based on a performance counter value given.
+ * The 2nd parameter indicates if this is
+ * part of a current time-of-day calculation.
+ */
+ULONGLONG interp_time(ULONGLONG, BOOL);
+
+/*
+ * add_counter_time_pair is called by the
+ * high priority clock thread with a new
+ * sample.
+ */
+void add_counter_time_pair(ULONGLONG, LONGLONG);
+
+/*
+ * globals used by the above two functions to 
+ * implement the counter/time history
+ */
+#define BASELINES_TOT	256
+#define BASELINES_USED	64
+
+static volatile int	newest_baseline = 0;
+static volatile int	newest_baseline_gen = 0;
+static ULONGLONG	baseline_counts[BASELINES_TOT] = {0};
+static LONGLONG		baseline_times[BASELINES_TOT] = {0};
+
+#define CLOCK_BACK_THRESHOLD	100	/* < 10us unremarkable */
+static ULONGLONG	clock_backward_max = CLOCK_BACK_THRESHOLD;
+static int		clock_backward_count;
+
+/**
+ * A flag set on Windows versions which ignore small time adjustments.
+ *
+ * Windows Vista and Windows 7 ignore TimeAdjustment less than 16.
+ * @note Has to be checked for Windows Server 2008/2012 and Windows 8.
+ * Ref: http://support.microsoft.com/kb/2537623, bug #2328
+ */
+static BOOL os_ignores_small_adjustment;
+
+/*
+ * clockperiod is the period used for SetSystemTimeAdjustment 
+ * slewing calculations but does not necessarily correspond
+ * to the precision of the OS clock.  Prior to Windows Vista
+ * (6.0) the two were identical.  In 100ns units.
+ */
+static DWORD clockperiod;
+
+/*
+ * os_clock_precision is the observed precision of the OS
+ * clock, meaning the increment between discrete values. This
+ * is currently calculated once at startup.  100ns units.
+ */
+static ULONGLONG os_clock_precision;
+
+/*
+ * NomPerfCtrFreq is from QueryPerformanceFrequency and is the 
+ * number of performance counter beats per second.  PerfCtrFreq
+ * starts from NomPerfCtrFreq but is maintained using a sliding
+ * window average based on actual performance counter behavior,
+ * to allow us to better tolerate powersaving measures that
+ * alter the effective frequency of the processor cycle counter
+ * (TSC) which sometimes underlies QueryPerformanceCounter.
+ *
+ * Note that the OS is unlikely to be so subtle in its internal
+ * scheduling of waitable timers, presumably done using the
+ * performance counter.  Therefore our calculations for
+ * interpolated time should be based on PerfCtrFreq but our
+ * calculations for SetWaitableTimer should assume the OS will
+ * convert from FILETIME 100ns units to performance counter
+ * beats using the nominal frequency.
+ */
+
+volatile ULONGLONG PerfCtrFreq = 0;
+	 ULONGLONG NomPerfCtrFreq = 0;
+
+/* 
+ * If we're using RDTSC beating at the same rate as
+ * QueryPerformanceCounter, there is a systemic
+ * offset we need to account for when using
+ * counterstamps from serialpps.sys, which are
+ * always from QPC (actually KeQueryPerformanceCounter).
+ */
+static LONGLONG QPC_offset = 0;
+
+/*
+ * Substitute RDTSC for QueryPerformanceCounter()?
+ */
+static int use_pcc = -1;
+
+/*
+ * Restrict threads that call QPC/RDTSC to one CPU?
+ */
+static int lock_interp_threads = -1;
+
+/*
+ * ppm_per_adjust_unit is parts per million effect on the OS
+ * clock per slewing adjustment unit per second.  Per haps.
+ */
+static DOUBLE ppm_per_adjust_unit;
+
+/*
+ * wintickadj emulates the functionality provided by unix tickadj,
+ * providing a baseline clock correction if needed to get the
+ * clock within a few hundred PPM of correct frequency.
+ */
+static long wintickadj;
+
+static void	choose_interp_counter(void);
+static int	is_qpc_built_on_pcc(void);
+
+/*
+ * performance counter frequency observations
+ */
+#define TUNE_CTR_DEPTH		3	/* running avg depth */
+
+static HANDLE		ctr_freq_timer = INVALID_HANDLE_VALUE;
+static ULONGLONG	tune_ctr_freq_max_interval;
+static unsigned		tune_ctr_period;
+void start_ctr_freq_timer(ULONGLONG now_time);
+void reset_ctr_freq_timer(ULONGLONG when, ULONGLONG now);
+void reset_ctr_freq_timer_abs(ULONGLONG when);
+
+/* round a Windows time to the next bottom of the second */
+
+#define ROUND_TO_NEXT_SEC_BOTTOM(t)	\
+do {	\
+	(t) += 3 * HECTONANOSECONDS / 2 - 1;	\
+	(t) /= HECTONANOSECONDS;	\
+	(t) *= HECTONANOSECONDS;	\
+	(t) -= HECTONANOSECONDS / 2;	\
+} while (0)
+
+/*
+ * NT native time format is 100's of nanoseconds since 1601-01-01.
+ * Helpers for converting between "hectonanoseconds" and the 
+ * performance counter scale from which interpolated time is
+ * derived.
+ */
+#define HNS2PERF(hns)	((hns) * PerfCtrFreq / HECTONANOSECONDS)
+#define PERF2HNS(ctr)	((ctr) * HECTONANOSECONDS / PerfCtrFreq)
+
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400	/* VS 2005 */
+#define	get_pcc()	__rdtsc()
+#else
+/*
+ * something like this can be used for a compiler without __rdtsc()
+ */
+ULONGLONG __forceinline 
+get_pcc(void)
+{
+	/* RDTSC returns in EDX:EAX, same as C compiler */
+	__asm {
+		RDTSC
+	}
+}
+#endif
+
+
+/*
+ * perf_ctr() returns the current performance counter value, 
+ * from QueryPerformanceCounter or RDTSC.
+ */
+ULONGLONG WINAPI 
+perf_ctr(void)
+{
+	FT_ULL ft;
+
+	if (use_pcc)
+		return get_pcc();
+	else {
+		QueryPerformanceCounter(&ft.li);
+		return ft.ull;
+	}
+}
+
+
+/*
+ * init_small_adjustment
+ *
+ * Set variable os_ignores_small_adjustment
+ *
+ */
+static void init_small_adjustment(void)
+{
+	OSVERSIONINFO vi;
+	memset(&vi, 0, sizeof(vi));
+	vi.dwOSVersionInfoSize = sizeof(vi);
+
+	if (!GetVersionEx(&vi)) {
+		msyslog(LOG_WARNING, "GetVersionEx failed with error code %d.", GetLastError());
+		os_ignores_small_adjustment = FALSE;
+		return;
+	}
+
+	if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 1) {
+		// Windows 7 and Windows Server 2008 R2
+		//
+		// Windows 7 is documented as affected.
+		// Windows Server 2008 R2 is assumed affected.
+		os_ignores_small_adjustment = TRUE;
+	} else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 0) {
+		// Windows Vista and Windows Server 2008
+		//
+		// Windows Vista is documented as affected.
+		// Windows Server 2008 is assumed affected.
+		os_ignores_small_adjustment = TRUE;
+	} else {
+		os_ignores_small_adjustment = FALSE;
+	}
+}
+
+
+/*
+ * choose_interp_counter - select between QueryPerformanceCounter and
+ *			   the x86 processor cycle counter (TSC).
+ */
+static void
+choose_interp_counter(void)
+{
+	const char *	ntpd_pcc_freq_text;
+	int		qpc_built_on_pcc;
+
+	/*
+	 * Regardless of whether we actually use RDTSC, first determine
+	 * if QueryPerformanceCounter is built on it, so that we can
+	 * decide whether it's prudent to lock QPC-consuming threads to
+	 * a particular CPU.
+	 */
+	qpc_built_on_pcc = is_qpc_built_on_pcc();
+	lock_interp_threads = qpc_built_on_pcc;
+
+	/*
+	 * It's time to make some more permanent knobs,
+	 * but for right now the RDTSC aka PCC dance on x86 is:
+	 *
+	 * 1.  With none of these variables defined, only QPC
+	 *     is used because there is no reliable way to
+	 *     detect counter frequency variation after ntpd
+	 *     startup implemented.
+	 * 2.  We need a better knob, but for now if you know
+	 *     your RDTSC / CPU frequency is invariant, set
+	 *     NTPD_PCC and assuming your QPC is based on the
+	 *     PCC as well, RDTSC will be substituted.
+	 * 3.  More forcefully, you can jam in a desired exact
+	 *     processor frequency, expressed in cycles per
+	 *     second by setting NTPD_PCC_FREQ=398125000, for
+	 *     example, if yor actual known CPU frequency is
+	 *     398.125 MHz, and NTPD_PCC doesn't work because
+	 *     QueryPerformanceCounter is implemented using
+	 *     another counter.  It is very easy to make ntpd
+	 *     fall down if the NTPD_PCC_FREQ value isn't very
+	 *     close to the observed RDTSC units per second.
+	 *
+	 * Items 2 and 3 could probably best be combined into one
+	 * new windows-specific command line switch such as
+	 *   ntpd --pcc
+	 * or
+	 *   ntpd --pcc=398125000
+	 *
+	 * They are currently tied to Windows because that is
+	 * the only ntpd port with its own interpolation, and
+	 * to x86/x64 because no one has ported the Windows
+	 * ntpd port to the sole remaining alternative, Intel
+	 * Itanium.
+	 */
+	if (HAVE_OPT(PCCFREQ))
+		ntpd_pcc_freq_text = OPT_ARG(PCCFREQ);
+	else
+		ntpd_pcc_freq_text = getenv("NTPD_PCC_FREQ");
+
+	if (!HAVE_OPT(USEPCC)
+	    && NULL == ntpd_pcc_freq_text
+	    && NULL == getenv("NTPD_PCC")) {
+		use_pcc = 0;
+		return;
+	}
+
+	if (!qpc_built_on_pcc && NULL == ntpd_pcc_freq_text) {
+		use_pcc = 0;
+		return;
+	}
+
+	use_pcc = 1;
+	if (ntpd_pcc_freq_text != NULL)
+		sscanf(ntpd_pcc_freq_text, 
+		       "%llu", 
+		       &NomPerfCtrFreq);
+
+	NLOG(NLOG_CLOCKINFO)
+		msyslog(LOG_INFO, 
+			"using processor cycle counter "
+			"%.3f MHz", 
+			NomPerfCtrFreq / 1e6);
+	return;
+}
+
+
+/*
+ * is_qpc_built_on_pcc - test if QueryPerformanceCounter runs at the
+ *			 same rate as the processor cycle counter (TSC).
+ */
+static int
+is_qpc_built_on_pcc(void)
+{
+	LONGLONG	offset;
+	FT_ULL		ft1;
+	FT_ULL		ft2;
+	FT_ULL		ft3;
+	FT_ULL		ft4;
+	FT_ULL		ft5;
+
+	NTP_REQUIRE(NomPerfCtrFreq != 0);
+
+	QueryPerformanceCounter(&ft1.li);
+	ft2.ull = get_pcc();
+	Sleep(1);
+	QueryPerformanceCounter(&ft3.li);
+	Sleep(1);
+	ft4.ull = get_pcc();
+	Sleep(1);
+	QueryPerformanceCounter(&ft5.li);
+
+	offset = ft2.ull - ft1.ull;
+	ft3.ull += offset;
+	ft5.ull += offset;
+
+	if (ft2.ull <= ft3.ull &&
+	    ft3.ull <= ft4.ull &&
+	    ft4.ull <= ft5.ull) {
+
+		QPC_offset = offset;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+
+/*
+ * Request Multimedia Timer
+ */
+void
+set_mm_timer(
+	int timerres
+	)
+{
+	modify_mm_timer = timerres;
+}
+
+/*
+ * adj_systime - called once every second to discipline system clock.
+ * Normally, the offset passed in (parameter now) is in the range
+ * [-NTP_MAXFREQ, NTP_MAXFREQ].  However, at EVNT_NSET, a much larger
+ * slew is requested if the initial offset is less than the step
+ * threshold, in the range [-step, step] where step is the step
+ * threshold, 128 msec by default.  For the remainder of the frequency
+ * training interval, adj_systime is called with 0 offset each second
+ * and slew the large offset at 500 PPM (500 usec/sec).
+ * Returns 1 if okay, 0 if trouble.
+ */
+int
+adj_systime(
+	double now
+	)
+{
+        /* ntp time scale origin as ticks since 1601-01-01 */
+        static const ULONGLONG HNS_JAN_1900 = 94354848000000000ull;
+
+	static double	adjtime_carry;
+	double		dtemp;
+	u_char		isneg;
+	BOOL		rc;
+	long		TimeAdjustment;
+	SYSTEMTIME	st;
+	ULONGLONG	this_perf_count;
+	FT_ULL		curr_ft;
+        leap_result_t   lsi;
+
+	/*
+	 * Add the residual from the previous adjustment to the new
+	 * adjustment, bound and round.
+	 */
+	dtemp = adjtime_carry + sys_residual + now;
+	adjtime_carry = 0.;
+	sys_residual = 0.;
+	if (dtemp < 0) {
+		isneg = TRUE;
+		dtemp = -dtemp;
+	} else {
+		isneg = FALSE;
+	}
+
+	if (dtemp > NTP_MAXFREQ) {
+		adjtime_carry = dtemp - NTP_MAXFREQ;
+		dtemp = NTP_MAXFREQ;
+	}
+
+	if (isneg) {
+		dtemp = -dtemp;
+		adjtime_carry = -adjtime_carry;
+	}
+
+	dtemp = dtemp * 1e6;
+
+	/* 
+	 * dtemp is in micro seconds. NT uses 100 ns units,
+	 * so a unit change in TimeAdjustment corresponds
+	 * to slewing 10 ppm on a 100 Hz system. Calculate
+	 * the number of 100ns units to add, using OS tick
+	 * frequency as per suggestion from Harry Pyle,
+	 * and leave the remainder in dtemp
+	 */
+	TimeAdjustment = (long)(dtemp / ppm_per_adjust_unit +
+				((isneg)
+				     ? -0.5
+				     : 0.5));
+
+	if (os_ignores_small_adjustment) {
+		/*
+		 * As the OS ignores adjustments smaller than 16, we need to
+		 * leave these small adjustments in sys_residual, causing
+		 * the small values to be averaged over time.
+		 */
+		if (TimeAdjustment > -16 && TimeAdjustment < 16) {
+			TimeAdjustment = 0;
+		}
+	}
+
+	dtemp -= TimeAdjustment * ppm_per_adjust_unit;	
+
+
+	/* If a piping-hot close leap second is pending for the end
+         * of this day, determine the UTC time stamp when the transition
+         * must take place. (Calculated in the current leap era!) 
+	 */
+	if (leapsec >= LSPROX_ALERT) {
+                if (0 == ls_ft.ull && leapsec_frame(&lsi)) {
+                        if (lsi.tai_diff > 0) {
+                                /* A leap second insert is scheduled at the end
+                                 * of the day. Since we have not yet computed the
+                                 * time stamp, do it now. Signal electric mode
+                                 * for this insert.
+                                 */
+                                ls_ft.ull = lsi.ttime.Q_s * HECTONANOSECONDS
+                                          + HNS_JAN_1900;
+                                FileTimeToSystemTime(&ls_ft.ft, &st);
+			        msyslog(LOG_NOTICE,
+				        "Detected positive leap second announcement "
+				        "for %04d-%02d-%02d %02d:%02d:%02d UTC",
+				        st.wYear, st.wMonth, st.wDay,
+				        st.wHour, st.wMinute, st.wSecond);
+                                leapsec_electric(TRUE);
+                        } else if (lsi.tai_diff < 0) {
+                                /* Do not handle negative leap seconds here. If this
+                                 * happens, let the system step.
+                                 */
+                                leapsec_electric(FALSE);
+                        }
+                }
+        } else {
+                /* The leap second announcement is gone. Happens primarily after
+                 * the leap transition, but can also be due to a clock step.
+                 * Disarm the leap second, but only if there is one scheduled
+                 * and not currently in progress!
+                 */
+		if (ls_ft.ull != 0 && ls_time_adjustment == 0) {
+			ls_ft.ull = 0;
+			msyslog(LOG_NOTICE, "Leap second announcement disarmed");
+		}
+	}
+
+	/*
+	 * If the time stamp for the next leap second has been set
+	 * then check if the leap second must be handled
+	 */
+	if (ls_ft.ull != 0) {
+		this_perf_count = perf_ctr();
+
+		if (0 == ls_time_adjustment) { /* has not yet been scheduled */
+
+	 		GetSystemTimeAsFileTime(&curr_ft.ft);
+			if (curr_ft.ull >= ls_ft.ull) {
+				ls_time_adjustment = clockperiod / LS_CORR_INTV_SECS;
+				ls_ref_perf_cnt = this_perf_count;
+				ls_elapsed = 0;
+				msyslog(LOG_NOTICE, "Inserting positive leap second.");
+			}
+		} else {  /* leap sec adjustment has been scheduled previously */
+			ls_elapsed = (this_perf_count - ls_ref_perf_cnt) 
+				     * HECTONANOSECONDS / PerfCtrFreq;
+		}
+
+		if (ls_time_adjustment != 0) {  /* leap second adjustment is currently active */
+			if (ls_elapsed > (LS_CORR_INTV - LS_CORR_LIMIT)) {
+				ls_time_adjustment = 0;  /* leap second adjustment done */
+				ls_ft.ull = 0;
+			}
+
+			/* 
+			 * NOTE: While the system time is slewed during the leap second 
+			 * the interpolation function which is based on the performance 
+			 * counter does not account for the slew.
+			 */
+			TimeAdjustment -= ls_time_adjustment;
+		}
+	}
+
+
+	sys_residual = dtemp / 1e6;
+	DPRINTF(3, ("adj_systime: %.9f -> %.9f residual %.9f", 
+		    now, 1e-6 * (TimeAdjustment * ppm_per_adjust_unit),
+		    sys_residual));
+	if (0. == adjtime_carry)
+		DPRINTF(3, ("\n"));
+	else
+		DPRINTF(3, (" adjtime %.9f\n", adjtime_carry));
+
+	/* only adjust the clock if adjustment changes */
+	TimeAdjustment += wintickadj;
+	if (last_Adj != TimeAdjustment) {
+		last_Adj = TimeAdjustment;
+		DPRINTF(2, ("SetSystemTimeAdjustment(%+ld)\n", TimeAdjustment));
+		rc = SetSystemTimeAdjustment(clockperiod + TimeAdjustment, FALSE);
+		if (!rc)
+			msyslog(LOG_ERR, "Can't adjust time: %m");
+	} else {
+		rc = TRUE;
+	}
+
+	return rc;
+}
+
+
+void 
+init_winnt_time(void)
+{
+	static const char settod[] = "settimeofday=\"SetSystemTime\"";
+	char szMsgPath[MAX_PATH+1];
+	HANDLE hToken = INVALID_HANDLE_VALUE;
+	TOKEN_PRIVILEGES tkp;
+	TIMECAPS tc;
+	BOOL noslew;
+	DWORD adjclockperiod;
+	LARGE_INTEGER Freq;
+	FT_ULL initial_hectonanosecs;
+	FT_ULL next_hectonanosecs;
+	double adjppm;
+	double rawadj;
+	char * pch;
+
+	if (winnt_time_initialized)
+		return;
+
+	/*
+	 * Make sure the service is initialized
+	 * before we do anything else
+	 */
+	ntservice_init();
+
+	/* Set up the Console Handler */
+	if (!SetConsoleCtrlHandler(OnConsoleEvent, TRUE)) {
+		msyslog(LOG_ERR, "Can't set console control handler: %m");
+	}
+
+	/* Set the Event-ID message-file name. */
+	if (!GetModuleFileName(NULL, szMsgPath, sizeof(szMsgPath))) {
+		msyslog(LOG_ERR, "GetModuleFileName(PGM_EXE_FILE) failed: %m");
+		exit(1);
+	}
+
+	/* Initialize random file before OpenSSL checks */
+	if (!init_randfile())
+		msyslog(LOG_ERR, "Unable to initialize .rnd file");
+
+#pragma warning(push)
+#pragma warning(disable: 4127) /* conditional expression is constant */
+
+#ifdef DEBUG
+	if (SIZEOF_TIME_T != sizeof(time_t)
+	    || SIZEOF_INT != sizeof(int)
+	    || SIZEOF_SIGNED_CHAR != sizeof(char)) {
+		msyslog(LOG_ERR, "config.h SIZEOF_* macros wrong, fatal");
+		exit(1);
+	}
+#endif
+
+#pragma warning(pop)
+
+	init_small_adjustment();
+        leapsec_electric(TRUE);
+
+	/*
+	 * Get privileges needed for fiddling with the clock
+	 */
+
+	/* get the current process token handle */
+	if (!OpenProcessToken(
+		GetCurrentProcess(),
+		TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY,
+		&hToken)) {
+		msyslog(LOG_ERR, "OpenProcessToken failed: %m");
+		exit(-1);
+	}
+	/* get the LUID for system-time privilege. */
+	LookupPrivilegeValue(NULL, SE_SYSTEMTIME_NAME, &tkp.Privileges[0].Luid);
+	tkp.PrivilegeCount = 1;  /* one privilege to set */
+	tkp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+	/* get set-time privilege for this process. */
+	AdjustTokenPrivileges(hToken, FALSE, &tkp, 0,
+	 	(PTOKEN_PRIVILEGES) NULL, 0);
+
+	/* cannot use return value of AdjustTokenPrivileges. */
+	/* (success does not indicate all privileges were set) */
+	if (GetLastError() != ERROR_SUCCESS) {
+		msyslog(LOG_ERR, "AdjustTokenPrivileges failed: %m");
+	 	/* later set time call will probably fail */
+	}
+
+	CloseHandle(hToken);
+	hToken = INVALID_HANDLE_VALUE;
+
+	/*
+	 * Say how we're setting the time of day
+	 */
+	set_sys_var(settod, sizeof(settod), RO);
+
+	/*
+	 * ntpd on Windows has always raised its priority, without
+	 * requiring -N as on Unix.  Since Windows ntpd doesn't share
+	 * the history of unix ntpd of once having no -N and therefore
+	 * needing to be invoked under nice, there is no reason to
+	 * bring it in line with the Unix version in this regard.
+	 * Instsrv assumes ntpd is invoked with no arguments, and
+	 * upgrading users would be negatively surprised by the 
+	 * poor timekeeping if they failed to add -N as part of 
+	 * upgrading were we to correct this platform difference.
+	 */
+	if (-1 == setpriority(PRIO_PROCESS, 0, NTP_PRIO))
+		exit(-1);
+
+	/* Determine the existing system time slewing */
+	if (!GetSystemTimeAdjustment(&adjclockperiod, &clockperiod, &noslew)) {
+		msyslog(LOG_ERR, "GetSystemTimeAdjustment failed: %m");
+		exit(-1);
+	}
+
+	/*
+	 * If there is no slewing before ntpd, adjclockperiod and clockperiod
+	 * will be equal.  Any difference is carried into adj_systime's first
+	 * pass as the previous adjustment.
+	 */
+	last_Adj = adjclockperiod - clockperiod;
+	
+	if (last_Adj)
+		msyslog(LOG_INFO, 
+			"Clock interrupt period %.3f msec "
+			"(startup slew %.1f usec/period)",
+			clockperiod / 1e4,
+			last_Adj / 10.);
+	else
+		msyslog(LOG_INFO, 
+			"Clock interrupt period %.3f msec", 
+			clockperiod / 1e4);
+
+	/*
+	 * Calculate the time adjustment resulting from incrementing
+	 * units per tick by 1 unit for 1 second 
+	 */
+	ppm_per_adjust_unit = 1e6 / clockperiod;
+
+	pch = getenv("NTPD_TICKADJ_PPM");
+	if (pch != NULL && 1 == sscanf(pch, "%lf", &adjppm)) {
+		rawadj = adjppm / ppm_per_adjust_unit;
+		rawadj += (rawadj < 0)
+			      ? -0.5
+			      : 0.5;
+		wintickadj = (long)rawadj;
+		msyslog(LOG_INFO,
+			"Using NTPD_TICKADJ_PPM %+g ppm (%+ld)",
+			adjppm, wintickadj);
+	}
+
+	/* get the performance counter ticks per second */
+	if (!QueryPerformanceFrequency(&Freq) || !Freq.QuadPart) {
+		msyslog(LOG_ERR, "QueryPerformanceFrequency failed: %m");
+		exit(-1);
+	}
+
+	NomPerfCtrFreq = PerfCtrFreq = Freq.QuadPart;
+	msyslog(LOG_INFO, 
+		"Performance counter frequency %.3f MHz",
+		PerfCtrFreq / 1e6);
+
+	/*
+	 * With a precise system clock, our interpolation decision is
+	 * a slam dunk.
+	 */
+	if (NULL != pGetSystemTimePreciseAsFileTime) {
+		winnt_use_interpolation = FALSE;
+		winnt_time_initialized = TRUE;
+
+		return;
+	}
+
+	/* 
+	 * Implement any multimedia timer manipulation requested via -M
+	 * option.  This is rumored to be unneeded on Win8 with the
+	 * introduction of the precise (interpolated) system clock.
+	 */
+	if (modify_mm_timer) {
+		if (timeGetDevCaps(&tc, sizeof(tc)) == TIMERR_NOERROR) {
+			wTimerRes = min(max(tc.wPeriodMin, MM_TIMER_INTV), tc.wPeriodMax);
+			timeBeginPeriod(wTimerRes);
+			atexit(atexit_revert_mm_timer);
+			
+			msyslog(LOG_INFO, "MM timer resolution: %u..%u msec, set to %u msec",
+				tc.wPeriodMin, tc.wPeriodMax, wTimerRes );
+		} else {
+			msyslog(LOG_ERR, "Multimedia timer unavailable");
+		}
+	}
+	
+	/*
+	 * Spin on GetSystemTimeAsFileTime to determine its
+	 * granularity.  Prior to Windows Vista this is 
+	 * typically the same as the clock period.
+	 */
+	GetSystemTimeAsFileTime(&initial_hectonanosecs.ft);
+	do {
+		GetSystemTimeAsFileTime(&next_hectonanosecs.ft);
+	} while (initial_hectonanosecs.ull == next_hectonanosecs.ull);
+
+	os_clock_precision = next_hectonanosecs.ull -
+		initial_hectonanosecs.ull;
+
+	msyslog(LOG_INFO,
+		"Windows clock precision %.3f msec, min. slew %.3f ppm/s",
+		os_clock_precision / 1e4, ppm_per_adjust_unit);
+
+	winnt_time_initialized = TRUE;
+
+	choose_interp_counter();
+
+	if (getenv("NTPD_USE_SYSTEM_CLOCK") ||
+	    (os_clock_precision < 4 * 10000 &&
+	     !getenv("NTPD_USE_INTERP_DANGEROUS"))) {
+		msyslog(LOG_INFO, "using Windows clock directly");
+	} else {
+		winnt_use_interpolation = TRUE;
+		get_sys_time_as_filetime = GetInterpTimeAsFileTime;
+		StartClockThread();
+	}
+}
+
+
+void
+atexit_revert_mm_timer(void)
+{
+	timeEndPeriod(wTimerRes); 
+	DPRINTF(1, ("MM timer resolution reset\n"));
+}
+
+
+void 
+reset_winnt_time(void)
+{
+	SYSTEMTIME st;
+
+	/*
+	 * If we're in the 2-second slew right after a leap second, 
+	 * we don't want to continue that extreme slew, in that case
+	 * disable our slewing and return clock discipline to the 
+	 * kernel.  Similarly if we are not yet synchronized, 
+	 * our current slew may not be a good ongoing trim.
+	 * Otherwise, our leave in place the last SetSystemTimeAdjustment
+	 * as an ongoing frequency correction, better than nothing.
+	 * TODO:
+	 * Verify this will not call SetSystemTimeAdjustment if
+	 * ntpd is running in ntpdate mode.
+	 */
+	if (sys_leap == LEAP_NOTINSYNC || ls_time_adjustment != 0)
+		SetSystemTimeAdjustment(0, TRUE);	 
+
+	/*
+	 * Read the current system time, and write it back to
+	 * force CMOS update, only if we are exiting because
+	 * the computer is shutting down and we are already
+	 * synchronized.
+	 */
+	 if (ntservice_systemisshuttingdown() && sys_leap != LEAP_NOTINSYNC) {
+		GetSystemTime(&st);
+		SetSystemTime(&st);
+		NLOG(NLOG_SYSEVENT | NLOG_CLOCKINFO)
+			msyslog(LOG_NOTICE, "system is shutting down, CMOS time reset.");
+	}
+}
+
+
+/*
+ * GetSystemTimeAsFileTime() interface clone is used by getclock() in ntpd.
+ */
+
+void WINAPI 
+GetInterpTimeAsFileTime(
+	LPFILETIME pft
+	)
+{
+	static ULONGLONG last_interp_time;
+	FT_ULL now_time;
+	FT_ULL now_count;
+	ULONGLONG clock_backward;
+
+	/*
+	 * Mark a mark ASAP.  The latency to here should be reasonably
+	 * deterministic
+	 */
+
+	now_count.ull = perf_ctr();
+	now_time.ull = interp_time(now_count.ull, TRUE);
+
+	if (last_interp_time <= now_time.ull) {
+		last_interp_time = now_time.ull;
+	} else {
+		clock_backward = last_interp_time - now_time.ull;
+		if (clock_backward > clock_backward_max) {
+			clock_backward_max = clock_backward;
+			clock_backward_count++;
+		}
+		now_time.ull = last_interp_time;
+	}
+	*pft = now_time.ft;
+
+	return;
+}
+
+
+/*
+ * TimerApcFunction is invoked on the high-priority clock
+ * thread to capture a new  baseline system time and
+ * performance counter correlation every 43 msec (64Hz 
+ * OS clock precision).
+ */
+static void CALLBACK
+TimerApcFunction(
+	LPVOID lpArgToCompletionRoutine,
+	DWORD dwTimerLowValue,
+	DWORD dwTimerHighValue
+	)
+{
+	static BOOL		ctr_freq_timer_started = FALSE;
+	static ULONGLONG	prev_count;
+	ULONGLONG		now_time;
+	FT_ULL			now_count;
+
+	/* Grab the counter first of all */
+	now_count.ull = perf_ctr();
+
+	now_time = (((ULONGLONG)dwTimerHighValue << 32) |
+				dwTimerLowValue);
+
+	/*
+	 * Save this correlation in the history.
+	 */
+	add_counter_time_pair(now_count.ull, now_time);
+
+	/*
+	 * Once we're synchronized start the counter frequency
+	 * tuning timer.
+	 */
+	if (INVALID_HANDLE_VALUE == ctr_freq_timer &&
+	    LEAP_NOTINSYNC != sys_leap)
+		start_ctr_freq_timer(now_time);
+}
+
+
+unsigned WINAPI 
+ClockThread(
+	void *arg
+	)
+{
+	LARGE_INTEGER	DueTime;
+	HANDLE		timer;
+	double		HZ;
+	double		TimerHz;
+	DWORD		timer_period_msec;
+	DWORD		res;
+	char		*ntpd_int_int_text;
+
+	UNUSED_ARG(arg);
+
+	timer = CreateWaitableTimer(NULL, FALSE, NULL);
+
+	ntpd_int_int_text = getenv("NTPD_INT_INT");
+
+	HZ = (double)HECTONANOSECONDS / clockperiod;
+
+	if (HZ > 63 && HZ < 65) {
+		timer_period_msec = 43;
+	} else if (HZ > 98 && HZ < 102) {
+		timer_period_msec = 27;
+		if (NULL == ntpd_int_int_text)
+			msyslog(LOG_WARNING, 
+				"%.3f Hz system clock may benefit from "
+				"custom NTPD_INT_INT env var timer interval "
+				"override between approx. 20 and 50 msecs.",
+				HZ);
+	} else {
+		timer_period_msec = (DWORD)(0.5 + (2.752 * clockperiod / 10000));
+		if (NULL == ntpd_int_int_text)
+			msyslog(LOG_WARNING, 
+				"unfamiliar %.3f Hz system clock may benefit "
+				"from custom NTPD_INT_INT env var timer "
+				"interval override between approx. 20 and 50 "
+				"msecs.",
+				HZ);
+	}
+
+	if (ntpd_int_int_text != NULL) {
+		timer_period_msec = atoi(ntpd_int_int_text);
+		timer_period_msec = max(9, timer_period_msec);
+		msyslog(LOG_NOTICE, 
+			"using NTPD_INT_INT env var override %u", 
+			timer_period_msec);
+	}
+
+	TimerHz = 1e3 / timer_period_msec;
+	msyslog(LOG_NOTICE, "HZ %.3f using %u msec timer %.3f Hz %d deep", 
+		HZ,
+		timer_period_msec,
+		TimerHz,
+		BASELINES_USED);
+
+	/* negative DueTime means relative to now */
+	DueTime.QuadPart = -(int)timer_period_msec;
+
+	SetWaitableTimer(
+		timer, 
+		&DueTime,		/* first fire */
+		timer_period_msec,	/* period thereafter */
+		TimerApcFunction,	/* callback routine */
+		&timer,			/* context for callback */
+		FALSE);			/* do not interfere with power saving */
+
+	/*
+	 * The clock thread spends the rest of its life in the TimerApcFunction
+	 * and ctr_freq_timer_fired timer APC callbacks, which can only occur 
+	 * while this thread is in an alertable wait.  Note the Ex on 
+	 * WaitForSingleObjectEx and TRUE for fAlertable.  The wait will return 
+	 * after each APC callback in which case we simply wait again.  We will
+	 * break out of the loop when StopClockThread signals our exit event.
+	 */
+	do res = WaitForSingleObjectEx(
+			TimerThreadExitRequest, 
+			INFINITE, 
+			TRUE);
+	while (WAIT_OBJECT_0 != res);
+
+	CloseHandle(timer);
+
+	if (ctr_freq_timer != INVALID_HANDLE_VALUE) {
+		CloseHandle(ctr_freq_timer);
+		ctr_freq_timer = INVALID_HANDLE_VALUE;
+	}
+
+	return 0;
+}
+
+
+static void 
+StartClockThread(void)
+{
+	static BOOL done_once = FALSE;
+	FT_ULL StartTime;
+
+	/* init variables with the time now */
+	GetSystemTimeAsFileTime(&StartTime.ft);
+	baseline_times[0] = StartTime.ull;
+	baseline_counts[0] = perf_ctr();
+
+	/* init sync objects */
+	TimerThreadExitRequest = CreateEvent(NULL, FALSE, FALSE, NULL);
+
+	clock_thread = 
+		(HANDLE)_beginthreadex(
+			NULL, 
+			0, 
+			ClockThread, 
+			NULL, 
+			CREATE_SUSPENDED, 
+			&clock_thread_id);
+
+	if (clock_thread != NULL) {
+		/* remember the thread priority is only within the process class */
+		if (!SetThreadPriority(clock_thread, THREAD_PRIORITY_TIME_CRITICAL)) {
+			DPRINTF(1, ("Error setting thread priority\n"));
+		}
+
+		lock_thread_to_processor(clock_thread);
+		ResumeThread(clock_thread);
+
+		if (FALSE == done_once) {
+			done_once = TRUE;
+			lock_thread_to_processor(GetCurrentThread());
+			atexit( StopClockThread );
+		}
+
+		/*
+		 * Give the clock thread time to fill its counter/time
+		 * sample buffer.  This will underfill the buffer a
+		 * bit for sample periods over 43 msec.
+		 */
+		Sleep(BASELINES_USED * 43);
+	}
+}
+
+
+void 
+StopClockThread(void)
+{
+	/*
+	 * if the clock thread exit()s this routine
+	 * will be called on the clock thread and
+	 * we need not (and can't) use the normal
+	 * TimerThreadExitRequest event.
+	 */
+	if (GetCurrentThreadId() != clock_thread_id) {
+
+		if (!SetEvent(TimerThreadExitRequest) ||
+		    WaitForSingleObject(clock_thread, 2 * 1000) != 
+		    WAIT_OBJECT_0) {
+			msyslog(LOG_ERR, "Failed to stop clock thread.");
+		}
+	}
+	CloseHandle(TimerThreadExitRequest);
+	TimerThreadExitRequest = NULL;
+	CloseHandle(clock_thread);
+	clock_thread = NULL;
+}
+
+
+void
+lock_thread_to_processor(HANDLE thread)
+{
+	static	DWORD_PTR	ProcessAffinityMask;
+	static	DWORD_PTR	ThreadAffinityMask;
+	DWORD_PTR		SystemAffinityMask;
+	char			*cputext;
+	unsigned int		cpu;
+
+	if ( ! winnt_time_initialized) {
+		DPRINTF(1, ("init_winnt_time() must be called before "
+				"lock_thread_to_processor(), exiting\n"));
+		exit(-1);
+	}
+
+	if (!winnt_use_interpolation)
+		return;
+	
+	if (-1 == lock_interp_threads) {
+		DPRINTF(1, ("choose_interp_counter() is not called "
+			    "before lock_thread_to_processor()\n"));
+		exit(-1);
+	} else if (!lock_interp_threads)
+		return;
+
+	/*
+	 * Calculate the ThreadAffinityMask we'll use once on the
+	 * first invocation.
+	 */
+	if (!ProcessAffinityMask) {
+
+		/*
+		 * Choose which processor to nail the main and clock threads to.
+		 * If we have more than one, we simply choose the 2nd.
+		 * Randomly choosing from 2 to n would be better, but in
+		 * either case with clock and network interrupts more likely
+		 * to be serviced by the first procecssor, let's stay away 
+		 * from it.  QueryPerformanceCounter is not necessarily
+		 * consistent across CPUs, hence the need to nail the two
+		 * threads involved in QPC-based interpolation to the same
+		 * CPU.
+		 */
+
+		GetProcessAffinityMask(
+			GetCurrentProcess(), 
+			&ProcessAffinityMask,
+			&SystemAffinityMask);
+
+		/*
+		 * respect NTPD_CPU environment variable if present
+		 * for testing.  NTPD_CPU=0 means use all CPUs, 1-64
+		 * means lock threads involved in interpolation to
+		 * that CPU.  Default to 2nd if more than 1.
+		 */
+
+		cpu = 2;
+		cputext = getenv("NTPD_CPU");
+		if (cputext) {
+			cpu = (unsigned int) atoi(cputext);
+			cpu = min((8 * sizeof(DWORD_PTR)), cpu);
+		}
+
+		/* 
+		 * Clear all bits except the 2nd.  If we have only one proc
+		 * that leaves ThreadAffinityMask zeroed and we won't bother
+		 * with SetThreadAffinityMask.
+		 */
+
+		ThreadAffinityMask = (0 == cpu) ? 0 : (1 << (cpu - 1));
+
+		if (ThreadAffinityMask && 
+			!(ThreadAffinityMask & ProcessAffinityMask)) 
+
+			DPRINTF(1, ("Selected CPU %u (mask %x) is outside "
+					"process mask %x, using all CPUs.\n",
+					cpu, ThreadAffinityMask, 
+					ProcessAffinityMask));
+		else
+			DPRINTF(1, ("Wiring to processor %u (0 means all) "
+					"affinity mask %x\n",	
+					cpu, ThreadAffinityMask));
+
+		ThreadAffinityMask &= ProcessAffinityMask;
+	}
+
+	if (ThreadAffinityMask && 
+	    !SetThreadAffinityMask(thread, ThreadAffinityMask))
+		msyslog(LOG_ERR, 
+			"Unable to wire thread to mask %x: %m", 
+			ThreadAffinityMask);
+}
+
+
+#ifdef HAVE_PPSAPI
+static inline void ntp_timestamp_from_counter(l_fp *, ULONGLONG,
+					      ULONGLONG);
+
+/*
+ * helper routine for serial PPS which returns QueryPerformanceCounter
+ * timestamp and needs to interpolate it to an NTP timestamp.
+ */
+void 
+pps_ntp_timestamp_from_counter(
+	ntp_fp_t	*result, 
+	ULONGLONG	Timestamp, 
+	ULONGLONG	Counterstamp
+	)
+{
+	/*
+	 * convert between equivalent l_fp and PPSAPI ntp_fp_t
+	 */
+	ntp_timestamp_from_counter(
+		(l_fp *)result,
+		Timestamp,
+		Counterstamp);
+}
+
+
+static inline 
+void 
+ntp_timestamp_from_counter(
+	l_fp *result, 
+	ULONGLONG Timestamp, 
+	ULONGLONG Counterstamp
+	)
+{
+	FT_ULL		Now;
+	FT_ULL		Ctr;
+	LONGLONG	CtrDelta;
+	double		seconds;
+	ULONGLONG	InterpTimestamp;
+
+	if (winnt_use_interpolation) {
+		if (0 == Counterstamp) {
+			DPRINTF(1, ("ntp_timestamp_from_counter rejecting 0 counter.\n"));
+			ZERO(*result);
+			return;
+		}
+
+		InterpTimestamp = interp_time(Counterstamp + QPC_offset, FALSE);
+	} else {  /* ! winnt_use_interpolation */
+		if (NULL != pGetSystemTimePreciseAsFileTime &&
+		    0 != Counterstamp) {
+			QueryPerformanceCounter(&Ctr.li);
+			(*pGetSystemTimePreciseAsFileTime)(&Now.ft);
+			CtrDelta = Ctr.ull - Counterstamp;
+			seconds = (double)CtrDelta / PerfCtrFreq;
+			InterpTimestamp = Now.ull -
+			    (ULONGLONG)(seconds * HECTONANOSECONDS);
+		} else {
+			/* have to simply use the driver's system time timestamp */
+			InterpTimestamp = Timestamp;
+			GetSystemTimeAsFileTime(&Now.ft);
+		}
+	}
+
+	/* convert from 100ns units to NTP fixed point format */
+
+	InterpTimestamp -= FILETIME_1970;
+	result->l_ui = JAN_1970 + (u_int32)(InterpTimestamp / HECTONANOSECONDS);
+	result->l_uf = (u_int32)((InterpTimestamp % HECTONANOSECONDS) *
+				 (ULONGLONG)FRAC / HECTONANOSECONDS);
+}
+#endif  /* HAVE_PPSAPI */
+
+
+void
+win_time_stepped(void)
+{
+	/*
+	 * called back by ntp_set_tod after the system
+	 * time has been stepped (set).
+	 *
+	 * We normally prevent the reported time from going backwards
+	 * but need to allow it in this case.
+	 */
+	if (FALSE == winnt_use_interpolation)
+		return;
+
+
+	/*
+	 * Restart the clock thread to get a new baseline
+	 * time/counter correlation.
+	 */
+	StopClockThread();
+
+	/*
+	 * newest_baseline_gen is a generation counter
+	 * incremented once each time newest_baseline
+	 * is reset.
+	 */
+	newest_baseline_gen++;
+
+	clock_backward_max = CLOCK_BACK_THRESHOLD;
+	clock_backward_count = 0;
+	newest_baseline = 0;
+	ZERO(baseline_counts);
+	ZERO(baseline_times);
+
+	StartClockThread();
+}
+
+
+/*
+ * log2ull - log base 2 of a unsigned 64-bit number
+ */
+int 
+log2ull(
+	ULONGLONG n
+	)
+{
+	const ULONGLONG one = 1;
+	int log = 0;
+
+	if (n >= one<<32) { n >>= 32; log += 32; }
+	if (n >= one<<16) { n >>= 16; log += 16; }
+	if (n >= one<< 8) { n >>=  8; log +=  8; }
+	if (n >= one<< 4) { n >>=  4; log +=  4; }
+	if (n >= one<< 2) { n >>=  2; log +=  2; }
+	if (n >= one<< 1) {	      log +=  1; }
+
+	return (n) ? log : (-1);
+}
+
+
+/*
+ * ctr_freq_timer_fired is called once a few seconds before
+ * tune_ctr_period seconds have elapsed, to reset the timer
+ * and hopefully minimize error due to the system using the
+ * nominal performance counter frequency to set the timer
+ * internally, which is typically dozens of PPM from the
+ * actual performance counter rate.  A few seconds later
+ * it is called again to observe the counter and estimate the
+ * counter frequency.
+ */
+static void CALLBACK
+ctr_freq_timer_fired(
+	LPVOID arg,
+	DWORD dwTimeLow,
+	DWORD dwTimeHigh
+	)
+{
+	static	FT_ULL		begin_time = {0};
+	static	FT_ULL		begin_count = {0};
+	static	ULONGLONG	next_period_time = 0;
+	static	ULONGLONG	report_systemtime = 0;
+	const	ULONGLONG	five_minutes = 5ui64 * 60 * HECTONANOSECONDS;
+	FT_ULL			now_time;
+	FT_ULL			now_count;
+
+	if (!begin_time.ull) {
+		begin_count.ull = perf_ctr();
+		begin_time.ft.dwLowDateTime = dwTimeLow;
+		begin_time.ft.dwHighDateTime = dwTimeHigh;
+
+		/*
+		 * adapt perf ctr observation interval to the
+		 * counter frequency
+		 */
+		tune_ctr_period = 22680 / log2ull(NomPerfCtrFreq);
+
+		/*
+		 * reset timer 2s before period ends to minimize
+		 * error from OS timer routines using nominal 
+		 * performance frequency internally.
+		 */
+		tune_ctr_freq_max_interval = tune_ctr_period - 2;
+
+		next_period_time = begin_time.ull + 
+			(ULONGLONG)tune_ctr_period * HECTONANOSECONDS;
+
+		ROUND_TO_NEXT_SEC_BOTTOM(next_period_time);
+
+		reset_ctr_freq_timer(next_period_time, begin_time.ull);
+
+		return;
+	}
+
+	now_time.ft.dwLowDateTime = dwTimeLow;
+	now_time.ft.dwHighDateTime = dwTimeHigh;
+
+	if (now_time.ull >= next_period_time) {
+		now_count.ull = perf_ctr();
+		tune_ctr_freq(
+			now_count.ull - begin_count.ull,
+			now_time.ull - begin_time.ull);
+		next_period_time += (ULONGLONG)tune_ctr_period * HECTONANOSECONDS;
+		begin_count.ull = now_count.ull;
+		begin_time.ull = now_time.ull;
+	}
+
+	/* 
+	 * Log clock backward events no more often than 5 minutes.
+	 */
+	if (!report_systemtime) {
+		report_systemtime = now_time.ull + five_minutes;
+	} else if (report_systemtime <= now_time.ull) {
+		report_systemtime +=  five_minutes;
+		if (clock_backward_count) {
+			msyslog(LOG_WARNING, 
+				"clock would have gone backward %d times, "
+				"max %.1f usec",
+				clock_backward_count, 
+				clock_backward_max / 10.);
+
+			clock_backward_max = CLOCK_BACK_THRESHOLD;
+			clock_backward_count = 0;
+		}
+	}
+	reset_ctr_freq_timer(next_period_time, now_time.ull);
+}
+
+
+void
+reset_ctr_freq_timer_abs(
+	ULONGLONG when
+	)
+{
+	FT_ULL	fire_time;
+
+	fire_time.ull = when; 
+	SetWaitableTimer(
+		ctr_freq_timer,
+		&fire_time.li,		/* first fire */
+		0,			/* not periodic */
+		ctr_freq_timer_fired,	/* callback routine */
+		NULL,			/* context for callback */
+		FALSE);			/* do not interfere with power saving */
+}
+
+
+void
+reset_ctr_freq_timer(
+	ULONGLONG when,
+	ULONGLONG now
+	)
+{
+	if (when - now > 
+	    (tune_ctr_freq_max_interval * HECTONANOSECONDS + HECTONANOSECONDS))
+		when = now + tune_ctr_freq_max_interval * HECTONANOSECONDS;
+
+	reset_ctr_freq_timer_abs(when);
+}
+
+
+void
+start_ctr_freq_timer(
+	ULONGLONG now_time
+	)
+{
+	ULONGLONG when;
+
+	ctr_freq_timer = CreateWaitableTimer(NULL, FALSE, NULL);
+	when = now_time;
+	ROUND_TO_NEXT_SEC_BOTTOM(when);
+
+	reset_ctr_freq_timer_abs(when);
+}
+
+
+/*
+ * tune_ctr_freq is called once per tune_ctr_period seconds
+ * with a counter difference and time difference.
+ */
+void 
+tune_ctr_freq(
+	LONGLONG ctr_delta,
+	LONGLONG time_delta
+	)
+{
+	static unsigned count = 0;
+	static unsigned dispcount = 0;
+	static unsigned report_at_count = 0;
+	static int disbelieved = 0;
+	static int i = 0;
+	static double nom_freq = 0;
+	static LONGLONG diffs[TUNE_CTR_DEPTH] = {0};
+	static LONGLONG sum = 0;
+	char ctr_freq_eq[64];
+	LONGLONG delta;
+	LONGLONG deltadiff;
+	ULONGLONG ObsPerfCtrFreq;
+	double freq;
+	double this_freq;
+	BOOL isneg;
+
+	/* one-time initialization */
+	if (!report_at_count) {
+		report_at_count = 24 * 60 * 60 / tune_ctr_period;
+		nom_freq = NomPerfCtrFreq / 1e6;
+	}
+
+	/* delta is the per-second observed frequency this time */
+	delta = (LONGLONG)((double)ctr_delta * HECTONANOSECONDS /
+			   time_delta);
+
+	/* disbelieve any delta more than +/- 976 PPM from nominal */
+	deltadiff = delta - NomPerfCtrFreq;
+	if (0 > deltadiff) {
+		isneg = TRUE;
+		deltadiff = -deltadiff;
+	} else {
+		isneg = FALSE;
+	}
+
+	if ((ULONGLONG)deltadiff > (NomPerfCtrFreq / 1024)) {
+		disbelieved++;
+		dispcount++;
+#ifdef DEBUG
+		msyslog(LOG_DEBUG, "ctr delta %s%lld exceeds limit %llu",
+				   (isneg) ? "-" : "",
+				   deltadiff,
+				   NomPerfCtrFreq / 1024);
+#endif
+	} else {
+
+		/*
+		 * collect average over TUNE_CTR_DEPTH samples
+		 * for our PerfCtrFreq trimming.
+		 */
+
+		if (isneg)
+			deltadiff = -deltadiff;
+		sum -= diffs[i];
+		diffs[i] = deltadiff;
+		sum += deltadiff;
+		i = (i + 1) % COUNTOF(diffs);
+		count++;
+		dispcount++;
+	}
+
+	this_freq = delta / 1e6;
+
+	ObsPerfCtrFreq = NomPerfCtrFreq + (sum / COUNTOF(diffs));
+
+#if 1	/* #if 0 to disable changing freq used */
+	/* get rid of ObsPerfCtrFreq when removing the #ifdef */
+	PerfCtrFreq = ObsPerfCtrFreq;
+#endif
+	freq = PerfCtrFreq / 1e6;
+
+	/*
+	 * make the performance counter's frequency error from its
+	 * nominal rate, expressed in PPM, available via ntpq as
+	 * system variable "ctr_frequency".  This is consistent with
+	 * "frequency" which is the system clock drift in PPM.
+	 */
+	snprintf(ctr_freq_eq, sizeof(ctr_freq_eq), "ctr_frequency=%.2f", 
+		 1e6 * (freq - nom_freq) / nom_freq);
+	set_sys_var(ctr_freq_eq, strlen(ctr_freq_eq) + 1, RO | DEF);
+
+	/* 
+	 * report observed ctr freq each time the estimate used during
+	 * startup moves toward the observed freq from the nominal.
+	 */
+
+	if (count > COUNTOF(diffs) &&
+	    /* (count % COUNTOF(diffs)) && */	/* enables reporting each */
+	    dispcount < report_at_count)	/* TUNE_CTR_DEPTH samples */
+		return;
+
+	NLOG(NLOG_CLOCKINFO)
+		if (count <= COUNTOF(diffs))
+			/* moving to observed freq. from nominal (startup) */
+			msyslog(LOG_INFO,
+				(freq > 100)
+				   ? "ctr %.3f MHz %+6.2f PPM using %.3f MHz %+6.2f PPM"
+				   : "ctr %.6f MHz %+6.2f PPM using %.6f MHz %+6.2f PPM",
+				this_freq,
+				1e6 * (this_freq - nom_freq) / nom_freq,
+				freq, 
+				1e6 * (freq - nom_freq) / nom_freq);
+		else
+			/* steady state */
+			msyslog(LOG_INFO,
+				(freq > 100)
+				   ? "ctr %.3f MHz %+.2f PPM"
+				   : "ctr %.6f MHz %+.2f PPM",
+				freq, 
+				1e6 * (freq - nom_freq) / nom_freq);
+
+	if (disbelieved) {
+		msyslog(LOG_ERR, 
+			"%d ctr samples exceed +/- 976 PPM range gate",
+			disbelieved);
+		disbelieved = 0;
+	}
+
+	dispcount = 0;
+}
+
+
+/*
+ * add_counter_time_pair is called by the
+ * high priority clock thread with each new
+ * baseline counter/time correlation.
+ */
+void
+add_counter_time_pair(
+	ULONGLONG ctr,
+	LONGLONG time
+	)
+{
+	int i;
+
+	i = (newest_baseline + 1) % BASELINES_TOT;
+
+	baseline_counts[i] = ctr;
+	baseline_times[i] = time;
+
+	newest_baseline = i;
+}
+
+
+/*
+ * interp_time estimates NT time in 100ns units
+ * based on a performance counter value given.
+ * This must tolerate recent historical counters
+ * as well as current.  When current is FALSE
+ * we can't assume ctr is the latest/highest
+ * seen.
+ */
+ULONGLONG
+interp_time(
+	ULONGLONG ctr,
+	BOOL current
+	)
+{
+	static __declspec(thread) int		last_newest = -1;
+	static __declspec(thread) int		last_newest_gen;
+	static __declspec(thread) int		best_index;
+	ULONGLONG	this_ctr;
+	LONGLONG	this_time;
+	LONGLONG	latest_time;
+	LONGLONG	ctr_diff;
+	int		i;
+	int		i_gen;
+	int		c;
+
+	/*
+	 * Use the system time (roughly synchronised to the tick, and
+	 * extrapolated using the system performance counter.
+	 *
+	 * Cache the results per thread and only repeat the
+	 * calculation when new data has arrived.
+	 */
+	i = newest_baseline;
+	i_gen = newest_baseline_gen;
+
+	if (last_newest == i && last_newest_gen == i_gen) {
+		this_time = baseline_times[best_index];
+		ctr_diff = ctr - baseline_counts[best_index];
+		this_time += (LONGLONG)PERF2HNS((double)ctr_diff);
+
+		return this_time;
+	}
+
+	last_newest = i;
+	last_newest_gen = i_gen;
+
+	latest_time = 0;
+
+	/*
+	 * Run through the history calculating the interpolated 
+	 * time based on each counter/time correlation in turn,
+	 * and believe the latest one.  This is akin to the NTP
+	 * protocol minimum delay clock filter.  Errors due to 
+	 * counter/time correlations with stale time are all 
+	 * negative.
+	 */
+	for (c = 0; c < BASELINES_USED; c++) {
+		 if (baseline_times[i]) {
+			this_time = baseline_times[i];
+			this_ctr = baseline_counts[i];
+
+			ctr_diff = ctr - this_ctr;
+
+			if (current && ctr_diff < 0) {
+				/* 
+				 * The performance counter apparently went 
+				 * backwards without rolling over.  It might 
+				 * be nice to complain but we don't want 
+				 * to do it repeatedly.
+				 */
+				ctr_diff = 0;
+			}
+
+			this_time += (LONGLONG)PERF2HNS((double)ctr_diff);
+
+			if (this_time > latest_time) {
+				latest_time = this_time;
+				best_index = i;
+			}
+		}
+		i = i ? (i - 1) : (BASELINES_TOT - 1);
+	}
+
+	return latest_time;
+}
diff --git a/ports/winnt/ntpd/ntp_iocompletionport.c b/ports/winnt/ntpd/ntp_iocompletionport.c
new file mode 100644
index 0000000..046d6cd
--- /dev/null
+++ b/ports/winnt/ntpd/ntp_iocompletionport.c
@@ -0,0 +1,1639 @@
+/*
+-----------------------------------------------------------------------
+This is the IO completion port handling for async/overlapped IO on
+Windows >= Win2000.
+
+Some notes on the implementation:
+
++ Only one thread is used to serve the IO completion port, for several
+  reasons:
+
+  * First, there seems to be (have been?) trouble that locked up NTPD
+    when more than one thread was used for IOCPL.
+
+  * Second, for the sake of the time stamp interpolation the threads
+    must run on the same CPU as the time interpolation thread. This
+    makes using more than one thread useless, as they would compete for
+    the same core and create contention.
+
++ Some IO operations need a possibly lengthy postprocessing. Emulating
+  the UN*X line discipline is currently the only but prominent example.
+  To avoid the processing in the time-critical IOCPL thread, longer
+  processing is offloaded the worker thread pool.
+
++ A fact that seems not as well-known as it should be is that all
+  ressources passed to an overlapped IO operation must be considered
+  owned by the OS until the result has been fetched/dequeued. This
+  includes all overlapped structures and buffers involved, so cleaning
+  up on shutdown must be carefully constructed. (This includes closing
+  all the IO handles and waiting for the results to be dequeued.
+  'CancleIo()' cannot be used since it's broken beyond repair.)
+
+  If this is not possible, then all ressources should be dropped into
+  oblivion -- otherwise "bad things (tm)" are bound to happen.
+
+  Using a private heap that is silently dropped but not deleted is a
+  good way to avoid cluttering memory stats with IO context related
+  objects. Leak tracing becomes more interesting, though.
+
+
+The current implementation is based on the work of Danny Mayer who improved
+the original implementation and Dave Hart who improved on the serial I/O
+routines. The true roots of this file seem to be shrouded by the mist of time...
+
+
+This version still provides the 'user space PPS' emulation
+feature.
+
+Juergen Perlinger (perlinger@ntp.org) Feb 2012
+
+-----------------------------------------------------------------------
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#ifdef HAVE_IO_COMPLETION_PORT
+
+#include <stddef.h>
+#include <stdio.h>
+#include <process.h>
+#include <syslog.h>
+
+#include "ntpd.h"
+#include "ntp_machine.h"
+#include "ntp_iocompletionport.h"
+#include "ntp_request.h"
+#include "ntp_assert.h"
+#include "ntp_io.h"
+#include "ntp_lists.h"
+
+
+#define CONTAINEROF(p, type, member) \
+	((type *)((char *)(p) - offsetof(type, member)))
+
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 201)		/* nonstd extension nameless union */
+#endif
+
+/*
+ * ---------------------------------------------------------------------
+ * storage type for PPS data (DCD change counts & times)
+ * ---------------------------------------------------------------------
+ */
+struct PpsData {
+	u_long	cc_assert;
+	u_long	cc_clear;
+	l_fp	ts_assert;
+	l_fp	ts_clear;
+};
+typedef struct PpsData PPSData_t;
+
+struct PpsDataEx {
+	u_long		cov_count;
+	PPSData_t	data;
+};
+typedef volatile struct PpsDataEx PPSDataEx_t;
+
+/*
+ * ---------------------------------------------------------------------
+ * device context; uses reference counting to avoid nasty surprises.
+ * Currently this stores only the PPS time stamps, but it could be
+ * easily extended.
+ * ---------------------------------------------------------------------
+ */
+#define PPS_QUEUE_LEN	8u		  /* must be power of two! */
+#define PPS_QUEUE_MSK	(PPS_QUEUE_LEN-1) /* mask for easy MOD ops */
+
+struct DeviceContext {
+	volatile long	ref_count;
+	volatile u_long	cov_count;
+	PPSData_t	pps_data;
+	PPSDataEx_t	pps_buff[PPS_QUEUE_LEN];
+};
+
+typedef struct DeviceContext DevCtx_t;
+
+/*
+ * ---------------------------------------------------------------------
+ * I/O context structure
+ *
+ * This is an extended overlapped structure. Some fields are only used
+ * for serial I/O, others are used for all operations. The serial I/O is
+ * more interesting since the same context object is used for waiting,
+ * actual I/O and possibly offload processing in a worker thread until
+ * a complete operation cycle is done.
+ *
+ * In this case the I/O context is used to gather all the bits that are
+ * finally needed for the processing of the buffer.
+ * ---------------------------------------------------------------------
+ */
+//struct IoCtx;
+typedef struct IoCtx      IoCtx_t;
+typedef struct refclockio RIO_t;
+
+typedef void (*IoCompleteFunc)(ULONG_PTR, IoCtx_t *);
+
+struct IoCtx {
+	OVERLAPPED		ol;		/* 'kernel' part of the context	*/
+	union {
+		recvbuf_t *	recv_buf;	/* incoming -> buffer structure	*/
+		void *		trans_buf;	/* outgoing -> char array	*/
+		PPSData_t *	pps_buf;	/* for reading PPS seq/stamps	*/
+		HANDLE		ppswake;	/* pps wakeup for attach	*/
+	};
+	IoCompleteFunc		onIoDone;	/* HL callback to execute	*/
+	RIO_t *			rio;		/* RIO backlink (for offload)	*/
+	DevCtx_t *		devCtx;
+	l_fp			DCDSTime;	/* PPS-hack: time of DCD ON	*/
+	l_fp			FlagTime;	/* timestamp of flag/event char */
+	l_fp			RecvTime;	/* timestamp of callback        */
+	DWORD			errCode;	/* error code of last I/O	*/
+	DWORD			byteCount;	/* byte count     "             */
+	DWORD			com_events;	/* buffer for COM events	*/
+	unsigned int		flRawMem : 1;	/* buffer is raw memory -> free */
+	unsigned int		flTsDCDS : 1;	/* DCDSTime valid?		*/
+	unsigned int		flTsFlag : 1;	/* FlagTime valid?		*/
+};
+
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+
+/*
+ * local function definitions
+ */
+static		void ntpd_addremove_semaphore(HANDLE, int);
+static inline	void set_serial_recv_time    (recvbuf_t *, IoCtx_t *);
+
+/* Initiate/Request async IO operations */
+static	BOOL QueueSerialWait   (RIO_t *, recvbuf_t *, IoCtx_t *);
+static	BOOL QueueSerialRead   (RIO_t *, recvbuf_t *, IoCtx_t *);
+static	BOOL QueueRawSerialRead(RIO_t *, recvbuf_t *, IoCtx_t *);
+static  BOOL QueueSocketRecv   (SOCKET , recvbuf_t *, IoCtx_t *);
+
+
+/* High-level IO callback functions */
+static	void OnSocketRecv           (ULONG_PTR, IoCtx_t *);
+static	void OnSerialWaitComplete   (ULONG_PTR, IoCtx_t *);
+static	void OnSerialReadComplete   (ULONG_PTR, IoCtx_t *);
+static	void OnRawSerialReadComplete(ULONG_PTR, IoCtx_t *);
+static	void OnSerialWriteComplete  (ULONG_PTR, IoCtx_t *);
+
+/* worker pool offload functions */
+static DWORD WINAPI OnSerialReadWorker(void * ctx);
+
+
+/* keep a list to traverse to free memory on debug builds */
+#ifdef DEBUG
+static void free_io_completion_port_mem(void);
+#endif
+
+
+	HANDLE WaitableExitEventHandle;
+	HANDLE WaitableIoEventHandle;
+static	HANDLE hIoCompletionPort;
+
+DWORD	ActiveWaitHandles;
+HANDLE	WaitHandles[16];
+
+/*
+ * -------------------------------------------------------------------
+ * We make a pool of our own for IO context objects -- the are owned by
+ * the system until a completion result is pulled from the queue, and
+ * they seriously go into the way of memory tracking until we can safely
+ * cancel an IO request.
+ * -------------------------------------------------------------------
+ */
+static	HANDLE hHeapHandle;
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * Create a new heap for IO context objects
+ */
+static void
+IoCtxPoolInit(
+	size_t	initObjs
+	)
+{
+	hHeapHandle = HeapCreate(0, initObjs * sizeof(IoCtx_t), 0);
+	if (hHeapHandle == NULL) {
+		msyslog(LOG_ERR, "Can't initialize Heap: %m");
+		exit(1);
+	}
+}
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ *
+ * Delete the IO context heap
+ *
+ * Since we do not know what callbacks are pending, we just drop the
+ * pool into oblivion. New allocs and frees will fail from this moment,
+ * but we simply don't care. At least the normal heap dump stats will
+ * show no leaks from IO context blocks. On the downside, we have to
+ * track them ourselves if something goes wrong.
+ */
+static void
+IoCtxPoolDone(void)
+{
+	hHeapHandle = NULL;
+}
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * Alloc & Free on local heap
+ *
+ * When the heap handle is NULL, these both will fail; Alloc with a NULL
+ * return and Free silently.
+ */
+static void * __fastcall
+LocalPoolAlloc(
+	size_t		size,
+	const char *	desc
+)
+{
+	void *	ptr;
+
+	/* Windows heaps can't grok zero byte allocation.
+	 * We just get one byte.
+	 */
+	if (size == 0)
+		size = 1;
+	if (hHeapHandle != NULL)
+		ptr = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, size);
+	else
+		ptr = NULL;
+	DPRINTF(3, ("Allocate '%s', heap=%p, ptr=%p\n",
+			desc,  hHeapHandle, ptr));
+
+	return ptr;
+}
+
+static void __fastcall
+LocalPoolFree(
+	void *		ptr,
+	const char *	desc
+	)
+{
+	DPRINTF(3, ("Free '%s', heap=%p, ptr=%p\n",
+			desc, hHeapHandle, ptr));
+	if (ptr != NULL && hHeapHandle != NULL)
+		HeapFree(hHeapHandle, 0, ptr);
+}
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * Alloc & Free of Device context
+ *
+ * When the heap handle is NULL, these both will fail; Alloc with a NULL
+ * return and Free silently.
+ */
+static DevCtx_t * __fastcall
+DevCtxAlloc(void)
+{
+	DevCtx_t *	devCtx;
+	u_long		slot;
+
+	/* allocate struct and tag all slots as invalid */
+	devCtx = (DevCtx_t *)LocalPoolAlloc(sizeof(DevCtx_t), "DEV ctx");
+	if (devCtx != NULL)
+	{
+		/* The initial COV values make sure there is no busy
+		 * loop on unused/empty slots.
+		 */
+		devCtx->cov_count = 0;
+		for (slot = 0; slot < PPS_QUEUE_LEN; slot++)
+			devCtx->pps_buff[slot].cov_count = ~slot;
+	}
+	return devCtx;
+}
+
+static void __fastcall
+DevCtxFree(
+	DevCtx_t *	devCtx
+	)
+{
+	/* this would be the place to get rid of managed ressources. */
+	LocalPoolFree(devCtx, "DEV ctx");
+}
+
+static DevCtx_t * __fastcall
+DevCtxAttach(
+	DevCtx_t *	devCtx
+	)
+{
+	if (devCtx != NULL)
+		InterlockedIncrement(&devCtx->ref_count);
+	return devCtx;
+}
+
+static void __fastcall
+DevCtxDetach(
+	DevCtx_t *	devCtx
+	)
+{
+	if (devCtx && !InterlockedDecrement(&devCtx->ref_count))
+		DevCtxFree(devCtx);
+}
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * Alloc & Free of I/O context
+ *
+ * When the heap handle is NULL, these both will fail; Alloc with a NULL
+ * return and Free silently.
+ */
+static IoCtx_t * __fastcall
+IoCtxAlloc(
+	DevCtx_t *	devCtx
+	)
+{
+	IoCtx_t *	ioCtx;
+
+	ioCtx = (IoCtx_t *)LocalPoolAlloc(sizeof(IoCtx_t), "IO ctx");
+	if (ioCtx != NULL)
+		ioCtx->devCtx = DevCtxAttach(devCtx);
+	return ioCtx;
+}
+
+static void __fastcall
+IoCtxFree(
+	IoCtx_t *	ctx
+	)
+{
+	if (ctx)
+		DevCtxDetach(ctx->devCtx);
+	LocalPoolFree(ctx, "IO ctx");
+}
+
+static void __fastcall
+IoCtxReset(
+	IoCtx_t *	ctx
+	)
+{
+	RIO_t *		rio;
+	DevCtx_t *	dev;
+	if (ctx) {
+		rio = ctx->rio;
+		dev = ctx->devCtx;
+		ZERO(*ctx);
+		ctx->rio    = rio;
+		ctx->devCtx = dev;
+	}
+}
+
+/*
+ * -------------------------------------------------------------------
+ * The IO completion thread and support functions
+ *
+ * There is only one completion thread, because it is locked to the same
+ * core as the time interpolation. Having more than one causes core
+ * contention and is not useful.
+ * -------------------------------------------------------------------
+ */
+static HANDLE hIoCompletionThread;
+static UINT   tidCompletionThread;
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * The IO completion worker thread
+ *
+ * Note that this thread does not enter an alertable wait state and that
+ * the only waiting point is the IO completion port. If stopping this
+ * thread with a special queued result packet does not work,
+ * 'TerminateThread()' is the only remaining weapon in the arsenal. A
+ * dangerous weapon -- it's like SIGKILL.
+ */
+static unsigned WINAPI
+iocompletionthread(void *NotUsed)
+{
+	DWORD		err;
+	DWORD		octets;
+	ULONG_PTR	key;
+	OVERLAPPED *	pol;
+	IoCtx_t *	lpo;
+
+	UNUSED_ARG(NotUsed);
+
+	/*
+	 * Socket and refclock receive call gettimeofday() so the I/O
+	 * thread needs to be on the same processor as the main and
+	 * timing threads to ensure consistent QueryPerformanceCounter()
+	 * results.
+	 *
+	 * This gets seriously into the way of efficient thread pooling
+	 * on multicore systems.
+	 */
+	lock_thread_to_processor(GetCurrentThread());
+
+	/*
+	 * Set the thread priority high enough so I/O will preempt
+	 * normal recv packet processing, but not higher than the timer
+	 * sync thread.
+	 */
+	if (!SetThreadPriority(GetCurrentThread(),
+			       THREAD_PRIORITY_ABOVE_NORMAL))
+		msyslog(LOG_ERR, "Can't set thread priority: %m");
+
+	for(;;) {
+		if (GetQueuedCompletionStatus(
+					hIoCompletionPort, 
+					&octets, 
+					&key, 
+					&pol, 
+					INFINITE)) {
+			err = ERROR_SUCCESS;
+		} else {
+			err = GetLastError();
+		}
+		if (NULL == pol) {
+			DPRINTF(2, ("Overlapped IO Thread Exiting\n"));
+			break; /* fail */
+		}
+		lpo = CONTAINEROF(pol, IoCtx_t, ol);
+		get_systime(&lpo->RecvTime);
+		lpo->byteCount = octets;
+		lpo->errCode = err;
+		handler_calls++;
+		(*lpo->onIoDone)(key, lpo);
+	}
+
+	return 0;
+}
+
+/*
+ * -------------------------------------------------------------------
+ * Create/initialise the I/O creation port
+ */
+void
+init_io_completion_port(void)
+{
+#ifdef DEBUG
+	atexit(&free_io_completion_port_mem);
+#endif
+
+	/* Create the context pool first. */
+	IoCtxPoolInit(20);
+
+	/* Create the event used to signal an IO event */
+	WaitableIoEventHandle = CreateEvent(NULL, FALSE, FALSE, NULL);
+	if (WaitableIoEventHandle == NULL) {
+		msyslog(LOG_ERR, "Can't create I/O event handle: %m");
+		exit(1);
+	}
+	/* Create the event used to signal an exit event */
+	WaitableExitEventHandle = CreateEvent(NULL, FALSE, FALSE, NULL);
+	if (WaitableExitEventHandle == NULL) {
+		msyslog(LOG_ERR, "Can't create exit event handle: %m");
+		exit(1);
+	}
+
+	/* Create the IO completion port */
+	hIoCompletionPort = CreateIoCompletionPort(
+		INVALID_HANDLE_VALUE, NULL, 0, 0);
+	if (hIoCompletionPort == NULL) {
+		msyslog(LOG_ERR, "Can't create I/O completion port: %m");
+		exit(1);
+	}
+
+	/* Initialize the Wait Handles table */
+	WaitHandles[0] = WaitableIoEventHandle;
+	WaitHandles[1] = WaitableExitEventHandle; /* exit request */
+	WaitHandles[2] = WaitableTimerHandle;
+	ActiveWaitHandles = 3;
+
+	/*
+	 * Supply ntp_worker.c with function to add or remove a
+	 * semaphore to the ntpd I/O loop which is signalled by a worker
+	 * when a response is ready.  The callback is invoked in the
+	 * parent.
+	 */
+	addremove_io_semaphore = &ntpd_addremove_semaphore;
+
+	/*
+	 * Have one thread servicing I/O. See rationale in front matter.
+ 	 */
+	hIoCompletionThread = (HANDLE)_beginthreadex(
+		NULL, 
+		0, 
+		iocompletionthread, 
+		NULL, 
+		0, 
+		&tidCompletionThread);
+}
+
+
+/*
+ * -------------------------------------------------------------------
+ * completion port teardown
+ */
+void
+uninit_io_completion_port(
+	void
+	)
+{
+        DWORD rc;
+
+	/* do noting if completion port already gone. */
+	if (NULL == hIoCompletionPort)
+		return;
+
+	/*
+	 * Service thread seems running. Terminate him with grace
+	 * first and force later...
+	 */
+        if (tidCompletionThread != GetCurrentThreadId()) {
+	        PostQueuedCompletionStatus(hIoCompletionPort, 0, 0, 0);
+                rc = WaitForSingleObject(hIoCompletionThread, 5000);
+                if (rc == WAIT_TIMEOUT) {
+		        /* Thread lost. Kill off with TerminateThread. */
+		        msyslog(LOG_ERR,
+                                "IO completion thread refuses to terminate");
+		        TerminateThread(hIoCompletionThread, ~0UL);
+                }
+	}
+
+         /* stop using the memory pool */
+	IoCtxPoolDone();
+
+	/* now reap all handles... */
+	CloseHandle(hIoCompletionThread);
+	hIoCompletionThread = NULL;
+	CloseHandle(hIoCompletionPort);
+	hIoCompletionPort = NULL;
+}
+
+
+/*
+ * -------------------------------------------------------------------
+ * external worker thread support (wait handle stuff)
+ *
+ * !Attention!
+ *
+ *  - This function must only be called from the main thread. Changing
+ *    a set of wait handles while someone is waiting on it creates
+ *    undefined behaviour. Also there's no provision for mutual
+ *    exclusion when accessing global values. 
+ *
+ *  - It's not possible to register a handle that is already in the table.
+ */
+static void
+ntpd_addremove_semaphore(
+	HANDLE	sem,
+	int	remove
+	)
+{
+	DWORD	hi;
+
+	/* search for a matching entry first. */
+	for (hi = 3; hi < ActiveWaitHandles; hi++)
+		if (sem == WaitHandles[hi])
+			break;
+
+	if (remove) {
+		/*
+		 * If found, eventually swap with last entry to keep
+		 * the table dense.
+		 */
+		if (hi < ActiveWaitHandles) {
+			ActiveWaitHandles--;
+			if (hi < ActiveWaitHandles)
+				WaitHandles[hi] =
+				    WaitHandles[ActiveWaitHandles];
+			WaitHandles[ActiveWaitHandles] = NULL;
+		}
+	} else {
+		/*
+		 * Make sure the entry is not found and there is enough
+		 * room, then append to the table array.
+		 */
+		if (hi >= ActiveWaitHandles) {
+			NTP_INSIST(ActiveWaitHandles < COUNTOF(WaitHandles));
+			WaitHandles[ActiveWaitHandles] = sem;
+			ActiveWaitHandles++;
+		}
+	}
+}
+
+
+#ifdef DEBUG
+static void
+free_io_completion_port_mem(
+	void
+	)
+{
+	/*
+	 * At the moment, do absolutely nothing. Returning memory here
+	 * requires NO PENDING OVERLAPPED OPERATIONS AT ALL at this
+	 * point in time, and as long we cannot be reasonable sure about
+	 * that the simple advice is:
+	 *
+	 * HANDS OFF!
+	 */
+}
+#endif	/* DEBUG */
+
+
+/*
+ * -------------------------------------------------------------------
+ * Serial IO stuff
+ *
+ * Prelude -- common error checking code
+ * -------------------------------------------------------------------
+ */
+extern char * NTstrerror(int err, BOOL *bfreebuf);
+
+static BOOL
+IoResultCheck(
+	DWORD		err,
+	IoCtx_t * 	ctx,
+	const char *	msg
+	)
+{
+	char * msgbuf;
+	BOOL   dynbuf;
+
+	/* If the clock is not / no longer active, assume
+	 * 'ERROR_OPERATION_ABORTED' and do the necessary cleanup.
+	 */
+	if (ctx->rio && !ctx->rio->active)
+		err = ERROR_OPERATION_ABORTED;
+	
+	switch (err)
+	{
+		/* The first ones are no real errors. */
+	case ERROR_SUCCESS:	/* all is good */
+	case ERROR_IO_PENDING:	/* callback pending */
+		return TRUE;
+
+		/* the next ones go silently -- only cleanup is done */
+	case ERROR_INVALID_PARAMETER:	/* handle already closed */
+	case ERROR_OPERATION_ABORTED:	/* handle closed while wait */
+		break;
+
+
+	default:
+		/*
+		 * We have to resort to the low level error formatting
+		 * functions here, since the error code can be an
+		 * overlapped result. Relying the value to be the same
+		 * as the 'GetLastError()' result at this point of
+		 * execution is shaky at best, and using SetLastError()
+		 * to force it seems too nasty.
+		 */
+		msgbuf = NTstrerror(err, &dynbuf);
+		msyslog(LOG_ERR, "%s: err=%u, '%s'", msg, err, msgbuf);
+		if (dynbuf)
+			LocalFree(msgbuf);
+		break;
+	}
+
+	/* If we end here, we have to mop up the buffer and context */
+	if (ctx->flRawMem) {
+		if (ctx->trans_buf)
+			free(ctx->trans_buf);
+	} else {
+		if (ctx->recv_buf)
+			freerecvbuf(ctx->recv_buf);
+	}
+	IoCtxFree(ctx);
+	return FALSE;
+}
+
+/*
+ * -------------------------------------------------------------------
+ * Serial IO stuff
+ *
+ * Part 1 -- COMM event handling
+ * -------------------------------------------------------------------
+ */
+
+static BOOL
+QueueSerialWait(
+	RIO_t *		rio,
+	recvbuf_t *	buff,
+	IoCtx_t *	lpo
+	)
+{
+	BOOL   rc;
+
+	lpo->onIoDone = OnSerialWaitComplete;
+	lpo->recv_buf = buff;
+	lpo->flRawMem = 0;
+	lpo->rio      = rio;
+	buff->fd      = rio->fd;
+
+	rc = WaitCommEvent((HANDLE)_get_osfhandle(rio->fd),
+			   &lpo->com_events, &lpo->ol);
+	if (!rc)
+		return IoResultCheck(GetLastError(), lpo,
+				     "Can't wait on Refclock");
+	return TRUE;
+}
+
+/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+static void 
+OnSerialWaitComplete(
+	ULONG_PTR	key,       
+	IoCtx_t *	lpo
+	)
+{
+	RIO_t *		rio;
+	DevCtx_t *	dev;
+	recvbuf_t * 	buff;
+	PPSDataEx_t *	ppsbuf;
+	DWORD 		modem_status;
+	u_long		covc;
+
+	/* check and bail out if operation failed */
+	if (!IoResultCheck(lpo->errCode, lpo,
+		"WaitCommEvent failed"))
+		return;
+
+	/* get & validate context and buffer. */
+	rio  = (RIO_t *)key;
+	buff = lpo->recv_buf;
+	dev  = lpo->devCtx;
+
+	NTP_INSIST(rio == lpo->rio);
+
+#ifdef DEBUG
+	if (~(EV_RXFLAG | EV_RLSD | EV_RXCHAR) & lpo->com_events) {
+		msyslog(LOG_ERR, "WaitCommEvent returned unexpected mask %x",
+			lpo->com_events);
+		exit(-1);
+	}
+#endif
+	/*
+	 * Take note of changes on DCD; 'user mode PPS hack'.
+	 * perlinger@ntp.org suggested a way of solving several problems with
+	 * this code that makes a lot of sense: move to a putative
+	 * dcdpps-ppsapi-provider.dll.
+	 */
+	if (EV_RLSD & lpo->com_events) {
+		modem_status = 0;
+		GetCommModemStatus((HANDLE)_get_osfhandle(rio->fd),
+				   &modem_status);
+
+		if (dev != NULL) {
+			/* PPS-context available -- use it! */
+			if (MS_RLSD_ON & modem_status) {
+				dev->pps_data.cc_assert++;
+				dev->pps_data.ts_assert = lpo->RecvTime;
+				DPRINTF(2, ("upps-real: fd %d DCD PPS Rise at %s\n", rio->fd,
+					ulfptoa(&lpo->RecvTime, 6)));
+			} else {
+				dev->pps_data.cc_clear++;
+				dev->pps_data.ts_clear = lpo->RecvTime;
+				DPRINTF(2, ("upps-real: fd %d DCD PPS Fall at %s\n", rio->fd,
+					ulfptoa(&lpo->RecvTime, 6)));
+			}
+			/*
+			** Update PPS buffer, writing from low to high, with index
+			** update as last action. We use interlocked ops and a
+			** volatile data destination to avoid reordering on compiler
+			** and CPU level. The interlocked instruction act as full
+			** barriers -- we need only release semantics, but we don't
+			** have them before VS2010.
+			*/
+			covc   = dev->cov_count + 1u;
+			ppsbuf = dev->pps_buff + (covc & PPS_QUEUE_MSK);
+			InterlockedExchange((PLONG)&ppsbuf->cov_count, covc);
+			ppsbuf->data = dev->pps_data;
+			InterlockedExchange((PLONG)&dev->cov_count, covc);
+		}
+		/* perlinger@ntp.org, 2012-11-19
+		It can be argued that once you have the PPS API active, you can
+		disable the old pps hack. This would give a behaviour that's much
+		more like the behaviour under a UN*Xish OS. On the other hand, it
+		will give a nasty surprise for people which have until now happily
+		taken the pps hack for granted, and after the first complaint, I have
+		decided to keep the old implementation unconditionally. So here it is:
+
+		/* backward compat: 'usermode-pps-hack' */
+		if (MS_RLSD_ON & modem_status) {
+			lpo->DCDSTime = lpo->RecvTime;
+			lpo->flTsDCDS = 1;
+			DPRINTF(2, ("upps-hack: fd %d DCD PPS Rise at %s\n", rio->fd,
+				ulfptoa(&lpo->RecvTime, 6)));
+		}
+	}
+
+	/* If IO ready, read data. Go back waiting else. */
+	if (EV_RXFLAG & lpo->com_events) {		/* line discipline */
+		lpo->FlagTime = lpo->RecvTime;
+		lpo->flTsFlag = 1;
+		QueueSerialRead(rio, buff, lpo);
+	} else if (EV_RXCHAR & lpo->com_events) {	/* raw discipline */
+		lpo->FlagTime = lpo->RecvTime;
+		lpo->flTsFlag = 1;
+		QueueRawSerialRead(rio, buff, lpo);
+	} else {					/* idle... */
+		QueueSerialWait(rio, buff, lpo);
+	}
+}
+
+/*
+ * -------------------------------------------------------------------
+ * Serial IO stuff
+ *
+ * Part 2 -- line discipline emulation
+ *
+ * Ideally this should *not* be done in the IO completion thread.
+ * We use a worker pool thread to offload the low-level processing.
+ * -------------------------------------------------------------------
+ */
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * Start & Queue a serial read for line discipline emulation.
+ */
+static BOOL
+QueueSerialRead(
+	RIO_t *		rio,
+	recvbuf_t *	buff,
+	IoCtx_t *	lpo
+	)
+{
+	BOOL   rc;
+
+	lpo->onIoDone = &OnSerialReadComplete;
+	lpo->recv_buf = buff;
+	lpo->flRawMem = 0;
+	lpo->rio      = rio;
+	buff->fd      = rio->fd;
+
+	rc = ReadFile((HANDLE)_get_osfhandle(rio->fd),
+		      (char*)buff->recv_buffer  + buff->recv_length,
+		      sizeof(buff->recv_buffer) - buff->recv_length,
+		      NULL, &lpo->ol);
+	if (!rc)
+		return IoResultCheck(GetLastError(), lpo,
+				     "Can't read from Refclock");
+	return TRUE;
+}
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * IO completion thread callback. Takes a time stamp and offloads the
+ * real work to the worker pool ASAP.
+ */
+static void
+OnSerialReadComplete(
+	ULONG_PTR	key,
+	IoCtx_t *	lpo
+	)
+{
+	RIO_t *		rio;
+	recvbuf_t *	buff;
+
+	/* check and bail out if operation failed */
+	if (!IoResultCheck(lpo->errCode, lpo,
+			   "Read from Refclock failed"))
+		return;
+
+	/* get & validate context and buffer. */
+	rio  = lpo->rio;
+	buff = lpo->recv_buf;
+	NTP_INSIST((ULONG_PTR)rio == key);
+
+	/* Offload to worker pool */
+	if (!QueueUserWorkItem(&OnSerialReadWorker, lpo, WT_EXECUTEDEFAULT)) {
+		msyslog(LOG_ERR,
+			"Can't offload to worker thread, will skip data: %m");
+		IoCtxReset(lpo);
+		buff->recv_length = 0;
+		QueueSerialWait(rio, buff, lpo);
+	}
+}
+
+
+/*
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ * Worker pool offload function -- avoid lengthy operations in the IO
+ * completion thread (affects timing...)
+ *
+ * This function does the real work of emulating the UN*X line
+ * discipline. Since this involves allocation of additional buffers and
+ * string parsing/copying, it is offloaded to the worker thread pool so
+ * the IO completion thread can resume faster.
+ */
+static DWORD WINAPI
+OnSerialReadWorker(void * ctx)
+{
+	IoCtx_t *	lpo;
+	recvbuf_t *	buff, *obuf;
+	RIO_t *		rio;
+	char		*sptr, *send, *dptr;
+	BOOL		eol;
+	char		ch;
+
+	/* Get context back */
+	lpo  = (IoCtx_t*)ctx;
+	buff = lpo->recv_buf;
+	rio  = lpo->rio;
+	/*
+	 * ignore 0 bytes read due to closure on fd.
+	 * Eat the first line of input as it's possibly partial.
+	 */
+	if (lpo->byteCount && rio->recvcount++) {
+		/* account for additional input */
+		buff->recv_length += (int)lpo->byteCount;
+
+		/*
+		 * Now mimic the Unix line discipline. 
+		 */
+		sptr = (char *)buff->recv_buffer;
+		send = sptr + buff->recv_length;
+		obuf = NULL;
+		dptr = NULL;
+
+		/* hack #1: eat away leading CR/LF if here is any */
+		while (sptr != send) {
+			ch = *sptr;
+			if (ch != '\n' && ch != '\r')
+				break;
+			sptr++;
+		}
+
+		while (sptr != send)
+		{
+			/* get new buffer to store line */
+			obuf = get_free_recv_buffer_alloc();
+			obuf->fd          = rio->fd;
+			obuf->receiver    = &process_refclock_packet;
+			obuf->dstadr      = NULL;
+			obuf->recv_peer   = rio->srcclock;
+			set_serial_recv_time(obuf, lpo);
+
+			/*
+			 * Copy data to new buffer, convert CR to LF on
+			 * the fly.  Stop after either.
+			 */
+			dptr = (char *)obuf->recv_buffer;
+			eol  = FALSE;
+			while (sptr != send && !eol) {
+				ch  = *sptr++;
+				if ('\r' == ch) {
+					ch = '\n';
+				}
+				*dptr++ = ch;
+				eol = ('\n' == ch);
+			}
+			obuf->recv_length =
+			    (int)(dptr - (char *)obuf->recv_buffer);
+
+			/*
+			 * If NL found, push this buffer and prepare to
+			 * get a new one.
+			 */
+			if (eol) {
+				add_full_recv_buffer(obuf);
+				SetEvent(WaitableIoEventHandle);
+				obuf = NULL;
+			}
+		}
+
+		/*
+		 * If we still have an output buffer, continue to fill
+		 * it again.
+		 */
+		if (obuf) {
+			obuf->recv_length =
+			    (int)(dptr - (char *)obuf->recv_buffer);
+			freerecvbuf(buff);
+			buff = obuf;
+		} else {
+			/* clear the current buffer, continue */
+			buff->recv_length = 0;
+		}
+	} else {
+		buff->recv_length = 0;
+	}
+
+	IoCtxReset(lpo);
+	QueueSerialWait(rio, buff, lpo);
+	return 0;
+}
+
+
+/*
+ * -------------------------------------------------------------------
+ * Serial IO stuff
+ *
+ * Part 3 -- raw data input
+ *
+ * Raw data processing is fast enough to do without offloading to the
+ * worker pool, so this is rather short'n sweet...
+ * -------------------------------------------------------------------
+ */
+
+static BOOL
+QueueRawSerialRead(
+	RIO_t *		rio,
+	recvbuf_t *	buff,
+	IoCtx_t *	lpo
+	)
+{
+	BOOL   rc;
+
+	lpo->onIoDone = OnRawSerialReadComplete;
+	lpo->recv_buf = buff;
+	lpo->flRawMem = 0;
+	lpo->rio      = rio;
+	buff->fd      = rio->fd;
+
+	rc = ReadFile((HANDLE)_get_osfhandle(rio->fd),
+		      buff->recv_buffer,
+		      sizeof(buff->recv_buffer),
+		      NULL, &lpo->ol);
+	if (!rc)
+		return IoResultCheck(GetLastError(), lpo,
+				     "Can't read raw from Refclock");
+	return TRUE;
+}
+
+
+static void 
+OnRawSerialReadComplete(
+	ULONG_PTR	key,
+	IoCtx_t *	lpo
+	)
+{
+	RIO_t *		rio;
+	recvbuf_t *	buff;
+
+	/* check and bail out if operation failed */
+	if (!IoResultCheck(lpo->errCode, lpo,
+			   "Raw read from Refclock failed"))
+		return;
+
+	/* get & validate context and buffer. */
+	rio  = lpo->rio;
+	buff = lpo->recv_buf;
+	NTP_INSIST((ULONG_PTR)rio == key);
+
+	/* ignore 0 bytes read. */
+	if (lpo->byteCount > 0) {
+		buff->recv_length = (int)lpo->byteCount;
+		buff->dstadr      = NULL;
+		buff->receiver    = process_refclock_packet;
+		buff->recv_peer   = rio->srcclock;
+		set_serial_recv_time(buff, lpo);
+		add_full_recv_buffer(buff);
+		SetEvent(WaitableIoEventHandle);
+		buff = get_free_recv_buffer_alloc();
+	}
+
+	buff->recv_length = 0;
+	QueueSerialWait(rio, buff, lpo);
+}
+
+
+static inline void
+set_serial_recv_time(
+	recvbuf_t *	obuf,
+	IoCtx_t *	lpo
+	)
+{
+	/*
+	 * Time stamp assignment is interesting.  If we
+	 * have a DCD stamp, we use it, otherwise we use
+	 * the FLAG char event time, and if that is also
+	 * not / no longer available we use the arrival
+	 * time.
+	 */
+	if (lpo->flTsDCDS)
+		obuf->recv_time = lpo->DCDSTime;
+	else if (lpo->flTsFlag)
+		obuf->recv_time = lpo->FlagTime;
+	else
+		obuf->recv_time = lpo->RecvTime;
+
+	lpo->flTsDCDS = lpo->flTsFlag = 0; /* use only once... */
+}
+
+
+/*
+ * -------------------------------------------------------------------
+ * Serial IO stuff
+ *
+ * Part 4 -- Overlapped serial output
+ *
+ * Again, no need to offload any work.
+ * -------------------------------------------------------------------
+ */
+
+/*
+ * async_write, clone of write(), used by some reflock drivers
+ */
+int	
+async_write(
+	int		fd,
+	const void *	data,
+	unsigned int	count
+	)
+{
+	IoCtx_t *	lpo;
+	BOOL		rc;
+
+	lpo  = IoCtxAlloc(NULL);
+	if (lpo == NULL) {
+		DPRINTF(1, ("async_write: out of memory\n"));
+		errno = ENOMEM;
+		return -1;
+	}
+
+	lpo->onIoDone  = OnSerialWriteComplete;
+	lpo->trans_buf = emalloc(count);
+	lpo->flRawMem  = 1;
+	memcpy(lpo->trans_buf, data, count);
+
+	rc = WriteFile((HANDLE)_get_osfhandle(fd),
+		       lpo->trans_buf, count,
+		       NULL, &lpo->ol);
+	if (!rc && !IoResultCheck(GetLastError(), lpo,
+				  "Can't write to Refclock")) {
+		errno = EBADF;
+		return -1;
+	}
+	return count;
+}
+
+static void
+OnSerialWriteComplete(
+	ULONG_PTR	key,
+	IoCtx_t *	lpo
+	)
+{
+	/* set RIO and force silent cleanup if no error */
+	lpo->rio = (RIO_t *)key;
+	if (ERROR_SUCCESS == lpo->errCode)
+		lpo->errCode = ERROR_OPERATION_ABORTED;
+	IoResultCheck(lpo->errCode, lpo,
+		      "Write to Refclock failed");
+}
+
+
+/*
+ * -------------------------------------------------------------------
+ * Serial IO stuff
+ *
+ * Part 5 -- read PPS time stamps
+ *
+ * -------------------------------------------------------------------
+ */
+
+/* The dummy read procedure is used for getting the device context
+ * into the IO completion thread, using the IO completion queue for
+ * transport. There are other ways to accomplish what we need here,
+ * but using the IO machine is handy and avoids a lot of trouble.
+ */
+static void
+OnPpsDummyRead(
+	ULONG_PTR	key,
+	IoCtx_t *	lpo
+	)
+{
+	RIO_t *	rio;
+
+	rio = (RIO_t *)key;
+	lpo->devCtx = DevCtxAttach(rio->device_context);
+	SetEvent(lpo->ppswake);
+}
+
+__declspec(dllexport) void* __stdcall
+ntp_pps_attach_device(
+	HANDLE	hndIo
+	)
+{
+	IoCtx_t		myIoCtx;
+	HANDLE		myEvt;
+	DevCtx_t *	dev;
+	DWORD		rc;
+
+	if (!isserialhandle(hndIo)) {
+		SetLastError(ERROR_INVALID_HANDLE);
+		return NULL;
+	}
+
+	ZERO(myIoCtx);
+	dev   = NULL;
+	myEvt = CreateEvent(NULL, FALSE, FALSE, NULL);
+	if (NULL == myEvt)
+		goto done;
+
+	myIoCtx.ppswake   = myEvt;
+	myIoCtx.onIoDone  = OnPpsDummyRead;
+	rc = ReadFile(hndIo, &myIoCtx.byteCount, 0,
+			&myIoCtx.byteCount, &myIoCtx.ol);
+	if (!rc && (GetLastError() != ERROR_IO_PENDING))
+		goto done;
+	if (WaitForSingleObject(myEvt, INFINITE) == WAIT_OBJECT_0)
+		if (NULL == (dev = myIoCtx.devCtx))
+			SetLastError(ERROR_INVALID_HANDLE);
+done:
+	rc = GetLastError();
+	CloseHandle(myEvt);
+	SetLastError(rc);
+	return dev;
+}
+
+__declspec(dllexport) void __stdcall
+ntp_pps_detach_device(
+	DevCtx_t *	dev
+	)
+{
+	DevCtxDetach(dev);
+}
+
+__declspec(dllexport) BOOL __stdcall
+ntp_pps_read(
+	DevCtx_t *	dev,
+	PPSData_t *	data,
+	size_t		dlen
+	)
+{
+	u_long		guard, covc;
+	int		repc;
+	PPSDataEx_t *	ppsbuf;
+
+
+	if (dev == NULL) {
+		SetLastError(ERROR_INVALID_HANDLE);
+		return FALSE;
+	}
+	if (data == NULL || dlen != sizeof(PPSData_t)) {
+		SetLastError(ERROR_INVALID_PARAMETER);
+		return FALSE;
+	}
+	/*
+	** Reading from shared memory in a lock-free fashion can be
+	** a bit tricky, since we have to read the components in the
+	** opposite direction from the write, and the compiler must
+	** not reorder the read sequence.
+	** We use interlocked ops and a volatile data source to avoid
+	** reordering on compiler and CPU level. The interlocked
+	** instruction act as full barriers -- we need only aquire
+	** semantics, but we don't have them before VS2010.
+	*/
+	repc = 3;
+	do {
+		InterlockedExchange((PLONG)&covc, dev->cov_count);
+		ppsbuf = dev->pps_buff + (covc & PPS_QUEUE_MSK);
+		*data = ppsbuf->data;
+		InterlockedExchange((PLONG)&guard, ppsbuf->cov_count);
+		guard ^= covc;
+	} while (guard && ~guard && --repc);
+
+	if (guard) {
+		SetLastError(ERROR_INVALID_DATA);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/*
+ * Add a reference clock data structures I/O handles to
+ * the I/O completion port. Return 1 if any error.
+ */  
+int
+io_completion_port_add_clock_io(
+	RIO_t *rio
+	)
+{
+	IoCtx_t *	lpo;
+	DevCtx_t *	dev;
+	recvbuf_t *	buff;
+	HANDLE		h;
+
+	h = (HANDLE)_get_osfhandle(rio->fd);
+	if (NULL == CreateIoCompletionPort(
+			h, 
+			hIoCompletionPort, 
+			(ULONG_PTR)rio,
+			0)) {
+		msyslog(LOG_ERR, "Can't add COM port to i/o completion port: %m");
+		return 1;
+	}
+
+	dev = DevCtxAlloc();
+	if (NULL == dev) {
+		msyslog(LOG_ERR, "Can't allocate device context for i/o completion port: %m");
+		return 1;
+	}
+	rio->device_context = DevCtxAttach(dev);
+	lpo = IoCtxAlloc(dev);
+	if (NULL == lpo) {
+		msyslog(LOG_ERR, "Can't allocate heap for completion port: %m");
+		return 1;
+	}
+	buff = get_free_recv_buffer_alloc();
+	buff->recv_length = 0;
+	QueueSerialWait(rio, buff, lpo);
+
+	return 0;
+}
+
+void
+io_completion_port_remove_clock_io(
+	RIO_t *rio
+	)
+{
+	if (rio)
+		DevCtxDetach((DevCtx_t *)rio->device_context);
+}
+
+/*
+ * Queue a receiver on a socket. Returns 0 if no buffer can be queued 
+ *
+ *  Note: As per the winsock documentation, we use WSARecvFrom. Using
+ *	  ReadFile() is less efficient.
+ */
+static BOOL 
+QueueSocketRecv(
+	SOCKET		s,
+	recvbuf_t *	buff,
+	IoCtx_t *	lpo
+	)
+{
+	WSABUF wsabuf;
+	DWORD  Flags;
+	int    rc;
+
+	lpo->onIoDone = OnSocketRecv;
+	lpo->recv_buf = buff;
+	lpo->flRawMem = 0;
+	lpo->rio      = NULL;
+
+	Flags = 0;
+	buff->fd = s;
+	buff->recv_srcadr_len = sizeof(buff->recv_srcadr);
+	wsabuf.buf = (char *)buff->recv_buffer;
+	wsabuf.len = sizeof(buff->recv_buffer);
+
+	rc = WSARecvFrom(buff->fd, &wsabuf, 1, NULL, &Flags, 
+			 &buff->recv_srcadr.sa, &buff->recv_srcadr_len, 
+			 &lpo->ol, NULL);
+	if (SOCKET_ERROR == rc) 
+		return IoResultCheck(GetLastError(), lpo,
+				     "Can't read from Socket");
+	return TRUE;
+}
+
+
+static void 
+OnSocketRecv(
+	ULONG_PTR	key,
+	IoCtx_t *	lpo
+	)
+{
+	recvbuf_t * buff;
+	recvbuf_t * newbuff;
+	struct interface * inter = (struct interface *)key;
+	
+	NTP_REQUIRE(NULL != lpo);
+	NTP_REQUIRE(NULL != lpo->recv_buf);
+
+	/* check and bail out if operation failed */
+	if (!IoResultCheck(lpo->errCode, lpo,
+			   "Read from Socket failed"))
+		return;
+
+	/*
+	 * Convert the overlapped pointer back to a recvbuf pointer.
+	 * Fetch items that are lost when the context is queued again.
+	 */
+	buff = lpo->recv_buf;
+	buff->recv_time   = lpo->RecvTime;
+	buff->recv_length = (int)lpo->byteCount;
+
+	/*
+	 * Get a new recv buffer for the replacement socket receive
+	 */
+	newbuff = get_free_recv_buffer_alloc();
+	if (NULL != newbuff) {
+		QueueSocketRecv(inter->fd, newbuff, lpo);
+	} else {
+		IoCtxFree(lpo);
+		msyslog(LOG_ERR, "Can't add I/O request to socket");
+	}
+	DPRINTF(4, ("%sfd %d %s recv packet mode is %d\n", 
+		    (MODE_BROADCAST == get_packet_mode(buff))
+			? " **** Broadcast "
+			: "",
+		    (int)buff->fd, stoa(&buff->recv_srcadr),
+		    get_packet_mode(buff)));
+
+	/*
+	 * If we keep it add some info to the structure
+	 */
+	if (buff->recv_length && !inter->ignore_packets) {
+		NTP_INSIST(buff->recv_srcadr_len <=
+			   sizeof(buff->recv_srcadr));
+		buff->receiver = &receive; 
+		buff->dstadr   = inter;
+		packets_received++;
+		handler_pkts++;
+		inter->received++;
+		add_full_recv_buffer(buff);
+
+		DPRINTF(2, ("Received %d bytes fd %d in buffer %p from %s\n", 
+			    buff->recv_length, (int)buff->fd, buff,
+			    stoa(&buff->recv_srcadr)));
+
+		/*
+		 * Now signal we have something to process
+		 */
+		SetEvent(WaitableIoEventHandle);
+	} else
+		freerecvbuf(buff);
+}
+
+
+/*
+ * Add a socket handle to the I/O completion port, and send 
+ * NTP_RECVS_PER_SOCKET recv requests to the kernel.
+ */
+int
+io_completion_port_add_socket(
+	SOCKET			fd,
+	struct interface *	inter
+	)
+{
+	IoCtx_t *	lpo;
+	recvbuf_t *	buff;
+	int		n;
+
+	if (fd != INVALID_SOCKET) {
+		if (NULL == CreateIoCompletionPort((HANDLE)fd, 
+		    hIoCompletionPort, (ULONG_PTR)inter, 0)) {
+			msyslog(LOG_ERR,
+				"Can't add socket to i/o completion port: %m");
+			return 1;
+		}
+	}
+
+	/*
+	 * Windows 2000 bluescreens with bugcheck 0x76
+	 * PROCESS_HAS_LOCKED_PAGES at ntpd process
+	 * termination when using more than one pending
+	 * receive per socket.  A runtime version test
+	 * would allow using more on newer versions
+	 * of Windows.
+	 */
+
+#define WINDOWS_RECVS_PER_SOCKET 1
+
+	for (n = 0; n < WINDOWS_RECVS_PER_SOCKET; n++) {
+
+		buff = get_free_recv_buffer_alloc();
+		lpo = IoCtxAlloc(NULL);
+		if (lpo == NULL)
+		{
+			msyslog(LOG_ERR
+				, "Can't allocate IO completion context: %m");
+			return 1;
+		}
+
+		QueueSocketRecv(fd, buff, lpo);
+
+	}
+	return 0;
+}
+
+
+/*
+ * io_completion_port_sendto() -- sendto() replacement for Windows
+ *
+ * Returns len after successful send.
+ * Returns -1 for any error, with the error code available via
+ *	msyslog() %m, or GetLastError().
+ */
+int
+io_completion_port_sendto(
+	int		fd,
+	void  *		pkt,
+	size_t		len,
+	sockaddr_u *	dest
+	)
+{
+	static u_long time_next_ifscan_after_error;
+	WSABUF wsabuf;
+	DWORD octets_sent;
+	DWORD Result;
+	int errval;
+	int AddrLen;
+
+	wsabuf.buf = (void *)pkt;
+	wsabuf.len = len;
+	AddrLen = SOCKLEN(dest);
+	octets_sent = 0;
+
+	Result = WSASendTo(fd, &wsabuf, 1, &octets_sent, 0,
+			   &dest->sa, AddrLen, NULL, NULL);
+	errval = GetLastError();
+	if (SOCKET_ERROR == Result) {
+		if (ERROR_UNEXP_NET_ERR == errval) {
+			/*
+			 * We get this error when trying to send if the
+			 * network interface is gone or has lost link.
+			 * Rescan interfaces to catch on sooner, but no
+			 * more often than once per minute.  Once ntpd
+			 * is able to detect changes without polling
+			 * this should be unneccessary
+			 */
+			if (time_next_ifscan_after_error < current_time) {
+				time_next_ifscan_after_error = current_time + 60;
+				timer_interfacetimeout(current_time);
+			}
+			DPRINTF(4, ("sendto unexpected network error, interface may be down\n"));
+		} else {
+			msyslog(LOG_ERR, "WSASendTo(%s) error %m",
+				stoa(dest));
+		}
+		SetLastError(errval);
+		return -1;
+	}
+
+	if (len != (int)octets_sent) {
+		msyslog(LOG_ERR, "WSASendTo(%s) sent %u of %d octets",
+			stoa(dest), octets_sent, len);
+		SetLastError(ERROR_BAD_LENGTH);
+		return -1;
+	}
+
+	DPRINTF(4, ("sendto %s %d octets\n", stoa(dest), len));
+
+	return len;
+}
+
+
+
+/*
+ * GetReceivedBuffers
+ * Note that this is in effect the main loop for processing requests
+ * both send and receive. This should be reimplemented
+ */
+int
+GetReceivedBuffers()
+{
+	DWORD	index;
+	HANDLE	ready;
+	int	have_packet;
+
+	have_packet = FALSE;
+	while (!have_packet) {
+		index = WaitForMultipleObjects(ActiveWaitHandles,
+					       WaitHandles, FALSE,
+					       INFINITE);
+		switch (index) {
+
+		case WAIT_OBJECT_0 + 0: /* Io event */
+			DPRINTF(4, ("IoEvent occurred\n"));
+			have_packet = TRUE;
+			break;
+
+		case WAIT_OBJECT_0 + 1: /* exit request */
+			exit(0);
+			break;
+
+		case WAIT_OBJECT_0 + 2: /* timer */
+			timer();
+			break;
+
+		case WAIT_IO_COMPLETION: /* loop */
+			break;
+
+		case WAIT_TIMEOUT:
+			msyslog(LOG_ERR,
+				"WaitForMultipleObjects INFINITE timed out.");
+			exit(1);
+			break;
+
+		case WAIT_FAILED:
+			msyslog(LOG_ERR,
+				"WaitForMultipleObjects Failed: Error: %m");
+			exit(1);
+			break;
+
+		default:
+			DEBUG_INSIST((index - WAIT_OBJECT_0) <
+				     ActiveWaitHandles);
+			ready = WaitHandles[index - WAIT_OBJECT_0];
+			handle_blocking_resp_sem(ready);
+			break;
+				
+		} /* switch */
+	}
+
+	return (full_recvbuffs());	/* get received buffers */
+}
+
+#else
+  static int NonEmptyCompilationUnit;
+#endif
+
diff --git a/ports/winnt/ntpd/ntservice.c b/ports/winnt/ntpd/ntservice.c
new file mode 100644
index 0000000..f0840bd
--- /dev/null
+++ b/ports/winnt/ntpd/ntservice.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2004  Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1999-2002  Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+
+#include <ntp_stdlib.h>
+#include "syslog.h"
+#include "ntpd.h"
+#include "ntservice.h"
+#include "clockstuff.h"
+#include "ntp_iocompletionport.h"
+#include "ntpd-opts.h"
+#include "isc/win32os.h"
+#include <ssl_applink.c>
+
+
+/*
+ * Globals
+ */
+static SERVICE_STATUS_HANDLE hServiceStatus = 0;
+static BOOL foreground = FALSE;
+static BOOL computer_shutting_down = FALSE;
+static int glb_argc;
+static char **glb_argv;
+HANDLE hServDoneEvent = NULL;
+extern int accept_wildcard_if_for_winnt;
+
+/*
+ * Forward declarations
+ */
+void uninit_io_completion_port();
+int ntpdmain(int argc, char *argv[]);
+void WINAPI ServiceControl(DWORD dwCtrlCode);
+void ntservice_exit(void);
+
+#ifdef WRAP_DBG_MALLOC
+void *wrap_dbg_malloc(size_t s, const char *f, int l);
+void *wrap_dbg_realloc(void *p, size_t s, const char *f, int l);
+void wrap_dbg_free(void *p);
+#endif
+
+void WINAPI
+service_main(
+	DWORD argc,
+	LPTSTR *argv
+	)
+{
+	if (argc > 1) {
+		/*
+		 * Let command line parameters from the Windows SCM GUI
+		 * override the standard parameters from the ImagePath registry key.
+		 */
+		glb_argc = argc;
+		glb_argv = argv;
+	}
+
+	ntpdmain(glb_argc, glb_argv);
+}
+
+
+/*
+ * This is the entry point for the executable 
+ * We can call ntpdmain() explicitly or via StartServiceCtrlDispatcher()
+ * as we need to.
+ */
+int main(
+	int	argc,
+	char **	argv
+	)
+{
+	int	rc;
+	int	argc_after_opts;
+	char **	argv_after_opts;
+
+	ssl_applink();
+
+	/* Save the command line parameters */
+	glb_argc = argc;
+	glb_argv = argv;
+
+	/* Under original Windows NT we must not discard the wildcard */
+	/* socket to workaround a bug in NT's getsockname(). */
+	if (isc_win32os_majorversion() <= 4)
+		accept_wildcard_if_for_winnt = TRUE;
+
+	argc_after_opts = argc;
+	argv_after_opts = argv;
+	parse_cmdline_opts(&argc_after_opts, &argv_after_opts);
+
+	if (HAVE_OPT(QUIT)
+	    || HAVE_OPT(SAVECONFIGQUIT)
+	    || HAVE_OPT(HELP)
+#ifdef DEBUG
+	    || OPT_VALUE_SET_DEBUG_LEVEL != 0
+#endif
+	    || HAVE_OPT(NOFORK))
+		foreground = TRUE;
+
+	if (foreground)			/* run in console window */
+		rc = ntpdmain(argc, argv);
+	else {
+		/* Start up as service */
+
+		SERVICE_TABLE_ENTRY dispatchTable[] = {
+			{ TEXT(NTP_DISPLAY_NAME), service_main },
+			{ NULL, NULL }
+		};
+
+		rc = StartServiceCtrlDispatcher(dispatchTable);
+		if (rc)
+			rc = 0; 
+		else {
+			rc = GetLastError();
+			fprintf(stderr,
+				"%s: unable to start as service:\n"
+				"%s\n"
+				"Use -d, -q, -n, -?, --help or "
+				"--saveconfigquit to run "
+				"interactive.\n",
+				argv[0], ntp_strerror(rc));
+		}
+	}
+	return rc;
+}
+
+
+/*
+ * Initialize the Service by registering it.
+ */
+void
+ntservice_init() {
+	char ConsoleTitle[256];
+
+	if (!foreground) {
+		/* Register handler with the SCM */
+		hServiceStatus = RegisterServiceCtrlHandler(NTP_DISPLAY_NAME,
+					ServiceControl);
+		if (!hServiceStatus) {
+			NTReportError(NTP_SERVICE_NAME,
+				"could not register service control handler");
+			exit(1);
+		}
+		UpdateSCM(SERVICE_RUNNING);
+	} else {
+		snprintf(ConsoleTitle, sizeof(ConsoleTitle),
+			 "NTP Version %s", Version);
+		ConsoleTitle[sizeof(ConsoleTitle) - 1] = '\0';
+		SetConsoleTitle(ConsoleTitle);
+	}
+
+#ifdef _CRTDBG_MAP_ALLOC
+		/* ask the runtime to dump memory leaks at exit */
+		_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF
+			       | _CRTDBG_LEAK_CHECK_DF		/* report on leaks at exit */
+			       | _CRTDBG_CHECK_ALWAYS_DF	/* Check heap every alloc/dealloc */
+#ifdef MALLOC_LINT
+			       | _CRTDBG_DELAY_FREE_MEM_DF	/* Don't actually free memory */
+#endif
+			       );
+#ifdef DOES_NOT_WORK
+			/*
+			 * hart: I haven't seen this work, running ntpd.exe -n from a shell
+			 * to both a file and the debugger output window.  Docs indicate it
+			 * should cause leak report to go to stderr, but it's only seen if
+			 * ntpd runs under a debugger (in the debugger's output), even with
+			 * this block of code enabled.
+			 */
+			_CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
+			_CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+#endif
+#endif /* using MS debug C runtime heap, _CRTDBG_MAP_ALLOC */
+
+	atexit( ntservice_exit );
+}
+
+
+/*
+ * Routine to check if the service is stopping
+ * because the computer is shutting down
+ */
+BOOL
+ntservice_systemisshuttingdown() {
+	return computer_shutting_down;
+}
+
+void
+ntservice_exit( void )
+{
+	uninit_io_completion_port();
+	Sleep( 200 );  	//##++ 
+
+	reset_winnt_time();
+
+	msyslog(LOG_INFO, "ntservice: The Network Time Protocol Service is stopping.");
+
+	if (!foreground) {
+		/* service mode, need to have the service_main routine
+		 * register with the service control manager that the 
+		 * service has stopped running, before exiting
+		 */
+		UpdateSCM(SERVICE_STOPPED);
+	}
+}
+
+/* 
+ * ServiceControl(): Handles requests from the SCM and passes them on
+ * to the service.
+ */
+void WINAPI
+ServiceControl(
+	DWORD dwCtrlCode
+	) 
+{
+	switch (dwCtrlCode) {
+
+	case SERVICE_CONTROL_SHUTDOWN:
+		computer_shutting_down = TRUE;
+		/* fall through to stop case */
+
+	case SERVICE_CONTROL_STOP:
+		if (WaitableExitEventHandle != NULL) {
+			SetEvent(WaitableExitEventHandle);
+			UpdateSCM(SERVICE_STOP_PENDING);
+			Sleep(100);  //##++
+		}
+		return;
+
+	case SERVICE_CONTROL_PAUSE:
+	case SERVICE_CONTROL_CONTINUE:
+	case SERVICE_CONTROL_INTERROGATE:
+	default:
+		break;
+	}
+	UpdateSCM(SERVICE_RUNNING);
+}
+
+/*
+ * Tell the Service Control Manager the state of the service.
+ */
+void UpdateSCM(DWORD state) {
+	SERVICE_STATUS ss;
+	static DWORD dwState = SERVICE_STOPPED;
+
+	if (hServiceStatus) {
+		if (state)
+			dwState = state;
+
+		ZERO(ss);
+		ss.dwServiceType |= SERVICE_WIN32_OWN_PROCESS;
+		ss.dwCurrentState = dwState;
+		ss.dwControlsAccepted = SERVICE_ACCEPT_STOP |
+					SERVICE_ACCEPT_SHUTDOWN;
+		ss.dwCheckPoint = 0;
+		ss.dwServiceSpecificExitCode = 0;
+		ss.dwWin32ExitCode = NO_ERROR;
+		ss.dwWaitHint = dwState == SERVICE_STOP_PENDING ? 5000 : 1000;
+
+		SetServiceStatus(hServiceStatus, &ss);
+	}
+}
+
+BOOL WINAPI 
+OnConsoleEvent(  
+	DWORD dwCtrlType
+	)
+{
+	switch (dwCtrlType) {
+#ifdef DEBUG
+		case CTRL_BREAK_EVENT:
+			if (debug > 0) {
+				debug <<= 1;
+			}
+			else {
+				debug = 1;
+			}
+			if (debug > 8) {
+				debug = 0;
+			}
+			msyslog(LOG_DEBUG, "debug level %d", debug);
+			break;
+#else
+		case CTRL_BREAK_EVENT:
+			break;
+#endif
+
+		case CTRL_C_EVENT:
+		case CTRL_CLOSE_EVENT:
+		case CTRL_SHUTDOWN_EVENT:
+			if (WaitableExitEventHandle != NULL) {
+				SetEvent(WaitableExitEventHandle);
+				Sleep(100);  //##++
+			}
+			break;
+
+		default :
+			/* pass to next handler */
+			return FALSE; 
+	}
+
+	/* we've handled it, no more handlers should be called */
+	return TRUE;
+}
+
author	Lorry Tar Creator <lorry-tar-importer@baserock.org>	2014-12-02 09:01:21 +0000
committer	<>	2014-12-04 16:11:25 +0000
commit	bdab5265fcbf3f472545073a23f8999749a9f2b9 (patch)
tree	c6018dd03dea906f8f1fb5f105f05b71a7dc250a /ports/winnt/ntpd
download	ntp-bdab5265fcbf3f472545073a23f8999749a9f2b9.tar.gz