diff options
Diffstat (limited to 'ports/winnt/ntpd')
-rw-r--r-- | ports/winnt/ntpd/hopf_PCI_io.c | 335 | ||||
-rw-r--r-- | ports/winnt/ntpd/nt_clockstuff.c | 1743 | ||||
-rw-r--r-- | ports/winnt/ntpd/ntp_iocompletionport.c | 1639 | ||||
-rw-r--r-- | ports/winnt/ntpd/ntservice.c | 321 |
4 files changed, 4038 insertions, 0 deletions
diff --git a/ports/winnt/ntpd/hopf_PCI_io.c b/ports/winnt/ntpd/hopf_PCI_io.c new file mode 100644 index 0000000..a1ba200 --- /dev/null +++ b/ports/winnt/ntpd/hopf_PCI_io.c @@ -0,0 +1,335 @@ +/* + * hopf_PCI_io.c + * Read data from a hopf PCI clock using the ATLSoft WinNT driver. + * + * Date: 21.03.2000 Revision: 01.10 + * + * Copyright (C) 1999, 2000 by Bernd Altmeier altmeier@ATLSoft.de + * + */ + +/* + * Ignore nonstandard extension warning. + * This happens when including winioctl.h + */ +#pragma warning(disable: 4201) +#define _FILESYSTEMFSCTL_ + +#include <config.h> +#include <windows.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <winioctl.h> + +#include "ntp_stdlib.h" +#include "hopf_PCI_io.h" + + +#define ATL_PASSTHROUGH_READ_TOSIZE (3 * sizeof(ULONG)) +#define ATL_PASSTHROUGH_READ_FROMSIZE 0 +#define IOCTL_ATLSOFT_PASSTHROUGH_READ CTL_CODE( \ + FILE_DEVICE_UNKNOWN, \ + 0x805, \ + METHOD_BUFFERED, \ + FILE_ANY_ACCESS) + + +HANDLE hDevice = NULL; // this is the handle to the PCI Device + +HANDLE hRdEvent; +OVERLAPPED Rdoverlapped; +OVERLAPPED * pRdOverlapped; + +ULONG iobuffer[256]; +DWORD cbReturned; +BOOL HaveBoard = FALSE; + +struct { + ULONG region; + ULONG offset; + ULONG count; +} io_params; + + +BOOL +OpenHopfDevice(void) +{ + OSVERSIONINFO VersionInfo; + ULONG deviceNumber; + CHAR deviceName[255]; + + VersionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&VersionInfo); + switch (VersionInfo.dwPlatformId) { + + case VER_PLATFORM_WIN32_WINDOWS: // Win95/98 + return FALSE; // "NTP does not support Win 95-98." + break; + + case VER_PLATFORM_WIN32_NT: // WinNT + deviceNumber = 0; + snprintf(deviceName, sizeof(deviceName), + "\\\\.\\hclk6039%d", deviceNumber + 1); + hDevice = CreateFile( + deviceName, + GENERIC_WRITE | GENERIC_READ, + FILE_SHARE_WRITE | FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_FLAG_DELETE_ON_CLOSE | FILE_FLAG_OVERLAPPED, + NULL); + break; + + default: + hDevice = INVALID_HANDLE_VALUE; + break; + } // end switch + + if (INVALID_HANDLE_VALUE == hDevice) // the system didn't return a handle + return FALSE; //"A handle to the driver could not be obtained properly" + + // an event to be used for async transfers + hRdEvent = CreateEvent( + NULL, + TRUE, + FALSE, + NULL); + + if (INVALID_HANDLE_VALUE == hRdEvent) + return FALSE; // the system didn't return a handle + + pRdOverlapped = &Rdoverlapped; + pRdOverlapped->hEvent = hRdEvent; + + HaveBoard = TRUE; // board installed and we have access + + return TRUE; +} // end of OpenHopfDevice() + + +BOOL +CloseHopfDevice(void) +{ + CloseHandle(hRdEvent);// When done, close the handle to the driver + + return CloseHandle(hDevice); +} // end of CloseHopfDevice() + + +void +ReadHopfDevice(void) +{ + if (!HaveBoard) + return; + + DeviceIoControl( + hDevice, + IOCTL_ATLSOFT_PASSTHROUGH_READ, + &io_params, + ATL_PASSTHROUGH_READ_TOSIZE, + iobuffer, + ATL_PASSTHROUGH_READ_FROMSIZE + + io_params.count * sizeof(ULONG), + &cbReturned, + pRdOverlapped + ); +} + + +#ifdef NOTUSED +void +GetHardwareData( + LPDWORD Data32, + WORD Ofs + ) +{ + io_params.region = 1; + io_params.offset = Ofs; + io_params.count = 1; + ReadHopfDevice(); + *Data32 = iobuffer[0]; +} +#endif /* NOTUSED */ + + +void +GetHopfTime( + LPHOPFTIME Data, + DWORD Offset + ) +{ + io_params.region = 1; + io_params.offset = Offset; + io_params.count = 4; + + ReadHopfDevice(); + + Data->wHour = 0; + Data->wMinute = 0; + Data->wSecond = 0; + while (iobuffer[0] >= 60 * 60 * 1000) { + iobuffer[0] = iobuffer[0] - 60 * 60 * 1000; + Data->wHour++; + } + while (iobuffer[0] >= 60 * 1000) { + iobuffer[0] = iobuffer[0] - 60 * 1000; + Data->wMinute++; + } + while (iobuffer[0] >= 1000) { + iobuffer[0] = iobuffer[0] - 1000; + Data->wSecond++; + } + Data->wMilliseconds = LOWORD(iobuffer[0]); + Data->wDay = HIBYTE(HIWORD(iobuffer[1])); + Data->wMonth = LOBYTE(HIWORD(iobuffer[1])); + Data->wYear = LOWORD(iobuffer[1]); + Data->wDayOfWeek = HIBYTE(HIWORD(iobuffer[2])); + if (Data->wDayOfWeek == 7) // Dow Korrektur + Data->wDayOfWeek = 0; + + io_params.region = 1; + io_params.offset += 0x08; + io_params.count = 1; + + ReadHopfDevice(); + + Data->wStatus = LOBYTE(HIWORD(iobuffer[0])); +} + + +#ifdef NOTUSED +void +GetHopfLocalTime( + LPHOPFTIME Data + ) +{ + DWORD Offset = 0; + + GetHopfTime(Data, Offset); +} +#endif /* NOTUSED */ + + +void +GetHopfSystemTime( + LPHOPFTIME Data + ) +{ + DWORD Offset = 0x10; + + GetHopfTime(Data,Offset); +} + + +#ifdef NOTUSED +void +GetSatData( + LPSATSTAT Data + ) +{ + io_params.region = 1; + io_params.offset = 0xb0; + io_params.count = 5; + + ReadHopfDevice(); + + Data->wVisible = HIBYTE(HIWORD(iobuffer[0])); + Data->wMode = LOBYTE(LOWORD(iobuffer[0])); + Data->wSat0 = HIBYTE(HIWORD(iobuffer[1])); + Data->wRat0 = LOBYTE(HIWORD(iobuffer[1])); + Data->wSat1 = HIBYTE(LOWORD(iobuffer[1])); + Data->wRat1 = LOBYTE(LOWORD(iobuffer[1])); + Data->wSat2 = HIBYTE(HIWORD(iobuffer[2])); + Data->wRat2 = LOBYTE(HIWORD(iobuffer[2])); + Data->wSat3 = HIBYTE(LOWORD(iobuffer[2])); + Data->wRat3 = LOBYTE(LOWORD(iobuffer[2])); + Data->wSat4 = HIBYTE(HIWORD(iobuffer[3])); + Data->wRat4 = LOBYTE(HIWORD(iobuffer[3])); + Data->wSat5 = HIBYTE(LOWORD(iobuffer[3])); + Data->wRat5 = LOBYTE(LOWORD(iobuffer[3])); + Data->wSat6 = HIBYTE(HIWORD(iobuffer[4])); + Data->wRat6 = LOBYTE(HIWORD(iobuffer[4])); + Data->wSat7 = HIBYTE(LOWORD(iobuffer[4])); + Data->wRat7 = LOBYTE(LOWORD(iobuffer[4])); +} + + +void +GetDiffTime( + LPLONG Data + ) +{ + io_params.region = 1; + io_params.offset = 0x0c; + io_params.count = 1; + + ReadHopfDevice(); + + *Data = iobuffer[0]; +} + + +void +GetPosition( + LPGPSPOS Data + ) +{ + io_params.region = 1; + io_params.offset = 0x90; // Positionsdaten Länge + io_params.count = 1; + + ReadHopfDevice(); + + Data->wLongitude = iobuffer[0]; //in Millisekunden + io_params.region = 1; + io_params.offset = 0xa0; // Positionsdaten Breite + io_params.count = 1; + + ReadHopfDevice(); + + Data->wLatitude = iobuffer[0]; + Data->wAltitude = 0; +} + + +void +GetHardwareVersion( + LPCLOCKVER Data + ) +{ + int i; + + io_params.region = 1; + io_params.offset = 0x50; + io_params.count = 12; + + ReadHopfDevice(); + + Data->cVersion[0] = '\0'; + iobuffer[13] = 0; + for (i = 0; i < 13; i++) { + Data->cVersion[i * 4 ] = HIBYTE(HIWORD(iobuffer[i])); + Data->cVersion[i * 4 + 1] = LOBYTE(HIWORD(iobuffer[i])); + Data->cVersion[i * 4 + 2] = HIBYTE(LOWORD(iobuffer[i])); + Data->cVersion[i * 4 + 3] = LOBYTE(LOWORD(iobuffer[i])); + } +} + + +void +GetDCFAntenne( + LPDCFANTENNE Data + ) +{ + io_params.region = 1; + io_params.offset = 0xcc; + io_params.count = 1; + + ReadHopfDevice(); + Data->bStatus1 = HIBYTE(HIWORD(iobuffer[0])); + Data->bStatus = LOBYTE(HIWORD(iobuffer[0])); + Data->wAntValue = LOWORD(iobuffer[0]); +} +#endif /* NOTUSED */ + diff --git a/ports/winnt/ntpd/nt_clockstuff.c b/ports/winnt/ntpd/nt_clockstuff.c new file mode 100644 index 0000000..052bfcd --- /dev/null +++ b/ports/winnt/ntpd/nt_clockstuff.c @@ -0,0 +1,1743 @@ +/* Windows NT Clock Routines + * + * Created by Sven Dietrich sven@inter-yacht.com + * + * New interpolation scheme by Dave Hart <davehart@davehart.com> in + * February 2009 overcomes 500us-1ms inherent jitter with the older + * scheme, first identified by Peter Rosin (nee Ekberg) + * <peda@lysator.liu.se> in 2003 [Bug 216]. + * + * Note: The Windows port of ntpd uses the C99-snprintf replacement for + * (v)snprintf(), also used by msyslog(), which does not understand the + * printf format specifier %I64d, only the more common %lld. With the + * minimum supported compiler raised to Visual C++ 2005 in ntp-dev in + * August 2011, all MS C runtime routines also understand %lld and %llu. + */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/resource.h> /* our private version */ + +#if defined(_MSC_VER) && _MSC_VER >= 1400 /* VS 2005 */ +#include <intrin.h> /* for __rdtsc() */ +#endif + +#ifdef HAVE_PPSAPI +#include <timepps.h> +/* + * ports/winnt/include/timepps.h defines EOPNOTSUPP for compatibility + * with PPSAPI on other platforms. ports/winnt/include/isc/net.h has + * #define EOPNOTSUPP WSAEOPNOTSUPP, so to avoid a macro redefinition + * warning undefine it. + */ +#undef EOPNOTSUPP +#endif /* HAVE_PPSAPI */ + +#include "ntp_stdlib.h" +#include "ntp_unixtime.h" +#include "ntp_timer.h" +#include "ntp_assert.h" +#include "ntp_leapsec.h" +#include "clockstuff.h" +#include "ntservice.h" +#include "ntpd.h" +#include "ntpd-opts.h" + +extern double sys_residual; /* residual from previous adjustment */ + +/* + * Include code to possibly modify the MM timer while the service is active. + */ + +/* + * Whether or not MM timer modifications takes place is still controlled + * by the variable below which is initialized by a default value but + * might be changed depending on a command line switch. + */ +static int modify_mm_timer = MM_TIMER_LORES; + +#define MM_TIMER_INTV 1 /* the interval we'd want to set the MM timer to [ms] */ + +static UINT wTimerRes; + +BOOL init_randfile(); + +static long last_Adj = 0; + +#define LS_CORR_INTV_SECS 2 /* seconds to apply leap second correction */ +#define LS_CORR_INTV ( (LONGLONG) HECTONANOSECONDS * LS_CORR_INTV_SECS ) +#define LS_CORR_LIMIT ( (LONGLONG) HECTONANOSECONDS / 2 ) // half a second + +typedef union ft_ull { + FILETIME ft; + ULONGLONG ull; + LONGLONG ll; + LARGE_INTEGER li; +} FT_ULL; + +/* leap second stuff */ +static FT_ULL ls_ft; +static DWORD ls_time_adjustment; +static ULONGLONG ls_ref_perf_cnt; +static LONGLONG ls_elapsed; + +static BOOL winnt_time_initialized = FALSE; +static BOOL winnt_use_interpolation = FALSE; +static unsigned clock_thread_id; + + +void WINAPI GetInterpTimeAsFileTime(LPFILETIME pft); +static void StartClockThread(void); +static void tune_ctr_freq(LONGLONG, LONGLONG); +void StopClockThread(void); +void atexit_revert_mm_timer(void); +void win_time_stepped(void); + +static HANDLE clock_thread = NULL; +static HANDLE TimerThreadExitRequest = NULL; + +/* + * interp_time estimates time in 100ns units + * based on a performance counter value given. + * The 2nd parameter indicates if this is + * part of a current time-of-day calculation. + */ +ULONGLONG interp_time(ULONGLONG, BOOL); + +/* + * add_counter_time_pair is called by the + * high priority clock thread with a new + * sample. + */ +void add_counter_time_pair(ULONGLONG, LONGLONG); + +/* + * globals used by the above two functions to + * implement the counter/time history + */ +#define BASELINES_TOT 256 +#define BASELINES_USED 64 + +static volatile int newest_baseline = 0; +static volatile int newest_baseline_gen = 0; +static ULONGLONG baseline_counts[BASELINES_TOT] = {0}; +static LONGLONG baseline_times[BASELINES_TOT] = {0}; + +#define CLOCK_BACK_THRESHOLD 100 /* < 10us unremarkable */ +static ULONGLONG clock_backward_max = CLOCK_BACK_THRESHOLD; +static int clock_backward_count; + +/** + * A flag set on Windows versions which ignore small time adjustments. + * + * Windows Vista and Windows 7 ignore TimeAdjustment less than 16. + * @note Has to be checked for Windows Server 2008/2012 and Windows 8. + * Ref: http://support.microsoft.com/kb/2537623, bug #2328 + */ +static BOOL os_ignores_small_adjustment; + +/* + * clockperiod is the period used for SetSystemTimeAdjustment + * slewing calculations but does not necessarily correspond + * to the precision of the OS clock. Prior to Windows Vista + * (6.0) the two were identical. In 100ns units. + */ +static DWORD clockperiod; + +/* + * os_clock_precision is the observed precision of the OS + * clock, meaning the increment between discrete values. This + * is currently calculated once at startup. 100ns units. + */ +static ULONGLONG os_clock_precision; + +/* + * NomPerfCtrFreq is from QueryPerformanceFrequency and is the + * number of performance counter beats per second. PerfCtrFreq + * starts from NomPerfCtrFreq but is maintained using a sliding + * window average based on actual performance counter behavior, + * to allow us to better tolerate powersaving measures that + * alter the effective frequency of the processor cycle counter + * (TSC) which sometimes underlies QueryPerformanceCounter. + * + * Note that the OS is unlikely to be so subtle in its internal + * scheduling of waitable timers, presumably done using the + * performance counter. Therefore our calculations for + * interpolated time should be based on PerfCtrFreq but our + * calculations for SetWaitableTimer should assume the OS will + * convert from FILETIME 100ns units to performance counter + * beats using the nominal frequency. + */ + +volatile ULONGLONG PerfCtrFreq = 0; + ULONGLONG NomPerfCtrFreq = 0; + +/* + * If we're using RDTSC beating at the same rate as + * QueryPerformanceCounter, there is a systemic + * offset we need to account for when using + * counterstamps from serialpps.sys, which are + * always from QPC (actually KeQueryPerformanceCounter). + */ +static LONGLONG QPC_offset = 0; + +/* + * Substitute RDTSC for QueryPerformanceCounter()? + */ +static int use_pcc = -1; + +/* + * Restrict threads that call QPC/RDTSC to one CPU? + */ +static int lock_interp_threads = -1; + +/* + * ppm_per_adjust_unit is parts per million effect on the OS + * clock per slewing adjustment unit per second. Per haps. + */ +static DOUBLE ppm_per_adjust_unit; + +/* + * wintickadj emulates the functionality provided by unix tickadj, + * providing a baseline clock correction if needed to get the + * clock within a few hundred PPM of correct frequency. + */ +static long wintickadj; + +static void choose_interp_counter(void); +static int is_qpc_built_on_pcc(void); + +/* + * performance counter frequency observations + */ +#define TUNE_CTR_DEPTH 3 /* running avg depth */ + +static HANDLE ctr_freq_timer = INVALID_HANDLE_VALUE; +static ULONGLONG tune_ctr_freq_max_interval; +static unsigned tune_ctr_period; +void start_ctr_freq_timer(ULONGLONG now_time); +void reset_ctr_freq_timer(ULONGLONG when, ULONGLONG now); +void reset_ctr_freq_timer_abs(ULONGLONG when); + +/* round a Windows time to the next bottom of the second */ + +#define ROUND_TO_NEXT_SEC_BOTTOM(t) \ +do { \ + (t) += 3 * HECTONANOSECONDS / 2 - 1; \ + (t) /= HECTONANOSECONDS; \ + (t) *= HECTONANOSECONDS; \ + (t) -= HECTONANOSECONDS / 2; \ +} while (0) + +/* + * NT native time format is 100's of nanoseconds since 1601-01-01. + * Helpers for converting between "hectonanoseconds" and the + * performance counter scale from which interpolated time is + * derived. + */ +#define HNS2PERF(hns) ((hns) * PerfCtrFreq / HECTONANOSECONDS) +#define PERF2HNS(ctr) ((ctr) * HECTONANOSECONDS / PerfCtrFreq) + + +#if defined(_MSC_VER) && _MSC_VER >= 1400 /* VS 2005 */ +#define get_pcc() __rdtsc() +#else +/* + * something like this can be used for a compiler without __rdtsc() + */ +ULONGLONG __forceinline +get_pcc(void) +{ + /* RDTSC returns in EDX:EAX, same as C compiler */ + __asm { + RDTSC + } +} +#endif + + +/* + * perf_ctr() returns the current performance counter value, + * from QueryPerformanceCounter or RDTSC. + */ +ULONGLONG WINAPI +perf_ctr(void) +{ + FT_ULL ft; + + if (use_pcc) + return get_pcc(); + else { + QueryPerformanceCounter(&ft.li); + return ft.ull; + } +} + + +/* + * init_small_adjustment + * + * Set variable os_ignores_small_adjustment + * + */ +static void init_small_adjustment(void) +{ + OSVERSIONINFO vi; + memset(&vi, 0, sizeof(vi)); + vi.dwOSVersionInfoSize = sizeof(vi); + + if (!GetVersionEx(&vi)) { + msyslog(LOG_WARNING, "GetVersionEx failed with error code %d.", GetLastError()); + os_ignores_small_adjustment = FALSE; + return; + } + + if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 1) { + // Windows 7 and Windows Server 2008 R2 + // + // Windows 7 is documented as affected. + // Windows Server 2008 R2 is assumed affected. + os_ignores_small_adjustment = TRUE; + } else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 0) { + // Windows Vista and Windows Server 2008 + // + // Windows Vista is documented as affected. + // Windows Server 2008 is assumed affected. + os_ignores_small_adjustment = TRUE; + } else { + os_ignores_small_adjustment = FALSE; + } +} + + +/* + * choose_interp_counter - select between QueryPerformanceCounter and + * the x86 processor cycle counter (TSC). + */ +static void +choose_interp_counter(void) +{ + const char * ntpd_pcc_freq_text; + int qpc_built_on_pcc; + + /* + * Regardless of whether we actually use RDTSC, first determine + * if QueryPerformanceCounter is built on it, so that we can + * decide whether it's prudent to lock QPC-consuming threads to + * a particular CPU. + */ + qpc_built_on_pcc = is_qpc_built_on_pcc(); + lock_interp_threads = qpc_built_on_pcc; + + /* + * It's time to make some more permanent knobs, + * but for right now the RDTSC aka PCC dance on x86 is: + * + * 1. With none of these variables defined, only QPC + * is used because there is no reliable way to + * detect counter frequency variation after ntpd + * startup implemented. + * 2. We need a better knob, but for now if you know + * your RDTSC / CPU frequency is invariant, set + * NTPD_PCC and assuming your QPC is based on the + * PCC as well, RDTSC will be substituted. + * 3. More forcefully, you can jam in a desired exact + * processor frequency, expressed in cycles per + * second by setting NTPD_PCC_FREQ=398125000, for + * example, if yor actual known CPU frequency is + * 398.125 MHz, and NTPD_PCC doesn't work because + * QueryPerformanceCounter is implemented using + * another counter. It is very easy to make ntpd + * fall down if the NTPD_PCC_FREQ value isn't very + * close to the observed RDTSC units per second. + * + * Items 2 and 3 could probably best be combined into one + * new windows-specific command line switch such as + * ntpd --pcc + * or + * ntpd --pcc=398125000 + * + * They are currently tied to Windows because that is + * the only ntpd port with its own interpolation, and + * to x86/x64 because no one has ported the Windows + * ntpd port to the sole remaining alternative, Intel + * Itanium. + */ + if (HAVE_OPT(PCCFREQ)) + ntpd_pcc_freq_text = OPT_ARG(PCCFREQ); + else + ntpd_pcc_freq_text = getenv("NTPD_PCC_FREQ"); + + if (!HAVE_OPT(USEPCC) + && NULL == ntpd_pcc_freq_text + && NULL == getenv("NTPD_PCC")) { + use_pcc = 0; + return; + } + + if (!qpc_built_on_pcc && NULL == ntpd_pcc_freq_text) { + use_pcc = 0; + return; + } + + use_pcc = 1; + if (ntpd_pcc_freq_text != NULL) + sscanf(ntpd_pcc_freq_text, + "%llu", + &NomPerfCtrFreq); + + NLOG(NLOG_CLOCKINFO) + msyslog(LOG_INFO, + "using processor cycle counter " + "%.3f MHz", + NomPerfCtrFreq / 1e6); + return; +} + + +/* + * is_qpc_built_on_pcc - test if QueryPerformanceCounter runs at the + * same rate as the processor cycle counter (TSC). + */ +static int +is_qpc_built_on_pcc(void) +{ + LONGLONG offset; + FT_ULL ft1; + FT_ULL ft2; + FT_ULL ft3; + FT_ULL ft4; + FT_ULL ft5; + + NTP_REQUIRE(NomPerfCtrFreq != 0); + + QueryPerformanceCounter(&ft1.li); + ft2.ull = get_pcc(); + Sleep(1); + QueryPerformanceCounter(&ft3.li); + Sleep(1); + ft4.ull = get_pcc(); + Sleep(1); + QueryPerformanceCounter(&ft5.li); + + offset = ft2.ull - ft1.ull; + ft3.ull += offset; + ft5.ull += offset; + + if (ft2.ull <= ft3.ull && + ft3.ull <= ft4.ull && + ft4.ull <= ft5.ull) { + + QPC_offset = offset; + return TRUE; + } + + return FALSE; +} + + +/* + * Request Multimedia Timer + */ +void +set_mm_timer( + int timerres + ) +{ + modify_mm_timer = timerres; +} + +/* + * adj_systime - called once every second to discipline system clock. + * Normally, the offset passed in (parameter now) is in the range + * [-NTP_MAXFREQ, NTP_MAXFREQ]. However, at EVNT_NSET, a much larger + * slew is requested if the initial offset is less than the step + * threshold, in the range [-step, step] where step is the step + * threshold, 128 msec by default. For the remainder of the frequency + * training interval, adj_systime is called with 0 offset each second + * and slew the large offset at 500 PPM (500 usec/sec). + * Returns 1 if okay, 0 if trouble. + */ +int +adj_systime( + double now + ) +{ + /* ntp time scale origin as ticks since 1601-01-01 */ + static const ULONGLONG HNS_JAN_1900 = 94354848000000000ull; + + static double adjtime_carry; + double dtemp; + u_char isneg; + BOOL rc; + long TimeAdjustment; + SYSTEMTIME st; + ULONGLONG this_perf_count; + FT_ULL curr_ft; + leap_result_t lsi; + + /* + * Add the residual from the previous adjustment to the new + * adjustment, bound and round. + */ + dtemp = adjtime_carry + sys_residual + now; + adjtime_carry = 0.; + sys_residual = 0.; + if (dtemp < 0) { + isneg = TRUE; + dtemp = -dtemp; + } else { + isneg = FALSE; + } + + if (dtemp > NTP_MAXFREQ) { + adjtime_carry = dtemp - NTP_MAXFREQ; + dtemp = NTP_MAXFREQ; + } + + if (isneg) { + dtemp = -dtemp; + adjtime_carry = -adjtime_carry; + } + + dtemp = dtemp * 1e6; + + /* + * dtemp is in micro seconds. NT uses 100 ns units, + * so a unit change in TimeAdjustment corresponds + * to slewing 10 ppm on a 100 Hz system. Calculate + * the number of 100ns units to add, using OS tick + * frequency as per suggestion from Harry Pyle, + * and leave the remainder in dtemp + */ + TimeAdjustment = (long)(dtemp / ppm_per_adjust_unit + + ((isneg) + ? -0.5 + : 0.5)); + + if (os_ignores_small_adjustment) { + /* + * As the OS ignores adjustments smaller than 16, we need to + * leave these small adjustments in sys_residual, causing + * the small values to be averaged over time. + */ + if (TimeAdjustment > -16 && TimeAdjustment < 16) { + TimeAdjustment = 0; + } + } + + dtemp -= TimeAdjustment * ppm_per_adjust_unit; + + + /* If a piping-hot close leap second is pending for the end + * of this day, determine the UTC time stamp when the transition + * must take place. (Calculated in the current leap era!) + */ + if (leapsec >= LSPROX_ALERT) { + if (0 == ls_ft.ull && leapsec_frame(&lsi)) { + if (lsi.tai_diff > 0) { + /* A leap second insert is scheduled at the end + * of the day. Since we have not yet computed the + * time stamp, do it now. Signal electric mode + * for this insert. + */ + ls_ft.ull = lsi.ttime.Q_s * HECTONANOSECONDS + + HNS_JAN_1900; + FileTimeToSystemTime(&ls_ft.ft, &st); + msyslog(LOG_NOTICE, + "Detected positive leap second announcement " + "for %04d-%02d-%02d %02d:%02d:%02d UTC", + st.wYear, st.wMonth, st.wDay, + st.wHour, st.wMinute, st.wSecond); + leapsec_electric(TRUE); + } else if (lsi.tai_diff < 0) { + /* Do not handle negative leap seconds here. If this + * happens, let the system step. + */ + leapsec_electric(FALSE); + } + } + } else { + /* The leap second announcement is gone. Happens primarily after + * the leap transition, but can also be due to a clock step. + * Disarm the leap second, but only if there is one scheduled + * and not currently in progress! + */ + if (ls_ft.ull != 0 && ls_time_adjustment == 0) { + ls_ft.ull = 0; + msyslog(LOG_NOTICE, "Leap second announcement disarmed"); + } + } + + /* + * If the time stamp for the next leap second has been set + * then check if the leap second must be handled + */ + if (ls_ft.ull != 0) { + this_perf_count = perf_ctr(); + + if (0 == ls_time_adjustment) { /* has not yet been scheduled */ + + GetSystemTimeAsFileTime(&curr_ft.ft); + if (curr_ft.ull >= ls_ft.ull) { + ls_time_adjustment = clockperiod / LS_CORR_INTV_SECS; + ls_ref_perf_cnt = this_perf_count; + ls_elapsed = 0; + msyslog(LOG_NOTICE, "Inserting positive leap second."); + } + } else { /* leap sec adjustment has been scheduled previously */ + ls_elapsed = (this_perf_count - ls_ref_perf_cnt) + * HECTONANOSECONDS / PerfCtrFreq; + } + + if (ls_time_adjustment != 0) { /* leap second adjustment is currently active */ + if (ls_elapsed > (LS_CORR_INTV - LS_CORR_LIMIT)) { + ls_time_adjustment = 0; /* leap second adjustment done */ + ls_ft.ull = 0; + } + + /* + * NOTE: While the system time is slewed during the leap second + * the interpolation function which is based on the performance + * counter does not account for the slew. + */ + TimeAdjustment -= ls_time_adjustment; + } + } + + + sys_residual = dtemp / 1e6; + DPRINTF(3, ("adj_systime: %.9f -> %.9f residual %.9f", + now, 1e-6 * (TimeAdjustment * ppm_per_adjust_unit), + sys_residual)); + if (0. == adjtime_carry) + DPRINTF(3, ("\n")); + else + DPRINTF(3, (" adjtime %.9f\n", adjtime_carry)); + + /* only adjust the clock if adjustment changes */ + TimeAdjustment += wintickadj; + if (last_Adj != TimeAdjustment) { + last_Adj = TimeAdjustment; + DPRINTF(2, ("SetSystemTimeAdjustment(%+ld)\n", TimeAdjustment)); + rc = SetSystemTimeAdjustment(clockperiod + TimeAdjustment, FALSE); + if (!rc) + msyslog(LOG_ERR, "Can't adjust time: %m"); + } else { + rc = TRUE; + } + + return rc; +} + + +void +init_winnt_time(void) +{ + static const char settod[] = "settimeofday=\"SetSystemTime\""; + char szMsgPath[MAX_PATH+1]; + HANDLE hToken = INVALID_HANDLE_VALUE; + TOKEN_PRIVILEGES tkp; + TIMECAPS tc; + BOOL noslew; + DWORD adjclockperiod; + LARGE_INTEGER Freq; + FT_ULL initial_hectonanosecs; + FT_ULL next_hectonanosecs; + double adjppm; + double rawadj; + char * pch; + + if (winnt_time_initialized) + return; + + /* + * Make sure the service is initialized + * before we do anything else + */ + ntservice_init(); + + /* Set up the Console Handler */ + if (!SetConsoleCtrlHandler(OnConsoleEvent, TRUE)) { + msyslog(LOG_ERR, "Can't set console control handler: %m"); + } + + /* Set the Event-ID message-file name. */ + if (!GetModuleFileName(NULL, szMsgPath, sizeof(szMsgPath))) { + msyslog(LOG_ERR, "GetModuleFileName(PGM_EXE_FILE) failed: %m"); + exit(1); + } + + /* Initialize random file before OpenSSL checks */ + if (!init_randfile()) + msyslog(LOG_ERR, "Unable to initialize .rnd file"); + +#pragma warning(push) +#pragma warning(disable: 4127) /* conditional expression is constant */ + +#ifdef DEBUG + if (SIZEOF_TIME_T != sizeof(time_t) + || SIZEOF_INT != sizeof(int) + || SIZEOF_SIGNED_CHAR != sizeof(char)) { + msyslog(LOG_ERR, "config.h SIZEOF_* macros wrong, fatal"); + exit(1); + } +#endif + +#pragma warning(pop) + + init_small_adjustment(); + leapsec_electric(TRUE); + + /* + * Get privileges needed for fiddling with the clock + */ + + /* get the current process token handle */ + if (!OpenProcessToken( + GetCurrentProcess(), + TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, + &hToken)) { + msyslog(LOG_ERR, "OpenProcessToken failed: %m"); + exit(-1); + } + /* get the LUID for system-time privilege. */ + LookupPrivilegeValue(NULL, SE_SYSTEMTIME_NAME, &tkp.Privileges[0].Luid); + tkp.PrivilegeCount = 1; /* one privilege to set */ + tkp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + /* get set-time privilege for this process. */ + AdjustTokenPrivileges(hToken, FALSE, &tkp, 0, + (PTOKEN_PRIVILEGES) NULL, 0); + + /* cannot use return value of AdjustTokenPrivileges. */ + /* (success does not indicate all privileges were set) */ + if (GetLastError() != ERROR_SUCCESS) { + msyslog(LOG_ERR, "AdjustTokenPrivileges failed: %m"); + /* later set time call will probably fail */ + } + + CloseHandle(hToken); + hToken = INVALID_HANDLE_VALUE; + + /* + * Say how we're setting the time of day + */ + set_sys_var(settod, sizeof(settod), RO); + + /* + * ntpd on Windows has always raised its priority, without + * requiring -N as on Unix. Since Windows ntpd doesn't share + * the history of unix ntpd of once having no -N and therefore + * needing to be invoked under nice, there is no reason to + * bring it in line with the Unix version in this regard. + * Instsrv assumes ntpd is invoked with no arguments, and + * upgrading users would be negatively surprised by the + * poor timekeeping if they failed to add -N as part of + * upgrading were we to correct this platform difference. + */ + if (-1 == setpriority(PRIO_PROCESS, 0, NTP_PRIO)) + exit(-1); + + /* Determine the existing system time slewing */ + if (!GetSystemTimeAdjustment(&adjclockperiod, &clockperiod, &noslew)) { + msyslog(LOG_ERR, "GetSystemTimeAdjustment failed: %m"); + exit(-1); + } + + /* + * If there is no slewing before ntpd, adjclockperiod and clockperiod + * will be equal. Any difference is carried into adj_systime's first + * pass as the previous adjustment. + */ + last_Adj = adjclockperiod - clockperiod; + + if (last_Adj) + msyslog(LOG_INFO, + "Clock interrupt period %.3f msec " + "(startup slew %.1f usec/period)", + clockperiod / 1e4, + last_Adj / 10.); + else + msyslog(LOG_INFO, + "Clock interrupt period %.3f msec", + clockperiod / 1e4); + + /* + * Calculate the time adjustment resulting from incrementing + * units per tick by 1 unit for 1 second + */ + ppm_per_adjust_unit = 1e6 / clockperiod; + + pch = getenv("NTPD_TICKADJ_PPM"); + if (pch != NULL && 1 == sscanf(pch, "%lf", &adjppm)) { + rawadj = adjppm / ppm_per_adjust_unit; + rawadj += (rawadj < 0) + ? -0.5 + : 0.5; + wintickadj = (long)rawadj; + msyslog(LOG_INFO, + "Using NTPD_TICKADJ_PPM %+g ppm (%+ld)", + adjppm, wintickadj); + } + + /* get the performance counter ticks per second */ + if (!QueryPerformanceFrequency(&Freq) || !Freq.QuadPart) { + msyslog(LOG_ERR, "QueryPerformanceFrequency failed: %m"); + exit(-1); + } + + NomPerfCtrFreq = PerfCtrFreq = Freq.QuadPart; + msyslog(LOG_INFO, + "Performance counter frequency %.3f MHz", + PerfCtrFreq / 1e6); + + /* + * With a precise system clock, our interpolation decision is + * a slam dunk. + */ + if (NULL != pGetSystemTimePreciseAsFileTime) { + winnt_use_interpolation = FALSE; + winnt_time_initialized = TRUE; + + return; + } + + /* + * Implement any multimedia timer manipulation requested via -M + * option. This is rumored to be unneeded on Win8 with the + * introduction of the precise (interpolated) system clock. + */ + if (modify_mm_timer) { + if (timeGetDevCaps(&tc, sizeof(tc)) == TIMERR_NOERROR) { + wTimerRes = min(max(tc.wPeriodMin, MM_TIMER_INTV), tc.wPeriodMax); + timeBeginPeriod(wTimerRes); + atexit(atexit_revert_mm_timer); + + msyslog(LOG_INFO, "MM timer resolution: %u..%u msec, set to %u msec", + tc.wPeriodMin, tc.wPeriodMax, wTimerRes ); + } else { + msyslog(LOG_ERR, "Multimedia timer unavailable"); + } + } + + /* + * Spin on GetSystemTimeAsFileTime to determine its + * granularity. Prior to Windows Vista this is + * typically the same as the clock period. + */ + GetSystemTimeAsFileTime(&initial_hectonanosecs.ft); + do { + GetSystemTimeAsFileTime(&next_hectonanosecs.ft); + } while (initial_hectonanosecs.ull == next_hectonanosecs.ull); + + os_clock_precision = next_hectonanosecs.ull - + initial_hectonanosecs.ull; + + msyslog(LOG_INFO, + "Windows clock precision %.3f msec, min. slew %.3f ppm/s", + os_clock_precision / 1e4, ppm_per_adjust_unit); + + winnt_time_initialized = TRUE; + + choose_interp_counter(); + + if (getenv("NTPD_USE_SYSTEM_CLOCK") || + (os_clock_precision < 4 * 10000 && + !getenv("NTPD_USE_INTERP_DANGEROUS"))) { + msyslog(LOG_INFO, "using Windows clock directly"); + } else { + winnt_use_interpolation = TRUE; + get_sys_time_as_filetime = GetInterpTimeAsFileTime; + StartClockThread(); + } +} + + +void +atexit_revert_mm_timer(void) +{ + timeEndPeriod(wTimerRes); + DPRINTF(1, ("MM timer resolution reset\n")); +} + + +void +reset_winnt_time(void) +{ + SYSTEMTIME st; + + /* + * If we're in the 2-second slew right after a leap second, + * we don't want to continue that extreme slew, in that case + * disable our slewing and return clock discipline to the + * kernel. Similarly if we are not yet synchronized, + * our current slew may not be a good ongoing trim. + * Otherwise, our leave in place the last SetSystemTimeAdjustment + * as an ongoing frequency correction, better than nothing. + * TODO: + * Verify this will not call SetSystemTimeAdjustment if + * ntpd is running in ntpdate mode. + */ + if (sys_leap == LEAP_NOTINSYNC || ls_time_adjustment != 0) + SetSystemTimeAdjustment(0, TRUE); + + /* + * Read the current system time, and write it back to + * force CMOS update, only if we are exiting because + * the computer is shutting down and we are already + * synchronized. + */ + if (ntservice_systemisshuttingdown() && sys_leap != LEAP_NOTINSYNC) { + GetSystemTime(&st); + SetSystemTime(&st); + NLOG(NLOG_SYSEVENT | NLOG_CLOCKINFO) + msyslog(LOG_NOTICE, "system is shutting down, CMOS time reset."); + } +} + + +/* + * GetSystemTimeAsFileTime() interface clone is used by getclock() in ntpd. + */ + +void WINAPI +GetInterpTimeAsFileTime( + LPFILETIME pft + ) +{ + static ULONGLONG last_interp_time; + FT_ULL now_time; + FT_ULL now_count; + ULONGLONG clock_backward; + + /* + * Mark a mark ASAP. The latency to here should be reasonably + * deterministic + */ + + now_count.ull = perf_ctr(); + now_time.ull = interp_time(now_count.ull, TRUE); + + if (last_interp_time <= now_time.ull) { + last_interp_time = now_time.ull; + } else { + clock_backward = last_interp_time - now_time.ull; + if (clock_backward > clock_backward_max) { + clock_backward_max = clock_backward; + clock_backward_count++; + } + now_time.ull = last_interp_time; + } + *pft = now_time.ft; + + return; +} + + +/* + * TimerApcFunction is invoked on the high-priority clock + * thread to capture a new baseline system time and + * performance counter correlation every 43 msec (64Hz + * OS clock precision). + */ +static void CALLBACK +TimerApcFunction( + LPVOID lpArgToCompletionRoutine, + DWORD dwTimerLowValue, + DWORD dwTimerHighValue + ) +{ + static BOOL ctr_freq_timer_started = FALSE; + static ULONGLONG prev_count; + ULONGLONG now_time; + FT_ULL now_count; + + /* Grab the counter first of all */ + now_count.ull = perf_ctr(); + + now_time = (((ULONGLONG)dwTimerHighValue << 32) | + dwTimerLowValue); + + /* + * Save this correlation in the history. + */ + add_counter_time_pair(now_count.ull, now_time); + + /* + * Once we're synchronized start the counter frequency + * tuning timer. + */ + if (INVALID_HANDLE_VALUE == ctr_freq_timer && + LEAP_NOTINSYNC != sys_leap) + start_ctr_freq_timer(now_time); +} + + +unsigned WINAPI +ClockThread( + void *arg + ) +{ + LARGE_INTEGER DueTime; + HANDLE timer; + double HZ; + double TimerHz; + DWORD timer_period_msec; + DWORD res; + char *ntpd_int_int_text; + + UNUSED_ARG(arg); + + timer = CreateWaitableTimer(NULL, FALSE, NULL); + + ntpd_int_int_text = getenv("NTPD_INT_INT"); + + HZ = (double)HECTONANOSECONDS / clockperiod; + + if (HZ > 63 && HZ < 65) { + timer_period_msec = 43; + } else if (HZ > 98 && HZ < 102) { + timer_period_msec = 27; + if (NULL == ntpd_int_int_text) + msyslog(LOG_WARNING, + "%.3f Hz system clock may benefit from " + "custom NTPD_INT_INT env var timer interval " + "override between approx. 20 and 50 msecs.", + HZ); + } else { + timer_period_msec = (DWORD)(0.5 + (2.752 * clockperiod / 10000)); + if (NULL == ntpd_int_int_text) + msyslog(LOG_WARNING, + "unfamiliar %.3f Hz system clock may benefit " + "from custom NTPD_INT_INT env var timer " + "interval override between approx. 20 and 50 " + "msecs.", + HZ); + } + + if (ntpd_int_int_text != NULL) { + timer_period_msec = atoi(ntpd_int_int_text); + timer_period_msec = max(9, timer_period_msec); + msyslog(LOG_NOTICE, + "using NTPD_INT_INT env var override %u", + timer_period_msec); + } + + TimerHz = 1e3 / timer_period_msec; + msyslog(LOG_NOTICE, "HZ %.3f using %u msec timer %.3f Hz %d deep", + HZ, + timer_period_msec, + TimerHz, + BASELINES_USED); + + /* negative DueTime means relative to now */ + DueTime.QuadPart = -(int)timer_period_msec; + + SetWaitableTimer( + timer, + &DueTime, /* first fire */ + timer_period_msec, /* period thereafter */ + TimerApcFunction, /* callback routine */ + &timer, /* context for callback */ + FALSE); /* do not interfere with power saving */ + + /* + * The clock thread spends the rest of its life in the TimerApcFunction + * and ctr_freq_timer_fired timer APC callbacks, which can only occur + * while this thread is in an alertable wait. Note the Ex on + * WaitForSingleObjectEx and TRUE for fAlertable. The wait will return + * after each APC callback in which case we simply wait again. We will + * break out of the loop when StopClockThread signals our exit event. + */ + do res = WaitForSingleObjectEx( + TimerThreadExitRequest, + INFINITE, + TRUE); + while (WAIT_OBJECT_0 != res); + + CloseHandle(timer); + + if (ctr_freq_timer != INVALID_HANDLE_VALUE) { + CloseHandle(ctr_freq_timer); + ctr_freq_timer = INVALID_HANDLE_VALUE; + } + + return 0; +} + + +static void +StartClockThread(void) +{ + static BOOL done_once = FALSE; + FT_ULL StartTime; + + /* init variables with the time now */ + GetSystemTimeAsFileTime(&StartTime.ft); + baseline_times[0] = StartTime.ull; + baseline_counts[0] = perf_ctr(); + + /* init sync objects */ + TimerThreadExitRequest = CreateEvent(NULL, FALSE, FALSE, NULL); + + clock_thread = + (HANDLE)_beginthreadex( + NULL, + 0, + ClockThread, + NULL, + CREATE_SUSPENDED, + &clock_thread_id); + + if (clock_thread != NULL) { + /* remember the thread priority is only within the process class */ + if (!SetThreadPriority(clock_thread, THREAD_PRIORITY_TIME_CRITICAL)) { + DPRINTF(1, ("Error setting thread priority\n")); + } + + lock_thread_to_processor(clock_thread); + ResumeThread(clock_thread); + + if (FALSE == done_once) { + done_once = TRUE; + lock_thread_to_processor(GetCurrentThread()); + atexit( StopClockThread ); + } + + /* + * Give the clock thread time to fill its counter/time + * sample buffer. This will underfill the buffer a + * bit for sample periods over 43 msec. + */ + Sleep(BASELINES_USED * 43); + } +} + + +void +StopClockThread(void) +{ + /* + * if the clock thread exit()s this routine + * will be called on the clock thread and + * we need not (and can't) use the normal + * TimerThreadExitRequest event. + */ + if (GetCurrentThreadId() != clock_thread_id) { + + if (!SetEvent(TimerThreadExitRequest) || + WaitForSingleObject(clock_thread, 2 * 1000) != + WAIT_OBJECT_0) { + msyslog(LOG_ERR, "Failed to stop clock thread."); + } + } + CloseHandle(TimerThreadExitRequest); + TimerThreadExitRequest = NULL; + CloseHandle(clock_thread); + clock_thread = NULL; +} + + +void +lock_thread_to_processor(HANDLE thread) +{ + static DWORD_PTR ProcessAffinityMask; + static DWORD_PTR ThreadAffinityMask; + DWORD_PTR SystemAffinityMask; + char *cputext; + unsigned int cpu; + + if ( ! winnt_time_initialized) { + DPRINTF(1, ("init_winnt_time() must be called before " + "lock_thread_to_processor(), exiting\n")); + exit(-1); + } + + if (!winnt_use_interpolation) + return; + + if (-1 == lock_interp_threads) { + DPRINTF(1, ("choose_interp_counter() is not called " + "before lock_thread_to_processor()\n")); + exit(-1); + } else if (!lock_interp_threads) + return; + + /* + * Calculate the ThreadAffinityMask we'll use once on the + * first invocation. + */ + if (!ProcessAffinityMask) { + + /* + * Choose which processor to nail the main and clock threads to. + * If we have more than one, we simply choose the 2nd. + * Randomly choosing from 2 to n would be better, but in + * either case with clock and network interrupts more likely + * to be serviced by the first procecssor, let's stay away + * from it. QueryPerformanceCounter is not necessarily + * consistent across CPUs, hence the need to nail the two + * threads involved in QPC-based interpolation to the same + * CPU. + */ + + GetProcessAffinityMask( + GetCurrentProcess(), + &ProcessAffinityMask, + &SystemAffinityMask); + + /* + * respect NTPD_CPU environment variable if present + * for testing. NTPD_CPU=0 means use all CPUs, 1-64 + * means lock threads involved in interpolation to + * that CPU. Default to 2nd if more than 1. + */ + + cpu = 2; + cputext = getenv("NTPD_CPU"); + if (cputext) { + cpu = (unsigned int) atoi(cputext); + cpu = min((8 * sizeof(DWORD_PTR)), cpu); + } + + /* + * Clear all bits except the 2nd. If we have only one proc + * that leaves ThreadAffinityMask zeroed and we won't bother + * with SetThreadAffinityMask. + */ + + ThreadAffinityMask = (0 == cpu) ? 0 : (1 << (cpu - 1)); + + if (ThreadAffinityMask && + !(ThreadAffinityMask & ProcessAffinityMask)) + + DPRINTF(1, ("Selected CPU %u (mask %x) is outside " + "process mask %x, using all CPUs.\n", + cpu, ThreadAffinityMask, + ProcessAffinityMask)); + else + DPRINTF(1, ("Wiring to processor %u (0 means all) " + "affinity mask %x\n", + cpu, ThreadAffinityMask)); + + ThreadAffinityMask &= ProcessAffinityMask; + } + + if (ThreadAffinityMask && + !SetThreadAffinityMask(thread, ThreadAffinityMask)) + msyslog(LOG_ERR, + "Unable to wire thread to mask %x: %m", + ThreadAffinityMask); +} + + +#ifdef HAVE_PPSAPI +static inline void ntp_timestamp_from_counter(l_fp *, ULONGLONG, + ULONGLONG); + +/* + * helper routine for serial PPS which returns QueryPerformanceCounter + * timestamp and needs to interpolate it to an NTP timestamp. + */ +void +pps_ntp_timestamp_from_counter( + ntp_fp_t *result, + ULONGLONG Timestamp, + ULONGLONG Counterstamp + ) +{ + /* + * convert between equivalent l_fp and PPSAPI ntp_fp_t + */ + ntp_timestamp_from_counter( + (l_fp *)result, + Timestamp, + Counterstamp); +} + + +static inline +void +ntp_timestamp_from_counter( + l_fp *result, + ULONGLONG Timestamp, + ULONGLONG Counterstamp + ) +{ + FT_ULL Now; + FT_ULL Ctr; + LONGLONG CtrDelta; + double seconds; + ULONGLONG InterpTimestamp; + + if (winnt_use_interpolation) { + if (0 == Counterstamp) { + DPRINTF(1, ("ntp_timestamp_from_counter rejecting 0 counter.\n")); + ZERO(*result); + return; + } + + InterpTimestamp = interp_time(Counterstamp + QPC_offset, FALSE); + } else { /* ! winnt_use_interpolation */ + if (NULL != pGetSystemTimePreciseAsFileTime && + 0 != Counterstamp) { + QueryPerformanceCounter(&Ctr.li); + (*pGetSystemTimePreciseAsFileTime)(&Now.ft); + CtrDelta = Ctr.ull - Counterstamp; + seconds = (double)CtrDelta / PerfCtrFreq; + InterpTimestamp = Now.ull - + (ULONGLONG)(seconds * HECTONANOSECONDS); + } else { + /* have to simply use the driver's system time timestamp */ + InterpTimestamp = Timestamp; + GetSystemTimeAsFileTime(&Now.ft); + } + } + + /* convert from 100ns units to NTP fixed point format */ + + InterpTimestamp -= FILETIME_1970; + result->l_ui = JAN_1970 + (u_int32)(InterpTimestamp / HECTONANOSECONDS); + result->l_uf = (u_int32)((InterpTimestamp % HECTONANOSECONDS) * + (ULONGLONG)FRAC / HECTONANOSECONDS); +} +#endif /* HAVE_PPSAPI */ + + +void +win_time_stepped(void) +{ + /* + * called back by ntp_set_tod after the system + * time has been stepped (set). + * + * We normally prevent the reported time from going backwards + * but need to allow it in this case. + */ + if (FALSE == winnt_use_interpolation) + return; + + + /* + * Restart the clock thread to get a new baseline + * time/counter correlation. + */ + StopClockThread(); + + /* + * newest_baseline_gen is a generation counter + * incremented once each time newest_baseline + * is reset. + */ + newest_baseline_gen++; + + clock_backward_max = CLOCK_BACK_THRESHOLD; + clock_backward_count = 0; + newest_baseline = 0; + ZERO(baseline_counts); + ZERO(baseline_times); + + StartClockThread(); +} + + +/* + * log2ull - log base 2 of a unsigned 64-bit number + */ +int +log2ull( + ULONGLONG n + ) +{ + const ULONGLONG one = 1; + int log = 0; + + if (n >= one<<32) { n >>= 32; log += 32; } + if (n >= one<<16) { n >>= 16; log += 16; } + if (n >= one<< 8) { n >>= 8; log += 8; } + if (n >= one<< 4) { n >>= 4; log += 4; } + if (n >= one<< 2) { n >>= 2; log += 2; } + if (n >= one<< 1) { log += 1; } + + return (n) ? log : (-1); +} + + +/* + * ctr_freq_timer_fired is called once a few seconds before + * tune_ctr_period seconds have elapsed, to reset the timer + * and hopefully minimize error due to the system using the + * nominal performance counter frequency to set the timer + * internally, which is typically dozens of PPM from the + * actual performance counter rate. A few seconds later + * it is called again to observe the counter and estimate the + * counter frequency. + */ +static void CALLBACK +ctr_freq_timer_fired( + LPVOID arg, + DWORD dwTimeLow, + DWORD dwTimeHigh + ) +{ + static FT_ULL begin_time = {0}; + static FT_ULL begin_count = {0}; + static ULONGLONG next_period_time = 0; + static ULONGLONG report_systemtime = 0; + const ULONGLONG five_minutes = 5ui64 * 60 * HECTONANOSECONDS; + FT_ULL now_time; + FT_ULL now_count; + + if (!begin_time.ull) { + begin_count.ull = perf_ctr(); + begin_time.ft.dwLowDateTime = dwTimeLow; + begin_time.ft.dwHighDateTime = dwTimeHigh; + + /* + * adapt perf ctr observation interval to the + * counter frequency + */ + tune_ctr_period = 22680 / log2ull(NomPerfCtrFreq); + + /* + * reset timer 2s before period ends to minimize + * error from OS timer routines using nominal + * performance frequency internally. + */ + tune_ctr_freq_max_interval = tune_ctr_period - 2; + + next_period_time = begin_time.ull + + (ULONGLONG)tune_ctr_period * HECTONANOSECONDS; + + ROUND_TO_NEXT_SEC_BOTTOM(next_period_time); + + reset_ctr_freq_timer(next_period_time, begin_time.ull); + + return; + } + + now_time.ft.dwLowDateTime = dwTimeLow; + now_time.ft.dwHighDateTime = dwTimeHigh; + + if (now_time.ull >= next_period_time) { + now_count.ull = perf_ctr(); + tune_ctr_freq( + now_count.ull - begin_count.ull, + now_time.ull - begin_time.ull); + next_period_time += (ULONGLONG)tune_ctr_period * HECTONANOSECONDS; + begin_count.ull = now_count.ull; + begin_time.ull = now_time.ull; + } + + /* + * Log clock backward events no more often than 5 minutes. + */ + if (!report_systemtime) { + report_systemtime = now_time.ull + five_minutes; + } else if (report_systemtime <= now_time.ull) { + report_systemtime += five_minutes; + if (clock_backward_count) { + msyslog(LOG_WARNING, + "clock would have gone backward %d times, " + "max %.1f usec", + clock_backward_count, + clock_backward_max / 10.); + + clock_backward_max = CLOCK_BACK_THRESHOLD; + clock_backward_count = 0; + } + } + reset_ctr_freq_timer(next_period_time, now_time.ull); +} + + +void +reset_ctr_freq_timer_abs( + ULONGLONG when + ) +{ + FT_ULL fire_time; + + fire_time.ull = when; + SetWaitableTimer( + ctr_freq_timer, + &fire_time.li, /* first fire */ + 0, /* not periodic */ + ctr_freq_timer_fired, /* callback routine */ + NULL, /* context for callback */ + FALSE); /* do not interfere with power saving */ +} + + +void +reset_ctr_freq_timer( + ULONGLONG when, + ULONGLONG now + ) +{ + if (when - now > + (tune_ctr_freq_max_interval * HECTONANOSECONDS + HECTONANOSECONDS)) + when = now + tune_ctr_freq_max_interval * HECTONANOSECONDS; + + reset_ctr_freq_timer_abs(when); +} + + +void +start_ctr_freq_timer( + ULONGLONG now_time + ) +{ + ULONGLONG when; + + ctr_freq_timer = CreateWaitableTimer(NULL, FALSE, NULL); + when = now_time; + ROUND_TO_NEXT_SEC_BOTTOM(when); + + reset_ctr_freq_timer_abs(when); +} + + +/* + * tune_ctr_freq is called once per tune_ctr_period seconds + * with a counter difference and time difference. + */ +void +tune_ctr_freq( + LONGLONG ctr_delta, + LONGLONG time_delta + ) +{ + static unsigned count = 0; + static unsigned dispcount = 0; + static unsigned report_at_count = 0; + static int disbelieved = 0; + static int i = 0; + static double nom_freq = 0; + static LONGLONG diffs[TUNE_CTR_DEPTH] = {0}; + static LONGLONG sum = 0; + char ctr_freq_eq[64]; + LONGLONG delta; + LONGLONG deltadiff; + ULONGLONG ObsPerfCtrFreq; + double freq; + double this_freq; + BOOL isneg; + + /* one-time initialization */ + if (!report_at_count) { + report_at_count = 24 * 60 * 60 / tune_ctr_period; + nom_freq = NomPerfCtrFreq / 1e6; + } + + /* delta is the per-second observed frequency this time */ + delta = (LONGLONG)((double)ctr_delta * HECTONANOSECONDS / + time_delta); + + /* disbelieve any delta more than +/- 976 PPM from nominal */ + deltadiff = delta - NomPerfCtrFreq; + if (0 > deltadiff) { + isneg = TRUE; + deltadiff = -deltadiff; + } else { + isneg = FALSE; + } + + if ((ULONGLONG)deltadiff > (NomPerfCtrFreq / 1024)) { + disbelieved++; + dispcount++; +#ifdef DEBUG + msyslog(LOG_DEBUG, "ctr delta %s%lld exceeds limit %llu", + (isneg) ? "-" : "", + deltadiff, + NomPerfCtrFreq / 1024); +#endif + } else { + + /* + * collect average over TUNE_CTR_DEPTH samples + * for our PerfCtrFreq trimming. + */ + + if (isneg) + deltadiff = -deltadiff; + sum -= diffs[i]; + diffs[i] = deltadiff; + sum += deltadiff; + i = (i + 1) % COUNTOF(diffs); + count++; + dispcount++; + } + + this_freq = delta / 1e6; + + ObsPerfCtrFreq = NomPerfCtrFreq + (sum / COUNTOF(diffs)); + +#if 1 /* #if 0 to disable changing freq used */ + /* get rid of ObsPerfCtrFreq when removing the #ifdef */ + PerfCtrFreq = ObsPerfCtrFreq; +#endif + freq = PerfCtrFreq / 1e6; + + /* + * make the performance counter's frequency error from its + * nominal rate, expressed in PPM, available via ntpq as + * system variable "ctr_frequency". This is consistent with + * "frequency" which is the system clock drift in PPM. + */ + snprintf(ctr_freq_eq, sizeof(ctr_freq_eq), "ctr_frequency=%.2f", + 1e6 * (freq - nom_freq) / nom_freq); + set_sys_var(ctr_freq_eq, strlen(ctr_freq_eq) + 1, RO | DEF); + + /* + * report observed ctr freq each time the estimate used during + * startup moves toward the observed freq from the nominal. + */ + + if (count > COUNTOF(diffs) && + /* (count % COUNTOF(diffs)) && */ /* enables reporting each */ + dispcount < report_at_count) /* TUNE_CTR_DEPTH samples */ + return; + + NLOG(NLOG_CLOCKINFO) + if (count <= COUNTOF(diffs)) + /* moving to observed freq. from nominal (startup) */ + msyslog(LOG_INFO, + (freq > 100) + ? "ctr %.3f MHz %+6.2f PPM using %.3f MHz %+6.2f PPM" + : "ctr %.6f MHz %+6.2f PPM using %.6f MHz %+6.2f PPM", + this_freq, + 1e6 * (this_freq - nom_freq) / nom_freq, + freq, + 1e6 * (freq - nom_freq) / nom_freq); + else + /* steady state */ + msyslog(LOG_INFO, + (freq > 100) + ? "ctr %.3f MHz %+.2f PPM" + : "ctr %.6f MHz %+.2f PPM", + freq, + 1e6 * (freq - nom_freq) / nom_freq); + + if (disbelieved) { + msyslog(LOG_ERR, + "%d ctr samples exceed +/- 976 PPM range gate", + disbelieved); + disbelieved = 0; + } + + dispcount = 0; +} + + +/* + * add_counter_time_pair is called by the + * high priority clock thread with each new + * baseline counter/time correlation. + */ +void +add_counter_time_pair( + ULONGLONG ctr, + LONGLONG time + ) +{ + int i; + + i = (newest_baseline + 1) % BASELINES_TOT; + + baseline_counts[i] = ctr; + baseline_times[i] = time; + + newest_baseline = i; +} + + +/* + * interp_time estimates NT time in 100ns units + * based on a performance counter value given. + * This must tolerate recent historical counters + * as well as current. When current is FALSE + * we can't assume ctr is the latest/highest + * seen. + */ +ULONGLONG +interp_time( + ULONGLONG ctr, + BOOL current + ) +{ + static __declspec(thread) int last_newest = -1; + static __declspec(thread) int last_newest_gen; + static __declspec(thread) int best_index; + ULONGLONG this_ctr; + LONGLONG this_time; + LONGLONG latest_time; + LONGLONG ctr_diff; + int i; + int i_gen; + int c; + + /* + * Use the system time (roughly synchronised to the tick, and + * extrapolated using the system performance counter. + * + * Cache the results per thread and only repeat the + * calculation when new data has arrived. + */ + i = newest_baseline; + i_gen = newest_baseline_gen; + + if (last_newest == i && last_newest_gen == i_gen) { + this_time = baseline_times[best_index]; + ctr_diff = ctr - baseline_counts[best_index]; + this_time += (LONGLONG)PERF2HNS((double)ctr_diff); + + return this_time; + } + + last_newest = i; + last_newest_gen = i_gen; + + latest_time = 0; + + /* + * Run through the history calculating the interpolated + * time based on each counter/time correlation in turn, + * and believe the latest one. This is akin to the NTP + * protocol minimum delay clock filter. Errors due to + * counter/time correlations with stale time are all + * negative. + */ + for (c = 0; c < BASELINES_USED; c++) { + if (baseline_times[i]) { + this_time = baseline_times[i]; + this_ctr = baseline_counts[i]; + + ctr_diff = ctr - this_ctr; + + if (current && ctr_diff < 0) { + /* + * The performance counter apparently went + * backwards without rolling over. It might + * be nice to complain but we don't want + * to do it repeatedly. + */ + ctr_diff = 0; + } + + this_time += (LONGLONG)PERF2HNS((double)ctr_diff); + + if (this_time > latest_time) { + latest_time = this_time; + best_index = i; + } + } + i = i ? (i - 1) : (BASELINES_TOT - 1); + } + + return latest_time; +} diff --git a/ports/winnt/ntpd/ntp_iocompletionport.c b/ports/winnt/ntpd/ntp_iocompletionport.c new file mode 100644 index 0000000..046d6cd --- /dev/null +++ b/ports/winnt/ntpd/ntp_iocompletionport.c @@ -0,0 +1,1639 @@ +/* +----------------------------------------------------------------------- +This is the IO completion port handling for async/overlapped IO on +Windows >= Win2000. + +Some notes on the implementation: + ++ Only one thread is used to serve the IO completion port, for several + reasons: + + * First, there seems to be (have been?) trouble that locked up NTPD + when more than one thread was used for IOCPL. + + * Second, for the sake of the time stamp interpolation the threads + must run on the same CPU as the time interpolation thread. This + makes using more than one thread useless, as they would compete for + the same core and create contention. + ++ Some IO operations need a possibly lengthy postprocessing. Emulating + the UN*X line discipline is currently the only but prominent example. + To avoid the processing in the time-critical IOCPL thread, longer + processing is offloaded the worker thread pool. + ++ A fact that seems not as well-known as it should be is that all + ressources passed to an overlapped IO operation must be considered + owned by the OS until the result has been fetched/dequeued. This + includes all overlapped structures and buffers involved, so cleaning + up on shutdown must be carefully constructed. (This includes closing + all the IO handles and waiting for the results to be dequeued. + 'CancleIo()' cannot be used since it's broken beyond repair.) + + If this is not possible, then all ressources should be dropped into + oblivion -- otherwise "bad things (tm)" are bound to happen. + + Using a private heap that is silently dropped but not deleted is a + good way to avoid cluttering memory stats with IO context related + objects. Leak tracing becomes more interesting, though. + + +The current implementation is based on the work of Danny Mayer who improved +the original implementation and Dave Hart who improved on the serial I/O +routines. The true roots of this file seem to be shrouded by the mist of time... + + +This version still provides the 'user space PPS' emulation +feature. + +Juergen Perlinger (perlinger@ntp.org) Feb 2012 + +----------------------------------------------------------------------- +*/ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#ifdef HAVE_IO_COMPLETION_PORT + +#include <stddef.h> +#include <stdio.h> +#include <process.h> +#include <syslog.h> + +#include "ntpd.h" +#include "ntp_machine.h" +#include "ntp_iocompletionport.h" +#include "ntp_request.h" +#include "ntp_assert.h" +#include "ntp_io.h" +#include "ntp_lists.h" + + +#define CONTAINEROF(p, type, member) \ + ((type *)((char *)(p) - offsetof(type, member))) + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 201) /* nonstd extension nameless union */ +#endif + +/* + * --------------------------------------------------------------------- + * storage type for PPS data (DCD change counts & times) + * --------------------------------------------------------------------- + */ +struct PpsData { + u_long cc_assert; + u_long cc_clear; + l_fp ts_assert; + l_fp ts_clear; +}; +typedef struct PpsData PPSData_t; + +struct PpsDataEx { + u_long cov_count; + PPSData_t data; +}; +typedef volatile struct PpsDataEx PPSDataEx_t; + +/* + * --------------------------------------------------------------------- + * device context; uses reference counting to avoid nasty surprises. + * Currently this stores only the PPS time stamps, but it could be + * easily extended. + * --------------------------------------------------------------------- + */ +#define PPS_QUEUE_LEN 8u /* must be power of two! */ +#define PPS_QUEUE_MSK (PPS_QUEUE_LEN-1) /* mask for easy MOD ops */ + +struct DeviceContext { + volatile long ref_count; + volatile u_long cov_count; + PPSData_t pps_data; + PPSDataEx_t pps_buff[PPS_QUEUE_LEN]; +}; + +typedef struct DeviceContext DevCtx_t; + +/* + * --------------------------------------------------------------------- + * I/O context structure + * + * This is an extended overlapped structure. Some fields are only used + * for serial I/O, others are used for all operations. The serial I/O is + * more interesting since the same context object is used for waiting, + * actual I/O and possibly offload processing in a worker thread until + * a complete operation cycle is done. + * + * In this case the I/O context is used to gather all the bits that are + * finally needed for the processing of the buffer. + * --------------------------------------------------------------------- + */ +//struct IoCtx; +typedef struct IoCtx IoCtx_t; +typedef struct refclockio RIO_t; + +typedef void (*IoCompleteFunc)(ULONG_PTR, IoCtx_t *); + +struct IoCtx { + OVERLAPPED ol; /* 'kernel' part of the context */ + union { + recvbuf_t * recv_buf; /* incoming -> buffer structure */ + void * trans_buf; /* outgoing -> char array */ + PPSData_t * pps_buf; /* for reading PPS seq/stamps */ + HANDLE ppswake; /* pps wakeup for attach */ + }; + IoCompleteFunc onIoDone; /* HL callback to execute */ + RIO_t * rio; /* RIO backlink (for offload) */ + DevCtx_t * devCtx; + l_fp DCDSTime; /* PPS-hack: time of DCD ON */ + l_fp FlagTime; /* timestamp of flag/event char */ + l_fp RecvTime; /* timestamp of callback */ + DWORD errCode; /* error code of last I/O */ + DWORD byteCount; /* byte count " */ + DWORD com_events; /* buffer for COM events */ + unsigned int flRawMem : 1; /* buffer is raw memory -> free */ + unsigned int flTsDCDS : 1; /* DCDSTime valid? */ + unsigned int flTsFlag : 1; /* FlagTime valid? */ +}; + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +/* + * local function definitions + */ +static void ntpd_addremove_semaphore(HANDLE, int); +static inline void set_serial_recv_time (recvbuf_t *, IoCtx_t *); + +/* Initiate/Request async IO operations */ +static BOOL QueueSerialWait (RIO_t *, recvbuf_t *, IoCtx_t *); +static BOOL QueueSerialRead (RIO_t *, recvbuf_t *, IoCtx_t *); +static BOOL QueueRawSerialRead(RIO_t *, recvbuf_t *, IoCtx_t *); +static BOOL QueueSocketRecv (SOCKET , recvbuf_t *, IoCtx_t *); + + +/* High-level IO callback functions */ +static void OnSocketRecv (ULONG_PTR, IoCtx_t *); +static void OnSerialWaitComplete (ULONG_PTR, IoCtx_t *); +static void OnSerialReadComplete (ULONG_PTR, IoCtx_t *); +static void OnRawSerialReadComplete(ULONG_PTR, IoCtx_t *); +static void OnSerialWriteComplete (ULONG_PTR, IoCtx_t *); + +/* worker pool offload functions */ +static DWORD WINAPI OnSerialReadWorker(void * ctx); + + +/* keep a list to traverse to free memory on debug builds */ +#ifdef DEBUG +static void free_io_completion_port_mem(void); +#endif + + + HANDLE WaitableExitEventHandle; + HANDLE WaitableIoEventHandle; +static HANDLE hIoCompletionPort; + +DWORD ActiveWaitHandles; +HANDLE WaitHandles[16]; + +/* + * ------------------------------------------------------------------- + * We make a pool of our own for IO context objects -- the are owned by + * the system until a completion result is pulled from the queue, and + * they seriously go into the way of memory tracking until we can safely + * cancel an IO request. + * ------------------------------------------------------------------- + */ +static HANDLE hHeapHandle; + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * Create a new heap for IO context objects + */ +static void +IoCtxPoolInit( + size_t initObjs + ) +{ + hHeapHandle = HeapCreate(0, initObjs * sizeof(IoCtx_t), 0); + if (hHeapHandle == NULL) { + msyslog(LOG_ERR, "Can't initialize Heap: %m"); + exit(1); + } +} + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * + * Delete the IO context heap + * + * Since we do not know what callbacks are pending, we just drop the + * pool into oblivion. New allocs and frees will fail from this moment, + * but we simply don't care. At least the normal heap dump stats will + * show no leaks from IO context blocks. On the downside, we have to + * track them ourselves if something goes wrong. + */ +static void +IoCtxPoolDone(void) +{ + hHeapHandle = NULL; +} + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * Alloc & Free on local heap + * + * When the heap handle is NULL, these both will fail; Alloc with a NULL + * return and Free silently. + */ +static void * __fastcall +LocalPoolAlloc( + size_t size, + const char * desc +) +{ + void * ptr; + + /* Windows heaps can't grok zero byte allocation. + * We just get one byte. + */ + if (size == 0) + size = 1; + if (hHeapHandle != NULL) + ptr = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, size); + else + ptr = NULL; + DPRINTF(3, ("Allocate '%s', heap=%p, ptr=%p\n", + desc, hHeapHandle, ptr)); + + return ptr; +} + +static void __fastcall +LocalPoolFree( + void * ptr, + const char * desc + ) +{ + DPRINTF(3, ("Free '%s', heap=%p, ptr=%p\n", + desc, hHeapHandle, ptr)); + if (ptr != NULL && hHeapHandle != NULL) + HeapFree(hHeapHandle, 0, ptr); +} + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * Alloc & Free of Device context + * + * When the heap handle is NULL, these both will fail; Alloc with a NULL + * return and Free silently. + */ +static DevCtx_t * __fastcall +DevCtxAlloc(void) +{ + DevCtx_t * devCtx; + u_long slot; + + /* allocate struct and tag all slots as invalid */ + devCtx = (DevCtx_t *)LocalPoolAlloc(sizeof(DevCtx_t), "DEV ctx"); + if (devCtx != NULL) + { + /* The initial COV values make sure there is no busy + * loop on unused/empty slots. + */ + devCtx->cov_count = 0; + for (slot = 0; slot < PPS_QUEUE_LEN; slot++) + devCtx->pps_buff[slot].cov_count = ~slot; + } + return devCtx; +} + +static void __fastcall +DevCtxFree( + DevCtx_t * devCtx + ) +{ + /* this would be the place to get rid of managed ressources. */ + LocalPoolFree(devCtx, "DEV ctx"); +} + +static DevCtx_t * __fastcall +DevCtxAttach( + DevCtx_t * devCtx + ) +{ + if (devCtx != NULL) + InterlockedIncrement(&devCtx->ref_count); + return devCtx; +} + +static void __fastcall +DevCtxDetach( + DevCtx_t * devCtx + ) +{ + if (devCtx && !InterlockedDecrement(&devCtx->ref_count)) + DevCtxFree(devCtx); +} + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * Alloc & Free of I/O context + * + * When the heap handle is NULL, these both will fail; Alloc with a NULL + * return and Free silently. + */ +static IoCtx_t * __fastcall +IoCtxAlloc( + DevCtx_t * devCtx + ) +{ + IoCtx_t * ioCtx; + + ioCtx = (IoCtx_t *)LocalPoolAlloc(sizeof(IoCtx_t), "IO ctx"); + if (ioCtx != NULL) + ioCtx->devCtx = DevCtxAttach(devCtx); + return ioCtx; +} + +static void __fastcall +IoCtxFree( + IoCtx_t * ctx + ) +{ + if (ctx) + DevCtxDetach(ctx->devCtx); + LocalPoolFree(ctx, "IO ctx"); +} + +static void __fastcall +IoCtxReset( + IoCtx_t * ctx + ) +{ + RIO_t * rio; + DevCtx_t * dev; + if (ctx) { + rio = ctx->rio; + dev = ctx->devCtx; + ZERO(*ctx); + ctx->rio = rio; + ctx->devCtx = dev; + } +} + +/* + * ------------------------------------------------------------------- + * The IO completion thread and support functions + * + * There is only one completion thread, because it is locked to the same + * core as the time interpolation. Having more than one causes core + * contention and is not useful. + * ------------------------------------------------------------------- + */ +static HANDLE hIoCompletionThread; +static UINT tidCompletionThread; + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * The IO completion worker thread + * + * Note that this thread does not enter an alertable wait state and that + * the only waiting point is the IO completion port. If stopping this + * thread with a special queued result packet does not work, + * 'TerminateThread()' is the only remaining weapon in the arsenal. A + * dangerous weapon -- it's like SIGKILL. + */ +static unsigned WINAPI +iocompletionthread(void *NotUsed) +{ + DWORD err; + DWORD octets; + ULONG_PTR key; + OVERLAPPED * pol; + IoCtx_t * lpo; + + UNUSED_ARG(NotUsed); + + /* + * Socket and refclock receive call gettimeofday() so the I/O + * thread needs to be on the same processor as the main and + * timing threads to ensure consistent QueryPerformanceCounter() + * results. + * + * This gets seriously into the way of efficient thread pooling + * on multicore systems. + */ + lock_thread_to_processor(GetCurrentThread()); + + /* + * Set the thread priority high enough so I/O will preempt + * normal recv packet processing, but not higher than the timer + * sync thread. + */ + if (!SetThreadPriority(GetCurrentThread(), + THREAD_PRIORITY_ABOVE_NORMAL)) + msyslog(LOG_ERR, "Can't set thread priority: %m"); + + for(;;) { + if (GetQueuedCompletionStatus( + hIoCompletionPort, + &octets, + &key, + &pol, + INFINITE)) { + err = ERROR_SUCCESS; + } else { + err = GetLastError(); + } + if (NULL == pol) { + DPRINTF(2, ("Overlapped IO Thread Exiting\n")); + break; /* fail */ + } + lpo = CONTAINEROF(pol, IoCtx_t, ol); + get_systime(&lpo->RecvTime); + lpo->byteCount = octets; + lpo->errCode = err; + handler_calls++; + (*lpo->onIoDone)(key, lpo); + } + + return 0; +} + +/* + * ------------------------------------------------------------------- + * Create/initialise the I/O creation port + */ +void +init_io_completion_port(void) +{ +#ifdef DEBUG + atexit(&free_io_completion_port_mem); +#endif + + /* Create the context pool first. */ + IoCtxPoolInit(20); + + /* Create the event used to signal an IO event */ + WaitableIoEventHandle = CreateEvent(NULL, FALSE, FALSE, NULL); + if (WaitableIoEventHandle == NULL) { + msyslog(LOG_ERR, "Can't create I/O event handle: %m"); + exit(1); + } + /* Create the event used to signal an exit event */ + WaitableExitEventHandle = CreateEvent(NULL, FALSE, FALSE, NULL); + if (WaitableExitEventHandle == NULL) { + msyslog(LOG_ERR, "Can't create exit event handle: %m"); + exit(1); + } + + /* Create the IO completion port */ + hIoCompletionPort = CreateIoCompletionPort( + INVALID_HANDLE_VALUE, NULL, 0, 0); + if (hIoCompletionPort == NULL) { + msyslog(LOG_ERR, "Can't create I/O completion port: %m"); + exit(1); + } + + /* Initialize the Wait Handles table */ + WaitHandles[0] = WaitableIoEventHandle; + WaitHandles[1] = WaitableExitEventHandle; /* exit request */ + WaitHandles[2] = WaitableTimerHandle; + ActiveWaitHandles = 3; + + /* + * Supply ntp_worker.c with function to add or remove a + * semaphore to the ntpd I/O loop which is signalled by a worker + * when a response is ready. The callback is invoked in the + * parent. + */ + addremove_io_semaphore = &ntpd_addremove_semaphore; + + /* + * Have one thread servicing I/O. See rationale in front matter. + */ + hIoCompletionThread = (HANDLE)_beginthreadex( + NULL, + 0, + iocompletionthread, + NULL, + 0, + &tidCompletionThread); +} + + +/* + * ------------------------------------------------------------------- + * completion port teardown + */ +void +uninit_io_completion_port( + void + ) +{ + DWORD rc; + + /* do noting if completion port already gone. */ + if (NULL == hIoCompletionPort) + return; + + /* + * Service thread seems running. Terminate him with grace + * first and force later... + */ + if (tidCompletionThread != GetCurrentThreadId()) { + PostQueuedCompletionStatus(hIoCompletionPort, 0, 0, 0); + rc = WaitForSingleObject(hIoCompletionThread, 5000); + if (rc == WAIT_TIMEOUT) { + /* Thread lost. Kill off with TerminateThread. */ + msyslog(LOG_ERR, + "IO completion thread refuses to terminate"); + TerminateThread(hIoCompletionThread, ~0UL); + } + } + + /* stop using the memory pool */ + IoCtxPoolDone(); + + /* now reap all handles... */ + CloseHandle(hIoCompletionThread); + hIoCompletionThread = NULL; + CloseHandle(hIoCompletionPort); + hIoCompletionPort = NULL; +} + + +/* + * ------------------------------------------------------------------- + * external worker thread support (wait handle stuff) + * + * !Attention! + * + * - This function must only be called from the main thread. Changing + * a set of wait handles while someone is waiting on it creates + * undefined behaviour. Also there's no provision for mutual + * exclusion when accessing global values. + * + * - It's not possible to register a handle that is already in the table. + */ +static void +ntpd_addremove_semaphore( + HANDLE sem, + int remove + ) +{ + DWORD hi; + + /* search for a matching entry first. */ + for (hi = 3; hi < ActiveWaitHandles; hi++) + if (sem == WaitHandles[hi]) + break; + + if (remove) { + /* + * If found, eventually swap with last entry to keep + * the table dense. + */ + if (hi < ActiveWaitHandles) { + ActiveWaitHandles--; + if (hi < ActiveWaitHandles) + WaitHandles[hi] = + WaitHandles[ActiveWaitHandles]; + WaitHandles[ActiveWaitHandles] = NULL; + } + } else { + /* + * Make sure the entry is not found and there is enough + * room, then append to the table array. + */ + if (hi >= ActiveWaitHandles) { + NTP_INSIST(ActiveWaitHandles < COUNTOF(WaitHandles)); + WaitHandles[ActiveWaitHandles] = sem; + ActiveWaitHandles++; + } + } +} + + +#ifdef DEBUG +static void +free_io_completion_port_mem( + void + ) +{ + /* + * At the moment, do absolutely nothing. Returning memory here + * requires NO PENDING OVERLAPPED OPERATIONS AT ALL at this + * point in time, and as long we cannot be reasonable sure about + * that the simple advice is: + * + * HANDS OFF! + */ +} +#endif /* DEBUG */ + + +/* + * ------------------------------------------------------------------- + * Serial IO stuff + * + * Prelude -- common error checking code + * ------------------------------------------------------------------- + */ +extern char * NTstrerror(int err, BOOL *bfreebuf); + +static BOOL +IoResultCheck( + DWORD err, + IoCtx_t * ctx, + const char * msg + ) +{ + char * msgbuf; + BOOL dynbuf; + + /* If the clock is not / no longer active, assume + * 'ERROR_OPERATION_ABORTED' and do the necessary cleanup. + */ + if (ctx->rio && !ctx->rio->active) + err = ERROR_OPERATION_ABORTED; + + switch (err) + { + /* The first ones are no real errors. */ + case ERROR_SUCCESS: /* all is good */ + case ERROR_IO_PENDING: /* callback pending */ + return TRUE; + + /* the next ones go silently -- only cleanup is done */ + case ERROR_INVALID_PARAMETER: /* handle already closed */ + case ERROR_OPERATION_ABORTED: /* handle closed while wait */ + break; + + + default: + /* + * We have to resort to the low level error formatting + * functions here, since the error code can be an + * overlapped result. Relying the value to be the same + * as the 'GetLastError()' result at this point of + * execution is shaky at best, and using SetLastError() + * to force it seems too nasty. + */ + msgbuf = NTstrerror(err, &dynbuf); + msyslog(LOG_ERR, "%s: err=%u, '%s'", msg, err, msgbuf); + if (dynbuf) + LocalFree(msgbuf); + break; + } + + /* If we end here, we have to mop up the buffer and context */ + if (ctx->flRawMem) { + if (ctx->trans_buf) + free(ctx->trans_buf); + } else { + if (ctx->recv_buf) + freerecvbuf(ctx->recv_buf); + } + IoCtxFree(ctx); + return FALSE; +} + +/* + * ------------------------------------------------------------------- + * Serial IO stuff + * + * Part 1 -- COMM event handling + * ------------------------------------------------------------------- + */ + +static BOOL +QueueSerialWait( + RIO_t * rio, + recvbuf_t * buff, + IoCtx_t * lpo + ) +{ + BOOL rc; + + lpo->onIoDone = OnSerialWaitComplete; + lpo->recv_buf = buff; + lpo->flRawMem = 0; + lpo->rio = rio; + buff->fd = rio->fd; + + rc = WaitCommEvent((HANDLE)_get_osfhandle(rio->fd), + &lpo->com_events, &lpo->ol); + if (!rc) + return IoResultCheck(GetLastError(), lpo, + "Can't wait on Refclock"); + return TRUE; +} + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + +static void +OnSerialWaitComplete( + ULONG_PTR key, + IoCtx_t * lpo + ) +{ + RIO_t * rio; + DevCtx_t * dev; + recvbuf_t * buff; + PPSDataEx_t * ppsbuf; + DWORD modem_status; + u_long covc; + + /* check and bail out if operation failed */ + if (!IoResultCheck(lpo->errCode, lpo, + "WaitCommEvent failed")) + return; + + /* get & validate context and buffer. */ + rio = (RIO_t *)key; + buff = lpo->recv_buf; + dev = lpo->devCtx; + + NTP_INSIST(rio == lpo->rio); + +#ifdef DEBUG + if (~(EV_RXFLAG | EV_RLSD | EV_RXCHAR) & lpo->com_events) { + msyslog(LOG_ERR, "WaitCommEvent returned unexpected mask %x", + lpo->com_events); + exit(-1); + } +#endif + /* + * Take note of changes on DCD; 'user mode PPS hack'. + * perlinger@ntp.org suggested a way of solving several problems with + * this code that makes a lot of sense: move to a putative + * dcdpps-ppsapi-provider.dll. + */ + if (EV_RLSD & lpo->com_events) { + modem_status = 0; + GetCommModemStatus((HANDLE)_get_osfhandle(rio->fd), + &modem_status); + + if (dev != NULL) { + /* PPS-context available -- use it! */ + if (MS_RLSD_ON & modem_status) { + dev->pps_data.cc_assert++; + dev->pps_data.ts_assert = lpo->RecvTime; + DPRINTF(2, ("upps-real: fd %d DCD PPS Rise at %s\n", rio->fd, + ulfptoa(&lpo->RecvTime, 6))); + } else { + dev->pps_data.cc_clear++; + dev->pps_data.ts_clear = lpo->RecvTime; + DPRINTF(2, ("upps-real: fd %d DCD PPS Fall at %s\n", rio->fd, + ulfptoa(&lpo->RecvTime, 6))); + } + /* + ** Update PPS buffer, writing from low to high, with index + ** update as last action. We use interlocked ops and a + ** volatile data destination to avoid reordering on compiler + ** and CPU level. The interlocked instruction act as full + ** barriers -- we need only release semantics, but we don't + ** have them before VS2010. + */ + covc = dev->cov_count + 1u; + ppsbuf = dev->pps_buff + (covc & PPS_QUEUE_MSK); + InterlockedExchange((PLONG)&ppsbuf->cov_count, covc); + ppsbuf->data = dev->pps_data; + InterlockedExchange((PLONG)&dev->cov_count, covc); + } + /* perlinger@ntp.org, 2012-11-19 + It can be argued that once you have the PPS API active, you can + disable the old pps hack. This would give a behaviour that's much + more like the behaviour under a UN*Xish OS. On the other hand, it + will give a nasty surprise for people which have until now happily + taken the pps hack for granted, and after the first complaint, I have + decided to keep the old implementation unconditionally. So here it is: + + /* backward compat: 'usermode-pps-hack' */ + if (MS_RLSD_ON & modem_status) { + lpo->DCDSTime = lpo->RecvTime; + lpo->flTsDCDS = 1; + DPRINTF(2, ("upps-hack: fd %d DCD PPS Rise at %s\n", rio->fd, + ulfptoa(&lpo->RecvTime, 6))); + } + } + + /* If IO ready, read data. Go back waiting else. */ + if (EV_RXFLAG & lpo->com_events) { /* line discipline */ + lpo->FlagTime = lpo->RecvTime; + lpo->flTsFlag = 1; + QueueSerialRead(rio, buff, lpo); + } else if (EV_RXCHAR & lpo->com_events) { /* raw discipline */ + lpo->FlagTime = lpo->RecvTime; + lpo->flTsFlag = 1; + QueueRawSerialRead(rio, buff, lpo); + } else { /* idle... */ + QueueSerialWait(rio, buff, lpo); + } +} + +/* + * ------------------------------------------------------------------- + * Serial IO stuff + * + * Part 2 -- line discipline emulation + * + * Ideally this should *not* be done in the IO completion thread. + * We use a worker pool thread to offload the low-level processing. + * ------------------------------------------------------------------- + */ + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * Start & Queue a serial read for line discipline emulation. + */ +static BOOL +QueueSerialRead( + RIO_t * rio, + recvbuf_t * buff, + IoCtx_t * lpo + ) +{ + BOOL rc; + + lpo->onIoDone = &OnSerialReadComplete; + lpo->recv_buf = buff; + lpo->flRawMem = 0; + lpo->rio = rio; + buff->fd = rio->fd; + + rc = ReadFile((HANDLE)_get_osfhandle(rio->fd), + (char*)buff->recv_buffer + buff->recv_length, + sizeof(buff->recv_buffer) - buff->recv_length, + NULL, &lpo->ol); + if (!rc) + return IoResultCheck(GetLastError(), lpo, + "Can't read from Refclock"); + return TRUE; +} + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * IO completion thread callback. Takes a time stamp and offloads the + * real work to the worker pool ASAP. + */ +static void +OnSerialReadComplete( + ULONG_PTR key, + IoCtx_t * lpo + ) +{ + RIO_t * rio; + recvbuf_t * buff; + + /* check and bail out if operation failed */ + if (!IoResultCheck(lpo->errCode, lpo, + "Read from Refclock failed")) + return; + + /* get & validate context and buffer. */ + rio = lpo->rio; + buff = lpo->recv_buf; + NTP_INSIST((ULONG_PTR)rio == key); + + /* Offload to worker pool */ + if (!QueueUserWorkItem(&OnSerialReadWorker, lpo, WT_EXECUTEDEFAULT)) { + msyslog(LOG_ERR, + "Can't offload to worker thread, will skip data: %m"); + IoCtxReset(lpo); + buff->recv_length = 0; + QueueSerialWait(rio, buff, lpo); + } +} + + +/* + * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * Worker pool offload function -- avoid lengthy operations in the IO + * completion thread (affects timing...) + * + * This function does the real work of emulating the UN*X line + * discipline. Since this involves allocation of additional buffers and + * string parsing/copying, it is offloaded to the worker thread pool so + * the IO completion thread can resume faster. + */ +static DWORD WINAPI +OnSerialReadWorker(void * ctx) +{ + IoCtx_t * lpo; + recvbuf_t * buff, *obuf; + RIO_t * rio; + char *sptr, *send, *dptr; + BOOL eol; + char ch; + + /* Get context back */ + lpo = (IoCtx_t*)ctx; + buff = lpo->recv_buf; + rio = lpo->rio; + /* + * ignore 0 bytes read due to closure on fd. + * Eat the first line of input as it's possibly partial. + */ + if (lpo->byteCount && rio->recvcount++) { + /* account for additional input */ + buff->recv_length += (int)lpo->byteCount; + + /* + * Now mimic the Unix line discipline. + */ + sptr = (char *)buff->recv_buffer; + send = sptr + buff->recv_length; + obuf = NULL; + dptr = NULL; + + /* hack #1: eat away leading CR/LF if here is any */ + while (sptr != send) { + ch = *sptr; + if (ch != '\n' && ch != '\r') + break; + sptr++; + } + + while (sptr != send) + { + /* get new buffer to store line */ + obuf = get_free_recv_buffer_alloc(); + obuf->fd = rio->fd; + obuf->receiver = &process_refclock_packet; + obuf->dstadr = NULL; + obuf->recv_peer = rio->srcclock; + set_serial_recv_time(obuf, lpo); + + /* + * Copy data to new buffer, convert CR to LF on + * the fly. Stop after either. + */ + dptr = (char *)obuf->recv_buffer; + eol = FALSE; + while (sptr != send && !eol) { + ch = *sptr++; + if ('\r' == ch) { + ch = '\n'; + } + *dptr++ = ch; + eol = ('\n' == ch); + } + obuf->recv_length = + (int)(dptr - (char *)obuf->recv_buffer); + + /* + * If NL found, push this buffer and prepare to + * get a new one. + */ + if (eol) { + add_full_recv_buffer(obuf); + SetEvent(WaitableIoEventHandle); + obuf = NULL; + } + } + + /* + * If we still have an output buffer, continue to fill + * it again. + */ + if (obuf) { + obuf->recv_length = + (int)(dptr - (char *)obuf->recv_buffer); + freerecvbuf(buff); + buff = obuf; + } else { + /* clear the current buffer, continue */ + buff->recv_length = 0; + } + } else { + buff->recv_length = 0; + } + + IoCtxReset(lpo); + QueueSerialWait(rio, buff, lpo); + return 0; +} + + +/* + * ------------------------------------------------------------------- + * Serial IO stuff + * + * Part 3 -- raw data input + * + * Raw data processing is fast enough to do without offloading to the + * worker pool, so this is rather short'n sweet... + * ------------------------------------------------------------------- + */ + +static BOOL +QueueRawSerialRead( + RIO_t * rio, + recvbuf_t * buff, + IoCtx_t * lpo + ) +{ + BOOL rc; + + lpo->onIoDone = OnRawSerialReadComplete; + lpo->recv_buf = buff; + lpo->flRawMem = 0; + lpo->rio = rio; + buff->fd = rio->fd; + + rc = ReadFile((HANDLE)_get_osfhandle(rio->fd), + buff->recv_buffer, + sizeof(buff->recv_buffer), + NULL, &lpo->ol); + if (!rc) + return IoResultCheck(GetLastError(), lpo, + "Can't read raw from Refclock"); + return TRUE; +} + + +static void +OnRawSerialReadComplete( + ULONG_PTR key, + IoCtx_t * lpo + ) +{ + RIO_t * rio; + recvbuf_t * buff; + + /* check and bail out if operation failed */ + if (!IoResultCheck(lpo->errCode, lpo, + "Raw read from Refclock failed")) + return; + + /* get & validate context and buffer. */ + rio = lpo->rio; + buff = lpo->recv_buf; + NTP_INSIST((ULONG_PTR)rio == key); + + /* ignore 0 bytes read. */ + if (lpo->byteCount > 0) { + buff->recv_length = (int)lpo->byteCount; + buff->dstadr = NULL; + buff->receiver = process_refclock_packet; + buff->recv_peer = rio->srcclock; + set_serial_recv_time(buff, lpo); + add_full_recv_buffer(buff); + SetEvent(WaitableIoEventHandle); + buff = get_free_recv_buffer_alloc(); + } + + buff->recv_length = 0; + QueueSerialWait(rio, buff, lpo); +} + + +static inline void +set_serial_recv_time( + recvbuf_t * obuf, + IoCtx_t * lpo + ) +{ + /* + * Time stamp assignment is interesting. If we + * have a DCD stamp, we use it, otherwise we use + * the FLAG char event time, and if that is also + * not / no longer available we use the arrival + * time. + */ + if (lpo->flTsDCDS) + obuf->recv_time = lpo->DCDSTime; + else if (lpo->flTsFlag) + obuf->recv_time = lpo->FlagTime; + else + obuf->recv_time = lpo->RecvTime; + + lpo->flTsDCDS = lpo->flTsFlag = 0; /* use only once... */ +} + + +/* + * ------------------------------------------------------------------- + * Serial IO stuff + * + * Part 4 -- Overlapped serial output + * + * Again, no need to offload any work. + * ------------------------------------------------------------------- + */ + +/* + * async_write, clone of write(), used by some reflock drivers + */ +int +async_write( + int fd, + const void * data, + unsigned int count + ) +{ + IoCtx_t * lpo; + BOOL rc; + + lpo = IoCtxAlloc(NULL); + if (lpo == NULL) { + DPRINTF(1, ("async_write: out of memory\n")); + errno = ENOMEM; + return -1; + } + + lpo->onIoDone = OnSerialWriteComplete; + lpo->trans_buf = emalloc(count); + lpo->flRawMem = 1; + memcpy(lpo->trans_buf, data, count); + + rc = WriteFile((HANDLE)_get_osfhandle(fd), + lpo->trans_buf, count, + NULL, &lpo->ol); + if (!rc && !IoResultCheck(GetLastError(), lpo, + "Can't write to Refclock")) { + errno = EBADF; + return -1; + } + return count; +} + +static void +OnSerialWriteComplete( + ULONG_PTR key, + IoCtx_t * lpo + ) +{ + /* set RIO and force silent cleanup if no error */ + lpo->rio = (RIO_t *)key; + if (ERROR_SUCCESS == lpo->errCode) + lpo->errCode = ERROR_OPERATION_ABORTED; + IoResultCheck(lpo->errCode, lpo, + "Write to Refclock failed"); +} + + +/* + * ------------------------------------------------------------------- + * Serial IO stuff + * + * Part 5 -- read PPS time stamps + * + * ------------------------------------------------------------------- + */ + +/* The dummy read procedure is used for getting the device context + * into the IO completion thread, using the IO completion queue for + * transport. There are other ways to accomplish what we need here, + * but using the IO machine is handy and avoids a lot of trouble. + */ +static void +OnPpsDummyRead( + ULONG_PTR key, + IoCtx_t * lpo + ) +{ + RIO_t * rio; + + rio = (RIO_t *)key; + lpo->devCtx = DevCtxAttach(rio->device_context); + SetEvent(lpo->ppswake); +} + +__declspec(dllexport) void* __stdcall +ntp_pps_attach_device( + HANDLE hndIo + ) +{ + IoCtx_t myIoCtx; + HANDLE myEvt; + DevCtx_t * dev; + DWORD rc; + + if (!isserialhandle(hndIo)) { + SetLastError(ERROR_INVALID_HANDLE); + return NULL; + } + + ZERO(myIoCtx); + dev = NULL; + myEvt = CreateEvent(NULL, FALSE, FALSE, NULL); + if (NULL == myEvt) + goto done; + + myIoCtx.ppswake = myEvt; + myIoCtx.onIoDone = OnPpsDummyRead; + rc = ReadFile(hndIo, &myIoCtx.byteCount, 0, + &myIoCtx.byteCount, &myIoCtx.ol); + if (!rc && (GetLastError() != ERROR_IO_PENDING)) + goto done; + if (WaitForSingleObject(myEvt, INFINITE) == WAIT_OBJECT_0) + if (NULL == (dev = myIoCtx.devCtx)) + SetLastError(ERROR_INVALID_HANDLE); +done: + rc = GetLastError(); + CloseHandle(myEvt); + SetLastError(rc); + return dev; +} + +__declspec(dllexport) void __stdcall +ntp_pps_detach_device( + DevCtx_t * dev + ) +{ + DevCtxDetach(dev); +} + +__declspec(dllexport) BOOL __stdcall +ntp_pps_read( + DevCtx_t * dev, + PPSData_t * data, + size_t dlen + ) +{ + u_long guard, covc; + int repc; + PPSDataEx_t * ppsbuf; + + + if (dev == NULL) { + SetLastError(ERROR_INVALID_HANDLE); + return FALSE; + } + if (data == NULL || dlen != sizeof(PPSData_t)) { + SetLastError(ERROR_INVALID_PARAMETER); + return FALSE; + } + /* + ** Reading from shared memory in a lock-free fashion can be + ** a bit tricky, since we have to read the components in the + ** opposite direction from the write, and the compiler must + ** not reorder the read sequence. + ** We use interlocked ops and a volatile data source to avoid + ** reordering on compiler and CPU level. The interlocked + ** instruction act as full barriers -- we need only aquire + ** semantics, but we don't have them before VS2010. + */ + repc = 3; + do { + InterlockedExchange((PLONG)&covc, dev->cov_count); + ppsbuf = dev->pps_buff + (covc & PPS_QUEUE_MSK); + *data = ppsbuf->data; + InterlockedExchange((PLONG)&guard, ppsbuf->cov_count); + guard ^= covc; + } while (guard && ~guard && --repc); + + if (guard) { + SetLastError(ERROR_INVALID_DATA); + return FALSE; + } + return TRUE; +} + +/* + * Add a reference clock data structures I/O handles to + * the I/O completion port. Return 1 if any error. + */ +int +io_completion_port_add_clock_io( + RIO_t *rio + ) +{ + IoCtx_t * lpo; + DevCtx_t * dev; + recvbuf_t * buff; + HANDLE h; + + h = (HANDLE)_get_osfhandle(rio->fd); + if (NULL == CreateIoCompletionPort( + h, + hIoCompletionPort, + (ULONG_PTR)rio, + 0)) { + msyslog(LOG_ERR, "Can't add COM port to i/o completion port: %m"); + return 1; + } + + dev = DevCtxAlloc(); + if (NULL == dev) { + msyslog(LOG_ERR, "Can't allocate device context for i/o completion port: %m"); + return 1; + } + rio->device_context = DevCtxAttach(dev); + lpo = IoCtxAlloc(dev); + if (NULL == lpo) { + msyslog(LOG_ERR, "Can't allocate heap for completion port: %m"); + return 1; + } + buff = get_free_recv_buffer_alloc(); + buff->recv_length = 0; + QueueSerialWait(rio, buff, lpo); + + return 0; +} + +void +io_completion_port_remove_clock_io( + RIO_t *rio + ) +{ + if (rio) + DevCtxDetach((DevCtx_t *)rio->device_context); +} + +/* + * Queue a receiver on a socket. Returns 0 if no buffer can be queued + * + * Note: As per the winsock documentation, we use WSARecvFrom. Using + * ReadFile() is less efficient. + */ +static BOOL +QueueSocketRecv( + SOCKET s, + recvbuf_t * buff, + IoCtx_t * lpo + ) +{ + WSABUF wsabuf; + DWORD Flags; + int rc; + + lpo->onIoDone = OnSocketRecv; + lpo->recv_buf = buff; + lpo->flRawMem = 0; + lpo->rio = NULL; + + Flags = 0; + buff->fd = s; + buff->recv_srcadr_len = sizeof(buff->recv_srcadr); + wsabuf.buf = (char *)buff->recv_buffer; + wsabuf.len = sizeof(buff->recv_buffer); + + rc = WSARecvFrom(buff->fd, &wsabuf, 1, NULL, &Flags, + &buff->recv_srcadr.sa, &buff->recv_srcadr_len, + &lpo->ol, NULL); + if (SOCKET_ERROR == rc) + return IoResultCheck(GetLastError(), lpo, + "Can't read from Socket"); + return TRUE; +} + + +static void +OnSocketRecv( + ULONG_PTR key, + IoCtx_t * lpo + ) +{ + recvbuf_t * buff; + recvbuf_t * newbuff; + struct interface * inter = (struct interface *)key; + + NTP_REQUIRE(NULL != lpo); + NTP_REQUIRE(NULL != lpo->recv_buf); + + /* check and bail out if operation failed */ + if (!IoResultCheck(lpo->errCode, lpo, + "Read from Socket failed")) + return; + + /* + * Convert the overlapped pointer back to a recvbuf pointer. + * Fetch items that are lost when the context is queued again. + */ + buff = lpo->recv_buf; + buff->recv_time = lpo->RecvTime; + buff->recv_length = (int)lpo->byteCount; + + /* + * Get a new recv buffer for the replacement socket receive + */ + newbuff = get_free_recv_buffer_alloc(); + if (NULL != newbuff) { + QueueSocketRecv(inter->fd, newbuff, lpo); + } else { + IoCtxFree(lpo); + msyslog(LOG_ERR, "Can't add I/O request to socket"); + } + DPRINTF(4, ("%sfd %d %s recv packet mode is %d\n", + (MODE_BROADCAST == get_packet_mode(buff)) + ? " **** Broadcast " + : "", + (int)buff->fd, stoa(&buff->recv_srcadr), + get_packet_mode(buff))); + + /* + * If we keep it add some info to the structure + */ + if (buff->recv_length && !inter->ignore_packets) { + NTP_INSIST(buff->recv_srcadr_len <= + sizeof(buff->recv_srcadr)); + buff->receiver = &receive; + buff->dstadr = inter; + packets_received++; + handler_pkts++; + inter->received++; + add_full_recv_buffer(buff); + + DPRINTF(2, ("Received %d bytes fd %d in buffer %p from %s\n", + buff->recv_length, (int)buff->fd, buff, + stoa(&buff->recv_srcadr))); + + /* + * Now signal we have something to process + */ + SetEvent(WaitableIoEventHandle); + } else + freerecvbuf(buff); +} + + +/* + * Add a socket handle to the I/O completion port, and send + * NTP_RECVS_PER_SOCKET recv requests to the kernel. + */ +int +io_completion_port_add_socket( + SOCKET fd, + struct interface * inter + ) +{ + IoCtx_t * lpo; + recvbuf_t * buff; + int n; + + if (fd != INVALID_SOCKET) { + if (NULL == CreateIoCompletionPort((HANDLE)fd, + hIoCompletionPort, (ULONG_PTR)inter, 0)) { + msyslog(LOG_ERR, + "Can't add socket to i/o completion port: %m"); + return 1; + } + } + + /* + * Windows 2000 bluescreens with bugcheck 0x76 + * PROCESS_HAS_LOCKED_PAGES at ntpd process + * termination when using more than one pending + * receive per socket. A runtime version test + * would allow using more on newer versions + * of Windows. + */ + +#define WINDOWS_RECVS_PER_SOCKET 1 + + for (n = 0; n < WINDOWS_RECVS_PER_SOCKET; n++) { + + buff = get_free_recv_buffer_alloc(); + lpo = IoCtxAlloc(NULL); + if (lpo == NULL) + { + msyslog(LOG_ERR + , "Can't allocate IO completion context: %m"); + return 1; + } + + QueueSocketRecv(fd, buff, lpo); + + } + return 0; +} + + +/* + * io_completion_port_sendto() -- sendto() replacement for Windows + * + * Returns len after successful send. + * Returns -1 for any error, with the error code available via + * msyslog() %m, or GetLastError(). + */ +int +io_completion_port_sendto( + int fd, + void * pkt, + size_t len, + sockaddr_u * dest + ) +{ + static u_long time_next_ifscan_after_error; + WSABUF wsabuf; + DWORD octets_sent; + DWORD Result; + int errval; + int AddrLen; + + wsabuf.buf = (void *)pkt; + wsabuf.len = len; + AddrLen = SOCKLEN(dest); + octets_sent = 0; + + Result = WSASendTo(fd, &wsabuf, 1, &octets_sent, 0, + &dest->sa, AddrLen, NULL, NULL); + errval = GetLastError(); + if (SOCKET_ERROR == Result) { + if (ERROR_UNEXP_NET_ERR == errval) { + /* + * We get this error when trying to send if the + * network interface is gone or has lost link. + * Rescan interfaces to catch on sooner, but no + * more often than once per minute. Once ntpd + * is able to detect changes without polling + * this should be unneccessary + */ + if (time_next_ifscan_after_error < current_time) { + time_next_ifscan_after_error = current_time + 60; + timer_interfacetimeout(current_time); + } + DPRINTF(4, ("sendto unexpected network error, interface may be down\n")); + } else { + msyslog(LOG_ERR, "WSASendTo(%s) error %m", + stoa(dest)); + } + SetLastError(errval); + return -1; + } + + if (len != (int)octets_sent) { + msyslog(LOG_ERR, "WSASendTo(%s) sent %u of %d octets", + stoa(dest), octets_sent, len); + SetLastError(ERROR_BAD_LENGTH); + return -1; + } + + DPRINTF(4, ("sendto %s %d octets\n", stoa(dest), len)); + + return len; +} + + + +/* + * GetReceivedBuffers + * Note that this is in effect the main loop for processing requests + * both send and receive. This should be reimplemented + */ +int +GetReceivedBuffers() +{ + DWORD index; + HANDLE ready; + int have_packet; + + have_packet = FALSE; + while (!have_packet) { + index = WaitForMultipleObjects(ActiveWaitHandles, + WaitHandles, FALSE, + INFINITE); + switch (index) { + + case WAIT_OBJECT_0 + 0: /* Io event */ + DPRINTF(4, ("IoEvent occurred\n")); + have_packet = TRUE; + break; + + case WAIT_OBJECT_0 + 1: /* exit request */ + exit(0); + break; + + case WAIT_OBJECT_0 + 2: /* timer */ + timer(); + break; + + case WAIT_IO_COMPLETION: /* loop */ + break; + + case WAIT_TIMEOUT: + msyslog(LOG_ERR, + "WaitForMultipleObjects INFINITE timed out."); + exit(1); + break; + + case WAIT_FAILED: + msyslog(LOG_ERR, + "WaitForMultipleObjects Failed: Error: %m"); + exit(1); + break; + + default: + DEBUG_INSIST((index - WAIT_OBJECT_0) < + ActiveWaitHandles); + ready = WaitHandles[index - WAIT_OBJECT_0]; + handle_blocking_resp_sem(ready); + break; + + } /* switch */ + } + + return (full_recvbuffs()); /* get received buffers */ +} + +#else + static int NonEmptyCompilationUnit; +#endif + diff --git a/ports/winnt/ntpd/ntservice.c b/ports/winnt/ntpd/ntservice.c new file mode 100644 index 0000000..f0840bd --- /dev/null +++ b/ports/winnt/ntpd/ntservice.c @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 1999-2002 Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> + +#include <ntp_stdlib.h> +#include "syslog.h" +#include "ntpd.h" +#include "ntservice.h" +#include "clockstuff.h" +#include "ntp_iocompletionport.h" +#include "ntpd-opts.h" +#include "isc/win32os.h" +#include <ssl_applink.c> + + +/* + * Globals + */ +static SERVICE_STATUS_HANDLE hServiceStatus = 0; +static BOOL foreground = FALSE; +static BOOL computer_shutting_down = FALSE; +static int glb_argc; +static char **glb_argv; +HANDLE hServDoneEvent = NULL; +extern int accept_wildcard_if_for_winnt; + +/* + * Forward declarations + */ +void uninit_io_completion_port(); +int ntpdmain(int argc, char *argv[]); +void WINAPI ServiceControl(DWORD dwCtrlCode); +void ntservice_exit(void); + +#ifdef WRAP_DBG_MALLOC +void *wrap_dbg_malloc(size_t s, const char *f, int l); +void *wrap_dbg_realloc(void *p, size_t s, const char *f, int l); +void wrap_dbg_free(void *p); +#endif + +void WINAPI +service_main( + DWORD argc, + LPTSTR *argv + ) +{ + if (argc > 1) { + /* + * Let command line parameters from the Windows SCM GUI + * override the standard parameters from the ImagePath registry key. + */ + glb_argc = argc; + glb_argv = argv; + } + + ntpdmain(glb_argc, glb_argv); +} + + +/* + * This is the entry point for the executable + * We can call ntpdmain() explicitly or via StartServiceCtrlDispatcher() + * as we need to. + */ +int main( + int argc, + char ** argv + ) +{ + int rc; + int argc_after_opts; + char ** argv_after_opts; + + ssl_applink(); + + /* Save the command line parameters */ + glb_argc = argc; + glb_argv = argv; + + /* Under original Windows NT we must not discard the wildcard */ + /* socket to workaround a bug in NT's getsockname(). */ + if (isc_win32os_majorversion() <= 4) + accept_wildcard_if_for_winnt = TRUE; + + argc_after_opts = argc; + argv_after_opts = argv; + parse_cmdline_opts(&argc_after_opts, &argv_after_opts); + + if (HAVE_OPT(QUIT) + || HAVE_OPT(SAVECONFIGQUIT) + || HAVE_OPT(HELP) +#ifdef DEBUG + || OPT_VALUE_SET_DEBUG_LEVEL != 0 +#endif + || HAVE_OPT(NOFORK)) + foreground = TRUE; + + if (foreground) /* run in console window */ + rc = ntpdmain(argc, argv); + else { + /* Start up as service */ + + SERVICE_TABLE_ENTRY dispatchTable[] = { + { TEXT(NTP_DISPLAY_NAME), service_main }, + { NULL, NULL } + }; + + rc = StartServiceCtrlDispatcher(dispatchTable); + if (rc) + rc = 0; + else { + rc = GetLastError(); + fprintf(stderr, + "%s: unable to start as service:\n" + "%s\n" + "Use -d, -q, -n, -?, --help or " + "--saveconfigquit to run " + "interactive.\n", + argv[0], ntp_strerror(rc)); + } + } + return rc; +} + + +/* + * Initialize the Service by registering it. + */ +void +ntservice_init() { + char ConsoleTitle[256]; + + if (!foreground) { + /* Register handler with the SCM */ + hServiceStatus = RegisterServiceCtrlHandler(NTP_DISPLAY_NAME, + ServiceControl); + if (!hServiceStatus) { + NTReportError(NTP_SERVICE_NAME, + "could not register service control handler"); + exit(1); + } + UpdateSCM(SERVICE_RUNNING); + } else { + snprintf(ConsoleTitle, sizeof(ConsoleTitle), + "NTP Version %s", Version); + ConsoleTitle[sizeof(ConsoleTitle) - 1] = '\0'; + SetConsoleTitle(ConsoleTitle); + } + +#ifdef _CRTDBG_MAP_ALLOC + /* ask the runtime to dump memory leaks at exit */ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF + | _CRTDBG_LEAK_CHECK_DF /* report on leaks at exit */ + | _CRTDBG_CHECK_ALWAYS_DF /* Check heap every alloc/dealloc */ +#ifdef MALLOC_LINT + | _CRTDBG_DELAY_FREE_MEM_DF /* Don't actually free memory */ +#endif + ); +#ifdef DOES_NOT_WORK + /* + * hart: I haven't seen this work, running ntpd.exe -n from a shell + * to both a file and the debugger output window. Docs indicate it + * should cause leak report to go to stderr, but it's only seen if + * ntpd runs under a debugger (in the debugger's output), even with + * this block of code enabled. + */ + _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR); + _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG); +#endif +#endif /* using MS debug C runtime heap, _CRTDBG_MAP_ALLOC */ + + atexit( ntservice_exit ); +} + + +/* + * Routine to check if the service is stopping + * because the computer is shutting down + */ +BOOL +ntservice_systemisshuttingdown() { + return computer_shutting_down; +} + +void +ntservice_exit( void ) +{ + uninit_io_completion_port(); + Sleep( 200 ); //##++ + + reset_winnt_time(); + + msyslog(LOG_INFO, "ntservice: The Network Time Protocol Service is stopping."); + + if (!foreground) { + /* service mode, need to have the service_main routine + * register with the service control manager that the + * service has stopped running, before exiting + */ + UpdateSCM(SERVICE_STOPPED); + } +} + +/* + * ServiceControl(): Handles requests from the SCM and passes them on + * to the service. + */ +void WINAPI +ServiceControl( + DWORD dwCtrlCode + ) +{ + switch (dwCtrlCode) { + + case SERVICE_CONTROL_SHUTDOWN: + computer_shutting_down = TRUE; + /* fall through to stop case */ + + case SERVICE_CONTROL_STOP: + if (WaitableExitEventHandle != NULL) { + SetEvent(WaitableExitEventHandle); + UpdateSCM(SERVICE_STOP_PENDING); + Sleep(100); //##++ + } + return; + + case SERVICE_CONTROL_PAUSE: + case SERVICE_CONTROL_CONTINUE: + case SERVICE_CONTROL_INTERROGATE: + default: + break; + } + UpdateSCM(SERVICE_RUNNING); +} + +/* + * Tell the Service Control Manager the state of the service. + */ +void UpdateSCM(DWORD state) { + SERVICE_STATUS ss; + static DWORD dwState = SERVICE_STOPPED; + + if (hServiceStatus) { + if (state) + dwState = state; + + ZERO(ss); + ss.dwServiceType |= SERVICE_WIN32_OWN_PROCESS; + ss.dwCurrentState = dwState; + ss.dwControlsAccepted = SERVICE_ACCEPT_STOP | + SERVICE_ACCEPT_SHUTDOWN; + ss.dwCheckPoint = 0; + ss.dwServiceSpecificExitCode = 0; + ss.dwWin32ExitCode = NO_ERROR; + ss.dwWaitHint = dwState == SERVICE_STOP_PENDING ? 5000 : 1000; + + SetServiceStatus(hServiceStatus, &ss); + } +} + +BOOL WINAPI +OnConsoleEvent( + DWORD dwCtrlType + ) +{ + switch (dwCtrlType) { +#ifdef DEBUG + case CTRL_BREAK_EVENT: + if (debug > 0) { + debug <<= 1; + } + else { + debug = 1; + } + if (debug > 8) { + debug = 0; + } + msyslog(LOG_DEBUG, "debug level %d", debug); + break; +#else + case CTRL_BREAK_EVENT: + break; +#endif + + case CTRL_C_EVENT: + case CTRL_CLOSE_EVENT: + case CTRL_SHUTDOWN_EVENT: + if (WaitableExitEventHandle != NULL) { + SetEvent(WaitableExitEventHandle); + Sleep(100); //##++ + } + break; + + default : + /* pass to next handler */ + return FALSE; + } + + /* we've handled it, no more handlers should be called */ + return TRUE; +} + |