#!/usr/bin/env python
#
# Public Domain 2014-2019 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# latency_metric.py
#   Print latency metrics for workgen runs that generate monitor.json

import json, sys
from datetime import datetime

# A 'safe' divide shown as a string.
def divide(a, b):
    if b == 0.0:
        if a == 0.0:
            return '0.000 (no entries)'
        else:
            return 'infinite (divide by zero)'
    else:
        return '%.3f' % (float(a) / float(b))

# A collection of statistics that are related to a specific condition
# during the run, for example during checkpoints or not during checkpoints.
class Digest:
    def __init__(self):
        self.entries = 0
        self.ops = 0
        self.lat = 0
        self.lat_99 = 0
        self.lat_99_raw = 0
        self.lat_max = 0
        self.secs = 0.0

    # Accumulate one monitoring interval.  The average and 99% latencies are
    # summed weighted by the number of operations in the interval; the 99%
    # latency is also summed unweighted ('raw').
    def entry(self, secs, ops, lat, lat_99, lat_max):
        self.secs += secs
        self.ops += ops
        self.lat += lat * ops
        self.lat_99_raw += lat_99
        self.lat_99 += lat_99 * ops
        if lat_max > self.lat_max:
            self.lat_max = lat_max
        self.entries += 1

    def time_secs(self):
        return self.secs

    def latency_99_raw_average(self):
        return float(self.lat_99_raw) / float(self.entries)

    def latency_max(self):
        return self.lat_max

    def latency_average(self):
        return float(self.lat) / float(self.ops)

    def value_as_str(self):
        return '%.3f' % self.latency_average()

    def dump(self, prefix):
        print(prefix + 'json entries: ' + str(self.entries))
        print(prefix + 'operations: ' + str(self.ops))
        print(prefix + 'total latency us: ' + str(self.lat))
        print(prefix + 'latency 99% us, weighted sum: ' + str(self.lat_99))
        print(prefix + 'latency 99% us, raw sum: ' + str(self.lat_99_raw))
        print(prefix + 'latency max us: ' + str(self.lat_max))
        print(prefix + 'elapsed secs: ' + str(self.secs))

class Metric:
    def __init__(self, name, desc):
        self.name = name
        self.desc = desc
        self.value = 0.0

    def set_value(self, value):
        self.value = value

# A set of latency metrics collected for a single run.
# A user will see the short name, and if the script is run with the '--raw'
# option, the elaborated description will also be shown.
class FileMetrics:
    def __init__(self, filename):
        self.filename = filename
        all = []

        # This is the average latency for all read operations.
        # Lower is better.
        self.latency_avg = m = Metric('Average latency reads us',
            'total latency of read ops divided by operations')
        all.append(m)

        # This is the maximum latency over all read operations.
        # Lower is better.
        self.latency_max = m = Metric('Max latency reads us',
            'maximum of all read op latencies')
        all.append(m)

        # This is the ratio of the two previously reported numbers, latency_max
        # and latency_avg.
        #
        # Lower is better (best is 1.0), it means more predictable response
        # times.
        self.ratio_max_avg = m = Metric('Max vs average latency',
            'ratio of maximum latency to average latency for read ops')
        all.append(m)

        # This is the ratio of 99% latency reads, checkpoint vs normal.
        # That is, for all 1-second intervals that occur during a checkpoint,
        # we take the 99 percentile latency for read operations, and average
        # them, weighted by how many read operations were performed in each
        # interval. We do the same for all 1-second intervals that occur
        # outside of a checkpoint, and finally take the ratio between these
        # two numbers. This is a more sophisticated measure of the smoothness
        # of overall response times, looking at all latencies, rather than
        # focusing on the one worst latency.
        #
        # Lower is better (best is 1.0), it means more predictable response
        # times.
        self.ratio_latency_99 = m = Metric('Checkpoint vs normal 99%',
            'ratio of the average of all 99% latency operations, ' +
            'during checkpoint times vs normal')
        all.append(m)

        # The proportion of time spent in a checkpoint.
        self.proportion_checkpoint_time = m = Metric('Proportion of ckpt time',
            'the proportion of time doing checkpoints')
        all.append(m)

        self.all_metrics = all
        self.read_normal = None
        self.read_ckpt = None
        self.read_all = None

    def calculate(self):
        with open(self.filename) as f:
            lines = f.readlines()
        s = '{ "ts" : [' + ','.join(lines) + '] }'
        json_data = json.loads(s)
        self.calculate_using_json(json_data)

    def calculate_using_json(self, json_data):
        ckpt_in_progress = False
        ckpt_count = 0
        prev_dt = None

        # We digest the latency stats during 'normal' (non-checkpoint) times
        # and also during checkpoint times.
        self.read_normal = Digest()
        self.read_ckpt = Digest()
        self.read_all = Digest()
        for entry in json_data['ts']:
            time_s = entry['localTime']
            dt = datetime.strptime(time_s, '%Y-%m-%dT%H:%M:%S.%fZ')
            is_ckpt = entry['workgen']['checkpoint']['active'] > 0
            if not ckpt_in_progress and is_ckpt:
                ckpt_count += 1
            ckpt_in_progress = is_ckpt

            # In the first entry, we don't have an elapsed time, so
            # we'll ignore its data.
            if prev_dt is not None:
                timediff = dt - prev_dt
                seconds = timediff.total_seconds()
                if seconds <= 0.0:
                    raise Exception('invalid time span between entries')
                if is_ckpt:
                    digest = self.read_ckpt
                else:
                    digest = self.read_normal
                rentry = entry['workgen']['read']
                ops = rentry['ops per sec'] * seconds
                lat_avg = rentry['average latency']
                lat_99 = rentry['99% latency']
                lat_max = rentry['max latency']
                digest.entry(seconds, ops, lat_avg, lat_99, lat_max)
                self.read_all.entry(seconds, ops, lat_avg, lat_99, lat_max)
            prev_dt = dt

        if self.read_all.time_secs() == 0.0:
            raise Exception(self.filename + ': no entries, or no time elapsed')
        if self.read_normal.entries == 0 or self.read_normal.ops == 0:
            raise Exception(self.filename +
                ': no operations or entries during non-checkpoint time period')
        if self.read_ckpt.entries == 0 or self.read_ckpt.ops == 0:
            raise Exception(self.filename +
                ': no operations or entries during checkpoint')
        if ckpt_count < 2:
            raise Exception(self.filename + ': need at least 2 checkpoints started')

        self.latency_avg.set_value(self.read_all.latency_average())
        self.latency_max.set_value(self.read_all.latency_max())
        self.ratio_max_avg.set_value(float(self.read_all.latency_max()) /
            float(self.read_all.latency_average()))
        self.ratio_latency_99.set_value(self.read_ckpt.latency_99_raw_average() /
            self.read_normal.latency_99_raw_average())
        self.proportion_checkpoint_time.set_value(
            self.read_ckpt.time_secs() / self.read_all.time_secs())

def table_line(leftcol, cols, spacer):
    return leftcol + spacer + spacer.join(cols) + '\n'

def make_len(value, l):
    return ('%%%ds' % l) % str(value)

def value_format(value):
    return '%.3f' % value

fmlist = []
raw = False
for arg in sys.argv[1:]:
    if arg == '--raw':
        raw = True
    else:
        fm = FileMetrics(arg)
        fm.calculate()
        fmlist.append(fm)

leftlen = 25
collen = 20
filecount = len(fmlist)
dashes = '-' * leftlen
if filecount == 0:
    print('Usage: python latency_metric.py [ --raw ] file.json...')
    print('  input files are typically monitor.json files produced by workgen')
else:
    out = ''
    cols = [make_len(fm.filename, collen) for fm in fmlist]
    out += table_line(' ' * leftlen, cols, ' | ')
    cols = ['-' * collen for fm in fmlist]
    out += table_line('-' * leftlen, cols, '-+-')
    pos = 0
    for m in fmlist[0].all_metrics:
        cols = [make_len(value_format(fm.all_metrics[pos].value), collen)
            for fm in fmlist]
        out += table_line(make_len(m.name, leftlen), cols, ' | ')
        pos += 1
    print(out)
    if raw:
        for fm in fmlist:
            print('file: ' + fm.filename)
            print('  digested metrics collected for reads during non-checkpoints:')
            fm.read_normal.dump('    ')
            print('  digested metrics collected for reads during checkpoints:')
            fm.read_ckpt.dump('    ')
            print('')
            for m in fm.all_metrics:
                print('  ' + m.name + ' (' + m.desc + '): ' + str(m.value))
    print('\nSee ' + __file__ + ' for a more detailed description of each metric.')
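
# For reference, this script expects each line of monitor.json to be a
# standalone JSON object.  The sketch below shows only the fields that
# calculate_using_json() reads, with made-up illustrative values; real workgen
# output may carry additional sections (for other operation types) that this
# script ignores.
#
#   {
#     "localTime": "2019-01-01T00:00:01.000000Z",
#     "workgen": {
#       "checkpoint": { "active": 0 },
#       "read": {
#         "ops per sec": 1000,
#         "average latency": 15,
#         "99% latency": 40,
#         "max latency": 250
#       }
#     }
#   }
#
# Typical invocation, comparing two runs (paths are illustrative):
#   python latency_metric.py --raw run1/monitor.json run2/monitor.json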