author     Luke Chen <luke.chen@mongodb.com>  2019-05-14 17:09:46 +1000
committer  Luke Chen <luke.chen@mongodb.com>  2019-05-14 17:09:46 +1000
commit     4686f07b4b51cb34156275c44fc1138b950f56dc (patch)
tree       d28fe8413ce82397bbdedf40025801aed2d8d7a4 /src/third_party/wiredtiger/tools
parent     ba8a1d5c3f912de150234e7513dedd2b083444ed (diff)
download   mongo-4686f07b4b51cb34156275c44fc1138b950f56dc.tar.gz
Import wiredtiger: a3325482022bc77846ffc0265097edfebccaadd3 from branch mongodb-4.2

ref: 315563f288..a332548202 for: 4.1.12

WT-4747 Python3: change optrack scripts to run under Python3
WT-4779 Disable new assertion related to timestamp consistency
Diffstat (limited to 'src/third_party/wiredtiger/tools')
-rwxr-xr-x  src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py   99
-rwxr-xr-x  src/third_party/wiredtiger/tools/optrack/optrack_to_t2.py         12
-rwxr-xr-x  src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py     19
3 files changed, 71 insertions, 59 deletions
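The bulk of the optrack changes below are a mechanical Python 2 to Python 3 port. A minimal sketch of the recurring idiom swaps (illustrative only, not code from this commit):

    d = {"get": 12, "put": 7}

    "get" in d              # replaces d.has_key("get"); has_key was removed in Py3
    for k, v in d.items():  # replaces d.iteritems(); items() returns a view in Py3
        pass

    7 // 2                  # == 3; Py3 '/' is true division, so int math needs '//'
    int("123456789012")     # replaces long(...); Py3 unified int and long
    list(range(0, 5))       # range() is lazy in Py3; APIs wanting a list need list()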
diff --git a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
index 93eed8fea8b..ca2b2d814a8 100755
--- a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
+++ b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
@@ -25,7 +25,6 @@
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
-#!/usr/bin/env python
import argparse
from bokeh.layouts import column
@@ -35,14 +34,12 @@ from bokeh.models import NumeralTickFormatter, OpenURL, Range1d, TapTool
from bokeh.models.annotations import Label
from bokeh.plotting import figure, output_file, reset_output, save, show
from bokeh.resources import CDN
-import matplotlib
from multiprocessing import Process, Queue, Array
import multiprocessing
import numpy as np
import os
import pandas as pd
import sys
-import statvfs
import traceback
import time
@@ -120,21 +117,29 @@ timeUnitString = "nanoseconds";
PERCENTILE = 0.999;
def initColorList():
+ def hex2(n):
+ return hex(n)[2:].zfill(2) # two digit hex string
global colorList;
- colorList = matplotlib.colors.cnames.keys();
-
- for color in colorList:
- # Some browsers break if you try to give them 'sage'
- if (color == "sage"):
- colorList.remove(color);
- # We reserve red to highlight occurrences of functions
- # that exceeded the user-defined latency threshold. Do
- # not use red for regular function colors.
- #
- elif (color == "red"):
- colorList.remove(color);
+ colorList = []
+ # Create a palette of 100 colors that are spaced around the RGB
+ # color wheel. We advance R,G,B independently at different rates.
+ r = g = b = 0
+ while len(colorList) < 100:
+ r = (r + 87) % 256
+ g = (g + 153) % 256
+ b = (b + 61) % 256
+ # If the color is too close to red or white/grey/black, or
+ # too light, we'll choose a different color. We reserve red
+ # to highlight occurrences of functions that exceeded the
+ # user-defined latency threshold. And white/grey/black and
+ # light colors do not stand out enough.
+ if (r > 128 and g < 20 and b < 20) or \
+ (abs(r - g) < 20 and abs(g - b) < 20 and abs(r - b) < 20) or \
+ (r > 246 and g > 246 and b > 246):
+ g = (g + 100) % 256
+ colorList.append('#' + hex2(r) + hex2(g) + hex2(b))
#
# Each unique function name gets a unique color.
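The new palette logic above replaces matplotlib's named-color list (hence the dropped matplotlib import) with a self-contained generator. A standalone sketch of the same technique, using '#%02x%02x%02x' formatting in place of the script's hex2() helper:

    def make_palette(n=100):
        # Step R, G, B by different strides so consecutive colors
        # land far apart on the RGB cube.
        palette, r, g, b = [], 0, 0, 0
        while len(palette) < n:
            r = (r + 87) % 256
            g = (g + 153) % 256
            b = (b + 61) % 256
            # Skip near-red (reserved for latency outliers) and
            # near-grey/near-white colors that read poorly on charts.
            if (r > 128 and g < 20 and b < 20) or \
               (abs(r - g) < 20 and abs(g - b) < 20 and abs(r - b) < 20) or \
               (r > 246 and g > 246 and b > 246):
                g = (g + 100) % 256
            palette.append('#%02x%02x%02x' % (r, g, b))
        return palette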
@@ -147,7 +152,7 @@ def getColorForFunction(function):
global lastColorUsed;
global funcToColor;
- if not funcToColor.has_key(function):
+ if not function in funcToColor:
funcToColor[function] = colorList[lastColorUsed % len(colorList)];
lastColorUsed += 1;
@@ -225,14 +230,14 @@ def plotOutlierHistogram(dataframe, maxOutliers, func,
y_axis_label = "Number of outliers",
tools = TOOLS, toolbar_location="above");
- y_ticker_max = p.plot_height / pixelsPerHeightUnit;
- y_ticker_step = max(1, (maxOutliers + 1)/y_ticker_max);
- y_upper_bound = (maxOutliers / y_ticker_step + 2) * y_ticker_step;
+ y_ticker_max = p.plot_height // pixelsPerHeightUnit;
+ y_ticker_step = max(1, (maxOutliers + 1)//y_ticker_max);
+ y_upper_bound = (maxOutliers // y_ticker_step + 2) * y_ticker_step;
p.yaxis.ticker = FixedTicker(ticks =
- range(0, y_upper_bound, y_ticker_step));
+ list(range(0, y_upper_bound, y_ticker_step)));
p.ygrid.ticker = FixedTicker(ticks =
- range(0, y_upper_bound, y_ticker_step));
+ list(range(0, y_upper_bound, y_ticker_step)));
p.xaxis.formatter = NumeralTickFormatter(format="0,");
p.y_range = Range1d(0, y_upper_bound);
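Two Python 3 behavior changes drive the hunk above: '/' on two ints now yields a float, so pixel and tick arithmetic switches to floor division '//', and range() returns a lazy object, so it is wrapped in list() (presumably because bokeh's FixedTicker property validation wants a concrete sequence). For example:

    900 / 100              # 9.0 in Python 3 (true division; was 9 in Python 2)
    900 // 100             # 9, an int suitable for counts and indices
    list(range(0, 10, 2))  # [0, 2, 4, 6, 8]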
@@ -284,7 +289,7 @@ def normalizeIntervalData():
print(color.BLUE + color.BOLD + "Normalizing data..." + color.END);
- for file, df in perFileDataFrame.iteritems():
+ for file, df in perFileDataFrame.items():
df['origstart'] = df['start'];
df['start'] = df['start'] - firstTimeStamp;
df['end'] = df['end'] - firstTimeStamp;
@@ -531,8 +536,8 @@ def generateBucketChartForFile(figureName, dataframe, y_max, x_min, x_max):
p.yaxis.major_tick_line_color = None;
p.yaxis.minor_tick_line_color = None;
p.yaxis.major_label_text_font_size = '0pt';
- p.yaxis.ticker = FixedTicker(ticks = range(0, y_max+1));
- p.ygrid.ticker = FixedTicker(ticks = range(0, y_max+1));
+ p.yaxis.ticker = FixedTicker(ticks = list(range(0, y_max+1)));
+ p.ygrid.ticker = FixedTicker(ticks = list(range(0, y_max+1)));
p.xaxis.formatter = NumeralTickFormatter(format="0,");
p.title.text_font_style = "bold";
@@ -541,7 +546,7 @@ def generateBucketChartForFile(figureName, dataframe, y_max, x_min, x_max):
top = 'stackdepthNext', color = 'color', line_color = "lightgrey",
line_width = 0.5, source=cds);
- for func, fColor in funcToColor.iteritems():
+ for func, fColor in funcToColor.items():
# If this function is not present in this dataframe,
# we don't care about it.
@@ -555,7 +560,7 @@ def generateBucketChartForFile(figureName, dataframe, y_max, x_min, x_max):
# add it again to avoid redundancy in the charts and
# in order not to waste space.
#
- if (colorAlreadyUsedInLegend.has_key(fColor)):
+ if fColor in colorAlreadyUsedInLegend:
continue;
else:
colorAlreadyUsedInLegend[fColor] = True;
@@ -676,7 +681,7 @@ def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF,
save(column(figuresForAllFiles), filename = fileName,
title=intervalTitle, resources=CDN);
- retFilename.value = fileName;
+ retFilename.value = fileName.encode();
# Generate a plot that shows a view of the entire timeline in a form of
@@ -718,8 +723,8 @@ def generateNavigatorFigure(dataframe, i, title):
p.yaxis.major_tick_line_color = None;
p.yaxis.minor_tick_line_color = None;
p.yaxis.major_label_text_font_size = '0pt';
- p.yaxis.ticker = FixedTicker(ticks = range(0, 1));
- p.ygrid.ticker = FixedTicker(ticks = range(0, 1));
+ p.yaxis.ticker = FixedTicker(ticks = list(range(0, 1)));
+ p.ygrid.ticker = FixedTicker(ticks = list(range(0, 1)));
p.xaxis.formatter = NumeralTickFormatter(format="0,");
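The retFilename.value = fileName.encode() change a few hunks above is needed because a multiprocessing Array with typecode 'c' holds raw bytes, and in Python 3 a str is no longer a byte sequence. A minimal illustration (the value is illustrative, not from the script):

    from multiprocessing import Array

    buf = Array('c', 64)                 # shared fixed-size byte buffer
    buf.value = "plot-0.html".encode()   # str must be encoded to bytes in Py3
    print(buf.value.decode())            # "plot-0.html"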
@@ -780,7 +785,8 @@ def createIntervalNavigatorDF(numBuckets, timeUnitsPerBucket):
def waitOnOneProcess(runningProcesses):
success = False;
- for i, p in runningProcesses.items():
+ # Use a copy since we will be deleting entries from the original
+ for i, p in runningProcesses.copy().items():
if (not p.is_alive()):
del runningProcesses[i];
success = True;
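The copy() above avoids a Python 3 pitfall: deleting keys from a dict while iterating its items() view raises RuntimeError ("dictionary changed size during iteration"), whereas Python 2's items() returned an independent list. A small reproduction, stdlib only:

    procs = {1: "a", 2: "b"}
    for k, p in procs.copy().items():  # iterate a snapshot
        del procs[k]                   # safe: only the original dict shrinks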
@@ -797,7 +803,7 @@ def updatePercentComplete(runnableProcesses, runningProcesses,
percentComplete = float(totalWorkItems - len(runnableProcesses) \
- len(runningProcesses)) / float(totalWorkItems) * 100;
- print(color.BLUE + color.BOLD + " " + workName),
+ sys.stdout.write(color.BLUE + color.BOLD + "... " + workName);
sys.stdout.write(" %d%% complete \r" % (percentComplete) );
sys.stdout.flush();
@@ -817,8 +823,8 @@ def generateTSSlicesForBuckets():
returnValues = {};
spawnedProcesses = {};
- numBuckets = plotWidth / pixelsPerWidthUnit;
- timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) / numBuckets;
+ numBuckets = plotWidth // pixelsPerWidthUnit;
+ timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) // numBuckets;
navigatorDF = createIntervalNavigatorDF(numBuckets, timeUnitsPerBucket);
@@ -827,7 +833,7 @@ def generateTSSlicesForBuckets():
+ color.END);
for i in range(numBuckets):
- retFilename = Array('c', os.statvfs('/')[statvfs.F_NAMEMAX]);
+ retFilename = Array('c', os.statvfs('/').f_namemax)
lowerBound = i * timeUnitsPerBucket;
upperBound = (i+1) * timeUnitsPerBucket;
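Python 3 removed the standalone statvfs module that supplied index constants such as statvfs.F_NAMEMAX; os.statvfs() now returns a named result whose fields are attributes. For example (POSIX systems):

    import os

    st = os.statvfs('/')
    st.f_namemax   # max filename length; replaces os.statvfs('/')[statvfs.F_NAMEMAX]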
@@ -914,7 +920,7 @@ def checkForTimestampAndGetRowSkip(fname):
if (len(words) == 1):
try:
- perFileTimeStamps[fname] = long(words[0]);
+ perFileTimeStamps[fname] = int(words[0]);
except ValueError:
print(color.BOLD + color.RED +
"Could not parse seconds since Epoch on first line" +
@@ -952,7 +958,7 @@ def processFile(fname, dumpCleanDataBool):
funcDF = iDF.loc[lambda iDF: iDF.function == func, :];
funcDF = funcDF.drop(columns = ['function']);
- if (not perFuncDF.has_key(func)):
+ if not func in perFuncDF:
perFuncDF[func] = funcDF;
else:
perFuncDF[func] = pd.concat([perFuncDF[func], funcDF]);
@@ -974,7 +980,7 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames):
statisticalOutlierThreshold = 0;
statisticalOutlierThresholdDescr = "";
- userLatencyThreshold = sys.maxint;
+ userLatencyThreshold = sys.maxsize;
userLatencyThresholdDescr = None;
@@ -1007,10 +1013,10 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames):
# we will highlight those bars that contain operations whose
# duration exceeded the user-defined threshold.
#
- if (userDefinedLatencyThresholds.has_key(func)):
+ if (func in userDefinedLatencyThresholds):
userLatencyThreshold = userDefinedLatencyThresholds[func];
userLatencyThresholdDescr = userDefinedThresholdNames[func];
- elif (userDefinedLatencyThresholds.has_key("*")):
+ elif ("*" in userDefinedLatencyThresholds):
userLatencyThreshold = userDefinedLatencyThresholds["*"];
userLatencyThresholdDescr = userDefinedThresholdNames["*"];
@@ -1022,8 +1028,8 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames):
"th percentile)";
- numBuckets = plotWidth / pixelsPerWidthUnit;
- timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) / numBuckets;
+ numBuckets = plotWidth // pixelsPerWidthUnit;
+ timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) // numBuckets;
bucketHeights = [];
markers = [];
@@ -1081,7 +1087,7 @@ def createOutlierHistogramForFunction(func, funcDF, bucketFilenames):
dataframe = pd.DataFrame(data=dict);
dataframe['markerX'] = dataframe['lowerbound'] + \
(dataframe['upperbound'] -
- dataframe['lowerbound']) / 2 ;
+ dataframe['lowerbound']) // 2 ;
dataframe['markerY'] = dataframe['height'] + 0.2;
return plotOutlierHistogram(dataframe, maxOutliers, func,
@@ -1172,9 +1178,9 @@ def parseConfigFile(fname):
elif (firstNonCommentLine):
try:
unitsPerSecond = int(line);
- unitsPerMillisecond = unitsPerSecond / 1000;
- unitsPerMicrosecond = unitsPerSecond / 1000000;
- unitsPerNanosecond = unitsPerSecond / 1000000000;
+ unitsPerMillisecond = unitsPerSecond // 1000;
+ unitsPerMicrosecond = unitsPerSecond // 1000000;
+ unitsPerNanosecond = unitsPerSecond // 1000000000;
timeUnitString = getTimeUnitString(unitsPerSecond);
@@ -1321,7 +1327,8 @@ def main():
i += 1;
percentComplete = float(i) / float(totalFuncs) * 100;
- print(color.BLUE + color.BOLD + " Generating outlier histograms... "),
+ sys.stdout.write(color.BLUE + color.BOLD +
+ " Generating outlier histograms... ");
sys.stdout.write("%d%% complete \r" % (percentComplete) );
sys.stdout.flush();
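The progress-reporting hunks above replace Python 2's trailing-comma print (which suppressed the newline) with explicit sys.stdout.write() calls; in Python 3 the trailing comma no longer suppresses anything, so write() plus a carriage return is the portable way to redraw a progress line in place. A minimal sketch of the pattern:

    import sys

    sys.stdout.write(" Generating outlier histograms... %d%% complete \r" % 42)
    sys.stdout.flush()  # flush so the carriage-return overwrite shows immediately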
diff --git a/src/third_party/wiredtiger/tools/optrack/optrack_to_t2.py b/src/third_party/wiredtiger/tools/optrack/optrack_to_t2.py
index 80d004376f2..beb11627567 100755
--- a/src/third_party/wiredtiger/tools/optrack/optrack_to_t2.py
+++ b/src/third_party/wiredtiger/tools/optrack/optrack_to_t2.py
@@ -230,7 +230,7 @@ def checkForTimestampAndGetRowSkip(fname):
if (len(words) == 1):
try:
- firstTimeStamp = long(words[0]);
+ firstTimeStamp = int(words[0]);
except ValueError:
print(color.BOLD + color.RED +
"Could not parse seconds since Epoch on first line" +
@@ -308,7 +308,7 @@ def parseIntervals(df, firstTimeStamp, fname):
firstIntervalTimestampSeconds = firstTimeStamp;
lastIntervalTimestampSeconds = firstIntervalTimestampSeconds + \
(lastTimestampUnits - firstTimestampUnits) \
- / unitsPerSecond;
+ // unitsPerSecond;
if (lastIntervalTimestampSeconds < firstIntervalTimestampSeconds):
print(color.BOLD + color.RED +
@@ -404,9 +404,9 @@ def parseIntervals(df, firstTimeStamp, fname):
# Convert the durations to percentages and record them
# in the output dictionary
- for func, duration in thisIntDict.iteritems():
+ for func, duration in thisIntDict.items():
outputDictKey = columnNamePrefix + func;
- percentDuration = float(duration) / \
+ percentDuration = float(duration) // \
float(intervalLength * unitsPerSecond) * 100;
outputDict[outputDictKey].append(percentDuration);
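Worth noting when reading the hunk above: '//' applies floor division even to float operands, so the resulting percentage is a whole-number-valued float rather than the fractional value '/' would give:

    float(7) // float(2)   # 3.0 (floor division still floors with floats)
    7 / 2                  # 3.5 (true division in Python 3)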
@@ -415,7 +415,7 @@ def parseIntervals(df, firstTimeStamp, fname):
# The list at each function's key should be as long as the list
# at key 'time'.
targetLen = len(outputDict['time']);
- for key, theList in outputDict.iteritems():
+ for key, theList in outputDict.items():
if len(theList) < targetLen:
theList.append(0);
@@ -427,7 +427,7 @@ def parseIntervals(df, firstTimeStamp, fname):
#
targetColumns = ['time'];
- for key, value in outputDict.iteritems():
+ for key, value in outputDict.items():
if key != 'time':
targetColumns.append(key);
diff --git a/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py b/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py
index a00ab0d2c7b..06dec019fcd 100755
--- a/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py
+++ b/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py
@@ -70,7 +70,7 @@ def buildTranslationMap(mapFileName):
print(color.BOLD + color.RED);
print("Could not open " + mapFileName + " for reading");
print(color.END);
- return;
+ raise;
# Read lines from the map file and build an in-memory map
# of translations. Each line has a function ID followed by space and
@@ -97,7 +97,7 @@ def buildTranslationMap(mapFileName):
def funcIDtoName(funcID):
- if (functionMap.has_key(funcID)):
+ if funcID in functionMap:
return functionMap[funcID];
else:
print("Could not find the name for func " + str(funcID));
@@ -147,10 +147,14 @@ def validateHeader(file):
try:
bytesRead = file.read(MIN_HEADER_SIZE);
except:
- return False, -1;
+ print(color.BOLD + color.RED +
+ "failed read of input file" + color.END);
+ raise;
if (len(bytesRead) < MIN_HEADER_SIZE):
- return False, -1;
+ print(color.BOLD + color.RED +
+ "unexpected sized input file" + color.END);
+ raise;
version, threadType, tsc_nsec = struct.unpack('=III', bytesRead);
print("VERSION IS " + str(version));
@@ -203,11 +207,11 @@ def parseFile(fileName):
# Open the log file for reading
try:
- file = open(fileName, "r");
+ file = open(fileName, "rb");
except:
print(color.BOLD + color.RED +
"Could not open " + fileName + " for reading" + color.END);
- return;
+ raise;
# Read and validate log header
validVersion, threadType, tsc_nsec_ratio, sec_from_epoch = \
@@ -275,7 +279,8 @@ def parseFile(fileName):
def waitOnOneProcess(runningProcesses):
success = False;
- for fname, p in runningProcesses.items():
+ # Use a copy since we will be deleting entries from the original
+ for fname, p in runningProcesses.copy().items():
if (not p.is_alive()):
del runningProcesses[fname];
success = True;