some changes

master
Сергей Маринкевич 4 years ago
parent 98e162dfb0
commit f3e44d3aa6

@ -19,9 +19,12 @@ __CONTEXT = {
"OUTPUT_PATH": "./",
"FILTER_BY_STDDEVS": 1,
"DRAW_AVG_PLOT": 0,
"DRAW_PERCENTILE_SCATTER": False,
"DRAW_STDDEV_PLOT": False,
"BOLD_LINES_ON_PLOT": False,
"VALUE_MULTI": 1000000000,
"LIMITER": False,
"OUTPUT_FILES_PREFIX": "output",
}
@ -31,14 +34,18 @@ def std_dev(values):
return (squares_sum / (len(values) - 1))**(0.5)
def calc_avg(values, window=None):
    """Return the simple moving average of ``values``.

    One average is produced for every full window of ``window`` consecutive
    values, so the result holds ``len(values) - window + 1`` items.

    Args:
        values: Iterable of numeric measurements.
        window: Number of values per average.  When omitted, the module-level
            ``DRAW_AVG_PLOT`` setting is used, so existing one-argument calls
            keep working.

    Returns:
        List of window averages; empty when ``values`` holds fewer items than
        ``window`` (no full window exists, so there is nothing to average).

    Raises:
        ValueError: If the window size is smaller than 1.
    """
    # Imported locally so the function does not depend on the module's
    # import list.
    from collections import deque
    from math import fsum

    if window is None:
        window = DRAW_AVG_PLOT
    if window < 1:
        raise ValueError("window size must be at least 1")

    # A bounded deque drops the oldest value automatically, replacing the
    # O(n) ``list.pop(0)`` shuffle.
    recent = deque(maxlen=window)
    averages = []
    for value in values:
        recent.append(value)
        if len(recent) == window:
            averages.append(fsum(recent) / window)
    return averages
@ -53,6 +60,53 @@ def __calc_stats(values):
"STDDEV": std_dev(values),
}
def calc_stddevs(values):
    """Return the share (in percent) of ``values`` inside each STDDEV band.

    Bands are measured from the ``MEDIAN`` reported by ``__calc_stats`` in
    multiples of its ``STDDEV``:

    * ``"±1"``  - within one STDDEV of the centre (bounds inclusive);
    * ``"+1<"`` / ``"-1>"`` - above / below one STDDEV, up to two;
    * ``"+2<"`` / ``"-2>"`` - above / below two STDDEVs, up to three;
    * ``"+3<"`` / ``"-3>"`` - beyond three STDDEVs;
    * ``"|2|"`` / ``"|3|"`` - both sides of the matching band added up;
    * ``"±2"`` / ``"±3"`` - cumulative share within two / three STDDEVs.

    Raises:
        ValueError: If a value matches no band at all.
    """
    base = __calc_stats(values)
    sigma = base["STDDEV"]
    center = base["MEDIAN"]

    # Band edges, computed once instead of on every comparison.
    upper_1, lower_1 = center + sigma, center - sigma
    upper_2, lower_2 = center + sigma * 2, center - sigma * 2
    upper_3, lower_3 = center + sigma * 3, center - sigma * 3

    # Insertion order matters: it is the order the bands are reported in.
    band_names = ("+3<", "+2<", "+1<", "±1", "±2", "±3", "-1>", "-2>", "-3>")
    counts = dict.fromkeys(band_names, 0)

    for item in values:
        if upper_1 >= item >= lower_1:
            band = "±1"
        elif item > upper_3:
            band = "+3<"
        elif item > upper_2:
            band = "+2<"
        elif item > upper_1:
            band = "+1<"
        elif lower_3 > item:
            band = "-3>"
        elif lower_2 > item:
            band = "-2>"
        elif lower_1 > item:
            band = "-1>"
        else:
            raise ValueError("Shouldn't be rised ever")
        counts[band] += 1

    total = len(values)
    shares = {band: hits / total * 100 for band, hits in counts.items()}

    # Combined tails first, then the cumulative bands ("±3" builds on "±2").
    shares["|3|"] = shares["+3<"] + shares["-3>"]
    shares["|2|"] = shares["+2<"] + shares["-2>"]
    shares["±2"] = shares["±1"] + shares["-1>"] + shares["+1<"]
    shares["±3"] = shares["±2"] + shares["-2>"] + shares["+2<"]
    return shares
def calc_stats(values):
twentyciles = statistics.quantiles(values, n=20)
stats = {
@ -71,7 +125,8 @@ def calc_stats(values):
"75%": twentyciles[-5],
"90%": twentyciles[-2],
"95%": twentyciles[-1],
}
},
"STDDEVs": calc_stddevs(values),
}
if DRAW_AVG_PLOT:
@ -114,28 +169,38 @@ def text_to_val(text):
return multi_map[text]
def init_table(header=None):
    """Create a ``texttable.Texttable`` configured for the given header.

    The first column is right-aligned and 8 characters wide; every further
    column is left-aligned and 20 characters wide.  All columns use the text
    dtype because callers pass pre-formatted strings.

    Args:
        header: Sequence of column titles.  Defaults to ``["Name", "Value"]``
            so that the argument-less call form keeps working.

    Returns:
        The configured table with its header row already set.
    """
    if header is None:
        header = ["Name", "Value"]
    value_columns = len(header) - 1

    table = texttable.Texttable()
    table.set_deco(table.HEADER | table.VLINES | table.BORDER)
    table.set_chars(['-', '|', '|', '-'])
    table.set_cols_dtype(["t"] * len(header))
    # Widths are integers; no need to hand the library numeric strings.
    table.set_cols_width([8] + [20] * value_columns)
    table.set_cols_align(["r"] + ["l"] * value_columns)
    table.set_precision(12)
    table.header(header)
    return table
def print_stats(stats, label, formatter=str, header=None):
    """Print one or several statistics dicts as a single text table.

    Args:
        stats: A dict mapping statistic names to values, or a list of such
            dicts.  Each dict fills its own value column, in list order.
        label: Caption printed above the table.
        formatter: Callable turning a value into its displayed form.
        header: Column titles; defaults to ``["Name", "Value"]``.  It should
            contain one title for the name column plus one per dict.
    """
    # ``None`` stands in for the default to avoid a shared mutable default.
    if header is None:
        header = ["Name", "Value"]
    sources = stats if isinstance(stats, list) else [stats]

    print(f"\n{label}:")
    table = init_table(header)

    # One row per statistic name, one cell per source dict.  Cells start out
    # blank so that a name missing from some dict still yields a full-width
    # row with every value under its own column.
    rows = {}
    for column, source in enumerate(sources):
        for name, val in source.items():
            cells = rows.setdefault(name, [""] * len(sources))
            cells[column] = f"{formatter(val)}"

    for name, cells in rows.items():
        table.add_row([name] + cells)
    print(table.draw())
@ -149,66 +214,103 @@ def do_statistics():
measurements.append(float(line.split('\n')[0]))
stats = calc_stats(measurements)
print_stats(stats["Main"], "Non-filtered")
if LIMITER or FILTER_BY_STDDEVS:
print_stats(stats["Main"], "Non-filtered", val_to_text)
if LIMITER:
def limiter_filter(value):
if "LIMIT_HIGH" in __CONTEXT and LIMIT_HIGH < value:
return True
if "LIMIT_LOW" in __CONTEXT and LIMIT_LOW > value:
return True
return False
outliers = [value for value in measurements if limiter_filter(value)]
measurements = [value for value in measurements if not limiter_filter(value)]
print(f"\nLimiter dropped {len(outliers)} measurements")
stats = calc_stats(measurements)
print_stats(stats["Main"], "Limited", val_to_text)
if FILTER_BY_STDDEVS:
outliers = [value for value in measurements
if abs(value) >= stats["Main"]["STDDEV"] * FILTER_BY_STDDEVS]
measurements = [value for value in measurements
if abs(value) < stats["Main"]["STDDEV"] * FILTER_BY_STDDEVS]
filter_high = stats["Main"]["MEDIAN"] + stats["Main"]["STDDEV"] * FILTER_BY_STDDEVS
filter_low = stats["Main"]["MEDIAN"] - stats["Main"]["STDDEV"] * FILTER_BY_STDDEVS
outliers = [value for value in measurements if value > filter_high]
outliers += [value for value in measurements if value < filter_low]
measurements = [value for value in measurements if filter_low <= value <= filter_high]
print(f"\nOutliers({FILTER_BY_STDDEVS}xSTDDEV):")
for outlier in outliers:
print(val_to_text(outlier))
stats = calc_stats(measurements)
print_stats(stats["Main"], "Filtered")
print_stats(stats["Average"], "Average(filtered)")
else:
print_stats(stats["Average"], "Average(non-filtered)")
title = "Statistics"
if LIMITER:
title += " limited"
if FILTER_BY_STDDEVS:
title += " filtered"
print_stats([stats["Main"], stats["Average"]], title, val_to_text, ["Name", "Main", "Average"])
print_stats(stats["Percentiles"], "Percentiles", val_to_text)
def format_percent(val):
return f"~{val:.2f}%"
print_stats(stats["Percentiles"], "Percentiles")
print_stats(stats["STDDEVs"], "STDDEV parts", format_percent)
return stats, measurements
def do_plot(stats, measurements):
# Simple plot
linewidth = 1 if BOLD_LINES_ON_PLOT else 0.2
linewidth = 0.5 if BOLD_LINES_ON_PLOT else 0.2
plt.figure(figsize=(15, 5))
plt.plot(range(len(measurements)), [val * VALUE_MULTI for val in measurements], linewidth=linewidth)
if DRAW_AVG_PLOT:
avg_plot = [val * VALUE_MULTI for val in calc_avg(measurements)]
plt.plot(range(DRAW_AVG_PLOT//2, len(avg_plot)+DRAW_AVG_PLOT//2), avg_plot, linewidth=1)
if DRAW_STDDEV_PLOT:
stddev = stats["Main"]["STDDEV"] * VALUE_MULTI
mean = stats["Main"]["MEDIAN"] * VALUE_MULTI
edges = [
([mean + 1 * stddev, mean - 1 * stddev], "dotted", "grey"),
([mean + 2 * stddev, mean - 2 * stddev], "dashed", "grey"),
([mean + 3 * stddev, mean - 3 * stddev], "dashdot", "red"),
([mean + 1 * stddev, mean - 1 * stddev], "dotted", "green", "1xSTDDEV"),
([mean + 2 * stddev, mean - 2 * stddev], "dashed", "blue", "2xSTDDEV"),
([mean + 3 * stddev, mean - 3 * stddev], "dashdot", "red", "3xSTDDEV"),
]
for lines, style, color in edges:
for lines, style, color, label in edges:
for line in lines:
plt.axhline(y=line, color=color, linewidth=linewidth, linestyle=style)
plt.plot(range(len(measurements)), [val * VALUE_MULTI for val in measurements], linewidth=linewidth)
if DRAW_AVG_PLOT:
avg_plot = [val * VALUE_MULTI for val in calc_avg(measurements)]
plt.plot(range(DRAW_AVG_PLOT//2, len(avg_plot)+DRAW_AVG_PLOT//2), avg_plot, linewidth=linewidth)
plt.axhline(y=line, color=color, linewidth=0.5, linestyle=style, label=label)
label = ""
plt.grid(axis='both')
plt.title('Generated from ' + DATALOG_PATH)
plt.xlabel('Time, readings')
plt.ylabel('Diff, nanoseconds')
plt.savefig(os.path.join(OUTPUT_PATH, "plot.png"), dpi=300)
plt.legend()
plt.savefig(OUTPUT_PATH_PLOT, dpi=300)
# Simple scatter
plt.figure(figsize=(15, 5))
plt.scatter(range(len(measurements)), [val * VALUE_MULTI for val in measurements], s=2)
plt.grid(axis='both')
plt.title('Generated from ' + DATALOG_PATH)
plt.xlabel('Time, readings')
plt.ylabel('Diff, nanoseconds')
plt.savefig(os.path.join(OUTPUT_PATH, "scatter.png"), dpi=300)
plt.scatter(range(len(measurements)), [val * VALUE_MULTI for val in measurements], s=2)
if DRAW_PERCENTILE_SCATTER:
lines = []
for percentile, value in stats["Percentiles"].items():
lines.append((percentile, value * VALUE_MULTI))
colors = ["red", "green", "blue", "yellow", "brown", "orange", "cyan"]
for label, line in lines:
plt.axhline(y=line, color=colors.pop(), linewidth=0.7, linestyle="dashed", label=label)
plt.legend()
plt.savefig(OUTPUT_PATH_SCATTER, dpi=300)
# Probability distribution
fig, axs = plt.subplots(1, 1, tight_layout=True)
@ -218,7 +320,7 @@ def do_plot(stats, measurements):
N, bins, patches = axs.hist([val * VALUE_MULTI for val in measurements], bins=21)
axs.yaxis.set_major_formatter(PercentFormatter(xmax=len(measurements)))
plt.savefig(os.path.join(OUTPUT_PATH, "histogram.png"), dpi=300)
plt.savefig(OUTPUT_PATH_HISTOGRAM, dpi=300)
def eat_param(param, type_class, i):
@ -254,12 +356,16 @@ def show_help(exitcode=0):
print("\t-f FILE \t— give a input csv-file")
print("\t-p FILE \t— give a file that contains parameters(as showed in [Params] of stats.txt)")
print("\t-o DIRECTORY \t— give a output directory")
print("\t-n PREFIX \t— name output files with given prefix")
print("\t-F VALUE \t— give a number of STDDEVs to use in filter (`0` means do not filter, default is `1`)")
print("\t-A VALUE \t— draw avg plot for given count of measurements (`0`(default) means do not draw)")
print("\t-M VALUE \t— multiply measurements to make it one of: `n`(default), `u`, `m`, " + \
"`s`(means `1`),\n\t\t\t or any numeric multiplicator.")
print("\t-L VALUE \t— limit measurements by VALUE as lowest value.")
print("\t-H VALUE \t— limit measurements by VALUE as highest value.")
print("\t-b\t\t— bold lines on a plot")
print("\t-s\t\t— draw STDDEV lines on a plot")
print("\t-P\t\t— draw percentile lines on a scatter")
print("\t-v\t\t— verbose (show digits in scientific and very long float)")
sys.exit(exitcode)
@ -267,10 +373,14 @@ ARG_MAP = {
"-v": (bool, "VERBOSE"),
"-b": (bool, "BOLD_LINES_ON_PLOT"),
"-s": (bool, "DRAW_STDDEV_PLOT"),
"-P": (bool, "DRAW_PERCENTILE_SCATTER"),
"-f": (str, "DATALOG_PATH"),
"-o": (str, "OUTPUT_PATH"),
"-n": (str, "OUTPUT_FILES_PREFIX"),
"-F": (int, "FILTER_BY_STDDEVS"),
"-A": (int, "DRAW_AVG_PLOT"),
"-L": (float, "LIMIT_LOW"),
"-H": (float, "LIMIT_HIGH"),
"-M": (text_to_val, "VALUE_MULTI"),
"-p": (str, "PARSE_CONFIG"),
"--help": (None, show_help),
@ -284,6 +394,9 @@ def eat_args():
type_class, param = ARG_MAP.get(arg, (eat_unknown, arg))
i += eat_param(param, type_class, i)
if "LIMIT_HIGH" in __CONTEXT or "LIMIT_LOW" in __CONTEXT:
__CONTEXT.update({"LIMITER": True})
g = globals()
g.update(__CONTEXT)
@ -332,18 +445,29 @@ def startup():
print("Gimme directory as output path!")
show_help(-2)
files_prefix = OUTPUT_FILES_PREFIX + "_"
__CONTEXT.update({
"OUTPUT_PATH_PLOT": os.path.join(OUTPUT_PATH, files_prefix + "plot.png"),
"OUTPUT_PATH_SCATTER": os.path.join(OUTPUT_PATH, files_prefix + "scatter.png"),
"OUTPUT_PATH_HISTOGRAM": os.path.join(OUTPUT_PATH, files_prefix + "histogram.png"),
"OUTPUT_PATH_STATS": os.path.join(OUTPUT_PATH, files_prefix + "stats.txt"),
"OUTPUT_PATH_FILTERED": os.path.join(OUTPUT_PATH, files_prefix + "filtered.csv"),
})
g = globals()
g.update(__CONTEXT)
def main():
    """Run the whole pipeline: setup, statistics, output files, plots.

    After ``startup()`` and ``do_statistics()`` the retained measurements are
    written to ``OUTPUT_PATH_FILTERED`` (one value per line in ``+.12E``
    notation), the statistics are written to ``OUTPUT_PATH_STATS`` in
    ``ConfigParser`` format, and ``do_plot()`` renders the figures.
    """
    startup()
    stats, measurements = do_statistics()

    # Only the prefixed output paths are used, so a custom file prefix is
    # honoured and no un-prefixed duplicate files are created.
    with open(OUTPUT_PATH_FILTERED, 'w') as output_csv:
        output_csv.writelines(f"{value:+.12E}\n" for value in measurements)

    parser = ConfigParser()
    parser.read_dict(stats)
    with open(OUTPUT_PATH_STATS, 'w') as output:
        parser.write(output)

    do_plot(stats, measurements)

Loading…
Cancel
Save