#!/usr/bin/env python3
"""Parse filter-benchmark output files and plot the per-operation throughput.

Each input file is expected to hold one or more runs.  A run starts at a line
beginning with ``NAME`` and ends at a line beginning with ``BENCH_END``; the
measurements sit between ``BENCH_START`` and ``BENCH_END`` as rows of four
comma-separated integers (one row per round, one column per operation).
Only the first three columns are plotted (insertions, uniform lookups and
yes lookups).
"""
from Naming import name_dict, get_marker_type, get_time
from typing import *
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
# import matplotlib
# import pandas as pd
import os
import sys
# https://github.com/bendichter/brokenaxes
from brokenaxes import brokenaxes
from statistics import median

# Exactly one of the two aggregation modes must be enabled.
USE_AVERAGE = 0
USE_MEDIAN = 1
assert (USE_AVERAGE ^ USE_MEDIAN)


def get_lines(dir: str) -> list:
    """Return all lines (with their line endings) of the text file at `dir`."""
    # The context manager closes the handle even if reading raises.
    with open(dir, "r") as f:
        return f.readlines()


def find_line_index(lines: list, phrase: str) -> int:
    """Return the index of the first line starting with `phrase`, or -1.

    Lines are stripped before the comparison; empty lines and lines starting
    with ``#`` are skipped.
    """
    for i, raw_line in enumerate(lines):
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith(phrase):
            return i
    return -1


def get_filter_name_from_lines(lines):
    """Return the filter name stored on the first ``NAME`` line.

    The name is everything after the first five characters of that line
    (the keyword plus one separator character), stripped.

    Raises:
        AssertionError: if no ``NAME`` line exists (the lines are printed
            first to ease debugging).
    """
    i = find_line_index(lines, "NAME")
    if i == -1:
        print(lines)
        raise AssertionError("no line starting with 'NAME' was found")
    return lines[i][5:].strip()


def _find_all_prefix_matches(lines: List[str], pattern: str) -> List[int]:
    """Return the indexes of every (unstripped) line starting with `pattern`."""
    return [i for i, line in enumerate(lines) if line.startswith(pattern)]


def _parse_run(lines: List[str]) -> List[List[int]]:
    """Parse the measurements of a single run into ``result[operation][round]``."""
    start_line = find_line_index(lines, "BENCH_START") + 1
    # NOTE(review): the slice below is end-exclusive, so the line immediately
    # preceding BENCH_END is never parsed.  This is kept as in the original
    # code; confirm against the benchmark output format whether that line
    # can hold data.
    end_line = find_line_index(lines, "BENCH_END") - 1
    performance_list = [[] for _ in range(4)]
    for line in lines[start_line:end_line]:
        if not line.strip():
            continue
        values = [int(cell.strip()) for cell in line.split(",")]
        assert len(values) == 4
        for column, value in zip(performance_list, values):
            column.append(value)
    return performance_list


def _split_runs(lines: List[str]) -> List[List[List[int]]]:
    """Split a file into runs and parse each: ``result[run][operation][round]``."""
    beg_list = _find_all_prefix_matches(lines, "NAME")
    end_list = _find_all_prefix_matches(lines, "BENCH_END")
    assert len(beg_list) == len(end_list)
    perfs_lists = []
    for beg, end in zip(beg_list, end_list):
        run_lines = lines[beg:end + 1]
        assert run_lines[0].startswith("NAME")
        assert run_lines[-1].startswith("BENCH")
        perfs_lists.append(_parse_run(run_lines))
    return perfs_lists


def get_average_performance_list(lines: List[str]):
    """Return the per-round average over all runs: ``result[operation][round]``.

    Args:
        lines (List[str]): the lines of one benchmark output file.

    Returns:
        Four lists (one per operation) of float averages.
    """
    perfs_lists = _split_runs(lines)
    run_count = len(perfs_lists)
    cols = len(perfs_lists[0])
    assert cols == 4
    rows = len(perfs_lists[0][0])
    return [
        [sum(run[col][row] for run in perfs_lists) / run_count
         for row in range(rows)]
        for col in range(cols)
    ]


def get_median_performance_list(lines: List[str]) -> List[List[float]]:
    """Return the per-round median over all runs: ``result[operation][round]``.

    Only the first three operations are aggregated; the fourth row is left
    filled with zeros.

    Args:
        lines (List[str]): the lines of one benchmark output file.

    Returns:
        List[List[float]]: four lists (one per operation), one value per round.
    """
    perfs_lists = _split_runs(lines)
    assert len(perfs_lists[0]) == 4
    operations = len(perfs_lists[0])
    rounds = len(perfs_lists[0][0])

    fl = [[0] * rounds for _ in range(operations)]
    for op in range(operations - 1):
        for k in range(rounds):
            fl[op][k] = median([run[op][k] for run in perfs_lists])
    return fl


def get_all_values_from_perf_list(lines: List[str]):
    """Return the raw samples of every run: ``result[operation][round][run]``.

    Only the first three operations are collected; every cell of the fourth
    row is left as the integer 0.

    Args:
        lines (List[str]): the lines of one benchmark output file.
    """
    perfs_lists = _split_runs(lines)
    assert len(perfs_lists[0]) == 4
    operations = len(perfs_lists[0])
    rounds = len(perfs_lists[0][0])

    fl = [[0] * rounds for _ in range(operations)]
    for op in range(operations - 1):
        for k in range(rounds):
            fl[op][k] = [run[op][k] for run in perfs_lists]
    return fl


def get_op_divisors(lines: List[str]):
    """Return ``(filter_max_capacity, number_of_lookups)`` read from the header.

    Both values are the second whitespace-separated token on the
    ``FILTER_MAX_CAPACITY`` / ``NUMBER_OF_LOOKUP`` lines.
    """
    cap_index = find_line_index(lines, "FILTER_MAX_CAPACITY")
    lookup_index = find_line_index(lines, "NUMBER_OF_LOOKUP")
    filter_max_cap = int(lines[cap_index].split()[1])
    lookup_reps = int(lines[lookup_index].split()[1])
    return filter_max_cap, lookup_reps


def get_y_values(filters_names: List[str], lists_of_lists: List[List], divisor, op_name: str, units=1e9):
    """Convert per-round measurements into rates: ``divisor / (value / units)``.

    Args:
        filters_names (List[str]): one name per filter; only its length is
            checked against `lists_of_lists`.
        lists_of_lists (List[List]): per filter, the per-round measurements.
        divisor: number of operations executed in a single round.
        op_name (str): label of the operation (currently unused).
        units: scale applied to each measurement before dividing
            (presumably nanoseconds per second -- TODO confirm).

    Returns:
        One list of rates per filter; a zero measurement maps to a rate of 0.
    """
    assert (len(filters_names) == len(lists_of_lists))
    return [
        [divisor / (value / units) if value != 0 else 0 for value in measurements]
        for measurements in lists_of_lists
    ]


def get_raw_all_data(f_list: list):
    """Return the relative deviation from the median of every raw sample.

    For each file, operation (first three only) and round, every run's sample
    ``x`` contributes ``(x - median) / median``, where the median is taken
    over the runs of that same cell.  All deviations are returned in one
    flat list.
    """
    lines_list = []
    for temp_file in f_list:
        temp_lines = get_lines(temp_file)
        lines_list.append(temp_lines)
        # The name itself is not needed here; the call validates that the
        # file contains a NAME line.
        get_filter_name_from_lines(temp_lines)

    # Each entry is indexed as [Operation][Round][Run].
    perf_lists = [get_all_values_from_perf_list(temp_lines)
                  for temp_lines in lines_list]

    def get_all_diviate_list(samples):
        t_med = median(samples)
        assert t_med != 0
        return [(x - t_med) / t_med for x in samples]

    flat_div_list = []
    filters_num = len(perf_lists)
    rounds = len(perf_lists[0][0])
    for op in range(3):
        for t_filter in range(filters_num):
            for t_round in range(rounds):
                flat_div_list += get_all_diviate_list(
                    perf_lists[t_filter][op][t_round])
    return flat_div_list


def final_diver(f_list: list) -> None:
    """Print how far the individual samples stray from their cell's median."""
    s_fd = sorted(get_raw_all_data(f_list))
    if not s_fd:
        # Nothing was measured; avoid dividing by zero / indexing an empty list.
        print("no deviation data available")
        return
    above_05 = [i for i in s_fd if abs(i) > 0.005]
    above_1 = [i for i in s_fd if abs(i) > 0.01]
    r1 = len(above_1) / len(s_fd)
    r05 = len(above_05) / len(s_fd)
    # The printed fractions count the samples deviating by MORE than the
    # threshold, so the labels say ">" (they used to claim "<=").
    print(
        "fraction of elements that are >1% from median (in thier category) \t{:}".format(r1))
    print(
        "fraction of elements that are >0.5% from median (in thier category)\t{:}".format(r05))
    print("min & max diviations:", s_fd[0], s_fd[-1])


def get_data(f_list: list):
    """Load the benchmark files and build everything the plots need.

    Returns:
        A tuple ``(y_lists, x_range, filters_names)``: `y_lists` is indexed as
        ``[operation][filter][round]`` for the three plotted operations,
        `x_range` holds the load of each round (``(i + 1) / rounds``) and
        `filters_names` the display names.  Filters are sorted by display
        name.
    """
    lines_list = []
    names_list = []
    for temp_file in f_list:
        temp_lines = get_lines(temp_file)
        lines_list.append(temp_lines)
        names_list.append(get_filter_name_from_lines(temp_lines))

    perf_lists = []
    for temp_lines in lines_list:
        if (USE_MEDIAN):
            assert (not USE_AVERAGE)
            temp_perf = get_median_performance_list(temp_lines)
        elif (USE_AVERAGE):
            temp_perf = get_average_performance_list(temp_lines)
        else:
            assert (0)
        perf_lists.append(temp_perf)

    # The divisors are read from the first file only.
    filter_max_cap, lookup_reps = get_op_divisors(lines_list[0])
    rounds_num = len(perf_lists[0][0])
    add_step = round(filter_max_cap / rounds_num)
    find_step = round(lookup_reps / rounds_num)

    ins_list = [temp_perf[0] for temp_perf in perf_lists]
    uni_list = [temp_perf[1] for temp_perf in perf_lists]
    yes_list = [temp_perf[2] for temp_perf in perf_lists]

    args = [
        (names_list, ins_list, add_step, "Insertions"),
        (names_list, uni_list, find_step, "Uni-Lookups"),
        (names_list, yes_list, add_step, "Yes-Lookups"),
    ]
    fl_y_list = [get_y_values(*temp_arg) for temp_arg in args]

    rounds = len(ins_list[0])
    x_range = [round((i + 1) / rounds, 4) for i in range(rounds)]
    filters_names = [name_dict(t) for t in names_list]

    # Sort the filters by display name while keeping each filter's three
    # series attached to it.
    ordered = sorted(
        zip(filters_names, fl_y_list[0], fl_y_list[1], fl_y_list[2]))
    new_filters_names = [entry[0] for entry in ordered]
    new_fl_y_list = [[entry[op] for entry in ordered] for op in range(1, 4)]
    return new_fl_y_list, x_range, new_filters_names


def get_paths_from_base_names(base_names: List[str]):
    """Return each base name joined under the ``Inputs`` directory."""
    return [os.path.join("Inputs", i) for i in base_names]


def get_ba_limits(op_ll: list, names: Optional[List[str]] = None):
    """Return y-range limits split into the ``BBF`` filter and all the others.

    Args:
        op_ll (list): one series per filter.
        names: the filters' names, aligned with `op_ll`.  When omitted the
            function falls back to a module-level ``filters_names``, as the
            original code did.

    Returns:
        ``(bbf_min, bbf_max, others_min, others_max)``; the two ``bbf``
        entries are missing when no filter is named exactly ``BBF``.
    """
    if names is None:
        # NOTE(review): no module-level `filters_names` is defined in this
        # file, so this raises NameError unless a caller has set one.
        names = filters_names
    filter_num = len(names)
    if filter_num != len(op_ll):
        print(filter_num, len(op_ll))
    assert filter_num == len(op_ll)
    min_max_bbf = []
    min_others = 1e10
    max_others = 0
    for temp_filter_name, y_range in zip(names, op_ll):
        if (temp_filter_name == "BBF"):
            min_max_bbf = min(y_range), max(y_range)
        else:
            max_others = max(max_others, max(y_range))
            min_others = min(min_others, min(y_range))
    return *min_max_bbf, min_others, max_others


def get_ba_limits_all(op_ll: list, names: Optional[List[str]] = None):
    """Return y-range limits split into the ``BBF*`` filters and the others.

    The lower ``BBF`` bound is the minimum of ``BBF-Flex`` and the upper
    bound is the maximum of ``BBF``; any other name starting with ``BBF`` is
    rejected.

    Args:
        op_ll (list): one series per filter.
        names: the filters' names, aligned with `op_ll`.  When omitted the
            function falls back to a module-level ``filters_names``, as the
            original code did.

    Returns:
        ``(bbf_min, bbf_max, others_min, others_max)``.
    """
    if names is None:
        # NOTE(review): no module-level `filters_names` is defined in this
        # file, so this raises NameError unless a caller has set one.
        names = filters_names
    filter_num = len(names)
    if filter_num != len(op_ll):
        print(filter_num, len(op_ll))
    assert filter_num == len(op_ll)
    min_max_bbf = [1e10, 0]
    min_others = 1e10
    max_others = 0
    for temp_filter_name, y_range in zip(names, op_ll):
        if (temp_filter_name.startswith("BBF")):
            if (temp_filter_name == "BBF-Flex"):
                min_max_bbf[0] = min(y_range)
            elif (temp_filter_name == "BBF"):
                min_max_bbf[1] = max(y_range)
            else:
                print(temp_filter_name)
                assert 0
        else:
            max_others = max(max_others, max(y_range))
            min_others = min(min_others, min(y_range))
    return *min_max_bbf, min_others, max_others


def fig3_ba_gridspec_all(data, name="", set_fontsize: int = 14):
    """Plot the three operations side by side, using broken y-axes.

    Panels (a) and (c) get a y-axis broken into two segments: one around the
    filters whose name starts with ``BBF``, ``BBF-Flex`` or ``Impala`` and
    one around all the others.  Panel (b) uses a regular axis.  The figure
    is saved as a PDF.

    Args:
        data: the ``(y_lists, x_range, filters_names)`` tuple returned by
            `get_data`.
        name: output file name without extension; when empty a default name
            built from `set_fontsize` is used.
        set_fontsize (int): only used to build the default file name.
    """
    y_lll, x_range, filters_names = data
    fig = plt.figure(figsize=(15, 5))
    sps1, sps2, sps3 = GridSpec(1, 3, figure=fig)
    spec_list = [sps1, sps2, sps3]
    baxs = []
    op_names_list = ["(a) Insertions",
                     "(b) Uniform lookups", "(c) Yes lookups"]

    def get_ba_limits_all(op_ll: list):
        # Returns (group_min, group_max, others_min, others_max) where the
        # group is made of the filters matching one of the prefixes below.
        bbfs_names = ("BBF", "BBF-Flex", "Impala")
        assert len(filters_names) == len(op_ll)
        indexes = [i for i, t_name in enumerate(filters_names)
                   if t_name.startswith(bbfs_names)]

        min_max_bbf = [1e10, 0]
        min_others = 1e10
        max_others = 0
        for i, y_range in enumerate(op_ll):
            if i not in indexes:
                max_others = max(max_others, max(y_range))
                min_others = min(min_others, min(y_range))
        if len(indexes):
            min_max_bbf[0] = min([min(op_ll[i]) for i in indexes])
            min_max_bbf[1] = max([max(op_ll[i]) for i in indexes])
        return *min_max_bbf, min_others, max_others

    for fig_index in range(len(y_lll)):
        yba_lim = get_ba_limits_all(y_lll[fig_index])
        # Lower segment: the "other" filters; upper segment: the BBF group.
        y_delim = ((yba_lim[2] * 0.9, yba_lim[3] * 1.04),
                   (yba_lim[0] * 0.98, yba_lim[1] * 1.02))
        if fig_index == 1:
            bax = brokenaxes(subplot_spec=spec_list[fig_index])
        else:
            bax = brokenaxes(ylims=y_delim, d=0.005,
                             subplot_spec=spec_list[fig_index])
        bax.set_title(op_names_list[fig_index], fontsize=18)
        bax.set_xlabel("Load", fontsize=14)
        bax.set_ylabel("ops/sec", fontsize=14)
        for i in range(len(y_lll[fig_index])):
            yv = y_lll[fig_index][i]
            marker_shape = get_marker_type(filters_names[i])
            bax.plot(
                x_range, yv, label=filters_names[i], marker=marker_shape, markersize=2)

        bax.grid(axis='y', which='major', ls='-', linewidth=1)
        bax.grid(axis='y', which='minor', ls='--', linewidth=0.4)
        baxs.append(bax)

    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    # The layout changed after the axes were created, so the diagonal break
    # marks are removed and drawn again.
    for bax in baxs:
        for handle in bax.diag_handles:
            handle.remove()
        bax.draw_diags()
        bax.minorticks_on()

    handles, labels = baxs[0].axs[0].get_legend_handles_labels()
    plt.legend(handles, labels, loc='upper center',
               bbox_to_anchor=(-0.6, -0.1), ncol=(len(filters_names) + 1)//2)

    if not len(name):
        file_name = "bench-median-lables-" + str(set_fontsize) + ".pdf"
    else:
        file_name = name + ".pdf"
    plt.savefig(file_name, dpi=400, bbox_inches="tight")


def fig3_no_brokenaxis_local_arg(y_lll, x_range, filters_names, name: str = "default"):
    """Plot the three operations side by side without breaking the y-axes.

    The figure is saved as ``<name>.pdf`` and then shown.

    Args:
        y_lll: the series, indexed as ``[operation][filter][round]``.
        x_range: the x value of every round.
        filters_names: display name of every filter.
        name (str): output file name without extension.
    """
    fig = plt.figure(figsize=(15, 5))
    sps1, sps2, sps3 = GridSpec(1, 3, figure=fig)
    spec_list = [sps1, sps2, sps3]
    baxs = []
    op_names_list = ["(a) Insertions",
                     "(b) Uniform lookups", "(c) Yes lookups"]

    for fig_index in range(len(y_lll)):
        bax = brokenaxes(subplot_spec=spec_list[fig_index])
        bax.set_title(op_names_list[fig_index])
        for i in range(len(y_lll[fig_index])):
            yv = y_lll[fig_index][i]
            marker_shape = get_marker_type(filters_names[i])
            bax.plot(
                x_range, yv, label=filters_names[i], marker=marker_shape, markersize=2)

        bax.grid(axis='y', which='major', ls='-', linewidth=1)
        bax.grid(axis='y', which='minor', ls='--', linewidth=0.4)
        baxs.append(bax)

    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    for bax in baxs:
        for handle in bax.diag_handles:
            handle.remove()
        bax.draw_diags()
        bax.minorticks_on()

    handles, labels = baxs[0].axs[0].get_legend_handles_labels()
    # At least one legend column, even with fewer than three filters.
    plt.legend(handles, labels, loc='upper center',
               bbox_to_anchor=(-0.6, -0.05), ncol=max(1, len(filters_names)//3))
    plt.savefig(name + ".pdf", dpi=400, bbox_inches="tight")
    plt.show()


def fig3_no_brokenAxis(name: str = "default"):
    """Same plot as `fig3_no_brokenaxis_local_arg`, fed from module globals.

    NOTE(review): this reads the module-level names ``y_lll``, ``x_range``
    and ``filters_names``, none of which is defined in this file, so the call
    raises NameError unless a caller has set them.  Prefer
    `fig3_no_brokenaxis_local_arg`.
    """
    fig3_no_brokenaxis_local_arg(y_lll, x_range, filters_names, name)


def old_main():
    """Entry point: plot every benchmark file found in a directory.

    ``sys.argv[1]`` is the optional path of the input directory; without it
    the ``Inputs`` directory under the current working directory is used.
    Files ending with ``.csv`` are ignored.
    """
    def main_helper(path, name):
        files_list = sorted(
            os.path.join(path, i) for i in os.listdir(path)
            if not i.endswith(".csv"))
        data = get_data(files_list)
        final_diver(files_list)
        fig3_ba_gridspec_all(data, name)

    argc: int = len(sys.argv)
    if argc == 1:
        path = os.path.join(os.path.abspath(os.getcwd()), "Inputs")
        assert os.path.isdir(path)
        name = "bench{:}".format(get_time())
        main_helper(path, name)
    elif argc == 2:
        path = sys.argv[1]
        name = "bench{:}".format(get_time())
        main_helper(path, name)
    else:
        print("Too many arguments where given ({:})".format(argc))


old_main()