#!/usr/bin/env python3
"""Summarise filter benchmark logs.

Reads benchmark log files, prints how far individual runs deviate from the
per-round median, and writes one CSV file per operation containing the
aggregated (median or average) timings of every filter.
"""
from Naming import name_dict, get_time
from typing import *
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
# import matplotlib
# import pandas as pd
import os
import sys
# https://github.com/bendichter/brokenaxes
from brokenaxes import brokenaxes
from statistics import mean, median

# Exactly one aggregation mode must be enabled.
USE_AVERAGE = 0
USE_MEDIAN = 1
assert (USE_AVERAGE ^ USE_MEDIAN)

# Every benchmark data row holds this many comma-separated integer columns.
_NUM_OPERATIONS = 4


def get_lines(dir: str) -> list:
    """Return all lines of the text file at path ``dir``.

    Args:
        dir: Path of the file to read.

    Returns:
        The file content as returned by ``readlines()`` (newlines kept).
    """
    # The context manager closes the file even if reading raises.
    with open(dir, "r") as f:
        return f.readlines()


def find_line_index(lines: list, phrase: str) -> int:
    """Return the index of the first line that starts with ``phrase``.

    Lines are stripped before matching; empty lines and lines starting with
    ``#`` are ignored.

    Args:
        lines: Lines to search.
        phrase: Prefix to look for.

    Returns:
        The index of the first matching line, or -1 when there is none.
    """
    for i, raw_line in enumerate(lines):
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith(phrase):
            return i
    return -1


def get_filter_name_from_lines(lines):
    """Return the text that follows the ``NAME`` keyword in ``lines``.

    Args:
        lines: Lines of one benchmark log file.

    Returns:
        The stripped remainder of the first ``NAME`` line, after dropping
        its first five characters.

    Raises:
        AssertionError: If no line starts with ``NAME``; the lines are
            printed first to ease debugging.
    """
    i = find_line_index(lines, "NAME")
    if i == -1:
        print(lines)
        # Raised explicitly so the check also holds under ``python -O``.
        raise AssertionError("no NAME line found")
    # find_line_index matches on the stripped line, so slice the stripped
    # line too; otherwise leading whitespace would shift the cut-off.
    return lines[i].strip()[5:].strip()


def _find_prefix_matches(lines: List[str], pattern: str) -> List[int]:
    """Return the indexes of all lines that start with ``pattern`` (no stripping)."""
    return [i for i, line in enumerate(lines) if line.startswith(pattern)]


def _parse_single_run(lines: List[str]) -> List[List[int]]:
    """Parse the data rows of one run into per-operation columns.

    Args:
        lines: The lines of a single run, from its ``NAME`` line up to and
            including its ``BENCH_END`` line.

    Returns:
        A list indexed as ``[operation][round]``.
    """
    start_line = find_line_index(lines, "BENCH_START") + 1
    # The original code labels this bound "inclusive" but uses it as an
    # exclusive slice bound, so the line directly before BENCH_END is never
    # parsed. NOTE(review): confirm against the log format whether that line
    # is a data row; behavior is kept as-is.
    end_line = find_line_index(lines, "BENCH_END") - 1
    columns: List[List[int]] = [[] for _ in range(_NUM_OPERATIONS)]
    for line in lines[start_line:end_line]:
        if not line.strip():
            continue
        values = [int(cell.strip()) for cell in line.split(",")]
        assert len(values) == _NUM_OPERATIONS
        for column, value in zip(columns, values):
            column.append(value)
    return columns


def _parse_all_runs(lines: List[str]) -> List[List[List[int]]]:
    """Split a log into runs and parse each one.

    A run starts at a line beginning with ``NAME`` and ends at the matching
    line beginning with ``BENCH_END``.

    Returns:
        A list indexed as ``[run][operation][round]``.
    """
    beg_list = _find_prefix_matches(lines, "NAME")
    end_list = _find_prefix_matches(lines, "BENCH_END")
    assert len(beg_list) == len(end_list)

    runs = []
    for beg, end in zip(beg_list, end_list):
        run_lines = lines[beg: end + 1]
        assert run_lines[0].startswith("NAME")
        assert run_lines[-1].startswith("BENCH")
        runs.append(_parse_single_run(run_lines))
    return runs


def _aggregate_runs(lines: List[str],
                    reducer: Callable[[List[int]], Any]) -> list:
    """Combine the values of all runs for every (operation, round) cell.

    Args:
        lines: Lines of one benchmark log file.
        reducer: Called with the list of values a cell has across all runs;
            its result is stored in the cell.

    Returns:
        A list indexed as ``[operation][round]``. Only the first three
        operations are filled in; the cells of the fourth stay ``0``.
    """
    runs = _parse_all_runs(lines)
    assert len(runs[0]) == _NUM_OPERATIONS
    rounds = len(runs[0][0])

    table = [[0] * rounds for _ in range(_NUM_OPERATIONS)]
    for op in range(_NUM_OPERATIONS - 1):
        for k in range(rounds):
            table[op][k] = reducer([run[op][k] for run in runs])
    return table


def get_all_values_from_perf_list(lines: List[str]):
    """Return the raw per-run values of every (operation, round) cell.

    Args:
        lines: Lines of one benchmark log file.

    Returns:
        A list indexed as ``[operation][round]`` whose cells are lists with
        one value per run (cells of the fourth operation are ``0``).
    """
    return _aggregate_runs(lines, list)


def get_median_performance_list(lines: List[str]) -> List[List[float]]:
    """Return the median over all runs of every (operation, round) cell.

    Args:
        lines: Lines of one benchmark log file.

    Returns:
        A list indexed as ``[operation][round(load)]`` (cells of the fourth
        operation are ``0``).
    """
    return _aggregate_runs(lines, median)


def get_average_performance_list(lines: List[str]) -> List[List[float]]:
    """Return the mean over all runs of every (operation, round) cell.

    Counterpart of ``get_median_performance_list``; ``generate_csvs`` calls
    it when ``USE_AVERAGE`` is set.

    Args:
        lines: Lines of one benchmark log file.

    Returns:
        A list indexed as ``[operation][round(load)]`` (cells of the fourth
        operation are ``0``).
    """
    return _aggregate_runs(lines, mean)


def get_op_divisors(lines: List[str]):
    """Read the ``FILTER_MAX_CAPACITY`` and ``NUMBER_OF_LOOKUP`` header values.

    Args:
        lines: Lines of one benchmark log file.

    Returns:
        A ``(filter_max_cap, lookup_reps)`` tuple of ints, each taken from
        the second whitespace-separated token of its line.
    """
    cap_index = find_line_index(lines, "FILTER_MAX_CAPACITY")
    lookup_index = find_line_index(lines, "NUMBER_OF_LOOKUP")
    filter_max_cap = int(lines[cap_index].split()[1])
    lookup_reps = int(lines[lookup_index].split()[1])
    return filter_max_cap, lookup_reps


def _deviations_from_median(values) -> list:
    """Return ``(x - m) / m`` for every ``x`` in ``values``, ``m`` being their median."""
    denom = median(values)
    assert denom != 0
    return [(x - denom) / denom for x in values]


def get_raw_all_data(f_list: list):
    """Collect the relative deviation from the median of every measurement.

    For each of the first three operations, each file and each round, the
    per-run values are compared against their own median.

    Args:
        f_list: Paths of the benchmark log files.

    Returns:
        A flat list of relative deviations.
    """
    lines_list = []
    for temp_file in f_list:
        temp_lines = get_lines(temp_file)
        # The name itself is not needed here; the call is kept because it
        # fails early on files without a NAME line.
        get_filter_name_from_lines(temp_lines)
        lines_list.append(temp_lines)

    # Each entry is indexed as [operation][round][run].
    perf_lists = [get_all_values_from_perf_list(temp_lines)
                  for temp_lines in lines_list]

    # The round count of the first file is used for all files.
    rounds = len(perf_lists[0][0])
    flat_div_list = []
    for op in range(3):
        for filter_perf in perf_lists:
            for t_round in range(rounds):
                flat_div_list += _deviations_from_median(
                    filter_perf[op][t_round])
    return flat_div_list


def final_diver(f_list: list) -> None:
    """Print how tightly the measurements cluster around their medians.

    Args:
        f_list: Paths of the benchmark log files.
    """
    s_fd = sorted(get_raw_all_data(f_list))
    above_05 = [i for i in s_fd if abs(i) > 0.005]
    above_1 = [i for i in s_fd if abs(i) > 0.01]
    r1 = len(above_1) / len(s_fd)
    r05 = len(above_05) / len(s_fd)
    print(
        "fraction of elements that are at most 1% away from median (in thier category) \t{:<.5f}".format(1-r1))
    print(
        "fraction of elements that are at most 0.5% from median (in thier category)\t{:<.5f}".format(1-r05))
    print("min & max diviations:", s_fd[0], s_fd[-1])


def generate_csvs(f_list: list):
    """Append the aggregated timings of every file to three CSV files.

    One CSV file is written per operation (the first three columns of the
    logs). Every row holds a filter name followed by its per-round values,
    aggregated with the median or the mean depending on ``USE_MEDIAN`` /
    ``USE_AVERAGE``.

    Args:
        f_list: Paths of the benchmark log files.
    """
    lines_list = []
    names_list = []
    for temp_file in f_list:
        temp_lines = get_lines(temp_file)
        lines_list.append(temp_lines)
        names_list.append(get_filter_name_from_lines(temp_lines))

    perf_lists = []
    for temp_lines in lines_list:
        if USE_MEDIAN:
            assert not USE_AVERAGE
            temp_perf = get_median_performance_list(temp_lines)
        elif USE_AVERAGE:
            temp_perf = get_average_performance_list(temp_lines)
        else:
            raise AssertionError("no aggregation mode selected")
        perf_lists.append(temp_perf)

    # The result is unused; the call is kept so that a malformed header in
    # the first file still fails before any CSV file is touched.
    get_op_divisors(lines_list[0])

    ins_list = [temp_perf[0] for temp_perf in perf_lists]
    uni_list = [temp_perf[1] for temp_perf in perf_lists]
    yes_list = [temp_perf[2] for temp_perf in perf_lists]

    curr_time = get_time()
    names = ["add-med-({:}).csv", "uni-med-({:}).csv", "yes-med-({:}).csv"]
    names = [nm.format(curr_time) for nm in names]

    filters_names = [name_dict(t) for t in names_list]
    assert len(filters_names) == len(ins_list)

    # Context managers guarantee all three files are closed on any error.
    with open(names[0], "a") as f_add, \
            open(names[1], "a") as f_uni, \
            open(names[2], "a") as f_yes:
        for t_file, t_list in zip([f_add, f_uni, f_yes],
                                  [ins_list, uni_list, yes_list]):
            for filter_name, values in zip(filters_names, t_list):
                t_file.write(filter_name + ",")
                # str(list) without the surrounding brackets: "v0, v1, ...".
                t_file.write(str(values)[1:-1] + "\n")


def main_helper(path):
    """Process every non-CSV file found directly inside ``path``.

    Args:
        path: Directory that holds the benchmark log files.
    """
    files_list = sorted(os.path.join(path, i)
                        for i in os.listdir(path)
                        if not i.endswith(".csv"))
    final_diver(files_list)
    generate_csvs(files_list)


def main():
    """Run ``main_helper`` on the directory selected by the command line.

    sys.argv[1] = path. Without arguments, ``Inputs`` below the current
    working directory is used.
    """
    argc: int = len(sys.argv)
    if argc == 1:
        path = os.path.join(os.path.abspath(os.getcwd()), "Inputs")
        assert os.path.isdir(path)
        main_helper(path)
    elif argc == 2:
        main_helper(sys.argv[1])
    else:
        print("Too many arguments where given ({:})".format(argc))


# NOTE(review): main() is never invoked; the module always processes
# ./Inputs/ when it is run or imported. Confirm this is intended.
main_helper("./Inputs/")