Benchmark results

Note

The implementation used to demonstrate a distributed computing strategy follows a straightforward, easy-to-follow concept. The best possible performance depends on a variety of factors; in this case, network bandwidth is the dominant one. To achieve the best possible results, take extra care to minimize the amount of data transferred and keep the management overhead as low as possible.

# Selection parameters: the analysis below keeps only results whose view,
# host name and engine match these values.
view_names = [
    "base",
    "slight01",
    "medium01",
]
host_name = "picm00"
engine_names = [
    "naive",
    "dask-numpy",
]
Hide code cell source
from pathlib import Path
import json

import ipywidgets as ipw
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Activate seaborn's theme and render all figures at 300 dpi.
sns.set()
plt.rc("figure", dpi=300)
    

results_dir = Path("/Volumes/projects/mandelbrot/results")

# Collect every JSON file located exactly two directory levels below the
# results directory; each parsed document becomes one entry in `results`.
results = []
for json_path in results_dir.glob("*/*/*.json"):
    with json_path.open("r") as stream:
        results.append(json.load(stream))

df = pd.DataFrame(results)

# Keep only the rows that match the selected views, host and engines.
view_mask = df["view"].isin(view_names)
host_mask = df["hostname"] == host_name
engine_mask = df["engine"].isin(engine_names)
dfs = df[view_mask & host_mask & engine_mask].reset_index(drop=True)
print(f"We have {len(dfs)} results to analyze.")

tabs = ipw.Tab()

# Build one tab per view; each tab holds a violin plot of the calculation
# time per engine for that view.
tab_names = []
tabs_children = []
for view_name, view_df in dfs.groupby("view"):
    output = ipw.Output()
    # Interactive mode is switched off while the figure is built; the figure
    # is shown explicitly while the output widget is the active context.
    with plt.ioff(), output:
        fig, ax = plt.subplots()
        sns.violinplot(
            data=view_df,
            x="engine",
            y="calculation_time",
            order=engine_names,
            ax=ax,
        )
        ax.set_title(view_name)
        ax.set_xlabel(None)
        ax.set_ylabel("Calculation time [s]")
        ax.set_ylim(bottom=0)

        # Prefix every second tick label with a newline so it sits one line
        # lower (presumably to keep neighbouring labels from overlapping).
        staggered_labels = []
        for position, label in enumerate(ax.get_xticklabels()):
            prefix = "\n" if position % 2 else ""
            staggered_labels.append(prefix + label.get_text())
        ax.set_xticklabels(staggered_labels)

        plt.tight_layout()
        plt.show()

    tab_names.append(view_name)
    tabs_children.append(output)

tabs.children = tabs_children
for index, tab_name in enumerate(tab_names):
    tabs.set_title(index, tab_name)

# Bare expression: the notebook displays the widget as the cell result.
tabs
We have 102 results to analyze.