PySDM_examples.Bulenok_2023_MasterThesis.utils

  1import gc
  2import json
  3import os
  4import time
  5
  6import numba
  7import numpy as np
  8from matplotlib import pyplot
  9
 10from PySDM.backends import CPU, GPU
 11
 12
 13class ProductsNames:
 14    super_particle_count = "super_particle_count"
 15    total_volume = "total_volume"
 16    total_number = "total_number"
 17
 18
 19def print_all_products(particulator):
 20    print(
 21        ProductsNames.total_number,
 22        particulator.products[ProductsNames.total_number].get(),
 23    )
 24    print(
 25        ProductsNames.total_volume,
 26        particulator.products[ProductsNames.total_volume].get(),
 27    )
 28    print(
 29        ProductsNames.super_particle_count,
 30        particulator.products[ProductsNames.super_particle_count].get(),
 31    )
 32
 33
 34def get_prod_dict(particulator):
 35    d = {
 36        ProductsNames.total_number: list(
 37            particulator.products[ProductsNames.total_number].get()
 38        ),
 39        ProductsNames.total_volume: list(
 40            particulator.products[ProductsNames.total_volume].get()
 41        ),
 42        ProductsNames.super_particle_count: list(
 43            particulator.products[ProductsNames.super_particle_count].get()
 44        ),
 45    }
 46
 47    return d
 48
 49
 50def measure_time_for_each_step(particulator, n_steps):
 51    particulator.run(steps=1)
 52
 53    res = []
 54    for _ in range(n_steps):
 55        t0 = time.time()
 56        particulator.run(steps=1)
 57        t1 = time.time()
 58
 59        res.append(t1 - t0)
 60
 61    return res
 62
 63
 64def measure_time_per_timestep(particulator, n_steps):
 65    particulator.run(steps=1)
 66
 67    t0 = time.time()
 68    particulator.run(steps=n_steps)
 69    t1 = time.time()
 70
 71    return (t1 - t0) / n_steps
 72
 73
 74def go_benchmark(
 75    setup_sim,
 76    n_sds,
 77    n_steps,
 78    seeds,
 79    *,
 80    numba_n_threads=None,
 81    double_precision=True,
 82    sim_run_filename=None,
 83    total_number=None,
 84    dv=None,
 85    time_measurement_fun=measure_time_per_timestep,
 86    backends=(CPU, GPU),
 87):
 88    products = {}
 89    results = {}
 90
 91    backend_configs = []
 92    if CPU in backends:
 93        cpu_backends_configs = [(CPU, i) for i in numba_n_threads]
 94        backend_configs = [*backend_configs, *cpu_backends_configs]
 95    if GPU in backends:
 96        backend_configs.append((GPU, None))
 97
 98    for backend_class, n_threads in backend_configs:
 99        backend_name = backend_class().__class__.__name__
100        if n_threads:
101            numba.set_num_threads(n_threads)
102            backend_name += "_" + str(numba.get_num_threads())
103
104        results[backend_name] = {}
105        products[backend_name] = {}
106
107        print()
108        print("before")
109
110        for n_sd in n_sds:
111            print("\n")
112            print(backend_name, n_sd)
113
114            results[backend_name][n_sd] = {}
115            products[backend_name][n_sd] = {}
116
117            for seed in seeds:
118                gc.collect()
119
120                particulator = setup_sim(
121                    n_sd,
122                    backend_class,
123                    seed,
124                    double_precision=double_precision,
125                    total_number=total_number,
126                    dv=dv,
127                )
128
129                print()
130                print("products before simulation")
131                print_all_products(particulator)
132
133                print()
134                print("start simulation")
135
136                elapsed_time = time_measurement_fun(particulator, n_steps)
137
138                print()
139                print("products after simulation")
140                print_all_products(particulator)
141
142                results[backend_name][n_sd][seed] = elapsed_time
143                products[backend_name][n_sd][seed] = get_prod_dict(particulator)
144
145                gc.collect()
146                del particulator
147                gc.collect()
148
149    if sim_run_filename:
150        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)
151
152    return results
153
154
155def process_results(res_d, axis=None):
156    processed_d = {}
157    for backend in res_d.keys():
158        processed_d[backend] = {}
159
160        for n_sd in res_d[backend].keys():
161            processed_d[backend][n_sd] = {}
162
163            vals = res_d[backend][n_sd].values()
164            vals = np.array(list(vals))
165
166            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
167            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
168            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
169            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)
170
171    return processed_d
172
173
174def write_to_file(filename, d):
175    assert not os.path.isfile(filename), filename
176
177    with open(filename, "w", encoding="utf-8") as fp:
178        json.dump(d, fp)
179
180
181class PlottingHelpers:
182    @staticmethod
183    def get_backend_markers(backends):
184        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
185        return markers
186
187    @staticmethod
188    def get_sorted_backend_list(processed_d):
189        backends = list(processed_d.keys())
190
191        backends.sort()
192        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
193
194        return backends
195
196    @staticmethod
197    def get_n_sd_list(backends, processed_d):
198        x = []
199
200        for backend in backends:
201            for n_sd in processed_d[backend].keys():
202                if n_sd not in x:
203                    x.append(n_sd)
204
205        x.sort()
206        return x
207
208
209def plot_processed_results(
210    processed_d,
211    *,
212    plot_label="",
213    plot_title=None,
214    metric="min",
215    plot_filename,
216    markers=None,
217    colors=None,
218):
219    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
220
221    if markers is None:
222        markers = PlottingHelpers.get_backend_markers(backends)
223
224    x = PlottingHelpers.get_n_sd_list(backends, processed_d)
225
226    for backend in backends:
227        y = []
228        for n_sd in x:
229            v = processed_d[backend][n_sd][metric]
230            assert isinstance(v, (float, int)), "must be scalar"
231            y.append(v)
232
233        if colors:
234            pyplot.plot(
235                x,
236                y,
237                label=backend + plot_label,
238                marker=markers[backend],
239                color=colors[backend],
240                linewidth=2,
241            )
242        else:
243            pyplot.plot(
244                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
245            )
246
247    pyplot.legend()
248    pyplot.xscale("log", base=2)
249    pyplot.yscale("log", base=2)
250    pyplot.ylim(bottom=2**-15, top=2**3)
251
252    pyplot.grid()
253    pyplot.xticks(x)
254    pyplot.xlabel("number of super-droplets")
255    pyplot.ylabel("wall time per timestep [s]")
256
257    if plot_title:
258        pyplot.title(plot_title)
259
260    pyplot.savefig(plot_filename)
261
262
263def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
264    filename = "same_plot.svg"
265    plot_processed_results(coal_d, plot_label="-c", plot_filename=filename)
266    plot_processed_results(break_d, plot_label="-b", plot_filename=filename)
267    plot_processed_results(coal_break_d, plot_label="-cb", plot_filename=filename)
268
269
270def plot_time_per_step(
271    processed_d,
272    n_sd,
273    *,
274    plot_label="",
275    plot_title=None,
276    metric="mean",
277    plot_filename,
278    step_from_to=None,
279):
280    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
281    markers = PlottingHelpers.get_backend_markers(backends)
282
283    for backend in backends:
284        y = processed_d[backend][n_sd][metric]
285        x = np.arange(len(y))
286
287        if step_from_to is not None:
288            x = x[step_from_to[0] : step_from_to[1]]
289            y = y[step_from_to[0] : step_from_to[1]]
290
291        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])
292
293    pyplot.legend()
294    pyplot.grid()
295    pyplot.xticks(x)
296    pyplot.xlabel("number of super-droplets")
297    pyplot.ylabel("wall time per timestep [s]")
298
299    if plot_title:
300        pyplot.title(plot_title + f"(n_sd: {n_sd})")
301
302    pyplot.savefig(plot_filename)
class ProductsNames:
14class ProductsNames:
15    super_particle_count = "super_particle_count"
16    total_volume = "total_volume"
17    total_number = "total_number"
super_particle_count = 'super_particle_count'
total_volume = 'total_volume'
total_number = 'total_number'
def get_prod_dict(particulator):
35def get_prod_dict(particulator):
36    d = {
37        ProductsNames.total_number: list(
38            particulator.products[ProductsNames.total_number].get()
39        ),
40        ProductsNames.total_volume: list(
41            particulator.products[ProductsNames.total_volume].get()
42        ),
43        ProductsNames.super_particle_count: list(
44            particulator.products[ProductsNames.super_particle_count].get()
45        ),
46    }
47
48    return d
def measure_time_for_each_step(particulator, n_steps):
51def measure_time_for_each_step(particulator, n_steps):
52    particulator.run(steps=1)
53
54    res = []
55    for _ in range(n_steps):
56        t0 = time.time()
57        particulator.run(steps=1)
58        t1 = time.time()
59
60        res.append(t1 - t0)
61
62    return res
def measure_time_per_timestep(particulator, n_steps):
65def measure_time_per_timestep(particulator, n_steps):
66    particulator.run(steps=1)
67
68    t0 = time.time()
69    particulator.run(steps=n_steps)
70    t1 = time.time()
71
72    return (t1 - t0) / n_steps
def go_benchmark( setup_sim, n_sds, n_steps, seeds, *, numba_n_threads=None, double_precision=True, sim_run_filename=None, total_number=None, dv=None, time_measurement_fun=<function measure_time_per_timestep>, backends=(functools.partial(<function _cached_backend>, backend_class=<class 'PySDM.backends.Numba'>), functools.partial(<function _cached_backend>, backend_class=<class 'PySDM.backends.ThrustRTC'>))):
 75def go_benchmark(
 76    setup_sim,
 77    n_sds,
 78    n_steps,
 79    seeds,
 80    *,
 81    numba_n_threads=None,
 82    double_precision=True,
 83    sim_run_filename=None,
 84    total_number=None,
 85    dv=None,
 86    time_measurement_fun=measure_time_per_timestep,
 87    backends=(CPU, GPU),
 88):
 89    products = {}
 90    results = {}
 91
 92    backend_configs = []
 93    if CPU in backends:
 94        cpu_backends_configs = [(CPU, i) for i in numba_n_threads]
 95        backend_configs = [*backend_configs, *cpu_backends_configs]
 96    if GPU in backends:
 97        backend_configs.append((GPU, None))
 98
 99    for backend_class, n_threads in backend_configs:
100        backend_name = backend_class().__class__.__name__
101        if n_threads:
102            numba.set_num_threads(n_threads)
103            backend_name += "_" + str(numba.get_num_threads())
104
105        results[backend_name] = {}
106        products[backend_name] = {}
107
108        print()
109        print("before")
110
111        for n_sd in n_sds:
112            print("\n")
113            print(backend_name, n_sd)
114
115            results[backend_name][n_sd] = {}
116            products[backend_name][n_sd] = {}
117
118            for seed in seeds:
119                gc.collect()
120
121                particulator = setup_sim(
122                    n_sd,
123                    backend_class,
124                    seed,
125                    double_precision=double_precision,
126                    total_number=total_number,
127                    dv=dv,
128                )
129
130                print()
131                print("products before simulation")
132                print_all_products(particulator)
133
134                print()
135                print("start simulation")
136
137                elapsed_time = time_measurement_fun(particulator, n_steps)
138
139                print()
140                print("products after simulation")
141                print_all_products(particulator)
142
143                results[backend_name][n_sd][seed] = elapsed_time
144                products[backend_name][n_sd][seed] = get_prod_dict(particulator)
145
146                gc.collect()
147                del particulator
148                gc.collect()
149
150    if sim_run_filename:
151        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)
152
153    return results
def process_results(res_d, axis=None):
156def process_results(res_d, axis=None):
157    processed_d = {}
158    for backend in res_d.keys():
159        processed_d[backend] = {}
160
161        for n_sd in res_d[backend].keys():
162            processed_d[backend][n_sd] = {}
163
164            vals = res_d[backend][n_sd].values()
165            vals = np.array(list(vals))
166
167            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
168            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
169            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
170            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)
171
172    return processed_d
def write_to_file(filename, d):
175def write_to_file(filename, d):
176    assert not os.path.isfile(filename), filename
177
178    with open(filename, "w", encoding="utf-8") as fp:
179        json.dump(d, fp)
class PlottingHelpers:
182class PlottingHelpers:
183    @staticmethod
184    def get_backend_markers(backends):
185        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
186        return markers
187
188    @staticmethod
189    def get_sorted_backend_list(processed_d):
190        backends = list(processed_d.keys())
191
192        backends.sort()
193        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
194
195        return backends
196
197    @staticmethod
198    def get_n_sd_list(backends, processed_d):
199        x = []
200
201        for backend in backends:
202            for n_sd in processed_d[backend].keys():
203                if n_sd not in x:
204                    x.append(n_sd)
205
206        x.sort()
207        return x
@staticmethod
def get_backend_markers(backends):
183    @staticmethod
184    def get_backend_markers(backends):
185        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
186        return markers
@staticmethod
def get_sorted_backend_list(processed_d):
188    @staticmethod
189    def get_sorted_backend_list(processed_d):
190        backends = list(processed_d.keys())
191
192        backends.sort()
193        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
194
195        return backends
@staticmethod
def get_n_sd_list(backends, processed_d):
197    @staticmethod
198    def get_n_sd_list(backends, processed_d):
199        x = []
200
201        for backend in backends:
202            for n_sd in processed_d[backend].keys():
203                if n_sd not in x:
204                    x.append(n_sd)
205
206        x.sort()
207        return x
def plot_processed_results( processed_d, *, plot_label='', plot_title=None, metric='min', plot_filename, markers=None, colors=None):
210def plot_processed_results(
211    processed_d,
212    *,
213    plot_label="",
214    plot_title=None,
215    metric="min",
216    plot_filename,
217    markers=None,
218    colors=None,
219):
220    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
221
222    if markers is None:
223        markers = PlottingHelpers.get_backend_markers(backends)
224
225    x = PlottingHelpers.get_n_sd_list(backends, processed_d)
226
227    for backend in backends:
228        y = []
229        for n_sd in x:
230            v = processed_d[backend][n_sd][metric]
231            assert isinstance(v, (float, int)), "must be scalar"
232            y.append(v)
233
234        if colors:
235            pyplot.plot(
236                x,
237                y,
238                label=backend + plot_label,
239                marker=markers[backend],
240                color=colors[backend],
241                linewidth=2,
242            )
243        else:
244            pyplot.plot(
245                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
246            )
247
248    pyplot.legend()
249    pyplot.xscale("log", base=2)
250    pyplot.yscale("log", base=2)
251    pyplot.ylim(bottom=2**-15, top=2**3)
252
253    pyplot.grid()
254    pyplot.xticks(x)
255    pyplot.xlabel("number of super-droplets")
256    pyplot.ylabel("wall time per timestep [s]")
257
258    if plot_title:
259        pyplot.title(plot_title)
260
261    pyplot.savefig(plot_filename)
def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
264def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
265    filename = "same_plot.svg"
266    plot_processed_results(coal_d, plot_label="-c", plot_filename=filename)
267    plot_processed_results(break_d, plot_label="-b", plot_filename=filename)
268    plot_processed_results(coal_break_d, plot_label="-cb", plot_filename=filename)
def plot_time_per_step( processed_d, n_sd, *, plot_label='', plot_title=None, metric='mean', plot_filename, step_from_to=None):
271def plot_time_per_step(
272    processed_d,
273    n_sd,
274    *,
275    plot_label="",
276    plot_title=None,
277    metric="mean",
278    plot_filename,
279    step_from_to=None,
280):
281    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
282    markers = PlottingHelpers.get_backend_markers(backends)
283
284    for backend in backends:
285        y = processed_d[backend][n_sd][metric]
286        x = np.arange(len(y))
287
288        if step_from_to is not None:
289            x = x[step_from_to[0] : step_from_to[1]]
290            y = y[step_from_to[0] : step_from_to[1]]
291
292        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])
293
294    pyplot.legend()
295    pyplot.grid()
296    pyplot.xticks(x)
297    pyplot.xlabel("number of super-droplets")
298    pyplot.ylabel("wall time per timestep [s]")
299
300    if plot_title:
301        pyplot.title(plot_title + f"(n_sd: {n_sd})")
302
303    pyplot.savefig(plot_filename)