PySDM_examples.Bulenok_2023_MasterThesis.utils

  1import gc
  2import json
  3import os
  4import time
  5
  6import numba
  7import numpy as np
  8from matplotlib import pyplot
  9from open_atmos_jupyter_utils import show_plot
 10
 11from PySDM.backends import CPU, GPU
 12
 13
 14class ProductsNames:
 15    super_particle_count = "super_particle_count"
 16    total_volume = "total_volume"
 17    total_number = "total_number"
 18
 19
 20def print_all_products(particulator):
 21    print(
 22        ProductsNames.total_number,
 23        particulator.products[ProductsNames.total_number].get(),
 24    )
 25    print(
 26        ProductsNames.total_volume,
 27        particulator.products[ProductsNames.total_volume].get(),
 28    )
 29    print(
 30        ProductsNames.super_particle_count,
 31        particulator.products[ProductsNames.super_particle_count].get(),
 32    )
 33
 34
 35def get_prod_dict(particulator):
 36    d = {
 37        ProductsNames.total_number: list(
 38            particulator.products[ProductsNames.total_number].get()
 39        ),
 40        ProductsNames.total_volume: list(
 41            particulator.products[ProductsNames.total_volume].get()
 42        ),
 43        ProductsNames.super_particle_count: list(
 44            particulator.products[ProductsNames.super_particle_count].get()
 45        ),
 46    }
 47
 48    return d
 49
 50
 51def measure_time_for_each_step(particulator, n_steps):
 52    particulator.run(steps=1)
 53
 54    res = []
 55    for _ in range(n_steps):
 56        t0 = time.time()
 57        particulator.run(steps=1)
 58        t1 = time.time()
 59
 60        res.append(t1 - t0)
 61
 62    return res
 63
 64
 65def measure_time_per_timestep(particulator, n_steps):
 66    particulator.run(steps=1)
 67
 68    t0 = time.time()
 69    particulator.run(steps=n_steps)
 70    t1 = time.time()
 71
 72    return (t1 - t0) / n_steps
 73
 74
 75def go_benchmark(
 76    setup_sim,
 77    n_sds,
 78    n_steps,
 79    seeds,
 80    numba_n_threads=None,
 81    double_precision=True,
 82    sim_run_filename=None,
 83    total_number=None,
 84    dv=None,
 85    time_measurement_fun=measure_time_per_timestep,
 86    backends=(CPU, GPU),
 87):
 88    products = {}
 89    results = {}
 90
 91    backend_configs = []
 92    if CPU in backends:
 93        cpu_backends_configs = [(CPU, i) for i in numba_n_threads]
 94        backend_configs = [*backend_configs, *cpu_backends_configs]
 95    if GPU in backends:
 96        backend_configs.append((GPU, None))
 97
 98    for backend_class, n_threads in backend_configs:
 99        backend_name = backend_class.__name__
100        if n_threads:
101            numba.set_num_threads(n_threads)
102            backend_name += "_" + str(numba.get_num_threads())
103
104        results[backend_name] = {}
105        products[backend_name] = {}
106
107        print()
108        print("before")
109
110        for n_sd in n_sds:
111            print("\n")
112            print(backend_name, n_sd)
113
114            results[backend_name][n_sd] = {}
115            products[backend_name][n_sd] = {}
116
117            for seed in seeds:
118                gc.collect()
119
120                particulator = setup_sim(
121                    n_sd,
122                    backend_class,
123                    seed,
124                    double_precision=double_precision,
125                    total_number=total_number,
126                    dv=dv,
127                )
128
129                print()
130                print("products before simulation")
131                print_all_products(particulator)
132
133                print()
134                print("start simulation")
135
136                elapsed_time = time_measurement_fun(particulator, n_steps)
137
138                print()
139                print("products after simulation")
140                print_all_products(particulator)
141
142                results[backend_name][n_sd][seed] = elapsed_time
143                products[backend_name][n_sd][seed] = get_prod_dict(particulator)
144
145                gc.collect()
146                del particulator
147                gc.collect()
148
149    if sim_run_filename:
150        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)
151
152    return results
153
154
155def process_results(res_d, axis=None):
156    processed_d = {}
157    for backend in res_d.keys():
158        processed_d[backend] = {}
159
160        for n_sd in res_d[backend].keys():
161            processed_d[backend][n_sd] = {}
162
163            vals = res_d[backend][n_sd].values()
164            vals = np.array(list(vals))
165
166            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
167            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
168            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
169            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)
170
171    return processed_d
172
173
174def write_to_file(filename, d):
175    assert not os.path.isfile(filename), filename
176
177    with open(filename, "w", encoding="utf-8") as fp:
178        json.dump(d, fp)
179
180
181class PlottingHelpers:
182    @staticmethod
183    def get_backend_markers(backends):
184        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
185        return markers
186
187    @staticmethod
188    def get_sorted_backend_list(processed_d):
189        backends = list(processed_d.keys())
190
191        backends.sort()
192        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
193
194        return backends
195
196    @staticmethod
197    def get_n_sd_list(backends, processed_d):
198        x = []
199
200        for backend in backends:
201            for n_sd in processed_d[backend].keys():
202                if n_sd not in x:
203                    x.append(n_sd)
204
205        x.sort()
206        return x
207
208
209def plot_processed_results(
210    processed_d,
211    show=True,
212    plot_label="",
213    plot_title=None,
214    metric="min",
215    plot_filename=None,
216    markers=None,
217    colors=None,
218):
219    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
220
221    if markers is None:
222        markers = PlottingHelpers.get_backend_markers(backends)
223
224    x = PlottingHelpers.get_n_sd_list(backends, processed_d)
225
226    for backend in backends:
227        y = []
228        for n_sd in x:
229            v = processed_d[backend][n_sd][metric]
230            assert isinstance(v, (float, int)), "must be scalar"
231            y.append(v)
232
233        if colors:
234            pyplot.plot(
235                x,
236                y,
237                label=backend + plot_label,
238                marker=markers[backend],
239                color=colors[backend],
240                linewidth=2,
241            )
242        else:
243            pyplot.plot(
244                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
245            )
246
247    pyplot.legend()
248    pyplot.xscale("log", base=2)
249    pyplot.yscale("log", base=2)
250    pyplot.ylim(bottom=2**-15, top=2**3)
251
252    pyplot.grid()
253    pyplot.xticks(x)
254    pyplot.xlabel("number of super-droplets")
255    pyplot.ylabel("wall time per timestep [s]")
256
257    if plot_title:
258        pyplot.title(plot_title)
259
260    if show:
261        if plot_filename:
262            show_plot(filename=plot_filename)
263        else:
264            pyplot.show()
265
266
267def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
268    plot_processed_results(coal_d, plot_label="-c", show=False)
269    plot_processed_results(break_d, plot_label="-b", show=False)
270    plot_processed_results(coal_break_d, plot_label="-cb", show=False)
271
272    show_plot()
273
274
275def plot_time_per_step(
276    processed_d,
277    n_sd,
278    show=True,
279    plot_label="",
280    plot_title=None,
281    metric="mean",
282    plot_filename=None,
283    step_from_to=None,
284):
285    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
286
287    markers = PlottingHelpers.get_backend_markers(backends)
288
289    for backend in backends:
290        y = processed_d[backend][n_sd][metric]
291        x = np.arange(len(y))
292
293        if step_from_to is not None:
294            x = x[step_from_to[0] : step_from_to[1]]
295            y = y[step_from_to[0] : step_from_to[1]]
296
297        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])
298
299    pyplot.legend()
300    pyplot.grid()
301    pyplot.xticks(x)
302    pyplot.xlabel("number of super-droplets")
303    pyplot.ylabel("wall time per timestep [s]")
304
305    if plot_title:
306        pyplot.title(plot_title + f"(n_sd: {n_sd})")
307
308    if show:
309        if plot_filename:
310            show_plot(filename=plot_filename)
311        else:
312            pyplot.show()
class ProductsNames:
15class ProductsNames:
16    super_particle_count = "super_particle_count"
17    total_volume = "total_volume"
18    total_number = "total_number"
super_particle_count = 'super_particle_count'
total_volume = 'total_volume'
total_number = 'total_number'
def get_prod_dict(particulator):
36def get_prod_dict(particulator):
37    d = {
38        ProductsNames.total_number: list(
39            particulator.products[ProductsNames.total_number].get()
40        ),
41        ProductsNames.total_volume: list(
42            particulator.products[ProductsNames.total_volume].get()
43        ),
44        ProductsNames.super_particle_count: list(
45            particulator.products[ProductsNames.super_particle_count].get()
46        ),
47    }
48
49    return d
def measure_time_for_each_step(particulator, n_steps):
52def measure_time_for_each_step(particulator, n_steps):
53    particulator.run(steps=1)
54
55    res = []
56    for _ in range(n_steps):
57        t0 = time.time()
58        particulator.run(steps=1)
59        t1 = time.time()
60
61        res.append(t1 - t0)
62
63    return res
def measure_time_per_timestep(particulator, n_steps):
66def measure_time_per_timestep(particulator, n_steps):
67    particulator.run(steps=1)
68
69    t0 = time.time()
70    particulator.run(steps=n_steps)
71    t1 = time.time()
72
73    return (t1 - t0) / n_steps
def go_benchmark( setup_sim, n_sds, n_steps, seeds, numba_n_threads=None, double_precision=True, sim_run_filename=None, total_number=None, dv=None, time_measurement_fun=<function measure_time_per_timestep>, backends=(<class 'PySDM.backends.numba.Numba'>, <class 'PySDM.backends.thrust_rtc.ThrustRTC'>)):
 76def go_benchmark(
 77    setup_sim,
 78    n_sds,
 79    n_steps,
 80    seeds,
 81    numba_n_threads=None,
 82    double_precision=True,
 83    sim_run_filename=None,
 84    total_number=None,
 85    dv=None,
 86    time_measurement_fun=measure_time_per_timestep,
 87    backends=(CPU, GPU),
 88):
 89    products = {}
 90    results = {}
 91
 92    backend_configs = []
 93    if CPU in backends:
 94        cpu_backends_configs = [(CPU, i) for i in numba_n_threads]
 95        backend_configs = [*backend_configs, *cpu_backends_configs]
 96    if GPU in backends:
 97        backend_configs.append((GPU, None))
 98
 99    for backend_class, n_threads in backend_configs:
100        backend_name = backend_class.__name__
101        if n_threads:
102            numba.set_num_threads(n_threads)
103            backend_name += "_" + str(numba.get_num_threads())
104
105        results[backend_name] = {}
106        products[backend_name] = {}
107
108        print()
109        print("before")
110
111        for n_sd in n_sds:
112            print("\n")
113            print(backend_name, n_sd)
114
115            results[backend_name][n_sd] = {}
116            products[backend_name][n_sd] = {}
117
118            for seed in seeds:
119                gc.collect()
120
121                particulator = setup_sim(
122                    n_sd,
123                    backend_class,
124                    seed,
125                    double_precision=double_precision,
126                    total_number=total_number,
127                    dv=dv,
128                )
129
130                print()
131                print("products before simulation")
132                print_all_products(particulator)
133
134                print()
135                print("start simulation")
136
137                elapsed_time = time_measurement_fun(particulator, n_steps)
138
139                print()
140                print("products after simulation")
141                print_all_products(particulator)
142
143                results[backend_name][n_sd][seed] = elapsed_time
144                products[backend_name][n_sd][seed] = get_prod_dict(particulator)
145
146                gc.collect()
147                del particulator
148                gc.collect()
149
150    if sim_run_filename:
151        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)
152
153    return results
def process_results(res_d, axis=None):
156def process_results(res_d, axis=None):
157    processed_d = {}
158    for backend in res_d.keys():
159        processed_d[backend] = {}
160
161        for n_sd in res_d[backend].keys():
162            processed_d[backend][n_sd] = {}
163
164            vals = res_d[backend][n_sd].values()
165            vals = np.array(list(vals))
166
167            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
168            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
169            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
170            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)
171
172    return processed_d
def write_to_file(filename, d):
175def write_to_file(filename, d):
176    assert not os.path.isfile(filename), filename
177
178    with open(filename, "w", encoding="utf-8") as fp:
179        json.dump(d, fp)
class PlottingHelpers:
182class PlottingHelpers:
183    @staticmethod
184    def get_backend_markers(backends):
185        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
186        return markers
187
188    @staticmethod
189    def get_sorted_backend_list(processed_d):
190        backends = list(processed_d.keys())
191
192        backends.sort()
193        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
194
195        return backends
196
197    @staticmethod
198    def get_n_sd_list(backends, processed_d):
199        x = []
200
201        for backend in backends:
202            for n_sd in processed_d[backend].keys():
203                if n_sd not in x:
204                    x.append(n_sd)
205
206        x.sort()
207        return x
@staticmethod
def get_backend_markers(backends):
183    @staticmethod
184    def get_backend_markers(backends):
185        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
186        return markers
@staticmethod
def get_sorted_backend_list(processed_d):
188    @staticmethod
189    def get_sorted_backend_list(processed_d):
190        backends = list(processed_d.keys())
191
192        backends.sort()
193        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
194
195        return backends
@staticmethod
def get_n_sd_list(backends, processed_d):
197    @staticmethod
198    def get_n_sd_list(backends, processed_d):
199        x = []
200
201        for backend in backends:
202            for n_sd in processed_d[backend].keys():
203                if n_sd not in x:
204                    x.append(n_sd)
205
206        x.sort()
207        return x
def plot_processed_results( processed_d, show=True, plot_label='', plot_title=None, metric='min', plot_filename=None, markers=None, colors=None):
210def plot_processed_results(
211    processed_d,
212    show=True,
213    plot_label="",
214    plot_title=None,
215    metric="min",
216    plot_filename=None,
217    markers=None,
218    colors=None,
219):
220    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
221
222    if markers is None:
223        markers = PlottingHelpers.get_backend_markers(backends)
224
225    x = PlottingHelpers.get_n_sd_list(backends, processed_d)
226
227    for backend in backends:
228        y = []
229        for n_sd in x:
230            v = processed_d[backend][n_sd][metric]
231            assert isinstance(v, (float, int)), "must be scalar"
232            y.append(v)
233
234        if colors:
235            pyplot.plot(
236                x,
237                y,
238                label=backend + plot_label,
239                marker=markers[backend],
240                color=colors[backend],
241                linewidth=2,
242            )
243        else:
244            pyplot.plot(
245                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
246            )
247
248    pyplot.legend()
249    pyplot.xscale("log", base=2)
250    pyplot.yscale("log", base=2)
251    pyplot.ylim(bottom=2**-15, top=2**3)
252
253    pyplot.grid()
254    pyplot.xticks(x)
255    pyplot.xlabel("number of super-droplets")
256    pyplot.ylabel("wall time per timestep [s]")
257
258    if plot_title:
259        pyplot.title(plot_title)
260
261    if show:
262        if plot_filename:
263            show_plot(filename=plot_filename)
264        else:
265            pyplot.show()
def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
268def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
269    plot_processed_results(coal_d, plot_label="-c", show=False)
270    plot_processed_results(break_d, plot_label="-b", show=False)
271    plot_processed_results(coal_break_d, plot_label="-cb", show=False)
272
273    show_plot()
def plot_time_per_step( processed_d, n_sd, show=True, plot_label='', plot_title=None, metric='mean', plot_filename=None, step_from_to=None):
276def plot_time_per_step(
277    processed_d,
278    n_sd,
279    show=True,
280    plot_label="",
281    plot_title=None,
282    metric="mean",
283    plot_filename=None,
284    step_from_to=None,
285):
286    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
287
288    markers = PlottingHelpers.get_backend_markers(backends)
289
290    for backend in backends:
291        y = processed_d[backend][n_sd][metric]
292        x = np.arange(len(y))
293
294        if step_from_to is not None:
295            x = x[step_from_to[0] : step_from_to[1]]
296            y = y[step_from_to[0] : step_from_to[1]]
297
298        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])
299
300    pyplot.legend()
301    pyplot.grid()
302    pyplot.xticks(x)
303    pyplot.xlabel("number of super-droplets")
304    pyplot.ylabel("wall time per timestep [s]")
305
306    if plot_title:
307        pyplot.title(plot_title + f"(n_sd: {n_sd})")
308
309    if show:
310        if plot_filename:
311            show_plot(filename=plot_filename)
312        else:
313            pyplot.show()