PySDM_examples.Bulenok_2023_MasterThesis.utils

  1import gc
  2import json
  3import os
  4import time
  5
  6import numba
  7import numpy as np
  8from matplotlib import pyplot
  9
 10from PySDM.backends import CPU, GPU
 11
 12
 13class ProductsNames:
 14    super_particle_count = "super_particle_count"
 15    total_volume = "total_volume"
 16    total_number = "total_number"
 17
 18
 19def print_all_products(particulator):
 20    print(
 21        ProductsNames.total_number,
 22        particulator.products[ProductsNames.total_number].get(),
 23    )
 24    print(
 25        ProductsNames.total_volume,
 26        particulator.products[ProductsNames.total_volume].get(),
 27    )
 28    print(
 29        ProductsNames.super_particle_count,
 30        particulator.products[ProductsNames.super_particle_count].get(),
 31    )
 32
 33
 34def get_prod_dict(particulator):
 35    d = {
 36        ProductsNames.total_number: list(
 37            particulator.products[ProductsNames.total_number].get()
 38        ),
 39        ProductsNames.total_volume: list(
 40            particulator.products[ProductsNames.total_volume].get()
 41        ),
 42        ProductsNames.super_particle_count: list(
 43            particulator.products[ProductsNames.super_particle_count].get()
 44        ),
 45    }
 46
 47    return d
 48
 49
 50def measure_time_for_each_step(particulator, n_steps):
 51    particulator.run(steps=1)
 52
 53    res = []
 54    for _ in range(n_steps):
 55        t0 = time.time()
 56        particulator.run(steps=1)
 57        t1 = time.time()
 58
 59        res.append(t1 - t0)
 60
 61    return res
 62
 63
 64def measure_time_per_timestep(particulator, n_steps):
 65    particulator.run(steps=1)
 66
 67    t0 = time.time()
 68    particulator.run(steps=n_steps)
 69    t1 = time.time()
 70
 71    return (t1 - t0) / n_steps
 72
 73
 74def go_benchmark(
 75    setup_sim,
 76    n_sds,
 77    n_steps,
 78    seeds,
 79    numba_n_threads=None,
 80    double_precision=True,
 81    sim_run_filename=None,
 82    total_number=None,
 83    dv=None,
 84    time_measurement_fun=measure_time_per_timestep,
 85    backends=(CPU, GPU),
 86):
 87    products = {}
 88    results = {}
 89
 90    backend_configs = []
 91    if CPU in backends:
 92        cpu_backends_configs = [(CPU, i) for i in numba_n_threads]
 93        backend_configs = [*backend_configs, *cpu_backends_configs]
 94    if GPU in backends:
 95        backend_configs.append((GPU, None))
 96
 97    for backend_class, n_threads in backend_configs:
 98        backend_name = backend_class.__name__
 99        if n_threads:
100            numba.set_num_threads(n_threads)
101            backend_name += "_" + str(numba.get_num_threads())
102
103        results[backend_name] = {}
104        products[backend_name] = {}
105
106        print()
107        print("before")
108
109        for n_sd in n_sds:
110            print("\n")
111            print(backend_name, n_sd)
112
113            results[backend_name][n_sd] = {}
114            products[backend_name][n_sd] = {}
115
116            for seed in seeds:
117                gc.collect()
118
119                particulator = setup_sim(
120                    n_sd,
121                    backend_class,
122                    seed,
123                    double_precision=double_precision,
124                    total_number=total_number,
125                    dv=dv,
126                )
127
128                print()
129                print("products before simulation")
130                print_all_products(particulator)
131
132                print()
133                print("start simulation")
134
135                elapsed_time = time_measurement_fun(particulator, n_steps)
136
137                print()
138                print("products after simulation")
139                print_all_products(particulator)
140
141                results[backend_name][n_sd][seed] = elapsed_time
142                products[backend_name][n_sd][seed] = get_prod_dict(particulator)
143
144                gc.collect()
145                del particulator
146                gc.collect()
147
148    if sim_run_filename:
149        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)
150
151    return results
152
153
154def process_results(res_d, axis=None):
155    processed_d = {}
156    for backend in res_d.keys():
157        processed_d[backend] = {}
158
159        for n_sd in res_d[backend].keys():
160            processed_d[backend][n_sd] = {}
161
162            vals = res_d[backend][n_sd].values()
163            vals = np.array(list(vals))
164
165            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
166            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
167            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
168            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)
169
170    return processed_d
171
172
173def write_to_file(filename, d):
174    assert not os.path.isfile(filename), filename
175
176    with open(filename, "w", encoding="utf-8") as fp:
177        json.dump(d, fp)
178
179
180class PlottingHelpers:
181    @staticmethod
182    def get_backend_markers(backends):
183        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
184        return markers
185
186    @staticmethod
187    def get_sorted_backend_list(processed_d):
188        backends = list(processed_d.keys())
189
190        backends.sort()
191        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
192
193        return backends
194
195    @staticmethod
196    def get_n_sd_list(backends, processed_d):
197        x = []
198
199        for backend in backends:
200            for n_sd in processed_d[backend].keys():
201                if n_sd not in x:
202                    x.append(n_sd)
203
204        x.sort()
205        return x
206
207
208def plot_processed_results(
209    processed_d,
210    *,
211    plot_label="",
212    plot_title=None,
213    metric="min",
214    plot_filename,
215    markers=None,
216    colors=None,
217):
218    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
219
220    if markers is None:
221        markers = PlottingHelpers.get_backend_markers(backends)
222
223    x = PlottingHelpers.get_n_sd_list(backends, processed_d)
224
225    for backend in backends:
226        y = []
227        for n_sd in x:
228            v = processed_d[backend][n_sd][metric]
229            assert isinstance(v, (float, int)), "must be scalar"
230            y.append(v)
231
232        if colors:
233            pyplot.plot(
234                x,
235                y,
236                label=backend + plot_label,
237                marker=markers[backend],
238                color=colors[backend],
239                linewidth=2,
240            )
241        else:
242            pyplot.plot(
243                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
244            )
245
246    pyplot.legend()
247    pyplot.xscale("log", base=2)
248    pyplot.yscale("log", base=2)
249    pyplot.ylim(bottom=2**-15, top=2**3)
250
251    pyplot.grid()
252    pyplot.xticks(x)
253    pyplot.xlabel("number of super-droplets")
254    pyplot.ylabel("wall time per timestep [s]")
255
256    if plot_title:
257        pyplot.title(plot_title)
258
259    pyplot.savefig(plot_filename)
260
261
262def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
263    filename = "same_plot.svg"
264    plot_processed_results(coal_d, plot_label="-c", plot_filename=filename)
265    plot_processed_results(break_d, plot_label="-b", plot_filename=filename)
266    plot_processed_results(coal_break_d, plot_label="-cb", plot_filename=filename)
267
268
269def plot_time_per_step(
270    processed_d,
271    n_sd,
272    *,
273    plot_label="",
274    plot_title=None,
275    metric="mean",
276    plot_filename,
277    step_from_to=None,
278):
279    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
280    markers = PlottingHelpers.get_backend_markers(backends)
281
282    for backend in backends:
283        y = processed_d[backend][n_sd][metric]
284        x = np.arange(len(y))
285
286        if step_from_to is not None:
287            x = x[step_from_to[0] : step_from_to[1]]
288            y = y[step_from_to[0] : step_from_to[1]]
289
290        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])
291
292    pyplot.legend()
293    pyplot.grid()
294    pyplot.xticks(x)
295    pyplot.xlabel("number of super-droplets")
296    pyplot.ylabel("wall time per timestep [s]")
297
298    if plot_title:
299        pyplot.title(plot_title + f"(n_sd: {n_sd})")
300
301    pyplot.savefig(plot_filename)
class ProductsNames:
14class ProductsNames:
15    super_particle_count = "super_particle_count"
16    total_volume = "total_volume"
17    total_number = "total_number"
super_particle_count = 'super_particle_count'
total_volume = 'total_volume'
total_number = 'total_number'
def get_prod_dict(particulator):
35def get_prod_dict(particulator):
36    d = {
37        ProductsNames.total_number: list(
38            particulator.products[ProductsNames.total_number].get()
39        ),
40        ProductsNames.total_volume: list(
41            particulator.products[ProductsNames.total_volume].get()
42        ),
43        ProductsNames.super_particle_count: list(
44            particulator.products[ProductsNames.super_particle_count].get()
45        ),
46    }
47
48    return d
def measure_time_for_each_step(particulator, n_steps):
51def measure_time_for_each_step(particulator, n_steps):
52    particulator.run(steps=1)
53
54    res = []
55    for _ in range(n_steps):
56        t0 = time.time()
57        particulator.run(steps=1)
58        t1 = time.time()
59
60        res.append(t1 - t0)
61
62    return res
def measure_time_per_timestep(particulator, n_steps):
65def measure_time_per_timestep(particulator, n_steps):
66    particulator.run(steps=1)
67
68    t0 = time.time()
69    particulator.run(steps=n_steps)
70    t1 = time.time()
71
72    return (t1 - t0) / n_steps
def go_benchmark( setup_sim, n_sds, n_steps, seeds, numba_n_threads=None, double_precision=True, sim_run_filename=None, total_number=None, dv=None, time_measurement_fun=<function measure_time_per_timestep>, backends=(<class 'PySDM.backends.numba.Numba'>, <class 'PySDM.backends.thrust_rtc.ThrustRTC'>)):
 75def go_benchmark(
 76    setup_sim,
 77    n_sds,
 78    n_steps,
 79    seeds,
 80    numba_n_threads=None,
 81    double_precision=True,
 82    sim_run_filename=None,
 83    total_number=None,
 84    dv=None,
 85    time_measurement_fun=measure_time_per_timestep,
 86    backends=(CPU, GPU),
 87):
 88    products = {}
 89    results = {}
 90
 91    backend_configs = []
 92    if CPU in backends:
 93        cpu_backends_configs = [(CPU, i) for i in numba_n_threads]
 94        backend_configs = [*backend_configs, *cpu_backends_configs]
 95    if GPU in backends:
 96        backend_configs.append((GPU, None))
 97
 98    for backend_class, n_threads in backend_configs:
 99        backend_name = backend_class.__name__
100        if n_threads:
101            numba.set_num_threads(n_threads)
102            backend_name += "_" + str(numba.get_num_threads())
103
104        results[backend_name] = {}
105        products[backend_name] = {}
106
107        print()
108        print("before")
109
110        for n_sd in n_sds:
111            print("\n")
112            print(backend_name, n_sd)
113
114            results[backend_name][n_sd] = {}
115            products[backend_name][n_sd] = {}
116
117            for seed in seeds:
118                gc.collect()
119
120                particulator = setup_sim(
121                    n_sd,
122                    backend_class,
123                    seed,
124                    double_precision=double_precision,
125                    total_number=total_number,
126                    dv=dv,
127                )
128
129                print()
130                print("products before simulation")
131                print_all_products(particulator)
132
133                print()
134                print("start simulation")
135
136                elapsed_time = time_measurement_fun(particulator, n_steps)
137
138                print()
139                print("products after simulation")
140                print_all_products(particulator)
141
142                results[backend_name][n_sd][seed] = elapsed_time
143                products[backend_name][n_sd][seed] = get_prod_dict(particulator)
144
145                gc.collect()
146                del particulator
147                gc.collect()
148
149    if sim_run_filename:
150        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)
151
152    return results
def process_results(res_d, axis=None):
155def process_results(res_d, axis=None):
156    processed_d = {}
157    for backend in res_d.keys():
158        processed_d[backend] = {}
159
160        for n_sd in res_d[backend].keys():
161            processed_d[backend][n_sd] = {}
162
163            vals = res_d[backend][n_sd].values()
164            vals = np.array(list(vals))
165
166            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
167            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
168            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
169            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)
170
171    return processed_d
def write_to_file(filename, d):
174def write_to_file(filename, d):
175    assert not os.path.isfile(filename), filename
176
177    with open(filename, "w", encoding="utf-8") as fp:
178        json.dump(d, fp)
class PlottingHelpers:
181class PlottingHelpers:
182    @staticmethod
183    def get_backend_markers(backends):
184        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
185        return markers
186
187    @staticmethod
188    def get_sorted_backend_list(processed_d):
189        backends = list(processed_d.keys())
190
191        backends.sort()
192        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
193
194        return backends
195
196    @staticmethod
197    def get_n_sd_list(backends, processed_d):
198        x = []
199
200        for backend in backends:
201            for n_sd in processed_d[backend].keys():
202                if n_sd not in x:
203                    x.append(n_sd)
204
205        x.sort()
206        return x
@staticmethod
def get_backend_markers(backends):
182    @staticmethod
183    def get_backend_markers(backends):
184        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
185        return markers
@staticmethod
def get_sorted_backend_list(processed_d):
187    @staticmethod
188    def get_sorted_backend_list(processed_d):
189        backends = list(processed_d.keys())
190
191        backends.sort()
192        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)
193
194        return backends
@staticmethod
def get_n_sd_list(backends, processed_d):
196    @staticmethod
197    def get_n_sd_list(backends, processed_d):
198        x = []
199
200        for backend in backends:
201            for n_sd in processed_d[backend].keys():
202                if n_sd not in x:
203                    x.append(n_sd)
204
205        x.sort()
206        return x
def plot_processed_results( processed_d, *, plot_label='', plot_title=None, metric='min', plot_filename, markers=None, colors=None):
209def plot_processed_results(
210    processed_d,
211    *,
212    plot_label="",
213    plot_title=None,
214    metric="min",
215    plot_filename,
216    markers=None,
217    colors=None,
218):
219    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
220
221    if markers is None:
222        markers = PlottingHelpers.get_backend_markers(backends)
223
224    x = PlottingHelpers.get_n_sd_list(backends, processed_d)
225
226    for backend in backends:
227        y = []
228        for n_sd in x:
229            v = processed_d[backend][n_sd][metric]
230            assert isinstance(v, (float, int)), "must be scalar"
231            y.append(v)
232
233        if colors:
234            pyplot.plot(
235                x,
236                y,
237                label=backend + plot_label,
238                marker=markers[backend],
239                color=colors[backend],
240                linewidth=2,
241            )
242        else:
243            pyplot.plot(
244                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
245            )
246
247    pyplot.legend()
248    pyplot.xscale("log", base=2)
249    pyplot.yscale("log", base=2)
250    pyplot.ylim(bottom=2**-15, top=2**3)
251
252    pyplot.grid()
253    pyplot.xticks(x)
254    pyplot.xlabel("number of super-droplets")
255    pyplot.ylabel("wall time per timestep [s]")
256
257    if plot_title:
258        pyplot.title(plot_title)
259
260    pyplot.savefig(plot_filename)
def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
263def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
264    filename = "same_plot.svg"
265    plot_processed_results(coal_d, plot_label="-c", plot_filename=filename)
266    plot_processed_results(break_d, plot_label="-b", plot_filename=filename)
267    plot_processed_results(coal_break_d, plot_label="-cb", plot_filename=filename)
def plot_time_per_step( processed_d, n_sd, *, plot_label='', plot_title=None, metric='mean', plot_filename, step_from_to=None):
270def plot_time_per_step(
271    processed_d,
272    n_sd,
273    *,
274    plot_label="",
275    plot_title=None,
276    metric="mean",
277    plot_filename,
278    step_from_to=None,
279):
280    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
281    markers = PlottingHelpers.get_backend_markers(backends)
282
283    for backend in backends:
284        y = processed_d[backend][n_sd][metric]
285        x = np.arange(len(y))
286
287        if step_from_to is not None:
288            x = x[step_from_to[0] : step_from_to[1]]
289            y = y[step_from_to[0] : step_from_to[1]]
290
291        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])
292
293    pyplot.legend()
294    pyplot.grid()
295    pyplot.xticks(x)
296    pyplot.xlabel("number of super-droplets")
297    pyplot.ylabel("wall time per timestep [s]")
298
299    if plot_title:
300        pyplot.title(plot_title + f"(n_sd: {n_sd})")
301
302    pyplot.savefig(plot_filename)