# PySDM_examples.Bulenok_2023_MasterThesis.utils — benchmarking and plotting helpers
import gc
import json
import os
import time

import numba
import numpy as np
from matplotlib import pyplot

from PySDM.backends import CPU, GPU


class ProductsNames:
    """String keys of the PySDM products inspected by the benchmarks."""

    super_particle_count = "super_particle_count"
    total_volume = "total_volume"
    total_number = "total_number"


def print_all_products(particulator):
    """Print the current value of each diagnostic product of *particulator*."""
    print(
        ProductsNames.total_number,
        particulator.products[ProductsNames.total_number].get(),
    )
    print(
        ProductsNames.total_volume,
        particulator.products[ProductsNames.total_volume].get(),
    )
    print(
        ProductsNames.super_particle_count,
        particulator.products[ProductsNames.super_particle_count].get(),
    )


def get_prod_dict(particulator):
    """Return the diagnostic products as a JSON-serializable dict of lists."""
    d = {
        ProductsNames.total_number: list(
            particulator.products[ProductsNames.total_number].get()
        ),
        ProductsNames.total_volume: list(
            particulator.products[ProductsNames.total_volume].get()
        ),
        ProductsNames.super_particle_count: list(
            particulator.products[ProductsNames.super_particle_count].get()
        ),
    }

    return d


def measure_time_for_each_step(particulator, n_steps):
    """Return a list of per-step wall times [s] over *n_steps* steps.

    One warm-up step is run first (not measured) so that JIT compilation
    and allocations do not pollute the timings.
    """
    particulator.run(steps=1)  # warm-up, excluded from measurements

    res = []
    for _ in range(n_steps):
        t0 = time.time()
        particulator.run(steps=1)
        t1 = time.time()

        res.append(t1 - t0)

    return res


def measure_time_per_timestep(particulator, n_steps):
    """Return the mean wall time per step [s] over *n_steps* steps.

    One warm-up step is run first (not measured).
    """
    particulator.run(steps=1)  # warm-up, excluded from measurements

    t0 = time.time()
    particulator.run(steps=n_steps)
    t1 = time.time()

    return (t1 - t0) / n_steps


def go_benchmark(
    setup_sim,
    n_sds,
    n_steps,
    seeds,
    *,
    numba_n_threads=None,
    double_precision=True,
    sim_run_filename=None,
    total_number=None,
    dv=None,
    time_measurement_fun=measure_time_per_timestep,
    backends=(CPU, GPU),
):
    """Benchmark simulations across backends, super-droplet counts and seeds.

    Parameters
    ----------
    setup_sim : callable
        Factory called as ``setup_sim(n_sd, backend_class, seed,
        double_precision=..., total_number=..., dv=...)`` returning a
        particulator exposing the products in `ProductsNames`.
    n_sds : iterable of int
        Super-droplet counts to benchmark.
    n_steps : int
        Number of measured timesteps per run.
    seeds : iterable of int
        One run per seed.
    numba_n_threads : iterable of int, optional
        CPU thread counts to benchmark; ``None`` means a single CPU run with
        numba's current thread count left untouched.
    time_measurement_fun : callable
        `measure_time_per_timestep` or `measure_time_for_each_step`.

    Returns
    -------
    dict
        ``results[backend_name][n_sd][seed]`` -> elapsed time(s).

    Side effects: if *sim_run_filename* is given, products of every run are
    dumped to ``f"{sim_run_filename}-products.txt"`` (must not exist yet).
    """
    products = {}
    results = {}

    backend_configs = []
    if CPU in backends:
        # bug fix: the default numba_n_threads=None used to be iterated
        # directly, raising TypeError; None now yields one CPU config that
        # keeps numba's current thread count
        thread_counts = (None,) if numba_n_threads is None else numba_n_threads
        backend_configs.extend((CPU, i) for i in thread_counts)
    if GPU in backends:
        backend_configs.append((GPU, None))

    for backend_class, n_threads in backend_configs:
        # instantiate to resolve the concrete class name (backends may be
        # factories/partials rather than plain classes)
        backend_name = backend_class().__class__.__name__
        if n_threads:
            numba.set_num_threads(n_threads)
            backend_name += "_" + str(numba.get_num_threads())

        results[backend_name] = {}
        products[backend_name] = {}

        print()
        print("before")

        for n_sd in n_sds:
            print("\n")
            print(backend_name, n_sd)

            results[backend_name][n_sd] = {}
            products[backend_name][n_sd] = {}

            for seed in seeds:
                gc.collect()

                particulator = setup_sim(
                    n_sd,
                    backend_class,
                    seed,
                    double_precision=double_precision,
                    total_number=total_number,
                    dv=dv,
                )

                print()
                print("products before simulation")
                print_all_products(particulator)

                print()
                print("start simulation")

                elapsed_time = time_measurement_fun(particulator, n_steps)

                print()
                print("products after simulation")
                print_all_products(particulator)

                results[backend_name][n_sd][seed] = elapsed_time
                products[backend_name][n_sd][seed] = get_prod_dict(particulator)

                # drop the simulation state before the next (possibly large)
                # allocation to keep peak memory down
                gc.collect()
                del particulator
                gc.collect()

    if sim_run_filename:
        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)

    return results


def process_results(res_d, axis=None):
    """Aggregate per-seed results into mean/std/max/min per backend and n_sd.

    *res_d* is the nested dict returned by `go_benchmark`; *axis* is passed
    through to numpy (relevant when each entry is a per-step list).
    """
    processed_d = {}
    for backend in res_d.keys():
        processed_d[backend] = {}

        for n_sd in res_d[backend].keys():
            processed_d[backend][n_sd] = {}

            vals = res_d[backend][n_sd].values()
            vals = np.array(list(vals))

            processed_d[backend][n_sd]["mean"] = np.mean(vals, axis=axis)
            processed_d[backend][n_sd]["std"] = np.std(vals, axis=axis)
            processed_d[backend][n_sd]["max"] = np.amax(vals, axis=axis)
            processed_d[backend][n_sd]["min"] = np.amin(vals, axis=axis)

    return processed_d


def write_to_file(filename, d):
    """Dump dict *d* as JSON to *filename*; refuses to overwrite existing files."""
    # deliberate assert (not raise): benchmark runs must never clobber results
    assert not os.path.isfile(filename), filename

    with open(filename, "w", encoding="utf-8") as fp:
        json.dump(d, fp)


class PlottingHelpers:
    """Static helpers for arranging benchmark results on plots."""

    @staticmethod
    def get_backend_markers(backends):
        """Map each backend label to a marker: 'o' for Numba, 'x' otherwise."""
        markers = {backend: "o" if "Numba" in backend else "x" for backend in backends}
        return markers

    @staticmethod
    def get_sorted_backend_list(processed_d):
        """Backend labels sorted with Numba_<k> first, ordered by thread count k."""
        backends = list(processed_d.keys())

        backends.sort()
        # stable sort: Numba backends by numeric thread count, others last
        backends.sort(key=lambda x: int(x[6:]) if "Numba_" in x else 100**10)

        return backends

    @staticmethod
    def get_n_sd_list(backends, processed_d):
        """Sorted union of n_sd values present across the given backends."""
        x = []

        for backend in backends:
            for n_sd in processed_d[backend].keys():
                if n_sd not in x:
                    x.append(n_sd)

        x.sort()
        return x


def plot_processed_results(
    processed_d,
    *,
    plot_label="",
    plot_title=None,
    metric="min",
    plot_filename,
    markers=None,
    colors=None,
):
    """Plot *metric* vs n_sd (log-log, base 2) per backend and save the figure."""
    backends = PlottingHelpers.get_sorted_backend_list(processed_d)

    if markers is None:
        markers = PlottingHelpers.get_backend_markers(backends)

    x = PlottingHelpers.get_n_sd_list(backends, processed_d)

    for backend in backends:
        y = []
        for n_sd in x:
            v = processed_d[backend][n_sd][metric]
            assert isinstance(v, (float, int)), "must be scalar"
            y.append(v)

        if colors:
            pyplot.plot(
                x,
                y,
                label=backend + plot_label,
                marker=markers[backend],
                color=colors[backend],
                linewidth=2,
            )
        else:
            pyplot.plot(
                x, y, label=backend + plot_label, marker=markers[backend], linewidth=2
            )

    pyplot.legend()
    pyplot.xscale("log", base=2)
    pyplot.yscale("log", base=2)
    pyplot.ylim(bottom=2**-15, top=2**3)

    pyplot.grid()
    pyplot.xticks(x)
    pyplot.xlabel("number of super-droplets")
    pyplot.ylabel("wall time per timestep [s]")

    if plot_title:
        pyplot.title(plot_title)

    pyplot.savefig(plot_filename)


def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
    """Overlay coalescence, breakup and combined results on one SVG figure."""
    filename = "same_plot.svg"
    plot_processed_results(coal_d, plot_label="-c", plot_filename=filename)
    plot_processed_results(break_d, plot_label="-b", plot_filename=filename)
    plot_processed_results(coal_break_d, plot_label="-cb", plot_filename=filename)


def plot_time_per_step(
    processed_d,
    n_sd,
    *,
    plot_label="",
    plot_title=None,
    metric="mean",
    plot_filename,
    step_from_to=None,
):
    """Plot per-step wall times for one *n_sd* across backends; save the figure.

    *step_from_to* optionally restricts the plotted steps to [from, to).
    """
    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
    markers = PlottingHelpers.get_backend_markers(backends)

    for backend in backends:
        y = processed_d[backend][n_sd][metric]
        x = np.arange(len(y))

        if step_from_to is not None:
            x = x[step_from_to[0] : step_from_to[1]]
            y = y[step_from_to[0] : step_from_to[1]]

        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])

    pyplot.legend()
    pyplot.grid()
    pyplot.xticks(x)
    pyplot.xlabel("number of super-droplets")
    pyplot.ylabel("wall time per timestep [s]")

    if plot_title:
        pyplot.title(plot_title + f"(n_sd: {n_sd})")

    pyplot.savefig(plot_filename)
class
ProductsNames:
def
print_all_products(particulator):
def print_all_products(particulator):
    """Print the current value of each diagnostic product of *particulator*."""
    for product_name in (
        ProductsNames.total_number,
        ProductsNames.total_volume,
        ProductsNames.super_particle_count,
    ):
        print(product_name, particulator.products[product_name].get())
def
get_prod_dict(particulator):
def get_prod_dict(particulator):
    """Return the diagnostic products as a JSON-serializable dict of lists."""
    product_names = (
        ProductsNames.total_number,
        ProductsNames.total_volume,
        ProductsNames.super_particle_count,
    )
    return {
        name: list(particulator.products[name].get()) for name in product_names
    }
def
measure_time_for_each_step(particulator, n_steps):
def
measure_time_per_timestep(particulator, n_steps):
def
go_benchmark( setup_sim, n_sds, n_steps, seeds, *, numba_n_threads=None, double_precision=True, sim_run_filename=None, total_number=None, dv=None, time_measurement_fun=<function measure_time_per_timestep>, backends=(functools.partial(<function _cached_backend>, backend_class=<class 'PySDM.backends.Numba'>), functools.partial(<function _cached_backend>, backend_class=<class 'PySDM.backends.ThrustRTC'>))):
def go_benchmark(
    setup_sim,
    n_sds,
    n_steps,
    seeds,
    *,
    numba_n_threads=None,
    double_precision=True,
    sim_run_filename=None,
    total_number=None,
    dv=None,
    time_measurement_fun=measure_time_per_timestep,
    backends=(CPU, GPU),
):
    """Benchmark simulations across backends, super-droplet counts and seeds.

    Parameters
    ----------
    setup_sim : callable
        Factory called as ``setup_sim(n_sd, backend_class, seed,
        double_precision=..., total_number=..., dv=...)`` returning a
        particulator exposing the products in `ProductsNames`.
    n_sds : iterable of int
        Super-droplet counts to benchmark.
    n_steps : int
        Number of measured timesteps per run.
    seeds : iterable of int
        One run per seed.
    numba_n_threads : iterable of int, optional
        CPU thread counts to benchmark; ``None`` means a single CPU run with
        numba's current thread count left untouched.
    time_measurement_fun : callable
        `measure_time_per_timestep` or `measure_time_for_each_step`.

    Returns
    -------
    dict
        ``results[backend_name][n_sd][seed]`` -> elapsed time(s).

    Side effects: if *sim_run_filename* is given, products of every run are
    dumped to ``f"{sim_run_filename}-products.txt"`` (must not exist yet).
    """
    products = {}
    results = {}

    backend_configs = []
    if CPU in backends:
        # bug fix: the default numba_n_threads=None used to be iterated
        # directly, raising TypeError; None now yields one CPU config that
        # keeps numba's current thread count
        thread_counts = (None,) if numba_n_threads is None else numba_n_threads
        backend_configs.extend((CPU, i) for i in thread_counts)
    if GPU in backends:
        backend_configs.append((GPU, None))

    for backend_class, n_threads in backend_configs:
        # instantiate to resolve the concrete class name (backends may be
        # factories/partials rather than plain classes)
        backend_name = backend_class().__class__.__name__
        if n_threads:
            numba.set_num_threads(n_threads)
            backend_name += "_" + str(numba.get_num_threads())

        results[backend_name] = {}
        products[backend_name] = {}

        print()
        print("before")

        for n_sd in n_sds:
            print("\n")
            print(backend_name, n_sd)

            results[backend_name][n_sd] = {}
            products[backend_name][n_sd] = {}

            for seed in seeds:
                gc.collect()

                particulator = setup_sim(
                    n_sd,
                    backend_class,
                    seed,
                    double_precision=double_precision,
                    total_number=total_number,
                    dv=dv,
                )

                print()
                print("products before simulation")
                print_all_products(particulator)

                print()
                print("start simulation")

                elapsed_time = time_measurement_fun(particulator, n_steps)

                print()
                print("products after simulation")
                print_all_products(particulator)

                results[backend_name][n_sd][seed] = elapsed_time
                products[backend_name][n_sd][seed] = get_prod_dict(particulator)

                # drop the simulation state before the next (possibly large)
                # allocation to keep peak memory down
                gc.collect()
                del particulator
                gc.collect()

    if sim_run_filename:
        write_to_file(filename=f"{sim_run_filename}-products.txt", d=products)

    return results
def
process_results(res_d, axis=None):
def process_results(res_d, axis=None):
    """Aggregate per-seed results into mean/std/max/min per backend and n_sd.

    *res_d* is the nested dict produced by the benchmark; *axis* is forwarded
    to numpy (relevant when each entry is a per-step list of timings).
    """
    stat_funs = {"mean": np.mean, "std": np.std, "max": np.amax, "min": np.amin}

    processed_d = {}
    for backend, per_n_sd in res_d.items():
        processed_d[backend] = {}

        for n_sd, per_seed in per_n_sd.items():
            vals = np.array(list(per_seed.values()))
            processed_d[backend][n_sd] = {
                stat_name: fun(vals, axis=axis)
                for stat_name, fun in stat_funs.items()
            }

    return processed_d
def
write_to_file(filename, d):
class
PlottingHelpers:
class PlottingHelpers:
    """Static helpers for arranging benchmark results on plots."""

    @staticmethod
    def get_backend_markers(backends):
        """Map each backend label to a marker: 'o' for Numba, 'x' otherwise."""
        return {label: ("o" if "Numba" in label else "x") for label in backends}

    @staticmethod
    def get_sorted_backend_list(processed_d):
        """Backend labels sorted with Numba_<k> first, ordered by thread count k."""

        def thread_rank(label):
            # Numba labels sort by their numeric thread-count suffix;
            # everything else goes to the end
            return int(label[6:]) if "Numba_" in label else 100**10

        backends = sorted(processed_d.keys())
        backends.sort(key=thread_rank)  # stable: preserves alphabetical ties
        return backends

    @staticmethod
    def get_n_sd_list(backends, processed_d):
        """Sorted union of n_sd values present across the given backends."""
        seen = []
        for backend in backends:
            for n_sd in processed_d[backend]:
                if n_sd not in seen:
                    seen.append(n_sd)
        seen.sort()
        return seen
def
plot_processed_results( processed_d, *, plot_label='', plot_title=None, metric='min', plot_filename, markers=None, colors=None):
def plot_processed_results(
    processed_d,
    *,
    plot_label="",
    plot_title=None,
    metric="min",
    plot_filename,
    markers=None,
    colors=None,
):
    """Plot *metric* vs n_sd (log-log, base 2) per backend and save the figure."""
    backends = PlottingHelpers.get_sorted_backend_list(processed_d)

    if markers is None:
        markers = PlottingHelpers.get_backend_markers(backends)

    x = PlottingHelpers.get_n_sd_list(backends, processed_d)

    for backend in backends:
        y = []
        for n_sd in x:
            value = processed_d[backend][n_sd][metric]
            assert isinstance(value, (float, int)), "must be scalar"
            y.append(value)

        plot_kwargs = {
            "label": backend + plot_label,
            "marker": markers[backend],
            "linewidth": 2,
        }
        if colors:
            plot_kwargs["color"] = colors[backend]
        pyplot.plot(x, y, **plot_kwargs)

    pyplot.legend()
    pyplot.xscale("log", base=2)
    pyplot.yscale("log", base=2)
    pyplot.ylim(bottom=2**-15, top=2**3)

    pyplot.grid()
    pyplot.xticks(x)
    pyplot.xlabel("number of super-droplets")
    pyplot.ylabel("wall time per timestep [s]")

    if plot_title:
        pyplot.title(plot_title)

    pyplot.savefig(plot_filename)
def
plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
def plot_processed_on_same_plot(coal_d, break_d, coal_break_d):
    """Overlay coalescence, breakup and combined results on one SVG figure."""
    filename = "same_plot.svg"
    for data, suffix in ((coal_d, "-c"), (break_d, "-b"), (coal_break_d, "-cb")):
        plot_processed_results(data, plot_label=suffix, plot_filename=filename)
def
plot_time_per_step( processed_d, n_sd, *, plot_label='', plot_title=None, metric='mean', plot_filename, step_from_to=None):
def plot_time_per_step(
    processed_d,
    n_sd,
    *,
    plot_label="",
    plot_title=None,
    metric="mean",
    plot_filename,
    step_from_to=None,
):
    """Plot per-step wall times for one *n_sd* across backends; save the figure.

    *step_from_to* optionally restricts the plotted steps to [from, to).
    """
    backends = PlottingHelpers.get_sorted_backend_list(processed_d)
    markers = PlottingHelpers.get_backend_markers(backends)

    # restrict both axes to the requested step window (full range by default)
    window = slice(None) if step_from_to is None else slice(step_from_to[0], step_from_to[1])

    for backend in backends:
        timings = processed_d[backend][n_sd][metric]
        y = timings[window]
        x = np.arange(len(timings))[window]
        pyplot.plot(x, y, label=backend + plot_label, marker=markers[backend])

    pyplot.legend()
    pyplot.grid()
    pyplot.xticks(x)
    pyplot.xlabel("number of super-droplets")
    pyplot.ylabel("wall time per timestep [s]")

    if plot_title:
        pyplot.title(plot_title + f"(n_sd: {n_sd})")

    pyplot.savefig(plot_filename)