des_parallel_process

View Source

  1from datetime import datetime, timedelta
  2import logging
  3import time
  4import glob
  5import os, sys
  6import pandas as pd
  7from utils import Utils
  8from des_hems import DES_HEMS
  9import multiprocessing as mp
 10from joblib import Parallel, delayed
 11from numpy.random import SeedSequence
 12
 13def write_run_params(model) -> None:
 14    """
 15    Write the simulation parameters used in a DES model run to a CSV file.
 16
 17    Extracts key configuration parameters from the given model instance and writes them
 18    to a CSV file (`run_params_used.csv`) in the designated results folder. This provides
 19    a record of the conditions under which the simulation was executed.
 20
 21    SR NOTE: It would also be good to add some sort of identifier to both the run results csv
 22        and this csv so you can confirm that they came from the same model execution (to avoid
 23        issues with calculations being incorrect if e.g. it was not possible to write one of the
 24        outputs due to an error, write protection, etc.)
 25
 26    Parameters
 27    ----------
 28    model : DES_HEMS
 29        The simulation model instance from which to extract run parameters. Must have attributes
 30        including `sim_start_date`, `sim_duration`, `warm_up_duration`, `amb_data`, and
 31        `activity_duration_multiplier`.
 32
 33    Returns
 34    -------
 35    None
 36
 37    Notes
 38    -----
 39    - The function calculates `sim_end_date` and `warm_up_end_date` based on the provided
 40      `sim_start_date` and durations.
 41    - Output CSV includes timing and configuration values such as:
 42        - Simulation duration and warm-up duration
 43        - Simulation start, end, and warm-up end datetimes
 44        - Whether ambulance data was used
 45        - Activity duration multiplier
 46        - Model execution timestamp
 47        - Assumed summer and winter period start dates
 48    - The output CSV is saved to `Utils.RESULTS_FOLDER/run_params_used.csv`.
 49    - Only supports a single simulation's parameters at a time.
 50    - Future improvements may include adding a unique identifier for linking this file
 51      with the corresponding simulation results.
 52
 53    See Also
 54    --------
 55    runSim : Runs an individual simulation and optionally calls this function.
 56    parallelProcessJoblib : Executes multiple `runSim` runs in parallel.
 57    """
 58    # Ensure sim_start_date is a datetime object
 59    sim_start_date = model.sim_start_date
 60    if isinstance(sim_start_date, str):
 61        sim_start_date = datetime.fromisoformat(sim_start_date)  # Convert string to datetime
 62
 63    sim_end_date = sim_start_date + timedelta(minutes=model.sim_duration)
 64    warm_up_end_date = sim_start_date + timedelta(minutes=model.warm_up_duration)
 65
 66    params_df = pd.DataFrame.from_dict({
 67        'sim_duration': [model.sim_duration],
 68        'warm_up_duration': [model.warm_up_duration],
 69        'sim_start_date': [sim_start_date],
 70        'sim_end_date': [sim_end_date],
 71        'warm_up_end_date': [warm_up_end_date],
 72        'amb_data': [model.amb_data],
 73        'model_exec_time': [datetime.now()],
 74        # Assuming summer hours are quarters 2 and 3 i.e. April-September
 75        # This is defined in class_hems and will need updating here too
 76        'summer_start_date': [f'{sim_start_date.year}-04-01'],
 77        'winter_start_date':  [f'{sim_start_date.year}-10-01'],
 78        'activity_duration_multiplier': [model.activity_duration_multiplier]
 79    }, orient='index', columns=['value'])
 80
 81    params_df.index.name = "parameter"
 82
 83    params_df.to_csv(f"{Utils.RESULTS_FOLDER}/run_params_used.csv", header="column_names")
 84
 85try:
 86     __file__
 87except NameError:
 88    __file__ = sys.argv[0]
 89
 90os.chdir(os.path.dirname(os.path.realpath(__file__)))
 91sys.path.append(os.path.dirname(os.path.realpath(__file__)))
 92
 93def runSim(run: int,
 94           total_runs: int,
 95           sim_duration: int,
 96           warm_up_time: int,
 97           sim_start_date: datetime,
 98           amb_data: bool,
 99           random_seed: int = 101,
100           save_params_csv: bool = True,
101           demand_increase_percent: float = 1.0,
102           activity_duration_multiplier: float = 1.0,
103           print_debug_messages: bool = False):
104    """
105    Run a single discrete event simulation (DES) for the specified configuration.
106
107    This function initializes and runs a DES_HEMS simulation model for a given run number,
108    logs performance and configuration details, and optionally saves simulation parameters.
109
110    Parameters
111    ----------
112    run : int
113        The index of the current simulation run (starting from 0).
114    total_runs : int
115        Total number of simulation runs being executed.
116    sim_duration : int
117        The total simulation duration (excluding warm-up) in minutes or other time unit.
118    warm_up_time : int
119        The warm-up period to discard before recording results.
120    sim_start_date : datetime
121        The datetime representing the start of the simulation.
122    amb_data : bool
123        Flag indicating whether ambulance-specific data should be generated in the simulation.
124    save_params_csv : bool, optional
125        If True, simulation parameters will be saved to CSV (only on the first run). Default is True.
126    demand_increase_percent : float, optional
127        Factor by which demand is increased (e.g., 1.10 for a 10% increase). Default is 1.0.
128    activity_duration_multiplier : float, optional
129        Multiplier to adjust generated durations of activities (e.g., 1.10 for a 10% increase). Default is 1.0.
130    print_debug_messages : bool, optional
131        If True, enables additional debug message output. Default is False.
132
133    Returns
134    -------
135    pandas.DataFrame
136        A DataFrame containing the simulation results.
137
138    Notes
139    -----
140    - Only the first run (i.e., `run == 0`) will trigger the saving of run parameters if `save_params_csv` is True.
141    - Timing information and configuration details are printed and logged for transparency.
142    """
143    #print(f"Inside runSim and {sim_start_date} and {what_if_sim_run}")
144
145    print(f'{Utils.current_time()}: Demand increase set to {demand_increase_percent*100}%')
146    logging.debug(f'{Utils.current_time()}: Demand increase set to {demand_increase_percent*100}%')
147
148    start = time.process_time()
149
150    print (f"{Utils.current_time()}: Run {run+1} of {total_runs}")
151    logging.debug(f"{Utils.current_time()}: Run {run+1} of {total_runs}")
152
153    #print(f"Sim start date is {sim_start_date}")
154    daa_model = DES_HEMS(run_number=run,
155                        sim_duration=sim_duration,
156                        warm_up_duration=warm_up_time,
157                        sim_start_date=sim_start_date,
158                        amb_data=amb_data,
159                        random_seed=random_seed,
160                        demand_increase_percent=demand_increase_percent,
161                        activity_duration_multiplier=activity_duration_multiplier,
162                        print_debug_messages=print_debug_messages
163                        )
164    daa_model.run()
165
166    print(f'{Utils.current_time()}: Run {run+1} took {round((time.process_time() - start)/60, 1)} minutes to run')
167    logging.debug(f'{Utils.current_time()}: Run {run+1} took {round((time.process_time() - start)/60, 1)} minutes to run')
168
169    # SR NOTE: This could cause issues if we decide to use 1 as the starting number of runs
170    if (run==0) and (save_params_csv):
171        write_run_params(daa_model)
172
173    return daa_model.results_df
174
def collateRunResults() -> None:
    """
    Collate the per-run result files into a single CSV.

    Finds every ``output_run_*.csv`` file in ``Utils.RESULTS_FOLDER``, concatenates
    them (with a fresh row index) and writes the result to ``Utils.RUN_RESULTS_CSV``.
    The per-run files are deleted once the combined file has been written.

    If no per-run files are found, a warning is logged and nothing is written
    or deleted.

    Returns
    -------
    None
    """
    # Sorted so that the row order of the combined file does not depend on the
    # arbitrary order in which glob returns file names.
    matching_files = sorted(glob.glob(os.path.join(Utils.RESULTS_FOLDER, "output_run_*.csv")))

    if not matching_files:
        # pd.concat raises ValueError when given an empty list
        logging.warning(
            "No output_run_*.csv files found in %s; nothing to collate",
            Utils.RESULTS_FOLDER,
        )
        return

    combined_df = pd.concat([pd.read_csv(f) for f in matching_files], ignore_index=True)

    combined_df.to_csv(Utils.RUN_RESULTS_CSV, index=False)

    # Only remove the inputs after the combined file was written successfully
    for file in matching_files:
        os.remove(file)
187
def removeExistingResults(remove_run_results_csv=False) -> None:
    """
    Delete result files left over from previous simulation runs.

    Always removes every ``output_run_*.csv`` file and ``all_results.csv`` (if present)
    from ``Utils.RESULTS_FOLDER``.

    Parameters
    ----------
    remove_run_results_csv : bool, optional
        If True, ``run_results.csv`` in the results folder is removed as well.
        Default is False.

    Returns
    -------
    None
    """
    results_dir = Utils.RESULTS_FOLDER

    # Per-run outputs are always cleared
    for stale_file in glob.glob(os.path.join(results_dir, "output_run_*.csv")):
        os.remove(stale_file)

    # all_results.csv is always a removal candidate; run_results.csv only on request
    single_files = ["all_results.csv"]
    if remove_run_results_csv:
        single_files.append("run_results.csv")

    for file_name in single_files:
        candidate = os.path.join(results_dir, file_name)
        if os.path.isfile(candidate):
            os.unlink(candidate)
205
def parallelProcessJoblib(total_runs: int,
                          sim_duration: int,
                          warm_up_time: int,
                          sim_start_date: datetime,
                          amb_data: bool,
                          save_params_csv: bool = True,
                          demand_increase_percent: float = 1.0,
                          activity_duration_multiplier: float = 1.0,
                          print_debug_messages: bool = False,
                          master_seed=42,
                          n_cores=-1):
    """
    Execute multiple simulation runs in parallel using joblib.

    Parameters
    ----------
    total_runs : int
        The total number of simulation runs to execute.
    sim_duration : int
        The duration of each simulation, forwarded unchanged to `runSim`.
    warm_up_time : int
        The warm-up period, forwarded unchanged to `runSim`.
    sim_start_date : datetime
        The datetime representing the start of the simulation.
    amb_data : bool
        Flag indicating whether ambulance-specific data should be generated in the simulation.
    save_params_csv : bool, optional
        If True, simulation parameters will be saved to CSV during the first run. Default is True.
    demand_increase_percent : float, optional
        Factor by which demand is increased (e.g., 1.10 for a 10% increase). Default is 1.0.
    activity_duration_multiplier : float, optional
        Multiplier to adjust generated durations of activities (e.g., 1.10 for a 10% increase). Default is 1.0.
    print_debug_messages : bool, optional
        If True, enables additional debug message output during each run. Default is False.
    master_seed : int, optional
        Master seed from which one seed per run is derived. The same master seed and
        number of runs always produce the same per-run seeds. Default is 42.
    n_cores : int, optional
        Passed to joblib as `n_jobs`, i.e. how many simulations run at the same time.
        Default is -1, which means all available cores will be utilised.

    Returns
    -------
    list
        One entry per run, in run order, each being the value returned by `runSim`
        for that run.

    Notes
    -----
    - Every run receives the same configuration; only `run` and `random_seed` differ.
    - Only the first run will save parameter data if `save_params_csv` is True.
    - If a single output csv is required, use of this function must be followed by
      collateRunResults()
    """
    # Spawn one child SeedSequence per run from the master seed, then reduce each
    # child to a single integer that seeds that run's simulation.
    child_sequences = SeedSequence(master_seed).spawn(total_runs)
    seeds = [child.generate_state(1)[0] for child in child_sequences]

    # Configuration shared by every run
    shared_kwargs = dict(
        total_runs=total_runs,
        sim_duration=sim_duration,
        warm_up_time=warm_up_time,
        sim_start_date=sim_start_date,
        amb_data=amb_data,
        save_params_csv=save_params_csv,
        demand_increase_percent=demand_increase_percent,
        activity_duration_multiplier=activity_duration_multiplier,
        print_debug_messages=print_debug_messages,
    )

    # One delayed runSim call per run, each paired with its own seed
    jobs = (
        delayed(runSim)(run=run_idx, random_seed=run_seed, **shared_kwargs)
        for run_idx, run_seed in enumerate(seeds)
    )

    return Parallel(n_jobs=n_cores)(jobs)
291
292
if __name__ == "__main__":
    # Start from a clean results folder
    removeExistingResults()

    # Single-run variant with debug messages, kept for reference:
    #parallelProcessJoblib(1, (1*365*24*60), (0*60), datetime.strptime("2023-01-01 05:00:00", "%Y-%m-%d %H:%M:%S"), False, False, 1.0, 1.0, True)

    # 12 runs of 2*365*24*60 time units each, with no warm-up period
    parallelProcessJoblib(
        total_runs=12,
        sim_duration=2 * 365 * 24 * 60,
        warm_up_time=0 * 60,
        sim_start_date=datetime.strptime("2023-01-01 05:00:00", "%Y-%m-%d %H:%M:%S"),
        amb_data=False,
        save_params_csv=False,
        demand_increase_percent=1.0,
        activity_duration_multiplier=1.0,
    )
297
298# Testing ----------
299# python des_parallel_process.py

def write_run_params(model) -> None: View Source

def write_run_params(model) -> None:
    """
    Write the simulation parameters used in a DES model run to a CSV file.

    Reads the key configuration attributes from ``model`` and writes them, one
    parameter per row, to ``run_params_used.csv`` inside ``Utils.RESULTS_FOLDER``.
    This provides a record of the conditions under which the simulation was executed.

    SR NOTE: It would also be good to add some sort of identifier to both the run results csv
        and this csv so you can confirm that they came from the same model execution (to avoid
        issues with calculations being incorrect if e.g. it was not possible to write one of the
        outputs due to an error, write protection, etc.)

    Parameters
    ----------
    model : DES_HEMS
        The simulation model instance from which to extract run parameters. Must have the
        attributes `sim_start_date` (a datetime or an ISO-format string), `sim_duration`,
        `warm_up_duration`, `amb_data` and `activity_duration_multiplier`.

    Returns
    -------
    None

    Notes
    -----
    - `sim_duration` and `warm_up_duration` are interpreted as minutes when computing
      `sim_end_date` and `warm_up_end_date` from `sim_start_date`.
    - `model_exec_time` is the wall-clock time at which this function is called.
    - `summer_start_date` / `winter_start_date` are fixed to 1 April / 1 October of the
      simulation start year.
    - The CSV has an index column named ``parameter`` and a single data column ``value``.
    - Only supports a single simulation's parameters at a time; an existing file is overwritten.

    See Also
    --------
    runSim : Runs an individual simulation and optionally calls this function.
    parallelProcessJoblib : Executes multiple `runSim` runs in parallel.
    """
    # Accept either a datetime or an ISO-format string for the start date
    sim_start_date = model.sim_start_date
    if isinstance(sim_start_date, str):
        sim_start_date = datetime.fromisoformat(sim_start_date)

    sim_end_date = sim_start_date + timedelta(minutes=model.sim_duration)
    warm_up_end_date = sim_start_date + timedelta(minutes=model.warm_up_duration)

    # Each value is wrapped in a one-element list so that orient='index' yields
    # one row per parameter with a single 'value' column.
    params_df = pd.DataFrame.from_dict({
        'sim_duration': [model.sim_duration],
        'warm_up_duration': [model.warm_up_duration],
        'sim_start_date': [sim_start_date],
        'sim_end_date': [sim_end_date],
        'warm_up_end_date': [warm_up_end_date],
        'amb_data': [model.amb_data],
        'model_exec_time': [datetime.now()],
        # Assuming summer hours are quarters 2 and 3 i.e. April-September
        # This is defined in class_hems and will need updating here too
        'summer_start_date': [f'{sim_start_date.year}-04-01'],
        'winter_start_date': [f'{sim_start_date.year}-10-01'],
        'activity_duration_multiplier': [model.activity_duration_multiplier]
    }, orient='index', columns=['value'])

    params_df.index.name = "parameter"

    # `header` expects a bool (or a list of column aliases); the previous string
    # value only worked because it was truthy.
    params_df.to_csv(os.path.join(Utils.RESULTS_FOLDER, "run_params_used.csv"), header=True)

Write the simulation parameters used in a DES model run to a CSV file.

Extracts key configuration parameters from the given model instance and writes them to a CSV file (run_params_used.csv) in the designated results folder. This provides a record of the conditions under which the simulation was executed.

SR NOTE: It would also be good to add some sort of identifier to both the run results csv and this csv so you can confirm that they came from the same model execution (to avoid issues with calculations being incorrect if e.g. it was not possible to write one of the outputs due to an error, write protection, etc.)

Parameters

model : DES_HEMS The simulation model instance from which to extract run parameters. Must have attributes including sim_start_date, sim_duration, warm_up_duration, amb_data, and activity_duration_multiplier.

Returns

None

Notes

The function calculates sim_end_date and warm_up_end_date based on the provided sim_start_date and durations.
Output CSV includes timing and configuration values such as:
- Simulation duration and warm-up duration
- Simulation start, end, and warm-up end datetimes
- Whether ambulance data was used
- Activity duration multiplier
- Model execution timestamp
- Assumed summer and winter period start dates
The output CSV is saved to Utils.RESULTS_FOLDER/run_params_used.csv.
Only supports a single simulation's parameters at a time.
Future improvements may include adding a unique identifier for linking this file with the corresponding simulation results.

Parameters

run : int — The index of the current simulation run (starting from 0).
total_runs : int — Total number of simulation runs being executed.
sim_duration : int — The total simulation duration (excluding warm-up), in minutes.
warm_up_time : int — The warm-up period to discard before recording results.
sim_start_date : datetime — The datetime representing the start of the simulation.
amb_data : bool — Flag indicating whether ambulance-specific data should be generated in the simulation.
random_seed : int, optional — Seed passed to the simulation model for this run. Default is 101.
save_params_csv : bool, optional — If True, simulation parameters will be saved to CSV (only on the first run). Default is True.
demand_increase_percent : float, optional — Factor by which demand is increased (e.g., 1.10 for a 10% increase). Default is 1.0.
activity_duration_multiplier : float, optional — Multiplier to adjust generated durations of activities (e.g., 1.10 for a 10% increase). Default is 1.0.
print_debug_messages : bool, optional — If True, enables additional debug message output. Default is False.

Returns

pandas.DataFrame A DataFrame containing the simulation results.

Notes

Only the first run (i.e., run == 0) will trigger the saving of run parameters if save_params_csv is True.
Timing information and configuration details are printed and logged for transparency.

def collateRunResults() -> None: View Source

def collateRunResults() -> None:
    """
    Collate the per-run result files into a single CSV.

    Finds every ``output_run_*.csv`` file in ``Utils.RESULTS_FOLDER``, concatenates
    them (with a fresh row index) and writes the result to ``Utils.RUN_RESULTS_CSV``.
    The per-run files are deleted once the combined file has been written.

    If no per-run files are found, a warning is logged and nothing is written
    or deleted.

    Returns
    -------
    None
    """
    # Sorted so that the row order of the combined file does not depend on the
    # arbitrary order in which glob returns file names.
    matching_files = sorted(glob.glob(os.path.join(Utils.RESULTS_FOLDER, "output_run_*.csv")))

    if not matching_files:
        # pd.concat raises ValueError when given an empty list
        logging.warning(
            "No output_run_*.csv files found in %s; nothing to collate",
            Utils.RESULTS_FOLDER,
        )
        return

    combined_df = pd.concat([pd.read_csv(f) for f in matching_files], ignore_index=True)

    combined_df.to_csv(Utils.RUN_RESULTS_CSV, index=False)

    # Only remove the inputs after the combined file was written successfully
    for file in matching_files:
        os.remove(file)

Collates results from a series of runs into a single csv

def removeExistingResults(remove_run_results_csv=False) -> None: View Source

def removeExistingResults(remove_run_results_csv=False) -> None:
    """
    Delete result files left over from previous simulation runs.

    Always removes every ``output_run_*.csv`` file and ``all_results.csv`` (if present)
    from ``Utils.RESULTS_FOLDER``.

    Parameters
    ----------
    remove_run_results_csv : bool, optional
        If True, ``run_results.csv`` in the results folder is removed as well.
        Default is False.

    Returns
    -------
    None
    """
    results_dir = Utils.RESULTS_FOLDER

    # Per-run outputs are always cleared
    for stale_file in glob.glob(os.path.join(results_dir, "output_run_*.csv")):
        os.remove(stale_file)

    # all_results.csv is always a removal candidate; run_results.csv only on request
    single_files = ["all_results.csv"]
    if remove_run_results_csv:
        single_files.append("run_results.csv")

    for file_name in single_files:
        candidate = os.path.join(results_dir, file_name)
        if os.path.isfile(candidate):
            os.unlink(candidate)

Removes results from previous simulation runs

def parallelProcessJoblib( total_runs: int, sim_duration: int, warm_up_time: int, sim_start_date: datetime.datetime, amb_data: bool, save_params_csv: bool = True, demand_increase_percent: float = 1.0, activity_duration_multiplier: float = 1.0, print_debug_messages: bool = False, master_seed=42, n_cores=-1): View Source

def parallelProcessJoblib(total_runs: int,
                          sim_duration: int,
                          warm_up_time: int,
                          sim_start_date: datetime,
                          amb_data: bool,
                          save_params_csv: bool = True,
                          demand_increase_percent: float = 1.0,
                          activity_duration_multiplier: float = 1.0,
                          print_debug_messages: bool = False,
                          master_seed=42,
                          n_cores=-1):
    """
    Execute multiple simulation runs in parallel using joblib.

    Parameters
    ----------
    total_runs : int
        The total number of simulation runs to execute.
    sim_duration : int
        The duration of each simulation, forwarded unchanged to `runSim`.
    warm_up_time : int
        The warm-up period, forwarded unchanged to `runSim`.
    sim_start_date : datetime
        The datetime representing the start of the simulation.
    amb_data : bool
        Flag indicating whether ambulance-specific data should be generated in the simulation.
    save_params_csv : bool, optional
        If True, simulation parameters will be saved to CSV during the first run. Default is True.
    demand_increase_percent : float, optional
        Factor by which demand is increased (e.g., 1.10 for a 10% increase). Default is 1.0.
    activity_duration_multiplier : float, optional
        Multiplier to adjust generated durations of activities (e.g., 1.10 for a 10% increase). Default is 1.0.
    print_debug_messages : bool, optional
        If True, enables additional debug message output during each run. Default is False.
    master_seed : int, optional
        Master seed from which one seed per run is derived. The same master seed and
        number of runs always produce the same per-run seeds. Default is 42.
    n_cores : int, optional
        Passed to joblib as `n_jobs`, i.e. how many simulations run at the same time.
        Default is -1, which means all available cores will be utilised.

    Returns
    -------
    list
        One entry per run, in run order, each being the value returned by `runSim`
        for that run.

    Notes
    -----
    - Every run receives the same configuration; only `run` and `random_seed` differ.
    - Only the first run will save parameter data if `save_params_csv` is True.
    - If a single output csv is required, use of this function must be followed by
      collateRunResults()
    """
    # Spawn one child SeedSequence per run from the master seed, then reduce each
    # child to a single integer that seeds that run's simulation.
    child_sequences = SeedSequence(master_seed).spawn(total_runs)
    seeds = [child.generate_state(1)[0] for child in child_sequences]

    # Configuration shared by every run
    shared_kwargs = dict(
        total_runs=total_runs,
        sim_duration=sim_duration,
        warm_up_time=warm_up_time,
        sim_start_date=sim_start_date,
        amb_data=amb_data,
        save_params_csv=save_params_csv,
        demand_increase_percent=demand_increase_percent,
        activity_duration_multiplier=activity_duration_multiplier,
        print_debug_messages=print_debug_messages,
    )

    # One delayed runSim call per run, each paired with its own seed
    jobs = (
        delayed(runSim)(run=run_idx, random_seed=run_seed, **shared_kwargs)
        for run_idx, run_seed in enumerate(seeds)
    )

    return Parallel(n_jobs=n_cores)(jobs)

Execute multiple simulation runs in parallel using joblib.

Parameters

total_runs : int — The total number of simulation runs to execute.
sim_duration : int — The duration of each simulation (excluding warm-up).
warm_up_time : int — The warm-up period to discard before recording results.
sim_start_date : datetime — The datetime representing the start of the simulation.
amb_data : bool — Flag indicating whether ambulance-specific data should be generated in the simulation.
save_params_csv : bool, optional — If True, simulation parameters will be saved to CSV during the first run. Default is True.
demand_increase_percent : float, optional — Factor by which demand is increased (e.g., 1.10 for a 10% increase). Default is 1.0.
activity_duration_multiplier : float, optional — Multiplier to adjust generated durations of activities (e.g., 1.10 for a 10% increase). Default is 1.0.
print_debug_messages : bool, optional — If True, enables additional debug message output during each run. Default is False.
master_seed : int, optional — Master seed used to generate the uncorrelated random number streams for replication consistency. Default is 42.
n_cores : int, optional — Determines how many parallel simulations will be run at a time (which is equivalent to the number of cores). Default is -1, which means all available cores will be utilised.

Returns

list of pandas.DataFrame A list of DataFrames, each containing the results of an individual simulation run.

Notes

This function distributes simulation runs across available CPU cores using joblib's Parallel and delayed utilities. Each run is executed with the runSim function, with the given configuration parameters.
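By default (n_cores=-1) runs are distributed across all available CPU cores; n_cores is passed to joblib as n_jobs, so a positive value limits how many runs execute at once.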
Only the first run will save parameter data if save_params_csv is True.
If a single output csv is required, use of this function must be followed by collateRunResults()