romtools.workflows.sampling_with_holdout.sampling_with_holdout
#
# ************************************************************************
#
# ROM Tools and Workflows
# Copyright 2019 National Technology & Engineering Solutions of Sandia,LLC
# (NTESS)
#
# Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.
#
# ROM Tools and Workflows is licensed under BSD-3-Clause terms of use:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Questions? Contact Eric Parish (ejparis@sandia.gov)
#
# ************************************************************************
#

import time
import numpy as np

from romtools.workflows.models import QoiModel
from romtools.workflows.parameter_spaces import ParameterSpace
from romtools.workflows.workflow_utils import create_empty_dir
from romtools.workflows.model_builders import QoiModelBuilder


def _create_parameter_dict(parameter_names, parameter_values):
    '''
    Pair each parameter name with the value at the same position.

    Pairing stops at the shorter of the two inputs.
    '''
    return dict(zip(parameter_names, parameter_values))


def run_sampling_with_holdout(
    fom_model: QoiModel,
    rom_model_builder: QoiModelBuilder,
    parameter_space: ParameterSpace,
    absolute_work_directory: str,
    holdout_set_size: int = 5,
    max_number_of_rom_samples: int = 20,
    tolerance=1e-5,
    random_seed: int = 1,
):
    '''
    Core algorithm

    Draws ``holdout_set_size + max_number_of_rom_samples`` parameter samples.
    The first ``holdout_set_size`` samples form the holdout set; the FOM is
    run on each of them and its QoI is stored as the reference. The remaining
    samples form the training set. The FOM is run at the first training
    sample, and then, one training sample at a time:

    1. the FOM is run at the next training sample,
    2. a ROM is built from all FOM training directories run so far,
    3. the ROM is run on every holdout sample and the relative QoI error
       ``|rom - fom| / (|fom| + 1e-30)`` is evaluated.

    The loop stops once the largest relative error over the holdout set drops
    below ``tolerance`` or all training samples have been used.

    Args:
        fom_model: Full-order model. ``populate_run_directory``,
            ``run_model`` and ``compute_qoi`` are called on it.
        rom_model_builder: ``build_from_training_dirs(offline_dir,
            training_dirs)`` is called on it and must return a model offering
            the same three methods as ``fom_model``.
        parameter_space: ``generate_samples(n)`` and ``get_names()`` are
            called on it.
        absolute_work_directory: Directory handed to ``create_empty_dir``;
            all run directories, the status log
            (``sampling_with_holdout_status.log``) and the ``holdout_stats``
            archive written by ``np.savez`` are placed below it.
        holdout_set_size: Number of holdout samples (at least 1).
        max_number_of_rom_samples: Number of training samples generated,
            i.e. the upper bound on FOM training runs (at least 2).
        tolerance: Convergence threshold on the maximum relative holdout
            QoI error.
        random_seed: Seed passed to ``np.random.seed`` before sampling.

    Raises:
        AssertionError: If ``max_number_of_rom_samples < 2``.
        ValueError: If ``holdout_set_size < 1``.
    '''
    assert max_number_of_rom_samples >= 2
    if holdout_set_size < 1:
        # Without a holdout set there is no reference QoI to compare against;
        # fail before any expensive model run is started.
        raise ValueError("holdout_set_size must be at least 1")

    sampling_directory = absolute_work_directory
    create_empty_dir(sampling_directory)
    offline_directory_prefix = "offline_data"
    run_directory_prefix = "run_"

    fom_time = 0.0
    rom_time = 0.0
    basis_time = 0.0

    training_dirs = []
    training_params = []
    trained_samples = []
    holdout_set_errs = []

    # The context manager guarantees the status log is closed even when one
    # of the model calls below raises.
    with open(
        f"{sampling_directory}/sampling_with_holdout_status.log",
        "w",
        encoding="utf-8",
    ) as sampling_file:

        def log(message):
            # Flush after every message so the log can be followed while
            # long model runs are in progress.
            sampling_file.write(message)
            sampling_file.flush()

        log("Holdout sampling status \n")

        np.random.seed(random_seed)

        parameter_samples = parameter_space.generate_samples(
            holdout_set_size + max_number_of_rom_samples
        )
        parameter_names = parameter_space.get_names()

        holdout_sample_indices = range(holdout_set_size)
        training_samples = parameter_samples[
            holdout_set_size:holdout_set_size + max_number_of_rom_samples
        ]

        # Setup FOM directories for potential training runs
        for sample_index, sample in enumerate(training_samples):
            fom_run_directory = (
                f"{sampling_directory}/fom/training_set/"
                f"{run_directory_prefix}{sample_index}"
            )
            create_empty_dir(fom_run_directory)
            parameter_dict = _create_parameter_dict(parameter_names, sample)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)

        # Setup FOM directories and run samples to build holdout set.
        t0 = time.time()
        log("Building holdout set \n")
        fom_qois = []
        for sample_index in holdout_sample_indices:
            log(f"Running holdout FOM sample {sample_index} \n")
            parameter_dict = _create_parameter_dict(
                parameter_names, parameter_samples[sample_index]
            )
            fom_run_directory = (
                f"{sampling_directory}/fom/holdout_set/"
                f"{run_directory_prefix}{sample_index}"
            )
            create_empty_dir(fom_run_directory)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)
            fom_model.run_model(fom_run_directory, parameter_dict)
            fom_qois.append(
                fom_model.compute_qoi(fom_run_directory, parameter_dict)
            )
        # One row per holdout sample (axis 0), trailing axes are the QoI's.
        fom_qois_holdout_set = np.asarray(fom_qois)
        fom_time += time.time() - t0

        log("Beginning sampling procedure \n")

        # Same shape as the FOM reference so the error arrays below are
        # element-wise and never broadcast against each other.
        rom_qois_holdout_set = np.zeros(fom_qois_holdout_set.shape)

        def run_training_fom(index):
            # Run the FOM at training sample `index`, register it for the
            # next ROM build and return the wall time spent in the run.
            log(f"Holdout set iteration # {index}\n")
            training_sample = training_samples[index]
            parameter_dict = _create_parameter_dict(
                parameter_names, training_sample
            )
            fom_run_directory = (
                f"{sampling_directory}/fom/training_set/"
                f"{run_directory_prefix}{index}"
            )

            # Run FOM at training parameter
            t_start = time.time()
            log(f"Running training FOM sample {index} \n")
            fom_model.run_model(fom_run_directory, parameter_dict)
            elapsed = time.time() - t_start

            log(f"Adding training FOM sample {index} to basis \n")
            # Record the parameters the FOM was actually run with (the
            # training sample, not the holdout sample at the same index).
            training_params.append(training_sample)
            training_dirs.append(fom_run_directory)
            trained_samples.append(index)
            log(f"Parameter samples: \n {np.asarray(training_params)}\n")
            return elapsed

        # The first ROM is built from two FOM runs: sample 0 here, sample 1
        # in the first pass through the loop.
        fom_time += run_training_fom(0)

        converged = False
        sample_index = 1
        while not converged and sample_index < max_number_of_rom_samples:
            fom_time += run_training_fom(sample_index)

            # Add FOM sample to ROM basis
            t0 = time.time()
            log(f"Constructing ROM iteration {sample_index} \n")
            updated_offline_data_dir = (
                f"{sampling_directory}/rom_iteration_{sample_index}/"
                f"{offline_directory_prefix}/"
            )
            create_empty_dir(updated_offline_data_dir)
            rom_model = rom_model_builder.build_from_training_dirs(
                updated_offline_data_dir, training_dirs
            )
            basis_time += time.time() - t0

            # Evaluate ROM at holdout set and compute QOI errors
            t0 = time.time()
            for holdout_sample_index in holdout_sample_indices:
                log(f" Running ROM at holdout sample {holdout_sample_index}\n")
                rom_run_directory = (
                    f"{sampling_directory}/rom_iteration_{sample_index}/"
                    f"{run_directory_prefix}{holdout_sample_index}"
                )
                parameter_dict = _create_parameter_dict(
                    parameter_names, parameter_samples[holdout_sample_index]
                )
                create_empty_dir(rom_run_directory)
                rom_model.populate_run_directory(
                    rom_run_directory, parameter_dict
                )
                rom_model.run_model(rom_run_directory, parameter_dict)
                rom_qois_holdout_set[holdout_sample_index] = (
                    rom_model.compute_qoi(rom_run_directory, parameter_dict)
                )
            rom_time += time.time() - t0

            # If Max QOI error is less than tolerance, converged = True
            holdout_set_abs_errs_at_it = np.abs(
                rom_qois_holdout_set - fom_qois_holdout_set
            )
            # The tiny offset guards against division by a zero FOM QoI.
            holdout_set_errs_at_it = holdout_set_abs_errs_at_it / (
                np.abs(fom_qois_holdout_set) + 1.e-30
            )
            # Flatten first: on a 2-D array the inf-norm would be the maximum
            # row sum rather than the largest entry.
            holdout_set_err = np.linalg.norm(
                holdout_set_errs_at_it.reshape(-1), np.inf
            )

            holdout_set_errs.append(holdout_set_err)
            log(f" Max holdout set error = {holdout_set_err}\n")
            log(
                f" Holdout set relative errors: \n "
                f"{np.asarray(holdout_set_errs_at_it)}\n"
            )
            log(
                f" Holdout set absolute errors: \n "
                f"{np.asarray(holdout_set_abs_errs_at_it)}\n"
            )

            if holdout_set_err < tolerance:
                converged = True
                print(
                    "Holdout sampling run converged with QoI error "
                    f"{holdout_set_err}\n"
                )

            sample_index += 1

        # Only warn when the budget ran out without meeting the tolerance;
        # converging on the last permitted iteration is not a failure.
        if not converged:
            print("Warning: Max number of iterations reached for holdout sampling")

    np.savez(
        f"{sampling_directory}/holdout_stats",
        holdout_set_errs=np.asarray(holdout_set_errs, dtype=float),
        trained_samples=trained_samples,
        fom_time=fom_time,
        rom_time=rom_time,
        basis_time=basis_time,
    )
def
run_sampling_with_holdout( fom_model: romtools.workflows.models.QoiModel, rom_model_builder: romtools.workflows.model_builders.QoiModelBuilder, parameter_space: romtools.workflows.parameter_spaces.ParameterSpace, absolute_work_directory: str, holdout_set_size: int = 5, max_number_of_rom_samples: int = 20, tolerance=1e-05, random_seed: int = 1):
def run_sampling_with_holdout(
    fom_model: QoiModel,
    rom_model_builder: QoiModelBuilder,
    parameter_space: ParameterSpace,
    absolute_work_directory: str,
    holdout_set_size: int = 5,
    max_number_of_rom_samples: int = 20,
    tolerance=1e-5,
    random_seed: int = 1,
):
    '''
    Core algorithm

    Draws ``holdout_set_size + max_number_of_rom_samples`` parameter samples.
    The first ``holdout_set_size`` samples form the holdout set; the FOM is
    run on each of them and its QoI is stored as the reference. The remaining
    samples form the training set. The FOM is run at the first training
    sample, and then, one training sample at a time:

    1. the FOM is run at the next training sample,
    2. a ROM is built from all FOM training directories run so far,
    3. the ROM is run on every holdout sample and the relative QoI error
       ``|rom - fom| / (|fom| + 1e-30)`` is evaluated.

    The loop stops once the largest relative error over the holdout set drops
    below ``tolerance`` or all training samples have been used.

    Args:
        fom_model: Full-order model. ``populate_run_directory``,
            ``run_model`` and ``compute_qoi`` are called on it.
        rom_model_builder: ``build_from_training_dirs(offline_dir,
            training_dirs)`` is called on it and must return a model offering
            the same three methods as ``fom_model``.
        parameter_space: ``generate_samples(n)`` and ``get_names()`` are
            called on it.
        absolute_work_directory: Directory handed to ``create_empty_dir``;
            all run directories, the status log
            (``sampling_with_holdout_status.log``) and the ``holdout_stats``
            archive written by ``np.savez`` are placed below it.
        holdout_set_size: Number of holdout samples (at least 1).
        max_number_of_rom_samples: Number of training samples generated,
            i.e. the upper bound on FOM training runs (at least 2).
        tolerance: Convergence threshold on the maximum relative holdout
            QoI error.
        random_seed: Seed passed to ``np.random.seed`` before sampling.

    Raises:
        AssertionError: If ``max_number_of_rom_samples < 2``.
        ValueError: If ``holdout_set_size < 1``.
    '''
    assert max_number_of_rom_samples >= 2
    if holdout_set_size < 1:
        # Without a holdout set there is no reference QoI to compare against;
        # fail before any expensive model run is started.
        raise ValueError("holdout_set_size must be at least 1")

    sampling_directory = absolute_work_directory
    create_empty_dir(sampling_directory)
    offline_directory_prefix = "offline_data"
    run_directory_prefix = "run_"

    fom_time = 0.0
    rom_time = 0.0
    basis_time = 0.0

    training_dirs = []
    training_params = []
    trained_samples = []
    holdout_set_errs = []

    # The context manager guarantees the status log is closed even when one
    # of the model calls below raises.
    with open(
        f"{sampling_directory}/sampling_with_holdout_status.log",
        "w",
        encoding="utf-8",
    ) as sampling_file:

        def log(message):
            # Flush after every message so the log can be followed while
            # long model runs are in progress.
            sampling_file.write(message)
            sampling_file.flush()

        log("Holdout sampling status \n")

        np.random.seed(random_seed)

        parameter_samples = parameter_space.generate_samples(
            holdout_set_size + max_number_of_rom_samples
        )
        parameter_names = parameter_space.get_names()

        holdout_sample_indices = range(holdout_set_size)
        training_samples = parameter_samples[
            holdout_set_size:holdout_set_size + max_number_of_rom_samples
        ]

        # Setup FOM directories for potential training runs
        for sample_index, sample in enumerate(training_samples):
            fom_run_directory = (
                f"{sampling_directory}/fom/training_set/"
                f"{run_directory_prefix}{sample_index}"
            )
            create_empty_dir(fom_run_directory)
            parameter_dict = _create_parameter_dict(parameter_names, sample)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)

        # Setup FOM directories and run samples to build holdout set.
        t0 = time.time()
        log("Building holdout set \n")
        fom_qois = []
        for sample_index in holdout_sample_indices:
            log(f"Running holdout FOM sample {sample_index} \n")
            parameter_dict = _create_parameter_dict(
                parameter_names, parameter_samples[sample_index]
            )
            fom_run_directory = (
                f"{sampling_directory}/fom/holdout_set/"
                f"{run_directory_prefix}{sample_index}"
            )
            create_empty_dir(fom_run_directory)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)
            fom_model.run_model(fom_run_directory, parameter_dict)
            fom_qois.append(
                fom_model.compute_qoi(fom_run_directory, parameter_dict)
            )
        # One row per holdout sample (axis 0), trailing axes are the QoI's.
        fom_qois_holdout_set = np.asarray(fom_qois)
        fom_time += time.time() - t0

        log("Beginning sampling procedure \n")

        # Same shape as the FOM reference so the error arrays below are
        # element-wise and never broadcast against each other.
        rom_qois_holdout_set = np.zeros(fom_qois_holdout_set.shape)

        def run_training_fom(index):
            # Run the FOM at training sample `index`, register it for the
            # next ROM build and return the wall time spent in the run.
            log(f"Holdout set iteration # {index}\n")
            training_sample = training_samples[index]
            parameter_dict = _create_parameter_dict(
                parameter_names, training_sample
            )
            fom_run_directory = (
                f"{sampling_directory}/fom/training_set/"
                f"{run_directory_prefix}{index}"
            )

            # Run FOM at training parameter
            t_start = time.time()
            log(f"Running training FOM sample {index} \n")
            fom_model.run_model(fom_run_directory, parameter_dict)
            elapsed = time.time() - t_start

            log(f"Adding training FOM sample {index} to basis \n")
            # Record the parameters the FOM was actually run with (the
            # training sample, not the holdout sample at the same index).
            training_params.append(training_sample)
            training_dirs.append(fom_run_directory)
            trained_samples.append(index)
            log(f"Parameter samples: \n {np.asarray(training_params)}\n")
            return elapsed

        # The first ROM is built from two FOM runs: sample 0 here, sample 1
        # in the first pass through the loop.
        fom_time += run_training_fom(0)

        converged = False
        sample_index = 1
        while not converged and sample_index < max_number_of_rom_samples:
            fom_time += run_training_fom(sample_index)

            # Add FOM sample to ROM basis
            t0 = time.time()
            log(f"Constructing ROM iteration {sample_index} \n")
            updated_offline_data_dir = (
                f"{sampling_directory}/rom_iteration_{sample_index}/"
                f"{offline_directory_prefix}/"
            )
            create_empty_dir(updated_offline_data_dir)
            rom_model = rom_model_builder.build_from_training_dirs(
                updated_offline_data_dir, training_dirs
            )
            basis_time += time.time() - t0

            # Evaluate ROM at holdout set and compute QOI errors
            t0 = time.time()
            for holdout_sample_index in holdout_sample_indices:
                log(f" Running ROM at holdout sample {holdout_sample_index}\n")
                rom_run_directory = (
                    f"{sampling_directory}/rom_iteration_{sample_index}/"
                    f"{run_directory_prefix}{holdout_sample_index}"
                )
                parameter_dict = _create_parameter_dict(
                    parameter_names, parameter_samples[holdout_sample_index]
                )
                create_empty_dir(rom_run_directory)
                rom_model.populate_run_directory(
                    rom_run_directory, parameter_dict
                )
                rom_model.run_model(rom_run_directory, parameter_dict)
                rom_qois_holdout_set[holdout_sample_index] = (
                    rom_model.compute_qoi(rom_run_directory, parameter_dict)
                )
            rom_time += time.time() - t0

            # If Max QOI error is less than tolerance, converged = True
            holdout_set_abs_errs_at_it = np.abs(
                rom_qois_holdout_set - fom_qois_holdout_set
            )
            # The tiny offset guards against division by a zero FOM QoI.
            holdout_set_errs_at_it = holdout_set_abs_errs_at_it / (
                np.abs(fom_qois_holdout_set) + 1.e-30
            )
            # Flatten first: on a 2-D array the inf-norm would be the maximum
            # row sum rather than the largest entry.
            holdout_set_err = np.linalg.norm(
                holdout_set_errs_at_it.reshape(-1), np.inf
            )

            holdout_set_errs.append(holdout_set_err)
            log(f" Max holdout set error = {holdout_set_err}\n")
            log(
                f" Holdout set relative errors: \n "
                f"{np.asarray(holdout_set_errs_at_it)}\n"
            )
            log(
                f" Holdout set absolute errors: \n "
                f"{np.asarray(holdout_set_abs_errs_at_it)}\n"
            )

            if holdout_set_err < tolerance:
                converged = True
                print(
                    "Holdout sampling run converged with QoI error "
                    f"{holdout_set_err}\n"
                )

            sample_index += 1

        # Only warn when the budget ran out without meeting the tolerance;
        # converging on the last permitted iteration is not a failure.
        if not converged:
            print("Warning: Max number of iterations reached for holdout sampling")

    np.savez(
        f"{sampling_directory}/holdout_stats",
        holdout_set_errs=np.asarray(holdout_set_errs, dtype=float),
        trained_samples=trained_samples,
        fom_time=fom_time,
        rom_time=rom_time,
        basis_time=basis_time,
    )
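Core algorithm: runs the FOM on a holdout set of parameter samples, then adds FOM training samples one at a time, rebuilding the ROM after each addition, until the maximum relative QoI error of the ROM over the holdout set falls below `tolerance` or `max_number_of_rom_samples` training samples have been used.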