# Source code for romtools.workflows.greedy.run_greedy

#
# ************************************************************************
#
#                         ROM Tools and Workflows
# Copyright 2019 National Technology & Engineering Solutions of Sandia,LLC
#                              (NTESS)
#
# Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.
#
# ROM Tools and Workflows is licensed under BSD-3-Clause terms of use:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Questions? Contact Eric Parish (ejparis@sandia.gov)
#
# ************************************************************************
#

'''
The greedy procedure iteratively constructs a reduced basis ROM until it reaches a desired tolerance.
The algorithm is as follows:
 1. We generate a parameter training set, :math:`\\mathcal{D}_{\\mathrm{train}}`,
    :math:`|\\mathcal{D}_{\\mathrm{train}}| = N_{\\mathrm{train}}`
 2. We select an initial sample, :math:`\\mu_1 \\in \\mathcal{D}_{\\mathrm{train}}` and set
    :math:`\\mathcal{D}_{\\mathrm{train}}=\\mathcal{D}_{\\mathrm{train}} - \\{\\mu_1\\}`
 3. We then solve the FOM to obtain the solution, :math:`\\mathbf{u}(\\mu_1)`.
 4. We select a second sample, :math:`\\mu_2 \\in \\mathcal{D}_{\\mathrm{train}}` and set
    :math:`\\mathcal{D}_{\\mathrm{train}}=\\mathcal{D}_{\\mathrm{train}} - \\{\\mu_2\\}`
 5. We then solve the FOM to obtain the solution, :math:`\\mathbf{u}(\\mu_2)`.
 6. We employ the first two solutions to compute the trial space, e.g.,

    .. math::

       \\boldsymbol \\Phi = \\mathrm{orthogonalize}(\\mathbf{u}(\\mu_2) - \\mathbf{u}_{\\mathrm{shift}}), \\\\
       \\mathbf{u}_{\\mathrm{shift}} = \\mathbf{u}(\\mu_1)

 7. We then solve the resulting ROM for the remaining parameter samples :math:`\\mathcal{D}_{\\mathrm{train}}` to
    generate approximate solutions :math:`\\mathbf{u}(\\mu), \\mu \\in \\mathcal{D}_{\\mathrm{train}}`
 8. For each ROM solution :math:`\\mathbf{u}(\\mu), \\mu \\in \\mathcal{D}_{\\mathrm{train}}` we compute an error
    estimate, :math:`e(\\mu)`
 9. If the maximum error estimate is less than some tolerance, we exit the algorithm. If not, we:
   - Set :math:`\\mu^* = \\underset{ \\mu \\in \\mathcal{D}_{\\mathrm{train}} }{ \\mathrm{arg\\; max} } \\; e(\\mu)`
   - Remove :math:`\\mu^{*}` from the training set,
     :math:`\\mathcal{D}_{\\mathrm{train}}=\\mathcal{D}_{\\mathrm{train}} - \\{\\mu^{*}\\}`
   - Solve the FOM for :math:`\\mathbf{u}(\\mu^{*})`
   - Set the reduced basis to be

     .. math::

        \\boldsymbol \\Phi = \\mathrm{orthogonalize}(\\boldsymbol \\Phi, \\mathbf{u}(\\mu^{*}) - \\mathbf{u}_{\\mathrm{shift}})
   - Go back to step 7 and continue until convergence.

This function implements the basic greedy workflow. In addition, we enable an adaptive
error estimate based on a QoI. This is based on the fact that, throughout the greedy algorithm, we have a
history of error indicators as well as a set of FOM and ROM runs for the same parameter instance. We leverage this data
to improve the error estimate. Specifically, for the max error estimates over the first :math:`j` iterations,
:math:`e_1,\\ldots,e_j`, we additionally can access QoI errors :math:`e^q_1,\\ldots,e^q_j` from our FOM-ROM runs, and we
define a scaling based on

.. math::

   C = \\frac{\\sum_{i=1}^j | e^q_i| }{ \\sum_{i=1}^j | e_i | }.

The error at the :math:`j+1` iteration can be approximated by :math:`C e(\\mu)`.
This will adaptively scale the error estimate to match the QoI error as
closely as possible, which can be helpful for defining exit criterion.

Alternatively, if `calibrated_error` is set to `False`, then the scaling factor :math:`C` will always be :math:`1`.
'''

import time
import numpy as np

import romtools.linalg.linalg as la
from romtools.workflows.models import QoiModel
from romtools.workflows.parameter_spaces import ParameterSpace
from romtools.workflows.workflow_utils import create_empty_dir
from romtools.workflows.model_builders import QoiModelWithErrorEstimateBuilder


def _create_parameter_dict(parameter_names, parameter_values):
    return dict(zip(parameter_names, parameter_values))

def run_greedy(fom_model: QoiModel,
               rom_model_builder: QoiModelWithErrorEstimateBuilder,
               parameter_space: ParameterSpace,
               absolute_greedy_work_directory: str,
               tolerance: float,
               testing_sample_size: int = 10,
               random_seed: int = 1,
               calibrated_error: bool = True):
    '''
    Main implementation of the greedy algorithm.

    Runs the FOM at two seed samples, builds a ROM from them, then repeatedly
    (a) evaluates the ROM error indicator on all remaining candidate samples,
    (b) runs the FOM at the worst candidate, (c) rebuilds the ROM with the
    enlarged training set, and (d) draws one fresh candidate sample — until
    the (optionally calibrated) predicted maximum QoI error is below
    ``tolerance``.

    Args:
        fom_model: Full-order model; must provide populate_run_directory,
            run_model, and compute_qoi.
        rom_model_builder: Builds a ROM model with an error estimate from a
            list of FOM training directories.
        parameter_space: Source of parameter samples and parameter names.
        absolute_greedy_work_directory: Absolute path under which all run
            directories and outputs are created (it is emptied first).
        tolerance: Exit threshold on the predicted maximum QoI error.
        testing_sample_size: Size of the initial candidate sample set.
        random_seed: Seed passed to numpy's global RNG.
        calibrated_error: If True, scale indicators by the observed
            QoI-error/indicator ratio (see QoIvsErrorIndicatorRegressor);
            otherwise the scaling factor stays 1.

    Returns:
        None. Side effects: creates FOM/ROM run directories, writes
        ``greedy_status.log`` and ``greedy_stats.npz`` in the work directory.

    Note:
        The greedy loop has no iteration cap; it runs until the tolerance
        is met.
    '''
    greedy_directory = absolute_greedy_work_directory
    create_empty_dir(greedy_directory)
    offline_directory_prefix = 'offline_data'
    run_directory_prefix = "run_"
    greedy_file = open(f"{greedy_directory}/greedy_status.log", "w", encoding="utf-8")  # pylint: disable=consider-using-with
    try:
        greedy_file.write("Greedy reduced basis status \n")
        fom_time = 0.
        rom_time = 0.
        basis_time = 0.
        np.random.seed(random_seed)

        # create parameter samples
        parameter_samples = parameter_space.generate_samples(testing_sample_size)
        parameter_names = parameter_space.get_names()

        # Make FOM/ROM directories
        for sample_index, sample in enumerate(parameter_samples):
            fom_run_directory = f'{greedy_directory}/fom/{run_directory_prefix}{sample_index}'
            create_empty_dir(fom_run_directory)
            parameter_dict = _create_parameter_dict(parameter_names, sample)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)

        # Samples 0 and 1 seed the reduced basis; the rest are candidates.
        training_samples = np.array([0, 1], dtype='int')
        samples_left = np.arange(2, testing_sample_size)

        # Run FOM training cases
        t0 = time.time()
        training_dirs = []
        for i in training_samples:
            greedy_file.write(f"Running FOM sample {i} \n")
            sample_index = i
            parameter_dict = _create_parameter_dict(parameter_names, parameter_samples[sample_index])
            fom_run_directory = f'{greedy_directory}/fom/{run_directory_prefix}{sample_index}'
            create_empty_dir(fom_run_directory)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)
            fom_model.run_model(fom_run_directory, parameter_dict)
            # The QoI is computed so any model-side postprocessing happens;
            # the returned value itself is not needed here (the original
            # accumulated it into an unused local array).
            fom_model.compute_qoi(fom_run_directory, parameter_dict)
            training_dirs.append(fom_run_directory)
        fom_time += time.time() - t0

        # Create ROM bases
        t0 = time.time()
        greedy_file.write("Creating ROM bases \n")
        outer_loop_counter = 0
        updated_offline_data_dir = f'{greedy_directory}/rom_iteration_{outer_loop_counter}/{offline_directory_prefix}/'
        create_empty_dir(updated_offline_data_dir)
        rom_model = rom_model_builder.build_from_training_dirs(updated_offline_data_dir, training_dirs)
        basis_time += time.time() - t0

        converged = False
        max_error_indicators = np.zeros(0)
        reg = QoIvsErrorIndicatorRegressor(calibrated_error)
        qoi_errors = np.zeros(0)
        predicted_qoi_errors = np.zeros(0)
        greedy_file.flush()

        while converged is False:
            print(f'Greedy iteration # {outer_loop_counter}')
            error_indicators = np.zeros(samples_left.size)
            t0 = time.time()
            greedy_file.write(f"Greedy iteration # {outer_loop_counter}\n"
                              f"Parameter samples: \n {parameter_samples}\n")
            greedy_file.flush()

            # Evaluate the ROM and its error indicator on every candidate.
            for counter, sample_index in enumerate(samples_left):
                greedy_file.write(f" Running ROM sample {sample_index}\n")
                greedy_file.flush()
                parameter_dict = _create_parameter_dict(parameter_names, parameter_samples[sample_index])
                rom_run_directory = f'{greedy_directory}/rom_iteration_{outer_loop_counter}/{run_directory_prefix}{sample_index}'
                create_empty_dir(rom_run_directory)
                rom_model.populate_run_directory(rom_run_directory, parameter_dict)
                rom_model.run_model(rom_run_directory, parameter_dict)
                error_indicators[counter] = rom_model.compute_error_estimate(rom_run_directory, parameter_dict)
            rom_time += time.time() - t0

            # Hoist the argmax: the same index was recomputed three times.
            worst_local_index = la.argmax(error_indicators)
            sample_with_highest_error_indicator = samples_left[worst_local_index]
            max_error_indicators = np.append(max_error_indicators, np.amax(error_indicators))
            greedy_file.write(f"Sample {sample_with_highest_error_indicator}"
                              " had the highest error indicator of"
                              f" {max_error_indicators[-1]} \n")

            outer_loop_counter += 1
            if outer_loop_counter > 1:
                # From the second iteration on we have FOM/ROM pairs, so the
                # regressor can calibrate the indicator into a QoI error.
                predicted_max_qoi_error = reg.predict(error_indicators[worst_local_index])
                greedy_file.write("Our MLEM error estimate is "
                                  f"{predicted_max_qoi_error}\n")
                greedy_file.flush()
                predicted_qoi_errors = np.append(predicted_qoi_errors, predicted_max_qoi_error)
                # BUGFIX: the original wrote np.amax(predicted_max_qoi_error
                # < tolerance), reducing a scalar boolean — the comparison
                # belongs outside any reduction.
                if predicted_max_qoi_error < tolerance:
                    converged = True
                    print('Run converged, max approximate qoi error'
                          f' = {predicted_max_qoi_error}')
                    break

            t0 = time.time()
            greedy_file.write("Running FOM sample"
                              f" {sample_with_highest_error_indicator}\n")
            greedy_file.flush()
            ## Identify the sample with the highest error and run FOM
            fom_run_directory = f'{greedy_directory}/fom/{run_directory_prefix}{sample_with_highest_error_indicator}'
            parameter_dict = _create_parameter_dict(parameter_names, parameter_samples[sample_with_highest_error_indicator])
            fom_model.run_model(fom_run_directory, parameter_dict)
            fom_qoi = fom_model.compute_qoi(fom_run_directory, parameter_dict)
            fom_time += time.time() - t0

            # Get ROM QoI to calibrate our error estimator
            outer_loop_counter_m_1 = outer_loop_counter - 1
            rom_run_directory = (
                f'{greedy_directory}/rom_iteration_{outer_loop_counter_m_1}/'
                f'{run_directory_prefix}{sample_with_highest_error_indicator}'
            )
            rom_qoi = rom_model.compute_qoi(rom_run_directory, parameter_dict)
            qoi_error = np.linalg.norm(rom_qoi - fom_qoi) / np.linalg.norm(fom_qoi)
            greedy_file.write(f"Sample {sample_with_highest_error_indicator} had "
                              f"an error of {qoi_error}\n")
            qoi_errors = np.append(qoi_errors, qoi_error)
            reg.fit(max_error_indicators, qoi_errors)

            ## Update our samples
            training_samples = np.append(training_samples, sample_with_highest_error_indicator)
            samples_left = np.delete(samples_left, worst_local_index)

            # Update ROM basis
            t0 = time.time()
            greedy_file.write("Computing ROM bases \n")
            greedy_file.flush()
            # Use run_directory_prefix for consistency with the paths above
            # (the original hard-coded 'run_' here).
            training_dirs = [f'{greedy_directory}/fom/{run_directory_prefix}{i}' for i in training_samples]
            updated_offline_data_dir = f'{greedy_directory}/rom_iteration_{outer_loop_counter}/{offline_directory_prefix}/'
            create_empty_dir(updated_offline_data_dir)
            rom_model = rom_model_builder.build_from_training_dirs(updated_offline_data_dir, training_dirs)
            basis_time += time.time() - t0

            # Add a new sample so the candidate pool never shrinks to zero.
            new_parameter_sample = parameter_space.generate_samples(1)
            parameter_samples = np.append(parameter_samples, new_parameter_sample, axis=0)
            new_sample_number = testing_sample_size + outer_loop_counter - 1
            rom_run_directory = f'{greedy_directory}/rom_iteration_{outer_loop_counter}/{run_directory_prefix}{new_sample_number}'
            fom_run_directory = f'{greedy_directory}/fom/{run_directory_prefix}{new_sample_number}'
            create_empty_dir(rom_run_directory)
            create_empty_dir(fom_run_directory)
            parameter_dict = _create_parameter_dict(parameter_names, new_parameter_sample[0])
            rom_model.populate_run_directory(rom_run_directory, parameter_dict)
            fom_model.populate_run_directory(fom_run_directory, parameter_dict)
            samples_left = np.append(samples_left, new_sample_number)
            greedy_file.flush()

        np.savez(f'{greedy_directory}/greedy_stats',
                 max_error_indicators=max_error_indicators,
                 qoi_errors=qoi_errors,
                 predicted_qoi_errors=predicted_qoi_errors,
                 training_samples=training_samples,
                 fom_time=fom_time,
                 rom_time=rom_time,
                 basis_time=basis_time)
    finally:
        # BUGFIX: guarantee the status log is closed even if a model run
        # raises (the original leaked the handle on any exception).
        greedy_file.close()
class QoIvsErrorIndicatorRegressor:
    '''
    Linear regressor relating QoI errors to error indicators.

    Maintains a single scaling factor ``c`` such that
    ``predicted_qoi_error = c * error_indicator``. When calibration is
    disabled the factor stays at its default value of 1.
    '''

    def __init__(self, calibrated_error: bool = True):
        '''
        Initialize with a unit scaling factor.

        Args:
            calibrated_error: If False, fit() becomes a no-op and the
                scaling factor remains 1.
        '''
        self._scale = 1.0
        self._calibrate = calibrated_error

    def fit(self, x, y):
        '''
        Estimate the scaling factor from paired observations.

        The factor is the ratio of the mean QoI error to the mean error
        indicator. Does nothing when calibration is disabled.

        Args:
            x: Error indicator values.
            y: Corresponding QoI error values.
        '''
        if not self._calibrate:
            return
        self._scale = np.mean(y) / np.mean(x)

    def predict(self, x):
        '''
        Predict the QoI error for the given error indicator value(s).

        Args:
            x: Error indicator value(s).
        '''
        return x * self._scale