#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
from scipy.spatial import distance
[docs]
class Exploration:
"""
Generates samples from the prior distribution using the ``ExpDesign`` class
attributes and functions. Two strategies are available:
* **Voronoi** sampling (to be defined).
* **mc_samples** -- Monte Carlo sampling using random, Sobol or
Latin-hypercube samples. Previously sampled candidates may also be passed,
in which case no new sampling is done and only the scores are estimated.
Each candidate is scored by its distance to the existing training points so
that the whole domain is explored. Scores are normalised to ``[0, 1]``; the
highest values are the best.
Based on the Surrogate Modeling Toolbox (SUMO) [1]_ and modelled after code
in BayesValidRox [2]_.
.. [1] Gorissen, D., Couckuyt, I., Demeester, P., Dhaene, T. and Crombecq,
K., 2010. A surrogate modeling and adaptive sampling toolbox for
computer based design. Journal of Machine Learning Research, 11,
pp. 2051-2055.
.. [2] Mohammadi, F. https://pypi.org/project/bayesvalidrox/
Attributes
----------
exp_design : obj
ExpDesign object, needed to sample from the prior distribution
n_candidate : int
Number of candidate samples.
mc_criterion : str
Selection criterion. The default is `'mc-intersite-proj-th'`. Another
option is `'mc-intersite-proj'`.
w : int
Number of random points in the domain for each sample of the
training set.
"""
def __init__(self, n_candidate,
old_tp,
exp_design=None,
mc_criterion='mc-intersite-proj-th',
w=100):
self.n_candidate = n_candidate
self.mc_criterion = mc_criterion
self.w = w
self.old_ED_X = old_tp
self.ExpDesign = exp_design
[docs]
def get_exploration_samples(self, prior_candidates=None):
"""
This function generates candidates to be selected as new design and
their associated exploration scores.
Returns
-------
all_candidates : array of shape (n_candidate, n_params)
A list of samples.
exploration_scores: arrays of shape (n_candidate)
Exploration scores.
"""
explore_method = self.ExpDesign.explore_method
# print("\n")
# print(f' The {explore_method}-Method is selected as the exploration method.')
# print("\n")
if explore_method == 'Voronoi':
# Generate samples using the Voronoi method
all_candidates, exploration_scores = self.get_vornoi_samples()
else:
# Generate samples using the MC method
all_candidates, exploration_scores = self.get_mc_samples(all_candidates=prior_candidates)
return all_candidates, exploration_scores
# -------------------------------------------------------------------------
[docs]
def get_vornoi_samples(self):
"""
This function generates samples based on voronoi cells and their
corresponding scores
Returns
-------
new_samples : array of shape (n_candidate, n_params)
A list of samples.
exploration_scores: arrays of shape (n_candidate)
Exploration scores.
"""
mc_criterion = self.mc_criterion
n_candidate = self.n_candidate
# Get the Old ExpDesign #samples
# old_ED_X = self.MetaModel.ExpDesign.X
old_ED_X = self.old_ED_X
ndim = old_ED_X.shape[1]
# calculate error #averageErrors
error_voronoi, all_candidates = self.approximate_voronoi(
self.w, old_ED_X
)
# Pick the best candidate point in the voronoi cell
# for each best sample
selected_samples = np.empty((0, ndim))
bad_samples = []
for index in range(len(error_voronoi)):
# get candidate new samples from voronoi tesselation
candidates = self.closest_points[index]
# get total number of candidates
n_new_samples = candidates.shape[0]
# still no candidate samples around this one, skip it!
if n_new_samples == 0:
print('The following sample has been skipped because there '
'were no candidate samples around it...')
print(old_ED_X[index])
bad_samples.append(index)
continue
# find candidate that is farthest away from any existing sample
max_min_distance = 0
best_candidate = 0
min_intersite_dist = np.zeros((n_new_samples))
min_projected_dist = np.zeros((n_new_samples))
for j in range(n_new_samples):
new_samples = np.vstack((old_ED_X, selected_samples))
# find min distorted distance from all other samples
euclidean_dist = self._build_dist_matrix_point(
new_samples, candidates[j], do_sqrt=True)
min_euclidean_dist = np.min(euclidean_dist)
min_intersite_dist[j] = min_euclidean_dist
# Check if this is the maximum minimum distance from all other
# samples
if min_euclidean_dist >= max_min_distance:
max_min_distance = min_euclidean_dist
best_candidate = j
# Projected distance
projected_dist = distance.cdist(
new_samples, [candidates[j]], 'chebyshev')
min_projected_dist[j] = np.min(projected_dist)
if mc_criterion == 'mc-intersite-proj':
weight_euclidean_dist = 0.5 * ((n_new_samples + 1) ** (1 / ndim) - 1)
weight_projected_dist = 0.5 * (n_new_samples + 1)
total_dist_scores = weight_euclidean_dist * min_intersite_dist
total_dist_scores += weight_projected_dist * min_projected_dist
elif mc_criterion == 'mc-intersite-proj-th':
alpha = 0.5 # chosen (tradeoff)
d_min = 2 * alpha / n_new_samples
if any(min_projected_dist < d_min):
candidates = np.delete(
candidates, [min_projected_dist < d_min], axis=0
)
total_dist_scores = np.delete(
min_intersite_dist, [min_projected_dist < d_min],
axis=0
)
else:
total_dist_scores = min_intersite_dist
else:
raise NameError(
'The MC-Criterion you requested is not available.'
)
# Add the best candidate to the list of new samples
best_candidate = np.argsort(total_dist_scores)[::-1][:n_candidate]
selected_samples = np.vstack(
(selected_samples, candidates[best_candidate])
)
self.new_samples = selected_samples
self.exploration_scores = np.delete(error_voronoi, bad_samples, axis=0)
return self.new_samples, self.exploration_scores
# -------------------------------------------------------------------------
[docs]
def get_mc_samples(self, all_candidates=None):
"""
This function generates random samples based on Global Monte Carlo
methods and their corresponding scores, based on [1].
[1] Crombecq, K., Laermans, E. and Dhaene, T., 2011. Efficient
space-filling and non-collapsing sequential design strategies for
simulation-based modeling. European Journal of Operational Research
, 214(3), pp.683-696.
DOI: https://doi.org/10.1016/j.ejor.2011.05.032
Implemented methods to compute scores:
1) mc-intersite-proj
2) mc-intersite-proj-th
Arguments
---------
all_candidates : array, optional
Samples to compute the scores for. The default is `None`. In this
case, samples will be generated by defined model input marginals.
Returns
-------
new_samples : array of shape (n_candidate, n_params)
A list of samples.
exploration_scores: arrays of shape (n_candidate)
Exploration scores.
"""
# MetaModel = self.MetaModel
# explore_method = MetaModel.ExpDesign.explore_method
ExpDesign = self.ExpDesign
explore_method = ExpDesign.explore_method
mc_criterion = self.mc_criterion
# Get number of candidates to evaluate
if all_candidates is None:
n_candidate = self.n_candidate
else:
n_candidate = all_candidates.shape[0]
# Get the Old ExpDesign #samples
old_ED_X = self.old_ED_X
ndim = old_ED_X.shape[1] # Number of parameters in each parameter set
# Sample from prior-----------------------------------------
if all_candidates is None:
# Generate MC Samples
all_candidates = ExpDesign.generate_samples(
self.n_candidate, explore_method)
self.all_candidates = all_candidates
# initialization
min_intersite_dist = np.zeros((n_candidate))
min_projected_dist = np.zeros((n_candidate))
for i, candidate in enumerate(all_candidates):
# find candidate that is farthest away from any existing sample
maxMinDistance = 0
# find min distorted distance from all other samples
euclidean_dist = self._build_dist_matrix_point(
old_ED_X, candidate, do_sqrt=True)
min_euclidean_dist = np.min(euclidean_dist)
min_intersite_dist[i] = min_euclidean_dist
# Check if this is the maximum minimum distance from all other
# samples
if min_euclidean_dist >= maxMinDistance:
maxMinDistance = min_euclidean_dist
# Projected distance
projected_dist = self._build_dist_matrix_point(
old_ED_X, candidate, 'chebyshev')
min_projected_dist[i] = np.min(projected_dist)
if mc_criterion == 'mc-intersite-proj':
weight_euclidean_dist = ((n_candidate + 1) ** (1 / ndim) - 1) * 0.5
weight_projected_dist = (n_candidate + 1) * 0.5
total_dist_scores = weight_euclidean_dist * min_intersite_dist
total_dist_scores += weight_projected_dist * min_projected_dist
elif mc_criterion == 'mc-intersite-proj-th':
alpha = 0.5 # chosen (tradeoff)
d_min = 2 * alpha / n_candidate
if any(min_projected_dist < d_min):
all_candidates = np.delete(
all_candidates, [min_projected_dist < d_min], axis=0
)
total_dist_scores = np.delete(
min_intersite_dist, [min_projected_dist < d_min], axis=0
)
else:
total_dist_scores = min_intersite_dist
else:
raise NameError('The MC-Criterion you requested is not available.')
self.new_samples = all_candidates
self.exploration_scores = total_dist_scores
self.exploration_scores /= np.nansum(total_dist_scores)
return self.new_samples, self.exploration_scores
# -------------------------------------------------------------------------
[docs]
def approximate_voronoi(self, w, samples):
"""
An approximate (monte carlo) version of Matlab's voronoi command.
Arguments
---------
samples : array
Old experimental design to be used as center points for voronoi
cells.
Returns
-------
areas : array
An approximation of the voronoi cells' areas.
all_candidates: list of arrays
A list of samples in each voronoi cell.
"""
# MetaModel = self.MetaModel
ExpDesign = self.ExpDesign
n_samples = samples.shape[0]
ndim = samples.shape[1]
# Compute the number of random points
n_points = w * samples.shape[0]
# Generate w random points in the domain for each sample
points = ExpDesign.generate_samples(n_points, 'random')
self.all_candidates = points
# Calculate the nearest sample to each point
self.areas = np.zeros((n_samples))
self.closest_points = [np.empty((0, ndim)) for i in range(n_samples)]
# Compute the minimum distance from all the samples of old_ED_X for
# each test point
for idx in range(n_points):
# calculate the minimum distance
distances = self._build_dist_matrix_point(
samples, points[idx], do_sqrt=True
)
closest_sample = np.argmin(distances)
# Add to the voronoi list of the closest sample
self.areas[closest_sample] = self.areas[closest_sample] + 1
prev_closest_points = self.closest_points[closest_sample]
self.closest_points[closest_sample] = np.vstack(
(prev_closest_points, points[idx])
)
# Divide by the amount of points to get the estimated volume of each
# voronoi cell
self.areas /= n_points
self.perc = np.max(self.areas * 100)
self.errors = self.areas
return self.areas, self.all_candidates
# -------------------------------------------------------------------------
def _build_dist_matrix_point(self, samples, point, method='euclidean',
do_sqrt=False):
"""
Calculates the intersite distance of all points in samples from point.
Parameters
----------
samples : array of shape (n_samples, n_params)
The old experimental design.
point : array
A candidate point.
method : str
Distance method.
do_sqrt : bool, optional
Whether to return distances or squared distances. The default is
`False`.
Returns
-------
distances : array
Distances.
"""
distances = distance.cdist(samples, np.array([point]), method)
# do square root?
if do_sqrt:
return distances
else:
return distances ** 2