Module jidenn.evaluation.evaluator
Expand source code
from typing import Optional, List, Dict, Tuple, Union, Callable, Literal
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import pandas as pd
import logging
import time
#
from jidenn.data.JIDENNDataset import JIDENNDataset, ROOTVariables
from jidenn.data.TrainInput import input_classes_lookup
from .evaluation_metrics import calculate_metrics
from .WorkingPoint import BinnedVariable
from multiprocessing import Pool
def add_score_to_dataset(dataset: JIDENNDataset,
                         score: np.ndarray,
                         score_name: str = 'score') -> JIDENNDataset:
"""Add a score array to a JIDENNDataset. Score could me any variable that is not part of the original dataset.
It is important that the score array has the same length as the dataset. This is useful for adding the output of a
ML model to the original dataset before the trining input is created.
I/O Example:
```python
example_input_element = {'E': 1.0, 'eta': 1.0, 'pt': 1.0, 'phi': 1.0, 'label': 1, 'num': 1, 'event': 1, 'mu': 1.0, 'corr_mu': 1.0}
example_output_element = {'E': 1.0, 'eta': 1.0, 'pt': 1.0, 'phi': 1.0, 'label': 1, 'num': 1, 'event': 1, 'mu': 1.0, 'corr_mu': 1.0, 'score': 0.5}
```
Args:
dataset (JIDENNDataset): JIDENNDataset to add the score to.
score (np.ndarray): Array containing the score values to add.
score_name (str, optional): Name of the score variable inside the new dataset. Default is 'score'.
Returns:
JIDENNDataset: JIDENNDataset with the score added. Its elements will have the same structure as the original,
i.e. a dictionary with the same kay-value pairs with one additional key-value pair for the score `{score_name: score[i]}`.
"""
    @tf.function
    def add_to_dict(data_label: Tuple[ROOTVariables, tf.Tensor], score: tf.Tensor) -> Tuple[ROOTVariables, tf.Tensor]:
        # Copy the variable dictionary and attach the score under the requested name.
        data, label = data_label[0].copy(), data_label[1]
        data[score_name] = score
        return data, label

    # Pair each dataset element with its score and merge the score into the variable dictionary.
    score_dataset = tf.data.Dataset.from_tensor_slices(score)
    dataset = tf.data.Dataset.zip((dataset.dataset, score_dataset))
    dataset = dataset.map(add_to_dict)
    variables = list(dataset.element_spec[0].keys())
    return JIDENNDataset(variables).set_dataset(dataset, element_spec=dataset.element_spec)
def _calculate_metrics_in_bin(x):
    # Unpack the worker arguments: a (bin interval, sub-DataFrame) pair from the groupby,
    # plus the shared score column name, threshold and optional plotter.
    y, score_variable, threshold, validation_plotter = x
    inter, x = y
    # Skip empty bins and bins containing only one class, where the metrics are undefined.
    if x.empty:
        return
    if len(x['label'].unique()) < 2:
        return
    # A BinnedVariable provides a separate threshold per bin; a plain float applies everywhere.
    if isinstance(threshold, BinnedVariable):
        threshold_val = threshold[x['bin'].iloc[0]]
    else:
        threshold_val = threshold
    if validation_plotter is not None:
        validation_plotter(x)
    ret = calculate_metrics(x['label'], x[score_variable], threshold=threshold_val)
    ret['num_events'] = len(x)
    ret['bin'] = inter
    return ret
def calculate_binned_metrics(df: pd.DataFrame,
                             binned_variable: str,
                             score_variable: str,
                             bins: Union[List[Union[float, int]], np.ndarray],
                             validation_plotter: Optional[Callable[[pd.DataFrame], None]] = None,
                             threshold: Union[BinnedVariable, float] = 0.5,
                             threads: Optional[int] = None) -> pd.DataFrame:
"""Calculate metrics for a binary classification problem binned by a continuous variable.
Example pd.DataFrame structure:
```python
df = pd.DataFrame({'label': [0, 1, 0, 1, 0, 1, 0, 1],
'score': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, .7, .8],
'jets_pt': [1, 2, 3, 4, 5, 6, 7, 8]})
score_variable = 'score'
binning_variable = 'jets_pt'
bins = [2, 5, 7]
binned_metrics = calculate_binned_metrics(df=df,
binned_variable=binned_variable,
score_variable=score_variable,
bins=bins)
print(binned_metrics)
# Output:
# accuracy signal_efficiency background_efficiency num_events bin
# 1 0.500000 0.500000 0.500000 2 '(2.0, 5.0]'
# 2 0.666667 0.666667 0.666667 3 '(5.0, 7.0]'
```
Args:
df (pd.DataFrame): DataFrame containing columns `label`, `score_variable` and `binned_variable`.
binned_variable (str): Name of the column containing the continuous variable to bin.
score_variable (str): Name of the column containing the model scores.
bins (Union[List[Union[float, int]], np.ndarray]): List or array of bin edges to use.
validation_plotter (Callable[[pd.DataFrame], None], optional): Function to plot validation data
for each bin (confusion matrix, ROC, score outputs histogram,...). Default is None.
threshold (Union[pd.DataFrame, float], optional): Threshold value for the binary classification.
If a DataFrame is provided, it should contain a 'bin' column with string representation of
pd.Interval (e.g. 'm(0.5, 1.0]') and a column with the name specified in `threshold_name` containing
the threshold values for each bin. Default is 0.5.
threshold_name (str, optional): Name of the column containing the threshold values in the threshold
DataFrame. Only used if a DataFrame is provided as the threshold argument. Default is None.
Returns:
pd.DataFrame: DataFrame containing the calculated metrics for each bin.
"""
    # Assign each row to a bin and group by it.
    df['bin'] = pd.cut(df[binned_variable], bins=bins)
    grouped_metrics = df.groupby('bin')
    args = [(x, score_variable, threshold, validation_plotter) for x in grouped_metrics]
    # Compute the per-bin metrics, optionally in parallel worker processes.
    if threads is not None and threads > 1:
        with Pool(threads) as pool:
            metrics = pool.map(_calculate_metrics_in_bin, args)
    else:
        metrics = map(_calculate_metrics_in_bin, args)
    # Drop bins that were skipped (empty or single-class) and collect the rest.
    metrics = [x for x in metrics if x is not None]
    metrics = pd.DataFrame(metrics)
    return metrics
def evaluate_multiple_models(model_paths: List[str],
                             model_names: List[str],
                             dataset: JIDENNDataset,
                             model_input_name: List[Literal['highlevel',
                                                            'highlevel_constituents',
                                                            'constituents',
                                                            'relative_constituents',
                                                            'interaction_constituents']],
                             batch_size: int,
                             take: Optional[int] = None,
                             score_name: str = 'score',
                             log: Optional[logging.Logger] = None,
                             custom_objects: Optional[Dict[str, Callable]] = None,
                             distribution_drawer: Optional[Callable[[JIDENNDataset], None]] = None) -> JIDENNDataset:
"""Evaluate multiple Keras models on a JIDENNDataset. The explicit training inputs are created automatically
from the JIDENNDataset. Input type for each model is deduced from the `model_input_name` argument. The order of
evaluation is **NOT** determined by the `model_names` argument. The iteration order is given by the unigue values
in `model_input_name`, to reduce the number of times the dataset is prepared.
Args:
model_paths (List[str]): List of paths to the Keras model files. They will be loaded with
`tf.keras.models.load_model(model_path, custom_objects=custom_objects)`.
model_names (List[str]): List of names for each model.
dataset (JIDENNDataset): JIDENNDataset to evaluate the models on and to add the scores to.
model_input_name (List[str]): List of input names for each model. See `jidenn.data.TrainInput.input_classes_lookup`
for options.
batch_size (int): Batch size to use for the evaluation.
take (int, optional): Number of events to evaluate. If not provided, all events will be used. Default is None.
score_name (str, optional): Name of the score variable to add to the dataset. Default is 'score'. For each model,
the score will be added with the name `f'{model_name}_{score_name}'`.
log (logging.Logger, optional): Logger to use for logging messages and evaluation/loading times. Default is None.
custom_objects (Dict[str, Callable], optional): Dictionary of custom objects to use when loading the models.
Passed to `tf.keras.models.load_model(model_path, custom_objects=custom_objects)`. Default is None.
distribution_drawer (Callable[[JIDENNDataset], None], optional): Function to plot the data distribution of
the input variables which are automatically created with the `jidenn.data.TrainInput` class. Default is None.
Returns:
JIDENNDataset: JIDENNDataset with the scores added.
"""
    # iterate over all input types to reduce the number of times the dataset is prepared
    if log is not None:
        log.info(f'Batches will be of size: {batch_size}, total number of events: {take}')
    for input_type in set(model_input_name):
        train_input_class = input_classes_lookup(input_type)
        train_input_class = train_input_class()
        model_input = tf.function(func=train_input_class)
        ds = dataset.create_train_input(model_input)
        if distribution_drawer is not None:
            if log is not None:
                log.info(f'----- Drawing data distribution for: {input_type}')
            distribution_drawer(ds)
        ds = ds.get_prepared_dataset(batch_size=batch_size, take=take)
        # iterate over all models with the same input type
        idxs = np.array(model_input_name) == input_type
        for model_path, model_name in zip(np.array(model_paths)[idxs], np.array(model_names)[idxs]):
            if log is not None:
                log.info(f'----- Loading model: {model_name}')
            start = time.time()
            model = tf.keras.models.load_model(model_path, custom_objects=custom_objects)
            stop = time.time()
            if log is not None:
                log.info(f'----- Loading model took: {stop-start:.2f} s')
                log.info(f'----- Predicting with model: {model_name}')
            start = time.time()
            score = model.predict(ds).ravel()
            stop = time.time()
            if log is not None:
                log.info(f'----- Predicting took: {stop-start:.2f} s')
            dataset = add_score_to_dataset(dataset, score, f'{model_name}_{score_name}')
    return dataset
Functions
def add_score_to_dataset(dataset: JIDENNDataset, score: numpy.ndarray, score_name: str = 'score') ‑> JIDENNDataset
Add a score array to a JIDENNDataset. The score can be any variable that is not part of the original dataset. The score array must have the same length as the dataset. This is useful for adding the output of a ML model to the original dataset before the training input is created.

I/O Example:
```python
example_input_element = {'E': 1.0, 'eta': 1.0, 'pt': 1.0, 'phi': 1.0, 'label': 1, 'num': 1, 'event': 1, 'mu': 1.0, 'corr_mu': 1.0}
example_output_element = {'E': 1.0, 'eta': 1.0, 'pt': 1.0, 'phi': 1.0, 'label': 1, 'num': 1, 'event': 1, 'mu': 1.0, 'corr_mu': 1.0, 'score': 0.5}
```

Args
dataset : JIDENNDataset
    JIDENNDataset to add the score to.
score : np.ndarray
    Array containing the score values to add.
score_name : str, optional
    Name of the score variable inside the new dataset. Default is 'score'.

Returns
JIDENNDataset
    JIDENNDataset with the score added. Its elements will have the same structure as the original, i.e. a dictionary with the same key-value pairs plus one additional key-value pair for the score `{score_name: score[i]}`.
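The heart of this function is a `tf.data.Dataset.zip` followed by a `map` that merges the score into each element's variable dictionary. Below is a minimal, self-contained sketch of that pattern with plain `tf.data`; the toy element structure is illustrative only and much simpler than a real JIDENNDataset schema.

```python
import numpy as np
import tensorflow as tf

# Toy stand-in for a JIDENNDataset's underlying dataset: (variables_dict, label) pairs.
data = tf.data.Dataset.from_tensor_slices(({'pt': [1.0, 2.0, 3.0]}, [0, 1, 0]))
score = np.array([0.1, 0.9, 0.2])  # must have the same length as the dataset

# Zip each element with its score, then merge the score into the variable dictionary.
zipped = tf.data.Dataset.zip((data, tf.data.Dataset.from_tensor_slices(score)))
with_score = zipped.map(lambda data_label, s: ({**data_label[0], 'score': s}, data_label[1]))

for variables, label in with_score:
    print(variables['pt'].numpy(), variables['score'].numpy(), label.numpy())
```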
def calculate_binned_metrics(df: pandas.core.frame.DataFrame, binned_variable: str, score_variable: str, bins: Union[List[Union[float, int]], numpy.ndarray], validation_plotter: Optional[Callable[[pandas.core.frame.DataFrame], None]] = None, threshold: Union[BinnedVariable, float] = 0.5, threads: Optional[int] = None) ‑> pandas.core.frame.DataFrame
Calculate metrics for a binary classification problem binned by a continuous variable.

Example pd.DataFrame structure:
```python
df = pd.DataFrame({'label': [0, 1, 0, 1, 0, 1, 0, 1],
                   'score': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
                   'jets_pt': [1, 2, 3, 4, 5, 6, 7, 8]})
score_variable = 'score'
binned_variable = 'jets_pt'
bins = [2, 5, 7]
binned_metrics = calculate_binned_metrics(df=df,
                                          binned_variable=binned_variable,
                                          score_variable=score_variable,
                                          bins=bins)
print(binned_metrics)
# Output:
#    accuracy  signal_efficiency  background_efficiency  num_events         bin
# 1  0.500000           0.500000               0.500000           2  (2.0, 5.0]
# 2  0.666667           0.666667               0.666667           3  (5.0, 7.0]
```

Args
df : pd.DataFrame
    DataFrame containing columns `label`, `score_variable` and `binned_variable`.
binned_variable : str
    Name of the column containing the continuous variable to bin.
score_variable : str
    Name of the column containing the model scores.
bins : Union[List[Union[float, int]], np.ndarray]
    List or array of bin edges to use.
validation_plotter : Callable[[pd.DataFrame], None], optional
    Function to plot validation data for each bin (confusion matrix, ROC, score outputs histogram, ...). Default is None.
threshold : Union[BinnedVariable, float], optional
    Threshold value for the binary classification. If a BinnedVariable is provided, the threshold for
    each bin is looked up from it using the bin interval (e.g. '(0.5, 1.0]'). Default is 0.5.
threads : int, optional
    Number of processes used to compute the per-bin metrics in parallel. If None or 1, the metrics
    are computed sequentially. Default is None.

Returns
pd.DataFrame
    DataFrame containing the calculated metrics for each bin.
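As a runnable usage sketch (assuming the module is importable; the data is a toy, and the plotter below just prints a per-bin summary where a real one might draw a ROC curve or a score histogram):

```python
import numpy as np
import pandas as pd
from jidenn.evaluation.evaluator import calculate_binned_metrics

rng = np.random.default_rng(0)
df = pd.DataFrame({'label': rng.integers(0, 2, size=1000),
                   'jets_pt': rng.uniform(20., 200., size=1000)})
# Toy scores, mildly correlated with the label so each bin contains both classes.
df['score'] = 0.5 * df['label'] + 0.5 * rng.uniform(0., 1., size=1000)

def print_bin_summary(bin_df: pd.DataFrame) -> None:
    # Stand-in for a real validation plotter (confusion matrix, ROC, ...).
    print(f"bin {bin_df['bin'].iloc[0]}: {len(bin_df)} events")

binned_metrics = calculate_binned_metrics(df=df,
                                          binned_variable='jets_pt',
                                          score_variable='score',
                                          bins=[20, 50, 100, 200],
                                          validation_plotter=print_bin_summary,
                                          threshold=0.5)
print(binned_metrics)
```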
def evaluate_multiple_models(model_paths: List[str], model_names: List[str], dataset: JIDENNDataset, model_input_name: List[Literal['highlevel', 'highlevel_constituents', 'constituents', 'relative_constituents', 'interaction_constituents']], batch_size: int, take: Optional[int] = None, score_name: str = 'score', log: Optional[logging.Logger] = None, custom_objects: Optional[Dict[str, Callable]] = None, distribution_drawer: Optional[Callable[[JIDENNDataset], None]] = None) ‑> JIDENNDataset
Evaluate multiple Keras models on a JIDENNDataset. The explicit training inputs are created automatically from the JIDENNDataset. The input type for each model is deduced from the `model_input_name` argument. The order of evaluation is **NOT** determined by the `model_names` argument. The iteration order is given by the unique values in `model_input_name`, to reduce the number of times the dataset is prepared.

Args
model_paths : List[str]
    List of paths to the Keras model files. They will be loaded with
    `tf.keras.models.load_model(model_path, custom_objects=custom_objects)`.
model_names : List[str]
    List of names for each model.
dataset : JIDENNDataset
    JIDENNDataset to evaluate the models on and to add the scores to.
model_input_name : List[str]
    List of input names for each model. See `input_classes_lookup()` for options.
batch_size : int
    Batch size to use for the evaluation.
take : int, optional
    Number of events to evaluate. If not provided, all events will be used. Default is None.
score_name : str, optional
    Name of the score variable to add to the dataset. Default is 'score'. For each model,
    the score will be added with the name `f'{model_name}_{score_name}'`.
log : logging.Logger, optional
    Logger to use for logging messages and evaluation/loading times. Default is None.
custom_objects : Dict[str, Callable], optional
    Dictionary of custom objects to use when loading the models. Passed to
    `tf.keras.models.load_model(model_path, custom_objects=custom_objects)`. Default is None.
distribution_drawer : Callable[[JIDENNDataset], None], optional
    Function to plot the data distribution of the input variables which are automatically created
    with the `jidenn.data.TrainInput` class. Default is None.

Returns
JIDENNDataset
    JIDENNDataset with the scores added.