Source code for contur.scan.scanning_functions

# -*- python -*-
# os, shutil and sys are used throughout this module; import them explicitly
# rather than relying on the wildcard import from os_functions below.
import os
import shutil
import sys
import pickle

import numpy as np
import pandas as pd

from .os_functions import *
import contur.util.utils as cutil
import contur.config.config as cfg

def invalid_params(run_point, param_dict):
    """ return True if any of the params are invalid for this run_point """
    for param, info in sorted(param_dict.items()):
        value = info['values'][run_point]
        try:
            if np.isnan(value):
                cfg.contur_log.warning("Invalid parameter value {} for {}".format(value, param))
                return True
        except TypeError:
            # np.isnan only applies to numeric values, so this test is moot here
            pass
        if info['mode'] == 'CONDITION' and not bool(value):
            return True
    return False
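
# A minimal sketch (not part of the module) of how invalid_params treats a
# failed CONDITION. The param_dict below assumes only the 'values' and 'mode'
# keys that invalid_params itself reads; the parameter names are made up.
def _example_invalid_params():
    example_dict = {
        'mX': {'mode': 'LIN', 'values': np.array([100.0, 200.0])},
        'mX_light': {'mode': 'CONDITION', 'values': np.array([True, False])},
    }
    assert not invalid_params(0, example_dict)  # condition holds at point 0
    assert invalid_params(1, example_dict)      # condition fails, so point 1 is excluded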
def read_template_file(template_file_in):
    """Read in the template file and store its contents in a dictionary"""
    template = {}
    with open(template_file_in, 'r') as f:
        template[cfg.mceg_template] = f.read()
    return template
def check_param_consistency(param_dict, template_path):
    """Check that the parameters in the param file match those in the templates."""
    with open(template_path, 'r') as template_file:
        template_text = template_file.read()

    from string import Formatter
    template_param_names = set(
        [fn for _, fn, _, _ in Formatter().parse(template_text) if fn is not None])
    param_names = set(param_dict.keys())

    # Check that there aren't unknown params in the template
    if not template_param_names.issubset(param_names):
        unknown = template_param_names - param_names
        cfg.contur_log.critical("Error: unknown parameters {} in template file {}".format(
            list(unknown), template_path))
        # TODO: it'd be better to raise this as an exception, so the calling
        # code can take evasive action, e.g. cleaning up aborted scan dirs.
        # Usually a good plan that only top-level code calls exit()
        sys.exit(1)

    # Check that all param names are used: warn, but do not exit, if not
    if template_param_names < param_names:  # < i.e. proper subset
        unused = param_names - template_param_names
        cfg.contur_log.warning("parameters {} unused in template file {}".format(
            list(unused), template_path))
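
# A minimal sketch (not part of the module) of the placeholder extraction used
# above, shown standalone. The template text is made up for illustration: any
# "{name}" field in the template must appear in the param file.
def _example_template_placeholders():
    from string import Formatter
    text = "set Zprime:mass {mZp}\nset Zprime:coupling {gZp}"
    names = {fn for _, fn, _, _ in Formatter().parse(text) if fn is not None}
    return names  # -> {'mZp', 'gZp'}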
def sample_fromvalues(param_dict, num_points):
    """
    for uniform mode we return the grid by having already worked out each
    axis (this allows us to mix LIN and LOG)
    """
    dimensions = len(param_dict)
    points_per_dim = round(num_points ** (1. / dimensions))
    if not np.isclose(points_per_dim, num_points ** (1. / dimensions)):
        permission_message = ("If using uniform mode, the number of points^"
                              "(1/dimensions) must be an integer!\n"
                              "Do you want to use %i points?" % points_per_dim ** dimensions)
        if not cutil.permission_to_continue(permission_message):
            sys.exit()

    grid = np.meshgrid(*[param["axes"] for param in param_dict.values()])
    coords = [np.reshape(dim, dim.size) for dim in grid]
    return coords
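
# Worked example for the uniform-grid check above: with 3 parameters and
# num_points = 1000, each axis gets round(1000**(1/3)) = 10 points and the
# grid is exactly 1000 points. With num_points = 900 the cube root is ~9.65,
# which is not an integer, so the user is asked whether to proceed with
# 10**3 = 1000 points instead.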
def coords_from_axes(param_dict, param_names=None):
    """
    Read the param_dict that has been dressed with axes values and build a grid to return
    """
    if param_names is None:
        axes_list = [v['axes'] for v in param_dict.values()]
    else:
        axes_list = [v['axes'] for k, v in param_dict.items() if k in param_names]
    grid = np.meshgrid(*axes_list)
    coords = [np.reshape(dim, dim.size) for dim in grid]
    return coords
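
# A minimal sketch (not part of the module) of what coords_from_axes returns
# for two dressed axes. Each output array is one flattened meshgrid dimension,
# so zipping them yields every (x, y) combination.
def _example_coords_from_axes():
    pdict = {
        'x': {'axes': np.array([1.0, 2.0])},
        'y': {'axes': np.array([10.0, 20.0, 30.0])},
    }
    coords = coords_from_axes(pdict)
    # coords[0] and coords[1] each have 6 entries; zipping them gives the
    # 2 x 3 = 6 (x, y) combinations.
    return list(zip(*coords))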
def generate_points(param_dict):
    """
    Generate points to sample using the given mode.

    parameters:
        param_dict: Dictionary with parameter names as keys, each containing
            another dictionary with keys 'range' and 'values'.

    returns:
        - param_dict: Same param_dict as was passed in to the function, but
          now filled with points.
        - num_points: The number of points generated.
    """

    # We use a dictionary to group the param names to reduce the number of
    # iterations and avoid duplicating the dictionary.
    # Currently, we classify them only for the DATAFRAME, REL and CONDITION
    # categories, as these modes require special action. DATAFRAME requires
    # loading a dataframe and merging the data points back into the grid
    # separately. REL requires filling in all points first, then determining
    # its value relative to another parameter. CONDITION requires filling all
    # points first, then using them to see whether the condition is satisfied.
    grouped_param_names = {
        'df': [],
        'non-df': [],
        'condition': [],
        'rel': [],
        'non-rel': []
    }

    # This dictionary will hold three dataframes: one for the data loaded
    # from the Pickle file, another for the axes generated programmatically,
    # and a final dataframe that will contain both.
    points = {
        'df': None,
        'non-df': None,
        'final': None
    }

    def replace_and_eval_rel(series):
        """
        evaluate the expressions for relative parameters and replace them in ``series``.
        """
        for rel_param in grouped_param_names['rel']:
            conditions = {param: series[param] for param in grouped_param_names['condition']}
            if all(conditions.values()):
                non_rel_values = {param: series[param] for param in grouped_param_names['non-rel']}
                try:
                    formatted_expr = param_dict[rel_param]['form'].format(**non_rel_values)
                except KeyError:
                    cfg.contur_log.critical("KeyError on {}. This may mean you have tried to specify one REL parameter in terms of another.".format(rel_param))
                    sys.exit(1)
                try:
                    series[rel_param] = float(eval(formatted_expr))
                except Exception:
                    cfg.contur_log.error("Error evaluating one of your relative parameters.")
                    cfg.contur_log.error("Could not evaluate {}. Check your values?".format(formatted_expr))
                    raise
            else:
                # mark the point as invalid so that invalid_params will skip it later
                series[rel_param] = float("NaN")
        return series

    def replace_and_eval_conditions(series):
        """
        evaluate the expressions for the conditions and replace them in ``series``.
        """
        for condition in grouped_param_names['condition']:
            non_rel_values = {param: series[param] for param in grouped_param_names['non-rel']}
            rel_values = {param: series[param] for param in grouped_param_names['rel']}
            try:
                formatted_expr = param_dict[condition]['form'].format(**non_rel_values, **rel_values)
            except KeyError:
                cfg.contur_log.critical("KeyError on {}. This may mean you have tried to specify a condition using REL parameters.".format(condition))
                sys.exit(1)
            series[condition] = eval(formatted_expr)
        return series

    def append_param(param_name, mode):
        """
        This function classifies a given param name according to its mode
        for efficient iteration in other parts of the code.
        """
        if mode == 'DATAFRAME':
            grouped_param_names['df'].append(param_name)
        else:
            grouped_param_names['non-df'].append(param_name)
        if mode == 'REL':
            grouped_param_names['rel'].append(param_name)
        elif not mode == 'CONDITION':
            grouped_param_names['non-rel'].append(param_name)
        if mode == 'CONDITION':
            grouped_param_names['condition'].append(param_name)

    for param_name, param_config_dict in param_dict.items():
        create_axes(param_config_dict)
        append_param(param_name, param_config_dict['mode'])

    if len(grouped_param_names['df']) > 0:
        points['df'] = load_df_points(param_dict)

    if len(grouped_param_names['non-df']) > 0:
        # If there are params that do not have the DATAFRAME mode, we create
        # the grid independently for those.
        points['non-df'] = coords_from_axes(param_dict, grouped_param_names['non-df'])
        # "Non-df" refers to the fact that the points are not loaded from a
        # dataframe. However, we do use a pandas DataFrame to manipulate the data.
        points['non-df'] = pd.DataFrame(
            np.array(points['non-df']).T,
            columns=grouped_param_names['non-df'])

    # Merge the non-df and df param grids, creating a new grid with all
    # possible combinations. It is important to preserve the fixed
    # combinations provided in the Pickle DataFrame file, i.e. treat every
    # row as a fixed object and do not combine the pickle dataframe columns
    # separately.
    if points['df'] is None:
        points['final'] = points['non-df']
    elif points['non-df'] is None:
        points['final'] = points['df']
    else:
        points['final'] = (pd
                           .merge(points['non-df'].assign(key=0),
                                  points['df'].assign(key=0),
                                  on='key')
                           .drop('key', axis=1))

    # ... and evaluate the conditions
    points['final'] = points['final'].apply(replace_and_eval_conditions, axis=1)

    # Now that we have merged the two separate grids, we do the required
    # substitutions for the REL params.
    points['final'] = points['final'].apply(replace_and_eval_rel, axis=1)

    for param in param_dict.keys():
        # Copy the coordinates into the param dictionary.
        param_dict[param]['values'] = np.array(points['final'][param])

    num_points = len(points['final'].index)
    return param_dict, num_points
def create_axes(param_config_dict):
    """
    this is a bit of a hybrid mess, should just make the grid directly out of
    these axes and attach to object I think?....
    """
    mode = param_config_dict["mode"]
    if mode in ("LOG", "LIN"):
        start = param_config_dict["start"]
        stop = param_config_dict["stop"]
        number = param_config_dict["number"]
        if mode == "LOG":
            # quick work to make the file read log spacing
            logspace = cutil.newlogspace
            param_config_dict["axes"] = logspace(start, stop, number)
        else:
            param_config_dict["axes"] = np.linspace(start, stop, num=number)
    elif mode == "CONST":
        param_config_dict["axes"] = param_config_dict["value"]
    elif mode == "REL":
        # for now just give REL a dummy 0 axis
        param_config_dict["axes"] = 0.0
    elif mode == "DIR":
        param_config_dict["axes"] = 0.0
        scan_dir = param_config_dict["name"]
        if os.path.isdir(scan_dir):
            param_config_dict["axes"] = os.listdir(scan_dir)
        else:
            cfg.contur_log.critical("{} is not a directory".format(scan_dir))
            sys.exit(1)
    elif mode == "SINGLE" or mode == "SCALED":
        # single string, usually an SLHA file name
        param_config_dict["axes"] = param_config_dict["name"]
    elif mode == "DATAFRAME":
        param_config_dict["axes"] = 0
    elif mode == "CONDITION":
        param_config_dict["axes"] = 0
    else:
        raise ValueError(
            "Unrecognised mode parameter %s in param_card.dat" % mode)
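
# A minimal sketch (not part of the module): create_axes dresses the config
# dict in place; LIN becomes a numpy axis, CONST stays a scalar. Values here
# are made up for illustration.
def _example_create_axes():
    lin = {'mode': 'LIN', 'start': 0.0, 'stop': 1.0, 'number': 5}
    const = {'mode': 'CONST', 'value': 13000.0}
    create_axes(lin)    # lin['axes'] -> array([0., 0.25, 0.5, 0.75, 1.])
    create_axes(const)  # const['axes'] -> 13000.0
    return lin['axes'], const['axes']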
def filter_param_dict_by_key(param_dict, expr):
    """
    filter the parameter dictionary based on the mode of each parameter

    :param param_dict: dictionary containing the model parameters.
    :param expr: expression which, if true of the parameter's mode, means the parameter passes
    """
    return {
        param_name: param_config_dict
        for (param_name, param_config_dict) in param_dict.items()
        if expr(param_config_dict['mode'])
    }
def load_df_points(param_dict):
    """
    Loads the parameter points for DATAFRAME mode from the given Pickle file(s).
    """
    # These are the dataframe parameters.
    df_params = filter_param_dict_by_key(param_dict, lambda mode: mode == 'DATAFRAME')

    # We have to load the pickle file, verify it is a valid file, and load
    # the values in the dataframe into the grid.
    files = list(set(param_config_dict['name'] for param_config_dict in df_params.values()))

    # For now, we enforce that only one file can be loaded at a time.
    # Otherwise it is ambiguous how we should combine the values
    # (element-wise, or as an outer product, i.e. forming a mesh-grid?).
    # This can easily be extended if a clear implementation is set out.
    if len(files) > 1:
        raise ValueError('At the moment, only one Pickle file can be used for '
                         'the same param file, for parameters in DATAFRAME mode.')

    filename = files[0]
    cwd = os.getcwd()
    if os.path.isabs(filename):
        file_path = filename
    else:
        file_path = os.path.join(cwd, filename)
    if not os.path.exists(file_path):
        raise ValueError('File {filename} does not exist (cwd {cwd})'.format(
            filename=filename, cwd=cwd))

    df = pd.read_pickle(file_path)
    if not isinstance(df, pd.DataFrame):
        raise ValueError('{filename} does not contain a valid pandas '
                         'DataFrame.'.format(filename=filename))

    provided_params = set(df.columns)
    required_params = list(df_params.keys())
    missing_params = set(required_params) - provided_params
    if len(missing_params) > 0:
        raise ValueError('Parameters {missing_params} specified as DATAFRAME mode '
                         'in the param file but not found in the Pickle '
                         'file.'.format(missing_params=missing_params))

    return df[required_params]
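
# A minimal sketch (not part of the module) of producing a Pickle file that
# DATAFRAME-mode parameters can point at. Every DATAFRAME parameter must name
# the same file and appear as a column; rows are kept together as fixed
# points rather than being combined column-wise. The file name and parameter
# names here are made up.
def _example_write_df_points():
    df = pd.DataFrame({'mX': [100.0, 250.0], 'gX': [0.1, 0.25]})
    df.to_pickle('points.pkl')
    # the param file would then give both mX and gX mode DATAFRAME with
    # name = points.pkl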
def run_scan(param_dict, beams, num_points, template_file, out_dir,
             pipe_hepmc, seed, num_events, single, exclusions=[]):
    """
    Given points defined in param_dict, create run point directories and
    populate them with the MCEG template file and a parameter file.

    :param param_dict: Dictionary with parameter names as keys, each containing
        another dictionary with keys 'range' and 'values'.
    :param out_dir: top level output directory for the grid scan
    :param pipe_hepmc: if True, pipe hepmc events for rivet to read;
        otherwise use a native generator-rivet interface.
    :param seed: random number seed
    :param num_events: number of events to generate
    :param exclusions: list of run points to be excluded

    :returns: None
    """

    # Read in run card template files
    template = read_template_file(template_file)
    cutil.mkoutdir(out_dir)

    for beam in beams:

        if single:
            msg_string = "Building example for {}".format(beam.id)
        else:
            msg_string = "Building scripts for {}".format(beam.id)

        directory = os.path.join(out_dir, beam.id)
        cutil.mkoutdir(directory)

        for run_point in cutil.progress_bar(range(num_points), desc=msg_string):

            # Skip this point if it is excluded in the param file
            if run_point in exclusions:
                continue
            if invalid_params(run_point, param_dict):
                continue

            # Run point directories are inside the output directory and hold
            # the necessary files to run the generator with the param_dict
            # associated with that point
            run_point_path = make_run_point_directory(run_point, directory)

            # Write generator run card files formatted with parameter values
            write_generator_files(template, param_dict, run_point, run_point_path,
                                  pipe_hepmc, seed, num_events, beam)

            # Flag whether to write all sampled points and their run points
            # to a .dat file later
            write_map = True

            if "slha_file" in param_dict.keys():
                import pyslha
                slha_mode = param_dict['slha_file']['mode']

                if slha_mode == "DIR":
                    # copy the SLHA file over
                    info = param_dict['slha_file']
                    f_source = os.path.join(
                        param_dict['slha_file']['name'], info['values'][run_point])
                    f_dest = os.path.join(
                        os.getcwd(), run_point_path, info['values'][run_point])
                    shutil.copyfile(f_source, f_dest)
                    write_map = False

                elif slha_mode == "SINGLE" or slha_mode == "SCALED":
                    # need to add any modified parameters to the dict for writing out.
                    # Read the SLHA file, modify it, and write it out.
                    slha_name = param_dict['slha_file']['name']
                    susy = pyslha.read(slha_name)

                    # look at all the other params now.
                    for param, info in sorted(param_dict.items()):
                        if param != 'slha_file':
                            if slha_mode == 'SCALED':
                                # they should all be blocks that exist in the file
                                try:
                                    for ident in susy.blocks[param].keys():
                                        slha_val = susy.blocks[param][ident]
                                        susy.blocks[param][ident] = slha_val * float(info['values'][run_point])
                                except KeyError:
                                    cfg.contur_log.critical("SLHA BLOCK not found: {}".format(param))
                                    sys.exit(1)

                            elif slha_mode == 'SINGLE':
                                try:
                                    block = param_dict[param]['block']
                                except KeyError:
                                    cfg.contur_log.critical("Missing SLHA Block Name: {}".format(param))
                                    sys.exit(1)
                                for ident in susy.blocks[block].keys():
                                    try:
                                        if ident == int(param[1:]):
                                            susy.blocks[block][ident] = float(info['values'][run_point])
                                    except ValueError:
                                        cfg.contur_log.critical("Malformed SLHA ident: {}".format(param))
                                        sys.exit(1)

                    f_dest = open(os.path.join(
                        os.getcwd(), run_point_path, slha_name), "w")
                    pyslha.write(f_dest, susy)
                    f_dest.close()

            # Write a parameter file inside the run point directory. This is
            # purely to record what the param_dict values are at this run point
            write_param_dat(param_dict, run_point_path, run_point)

            if single:
                break

        if write_map:
            write_sampled_map(directory, param_dict)