From 3469368a55d4dc34ea4470a7c574edc06d25baa7 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 22 Feb 2022 17:51:28 +0100 Subject: [PATCH 01/64] Fixing loaded_commondata_with_cuts import and add theory_covmat to exp_covmat --- n3fit/src/n3fit/performfit.py | 1 - n3fit/src/n3fit/scripts/vp_setupfit.py | 1 - validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/covmats.py | 17 +++++++++++++++-- validphys2/src/validphys/n3fit_data.py | 4 +++- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index c2acd8a712..e79c564936 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -148,7 +148,6 @@ def performfit( # [list of all NN seeds] # ) # - n_models = len(replicas_nnseed_fitting_data_dict) if parallel_models and n_models != 1: replicas, replica_experiments, nnseeds = zip(*replicas_nnseed_fitting_data_dict) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 0534081769..ea839b40cc 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -158,7 +158,6 @@ def from_yaml(cls, o, *args, **kwargs): SETUPFIT_FIXED_CONFIG['actions_'] += [filter_action] else: SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] - if file_content.get('theorycovmatconfig') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append( 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat') diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 791d5d6c2c..7ced4a9e38 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1053,7 +1053,6 @@ def produce_nnfit_theory_covmat( # Only user uncertainties from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting - @functools.wraps(f) def res(*args, **kwargs): return f(*args, **kwargs) @@ -1512,6 +1511,7 @@ def 
produce_group_dataset_inputs_by_experiment(self, data_input): def produce_group_dataset_inputs_by_process(self, data_input): return self.produce_group_dataset_inputs_by_metadata(data_input, "nnpdf31_process") + def produce_scale_variation_theories(self, theoryid, point_prescription): """Produces a list of theoryids given a theoryid at central scales and a point prescription. The options for the latter are '3 point', '5 point', '5bar point', '7 point' diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index bd09c15d5b..40bcd2c751 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import scipy.linalg as la +import pathlib from reportengine import collect from reportengine.table import table @@ -23,7 +24,7 @@ from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.covmats_utils import construct_covmat, systematics_matrix from validphys.results import ThPredictionsResult - +from validphys.commondata import loaded_commondata_with_cuts log = logging.getLogger(__name__) INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") @@ -227,7 +228,19 @@ def dataset_inputs_covmat_from_systematics( covmat, norm_threshold=norm_threshold ) - return covmat + try: + theory_covmat_path = pathlib.Path.cwd() + data = pd.read_csv(theory_covmat_path / "prov_moredata" / "tables" / "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv", sep='\t') + datael = data.iloc[3:] + datael = datael.drop(['group'], axis=1) + datael = datael.drop(['Unnamed: 1'], axis=1) + datael = datael.drop(['Unnamed: 2'], axis=1) + theory_covmat = np.copy(datael.values) + theory_covmat = theory_covmat.astype(np.float) + except FileNotFoundError: + theory_covmat = np.zeros(covmat.shape) + total_covmat = np.add(covmat, theory_covmat) + return total_covmat @check_cuts_considered diff --git a/validphys2/src/validphys/n3fit_data.py 
b/validphys2/src/validphys/n3fit_data.py index f2ceda5573..05a2ecf4e8 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -320,7 +320,9 @@ def fitting_data_dict( } return dict_out -exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_experiment",)) +#exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_experiment",)) +exps_fitting_data_dict = collect("fitting_data_dict", () ) + def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nnseed): """For a single replica return a tuple of the inputs to this function. From 80bba9dcfa9374eafb98e6842561a621a92f7f2c Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 24 Feb 2022 18:37:17 +0100 Subject: [PATCH 02/64] Fixed loading of theory_covmat also for user provided covmat --- n3fit/src/n3fit/performfit.py | 2 +- n3fit/src/n3fit/scripts/n3fit_exec.py | 5 ++- validphys2/src/validphys/config.py | 6 ++- validphys2/src/validphys/covmats.py | 60 ++++++++++++++++++++------ validphys2/src/validphys/n3fit_data.py | 9 ++-- 5 files changed, 60 insertions(+), 22 deletions(-) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index e79c564936..516c0afd0f 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -37,7 +37,7 @@ def performfit( tensorboard=None, debug=False, maxcores=None, - parallel_models=False + parallel_models=False, ): """ This action will (upon having read a validcard) process a full PDF fit diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 9616a0795c..d74a820247 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -145,7 +145,10 @@ def from_yaml(cls, o, *args, **kwargs): validation_action = namespace + "validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) - + if file_content.get('theorycovmatconfig') is not 
None: + N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = file_content.get('theorycovmatconfig').get('use_user_uncertainties') + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = file_content.get('theorycovmatconfig').get('use_scalevar_uncertainties') file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 7ced4a9e38..bd859cbdf4 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1056,7 +1056,6 @@ def produce_nnfit_theory_covmat( @functools.wraps(f) def res(*args, **kwargs): return f(*args, **kwargs) - # Set this to get the same filename regardless of the action. res.__name__ = "theory_covmat" return res @@ -1503,6 +1502,11 @@ def produce_group_dataset_inputs_by_metadata( {"data_input": NSList(group, nskey="dataset_input"), "group_name": name} for name, group in res.items() ] + + def produce_group_dataset_inputs_by_fitting_group(self, data_input, theory_covmat_flag): + if theory_covmat_flag is True: + return self.produce_group_dataset_inputs_by_metadata(data_input, "custom_group") + return self.produce_group_dataset_inputs_by_metadata(data_input, "experiment") def produce_group_dataset_inputs_by_experiment(self, data_input): diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 40bcd2c751..c51a2003cf 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -228,19 +228,18 @@ def dataset_inputs_covmat_from_systematics( covmat, norm_threshold=norm_threshold ) - try: - theory_covmat_path = pathlib.Path.cwd() - data = pd.read_csv(theory_covmat_path / "prov_moredata" / "tables" / "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv", sep='\t') - datael = data.iloc[3:] - datael = datael.drop(['group'], axis=1) - datael = datael.drop(['Unnamed: 1'], axis=1) - datael = 
datael.drop(['Unnamed: 2'], axis=1) - theory_covmat = np.copy(datael.values) - theory_covmat = theory_covmat.astype(np.float) - except FileNotFoundError: - theory_covmat = np.zeros(covmat.shape) - total_covmat = np.add(covmat, theory_covmat) - return total_covmat + # try: + # theory_covmat_path = pathlib.Path.cwd() + # data = pd.read_csv(theory_covmat_path / "prov_moredata" / "tables" / "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv", sep='\t') + # datael = data.iloc[3:] + # datael = datael.drop(['group'], axis=1) + # datael = datael.drop(['Unnamed: 1'], axis=1) + # datael = datael.drop(['Unnamed: 2'], axis=1) + # theory_covmat = np.copy(datael.values) + #except FileNotFoundError: + # theory_covmat = np.zeros(covmat.shape) + #total_covmat = np.add(covmat, theory_covmat) + return covmat @check_cuts_considered @@ -352,6 +351,41 @@ def dataset_inputs_t0_covmat_from_systematics( _list_of_central_values=dataset_inputs_t0_predictions ) +def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, + dataset_inputs_t0_predictions, + output_path, + theory_covmat_flag, + use_user_uncertainties, + use_scalevar_uncertainties): + exp_covmat = dataset_inputs_covmat_from_systematics( + dataset_inputs_loaded_cd_with_cuts, + data_input, + use_weights_in_covmat, + norm_threshold=norm_threshold, + _list_of_central_values=dataset_inputs_t0_predictions + ) + if theory_covmat_flag is True: + generic_path = None + if use_scalevar_uncertainties is True: + if use_user_uncertainties is True: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_total_theory_covmat.csv") + else: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") + else: + if use_user_uncertainties is True: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_user_covmat.csv") + else: + generic_path = 
pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") + theorypath = pathlib.Path(str(output_path/"tables"/generic_path.relative_to(generic_path.anchor))) + theory_covmat = pd.read_csv(theorypath, sep='\t') + theory_covmat = theory_covmat.iloc[3:].drop(['group'], axis=1).drop(['Unnamed: 1'], axis=1).drop(['Unnamed: 2'], axis=1) + return np.add(exp_covmat,theory_covmat.values.astype(np.float)) + return exp_covmat + def sqrt_covmat(covariance_matrix): """Function that computes the square root of the covariance matrix. diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 05a2ecf4e8..8e5eeaace7 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -188,7 +188,7 @@ def _mask_fk_tables(dataset_dicts, tr_masks): def fitting_data_dict( data, make_replica, - dataset_inputs_t0_covmat_from_systematics, + dataset_inputs_t0_total_covmat, tr_masks, kfold_masks, diagonal_basis=None, @@ -244,7 +244,7 @@ def fitting_data_dict( datasets = common_data_reader_experiment(spec_c, data) # t0 covmat - covmat = dataset_inputs_t0_covmat_from_systematics + covmat = dataset_inputs_t0_total_covmat inv_true = np.linalg.inv(covmat) if diagonal_basis: @@ -297,7 +297,6 @@ def fitting_data_dict( folds["training"].append(fold[tr_mask]) folds["validation"].append(fold[vl_mask]) folds["experimental"].append(~fold) - dict_out = { "datasets": datasets_copy, "name": str(data), @@ -320,9 +319,7 @@ def fitting_data_dict( } return dict_out -#exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_experiment",)) -exps_fitting_data_dict = collect("fitting_data_dict", () ) - +exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_fitting_group",)) def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nnseed): """For a single replica return a tuple of the inputs to this function. 
From 815a595c1cfb8e06c5d8ce0f0d6d63bde9c59333 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 24 Feb 2022 22:01:34 +0100 Subject: [PATCH 03/64] Fixed theory_covmat flags --- n3fit/src/n3fit/scripts/n3fit_exec.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index d74a820247..f85678d0b4 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -145,6 +145,9 @@ def from_yaml(cls, o, *args, **kwargs): validation_action = namespace + "validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) + N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None if file_content.get('theorycovmatconfig') is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True N3FIT_FIXED_CONFIG['use_user_uncertainties'] = file_content.get('theorycovmatconfig').get('use_user_uncertainties') From 539f366998d284c6ee6c54715949a9b15db72a40 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 1 Mar 2022 15:38:34 +0100 Subject: [PATCH 04/64] Fixed some doc --- doc/sphinx/source/vp/theorycov/index.rst | 2 +- doc/sphinx/source/vp/theorycov/runcard_layout.rst | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/doc/sphinx/source/vp/theorycov/index.rst b/doc/sphinx/source/vp/theorycov/index.rst index 11bcc5cd3e..308f3ee3ee 100644 --- a/doc/sphinx/source/vp/theorycov/index.rst +++ b/doc/sphinx/source/vp/theorycov/index.rst @@ -29,7 +29,7 @@ Summary - Theoretical covariance matrices are built according to the various prescriptions in :ref:`prescrips`. -- The prescription must be one of 3 point, 5 point, 5bar point, 7 point or 9 point. You can specify +- The prescription must be one of 3 point, 3r point, 3f point, 5 point, 5bar point, 7 point or 9 point. 
You can specify this using ``point_prescription: "x point"`` in the runcard. The translation of this flag into the relevant ``theoryids`` is handled by the ``scalevariations`` module in ``validphys``. diff --git a/doc/sphinx/source/vp/theorycov/runcard_layout.rst b/doc/sphinx/source/vp/theorycov/runcard_layout.rst index 35bcd7f8b9..f2707fcfa2 100644 --- a/doc/sphinx/source/vp/theorycov/runcard_layout.rst +++ b/doc/sphinx/source/vp/theorycov/runcard_layout.rst @@ -1,11 +1,6 @@ Important information about runcard layout ========================================== -- The flag ``fivetheories`` specifies the choice of 5 or - :math:`\bar{5}` prescription for the case of 5 input theories. You - must assign a value ``nobar`` or ``bar`` correspondingly. If you do - not do this, ``validphys`` will give an error. - - The default behaviour for the 7-point prescription is to use Gavin Salam's modification to it. To use the original 7-point prescription instead, the ``seventheories`` flag must be set to ``original``. 
From 99ffdd439f1b71f689b37b2b2b55cbdc30428a25 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 10 Mar 2022 14:12:02 +0100 Subject: [PATCH 05/64] removed old runcards and added new working one --- .../Fit_with_theory_covmat.yml | 150 +++++++++++++ .../fit_with_sv_and_user_thcovmat.yaml | 200 ------------------ .../theory_covariance/fit_with_thcovmat.yaml | 183 ---------------- .../fit_with_user_thcovmat.yaml | 184 ---------------- 4 files changed, 150 insertions(+), 567 deletions(-) create mode 100644 validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml delete mode 100644 validphys2/examples/theory_covariance/fit_with_sv_and_user_thcovmat.yaml delete mode 100644 validphys2/examples/theory_covariance/fit_with_thcovmat.yaml delete mode 100644 validphys2/examples/theory_covariance/fit_with_user_thcovmat.yaml diff --git a/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml b/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml new file mode 100644 index 0000000000..2a2bed7222 --- /dev/null +++ b/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml @@ -0,0 +1,150 @@ +# +# Configuration file for NNPDF++ +# +########################################################################################## +description: "NNPDF4.0 methodology fit with theory covariance matrix (9pt prescription) like NNPDF31_nlo_as_0118_scalecov_9pt" + +########################################################################################## +# frac: training fraction +# ewk: apply ewk k-factors +# sys: systematics treatment (see systypes) +dataset_inputs: + - {dataset: NMCPD, frac: 0.5} + - {dataset: NMC, frac: 0.5} + - {dataset: SLACP, frac: 0.5} + - {dataset: SLACD, frac: 0.5} + - {dataset: BCDMSP, frac: 0.5} + - {dataset: BCDMSD, frac: 0.5} + - {dataset: CHORUSNU, frac: 0.5} + - {dataset: CHORUSNB, frac: 0.5} + - {dataset: NTVNUDMN, frac: 0.5} + - {dataset: NTVNBDMN, frac: 0.5} + - {dataset: HERACOMBNCEM, frac: 0.5} + - {dataset: 
HERACOMBNCEP460, frac: 0.5} + - {dataset: HERACOMBNCEP575, frac: 0.5} + - {dataset: HERACOMBNCEP820, frac: 0.5} + - {dataset: HERACOMBNCEP920, frac: 0.5} + - {dataset: HERACOMBCCEM, frac: 0.5} + - {dataset: HERACOMBCCEP, frac: 0.5} + - {dataset: HERAF2CHARM, frac: 0.5} + - {dataset: CDFZRAP, frac: 1.0} + - {dataset: D0ZRAP, frac: 1.0} + - {dataset: D0WEASY, frac: 1.0} + - {dataset: D0WMASY, frac: 1.0} + - {dataset: ATLASWZRAP36PB, frac: 1.0} + - {dataset: ATLASZHIGHMASS49FB, frac: 1.0} + - {dataset: ATLASLOMASSDY11EXT, frac: 1.0} + - {dataset: ATLASWZRAP11, frac: 0.5} + - {dataset: ATLAS1JET11, frac: 0.5} + - {dataset: ATLASZPT8TEVMDIST, frac: 0.5} + - {dataset: ATLASZPT8TEVYDIST, frac: 0.5} + - {dataset: ATLASTTBARTOT, frac: 1.0} + - {dataset: ATLASTOPDIFF8TEVTRAPNORM, frac: 1.0} + - {dataset: CMSWEASY840PB, frac: 1.0} + - {dataset: CMSWMASY47FB, frac: 1.0} + - {dataset: CMSDY2D11, frac: 0.5} + - {dataset: CMSWMU8TEV, frac: 1.0} + - {dataset: CMSZDIFF12, frac: 1.0, cfac: [NRM]} + - {dataset: CMSJETS11, frac: 0.5} + - {dataset: CMSTTBARTOT, frac: 1.0} + - {dataset: CMSTOPDIFF8TEVTTRAPNORM, frac: 1.0} + - {dataset: LHCBZ940PB, frac: 1.0} + - {dataset: LHCBZEE2FB, frac: 1.0} + - {dataset: LHCBWZMU7TEV, frac: 1.0, cfac: [NRM]} + - {dataset: LHCBWZMU8TEV, frac: 1.0, cfac: [NRM]} + + +############################################################ +datacuts: + t0pdfset: 190310-tg-nlo-global # PDF set to generate t0 covmat + q2min: 13.96 # Q2 minimum + w2min: 12.5 # W2 minimum + combocuts: NNPDF31 # NNPDF3.0 final kin. cuts + jetptcut_tev: 0 # jet pt cut for tevatron + jetptcut_lhc: 0 # jet pt cut for lhc + wptcut_lhc: 30.0 # Minimum pT for W pT diff distributions + jetycut_tev: 1e30 # jet rap. cut for tevatron + jetycut_lhc: 1e30 # jet rap. cut for lhc + dymasscut_min: 0 # dy inv.mass. min cut + dymasscut_max: 1e30 # dy inv.mass. max cut + jetcfactcut: 1e30 # jet cfact. 
cut + use_cuts: fromintersection + cuts_intersection_spec: + - theoryid: 163 + - theoryid: 53 + +############################################################ +theory: + theoryid: 163 # database id +theorycovmatconfig: + point_prescription: "9 point" + theoryids: + from_: scale_variation_theories + pdf: NNPDF31_nlo_as_0118 + use_thcovmat_in_fitting: true + use_thcovmat_in_sampling: true + +sampling_t0: + use_t0: false +fitting_t0: + use_t0: true + +############################################################ +trvlseed: 376191634 +nnseed: 2080989803 +mcseed: 75955222 +save: false +genrep: true # true = generate MC replicas, false = use real data + +parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [25, 20, 8] + activation_per_layer: [tanh, tanh, linear] + initializer: glorot_normal + optimizer: + clipnorm: 6.073e-6 + learning_rate: 2.621e-3 + optimizer_name: Nadam + epochs: 17000 + positivity: + initial: 184.8 + multiplier: + stopping_patience: 0.1 + layer_type: dense + dropout: 0.0 + threshold_chi2: 3.5 +############################################################ +fitting: + fitbasis: EVOL # EVOL (7), EVOLQED (8), etc. 
+ basis: + - {fl: sng, trainable: false, smallx: [1.046, 1.188], largex: [1.437, 2.716]} + - {fl: g, trainable: false, smallx: [0.9604, 1.23], largex: [0.08459, 6.137]} + - {fl: v, trainable: false, smallx: [0.5656, 0.7242], largex: [1.153, 2.838]} + - {fl: v3, trainable: false, smallx: [0.1521, 0.5611], largex: [1.236, 2.976]} + - {fl: v8, trainable: false, smallx: [0.5264, 0.7246], largex: [0.6919, 3.198]} + - {fl: t3, trainable: false, smallx: [-0.3687, 1.459], largex: [1.664, 3.373]} + - {fl: t8, trainable: false, smallx: [0.5357, 1.267], largex: [1.433, 2.866]} + - {fl: t15, trainable: false, smallx: [1.073, 1.164], largex: [1.503, 3.636]} +# basis: +# - {fl: sng, trainable: false, smallx: [1.121, 1.154], largex: [1.498, 3.138]} +# - {fl: g, trainable: false, smallx: [0.9224, 1.149], largex: [3.266, 6.214]} +# - {fl: v, trainable: false, smallx: [0.5279, 0.8017], largex: [1.6, 3.588]} +# - {fl: v3, trainable: false, smallx: [0.2011, 0.4374], largex: [1.761, 3.427]} +# - {fl: v8, trainable: false, smallx: [0.5775, 0.8357], largex: [1.589, 3.378]} +# - {fl: t3, trainable: false, smallx: [-0.484, 1.0], largex: [1.763, 3.397]} +# - {fl: t8, trainable: false, smallx: [0.6714, 0.9197], largex: [1.572, 3.496]} +# - {fl: t15, trainable: false, smallx: [1.073, 1.164], largex: [1.503, 3.636]} + +############################################################ +positivity: + posdatasets: + - {dataset: POSF2U, maxlambda: 1e6} # Positivity Lagrange Multiplier + - {dataset: POSF2DW, maxlambda: 1e6} + - {dataset: POSF2S, maxlambda: 1e6} + - {dataset: POSFLL, maxlambda: 1e6} + - {dataset: POSDYU, maxlambda: 1e10} + - {dataset: POSDYD, maxlambda: 1e10} + - {dataset: POSDYS, maxlambda: 1e10} + +############################################################ +debug: False +maxcores: 4 diff --git a/validphys2/examples/theory_covariance/fit_with_sv_and_user_thcovmat.yaml b/validphys2/examples/theory_covariance/fit_with_sv_and_user_thcovmat.yaml deleted file mode 100644 index 
3f5e455c3d..0000000000 --- a/validphys2/examples/theory_covariance/fit_with_sv_and_user_thcovmat.yaml +++ /dev/null @@ -1,200 +0,0 @@ -# -# Configuration file for NNPDF++ -# -###################################################################################### -description: Example runcard for NLO NNPDF3.1 style fit with both scale variation and user-defined covariance matrices - -###################################################################################### -# frac: training fraction -# ewk: apply ewk k-factors -# sys: systematics treatment (see systypes) - -metadata_group: "nnpdf31_process" # You need to provide this because all theory - # covariance matrices must be grouped by process -experiments: - - experiment: BIGEXP - datasets: - # DIS NC - - {dataset: NMCPD, frac: 0.5} - - {dataset: NMC, frac: 0.5} - - {dataset: SLACP, frac: 0.5} - - {dataset: SLACD, frac: 0.5} - - {dataset: BCDMSP, frac: 0.5} - - {dataset: BCDMSD, frac: 0.5} - - {dataset: HERACOMBNCEM, frac: 0.5} - - {dataset: HERACOMBNCEP460, frac: 0.5} - - {dataset: HERACOMBNCEP575, frac: 0.5} - - {dataset: HERACOMBNCEP820, frac: 0.5} - - {dataset: HERACOMBNCEP920, frac: 0.5} - - {dataset: HERAF2CHARM, frac: 0.5} - # DIS CC - - {dataset: CHORUSNU, frac: 0.5} - - {dataset: CHORUSNB, frac: 0.5} - - {dataset: NTVNUDMN, frac: 0.5} - - {dataset: NTVNBDMN, frac: 0.5} - - {dataset: HERACOMBCCEM, frac: 0.5} - - {dataset: HERACOMBCCEP, frac: 0.5} - # DY - - {dataset: CDFZRAP, frac: 1.0} - - {dataset: D0ZRAP, frac: 1.0} - - {dataset: D0WEASY, frac: 1.0} - - {dataset: D0WMASY, frac: 1.0} - - {dataset: ATLASWZRAP36PB, frac: 1.0} - - {dataset: ATLASZHIGHMASS49FB, frac: 1.0} - - {dataset: ATLASLOMASSDY11EXT, frac: 1.0} - - {dataset: ATLASWZRAP11, frac: 0.5} - - {dataset: ATLASZPT8TEVMDIST, frac: 0.5} - - {dataset: ATLASZPT8TEVYDIST, frac: 0.5} - - {dataset: CMSWEASY840PB, frac: 1.0} - - {dataset: CMSWMASY47FB, frac: 1.0} - - {dataset: CMSDY2D11, frac: 0.5} - - {dataset: CMSWMU8TEV, frac: 1.0} - - {dataset: 
CMSZDIFF12, frac: 1.0, cfac: [NRM]} - - {dataset: LHCBZ940PB, frac: 1.0} - - {dataset: LHCBZEE2FB, frac: 1.0} - - {dataset: LHCBWZMU7TEV, frac: 1.0, cfac: [NRM]} - - {dataset: LHCBWZMU8TEV, frac: 1.0, cfac: [NRM]} - # JETS - - {dataset: ATLAS1JET11, frac: 0.5} - - {dataset: CMSJETS11, frac: 0.5} - # TOP - - {dataset: ATLASTTBARTOT, frac: 1.0} - - {dataset: ATLASTOPDIFF8TEVTRAPNORM, frac: 1.0} - - {dataset: CMSTTBARTOT, frac: 1.0} - - {dataset: CMSTOPDIFF8TEVTTRAPNORM, frac: 1.0} - -############################################################ -datacuts: - t0pdfset: 190310-tg-nlo-global # PDF set to generate t0 covmat - q2min: 13.96 # Q2 minimum - w2min: 12.5 # W2 minimum - combocuts: NNPDF31 # NNPDF3.0 final kin. cuts - jetptcut_tev: 0 # jet pt cut for tevatron - jetptcut_lhc: 0 # jet pt cut for lhc - wptcut_lhc: 30.0 # Minimum pT for W pT diff distributions - jetycut_tev: 1e30 # jet rap. cut for tevatron - jetycut_lhc: 1e30 # jet rap. cut for lhc - dymasscut_min: 0 # dy inv.mass. min cut - dymasscut_max: 1e30 # dy inv.mass. max cut - jetcfactcut: 1e30 # jet cfact. 
cut - use_cuts: fromintersection - cuts_intersection_spec: - - theoryid: 163 - - theoryid: 53 - -############################################################ -theory: - theoryid: 163 # database id - -theorycovmatconfig: - # Scale variation uncertainties - point_prescription: "3 point" - theoryids: - from_: scale_variation_theories - fivetheories: None - pdf: NNPDF31_nlo_as_0118 - use_scalevar_uncertainties: True - # User uncertainties (top mass uncertainty) - use_user_uncertainties: True - user_covmat_path: "bOcHfQMaSD26p6PFJlT1nQ==/topthcovmat_mt.csv" # Indexed by nnpdf31_process - # Don't change these unless you know what you're doing - use_thcovmat_in_sampling: True - use_thcovmat_in_fitting: True - -sampling_t0: - use_t0: false - -fitting_t0: - use_t0: true - - theory: - theoryid: 163 # database id - - -############################################################ -fitting: - seed: 65532133530 # set the seed for the random generator - genrep: on # on = generate MC replicas, off = use real data - rngalgo: 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - fitmethod: NGA # Minimization algorithm - ngen: 30000 # Maximum number of generations - nmutants: 80 # Number of mutants for replica - paramtype: NN - nnodes: [2, 5, 3, 1] - - # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) - # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) - # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) - # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) - fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. 
- basis: - # remeber to change the name of PDF accordingly with fitbasis - # pos: on for NN squared - # mutsize: mutation size - # mutprob: mutation probability - # smallx, largex: preprocessing ranges - - {fl: sng, pos: off, mutsize: [15], mutprob: [0.05], smallx: [1.046, 1.188], largex: [ - 1.437, 2.716]} - - {fl: g, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.9604, 1.23], largex: [ - 0.08459, 6.137]} - - {fl: v, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5656, 0.7242], largex: [ - 1.153, 2.838]} - - {fl: v3, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.1521, 0.5611], largex: [ - 1.236, 2.976]} - - {fl: v8, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5264, 0.7246], largex: [ - 0.6919, 3.198]} - - {fl: t3, pos: off, mutsize: [15], mutprob: [0.05], smallx: [-0.3687, 1.459], largex: [ - 1.664, 3.373]} - - {fl: t8, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5357, 1.267], largex: [ - 1.433, 2.866]} - - {fl: cp, pos: off, mutsize: [15], mutprob: [0.05], smallx: [-0.09635, 1.204], - largex: [1.654, 7.456]} - -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta: 0 # Delta for look-back stopping - mingen: 0 # Minimum number of generations - window: 500 # Window for moving average - minchi2: 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear: 200 # Smear for stopping - deltasm: 200 # Delta smear for stopping - rv: 2 # Ratio for validation stopping - rt: 0.5 # Ratio for training stopping - epsilon: 1e-6 # Gradient epsilon - -############################################################ -positivity: - posdatasets: - - {dataset: POSF2U, poslambda: 1e6} # Positivity Lagrange Multiplier - - {dataset: POSF2DW, poslambda: 1e6} - - {dataset: POSF2S, poslambda: 1e6} - - {dataset: POSFLL, poslambda: 1e6} - - {dataset: POSDYU, poslambda: 1e10} - - {dataset: POSDYD, poslambda: 1e10} - - {dataset: POSDYS, poslambda: 1e10} - 
-############################################################ -closuretest: - filterseed: 0 # Random seed to be used in filtering data partitions - fakedata: off # on = to use FAKEPDF to generate pseudo-data - fakepdf: MSTW2008nlo68cl # Theory input for pseudo-data - errorsize: 1.0 # uncertainties rescaling - fakenoise: off # on = to add random fluctuations to pseudo-data - rancutprob: 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: off # 0(1) to output training(valiation) chi2 in report - printpdf4gen: off # To print info on PDFs during minimization - -############################################################ -lhagrid: - nx: 150 - xmin: 1e-9 - xmed: 0.1 - xmax: 1.0 - nq: 50 - qmax: 1e5 - -############################################################ -debug: off diff --git a/validphys2/examples/theory_covariance/fit_with_thcovmat.yaml b/validphys2/examples/theory_covariance/fit_with_thcovmat.yaml deleted file mode 100644 index a82b8b70f7..0000000000 --- a/validphys2/examples/theory_covariance/fit_with_thcovmat.yaml +++ /dev/null @@ -1,183 +0,0 @@ -# -# Configuration file for NNPDF++ -# -###################################################################################### -description: Example runcard for NNPDF3.1 style fit with 3pt theory covariance matrix - -###################################################################################### -# frac: training fraction -# ewk: apply ewk k-factors -# sys: systematics treatment (see systypes) -experiments: - - experiment: BIGEXP - datasets: - - {dataset: NMCPD, frac: 0.5} - - {dataset: NMC, frac: 0.5} - - {dataset: SLACP, frac: 0.5} - - {dataset: SLACD, frac: 0.5} - - {dataset: BCDMSP, frac: 0.5} - - {dataset: BCDMSD, frac: 0.5} - - {dataset: HERACOMBNCEM, frac: 0.5} - - {dataset: HERACOMBNCEP460, frac: 0.5} - - {dataset: HERACOMBNCEP575, frac: 0.5} - - {dataset: HERACOMBNCEP820, frac: 0.5} - - {dataset: HERACOMBNCEP920, frac: 0.5} - 
- {dataset: HERAF2CHARM, frac: 0.5} - - {dataset: CHORUSNU, frac: 0.5} - - {dataset: CHORUSNB, frac: 0.5} - - {dataset: NTVNUDMN, frac: 0.5} - - {dataset: NTVNBDMN, frac: 0.5} - - {dataset: HERACOMBCCEM, frac: 0.5} - - {dataset: HERACOMBCCEP, frac: 0.5} - - {dataset: CDFZRAP, frac: 1.0} - - {dataset: D0ZRAP, frac: 1.0} - - {dataset: D0WEASY, frac: 1.0} - - {dataset: D0WMASY, frac: 1.0} - - {dataset: ATLASWZRAP36PB, frac: 1.0} - - {dataset: ATLASZHIGHMASS49FB, frac: 1.0} - - {dataset: ATLASLOMASSDY11EXT, frac: 1.0} - - {dataset: ATLASWZRAP11, frac: 0.5} - - {dataset: ATLASZPT8TEVMDIST, frac: 0.5} - - {dataset: ATLASZPT8TEVYDIST, frac: 0.5} - - {dataset: CMSWEASY840PB, frac: 1.0} - - {dataset: CMSWMASY47FB, frac: 1.0} - - {dataset: CMSDY2D11, frac: 0.5} - - {dataset: CMSWMU8TEV, frac: 1.0} - - {dataset: CMSZDIFF12, frac: 1.0, cfac: [NRM]} - - {dataset: LHCBZ940PB, frac: 1.0} - - {dataset: LHCBZEE2FB, frac: 1.0} - - {dataset: LHCBWZMU7TEV, frac: 1.0, cfac: [NRM]} - - {dataset: LHCBWZMU8TEV, frac: 1.0, cfac: [NRM]} - - {dataset: ATLAS1JET11, frac: 0.5} - - {dataset: CMSJETS11, frac: 0.5} - - {dataset: ATLASTTBARTOT, frac: 1.0} - - {dataset: ATLASTOPDIFF8TEVTRAPNORM, frac: 1.0} - - {dataset: CMSTTBARTOT, frac: 1.0} - - {dataset: CMSTOPDIFF8TEVTTRAPNORM, frac: 1.0} - - -############################################################ -datacuts: - t0pdfset: 190310-tg-nlo-global # PDF set to generate t0 covmat - q2min: 13.96 # Q2 minimum - w2min: 12.5 # W2 minimum - combocuts: NNPDF31 # NNPDF3.0 final kin. cuts - jetptcut_tev: 0 # jet pt cut for tevatron - jetptcut_lhc: 0 # jet pt cut for lhc - wptcut_lhc: 30.0 # Minimum pT for W pT diff distributions - jetycut_tev: 1e30 # jet rap. cut for tevatron - jetycut_lhc: 1e30 # jet rap. cut for lhc - dymasscut_min: 0 # dy inv.mass. min cut - dymasscut_max: 1e30 # dy inv.mass. max cut - jetcfactcut: 1e30 # jet cfact. 
cut - use_cuts: fromintersection - cuts_intersection_spec: - - theoryid: 163 - - theoryid: 53 - -############################################################ -theory: - theoryid: 163 # database id - -theorycovmatconfig: - point_prescription: "3 point" - theoryids: - from_: scale_variation_theories - fivetheories: None - pdf: NNPDF31_nlo_as_0118 - use_thcovmat_in_fitting: true - use_thcovmat_in_sampling: true - -sampling_t0: - use_t0: false - -fitting_t0: - use_t0: true - -############################################################ -fitting: - seed: 65532133530 # set the seed for the random generator - genrep: on # on = generate MC replicas, off = use real data - rngalgo: 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - fitmethod: NGA # Minimization algorithm - ngen: 30000 # Maximum number of generations - nmutants: 80 # Number of mutants for replica - paramtype: NN - nnodes: [2, 5, 3, 1] - - # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) - # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) - # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) - # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) - fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. 
- basis: - # remeber to change the name of PDF accordingly with fitbasis - # pos: on for NN squared - # mutsize: mutation size - # mutprob: mutation probability - # smallx, largex: preprocessing ranges - - {fl: sng, pos: off, mutsize: [15], mutprob: [0.05], smallx: [1.046, 1.188], largex: [ - 1.437, 2.716]} - - {fl: g, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.9604, 1.23], largex: [ - 0.08459, 6.137]} - - {fl: v, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5656, 0.7242], largex: [ - 1.153, 2.838]} - - {fl: v3, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.1521, 0.5611], largex: [ - 1.236, 2.976]} - - {fl: v8, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5264, 0.7246], largex: [ - 0.6919, 3.198]} - - {fl: t3, pos: off, mutsize: [15], mutprob: [0.05], smallx: [-0.3687, 1.459], largex: [ - 1.664, 3.373]} - - {fl: t8, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5357, 1.267], largex: [ - 1.433, 2.866]} - - {fl: cp, pos: off, mutsize: [15], mutprob: [0.05], smallx: [-0.09635, 1.204], - largex: [1.654, 7.456]} - -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta: 0 # Delta for look-back stopping - mingen: 0 # Minimum number of generations - window: 500 # Window for moving average - minchi2: 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear: 200 # Smear for stopping - deltasm: 200 # Delta smear for stopping - rv: 2 # Ratio for validation stopping - rt: 0.5 # Ratio for training stopping - epsilon: 1e-6 # Gradient epsilon - -############################################################ -positivity: - posdatasets: - - {dataset: POSF2U, poslambda: 1e6} # Positivity Lagrange Multiplier - - {dataset: POSF2DW, poslambda: 1e6} - - {dataset: POSF2S, poslambda: 1e6} - - {dataset: POSFLL, poslambda: 1e6} - - {dataset: POSDYU, poslambda: 1e10} - - {dataset: POSDYD, poslambda: 1e10} - - {dataset: POSDYS, poslambda: 1e10} - 
-############################################################ -closuretest: - filterseed: 0 # Random seed to be used in filtering data partitions - fakedata: off # on = to use FAKEPDF to generate pseudo-data - fakepdf: MSTW2008nlo68cl # Theory input for pseudo-data - errorsize: 1.0 # uncertainties rescaling - fakenoise: off # on = to add random fluctuations to pseudo-data - rancutprob: 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: off # 0(1) to output training(valiation) chi2 in report - printpdf4gen: off # To print info on PDFs during minimization - -############################################################ -lhagrid: - nx: 150 - xmin: 1e-9 - xmed: 0.1 - xmax: 1.0 - nq: 50 - qmax: 1e5 - -############################################################ -debug: off diff --git a/validphys2/examples/theory_covariance/fit_with_user_thcovmat.yaml b/validphys2/examples/theory_covariance/fit_with_user_thcovmat.yaml deleted file mode 100644 index a3e9652639..0000000000 --- a/validphys2/examples/theory_covariance/fit_with_user_thcovmat.yaml +++ /dev/null @@ -1,184 +0,0 @@ -# -# Configuration file for NNPDF++ -# -###################################################################################### -description: Example runcard for NLO NNPDF3.1 style fit with user-defined covariance matrix - -###################################################################################### -# frac: training fraction -# ewk: apply ewk k-factors -# sys: systematics treatment (see systypes) - -metadata_group: "nnpdf31_process" # You need to provide this because all theory - # covariance matrices must be grouped by process -experiments: - - experiment: BIGEXP - datasets: - # DIS NC - - {dataset: NMCPD, frac: 0.5} - - {dataset: NMC, frac: 0.5} - - {dataset: SLACP, frac: 0.5} - - {dataset: SLACD, frac: 0.5} - - {dataset: BCDMSP, frac: 0.5} - - {dataset: BCDMSD, frac: 0.5} - - {dataset: HERACOMBNCEM, frac: 0.5} - - 
{dataset: HERACOMBNCEP460, frac: 0.5} - - {dataset: HERACOMBNCEP575, frac: 0.5} - - {dataset: HERACOMBNCEP820, frac: 0.5} - - {dataset: HERACOMBNCEP920, frac: 0.5} - - {dataset: HERAF2CHARM, frac: 0.5} - # DIS CC - - {dataset: CHORUSNU, frac: 0.5} - - {dataset: CHORUSNB, frac: 0.5} - - {dataset: NTVNUDMN, frac: 0.5} - - {dataset: NTVNBDMN, frac: 0.5} - - {dataset: HERACOMBCCEM, frac: 0.5} - - {dataset: HERACOMBCCEP, frac: 0.5} - # DY - - {dataset: CDFZRAP, frac: 1.0} - - {dataset: D0ZRAP, frac: 1.0} - - {dataset: D0WEASY, frac: 1.0} - - {dataset: D0WMASY, frac: 1.0} - - {dataset: ATLASWZRAP36PB, frac: 1.0} - - {dataset: ATLASZHIGHMASS49FB, frac: 1.0} - - {dataset: ATLASLOMASSDY11EXT, frac: 1.0} - - {dataset: ATLASWZRAP11, frac: 0.5} - - {dataset: ATLASZPT8TEVMDIST, frac: 0.5} - - {dataset: ATLASZPT8TEVYDIST, frac: 0.5} - - {dataset: CMSWEASY840PB, frac: 1.0} - - {dataset: CMSWMASY47FB, frac: 1.0} - - {dataset: CMSDY2D11, frac: 0.5} - - {dataset: CMSWMU8TEV, frac: 1.0} - - {dataset: CMSZDIFF12, frac: 1.0, cfac: [NRM]} - - {dataset: LHCBZ940PB, frac: 1.0} - - {dataset: LHCBZEE2FB, frac: 1.0} - - {dataset: LHCBWZMU7TEV, frac: 1.0, cfac: [NRM]} - - {dataset: LHCBWZMU8TEV, frac: 1.0, cfac: [NRM]} - # JETS - - {dataset: ATLAS1JET11, frac: 0.5} - - {dataset: CMSJETS11, frac: 0.5} - # TOP - - {dataset: ATLASTTBARTOT, frac: 1.0} - - {dataset: ATLASTOPDIFF8TEVTRAPNORM, frac: 1.0} - - {dataset: CMSTTBARTOT, frac: 1.0} - - {dataset: CMSTOPDIFF8TEVTTRAPNORM, frac: 1.0} - -############################################################ -datacuts: - t0pdfset: 190310-tg-nlo-global # PDF set to generate t0 covmat - q2min: 13.96 # Q2 minimum - w2min: 12.5 # W2 minimum - combocuts: NNPDF31 # NNPDF3.0 final kin. cuts - jetptcut_tev: 0 # jet pt cut for tevatron - jetptcut_lhc: 0 # jet pt cut for lhc - wptcut_lhc: 30.0 # Minimum pT for W pT diff distributions - jetycut_tev: 1e30 # jet rap. cut for tevatron - jetycut_lhc: 1e30 # jet rap. cut for lhc - dymasscut_min: 0 # dy inv.mass. 
min cut - dymasscut_max: 1e30 # dy inv.mass. max cut - jetcfactcut: 1e30 # jet cfact. cut - use_cuts: "internal" - -############################################################ -theory: - theoryid: 52 # database id - -theorycovmatconfig: - use_user_uncertainties: True - user_covmat_path: "bOcHfQMaSD26p6PFJlT1nQ==/topthcovmat_mt.csv" # Indexed by nnpdf31_process - use_thcovmat_in_sampling: True - use_thcovmat_in_fitting: True - -sampling_t0: - use_t0: false - -fitting_t0: - use_t0: true - -############################################################ -fitting: - seed: 65532133530 # set the seed for the random generator - genrep: on # on = generate MC replicas, off = use real data - rngalgo: 0 # 0 = ranlux, 1 = cmrg, see randomgenerator.cc - fitmethod: NGA # Minimization algorithm - ngen: 30000 # Maximum number of generations - nmutants: 80 # Number of mutants for replica - paramtype: NN - nnodes: [2, 5, 3, 1] - - # NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7) - # EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7) - # EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7) - # FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7) - fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc. 
- basis: - # remeber to change the name of PDF accordingly with fitbasis - # pos: on for NN squared - # mutsize: mutation size - # mutprob: mutation probability - # smallx, largex: preprocessing ranges - - {fl: sng, pos: off, mutsize: [15], mutprob: [0.05], smallx: [1.046, 1.188], largex: [ - 1.437, 2.716]} - - {fl: g, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.9604, 1.23], largex: [ - 0.08459, 6.137]} - - {fl: v, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5656, 0.7242], largex: [ - 1.153, 2.838]} - - {fl: v3, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.1521, 0.5611], largex: [ - 1.236, 2.976]} - - {fl: v8, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5264, 0.7246], largex: [ - 0.6919, 3.198]} - - {fl: t3, pos: off, mutsize: [15], mutprob: [0.05], smallx: [-0.3687, 1.459], largex: [ - 1.664, 3.373]} - - {fl: t8, pos: off, mutsize: [15], mutprob: [0.05], smallx: [0.5357, 1.267], largex: [ - 1.433, 2.866]} - - {fl: cp, pos: off, mutsize: [15], mutprob: [0.05], smallx: [-0.09635, 1.204], - largex: [1.654, 7.456]} - -############################################################ -stopping: - stopmethod: LOOKBACK # Stopping method - lbdelta: 0 # Delta for look-back stopping - mingen: 0 # Minimum number of generations - window: 500 # Window for moving average - minchi2: 3.5 # Minimum chi2 - minchi2exp: 6.0 # Minimum chi2 for experiments - nsmear: 200 # Smear for stopping - deltasm: 200 # Delta smear for stopping - rv: 2 # Ratio for validation stopping - rt: 0.5 # Ratio for training stopping - epsilon: 1e-6 # Gradient epsilon - -############################################################ -positivity: - posdatasets: - - {dataset: POSF2U, poslambda: 1e6} # Positivity Lagrange Multiplier - - {dataset: POSF2DW, poslambda: 1e6} - - {dataset: POSF2S, poslambda: 1e6} - - {dataset: POSFLL, poslambda: 1e6} - - {dataset: POSDYU, poslambda: 1e10} - - {dataset: POSDYD, poslambda: 1e10} - - {dataset: POSDYS, poslambda: 1e10} - 
-############################################################ -closuretest: - filterseed: 0 # Random seed to be used in filtering data partitions - fakedata: off # on = to use FAKEPDF to generate pseudo-data - fakepdf: MSTW2008nlo68cl # Theory input for pseudo-data - errorsize: 1.0 # uncertainties rescaling - fakenoise: off # on = to add random fluctuations to pseudo-data - rancutprob: 1.0 # Fraction of data to be included in the fit - rancutmethod: 0 # Method to select rancutprob data fraction - rancuttrnval: off # 0(1) to output training(valiation) chi2 in report - printpdf4gen: off # To print info on PDFs during minimization - -############################################################ -lhagrid: - nx: 150 - xmin: 1e-9 - xmed: 0.1 - xmax: 1.0 - nq: 50 - qmax: 1e5 - -############################################################ -debug: off From f1ac85bac7a6434f6482b164d1d8b22bc3f1841b Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 10 Mar 2022 14:16:16 +0100 Subject: [PATCH 06/64] Fixed conflicts --- doc/sphinx/source/vp/theorycov/index.rst | 4 ++-- n3fit/src/n3fit/scripts/n3fit_exec.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/sphinx/source/vp/theorycov/index.rst b/doc/sphinx/source/vp/theorycov/index.rst index 308f3ee3ee..78c2000274 100644 --- a/doc/sphinx/source/vp/theorycov/index.rst +++ b/doc/sphinx/source/vp/theorycov/index.rst @@ -29,8 +29,8 @@ Summary - Theoretical covariance matrices are built according to the various prescriptions in :ref:`prescrips`. -- The prescription must be one of 3 point, 3r point, 3f point, 5 point, 5bar point, 7 point or 9 point. You can specify - this using ``point_prescription: "x point"`` in the runcard. The translation of this flag +- The prescription must be one of 3(f, r) point, 5(bar) point, 7(original) point or 9 point, see :ref:`definitions `. + You can specify this using ``point_prescription: "x point"`` in the runcard. 
The translation of this flag into the relevant ``theoryids`` is handled by the ``scalevariations`` module in ``validphys``. - As input you need theories for the relevant scale combinations which diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index f85678d0b4..2ceb70bd8e 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -148,10 +148,10 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None - if file_content.get('theorycovmatconfig') is not None: + if thconfig:=file_content.get('theorycovmatconfig') is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True - N3FIT_FIXED_CONFIG['use_user_uncertainties'] = file_content.get('theorycovmatconfig').get('use_user_uncertainties') - N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = file_content.get('theorycovmatconfig').get('use_scalevar_uncertainties') + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('theorycovmatconfig').get('use_user_uncertainties') + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('theorycovmatconfig').get('use_scalevar_uncertainties') file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) From 6e64443d2e0285d528f7108a02087a956a20e180 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 10 Mar 2022 14:18:31 +0100 Subject: [PATCH 07/64] Fixing conflicts --- doc/sphinx/source/vp/theorycov/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx/source/vp/theorycov/index.rst b/doc/sphinx/source/vp/theorycov/index.rst index 78c2000274..5e513327b1 100644 --- a/doc/sphinx/source/vp/theorycov/index.rst +++ b/doc/sphinx/source/vp/theorycov/index.rst @@ -30,7 +30,7 @@ Summary in :ref:`prescrips`. 
- The prescription must be one of 3(f, r) point, 5(bar) point, 7(original) point or 9 point, see :ref:`definitions `. - You can specify this using ``point_prescription: "x point"`` in the runcard. The translation of this flag + You can specify this using ``point_prescription: "x point"`` in the runcard. The translation of this flag into the relevant ``theoryids`` is handled by the ``scalevariations`` module in ``validphys``. - As input you need theories for the relevant scale combinations which From 2601ce6759313744c8aaed599c82145f84e1e202 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 10 Mar 2022 14:25:24 +0100 Subject: [PATCH 08/64] Removing comments --- validphys2/src/validphys/covmats.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index c51a2003cf..515d479cdb 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -227,18 +227,7 @@ def dataset_inputs_covmat_from_systematics( covmat = regularize_covmat( covmat, norm_threshold=norm_threshold - ) - # try: - # theory_covmat_path = pathlib.Path.cwd() - # data = pd.read_csv(theory_covmat_path / "prov_moredata" / "tables" / "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv", sep='\t') - # datael = data.iloc[3:] - # datael = datael.drop(['group'], axis=1) - # datael = datael.drop(['Unnamed: 1'], axis=1) - # datael = datael.drop(['Unnamed: 2'], axis=1) - # theory_covmat = np.copy(datael.values) - #except FileNotFoundError: - # theory_covmat = np.zeros(covmat.shape) - #total_covmat = np.add(covmat, theory_covmat) + ) return covmat From 9a8cf90ab3edbef4beb3411aebf5c26df1e693d0 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 10 Mar 2022 14:50:18 +0100 Subject: [PATCH 09/64] Fixing stuffs --- n3fit/src/n3fit/scripts/n3fit_exec.py | 6 +++--- validphys2/src/validphys/config.py | 5 ++++- validphys2/src/validphys/covmats.py | 2 +- 3 files changed, 8 
insertions(+), 5 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 2ceb70bd8e..f85678d0b4 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -148,10 +148,10 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None - if thconfig:=file_content.get('theorycovmatconfig') is not None: + if file_content.get('theorycovmatconfig') is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True - N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('theorycovmatconfig').get('use_user_uncertainties') - N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('theorycovmatconfig').get('use_scalevar_uncertainties') + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = file_content.get('theorycovmatconfig').get('use_user_uncertainties') + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = file_content.get('theorycovmatconfig').get('use_scalevar_uncertainties') file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index bd859cbdf4..aa645fa011 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1482,6 +1482,9 @@ def produce_group_dataset_inputs_by_metadata( # special case of custom group, take the grouping from the dataset input if processed_metadata_group == "custom_group": group_name = str(dsinput.custom_group) + #special case of ALL, grouping everything together + if processed_metadata_group == "ALL": + group_name = processed_metadata_group # otherwise try and take the key from the metadata. 
else: cd = self.produce_commondata(dataset_input=dsinput) @@ -1505,7 +1508,7 @@ def produce_group_dataset_inputs_by_metadata( def produce_group_dataset_inputs_by_fitting_group(self, data_input, theory_covmat_flag): if theory_covmat_flag is True: - return self.produce_group_dataset_inputs_by_metadata(data_input, "custom_group") + return self.produce_group_dataset_inputs_by_metadata(data_input, "ALL") return self.produce_group_dataset_inputs_by_metadata(data_input, "experiment") diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 515d479cdb..0465b7ba21 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -369,7 +369,7 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_user_covmat.csv") else: generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") - theorypath = pathlib.Path(str(output_path/"tables"/generic_path.relative_to(generic_path.anchor))) + theorypath = pathlib.Path(str(output_path/"tables"/generic_path)) theory_covmat = pd.read_csv(theorypath, sep='\t') theory_covmat = theory_covmat.iloc[3:].drop(['group'], axis=1).drop(['Unnamed: 1'], axis=1).drop(['Unnamed: 2'], axis=1) return np.add(exp_covmat,theory_covmat.values.astype(np.float)) From 88adcadc19a114779e64d149bb04d4d0a1dcdea9 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 10 Mar 2022 14:52:55 +0100 Subject: [PATCH 10/64] Changing n3fit_exec --- n3fit/src/n3fit/scripts/n3fit_exec.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index f85678d0b4..a9622ebe70 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -148,10 +148,10 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False 
N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None - if file_content.get('theorycovmatconfig') is not None: + if (thconfig:=file_content.get('theorycovmatconfig')) is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True - N3FIT_FIXED_CONFIG['use_user_uncertainties'] = file_content.get('theorycovmatconfig').get('use_user_uncertainties') - N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = file_content.get('theorycovmatconfig').get('use_scalevar_uncertainties') + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) From 3c0d0f572ce0f90c67937afa90e9a77c5621526d Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 15 Mar 2022 11:42:06 +0100 Subject: [PATCH 11/64] Starting to include thcovmat in make_replica --- validphys2/src/validphys/commondata.py | 2 ++ validphys2/src/validphys/covmats.py | 46 +++++++++++++++----------- validphys2/src/validphys/n3fit_data.py | 1 - validphys2/src/validphys/pseudodata.py | 16 ++++++--- 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index 6f6445a466..eb1667af81 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -36,3 +36,5 @@ def loaded_commondata_with_cuts(commondata, cuts): "loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input") ) + + diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 0465b7ba21..7041eda790 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -340,16 +340,37 @@ def dataset_inputs_t0_covmat_from_systematics( _list_of_central_values=dataset_inputs_t0_predictions ) +def load_theory_covmat(output_path, 
+ use_user_uncertainties, + use_scalevar_uncertainties + ): + generic_path = None + if use_scalevar_uncertainties is True: + if use_user_uncertainties is True: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_total_theory_covmat.csv") + else: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") + else: + if use_user_uncertainties is True: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_user_covmat.csv") + else: + generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") + theorypath = pathlib.Path(str(output_path/"tables"/generic_path)) + theory_covmat = pd.read_csv(theorypath, sep='\t') + theory_covmat = theory_covmat.iloc[3:].drop(['group'], axis=1).drop(['Unnamed: 1'], axis=1).drop(['Unnamed: 2'], axis=1) + return theory_covmat.values.astype(np.float) + + + def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, - output_path, - theory_covmat_flag, - use_user_uncertainties, - use_scalevar_uncertainties): + load_theory_covmat, + theory_covmat_flag + ): exp_covmat = dataset_inputs_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, data_input, @@ -357,22 +378,9 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, norm_threshold=norm_threshold, _list_of_central_values=dataset_inputs_t0_predictions ) + import ipdb; ipdb.set_trace() if theory_covmat_flag is True: - generic_path = None - if use_scalevar_uncertainties is True: - if use_user_uncertainties is True: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_total_theory_covmat.csv") - else: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") - else: - if use_user_uncertainties is True: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_user_covmat.csv") - else: - generic_path = 
pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") - theorypath = pathlib.Path(str(output_path/"tables"/generic_path)) - theory_covmat = pd.read_csv(theorypath, sep='\t') - theory_covmat = theory_covmat.iloc[3:].drop(['group'], axis=1).drop(['Unnamed: 1'], axis=1).drop(['Unnamed: 2'], axis=1) - return np.add(exp_covmat,theory_covmat.values.astype(np.float)) + return np.add(exp_covmat,load_theory_covmat) return exp_covmat diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 8e5eeaace7..9c1d23ae7d 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -238,7 +238,6 @@ def fitting_data_dict( spec_c = data.load() ndata = spec_c.GetNData() expdata_true = spec_c.get_cv().reshape(1, ndata) - expdata = make_replica datasets = common_data_reader_experiment(spec_c, data) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 5c9d0b04bd..58b57c3284 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,10 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, genrep=True): +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, + theory_covmat_flag, + load_theory_covmat, + genrep=True): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. 
@@ -143,14 +146,18 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, genr """ if not genrep: return np.concatenate([cd.central_values for cd in groups_dataset_inputs_loaded_cd_with_cuts]) - # Seed the numpy RNG with the seed and the name of the datasets in this run name_salt = "-".join(i.setname for i in groups_dataset_inputs_loaded_cd_with_cuts) name_seed = int(hashlib.sha256(name_salt.encode()).hexdigest(), 16) % 10 ** 8 rng = np.random.default_rng(seed=replica_mcseed+name_seed) - # The inner while True loop is for ensuring a positive definite # pseudodata replica + + #loading theory_covmat if requested + theory_covmat = pd.DataFrame([]) + if theory_covmat_flag is True: + theory_covmat = load_theory_covmat + while True: pseudodatas = [] special_add = [] @@ -209,11 +216,10 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, genr # non-overlapping systematics are set to NaN by concat, fill with 0 instead. special_add_errors = pd.concat(special_add, axis=0, sort=True).fillna(0).to_numpy() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() - - all_pseudodata = ( np.concatenate(pseudodatas, axis=0) + special_add_errors @ rng.normal(size=special_add_errors.shape[1]) + + theory_covmat @ rng.normal(size=theory_covmat.shape[1]) ) * ( np.concatenate(mult_shifts, axis=0) * (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) From 6278c58b7257f06bfe5a2a3bf7dff871924fd3b7 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 15 Mar 2022 14:31:48 +0100 Subject: [PATCH 12/64] Fixing --- validphys2/src/validphys/covmats.py | 9 ++++----- validphys2/src/validphys/pseudodata.py | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 7041eda790..27ed8a97f1 100644 --- a/validphys2/src/validphys/covmats.py +++ 
b/validphys2/src/validphys/covmats.py @@ -340,7 +340,7 @@ def dataset_inputs_t0_covmat_from_systematics( _list_of_central_values=dataset_inputs_t0_predictions ) -def load_theory_covmat(output_path, +def loaded_theory_covmat(output_path, use_user_uncertainties, use_scalevar_uncertainties ): @@ -355,7 +355,7 @@ def load_theory_covmat(output_path, generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_user_covmat.csv") else: generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") - theorypath = pathlib.Path(str(output_path/"tables"/generic_path)) + theorypath = output_path/"tables"/generic_path theory_covmat = pd.read_csv(theorypath, sep='\t') theory_covmat = theory_covmat.iloc[3:].drop(['group'], axis=1).drop(['Unnamed: 1'], axis=1).drop(['Unnamed: 2'], axis=1) return theory_covmat.values.astype(np.float) @@ -368,7 +368,7 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, - load_theory_covmat, + loaded_theory_covmat, theory_covmat_flag ): exp_covmat = dataset_inputs_covmat_from_systematics( @@ -378,9 +378,8 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, norm_threshold=norm_threshold, _list_of_central_values=dataset_inputs_t0_predictions ) - import ipdb; ipdb.set_trace() if theory_covmat_flag is True: - return np.add(exp_covmat,load_theory_covmat) + return np.add(exp_covmat,loaded_theory_covmat) return exp_covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 58b57c3284..9af6713397 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +from scipy import linalg as lin from validphys.covmats import INTRA_DATASET_SYS_NAME @@ -103,7 +104,7 @@ def read_replica_pseudodata(fit, context_index, replica): def 
make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, theory_covmat_flag, - load_theory_covmat, + loaded_theory_covmat, genrep=True): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for @@ -156,7 +157,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, #loading theory_covmat if requested theory_covmat = pd.DataFrame([]) if theory_covmat_flag is True: - theory_covmat = load_theory_covmat + theory_covmat = loaded_theory_covmat while True: pseudodatas = [] @@ -216,10 +217,20 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, # non-overlapping systematics are set to NaN by concat, fill with 0 instead. special_add_errors = pd.concat(special_add, axis=0, sort=True).fillna(0).to_numpy() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() + import ipdb; ipdb.set_trace() + #eliminate negative eignvalues of thcovmat + eigval, eigvec = lin.eig(theory_covmat) + for j in range(len(eigval)): + if eigval[j] < 0.: + eigval[j] = 1.e-8 + Lambda = np.diag(eigval) + new_theory_covmat = eigvec @ Lambda @ lin.inv(eigvec) + #compute cholesky + chol_theory_covmat = lin.cholesky(new_theory_covmat) all_pseudodata = ( np.concatenate(pseudodatas, axis=0) + special_add_errors @ rng.normal(size=special_add_errors.shape[1]) - + theory_covmat @ rng.normal(size=theory_covmat.shape[1]) + + chol_theory_covmat @ rng.normal(size=theory_covmat.shape[1]) ) * ( np.concatenate(mult_shifts, axis=0) * (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) From 6a2c0e71dc76dadee3b8fa221c6167747e1f78a7 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 15 Mar 2022 15:15:34 +0100 Subject: [PATCH 13/64] Added sqrt of thcovmat to make_replica --- validphys2/src/validphys/covmats.py | 4 ++++ validphys2/src/validphys/pseudodata.py | 28 +++++++++++++------------- 2 files changed, 18 
insertions(+), 14 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 27ed8a97f1..9929e89457 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -341,9 +341,13 @@ def dataset_inputs_t0_covmat_from_systematics( ) def loaded_theory_covmat(output_path, + theory_covmat_flag, use_user_uncertainties, use_scalevar_uncertainties ): + if theory_covmat_flag is False: + return np.array([]) + generic_path = None if use_scalevar_uncertainties is True: if use_user_uncertainties is True: diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 9af6713397..a8e3e337a7 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -11,7 +11,7 @@ import pandas as pd from scipy import linalg as lin -from validphys.covmats import INTRA_DATASET_SYS_NAME +from validphys.covmats import INTRA_DATASET_SYS_NAME, sqrt_covmat from reportengine import collect @@ -155,9 +155,8 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, # pseudodata replica #loading theory_covmat if requested - theory_covmat = pd.DataFrame([]) - if theory_covmat_flag is True: - theory_covmat = loaded_theory_covmat + + theory_covmat = loaded_theory_covmat while True: pseudodatas = [] @@ -217,20 +216,21 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
special_add_errors = pd.concat(special_add, axis=0, sort=True).fillna(0).to_numpy() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() - import ipdb; ipdb.set_trace() + #eliminate negative eignvalues of thcovmat - eigval, eigvec = lin.eig(theory_covmat) - for j in range(len(eigval)): - if eigval[j] < 0.: - eigval[j] = 1.e-8 - Lambda = np.diag(eigval) - new_theory_covmat = eigvec @ Lambda @ lin.inv(eigvec) - #compute cholesky - chol_theory_covmat = lin.cholesky(new_theory_covmat) + chol_theory_covmat = np.zeros(shape = (1,1)) + if theory_covmat_flag is True: + eigval, eigvec = lin.eig(theory_covmat) + for j in range(len(eigval)): + if eigval[j] < 0.: + eigval[j] = 1.e-9 + new_eigval_diag = np.diag(eigval) + new_theory_covmat = eigvec @ new_eigval_diag @ lin.inv(eigvec) + chol_theory_covmat = sqrt_covmat(new_theory_covmat) all_pseudodata = ( np.concatenate(pseudodatas, axis=0) + special_add_errors @ rng.normal(size=special_add_errors.shape[1]) - + chol_theory_covmat @ rng.normal(size=theory_covmat.shape[1]) + + chol_theory_covmat @ rng.normal(size=chol_theory_covmat.shape[1]) ) * ( np.concatenate(mult_shifts, axis=0) * (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) From 2b615048bad7404a6d1e56ef373112fd222c10e9 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 15 Mar 2022 18:09:21 +0100 Subject: [PATCH 14/64] Fixed sqrt of covmat --- validphys2/src/validphys/pseudodata.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index a8e3e337a7..3c983dec53 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -155,9 +155,19 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, # pseudodata replica #loading theory_covmat if requested - theory_covmat = loaded_theory_covmat - + #eliminate negative 
eignvalues of thcovmat and compute sqrt + chol_theory_covmat = np.zeros(shape = (1,1)) + if theory_covmat_flag is True: + tr = 1.e-9 + eigval, eigvec = lin.eig(theory_covmat) + for j in range(len(eigval)): + if eigval[j] < tr: + eigval[j] = tr + new_eigval_diag = np.diag(eigval) + new_theory_covmat = eigvec @ new_eigval_diag @ lin.inv(eigvec) + chol_theory_covmat = sqrt_covmat(new_theory_covmat) + while True: pseudodatas = [] special_add = [] @@ -217,16 +227,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, special_add_errors = pd.concat(special_add, axis=0, sort=True).fillna(0).to_numpy() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() - #eliminate negative eignvalues of thcovmat - chol_theory_covmat = np.zeros(shape = (1,1)) - if theory_covmat_flag is True: - eigval, eigvec = lin.eig(theory_covmat) - for j in range(len(eigval)): - if eigval[j] < 0.: - eigval[j] = 1.e-9 - new_eigval_diag = np.diag(eigval) - new_theory_covmat = eigvec @ new_eigval_diag @ lin.inv(eigvec) - chol_theory_covmat = sqrt_covmat(new_theory_covmat) + all_pseudodata = ( np.concatenate(pseudodatas, axis=0) + special_add_errors @ rng.normal(size=special_add_errors.shape[1]) From 73e578cba7fbb42d7549a4173f4bb147aa8d88f1 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 15 Mar 2022 19:20:06 +0100 Subject: [PATCH 15/64] Fixing number for sqrt --- validphys2/src/validphys/pseudodata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 3c983dec53..5e0f95d12a 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -159,7 +159,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, #eliminate negative eignvalues of thcovmat and compute sqrt chol_theory_covmat = np.zeros(shape = (1,1)) if theory_covmat_flag is True: - tr = 1.e-9 + tr = 1.e-6 eigval, eigvec = 
lin.eig(theory_covmat) for j in range(len(eigval)): if eigval[j] < tr: From 3789d1eed5d4368b98a8e5082cb56f959a839817 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 16 Mar 2022 10:40:40 +0100 Subject: [PATCH 16/64] Fixing loading of thcovmat --- validphys2/src/validphys/covmats.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 9929e89457..ddb7157f45 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -360,9 +360,8 @@ def loaded_theory_covmat(output_path, else: generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") theorypath = output_path/"tables"/generic_path - theory_covmat = pd.read_csv(theorypath, sep='\t') - theory_covmat = theory_covmat.iloc[3:].drop(['group'], axis=1).drop(['Unnamed: 1'], axis=1).drop(['Unnamed: 2'], axis=1) - return theory_covmat.values.astype(np.float) + theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) + return theory_covmat.values From 82ce369054d35874aa4daea7b31f671e5224b5c5 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 16 Mar 2022 11:35:00 +0100 Subject: [PATCH 17/64] First solution to sqrt of thcovmat --- validphys2/src/validphys/pseudodata.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 5e0f95d12a..27794adfdd 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -159,14 +159,14 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, #eliminate negative eignvalues of thcovmat and compute sqrt chol_theory_covmat = np.zeros(shape = (1,1)) if theory_covmat_flag is True: - tr = 1.e-6 + tr = 1.e-3 eigval, eigvec = lin.eig(theory_covmat) for j in range(len(eigval)): - if eigval[j] < tr: - eigval[j] = 
tr + if eigval[j] < max(eigval)*tr: + eigval[j] = max(eigval)*tr new_eigval_diag = np.diag(eigval) new_theory_covmat = eigvec @ new_eigval_diag @ lin.inv(eigvec) - chol_theory_covmat = sqrt_covmat(new_theory_covmat) + chol_theory_covmat = np.real(sqrt_covmat(new_theory_covmat)) while True: pseudodatas = [] From 234cf803eb2a80d7fef3bb5a783429e7ed2b9566 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 17 Mar 2022 12:04:18 +0100 Subject: [PATCH 18/64] Added theory_covmat to additive contrib for make_replica --- validphys2/src/validphys/covmats.py | 31 ++++++++++++++++-- validphys2/src/validphys/pseudodata.py | 45 ++------------------------ 2 files changed, 31 insertions(+), 45 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index ddb7157f45..0c90cf6cf4 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -128,13 +128,34 @@ def covmat_from_systematics( ) return covmat +def dataset_inputs_only_additive_covmat_plus_thcovmat( + dataset_inputs_loaded_cd_with_cuts, + data_input, + theory_covmat_flag, + loaded_theory_covmat, + use_weights_in_covmat=True, + norm_threshold=None, + _list_of_central_values=None, +): + exp_covmat = dataset_inputs_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts, + data_input, + use_weights_in_covmat, + norm_threshold, + _list_of_central_values, _only_additive = True) + if theory_covmat_flag: + return exp_covmat + loaded_theory_covmat + return exp_covmat + + + def dataset_inputs_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat=True, norm_threshold=None, - _list_of_central_values=None + _list_of_central_values=None, + _only_additive=False ): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. 
@@ -201,7 +222,11 @@ def dataset_inputs_covmat_from_systematics( data_input, _list_of_central_values ): - sys_errors = cd.systematic_errors(central_values) + if _only_additive: + sys_errors = cd.additive_errors + else: + sys_errors = cd.systematic_errors(central_values) + stat_errors = cd.stat_errors.to_numpy() weights.append(np.full_like(stat_errors, dsinp.weight)) # separate out the special uncertainties which can be correlated across @@ -382,7 +407,7 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, _list_of_central_values=dataset_inputs_t0_predictions ) if theory_covmat_flag is True: - return np.add(exp_covmat,loaded_theory_covmat) + return exp_covmat + loaded_theory_covmat return exp_covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 27794adfdd..b91e2cc138 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,10 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) - -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, - theory_covmat_flag, - loaded_theory_covmat, +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, dataset_inputs_only_additive_covmat_plus_thcovmat, replica_mcseed, genrep=True): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for @@ -153,24 +150,8 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, rng = np.random.default_rng(seed=replica_mcseed+name_seed) # The inner while True loop is for ensuring a positive definite # pseudodata replica - - #loading theory_covmat if requested - theory_covmat = loaded_theory_covmat - #eliminate negative eignvalues of thcovmat and compute sqrt - chol_theory_covmat = np.zeros(shape = (1,1)) - if theory_covmat_flag is True: - tr = 1.e-3 - eigval, eigvec = 
lin.eig(theory_covmat) - for j in range(len(eigval)): - if eigval[j] < max(eigval)*tr: - eigval[j] = max(eigval)*tr - new_eigval_diag = np.diag(eigval) - new_theory_covmat = eigvec @ new_eigval_diag @ lin.inv(eigvec) - chol_theory_covmat = np.real(sqrt_covmat(new_theory_covmat)) - while True: pseudodatas = [] - special_add = [] special_mult = [] mult_shifts = [] check_positive_masks = [] @@ -180,23 +161,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, # add contribution from statistical uncertainty pseudodata += (cd.stat_errors.to_numpy() * rng.normal(size=cd.ndata)) - - # ~~~ ADDITIVE ERRORS ~~~ - add_errors = cd.additive_errors - add_uncorr_errors = add_errors.loc[:, add_errors.columns=="UNCORR"].to_numpy() - - pseudodata += (add_uncorr_errors * rng.normal(size=add_uncorr_errors.shape)).sum(axis=1) - - # correlated within dataset - add_corr_errors = add_errors.loc[:, add_errors.columns == "CORR"].to_numpy() - pseudodata += add_corr_errors @ rng.normal(size=add_corr_errors.shape[1]) - - # append the partially shifted pseudodata pseudodatas.append(pseudodata) - # store the additive errors with correlations between datasets for later use - special_add.append( - add_errors.loc[:, ~add_errors.columns.isin(INTRA_DATASET_SYS_NAME)] - ) # ~~~ MULTIPLICATIVE ERRORS ~~~ mult_errors = cd.multiplicative_errors mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() @@ -222,16 +187,12 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) - # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
- special_add_errors = pd.concat(special_add, axis=0, sort=True).fillna(0).to_numpy() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() - - + total_covmat_sqrt = sqrt_covmat(dataset_inputs_only_additive_covmat_plus_thcovmat) all_pseudodata = ( np.concatenate(pseudodatas, axis=0) - + special_add_errors @ rng.normal(size=special_add_errors.shape[1]) - + chol_theory_covmat @ rng.normal(size=chol_theory_covmat.shape[1]) + + total_covmat_sqrt @ rng.normal(size=total_covmat_sqrt.shape[1]) ) * ( np.concatenate(mult_shifts, axis=0) * (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) From 9d83d751fbb14979298d5b04e914f05fd9b4679c Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 17 Mar 2022 12:22:51 +0100 Subject: [PATCH 19/64] Added flags for thcovmat --- n3fit/src/n3fit/scripts/n3fit_exec.py | 2 ++ validphys2/src/validphys/covmats.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index a9622ebe70..fc4f7a8996 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -150,6 +150,8 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None if (thconfig:=file_content.get('theorycovmatconfig')) is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True + N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting') + N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling') N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') file_content.update(N3FIT_FIXED_CONFIG) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 0c90cf6cf4..71c17d3e59 100644 --- 
a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -132,6 +132,7 @@ def dataset_inputs_only_additive_covmat_plus_thcovmat( dataset_inputs_loaded_cd_with_cuts, data_input, theory_covmat_flag, + use_thcovmat_in_sampling, loaded_theory_covmat, use_weights_in_covmat=True, norm_threshold=None, @@ -143,7 +144,8 @@ def dataset_inputs_only_additive_covmat_plus_thcovmat( norm_threshold, _list_of_central_values, _only_additive = True) if theory_covmat_flag: - return exp_covmat + loaded_theory_covmat + if use_thcovmat_in_sampling: + return exp_covmat + loaded_theory_covmat return exp_covmat @@ -397,7 +399,8 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, norm_threshold=None, dataset_inputs_t0_predictions, loaded_theory_covmat, - theory_covmat_flag + theory_covmat_flag, + use_thcovmat_in_fitting, ): exp_covmat = dataset_inputs_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, @@ -407,7 +410,8 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, _list_of_central_values=dataset_inputs_t0_predictions ) if theory_covmat_flag is True: - return exp_covmat + loaded_theory_covmat + if use_thcovmat_in_fitting is True: + return exp_covmat + loaded_theory_covmat return exp_covmat From 2064796a72cc9669d9657e2d2d7d3b89783b511e Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 17 Mar 2022 16:05:49 +0100 Subject: [PATCH 20/64] Adding regularization to thcovmat --- validphys2/src/validphys/covmats.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 71c17d3e59..b2b3f92f8a 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -145,7 +145,9 @@ def dataset_inputs_only_additive_covmat_plus_thcovmat( _list_of_central_values, _only_additive = True) if theory_covmat_flag: if use_thcovmat_in_sampling: - return exp_covmat + loaded_theory_covmat + #Adding a small 
contribution to the diagonal to regularize it (make it positive definite) + diag_enha = 1.e-6 + return exp_covmat + loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) return exp_covmat From abc1a0d3436be43472c9dcfd01b8d7de1b3a4d33 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 17 Mar 2022 19:47:39 +0100 Subject: [PATCH 21/64] Fixing flags --- n3fit/src/n3fit/scripts/n3fit_exec.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index fc4f7a8996..32954fc31d 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -148,6 +148,8 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None + N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = None + N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = None if (thconfig:=file_content.get('theorycovmatconfig')) is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting') From 9dc505cf89a6c0e742641af2828aca16ea57b2c6 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 18 Mar 2022 10:39:00 +0100 Subject: [PATCH 22/64] Fixed make_replica and vp-comparefits --- validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/pseudodata.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index aa645fa011..f0dcf74780 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1099,7 +1099,7 @@ def produce_fitthcovmat( / "tables" ) # All possible valid files - covfiles = sorted(covmat_path.glob("*theory_covmat.csv")) + covfiles = sorted(covmat_path.glob("*theory_covmat_*.csv")) if not covfiles: raise 
ConfigError( "Fit appeared to use theory covmat in fit but the file was not at the " diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index b91e2cc138..a68f3b4899 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -159,8 +159,6 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, dataset_inputs_only_ # copy here to avoid mutating the central values. pseudodata = cd.central_values.to_numpy(copy=True) - # add contribution from statistical uncertainty - pseudodata += (cd.stat_errors.to_numpy() * rng.normal(size=cd.ndata)) pseudodatas.append(pseudodata) # ~~~ MULTIPLICATIVE ERRORS ~~~ mult_errors = cd.multiplicative_errors @@ -187,7 +185,9 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, dataset_inputs_only_ check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) + # non-overlapping systematics are set to NaN by concat, fill with 0 instead. + import ipdb; ipdb.set_trace() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() total_covmat_sqrt = sqrt_covmat(dataset_inputs_only_additive_covmat_plus_thcovmat) all_pseudodata = ( From dd96091d50506dc143a7af63925250adc0bc1864 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 18 Mar 2022 11:13:32 +0100 Subject: [PATCH 23/64] Remove pdb --- validphys2/src/validphys/pseudodata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index a68f3b4899..e92de8a9d4 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -187,7 +187,6 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, dataset_inputs_only_ check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
- import ipdb; ipdb.set_trace() special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() total_covmat_sqrt = sqrt_covmat(dataset_inputs_only_additive_covmat_plus_thcovmat) all_pseudodata = ( From 36b0a5526ca772cbe1aa3c16de9f516834f509ec Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 18 Mar 2022 12:31:52 +0100 Subject: [PATCH 24/64] Changing implementation of make_replica (1st step) --- validphys2/src/validphys/covmats.py | 30 ++----------------- validphys2/src/validphys/pseudodata.py | 2 +- .../theorycovariance/construction.py | 1 + 3 files changed, 4 insertions(+), 29 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index b2b3f92f8a..e655f34c78 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -128,30 +128,6 @@ def covmat_from_systematics( ) return covmat -def dataset_inputs_only_additive_covmat_plus_thcovmat( - dataset_inputs_loaded_cd_with_cuts, - data_input, - theory_covmat_flag, - use_thcovmat_in_sampling, - loaded_theory_covmat, - use_weights_in_covmat=True, - norm_threshold=None, - _list_of_central_values=None, -): - exp_covmat = dataset_inputs_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts, - data_input, - use_weights_in_covmat, - norm_threshold, - _list_of_central_values, _only_additive = True) - if theory_covmat_flag: - if use_thcovmat_in_sampling: - #Adding a small contribution to the diagonal to regularize it (make it positive definite) - diag_enha = 1.e-6 - return exp_covmat + loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) - return exp_covmat - - - def dataset_inputs_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, @@ -226,10 +202,8 @@ def dataset_inputs_covmat_from_systematics( data_input, _list_of_central_values ): - if _only_additive: - sys_errors = cd.additive_errors - else: - sys_errors = cd.systematic_errors(central_values) + + 
sys_errors = cd.systematic_errors(central_values) stat_errors = cd.stat_errors.to_numpy() weights.append(np.full_like(stat_errors, dsinp.weight)) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index e92de8a9d4..168928834b 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, dataset_inputs_only_additive_covmat_plus_thcovmat, replica_mcseed, +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, genrep=True): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 128f2fe5df..d289e06890 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -448,6 +448,7 @@ def theory_covmat_custom(covs_pt_prescrip, covmap, procs_index): for i in range(matlength): for j in range(matlength): cov_by_exp[covmap[i]][covmap[j]] = mat[i][j] + import ipdb; ipdb.set_trace() df = pd.DataFrame(cov_by_exp, index=procs_index, columns=procs_index) return df From c3dc6d7cb47d93663886e64dd67ed2451815aaa6 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 18 Mar 2022 12:44:02 +0100 Subject: [PATCH 25/64] Implemented make_replica with full covmat --- validphys2/src/validphys/pseudodata.py | 45 +++++++------------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 168928834b..824c119c8f 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py 
@@ -101,8 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, - genrep=True): +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_t0_covmat_from_systematics, loaded_theory_covmat, theory_covmat_flag, use_thcovmat_in_sampling, genrep=True): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. @@ -144,59 +143,37 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, """ if not genrep: return np.concatenate([cd.central_values for cd in groups_dataset_inputs_loaded_cd_with_cuts]) + # Seed the numpy RNG with the seed and the name of the datasets in this run name_salt = "-".join(i.setname for i in groups_dataset_inputs_loaded_cd_with_cuts) name_seed = int(hashlib.sha256(name_salt.encode()).hexdigest(), 16) % 10 ** 8 rng = np.random.default_rng(seed=replica_mcseed+name_seed) + #construct covmat + covmat = dataset_inputs_t0_covmat_from_systematics + if theory_covmat_flag: + if use_thcovmat_in_sampling: + covmat += loaded_theory_covmat + covmat_sqrt = sqrt_covmat(covmat) + # The inner while True loop is for ensuring a positive definite # pseudodata replica while True: pseudodatas = [] - special_mult = [] - mult_shifts = [] check_positive_masks = [] for cd in groups_dataset_inputs_loaded_cd_with_cuts: # copy here to avoid mutating the central values. 
pseudodata = cd.central_values.to_numpy(copy=True) pseudodatas.append(pseudodata) - # ~~~ MULTIPLICATIVE ERRORS ~~~ - mult_errors = cd.multiplicative_errors - mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() - # convert to from percent to fraction - mult_shift = ( - 1 + mult_uncorr_errors * rng.normal(size=mult_uncorr_errors.shape) / 100 - ).prod(axis=1) - - mult_corr_errors = mult_errors.loc[:, mult_errors.columns == "CORR"].to_numpy() - mult_shift *= ( - 1 + mult_corr_errors * rng.normal(size=(1, mult_corr_errors.shape[1])) / 100 - ).prod(axis=1) - - mult_shifts.append(mult_shift) - - # store the multiplicative errors with correlations between datasets for later use - special_mult.append( - mult_errors.loc[:, ~mult_errors.columns.isin(INTRA_DATASET_SYS_NAME)] - ) - - # mask out the data we want to check are all positive if "ASY" in cd.commondataproc: check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) - - # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
- special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() - total_covmat_sqrt = sqrt_covmat(dataset_inputs_only_additive_covmat_plus_thcovmat) + shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) all_pseudodata = ( np.concatenate(pseudodatas, axis=0) - + total_covmat_sqrt @ rng.normal(size=total_covmat_sqrt.shape[1]) - ) * ( - np.concatenate(mult_shifts, axis=0) - * (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) + + shifts ) - if np.all(all_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): break From 9fe8d256554d651dd2399a39dfbc5e82ae2ca183 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 18 Mar 2022 12:57:51 +0100 Subject: [PATCH 26/64] Added regularization to thcovmat --- validphys2/src/validphys/pseudodata.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 824c119c8f..e6a63b314b 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -152,7 +152,10 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat covmat = dataset_inputs_t0_covmat_from_systematics if theory_covmat_flag: if use_thcovmat_in_sampling: - covmat += loaded_theory_covmat + #Adding a little regularization to theory covmat to do the sqrt + diag_enha = 1.e-6 + covmat += loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) + covmat_sqrt = sqrt_covmat(covmat) # The inner while True loop is for ensuring a positive definite From 91a9c802a270ccc1476adbffc59db95e55f7c1a3 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Sat, 19 Mar 2022 19:10:02 +0100 Subject: [PATCH 27/64] Removing a pdb --- validphys2/src/validphys/theorycovariance/construction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py 
b/validphys2/src/validphys/theorycovariance/construction.py index d289e06890..128f2fe5df 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -448,7 +448,6 @@ def theory_covmat_custom(covs_pt_prescrip, covmap, procs_index): for i in range(matlength): for j in range(matlength): cov_by_exp[covmap[i]][covmap[j]] = mat[i][j] - import ipdb; ipdb.set_trace() df = pd.DataFrame(cov_by_exp, index=procs_index, columns=procs_index) return df From 43e60ee970c307f33e5a4853b2ee87fc8ed2e4d4 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 21 Mar 2022 11:30:30 +0100 Subject: [PATCH 28/64] Fix wrong t0 for make_replica --- validphys2/src/validphys/covmats.py | 30 ++++++++++++++++++++++---- validphys2/src/validphys/pseudodata.py | 11 +++------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index e655f34c78..99c31f3935 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -135,7 +135,6 @@ def dataset_inputs_covmat_from_systematics( use_weights_in_covmat=True, norm_threshold=None, _list_of_central_values=None, - _only_additive=False ): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. 
@@ -367,6 +366,28 @@ def loaded_theory_covmat(output_path, return theory_covmat.values +def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, + loaded_theory_covmat, + theory_covmat_flag, + use_thcovmat_in_sampling, + ): + covmat = dataset_inputs_t0_total_covmat( + dataset_inputs_loaded_cd_with_cuts, + data_input=data_input, + use_weights_in_covmat=use_weights_in_covmat, + norm_threshold=norm_threshold, + dataset_inputs_t0_predictions=None, + loaded_theory_covmat=loaded_theory_covmat, + theory_covmat_flag=theory_covmat_flag, + use_thcovmat_in_fitting=use_thcovmat_in_sampling, + ) + + return covmat + def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, *, @@ -385,9 +406,10 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, norm_threshold=norm_threshold, _list_of_central_values=dataset_inputs_t0_predictions ) - if theory_covmat_flag is True: - if use_thcovmat_in_fitting is True: - return exp_covmat + loaded_theory_covmat + if theory_covmat_flag and use_thcovmat_in_fitting: + #Adding regularization to theory covmat to make sqrt possible + diag_enha = 1.e-6 + return exp_covmat + loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) return exp_covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index e6a63b314b..73e497a9bc 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_t0_covmat_from_systematics, loaded_theory_covmat, theory_covmat_flag, use_thcovmat_in_sampling, genrep=True): +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, 
dataset_inputs_total_covmat, genrep=True): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. @@ -149,13 +149,8 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat name_seed = int(hashlib.sha256(name_salt.encode()).hexdigest(), 16) % 10 ** 8 rng = np.random.default_rng(seed=replica_mcseed+name_seed) #construct covmat - covmat = dataset_inputs_t0_covmat_from_systematics - if theory_covmat_flag: - if use_thcovmat_in_sampling: - #Adding a little regularization to theory covmat to do the sqrt - diag_enha = 1.e-6 - covmat += loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) - + covmat = dataset_inputs_total_covmat + import ipdb; ipdb.set_trace() covmat_sqrt = sqrt_covmat(covmat) # The inner while True loop is for ensuring a positive definite From 662bcf0d14bfd78535530caf69d555935912842b Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 21 Mar 2022 11:41:15 +0100 Subject: [PATCH 29/64] Added flags for t0 --- n3fit/src/n3fit/scripts/n3fit_exec.py | 6 ++++++ validphys2/src/validphys/covmats.py | 30 ++++++++++++++++++++++++-- validphys2/src/validphys/pseudodata.py | 1 - 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 32954fc31d..65405facc0 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -156,6 +156,12 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling') N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') + N3FIT_FIXED_CONFIG['use_t0_sampling'] = False + if(sam_t0:=file_content.get('sampling_t0')) 
is not None: + N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0') + N3FIT_FIXED_CONFIG['use_t0_fitting'] = True + if(fit_t0:=file_content.get('fitting_t0')) is not None: + N3FIT_FIXED_CONFIG['use_t0_fitting'] = fit_t0.get('use_t0') file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 99c31f3935..a70fb5211c 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -371,11 +371,14 @@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat=True, norm_threshold=None, + dataset_inputs_t0_predictions, loaded_theory_covmat, theory_covmat_flag, use_thcovmat_in_sampling, + use_t0_sampling ): - covmat = dataset_inputs_t0_total_covmat( + if use_t0_sampling is False: + covmat = dataset_inputs_t0_total_covmat( dataset_inputs_loaded_cd_with_cuts, data_input=data_input, use_weights_in_covmat=use_weights_in_covmat, @@ -384,6 +387,19 @@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, loaded_theory_covmat=loaded_theory_covmat, theory_covmat_flag=theory_covmat_flag, use_thcovmat_in_fitting=use_thcovmat_in_sampling, + use_t0_fitting=use_t0_sampling + ) + else: + covmat = dataset_inputs_t0_total_covmat( + dataset_inputs_loaded_cd_with_cuts, + data_input=data_input, + use_weights_in_covmat=use_weights_in_covmat, + norm_threshold=norm_threshold, + dataset_inputs_t0_predictions=dataset_inputs_t0_predictions, + loaded_theory_covmat=loaded_theory_covmat, + theory_covmat_flag=theory_covmat_flag, + use_thcovmat_in_fitting=use_thcovmat_in_sampling, + use_t0_fitting=use_t0_sampling ) return covmat @@ -398,8 +414,18 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, loaded_theory_covmat, theory_covmat_flag, use_thcovmat_in_fitting, + use_t0_fitting ): - exp_covmat = dataset_inputs_covmat_from_systematics( + if use_t0_fitting is False: + 
exp_covmat = dataset_inputs_covmat_from_systematics( + dataset_inputs_loaded_cd_with_cuts, + data_input, + use_weights_in_covmat, + norm_threshold=norm_threshold, + _list_of_central_values=None + ) + else: + exp_covmat = dataset_inputs_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 73e497a9bc..cbda9e5d1b 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -150,7 +150,6 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat rng = np.random.default_rng(seed=replica_mcseed+name_seed) #construct covmat covmat = dataset_inputs_total_covmat - import ipdb; ipdb.set_trace() covmat_sqrt = sqrt_covmat(covmat) # The inner while True loop is for ensuring a positive definite From 0b070c4abf71367c6415312f1e4e1bc6b430c28f Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 24 Mar 2022 12:29:26 +0100 Subject: [PATCH 30/64] minor changes --- validphys2/src/validphys/commondata.py | 5 +---- validphys2/src/validphys/config.py | 1 + validphys2/src/validphys/covmats.py | 19 ++++++++----------- validphys2/src/validphys/pseudodata.py | 7 ++----- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index eb1667af81..436cd9edbf 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -34,7 +34,4 @@ def loaded_commondata_with_cuts(commondata, cuts): groups_dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input") -) - - - +) \ No newline at end of file diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index f0dcf74780..a1fe065d71 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1053,6 
+1053,7 @@ def produce_nnfit_theory_covmat( # Only user uncertainties from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting + @functools.wraps(f) def res(*args, **kwargs): return f(*args, **kwargs) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index a70fb5211c..7a8ab6b654 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -24,6 +24,7 @@ from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.covmats_utils import construct_covmat, systematics_matrix from validphys.results import ThPredictionsResult + from validphys.commondata import loaded_commondata_with_cuts log = logging.getLogger(__name__) @@ -201,9 +202,7 @@ def dataset_inputs_covmat_from_systematics( data_input, _list_of_central_values ): - sys_errors = cd.systematic_errors(central_values) - stat_errors = cd.stat_errors.to_numpy() weights.append(np.full_like(stat_errors, dsinp.weight)) # separate out the special uncertainties which can be correlated across @@ -351,16 +350,14 @@ def loaded_theory_covmat(output_path, return np.array([]) generic_path = None - if use_scalevar_uncertainties is True: - if use_user_uncertainties is True: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_total_theory_covmat.csv") + if use_user_uncertainties is True: + if use_scalevar_uncertainties is True: + generic_path = "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" else: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") + generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" else: - if use_user_uncertainties is True: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_user_covmat.csv") - else: - generic_path = pathlib.Path("datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv") + generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" + theorypath = 
output_path/"tables"/generic_path theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) return theory_covmat.values @@ -433,7 +430,7 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, _list_of_central_values=dataset_inputs_t0_predictions ) if theory_covmat_flag and use_thcovmat_in_fitting: - #Adding regularization to theory covmat to make sqrt possible + #Adding regularization to theory covmat to make sqrt possible diag_enha = 1.e-6 return exp_covmat + loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) return exp_covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index cbda9e5d1b..04cb9ad207 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -159,7 +159,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat check_positive_masks = [] for cd in groups_dataset_inputs_loaded_cd_with_cuts: # copy here to avoid mutating the central values. 
- pseudodata = cd.central_values.to_numpy(copy=True) + pseudodata = cd.central_values.to_numpy() pseudodatas.append(pseudodata) if "ASY" in cd.commondataproc: @@ -167,10 +167,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) - all_pseudodata = ( - np.concatenate(pseudodatas, axis=0) - + shifts - ) + all_pseudodata = (np.concatenate(pseudodatas, axis=0) + shifts) if np.all(all_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): break From f1c514a95bbeacca3ce8ddf126d269ea2e740d01 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 24 Mar 2022 13:47:59 +0100 Subject: [PATCH 31/64] Separated loops in make_replica --- validphys2/src/validphys/pseudodata.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 04cb9ad207..bc5861c567 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -151,21 +151,23 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat #construct covmat covmat = dataset_inputs_total_covmat covmat_sqrt = sqrt_covmat(covmat) + #Loading the data + pseudodatas = [] + check_positive_masks = [] + for cd in groups_dataset_inputs_loaded_cd_with_cuts: + # copy here to avoid mutating the central values. 
+ pseudodata = cd.central_values.to_numpy() + + pseudodatas.append(pseudodata) + if "ASY" in cd.commondataproc: + check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) + else: + check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) # The inner while True loop is for ensuring a positive definite # pseudodata replica + all_pseudodata = [] while True: - pseudodatas = [] - check_positive_masks = [] - for cd in groups_dataset_inputs_loaded_cd_with_cuts: - # copy here to avoid mutating the central values. - pseudodata = cd.central_values.to_numpy() - - pseudodatas.append(pseudodata) - if "ASY" in cd.commondataproc: - check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) - else: - check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) all_pseudodata = (np.concatenate(pseudodatas, axis=0) + shifts) if np.all(all_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): From d43b3577e94e99902df425b35e95d2ba0e76ae98 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 24 Mar 2022 14:48:44 +0100 Subject: [PATCH 32/64] solved bug in thcovmat order --- validphys2/src/validphys/covmats.py | 12 +++++++----- validphys2/src/validphys/pseudodata.py | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 7a8ab6b654..0ed2774327 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -342,6 +342,7 @@ def dataset_inputs_t0_covmat_from_systematics( ) def loaded_theory_covmat(output_path, + data_input, theory_covmat_flag, use_user_uncertainties, use_scalevar_uncertainties @@ -360,8 +361,11 @@ def loaded_theory_covmat(output_path, theorypath = output_path/"tables"/generic_path theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) - return theory_covmat.values - + #change ordering + 
tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) + bb = [str(i) for i in data_input] + return tmp.reindex(index=bb, columns=bb, level=0).values + def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, *, @@ -430,9 +434,7 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, _list_of_central_values=dataset_inputs_t0_predictions ) if theory_covmat_flag and use_thcovmat_in_fitting: - #Adding regularization to theory covmat to make sqrt possible - diag_enha = 1.e-6 - return exp_covmat + loaded_theory_covmat*(np.ones_like(loaded_theory_covmat) + diag_enha*np.eye(loaded_theory_covmat.shape[0])) + return exp_covmat + loaded_theory_covmat return exp_covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index bc5861c567..a38db30e0a 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -166,11 +166,11 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat # The inner while True loop is for ensuring a positive definite # pseudodata replica - all_pseudodata = [] + all_pseudodata = np.concatenate(pseudodatas, axis=0) while True: shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) - all_pseudodata = (np.concatenate(pseudodatas, axis=0) + shifts) - if np.all(all_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): + shifted_pseudodata = (all_pseudodata + shifts) + if np.all(shifted_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): break return all_pseudodata From 7e89de08e20fe068c8a4f105ee1e20529a0ae2a8 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 10:35:06 +0100 Subject: [PATCH 33/64] Fixed bug in pseudodata --- validphys2/src/validphys/pseudodata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index a38db30e0a..c5ff901821 100644 --- 
a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -173,7 +173,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat if np.all(shifted_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): break - return all_pseudodata + return shifted_pseudodata def indexed_make_replica(groups_index, make_replica): From bf4f841b8414f5a109f5cab265def6d3436acb3d Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 10:38:20 +0100 Subject: [PATCH 34/64] Minor change --- validphys2/src/validphys/pseudodata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index c5ff901821..eab6151548 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -167,10 +167,11 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat # The inner while True loop is for ensuring a positive definite # pseudodata replica all_pseudodata = np.concatenate(pseudodatas, axis=0) + full_mask=np.concatenate(check_positive_masks, axis=0) while True: shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) shifted_pseudodata = (all_pseudodata + shifts) - if np.all(shifted_pseudodata[np.concatenate(check_positive_masks, axis=0)] >= 0): + if np.all(shifted_pseudodata[full_mask] >= 0): break return shifted_pseudodata From 684cba59b128e61c8fb2ed21376b0d6af4211862 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 15:42:38 +0100 Subject: [PATCH 35/64] Restoring possibility of separating mult errors for replica generation --- n3fit/src/n3fit/scripts/n3fit_exec.py | 5 +++- validphys2/src/validphys/covmats.py | 24 ++++++++++++------ validphys2/src/validphys/pseudodata.py | 35 ++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 
65405facc0..07ed72de9e 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -145,6 +145,7 @@ def from_yaml(cls, o, *args, **kwargs): validation_action = namespace + "validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) + #Theorycovmat_flags N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None @@ -157,8 +158,10 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') N3FIT_FIXED_CONFIG['use_t0_sampling'] = False - if(sam_t0:=file_content.get('sampling_t0')) is not None: + N3FIT_FIXED_CONFIG['separate_multiplicative'] = False + if (sam_t0:=file_content.get('sampling_t0')) is not None: N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0') + N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative') N3FIT_FIXED_CONFIG['use_t0_fitting'] = True if(fit_t0:=file_content.get('fitting_t0')) is not None: N3FIT_FIXED_CONFIG['use_t0_fitting'] = fit_t0.get('use_t0') diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 0ed2774327..4759446962 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -136,6 +136,7 @@ def dataset_inputs_covmat_from_systematics( use_weights_in_covmat=True, norm_threshold=None, _list_of_central_values=None, + only_additive=False, ): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. 
@@ -202,7 +203,10 @@ def dataset_inputs_covmat_from_systematics( data_input, _list_of_central_values ): - sys_errors = cd.systematic_errors(central_values) + if only_additive: + sys_errors = cd.additive_errors + else: + sys_errors = cd.systematic_errors(central_values) stat_errors = cd.stat_errors.to_numpy() weights.append(np.full_like(stat_errors, dsinp.weight)) # separate out the special uncertainties which can be correlated across @@ -376,7 +380,8 @@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, loaded_theory_covmat, theory_covmat_flag, use_thcovmat_in_sampling, - use_t0_sampling + use_t0_sampling, + separate_multiplicative, ): if use_t0_sampling is False: covmat = dataset_inputs_t0_total_covmat( @@ -388,7 +393,8 @@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, loaded_theory_covmat=loaded_theory_covmat, theory_covmat_flag=theory_covmat_flag, use_thcovmat_in_fitting=use_thcovmat_in_sampling, - use_t0_fitting=use_t0_sampling + use_t0_fitting=use_t0_sampling, + only_add = separate_multiplicative ) else: covmat = dataset_inputs_t0_total_covmat( @@ -400,7 +406,8 @@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, loaded_theory_covmat=loaded_theory_covmat, theory_covmat_flag=theory_covmat_flag, use_thcovmat_in_fitting=use_thcovmat_in_sampling, - use_t0_fitting=use_t0_sampling + use_t0_fitting=use_t0_sampling, + only_add = separate_multiplicative ) return covmat @@ -415,7 +422,8 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, loaded_theory_covmat, theory_covmat_flag, use_thcovmat_in_fitting, - use_t0_fitting + use_t0_fitting, + only_add=False, ): if use_t0_fitting is False: exp_covmat = dataset_inputs_covmat_from_systematics( @@ -423,7 +431,8 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold=norm_threshold, - _list_of_central_values=None + _list_of_central_values=None, + only_additive = only_add ) else: 
exp_covmat = dataset_inputs_covmat_from_systematics( @@ -431,7 +440,8 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold=norm_threshold, - _list_of_central_values=dataset_inputs_t0_predictions + _list_of_central_values=dataset_inputs_t0_predictions, + only_additive = only_add ) if theory_covmat_flag and use_thcovmat_in_fitting: return exp_covmat + loaded_theory_covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index eab6151548..2c08d9a006 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_total_covmat, genrep=True): +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_total_covmat, separate_multiplicative, genrep=True, ): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. @@ -154,11 +154,21 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat #Loading the data pseudodatas = [] check_positive_masks = [] + nonspecial_mult = [] + special_mult = [] for cd in groups_dataset_inputs_loaded_cd_with_cuts: # copy here to avoid mutating the central values. 
pseudodata = cd.central_values.to_numpy() pseudodatas.append(pseudodata) + if separate_multiplicative: + mult_errors = cd.multiplicative_errors + mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() + mult_corr_errors = mult_errors.loc[:, mult_errors.columns == "CORR"].to_numpy() + nonspecial_mult.append( (mult_uncorr_errors, mult_corr_errors) ) + special_mult.append( + mult_errors.loc[:, ~mult_errors.columns.isin(INTRA_DATASET_SYS_NAME)] + ) if "ASY" in cd.commondataproc: check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) else: @@ -166,11 +176,32 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat # The inner while True loop is for ensuring a positive definite # pseudodata replica + if separate_multiplicative: + special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() all_pseudodata = np.concatenate(pseudodatas, axis=0) full_mask=np.concatenate(check_positive_masks, axis=0) while True: + if separate_multiplicative: + mult_shifts = [] + # Prepare the per-dataset multiplicative shifts + for mult_uncorr_errors, mult_corr_errors in nonspecial_mult: + # convert to from percent to fraction + mult_shift = ( + 1 + mult_uncorr_errors * rng.normal(size=mult_uncorr_errors.shape) / 100 + ).prod(axis=1) + + mult_shift *= ( + 1 + mult_corr_errors * rng.normal(size=(1, mult_corr_errors.shape[1])) / 100 + ).prod(axis=1) + + mult_shifts.append(mult_shift) + special_mult = (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) + shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) - shifted_pseudodata = (all_pseudodata + shifts) + mult_part = 1. 
+ if separate_multiplicative: + mult_part = np.concatenate(mult_shifts, axis=0)*special_mult + shifted_pseudodata = (all_pseudodata*mult_part + shifts) if np.all(shifted_pseudodata[full_mask] >= 0): break From 0497427394b65f16aaabf990ccb8d750481f644e Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 15:55:33 +0100 Subject: [PATCH 36/64] Providing defaults --- n3fit/src/n3fit/scripts/n3fit_exec.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 07ed72de9e..f17274bdbc 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -145,23 +145,25 @@ def from_yaml(cls, o, *args, **kwargs): validation_action = namespace + "validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) - #Theorycovmat_flags + #Theorycovmat flags and defaults N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = None N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = None + import ipdb; ipdb.set_trace() if (thconfig:=file_content.get('theorycovmatconfig')) is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True - N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting') - N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling') - N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') - N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') + N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting') if thconfig.get('use_thcovmat_in_fitting') is not None else True + N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling') if 
thconfig.get('use_thcovmat_in_sampling') is not None else True + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') if thconfig.get('use_user_uncertainties') is not None else False + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') if thconfig.get('use_scalevar_uncertainties') is not None else True + #Sampling flags and defaults N3FIT_FIXED_CONFIG['use_t0_sampling'] = False N3FIT_FIXED_CONFIG['separate_multiplicative'] = False if (sam_t0:=file_content.get('sampling_t0')) is not None: - N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0') - N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative') + N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0') if sam_t0.get('use_t0') is not None else False + N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative') if sam_t0.get('separate_multiplicative') is not None else False N3FIT_FIXED_CONFIG['use_t0_fitting'] = True if(fit_t0:=file_content.get('fitting_t0')) is not None: N3FIT_FIXED_CONFIG['use_t0_fitting'] = fit_t0.get('use_t0') From f52c492f72497dabea37b6738fd28554989fd229 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 16:03:37 +0100 Subject: [PATCH 37/64] Changed flags names and added comments --- n3fit/src/n3fit/scripts/n3fit_exec.py | 5 +++-- validphys2/src/validphys/covmats.py | 9 +++++---- validphys2/src/validphys/pseudodata.py | 12 ++++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index f17274bdbc..6cdc7e2cd0 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -161,11 +161,12 @@ def from_yaml(cls, o, *args, **kwargs): #Sampling flags and defaults N3FIT_FIXED_CONFIG['use_t0_sampling'] = False N3FIT_FIXED_CONFIG['separate_multiplicative'] = False - if (sam_t0:=file_content.get('sampling_t0')) is 
not None: + if (sam_t0:=file_content.get('sampling')) is not None: N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0') if sam_t0.get('use_t0') is not None else False N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative') if sam_t0.get('separate_multiplicative') is not None else False N3FIT_FIXED_CONFIG['use_t0_fitting'] = True - if(fit_t0:=file_content.get('fitting_t0')) is not None: + #Fitting flags and defaults + if(fit_t0:=file_content.get('fitting')) is not None: N3FIT_FIXED_CONFIG['use_t0_fitting'] = fit_t0.get('use_t0') file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 4759446962..83c19bf40b 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -203,6 +203,7 @@ def dataset_inputs_covmat_from_systematics( data_input, _list_of_central_values ): + #used if we want to separate additive and multiplicative errors in make_replica if only_additive: sys_errors = cd.additive_errors else: @@ -353,7 +354,7 @@ def loaded_theory_covmat(output_path, ): if theory_covmat_flag is False: return np.array([]) - + #Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = None if use_user_uncertainties is True: if use_scalevar_uncertainties is True: @@ -365,12 +366,12 @@ def loaded_theory_covmat(output_path, theorypath = output_path/"tables"/generic_path theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) - #change ordering + #change ordering according to exp_covmat (so according to runcard order) tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) bb = [str(i) for i in data_input] return tmp.reindex(index=bb, columns=bb, level=0).values - +#Function called by make_replica def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, *, data_input, @@ -412,7 +413,7 
@@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, return covmat - +#Function called by n3fit_data def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, *, data_input, diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 2c08d9a006..3dc6dc7f15 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -161,6 +161,8 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat pseudodata = cd.central_values.to_numpy() pseudodatas.append(pseudodata) + #Separation of multiplicative errors. If separate_multiplicative is True also the exp_covmat is produced + # without multiplicative errors if separate_multiplicative: mult_errors = cd.multiplicative_errors mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() @@ -173,13 +175,13 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat check_positive_masks.append(np.zeros_like(pseudodata, dtype=bool)) else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) - - # The inner while True loop is for ensuring a positive definite - # pseudodata replica + #concatenating special multiplicative errors, pseudodatas and positive mask if separate_multiplicative: special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() all_pseudodata = np.concatenate(pseudodatas, axis=0) full_mask=np.concatenate(check_positive_masks, axis=0) + # The inner while True loop is for ensuring a positive definite + # pseudodata replica while True: if separate_multiplicative: mult_shifts = [] @@ -196,12 +198,14 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat mult_shifts.append(mult_shift) special_mult = (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) - + #Additive shifts (if separate_multiplicative is True) or total shifts (if 
separate_multiplicative is False) shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) mult_part = 1. if separate_multiplicative: mult_part = np.concatenate(mult_shifts, axis=0)*special_mult + #Shifting pseudodata shifted_pseudodata = (all_pseudodata*mult_part + shifts) + #positivity control if np.all(shifted_pseudodata[full_mask] >= 0): break From d6158062f0e83ec638586c40dc333ce3f1ff9e7f Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 17:09:24 +0100 Subject: [PATCH 38/64] Fixing flags and covmats --- n3fit/src/n3fit/scripts/n3fit_exec.py | 19 +++---- validphys2/src/validphys/covmats.py | 69 +++++++------------------- validphys2/src/validphys/pseudodata.py | 36 +++++++------- 3 files changed, 45 insertions(+), 79 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 6cdc7e2cd0..73dc544b71 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -151,23 +151,20 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = None N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = None - import ipdb; ipdb.set_trace() if (thconfig:=file_content.get('theorycovmatconfig')) is not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True - N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting') if thconfig.get('use_thcovmat_in_fitting') is not None else True - N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling') if thconfig.get('use_thcovmat_in_sampling') is not None else True - N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties') if thconfig.get('use_user_uncertainties') is not None else False - N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties') if thconfig.get('use_scalevar_uncertainties') is not None else True + 
N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting', True) + N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling', True) + N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties', False) + N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties', True) #Sampling flags and defaults N3FIT_FIXED_CONFIG['use_t0_sampling'] = False N3FIT_FIXED_CONFIG['separate_multiplicative'] = False if (sam_t0:=file_content.get('sampling')) is not None: - N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0') if sam_t0.get('use_t0') is not None else False - N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative') if sam_t0.get('separate_multiplicative') is not None else False - N3FIT_FIXED_CONFIG['use_t0_fitting'] = True - #Fitting flags and defaults - if(fit_t0:=file_content.get('fitting')) is not None: - N3FIT_FIXED_CONFIG['use_t0_fitting'] = fit_t0.get('use_t0') + N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0', False) + N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative', False) + #Fitting flag + N3FIT_FIXED_CONFIG['use_t0_fitting'] = file_content.get('fitting').get('use_t0', True) file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 83c19bf40b..0a4e660b06 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -136,7 +136,7 @@ def dataset_inputs_covmat_from_systematics( use_weights_in_covmat=True, norm_threshold=None, _list_of_central_values=None, - only_additive=False, + _only_additive=False, ): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. 
@@ -204,7 +204,7 @@ def dataset_inputs_covmat_from_systematics( _list_of_central_values ): #used if we want to separate additive and multiplicative errors in make_replica - if only_additive: + if _only_additive: sys_errors = cd.additive_errors else: sys_errors = cd.systematic_errors(central_values) @@ -372,7 +372,7 @@ def loaded_theory_covmat(output_path, return tmp.reindex(index=bb, columns=bb, level=0).values #Function called by make_replica -def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, +def dataset_inputs_sampling_covmat(dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, @@ -384,33 +384,9 @@ def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, use_t0_sampling, separate_multiplicative, ): - if use_t0_sampling is False: - covmat = dataset_inputs_t0_total_covmat( - dataset_inputs_loaded_cd_with_cuts, - data_input=data_input, - use_weights_in_covmat=use_weights_in_covmat, - norm_threshold=norm_threshold, - dataset_inputs_t0_predictions=None, - loaded_theory_covmat=loaded_theory_covmat, - theory_covmat_flag=theory_covmat_flag, - use_thcovmat_in_fitting=use_thcovmat_in_sampling, - use_t0_fitting=use_t0_sampling, - only_add = separate_multiplicative - ) - else: - covmat = dataset_inputs_t0_total_covmat( - dataset_inputs_loaded_cd_with_cuts, - data_input=data_input, - use_weights_in_covmat=use_weights_in_covmat, - norm_threshold=norm_threshold, - dataset_inputs_t0_predictions=dataset_inputs_t0_predictions, - loaded_theory_covmat=loaded_theory_covmat, - theory_covmat_flag=theory_covmat_flag, - use_thcovmat_in_fitting=use_thcovmat_in_sampling, - use_t0_fitting=use_t0_sampling, - only_add = separate_multiplicative - ) - + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions if use_t0_sampling is True else None, separate_multiplicative ) + if theory_covmat_flag and use_thcovmat_in_sampling: + covmat += 
loaded_theory_covmat return covmat #Function called by n3fit_data @@ -426,27 +402,20 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, use_t0_fitting, only_add=False, ): - if use_t0_fitting is False: - exp_covmat = dataset_inputs_covmat_from_systematics( - dataset_inputs_loaded_cd_with_cuts, - data_input, - use_weights_in_covmat, - norm_threshold=norm_threshold, - _list_of_central_values=None, - only_additive = only_add - ) - else: - exp_covmat = dataset_inputs_covmat_from_systematics( - dataset_inputs_loaded_cd_with_cuts, - data_input, - use_weights_in_covmat, - norm_threshold=norm_threshold, - _list_of_central_values=dataset_inputs_t0_predictions, - only_additive = only_add - ) + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions if use_t0_fitting is True else None, only_add) if theory_covmat_flag and use_thcovmat_in_fitting: - return exp_covmat + loaded_theory_covmat - return exp_covmat + covmat += loaded_theory_covmat + return covmat + +def generate_exp_covmat(datasets_input, data, use_weights, norm_thre, _list_of_c_values, only_add): + return dataset_inputs_covmat_from_systematics( + datasets_input, + data, + use_weights, + norm_threshold=norm_thre, + _list_of_central_values=_list_of_c_values, + _only_additive = only_add + ) def sqrt_covmat(covariance_matrix): diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 3dc6dc7f15..f57213eb7f 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_total_covmat, separate_multiplicative, genrep=True, ): +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, 
replica_mcseed, dataset_inputs_sampling_covmat, separate_multiplicative, genrep=True, ): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. @@ -149,7 +149,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat name_seed = int(hashlib.sha256(name_salt.encode()).hexdigest(), 16) % 10 ** 8 rng = np.random.default_rng(seed=replica_mcseed+name_seed) #construct covmat - covmat = dataset_inputs_total_covmat + covmat = dataset_inputs_sampling_covmat covmat_sqrt = sqrt_covmat(covmat) #Loading the data pseudodatas = [] @@ -183,28 +183,28 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat # The inner while True loop is for ensuring a positive definite # pseudodata replica while True: - if separate_multiplicative: - mult_shifts = [] - # Prepare the per-dataset multiplicative shifts - for mult_uncorr_errors, mult_corr_errors in nonspecial_mult: - # convert to from percent to fraction - mult_shift = ( - 1 + mult_uncorr_errors * rng.normal(size=mult_uncorr_errors.shape) / 100 - ).prod(axis=1) - - mult_shift *= ( - 1 + mult_corr_errors * rng.normal(size=(1, mult_corr_errors.shape[1])) / 100 - ).prod(axis=1) - - mult_shifts.append(mult_shift) - special_mult = (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) + mult_shifts = [] + # Prepare the per-dataset multiplicative shifts + for mult_uncorr_errors, mult_corr_errors in nonspecial_mult: + # convert to from percent to fraction + mult_shift = ( + 1 + mult_uncorr_errors * rng.normal(size=mult_uncorr_errors.shape) / 100 + ).prod(axis=1) + + mult_shift *= ( + 1 + mult_corr_errors * rng.normal(size=(1, mult_corr_errors.shape[1])) / 100 + ).prod(axis=1) + + mult_shifts.append(mult_shift) + #Additive shifts (if separate_multiplicative is True) or total shifts (if separate_multiplicative is 
False) shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) mult_part = 1. if separate_multiplicative: + special_mult = (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) mult_part = np.concatenate(mult_shifts, axis=0)*special_mult #Shifting pseudodata - shifted_pseudodata = (all_pseudodata*mult_part + shifts) + shifted_pseudodata = (all_pseudodata + shifts)*mult_part #positivity control if np.all(shifted_pseudodata[full_mask] >= 0): break From 585e69a8a6ac404901df9dfb4cac367868796c89 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 25 Mar 2022 17:40:11 +0100 Subject: [PATCH 39/64] Changed default --- n3fit/src/n3fit/scripts/n3fit_exec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 73dc544b71..4da1f2db0d 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -159,10 +159,10 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties', True) #Sampling flags and defaults N3FIT_FIXED_CONFIG['use_t0_sampling'] = False - N3FIT_FIXED_CONFIG['separate_multiplicative'] = False + N3FIT_FIXED_CONFIG['separate_multiplicative'] = True if (sam_t0:=file_content.get('sampling')) is not None: N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0', False) - N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative', False) + N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative', True) #Fitting flag N3FIT_FIXED_CONFIG['use_t0_fitting'] = file_content.get('fitting').get('use_t0', True) file_content.update(N3FIT_FIXED_CONFIG) From 9a1e9f63a0da1eb56a426ced43ae2b6bcbe4f677 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 28 Mar 2022 12:02:21 +0200 Subject: [PATCH 40/64] Added new sampling flag in runcards --- 
n3fit/runcards/examples/Basic_feature_scaling.yml | 4 +++- n3fit/runcards/examples/Basic_hyperopt.yml | 4 ++++ n3fit/runcards/examples/Basic_runcard.yml | 4 ++++ n3fit/runcards/examples/Basic_runcard_parallel.yml | 3 +++ n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml | 4 ++++ n3fit/runcards/examples/developing.yml | 4 ++++ .../examples/theory_covariance/Fit_with_theory_covmat.yml | 5 ++--- 7 files changed, 24 insertions(+), 4 deletions(-) diff --git a/n3fit/runcards/examples/Basic_feature_scaling.yml b/n3fit/runcards/examples/Basic_feature_scaling.yml index 13a7f11f82..e5e305e880 100644 --- a/n3fit/runcards/examples/Basic_feature_scaling.yml +++ b/n3fit/runcards/examples/Basic_feature_scaling.yml @@ -32,7 +32,9 @@ datacuts: ############################################################ theory: theoryid: 200 # database id - +sampling: + use_t0: false + separate_multiplicative: true ############################################################ trvlseed: 1 nnseed: 2 diff --git a/n3fit/runcards/examples/Basic_hyperopt.yml b/n3fit/runcards/examples/Basic_hyperopt.yml index a8de9c80f4..6c69cbffd8 100644 --- a/n3fit/runcards/examples/Basic_hyperopt.yml +++ b/n3fit/runcards/examples/Basic_hyperopt.yml @@ -48,6 +48,10 @@ datacuts: theory: theoryid: 200 # database id +sampling: + use_t0: false + separate_multiplicative: true + hyperscan_config: stopping: min_epochs: 1e3 diff --git a/n3fit/runcards/examples/Basic_runcard.yml b/n3fit/runcards/examples/Basic_runcard.yml index 062beb3094..53159b8c86 100644 --- a/n3fit/runcards/examples/Basic_runcard.yml +++ b/n3fit/runcards/examples/Basic_runcard.yml @@ -33,6 +33,10 @@ datacuts: theory: theoryid: 200 # database id +sampling: + use_t0: false + separate_multiplicative: true + parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] activation_per_layer: ['sigmoid', 'sigmoid', 'linear'] diff --git a/n3fit/runcards/examples/Basic_runcard_parallel.yml 
b/n3fit/runcards/examples/Basic_runcard_parallel.yml index 837e70ac37..2a17f8f384 100644 --- a/n3fit/runcards/examples/Basic_runcard_parallel.yml +++ b/n3fit/runcards/examples/Basic_runcard_parallel.yml @@ -34,6 +34,9 @@ datacuts: theory: theoryid: 162 # database id +sampling: + use_t0: false + separate_multiplicative: true ############################################################ trvlseed: 1 nnseed: 2 diff --git a/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml index 1c76cc3697..1300558b4e 100644 --- a/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml +++ b/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml @@ -56,6 +56,10 @@ datacuts: theory: theoryid: 162 # database id +sampling: + use_t0: false + separate_multiplicative: true + ############################################################ trvlseed: 1 nnseed: 2 diff --git a/n3fit/runcards/examples/developing.yml b/n3fit/runcards/examples/developing.yml index 3a174f19ad..643492e353 100644 --- a/n3fit/runcards/examples/developing.yml +++ b/n3fit/runcards/examples/developing.yml @@ -62,6 +62,10 @@ datacuts: theory: theoryid: 200 # database id +sampling: + use_t0: false + separate_multiplicative: true + ############################################################ trvlseed: 1 nnseed: 2 diff --git a/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml b/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml index 2a2bed7222..7fc96563f4 100644 --- a/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml +++ b/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml @@ -84,10 +84,9 @@ theorycovmatconfig: use_thcovmat_in_fitting: true use_thcovmat_in_sampling: true -sampling_t0: +sampling: use_t0: false -fitting_t0: - use_t0: true + separate_multiplicative: false ############################################################ trvlseed: 376191634 From 166873c78b21160f58b526428880939b0ec27788 Mon Sep 17 00:00:00 
2001 From: andreab1997 Date: Wed, 30 Mar 2022 18:33:02 +0200 Subject: [PATCH 41/64] Starting fix of tests --- validphys2/src/validphys/tests/conftest.py | 2 +- .../validphys/tests/test_pythonmakereplica.py | 45 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index cce61eb541..c96d8b70bc 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -51,7 +51,7 @@ def tmp(tmpdir): THEORYID = 162 FIT = "NNPDF40_nnlo_lowprecision" FIT_ITERATED = "NNPDF40_nnlo_low_precision_iterated" -PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_220208" +PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_220330" base_config = dict( pdf=PDF, diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index 429628e46d..fdd706d271 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -12,6 +12,7 @@ from validphys.api import API from validphys.pseudodata import make_replica +from validphys.covmats import dataset_inputs_sampling_covmat from validphys.tests.conftest import DATA from validphys.tests.test_covmats import CORR_DATA @@ -21,9 +22,9 @@ SINGLE_SYS_DATASETS = [ {"dataset": "DYE886R"}, {"dataset": "D0ZRAP", "cfac": ["QCD"]}, - {"dataset": "ATLAS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, - {"dataset": "CMS_SINGLETOP_TCH_R_8TEV", "cfac": ["QCD"]}, - {"dataset": "CMS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, + # {"dataset": "ATLAS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, + # {"dataset": "CMS_SINGLETOP_TCH_R_8TEV", "cfac": ["QCD"]}, + # {"dataset": "CMS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, ] @@ -38,14 +39,24 @@ def test_commondata_unchanged(data_config, dataset_inputs, use_cuts): config = dict(data_config) config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts 
+ config["replica_mcseed"] = SEED + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) # keep a copy of all dataframes/series pre make replica pre_mkrep_cvs = [deepcopy(cd.central_values) for cd in ld_cds] pre_mkrep_sys_tabs = [deepcopy(cd.systematics_table) for cd in ld_cds] pre_mkrep_cd_tabs = [deepcopy(cd.commondata_table) for cd in ld_cds] - - make_replica(ld_cds, SEED) + + + make_replica = API.make_replica(**config) for post_mkrep_cd, pre_mkrep_cv in zip(ld_cds, pre_mkrep_cvs): assert_series_equal(post_mkrep_cd.central_values, pre_mkrep_cv) @@ -70,6 +81,14 @@ def test_pseudodata_seeding(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None rep_1 = API.make_replica(**config) rep_2 = API.make_replica(**config) np.testing.assert_allclose(rep_1, rep_2) @@ -83,6 +102,14 @@ def test_pseudodata_has_correct_ndata(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + 
config["dataset_t0_predictions"] = None ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) rep = API.make_replica(**config) ndata = np.sum([cd.ndata for cd in ld_cds]) @@ -97,6 +124,14 @@ def test_genrep_off(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None config["genrep"] = False ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) not_replica = API.make_replica(**config) From 05d3d619dbc20ba927e6b251183c280499f2edb1 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 09:44:19 +0200 Subject: [PATCH 42/64] Fixed test_pythonmakereplica --- validphys2/src/validphys/tests/test_pythonmakereplica.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index fdd706d271..649cb94870 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -22,9 +22,9 @@ SINGLE_SYS_DATASETS = [ {"dataset": "DYE886R"}, {"dataset": "D0ZRAP", "cfac": ["QCD"]}, - # {"dataset": "ATLAS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, - # {"dataset": "CMS_SINGLETOP_TCH_R_8TEV", "cfac": ["QCD"]}, - # {"dataset": "CMS_SINGLETOP_TCH_R_13TEV", "cfac": ["QCD"]}, + {"dataset": "NMC"}, + {"dataset": "NMCPD"}, + {"dataset": "ATLASZPT8TEVMDIST", "cfac": ["QCD"], } ] From 419e9b02603c909c05f89bf0270c24be8660ccbd Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 09:56:04 +0200 Subject: [PATCH 43/64] Fixed test_pseudodata --- 
.../src/validphys/tests/test_pseudodata.py | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 413975af7c..cfffc124c4 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -38,8 +38,17 @@ def test_read_pdf_pseudodata(): def test_recreate_fit_pseudodata(): - fit_pseudodata = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) - + config = {} + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None + + fit_pseudodata = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT, **config) nrep = API.num_fitted_replicas(fit=PSEUDODATA_FIT) assert nrep == len(fit_pseudodata) @@ -49,7 +58,17 @@ def test_recreate_fit_pseudodata(): def test_recreate_pdf_pseudodata(): - pdf_pseudodata = API.recreate_pdf_pseudodata(fit=PSEUDODATA_FIT) + config = {} + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None + + pdf_pseudodata = API.recreate_pdf_pseudodata(fit=PSEUDODATA_FIT, **config) pdf = API.pdf(pdf=PSEUDODATA_FIT) # -1 because we ignore replica 0 @@ -70,8 +89,18 @@ def test_no_savepseudodata(): def test_read_matches_recreate(): + config = {} + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] 
= None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None + reads = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) - recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) + recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT, **config) for read, recreate in zip(reads, recreates): # We ignore the absolute ordering of the dataframes and just check # that they contain identical elements. From 0ce016bf2885c1152c2f7bb056da7e149498c4e1 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 10:40:58 +0200 Subject: [PATCH 44/64] Fixed regressions test --- .../regressions/test_art_rep_generation.csv | 532 +++++++++--------- .../tests/regressions/test_mcreplica.csv | 532 +++++++++--------- .../src/validphys/tests/test_regressions.py | 18 + 3 files changed, 550 insertions(+), 532 deletions(-) diff --git a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv index 2218a3d302..f2b40b260b 100644 --- a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv +++ b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv @@ -1,267 +1,267 @@ rep0 -0 633490.4999226296 -1 650261.4618599716 -2 688011.4163880365 -3 664091.4097540335 -4 693555.2641191484 -5 695715.4655630704 -6 670367.1509971565 -7 692724.5263929808 -8 710404.2079007125 -9 670571.228296545 -10 619448.305414312 -11 491397.84974347835 -12 495948.11467374745 -13 512684.37198035215 -14 475283.89356928284 -15 470365.38567149243 -16 469487.547361425 -17 420953.3285008754 -18 425338.16010336106 -19 413137.7442684508 -20 390485.1231199583 -21 374509.76609374065 -22 143091.91023026235 -23 139931.3658359923 -24 136957.38229116378 -25 127472.41710436667 -26 120202.13033124941 -27 111275.00387771968 -28 103809.81938520276 -29 59552.351903938026 -30 209.37695989099717 -31 98.40316902040941 -32 49.2497740869741 -33 27.58434933202042 -34 18.35526412499345 -35 
10.554875054776277 -36 7.777557841643708 -37 4.417052698318435 -38 1.5742575378573114 -39 0.422612825478562 -40 0.13682132397836097 -41 0.03510263367447681 -42 0.004698257316456899 -43 11890.155989445098 -44 22359.68289507337 -45 14101.342891956067 -46 6644.69729342571 -47 2822.6308894773833 -48 1233.8596869120365 -49 579038.4258179407 -50 576528.3107445787 -51 581701.7689447994 -52 588179.7102308077 -53 586945.2879102776 -54 604111.3407156208 -55 593258.6487213245 -56 603644.5177219567 -57 608495.1908181651 -58 595509.225319293 -59 561644.3506746119 -60 436237.8328453793 -61 435556.63818005344 -62 431171.1027183144 -63 425292.60897198925 -64 415586.54611315456 -65 405922.60305236233 -66 387996.23035792325 -67 378919.66171730385 -68 365187.23098249844 -69 345762.7628760283 -70 320757.44589791953 -71 136209.6577787449 -72 135408.40065666087 -73 134316.25944452107 -74 133348.61881629226 -75 132887.679683866 -76 129235.91741349488 -77 120553.29128631057 -78 107942.42572728968 -79 90329.54699860106 -80 69302.74039584062 -81 45839.59047891375 -82 22432.82813378355 -83 9204.06801064637 -84 2724.250400232874 -85 1001.5846561665476 -86 433.63971386587485 -87 207.72541129939756 -88 102.3527345822488 -89 58.628672392597316 -90 28.50573482692631 -91 13.229832355932025 -92 0.5546100674751899 -93 9170.664054514693 -94 2661.1195560543483 -95 992.6102343262079 -96 417.5115980431531 -97 203.73258445861532 -98 101.88763674123366 -99 56.2673681686863 -100 28.900503278349674 -101 13.009329569045816 -102 0.5764836327549459 -103 8568.748117623398 -104 2416.53129892463 -105 887.5757353714051 -106 394.4640262463605 -107 190.12804409242128 -108 98.08182648251439 -109 51.761056671616444 -110 26.527093697402574 -111 12.676558387722144 -112 0.5027289657279904 -113 6465.658336294937 -114 1800.551276557205 -115 666.0656986190276 -116 300.32016624475597 -117 152.2745389765957 -118 77.58629999369195 -119 44.07851992478712 -120 23.30915949239041 -121 10.17717272646144 -122 0.4651782628151642 -123 
3466.7287990772666 -124 960.8232085350094 -125 358.08945526237744 -126 163.4472817553788 -127 84.17962445230033 -128 42.59359310555523 -129 26.95446134124415 -130 13.366775641441505 -131 6.607137755402402 -132 0.2472836246490158 -133 2691.727704806833 -134 1216.1771113589175 -135 581.1143546293026 -136 283.2352804587056 -137 148.2496696025246 -138 77.96068412499022 -139 42.38768130911187 -140 24.29352252952324 -141 13.205430976599937 -142 7.349502782237547 -143 4.204767506027611 -144 2.4556061322752907 -145 1.448595872394093 -146 0.817431643220775 -147 0.4841028836507029 -148 0.29992720050763594 -149 0.17031987066085846 -150 0.10120543307860101 -151 0.06134131461370317 -152 0.03658041578528522 -153 0.021345041671104976 -154 0.012748720313588528 -155 0.007079082347174064 -156 0.003920213438042602 -157 0.002266670398859797 -158 0.0014068623785516484 -159 0.0007513603856672534 -160 0.00033740998986572925 -161 0.00016370264643019477 -162 0.00010113687190000773 -163 4.525970514575044e-05 -164 1.8064765076633583e-05 -165 1.434493690063622e-06 -166 2466.826648405205 -167 1113.2742930039678 -168 535.9468272506306 -169 250.19429369801443 -170 132.96755857265154 -171 70.6198804882995 -172 37.2444149240981 -173 21.46556619847905 -174 11.287970463262605 -175 6.591322598971706 -176 3.6586429332661687 -177 2.1186220677169865 -178 1.2260832812420408 -179 0.7094811187504051 -180 0.38729896744561965 -181 0.2258204394497233 -182 0.1306948563058494 -183 0.07545757928951509 -184 0.044310105288621386 -185 0.024849304446966775 -186 0.014180797728865515 -187 0.007608922263314865 -188 0.003937701172351607 -189 0.0021192067593864285 -190 0.0013144949753359151 -191 0.0006952643789509187 -192 0.00029438472123852715 -193 0.0001029150079447012 -194 7.024237650184838e-05 -195 1.4231373825039173e-05 -196 2078.530929761868 -197 907.7647672215089 -198 431.5896937531928 -199 215.9403805895772 -200 110.3274783987277 -201 57.820061820962856 -202 29.70620911063281 -203 15.918599460271942 -204 
8.646382304531874 -205 4.722869628510812 -206 2.6117359802996165 -207 1.4408245600491343 -208 0.803045110254087 -209 0.42593349818856957 -210 0.24098706675457796 -211 0.1293749973321942 -212 0.07057632484179366 -213 0.03810000565431785 -214 0.02007769399362644 -215 0.010133616137379944 -216 0.005227992681746092 -217 0.0024808929237388057 -218 0.0012284922451901911 -219 0.0004993039349341299 -220 0.00025281147908095213 -221 0.00011737575595207742 -222 9.105845875762706e-06 -223 1606.9097587694307 -224 711.7482571522985 -225 313.8396903804125 -226 158.93758772287657 -227 76.47143733017214 -228 38.053039165069414 -229 18.952823855270637 -230 9.5026302772685 -231 5.238349038476234 -232 2.6159380360213067 -233 1.330376652993563 -234 0.7225972667790801 -235 0.35570265864019274 -236 0.18080570222179237 -237 0.08056397313905127 -238 0.037123382073522726 -239 0.01653051688656446 -240 0.007731969915574423 -241 0.0033356949025333074 -242 0.0012972611712680079 -243 0.0004729677145880346 -244 0.0001525652631024003 -245 7.622510019677747e-05 -246 1.2950899542674862e-05 -247 1159.5448650238645 -248 480.2388603670686 -249 209.60810586244082 -250 101.03451630330463 -251 44.08780902603859 -252 21.1843288040316 -253 9.661218648442901 -254 3.836918842507069 -255 1.7810655783793576 -256 0.7994362264679773 -257 0.3179922710449306 -258 0.12547361241921207 -259 0.048151646375884455 -260 0.01587115985391355 -261 0.005406454268369427 -262 0.0016890696522439395 -263 0.0003397057859191928 -264 9.542714923424896e-05 -265 1.1299817548038665e-05 +0 572338.2619436508 +1 596772.0069843959 +2 622904.7201631162 +3 607453.75642249 +4 637687.7926100292 +5 648667.2363033721 +6 627358.5180855879 +7 635201.25311407 +8 641790.2072383957 +9 612196.3033407939 +10 566873.4104745645 +11 459963.15651932725 +12 449188.0970550337 +13 465168.9144436549 +14 437789.5177336811 +15 423294.806643127 +16 424770.8407629348 +17 384322.06275548146 +18 383728.5860944929 +19 379658.20346904657 +20 358425.79045571556 +21 
336036.764622581 +22 128270.33907424095 +23 127313.63435401936 +24 121747.84807633635 +25 116081.26162372652 +26 112542.18223029248 +27 109322.01308160098 +28 94651.56123910741 +29 49971.329309147884 +30 214.5479042739518 +31 97.06060692325975 +32 47.9983713987643 +33 26.299866399550105 +34 16.94687166264714 +35 10.674622552292847 +36 8.784023835631519 +37 4.4048614269844775 +38 1.6682536560225298 +39 0.421406486396608 +40 0.1376189308949718 +41 0.02252926875035294 +42 0.00018468057557725955 +43 11910.238104190781 +44 22369.93892247344 +45 14797.936272861301 +46 6556.219118746482 +47 2865.0757989137924 +48 1259.8980642461127 +49 567698.8603346754 +50 567231.134899294 +51 571212.053448358 +52 575226.4858537482 +53 575045.0416090337 +54 587004.4762809476 +55 586441.5376787345 +56 592387.6620752425 +57 594126.9775321253 +58 583041.4672596841 +59 547693.7991397335 +60 429652.56814327586 +61 424818.35770645714 +62 422565.22325764695 +63 416267.7226266565 +64 405667.99481589353 +65 397626.28077965864 +66 379853.848159021 +67 371228.72940974287 +68 358734.3267166241 +69 337883.7207831668 +70 315011.79304459266 +71 132494.1071343203 +72 132145.16915198253 +73 132101.72480913674 +74 130755.78839421351 +75 129719.95938137927 +76 126659.82525026547 +77 117414.79348046287 +78 105252.57820714905 +79 87925.2709898477 +80 67111.09477549692 +81 44527.73196420436 +82 21692.56196485259 +83 9934.576451247054 +84 2937.440410068339 +85 1083.5592811814865 +86 465.40181383891604 +87 225.25190998608966 +88 109.99883961788008 +89 63.0372160442829 +90 30.778925085918033 +91 13.842990269969459 +92 0.6078909848538023 +93 9916.473005193431 +94 2887.2034765574404 +95 1060.0864661901162 +96 446.7471535874767 +97 215.75567549660968 +98 111.47773698101243 +99 58.82695809985779 +100 30.295930272035996 +101 14.43305658770841 +102 0.6112944798426417 +103 9282.3667409623 +104 2597.7270482741005 +105 936.6038667988288 +106 415.09739332628783 +107 197.6308858348342 +108 105.14207511471372 +109 
54.245068344823565 +110 28.470488997238352 +111 14.482599019943558 +112 0.5482576962229411 +113 6952.345323407019 +114 1940.0408769623482 +115 724.2517047702736 +116 320.16834247680816 +117 162.55356259267228 +118 85.29441797340209 +119 48.50339016924902 +120 23.038277268015324 +121 12.191075048632776 +122 0.4607532958378488 +123 3747.6185874706525 +124 1008.6514704783864 +125 374.2053933966203 +126 175.56425446021737 +127 87.0644009313675 +128 45.23122737710637 +129 28.91782376119734 +130 13.799692781933386 +131 7.4527929770482775 +132 0.24351277208125405 +133 2386.887483454138 +134 1067.6544694491088 +135 531.1168891617723 +136 262.32477380121435 +137 137.93604290163785 +138 74.36469502618685 +139 39.713994766626584 +140 22.48543746088209 +141 12.647375374675915 +142 7.199406415744378 +143 4.111801672132422 +144 2.414509110905533 +145 1.4188024885552728 +146 0.8207977601292881 +147 0.47656271329837846 +148 0.2838569234645444 +149 0.16971532594913027 +150 0.1002136530650072 +151 0.0601100630068519 +152 0.035230496430079736 +153 0.02130047931965792 +154 0.012452417780681234 +155 0.0070363308999392546 +156 0.004050678360243494 +157 0.0023252509995018607 +158 0.001428092429638368 +159 0.000753361693564295 +160 0.0003607834128790654 +161 0.00020925977049832047 +162 9.078003985940052e-05 +163 3.969721474510014e-05 +164 1.534885633778826e-05 +165 2.5880644471206604e-06 +166 2247.5230380667645 +167 1028.4679700022664 +168 499.05541628854405 +169 247.0020781690897 +170 124.81653095635592 +171 65.91633551471716 +172 35.39235697842073 +173 20.093370981905924 +174 10.935357033173892 +175 6.161781311943428 +176 3.6069680587677753 +177 2.0206894942565876 +178 1.1707016914872734 +179 0.6791100435131828 +180 0.3941762190058309 +181 0.2223680311958661 +182 0.13170745216398652 +183 0.07739746584032883 +184 0.044113615887447596 +185 0.025186749610226572 +186 0.01402545626252293 +187 0.007581746469185237 +188 0.004133426152193461 +189 0.002266359431091458 +190 0.0013059919248187017 
+191 0.0006670874792954667 +192 0.00030021491334179356 +193 0.00012356062525679305 +194 6.382109942376619e-05 +195 1.1732044013110468e-05 +196 1898.2279524932765 +197 832.2492696213204 +198 393.69740641706017 +199 195.4282584335513 +200 98.62792073487915 +201 55.10552450933891 +202 27.868421731430647 +203 14.875430721551961 +204 8.114677001115693 +205 4.560309665855867 +206 2.5668972629082383 +207 1.3986338901123239 +208 0.7620528237010448 +209 0.42861160577168406 +210 0.23704920521614217 +211 0.12941470506450975 +212 0.07234763577715209 +213 0.03845214404535268 +214 0.020526248245488755 +215 0.009968215905962282 +216 0.004786435377408995 +217 0.0024036687344686568 +218 0.0013496862019566976 +219 0.0005208860313161 +220 0.0002925583561848862 +221 0.0001328309405971603 +222 1.2210439630810448e-05 +223 1308.5396880372498 +224 601.2443232515859 +225 272.73464044459627 +226 133.67432898320448 +227 66.65844945313148 +228 33.02151502379638 +229 18.35794366533659 +230 8.395711530986889 +231 4.622296733093797 +232 2.2879143443797387 +233 1.1591221566732326 +234 0.5872274532359599 +235 0.2886237949160734 +236 0.14288412822589422 +237 0.06677669893044819 +238 0.03029279654794861 +239 0.013274742601567975 +240 0.006031811843137097 +241 0.002578803153752064 +242 0.001081415571290085 +243 0.00039400902288564435 +244 0.0001225330481916096 +245 5.6198893296294255e-05 +246 3.964872743130553e-06 +247 900.3932079207191 +248 384.7256071515713 +249 167.24577326346187 +250 77.24106320690723 +251 33.37582778283644 +252 14.67078311540335 +253 7.713195971120774 +254 3.1256816112679284 +255 1.3031655722005295 +256 0.5458554262500815 +257 0.22533962883281664 +258 0.08455759158725962 +259 0.03115423657566459 +260 0.011298555215657584 +261 0.003444992062416434 +262 0.0011078793577092233 +263 0.0002600323184075804 +264 5.350718479436167e-05 +265 3.2847363505796767e-06 diff --git a/validphys2/src/validphys/tests/regressions/test_mcreplica.csv 
b/validphys2/src/validphys/tests/regressions/test_mcreplica.csv index e52f7fe4bf..68fbb927b7 100644 --- a/validphys2/src/validphys/tests/regressions/test_mcreplica.csv +++ b/validphys2/src/validphys/tests/regressions/test_mcreplica.csv @@ -1,267 +1,267 @@ group dataset id data -ATLAS ATLASWZRAP36PB 0 608652.6243943771 -ATLAS ATLASWZRAP36PB 1 593166.9777498295 -ATLAS ATLASWZRAP36PB 2 627145.2586644667 -ATLAS ATLASWZRAP36PB 3 623266.2404144956 -ATLAS ATLASWZRAP36PB 4 641980.9316518015 -ATLAS ATLASWZRAP36PB 5 650410.6100024394 -ATLAS ATLASWZRAP36PB 6 625963.8653881561 -ATLAS ATLASWZRAP36PB 7 636331.5459649436 -ATLAS ATLASWZRAP36PB 8 638515.4136194638 -ATLAS ATLASWZRAP36PB 9 632497.4265367464 -ATLAS ATLASWZRAP36PB 10 584056.6331084496 -ATLAS ATLASWZRAP36PB 11 441353.1961233583 -ATLAS ATLASWZRAP36PB 12 445688.0974579613 -ATLAS ATLASWZRAP36PB 13 445839.8817361687 -ATLAS ATLASWZRAP36PB 14 443993.21189039503 -ATLAS ATLASWZRAP36PB 15 433279.78259040223 -ATLAS ATLASWZRAP36PB 16 414244.8745479703 -ATLAS ATLASWZRAP36PB 17 385821.10672576993 -ATLAS ATLASWZRAP36PB 18 385858.5047893755 -ATLAS ATLASWZRAP36PB 19 365000.6510327925 -ATLAS ATLASWZRAP36PB 20 355820.7325257509 -ATLAS ATLASWZRAP36PB 21 338128.579487228 -ATLAS ATLASWZRAP36PB 22 126100.93179790147 -ATLAS ATLASWZRAP36PB 23 134151.7847011488 -ATLAS ATLASWZRAP36PB 24 128116.36706004967 -ATLAS ATLASWZRAP36PB 25 119310.47900387672 -ATLAS ATLASWZRAP36PB 26 109229.94264752026 -ATLAS ATLASWZRAP36PB 27 102868.10191629753 -ATLAS ATLASWZRAP36PB 28 82472.50837188475 -ATLAS ATLASWZRAP36PB 29 52326.91517728639 -ATLAS ATLASZHIGHMASS49FB 0 218.10909206523837 -ATLAS ATLASZHIGHMASS49FB 1 98.02522270644252 -ATLAS ATLASZHIGHMASS49FB 2 49.21680098974891 -ATLAS ATLASZHIGHMASS49FB 3 28.65430352779187 -ATLAS ATLASZHIGHMASS49FB 4 17.639360502700235 -ATLAS ATLASZHIGHMASS49FB 5 10.210894458810182 -ATLAS ATLASZHIGHMASS49FB 6 7.763786589036428 -ATLAS ATLASZHIGHMASS49FB 7 4.337965961581601 -ATLAS ATLASZHIGHMASS49FB 8 1.5186713965314216 -ATLAS 
ATLASZHIGHMASS49FB 9 0.3883560022466392 -ATLAS ATLASZHIGHMASS49FB 10 0.1454509170774958 -ATLAS ATLASZHIGHMASS49FB 11 0.03143501124492095 -ATLAS ATLASZHIGHMASS49FB 12 0.0006796736643361407 -ATLAS ATLASLOMASSDY11EXT 0 12107.467028441262 -ATLAS ATLASLOMASSDY11EXT 1 21810.272570642945 -ATLAS ATLASLOMASSDY11EXT 2 14713.505350353096 -ATLAS ATLASLOMASSDY11EXT 3 6369.361622125726 -ATLAS ATLASLOMASSDY11EXT 4 2740.668488628792 -ATLAS ATLASLOMASSDY11EXT 5 1193.6039252097805 -ATLAS ATLASWZRAP11 0 578903.747158099 -ATLAS ATLASWZRAP11 1 580012.9332539001 -ATLAS ATLASWZRAP11 2 586970.8779430153 -ATLAS ATLASWZRAP11 3 588210.6081543558 -ATLAS ATLASWZRAP11 4 588596.280196161 -ATLAS ATLASWZRAP11 5 601997.4945308277 -ATLAS ATLASWZRAP11 6 601952.3216430657 -ATLAS ATLASWZRAP11 7 607434.5983783993 -ATLAS ATLASWZRAP11 8 606557.7526328663 -ATLAS ATLASWZRAP11 9 597282.3705544483 -ATLAS ATLASWZRAP11 10 559860.4190578399 -ATLAS ATLASWZRAP11 11 439721.0509928435 -ATLAS ATLASWZRAP11 12 434769.4996744579 -ATLAS ATLASWZRAP11 13 432271.5970705862 -ATLAS ATLASWZRAP11 14 426744.98048986646 -ATLAS ATLASWZRAP11 15 417525.4100017134 -ATLAS ATLASWZRAP11 16 407059.0142406454 -ATLAS ATLASWZRAP11 17 390181.45325692266 -ATLAS ATLASWZRAP11 18 377906.56510553224 -ATLAS ATLASWZRAP11 19 368332.0633313512 -ATLAS ATLASWZRAP11 20 347162.17137070175 -ATLAS ATLASWZRAP11 21 320548.3884555529 -ATLAS ATLASWZRAP11 22 135263.48713291908 -ATLAS ATLASWZRAP11 23 134524.02920170207 -ATLAS ATLASWZRAP11 24 134056.46239919937 -ATLAS ATLASWZRAP11 25 132891.57683197857 -ATLAS ATLASWZRAP11 26 133166.017069511 -ATLAS ATLASWZRAP11 27 128696.5610503247 -ATLAS ATLASWZRAP11 28 120489.77708017877 -ATLAS ATLASWZRAP11 29 107409.3533679216 -ATLAS ATLASWZRAP11 30 90698.6159092256 -ATLAS ATLASWZRAP11 31 68395.27538146226 -ATLAS ATLASWZRAP11 32 45433.52324270277 -ATLAS ATLASWZRAP11 33 22111.870031541133 -CMS CMSZDIFF12 0 9929.404728476226 -CMS CMSZDIFF12 1 2914.60177780458 -CMS CMSZDIFF12 2 1079.8320156899078 -CMS CMSZDIFF12 3 
470.09859985878825 -CMS CMSZDIFF12 4 227.34108113991135 -CMS CMSZDIFF12 5 111.75318466176192 -CMS CMSZDIFF12 6 64.36889507558753 -CMS CMSZDIFF12 7 30.922326938058777 -CMS CMSZDIFF12 8 14.177295726573188 -CMS CMSZDIFF12 9 0.6430975761577076 -CMS CMSZDIFF12 10 9932.363879204115 -CMS CMSZDIFF12 11 2902.329277970309 -CMS CMSZDIFF12 12 1059.2380882325058 -CMS CMSZDIFF12 13 455.0616650558543 -CMS CMSZDIFF12 14 217.20951953229712 -CMS CMSZDIFF12 15 111.37941335250686 -CMS CMSZDIFF12 16 60.489758733907045 -CMS CMSZDIFF12 17 30.384904893688198 -CMS CMSZDIFF12 18 14.364731912483231 -CMS CMSZDIFF12 19 0.6339015074803488 -CMS CMSZDIFF12 20 9256.335456847677 -CMS CMSZDIFF12 21 2645.119618345174 -CMS CMSZDIFF12 22 952.4356429636829 -CMS CMSZDIFF12 23 425.2898678080916 -CMS CMSZDIFF12 24 202.8816989841668 -CMS CMSZDIFF12 25 102.78303589400272 -CMS CMSZDIFF12 26 56.6181419131961 -CMS CMSZDIFF12 27 29.781479511314142 -CMS CMSZDIFF12 28 13.921018573459083 -CMS CMSZDIFF12 29 0.5768562333132778 -CMS CMSZDIFF12 30 6950.816380235768 -CMS CMSZDIFF12 31 1974.6189158754028 -CMS CMSZDIFF12 32 735.3345839574494 -CMS CMSZDIFF12 33 330.69953166445646 -CMS CMSZDIFF12 34 163.44023154668602 -CMS CMSZDIFF12 35 84.77793885130951 -CMS CMSZDIFF12 36 51.0504257262343 -CMS CMSZDIFF12 37 25.23870119622656 -CMS CMSZDIFF12 38 11.385877528329235 -CMS CMSZDIFF12 39 0.48396221557117947 -CMS CMSZDIFF12 40 3782.9563428926454 -CMS CMSZDIFF12 41 1036.0723331677457 -CMS CMSZDIFF12 42 389.01489063160994 -CMS CMSZDIFF12 43 178.9788585402045 -CMS CMSZDIFF12 44 94.19635278698803 -CMS CMSZDIFF12 45 48.13802896934606 -CMS CMSZDIFF12 46 28.699586748964602 -CMS CMSZDIFF12 47 14.530437906378951 -CMS CMSZDIFF12 48 7.62313769875309 -CMS CMSZDIFF12 49 0.2735326633007505 -CMS CMSJETS11 0 2369.8463318166773 -CMS CMSJETS11 1 1116.721459875212 -CMS CMSJETS11 2 543.9553377984435 -CMS CMSJETS11 3 274.28250596257993 -CMS CMSJETS11 4 143.40837523846923 -CMS CMSJETS11 5 75.86336505536751 -CMS CMSJETS11 6 40.81978739196266 -CMS 
CMSJETS11 7 22.053860923424253 -CMS CMSJETS11 8 12.973344229269609 -CMS CMSJETS11 9 7.365405704415488 -CMS CMSJETS11 10 4.192918185068617 -CMS CMSJETS11 11 2.540941503212711 -CMS CMSJETS11 12 1.4467612809719266 -CMS CMSJETS11 13 0.8138478190340867 -CMS CMSJETS11 14 0.5034370564405674 -CMS CMSJETS11 15 0.3013508669118244 -CMS CMSJETS11 16 0.1746773957907988 -CMS CMSJETS11 17 0.10619051548285496 -CMS CMSJETS11 18 0.06275898949763745 -CMS CMSJETS11 19 0.03840111514489119 -CMS CMSJETS11 20 0.021851875417752263 -CMS CMSJETS11 21 0.013271432573749867 -CMS CMSJETS11 22 0.00764786373471376 -CMS CMSJETS11 23 0.004027339079492259 -CMS CMSJETS11 24 0.002413613160269949 -CMS CMSJETS11 25 0.0015466295056348058 -CMS CMSJETS11 26 0.0008216169391300068 -CMS CMSJETS11 27 0.0003535913201141704 -CMS CMSJETS11 28 0.00019376487728983312 -CMS CMSJETS11 29 0.00013974463878168163 -CMS CMSJETS11 30 4.1103856217883983e-05 -CMS CMSJETS11 31 1.8477905276983794e-05 -CMS CMSJETS11 32 6.579956861722431e-07 -CMS CMSJETS11 33 2199.749671474309 -CMS CMSJETS11 34 1043.8631743292726 -CMS CMSJETS11 35 503.72142901828596 -CMS CMSJETS11 36 249.58656216884282 -CMS CMSJETS11 37 128.48885455794368 -CMS CMSJETS11 38 67.75968678888103 -CMS CMSJETS11 39 37.23914862014485 -CMS CMSJETS11 40 20.638197977631393 -CMS CMSJETS11 41 11.205475791616863 -CMS CMSJETS11 42 6.403262909509072 -CMS CMSJETS11 43 3.646402301934515 -CMS CMSJETS11 44 2.1291885185734003 -CMS CMSJETS11 45 1.2308256047226098 -CMS CMSJETS11 46 0.7238074890739072 -CMS CMSJETS11 47 0.4043713052874548 -CMS CMSJETS11 48 0.23681701558595317 -CMS CMSJETS11 49 0.1367481839558163 -CMS CMSJETS11 50 0.08176407402257972 -CMS CMSJETS11 51 0.04615426582742597 -CMS CMSJETS11 52 0.026391361922424587 -CMS CMSJETS11 53 0.015381276938198444 -CMS CMSJETS11 54 0.008194807032809469 -CMS CMSJETS11 55 0.004222985663639738 -CMS CMSJETS11 56 0.0024234097723377594 -CMS CMSJETS11 57 0.00143734837937178 -CMS CMSJETS11 58 0.0007274505352408457 -CMS CMSJETS11 59 
0.00032606274094751955 -CMS CMSJETS11 60 0.00012168711487859289 -CMS CMSJETS11 61 7.531239940563878e-05 -CMS CMSJETS11 62 1.5982704839082665e-05 -CMS CMSJETS11 63 1888.718208095221 -CMS CMSJETS11 64 852.6874790470484 -CMS CMSJETS11 65 395.0211966250713 -CMS CMSJETS11 66 199.7726681499629 -CMS CMSJETS11 67 99.79368700677466 -CMS CMSJETS11 68 55.271374724330094 -CMS CMSJETS11 69 28.503519701200428 -CMS CMSJETS11 70 14.913812166501422 -CMS CMSJETS11 71 8.368950583676861 -CMS CMSJETS11 72 4.529706663784493 -CMS CMSJETS11 73 2.4871073851143555 -CMS CMSJETS11 74 1.3932795284915118 -CMS CMSJETS11 75 0.7745630737416582 -CMS CMSJETS11 76 0.44047021269459746 -CMS CMSJETS11 77 0.2317292953704745 -CMS CMSJETS11 78 0.13043282527187824 -CMS CMSJETS11 79 0.06953213090852366 -CMS CMSJETS11 80 0.03770255849306804 -CMS CMSJETS11 81 0.02014441853835702 -CMS CMSJETS11 82 0.010054687373112017 -CMS CMSJETS11 83 0.005015329906094411 -CMS CMSJETS11 84 0.00247985516533429 -CMS CMSJETS11 85 0.0012018176542779395 -CMS CMSJETS11 86 0.0005038294485451054 -CMS CMSJETS11 87 0.00025355391852091466 -CMS CMSJETS11 88 0.00013024148253486579 -CMS CMSJETS11 89 7.204445166121315e-06 -CMS CMSJETS11 90 1403.4746093065025 -CMS CMSJETS11 91 656.118560557962 -CMS CMSJETS11 92 293.6944519763029 -CMS CMSJETS11 93 142.3379268512767 -CMS CMSJETS11 94 71.98634581151227 -CMS CMSJETS11 95 33.375072852368646 -CMS CMSJETS11 96 18.395215542741234 -CMS CMSJETS11 97 9.525820022642097 -CMS CMSJETS11 98 4.887477955944698 -CMS CMSJETS11 99 2.564599814774939 -CMS CMSJETS11 100 1.2697368840606127 -CMS CMSJETS11 101 0.6551706373139751 -CMS CMSJETS11 102 0.3316318975076803 -CMS CMSJETS11 103 0.1711119282074019 -CMS CMSJETS11 104 0.07842251789035866 -CMS CMSJETS11 105 0.03593152497389695 -CMS CMSJETS11 106 0.0166299498457832 -CMS CMSJETS11 107 0.007465345520145227 -CMS CMSJETS11 108 0.0033782459859269323 -CMS CMSJETS11 109 0.0014026406473273527 -CMS CMSJETS11 110 0.0005426673988420396 -CMS CMSJETS11 111 0.00016368424865740314 
-CMS CMSJETS11 112 7.43915569783823e-05 -CMS CMSJETS11 113 1.067047084208022e-05 -CMS CMSJETS11 114 930.5156395398096 -CMS CMSJETS11 115 397.8365256789552 -CMS CMSJETS11 116 182.96047884828087 -CMS CMSJETS11 117 80.80629430647166 -CMS CMSJETS11 118 37.933765612946154 -CMS CMSJETS11 119 17.05158699899813 -CMS CMSJETS11 120 8.085977260851605 -CMS CMSJETS11 121 3.3716239469700606 -CMS CMSJETS11 122 1.5246283999715562 -CMS CMSJETS11 123 0.6822874769572232 -CMS CMSJETS11 124 0.28128028392055066 -CMS CMSJETS11 125 0.10783421842106765 -CMS CMSJETS11 126 0.04147597893499884 -CMS CMSJETS11 127 0.013550620090506716 -CMS CMSJETS11 128 0.004628634285245149 -CMS CMSJETS11 129 0.0015415520490305195 -CMS CMSJETS11 130 0.0002389847219284128 -CMS CMSJETS11 131 0.00010491820779625063 -CMS CMSJETS11 132 6.004512595258441e-06 +ATLAS ATLASWZRAP36PB 0 632751.995285571 +ATLAS ATLASWZRAP36PB 1 617041.3770628204 +ATLAS ATLASWZRAP36PB 2 657031.1685434954 +ATLAS ATLASWZRAP36PB 3 632499.3965744593 +ATLAS ATLASWZRAP36PB 4 654675.1207168144 +ATLAS ATLASWZRAP36PB 5 671000.2517548004 +ATLAS ATLASWZRAP36PB 6 664492.5507759772 +ATLAS ATLASWZRAP36PB 7 673032.1212200349 +ATLAS ATLASWZRAP36PB 8 665215.788381472 +ATLAS ATLASWZRAP36PB 9 658123.0178381058 +ATLAS ATLASWZRAP36PB 10 597391.6454749889 +ATLAS ATLASWZRAP36PB 11 444047.30792477645 +ATLAS ATLASWZRAP36PB 12 461624.00308643933 +ATLAS ATLASWZRAP36PB 13 466682.65352411283 +ATLAS ATLASWZRAP36PB 14 443015.00625158753 +ATLAS ATLASWZRAP36PB 15 445907.12168187584 +ATLAS ATLASWZRAP36PB 16 434486.1014580558 +ATLAS ATLASWZRAP36PB 17 409162.7570787424 +ATLAS ATLASWZRAP36PB 18 391449.7144861298 +ATLAS ATLASWZRAP36PB 19 382191.8381592813 +ATLAS ATLASWZRAP36PB 20 370809.4669654236 +ATLAS ATLASWZRAP36PB 21 353657.9867115815 +ATLAS ATLASWZRAP36PB 22 134625.48129342342 +ATLAS ATLASWZRAP36PB 23 135958.30317018856 +ATLAS ATLASWZRAP36PB 24 132213.084983002 +ATLAS ATLASWZRAP36PB 25 125052.71400189628 +ATLAS ATLASWZRAP36PB 26 119121.54397226803 +ATLAS ATLASWZRAP36PB 27 
108213.68507780871 +ATLAS ATLASWZRAP36PB 28 96961.99666273188 +ATLAS ATLASWZRAP36PB 29 64388.217499498656 +ATLAS ATLASZHIGHMASS49FB 0 230.863232943678 +ATLAS ATLASZHIGHMASS49FB 1 107.43186743916318 +ATLAS ATLASZHIGHMASS49FB 2 55.354769299173796 +ATLAS ATLASZHIGHMASS49FB 3 28.774798522358218 +ATLAS ATLASZHIGHMASS49FB 4 17.458543602853286 +ATLAS ATLASZHIGHMASS49FB 5 10.560106436914356 +ATLAS ATLASZHIGHMASS49FB 6 8.67634565339668 +ATLAS ATLASZHIGHMASS49FB 7 5.016068002659696 +ATLAS ATLASZHIGHMASS49FB 8 1.9123472960960715 +ATLAS ATLASZHIGHMASS49FB 9 0.5494356862069665 +ATLAS ATLASZHIGHMASS49FB 10 0.16298647575482017 +ATLAS ATLASZHIGHMASS49FB 11 0.01973183570563772 +ATLAS ATLASZHIGHMASS49FB 12 0.0018251705795726374 +ATLAS ATLASLOMASSDY11EXT 0 10808.878537570747 +ATLAS ATLASLOMASSDY11EXT 1 19705.82762538828 +ATLAS ATLASLOMASSDY11EXT 2 13611.88043321348 +ATLAS ATLASLOMASSDY11EXT 3 6742.29000774712 +ATLAS ATLASLOMASSDY11EXT 4 3062.341386829926 +ATLAS ATLASLOMASSDY11EXT 5 1144.0425488456915 +ATLAS ATLASWZRAP11 0 579892.564369993 +ATLAS ATLASWZRAP11 1 577538.777034694 +ATLAS ATLASWZRAP11 2 581347.7591704145 +ATLAS ATLASWZRAP11 3 586943.2495616123 +ATLAS ATLASWZRAP11 4 587766.8452181742 +ATLAS ATLASWZRAP11 5 602012.8788272106 +ATLAS ATLASWZRAP11 6 598972.0674475665 +ATLAS ATLASWZRAP11 7 607522.4413855699 +ATLAS ATLASWZRAP11 8 611429.4681550585 +ATLAS ATLASWZRAP11 9 593914.3178345475 +ATLAS ATLASWZRAP11 10 561082.3978170604 +ATLAS ATLASWZRAP11 11 435989.8968037723 +ATLAS ATLASWZRAP11 12 431876.8692722203 +ATLAS ATLASWZRAP11 13 430253.4855627184 +ATLAS ATLASWZRAP11 14 424616.82405726955 +ATLAS ATLASWZRAP11 15 413741.1449964676 +ATLAS ATLASWZRAP11 16 406734.1556979907 +ATLAS ATLASWZRAP11 17 388721.21275393164 +ATLAS ATLASWZRAP11 18 378010.63699750183 +ATLAS ATLASWZRAP11 19 365519.5919179437 +ATLAS ATLASWZRAP11 20 342374.11104580783 +ATLAS ATLASWZRAP11 21 318195.0037195112 +ATLAS ATLASWZRAP11 22 135688.59397551182 +ATLAS ATLASWZRAP11 23 135304.3146698204 +ATLAS ATLASWZRAP11 24 
134892.22436536354 +ATLAS ATLASWZRAP11 25 134095.36778946864 +ATLAS ATLASWZRAP11 26 133245.75835627335 +ATLAS ATLASWZRAP11 27 130310.38801630742 +ATLAS ATLASWZRAP11 28 120143.11243095703 +ATLAS ATLASWZRAP11 29 107797.31258370636 +ATLAS ATLASWZRAP11 30 90352.71360992908 +ATLAS ATLASWZRAP11 31 68682.15364616222 +ATLAS ATLASWZRAP11 32 45813.25437907438 +ATLAS ATLASWZRAP11 33 22330.442530746688 +CMS CMSZDIFF12 0 10016.306542097831 +CMS CMSZDIFF12 1 2974.426946446786 +CMS CMSZDIFF12 2 1098.561402409915 +CMS CMSZDIFF12 3 468.0042581959393 +CMS CMSZDIFF12 4 227.29925782138432 +CMS CMSZDIFF12 5 111.4133239603819 +CMS CMSZDIFF12 6 63.14608039765307 +CMS CMSZDIFF12 7 30.729908706359893 +CMS CMSZDIFF12 8 13.39448651183371 +CMS CMSZDIFF12 9 0.6305055189415907 +CMS CMSZDIFF12 10 9976.588537992388 +CMS CMSZDIFF12 11 2920.000107191632 +CMS CMSZDIFF12 12 1059.855951361378 +CMS CMSZDIFF12 13 457.2657235905892 +CMS CMSZDIFF12 14 217.43993285918245 +CMS CMSZDIFF12 15 112.70193793748422 +CMS CMSZDIFF12 16 59.952654876832554 +CMS CMSZDIFF12 17 29.255650830913773 +CMS CMSZDIFF12 18 13.642130122757841 +CMS CMSZDIFF12 19 0.597736772308694 +CMS CMSZDIFF12 20 9301.8549971645 +CMS CMSZDIFF12 21 2630.785874651044 +CMS CMSZDIFF12 22 948.6196747511287 +CMS CMSZDIFF12 23 421.21488540456266 +CMS CMSZDIFF12 24 199.73769810977757 +CMS CMSZDIFF12 25 107.61490343152016 +CMS CMSZDIFF12 26 55.35126035167617 +CMS CMSZDIFF12 27 28.81397749740639 +CMS CMSZDIFF12 28 13.920711734960106 +CMS CMSZDIFF12 29 0.5529100062622566 +CMS CMSZDIFF12 30 6971.420556212593 +CMS CMSZDIFF12 31 1957.8059529195225 +CMS CMSZDIFF12 32 731.709438556571 +CMS CMSZDIFF12 33 323.8053001213361 +CMS CMSZDIFF12 34 162.1152404148366 +CMS CMSZDIFF12 35 83.65642327478605 +CMS CMSZDIFF12 36 49.04469933211526 +CMS CMSZDIFF12 37 24.02844258391299 +CMS CMSZDIFF12 38 11.795099052669572 +CMS CMSZDIFF12 39 0.4828905067898072 +CMS CMSZDIFF12 40 3719.636483895721 +CMS CMSZDIFF12 41 1031.1244149657834 +CMS CMSZDIFF12 42 388.42071444501534 +CMS 
CMSZDIFF12 43 175.09221572399971 +CMS CMSZDIFF12 44 93.44568616773181 +CMS CMSZDIFF12 45 47.76370573557689 +CMS CMSZDIFF12 46 27.105910132295225 +CMS CMSZDIFF12 47 13.869461090325169 +CMS CMSZDIFF12 48 7.057237601132097 +CMS CMSZDIFF12 49 0.2719062340527183 +CMS CMSJETS11 0 2836.2030417634883 +CMS CMSJETS11 1 1345.7532630733779 +CMS CMSJETS11 2 634.6436997586477 +CMS CMSJETS11 3 309.56910077055295 +CMS CMSJETS11 4 164.27761410876445 +CMS CMSJETS11 5 88.1936589735802 +CMS CMSJETS11 6 46.27482331468973 +CMS CMSJETS11 7 25.289669337789356 +CMS CMSJETS11 8 14.33053535939792 +CMS CMSJETS11 9 7.905701459588236 +CMS CMSJETS11 10 4.5598955159622125 +CMS CMSJETS11 11 2.664842622350123 +CMS CMSJETS11 12 1.5388429481820294 +CMS CMSJETS11 13 0.8937587737718031 +CMS CMSJETS11 14 0.5210956278760327 +CMS CMSJETS11 15 0.3162661816624626 +CMS CMSJETS11 16 0.18260888452086887 +CMS CMSJETS11 17 0.10775338781947348 +CMS CMSJETS11 18 0.06620558945502449 +CMS CMSJETS11 19 0.03914272643720642 +CMS CMSJETS11 20 0.02260209002089533 +CMS CMSJETS11 21 0.013723658827195494 +CMS CMSJETS11 22 0.00759304116017701 +CMS CMSJETS11 23 0.004184230904886669 +CMS CMSJETS11 24 0.0024954436792720127 +CMS CMSJETS11 25 0.0013972199920462293 +CMS CMSJETS11 26 0.0008933560193993895 +CMS CMSJETS11 27 0.0003325873103371806 +CMS CMSJETS11 28 0.00018058333451851635 +CMS CMSJETS11 29 9.960200362435802e-05 +CMS CMSJETS11 30 5.1321072609889224e-05 +CMS CMSJETS11 31 1.629322890659932e-05 +CMS CMSJETS11 32 1.9592476185984054e-06 +CMS CMSJETS11 33 2665.7745885614404 +CMS CMSJETS11 34 1210.338364298024 +CMS CMSJETS11 35 561.7265973472279 +CMS CMSJETS11 36 283.72483786301325 +CMS CMSJETS11 37 144.9613095154424 +CMS CMSJETS11 38 73.34171439354476 +CMS CMSJETS11 39 41.43387131896532 +CMS CMSJETS11 40 21.792080548506476 +CMS CMSJETS11 41 12.480497488291462 +CMS CMSJETS11 42 7.15937493181071 +CMS CMSJETS11 43 3.9430741584890483 +CMS CMSJETS11 44 2.2790730369157326 +CMS CMSJETS11 45 1.3447190139705814 +CMS CMSJETS11 46 
0.7640278657132634 +CMS CMSJETS11 47 0.42507777984526823 +CMS CMSJETS11 48 0.2499014498688195 +CMS CMSJETS11 49 0.14579615149938088 +CMS CMSJETS11 50 0.08521658660010055 +CMS CMSJETS11 51 0.04865543434639711 +CMS CMSJETS11 52 0.029001995856644484 +CMS CMSJETS11 53 0.016148872914339755 +CMS CMSJETS11 54 0.008778120418387826 +CMS CMSJETS11 55 0.004536436500928358 +CMS CMSJETS11 56 0.002569069785429614 +CMS CMSJETS11 57 0.0013632986959596282 +CMS CMSJETS11 58 0.0007260229303574061 +CMS CMSJETS11 59 0.0003435110008164818 +CMS CMSJETS11 60 0.00012040513083324868 +CMS CMSJETS11 61 5.6446541700038946e-05 +CMS CMSJETS11 62 1.8176857077638343e-05 +CMS CMSJETS11 63 2348.5754867286455 +CMS CMSJETS11 64 1026.9753280614386 +CMS CMSJETS11 65 468.8272299654121 +CMS CMSJETS11 66 233.28918887426224 +CMS CMSJETS11 67 121.47907358761735 +CMS CMSJETS11 68 63.88193907672322 +CMS CMSJETS11 69 32.01878187199972 +CMS CMSJETS11 70 16.18559830624964 +CMS CMSJETS11 71 9.171324432188916 +CMS CMSJETS11 72 5.191936794250497 +CMS CMSJETS11 73 2.8096637846396253 +CMS CMSJETS11 74 1.56414923090165 +CMS CMSJETS11 75 0.8532271368722754 +CMS CMSJETS11 76 0.4789635487736927 +CMS CMSJETS11 77 0.2673701359235665 +CMS CMSJETS11 78 0.14668511728252742 +CMS CMSJETS11 79 0.0791392833575752 +CMS CMSJETS11 80 0.04235568198710219 +CMS CMSJETS11 81 0.022675041694509752 +CMS CMSJETS11 82 0.010988063023390593 +CMS CMSJETS11 83 0.005349995074472067 +CMS CMSJETS11 84 0.002736701950843297 +CMS CMSJETS11 85 0.00131829946525759 +CMS CMSJETS11 86 0.00054182686876253 +CMS CMSJETS11 87 0.0002916457821984037 +CMS CMSJETS11 88 0.0001309994685988435 +CMS CMSJETS11 89 1.1338241946878379e-05 +CMS CMSJETS11 90 1876.5543704367226 +CMS CMSJETS11 91 832.5319643197482 +CMS CMSJETS11 92 364.5553170756696 +CMS CMSJETS11 93 176.1086954514688 +CMS CMSJETS11 94 86.19203052129257 +CMS CMSJETS11 95 41.89183739884312 +CMS CMSJETS11 96 22.650386576567435 +CMS CMSJETS11 97 11.30324216952335 +CMS CMSJETS11 98 5.932602470200546 +CMS CMSJETS11 
99 3.048353182372997 +CMS CMSJETS11 100 1.5854877318714393 +CMS CMSJETS11 101 0.8125421924047361 +CMS CMSJETS11 102 0.40254368797676904 +CMS CMSJETS11 103 0.20885995072964111 +CMS CMSJETS11 104 0.09645245394723312 +CMS CMSJETS11 105 0.04219173908721096 +CMS CMSJETS11 106 0.019825898122851033 +CMS CMSJETS11 107 0.009001715343320841 +CMS CMSJETS11 108 0.0037857604327620276 +CMS CMSJETS11 109 0.0016836767998027493 +CMS CMSJETS11 110 0.0005893079068735742 +CMS CMSJETS11 111 0.0001892889114787047 +CMS CMSJETS11 112 8.975142650946757e-05 +CMS CMSJETS11 113 1.569164305785028e-05 +CMS CMSJETS11 114 1276.9035553893127 +CMS CMSJETS11 115 534.3423409955832 +CMS CMSJETS11 116 242.7797534905747 +CMS CMSJETS11 117 109.99431238506284 +CMS CMSJETS11 118 49.96334506637459 +CMS CMSJETS11 119 23.27305208474039 +CMS CMSJETS11 120 11.069738730460578 +CMS CMSJETS11 121 4.582810326569979 +CMS CMSJETS11 122 2.1177771504283496 +CMS CMSJETS11 123 0.8929702642458269 +CMS CMSJETS11 124 0.3748166065113357 +CMS CMSJETS11 125 0.14916727511158034 +CMS CMSJETS11 126 0.05779550882847963 +CMS CMSJETS11 127 0.020452658041684203 +CMS CMSJETS11 128 0.006210658604464283 +CMS CMSJETS11 129 0.001980347973639748 +CMS CMSJETS11 130 0.00045727392946408855 +CMS CMSJETS11 131 0.0001613869323937578 +CMS CMSJETS11 132 4.80909177807789e-06 diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index 604b675e8a..0a8b26e01e 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -54,8 +54,17 @@ def f_(*args, **kwargs): @make_table_comp(parse_data_cv) def test_mcreplica(data_config): + #### config = dict(data_config) config["dataset_inputs"] = CORR_DATA + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + 
config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None seed = 123456 # Use no cuts because if filter rules change in the # future then this test will end up failing @@ -138,10 +147,19 @@ def test_datasetchi2(data_singleexp_witht0_config): @make_table_comp(sane_load) def test_art_rep_generation(data_config): + ### config = dict(data_config) config["dataset_inputs"] = CORR_DATA config["mcseed"] = 123456 config["genrep"] = True config["nreplica"] = 1 + config["use_t0_sampling"] = False + config["separate_multiplicative"]= True + config["output_path"] = None + config["theory_covmat_flag"] = False + config["use_user_uncertainties"] = None + config["use_scalevar_uncertainties"] = None + config["use_thcovmat_in_sampling"]=None + config["dataset_t0_predictions"] = None _, art_replicas, _,_ = API.art_rep_generation(**config) return pd.DataFrame(art_replicas.T, columns=['rep0']) From 894138a9af1a9e309ac61a6840c76efc672ef64a Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 11:01:44 +0200 Subject: [PATCH 45/64] Added docs --- validphys2/src/validphys/covmats.py | 45 ++++++++++++++++++++++++++ validphys2/src/validphys/pseudodata.py | 12 ++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 0a4e660b06..960ef7ef6b 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -384,6 +384,13 @@ def dataset_inputs_sampling_covmat(dataset_inputs_loaded_cd_with_cuts, use_t0_sampling, separate_multiplicative, ): + """ + Function to compute the total covmat to be used to generate replicas by make_replica. + If both theory_covmat_flag and use_thcovmat_in_sampling are set, the theory covmat is added to the + experimental covmat. The use_t0_sampling flag selects whether the t0 prescription + is used in the covmat computation.
In order to compute the covmat + only using the additive errors, you can set separate_multiplicative to True. + """ covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions if use_t0_sampling is True else None, separate_multiplicative ) if theory_covmat_flag and use_thcovmat_in_sampling: covmat += loaded_theory_covmat @@ -402,12 +409,50 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, use_t0_fitting, only_add=False, ): + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + If both theory_covmat_flag and use_thcovmat_in_fitting are set, the theory covmat is added to the + experimental covmat. The use_t0_fitting flag selects whether the t0 prescription + is used in the covmat computation. In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions if use_t0_fitting is True else None, only_add) if theory_covmat_flag and use_thcovmat_in_fitting: covmat += loaded_theory_covmat return covmat def generate_exp_covmat(datasets_input, data, use_weights, norm_thre, _list_of_c_values, only_add): + """ + Function to generate the experimental covmat, optionally using the t0 prescription. It is also + possible to compute it only with the additive errors. + + Parameters + ---------- + datasets_input: list[validphys.coredata.CommonData] + list of CommonData objects. + data: list[validphys.core.DataSetInput] + Settings for each dataset, each element contains the weight for the + current dataset. The elements of the returned covmat for dataset + i and j will be divided by sqrt(weight_i)*sqrt(weight_j), if + ``use_weights`` is set. The default weight is 1, which means + the returned covmat will be unmodified.
+ use_weights: bool + Whether to weight the covmat (no default; must be passed explicitly). + norm_thre: number + threshold used to regularize covariance matrix + _list_of_c_values: None, list[np.array] + list of 1-D arrays which contain alternative central values which are + combined with the multiplicative errors to calculate their absolute + contribution. If this is None, the experimental central + values are used. + only_add: bool + specifies whether to use only the additive errors to compute the covmat + + Returns + ------- + : np.array + experimental covariance matrix + """ return dataset_inputs_covmat_from_systematics( datasets_input, data, diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index f57213eb7f..7cc72d724c 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -116,10 +116,20 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat List of CommonData objects which stores information about systematic errors, their treatment and description, for each dataset. - seed: int, None + replica_mcseed: int, None Seed used to initialise the numpy random number generator. If ``None`` then a random seed is allocated using the default numpy behaviour. + dataset_inputs_sampling_covmat: np.array + Full covmat to be used. It can be either experimental only or also include the theory covmat.
+ + separate_multiplicative: bool + Specifies whether computing the shifts with the full covmat or separating multiplicative + errors (in the latter case remember to generate the covmat coherently) + + genrep: bool + Specifies whether computing replicas or not + Returns ------- pseudodata: np.array From c94f7414bdc0b3728e270913fa3b350fc3be0401 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 11:22:45 +0200 Subject: [PATCH 46/64] Fixed test_fit in n3fit --- .../n3fit/tests/regressions/quickcard_2.json | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_2.json b/n3fit/src/n3fit/tests/regressions/quickcard_2.json index 0ce746c1f5..0bdc66854b 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_2.json +++ b/n3fit/src/n3fit/tests/regressions/quickcard_2.json @@ -2,94 +2,94 @@ "preprocessing": [ { "fl": "sng", - "smallx": 1.099926233291626, - "largex": 2.694965362548828, + "smallx": 1.1001018285751343, + "largex": 2.6947875022888184, "trainable": true }, { "fl": "g", - "smallx": 0.9449679851531982, - "largex": 1.6326133012771606, + "smallx": 0.9447894096374512, + "largex": 1.6324347257614136, "trainable": true }, { "fl": "v", - "smallx": 0.7404175400733948, - "largex": 1.7165066003799438, + "smallx": 0.7402390837669373, + "largex": 1.7163281440734863, "trainable": true }, { "fl": "v3", - "smallx": 0.21026656031608582, - "largex": 1.3537929058074951, + "smallx": 0.2104424387216568, + "largex": 1.3539683818817139, "trainable": true }, { "fl": "v8", "smallx": 0.7599998712539673, - "largex": 2.401231050491333, + "largex": 2.40140700340271, "trainable": true }, { "fl": "t3", - "smallx": 1.4331011772155762, - "largex": 2.2847883701324463, + "smallx": 1.4329228401184082, + "largex": 2.284966230392456, "trainable": true }, { "fl": "t8", - "smallx": 1.0498263835906982, - "largex": 1.7643688917160034, + "smallx": 1.0496479272842407, + "largex": 1.7641903162002563, "trainable": true 
}, { "fl": "cp", - "smallx": 0.23803016543388367, - "largex": 2.786621570587158, + "smallx": 0.23785176873207092, + "largex": 2.786799430847168, "trainable": true } ], - "stop_epoch": 302, - "best_epoch": 0, - "erf_tr": 3.5290472507476807, - "erf_vl": 4.136209487915039, - "chi2": 2.187250852584839, + "stop_epoch": 314, + "best_epoch": 12, + "erf_tr": 3.859008312225342, + "erf_vl": 3.781097650527954, + "chi2": 2.1904516220092773, "pos_state": "POS_PASS", "arc_lengths": [ - 1.9042933742638075, - 1.167423470814666, - 1.1344623574622406, - 1.5302676267761284, - 1.100881911421756 + 1.9062157860541882, + 1.1675613091142847, + 1.1346750403691772, + 1.5211630991705873, + 1.099993511588776 ], "integrability": [ - 0.025669212918728868, - 0.02566921291872748, - 2.930567893488778e-05, - 16.312403678894043, - 0.028826501220466372 + 0.025609320495277865, + 0.025609320495276755, + 2.8907650913900795e-05, + 16.2582106590271, + 0.028889004141091856 ], "timing": { "walltime": { - "Total": 11.875385284423828, + "Total": 7.219811916351318, "start": 0.0, - "replica_set": 0.003345489501953125, - "replica_fitted": 11.875233173370361, - "replica_set_to_replica_fitted": 11.871887683868408 + "replica_set": 0.00436091423034668, + "replica_fitted": 7.219769716262817, + "replica_set_to_replica_fitted": 7.215408802032471 }, "cputime": { - "Total": 14.101436303, + "Total": 8.290757, "start": 0.0, - "replica_set": 0.003337935000000236, - "replica_fitted": 14.101281113000002, - "replica_set_to_replica_fitted": 14.097943178000001 + "replica_set": 0.03159199999999984, + "replica_fitted": 8.290709999999999, + "replica_set_to_replica_fitted": 8.259117999999999 } }, "version": { - "keras": "2.8.0", - "tensorflow": "2.8.0, mkl=True", - "numpy": "1.22.2", - "nnpdf": "4.0.4.152+g84203534c-dev", - "validphys": "4.0.4.152+g84203534c-dev" + "keras": "2.4.0", + "tensorflow": "2.4.1, mkl=False", + "numpy": "1.21.2", + "nnpdf": "4.0.4.220+g894138a9a", + "validphys": "4.0.4.220+g894138a9a" } } \ No newline at 
end of file From e9d81f87ad7d13c9b71846c6f530545e1da8ee93 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 11:51:00 +0200 Subject: [PATCH 47/64] Fixed test_fit_and_timing --- .../n3fit/tests/regressions/quickcard_1.json | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_1.json b/n3fit/src/n3fit/tests/regressions/quickcard_1.json index d49120fced..75cb9f5b75 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_1.json +++ b/n3fit/src/n3fit/tests/regressions/quickcard_1.json @@ -20,8 +20,8 @@ }, { "fl": "v3", - "smallx": 0.3031329810619354, - "largex": 2.6214101314544678, + "smallx": 0.30309754610061646, + "largex": 2.6213722229003906, "trainable": true }, { @@ -51,45 +51,45 @@ ], "stop_epoch": 1000, "best_epoch": 999, - "erf_tr": 36.54704284667969, - "erf_vl": 30.701587677001953, - "chi2": 22.70619773864746, + "erf_tr": 38.022315979003906, + "erf_vl": 31.481945037841797, + "chi2": 22.705814361572266, "pos_state": "POS_VETO", "arc_lengths": [ - 1.1037201268771095, - 1.505700648681965, - 1.2027295354983405, - 1.4316397494895012, - 1.1901164504449115 + 1.103731936457393, + 1.505681943929697, + 1.2027105035135508, + 1.426451562695196, + 1.1901165080869252 ], "integrability": [ - 0.002630541814142684, - 0.0026305418141405745, - 0.00014138085179959958, - 3.6656303405761737, - 0.004028238385220817 + 0.002630652510560605, + 0.0026306525105581624, + 0.00013975353567441395, + 3.6658432483673087, + 0.004028247582030353 ], "timing": { "walltime": { - "Total": 20.638618230819702, + "Total": 12.020201921463013, "start": 0.0, - "replica_set": 0.0033304691314697266, - "replica_fitted": 20.638471841812134, - "replica_set_to_replica_fitted": 20.635141372680664 + "replica_set": 0.003970146179199219, + "replica_fitted": 12.02015495300293, + "replica_set_to_replica_fitted": 12.01618480682373 }, "cputime": { - "Total": 27.691918045999998, + "Total": 13.138117000000001, "start": 0.0, - 
"replica_set": 0.003325130999999537, - "replica_fitted": 27.691770436, - "replica_set_to_replica_fitted": 27.688445305000002 + "replica_set": 0.029436999999999713, + "replica_fitted": 13.138062999999999, + "replica_set_to_replica_fitted": 13.108626 } }, "version": { - "keras": "2.8.0", - "tensorflow": "2.8.0, mkl=True", - "numpy": "1.22.2", - "nnpdf": "4.0.4.152+g84203534c", - "validphys": "4.0.4.152+g84203534c" + "keras": "2.4.0", + "tensorflow": "2.4.1, mkl=False", + "numpy": "1.21.2", + "nnpdf": "4.0.4.221+gc94f7414b-dev", + "validphys": "4.0.4.221+gc94f7414b-dev" } } \ No newline at end of file From 176eb3d59b9bbe22528ac18fb067a058193c8b39 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 12:20:22 +0200 Subject: [PATCH 48/64] Resolve conflicting files --- validphys2/src/validphys/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index a1fe065d71..153ae95f61 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1100,7 +1100,7 @@ def produce_fitthcovmat( / "tables" ) # All possible valid files - covfiles = sorted(covmat_path.glob("*theory_covmat_*.csv")) + covfiles = sorted(covmat_path.glob("*theory_covmat*.csv")) if not covfiles: raise ConfigError( "Fit appeared to use theory covmat in fit but the file was not at the " From a4f413376209847f5bc096bae0dc59fd64ab3025 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 31 Mar 2022 13:01:28 +0200 Subject: [PATCH 49/64] minor changes --- validphys2/src/validphys/config.py | 1 - validphys2/src/validphys/covmats.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 153ae95f61..145cd7df3d 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1053,7 +1053,6 @@ def produce_nnfit_theory_covmat( # Only user uncertainties from 
validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting - @functools.wraps(f) def res(*args, **kwargs): return f(*args, **kwargs) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 960ef7ef6b..61c90cd580 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd import scipy.linalg as la -import pathlib from reportengine import collect from reportengine.table import table @@ -24,7 +23,6 @@ from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.covmats_utils import construct_covmat, systematics_matrix from validphys.results import ThPredictionsResult - from validphys.commondata import loaded_commondata_with_cuts log = logging.getLogger(__name__) From 89ac78f5bf0a3c9c25b14673a6bd89f478830ceb Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 5 Apr 2022 14:09:02 +0200 Subject: [PATCH 50/64] Added produce action in config --- n3fit/src/n3fit/scripts/n3fit_exec.py | 8 +- validphys2/src/validphys/config.py | 74 ++++++++++++ validphys2/src/validphys/covmats.py | 149 ++++++++++++++++++------- validphys2/src/validphys/n3fit_data.py | 4 +- 4 files changed, 183 insertions(+), 52 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 4da1f2db0d..c57445e627 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -147,19 +147,13 @@ def from_yaml(cls, o, *args, **kwargs): N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) #Theorycovmat flags and defaults N3FIT_FIXED_CONFIG['theory_covmat_flag'] = False - N3FIT_FIXED_CONFIG['use_user_uncertainties'] = None - N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = None - N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = None - N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = None if (thconfig:=file_content.get('theorycovmatconfig')) is 
not None: N3FIT_FIXED_CONFIG['theory_covmat_flag'] = True N3FIT_FIXED_CONFIG['use_thcovmat_in_fitting'] = thconfig.get('use_thcovmat_in_fitting', True) N3FIT_FIXED_CONFIG['use_thcovmat_in_sampling'] = thconfig.get('use_thcovmat_in_sampling', True) N3FIT_FIXED_CONFIG['use_user_uncertainties'] = thconfig.get('use_user_uncertainties', False) N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get('use_scalevar_uncertainties', True) - #Sampling flags and defaults - N3FIT_FIXED_CONFIG['use_t0_sampling'] = False - N3FIT_FIXED_CONFIG['separate_multiplicative'] = True + #Sampling flags if (sam_t0:=file_content.get('sampling')) is not None: N3FIT_FIXED_CONFIG['use_t0_sampling'] = sam_t0.get('use_t0', False) N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get('separate_multiplicative', True) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 145cd7df3d..33d3d42656 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -55,6 +55,9 @@ import validphys.scalevariations +import numpy as np +import pandas as pd +import pathlib log = logging.getLogger(__name__) @@ -692,6 +695,77 @@ def produce_experiment_from_input( fit=fit, ) } + @configparser.explicit_node + def produce_dataset_inputs_fitting_covmat(self,theory_covmat_flag=False, + use_thcovmat_in_fitting=None, + use_t0_fitting=True, + ): + from validphys import covmats + if use_t0_fitting: + if theory_covmat_flag and use_thcovmat_in_fitting: + return covmats.dataset_inputs_t0_total_covmat + else: + return covmats.dataset_inputs_t0_exp_covmat + else: + if theory_covmat_flag and use_thcovmat_in_fitting: + return covmats.dataset_inputs_total_covmat + else: + return covmats.dataset_inputs_exp_covmat + @configparser.explicit_node + def produce_dataset_inputs_sampling_covmat(self,theory_covmat_flag=False, + use_thcovmat_in_sampling=None, + use_t0_sampling=True, + separate_multiplicative=True + ): + from validphys import covmats + if 
use_t0_sampling: + if theory_covmat_flag and use_thcovmat_in_sampling: + if separate_multiplicative: + return covmats.dataset_inputs_t0_total_covmat_separate + else: + return covmats.dataset_inputs_t0_total_covmat + else: + if separate_multiplicative: + return covmats.dataset_inputs_t0_exp_covmat_separate + else: + return covmats.dataset_inputs_t0_exp_covmat + else: + if theory_covmat_flag and use_thcovmat_in_sampling: + if separate_multiplicative: + return covmats.dataset_inputs_total_covmat_separate + else: + return covmats.dataset_inputs_total_covmat + else: + if separate_multiplicative: + return covmats.dataset_inputs_exp_covmat_separate + else: + return covmats.dataset_inputs_exp_covmat + + def produce_loaded_theory_covmat(self,output_path, + data_input, + theory_covmat_flag=False, + use_user_uncertainties=False, + use_scalevar_uncertainties=True + ): + if theory_covmat_flag is False: + return np.array([]) + #Load correct file according to how the thcovmat was generated by vp-setupfit + generic_path = None + if use_user_uncertainties is True: + if use_scalevar_uncertainties is True: + generic_path = "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" + else: + generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" + else: + generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" + + theorypath = output_path/"tables"/generic_path + theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) + #change ordering according to exp_covmat (so according to runcard order) + tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) + bb = [str(i) for i in data_input] + return tmp.reindex(index=bb, columns=bb, level=0).values + @configparser.explicit_node def produce_covmat_t0_considered(self, use_t0: bool = False): diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 61c90cd580..4156fe5b38 100644 --- 
a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -191,7 +191,6 @@ def dataset_inputs_covmat_from_systematics( special_corrs = [] block_diags = [] weights = [] - if _list_of_central_values is None: # want to just pass None to systematic_errors method _list_of_central_values = [None] * len(dataset_inputs_loaded_cd_with_cuts) @@ -344,57 +343,77 @@ def dataset_inputs_t0_covmat_from_systematics( _list_of_central_values=dataset_inputs_t0_predictions ) -def loaded_theory_covmat(output_path, + +def dataset_inputs_t0_total_covmat_separate(dataset_inputs_loaded_cd_with_cuts, + *, data_input, - theory_covmat_flag, - use_user_uncertainties, - use_scalevar_uncertainties + use_weights_in_covmat=True, + norm_threshold=None, + dataset_inputs_t0_predictions, + loaded_theory_covmat, ): - if theory_covmat_flag is False: - return np.array([]) - #Load correct file according to how the thcovmat was generated by vp-setupfit - generic_path = None - if use_user_uncertainties is True: - if use_scalevar_uncertainties is True: - generic_path = "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" - else: - generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" - else: - generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" - - theorypath = output_path/"tables"/generic_path - theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) - #change ordering according to exp_covmat (so according to runcard order) - tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) - bb = [str(i) for i in data_input] - return tmp.reindex(index=bb, columns=bb, level=0).values + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + Depending on the theory_covmat_flag value, the theory covmat will be added to the + experimental covmat or not. 
With the use_t0_fitting flag it is possible to choose + if using the t0 prescription in the covmat computation. In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , True) + covmat += loaded_theory_covmat + return covmat -#Function called by make_replica -def dataset_inputs_sampling_covmat(dataset_inputs_loaded_cd_with_cuts, +def dataset_inputs_t0_exp_covmat_separate(dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, + ): + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + Depending on the theory_covmat_flag value, the theory covmat will be added to the + experimental covmat or not. With the use_t0_fitting flag it is possible to choose + if using the t0 prescription in the covmat computation. In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , True) + return covmat + +def dataset_inputs_total_covmat_separate(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, loaded_theory_covmat, - theory_covmat_flag, - use_thcovmat_in_sampling, - use_t0_sampling, - separate_multiplicative, ): """ - Function to compute the total covmat to be used to generate replicas by make_replica. + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_sampling flag it is possible to choose + experimental covmat or not. 
With the use_t0_fitting flag it is possible to choose if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set separate_multiplicative to True. + only using the additive errors, you can set only_add to True. """ - covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions if use_t0_sampling is True else None, separate_multiplicative ) - if theory_covmat_flag and use_thcovmat_in_sampling: - covmat += loaded_theory_covmat + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , True) + covmat += loaded_theory_covmat + return covmat + +def dataset_inputs_exp_covmat_separate(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, + ): + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + Depending on the theory_covmat_flag value, the theory covmat will be added to the + experimental covmat or not. With the use_t0_fitting flag it is possible to choose + if using the t0 prescription in the covmat computation. In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , True) return covmat -#Function called by n3fit_data def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, *, data_input, @@ -402,10 +421,6 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, norm_threshold=None, dataset_inputs_t0_predictions, loaded_theory_covmat, - theory_covmat_flag, - use_thcovmat_in_fitting, - use_t0_fitting, - only_add=False, ): """ Function to compute the total covmat to be used for the chi2 by fitting_data_dict. 
@@ -414,9 +429,57 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, if using the t0 prescription in the covmat computation. In order to compute the covmat only using the additive errors, you can set only_add to True. """ - covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions if use_t0_fitting is True else None, only_add) - if theory_covmat_flag and use_thcovmat_in_fitting: - covmat += loaded_theory_covmat + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , False) + covmat += loaded_theory_covmat + return covmat +def dataset_inputs_t0_exp_covmat(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, + dataset_inputs_t0_predictions, + ): + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + Depending on the theory_covmat_flag value, the theory covmat will be added to the + experimental covmat or not. With the use_t0_fitting flag it is possible to choose + if using the t0 prescription in the covmat computation. In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , False) + return covmat + +def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, + loaded_theory_covmat, + ): + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + Depending on the theory_covmat_flag value, the theory covmat will be added to the + experimental covmat or not. With the use_t0_fitting flag it is possible to choose + if using the t0 prescription in the covmat computation. 
In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , False) + covmat += loaded_theory_covmat + return covmat +def dataset_inputs_exp_covmat(dataset_inputs_loaded_cd_with_cuts, + *, + data_input, + use_weights_in_covmat=True, + norm_threshold=None, + ): + """ + Function to compute the total covmat to be used for the chi2 by fitting_data_dict. + Depending on the theory_covmat_flag value, the theory covmat will be added to the + experimental covmat or not. With the use_t0_fitting flag it is possible to choose + if using the t0 prescription in the covmat computation. In order to compute the covmat + only using the additive errors, you can set only_add to True. + """ + covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , False) return covmat def generate_exp_covmat(datasets_input, data, use_weights, norm_thre, _list_of_c_values, only_add): diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 9c1d23ae7d..4883852bdf 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -188,7 +188,7 @@ def _mask_fk_tables(dataset_dicts, tr_masks): def fitting_data_dict( data, make_replica, - dataset_inputs_t0_total_covmat, + dataset_inputs_fitting_covmat, tr_masks, kfold_masks, diagonal_basis=None, @@ -243,7 +243,7 @@ def fitting_data_dict( datasets = common_data_reader_experiment(spec_c, data) # t0 covmat - covmat = dataset_inputs_t0_total_covmat + covmat = dataset_inputs_fitting_covmat inv_true = np.linalg.inv(covmat) if diagonal_basis: From 793de60ff2bafadd07440250ee6cf875053c05eb Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 5 Apr 2022 15:33:24 +0200 Subject: [PATCH 51/64] Fixed tests --- validphys2/src/validphys/config.py | 30 +++++++++------ 
validphys2/src/validphys/pseudodata.py | 8 ++-- .../src/validphys/tests/test_pseudodata.py | 38 +++---------------- .../validphys/tests/test_pythonmakereplica.py | 33 ---------------- .../src/validphys/tests/test_regressions.py | 17 +-------- 5 files changed, 29 insertions(+), 97 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 33d3d42656..4a69d1316a 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -695,9 +695,15 @@ def produce_experiment_from_input( fit=fit, ) } + + def produce_sep_mult(self,separate_multiplicative=None): + if separate_multiplicative is False: + return False + return True + @configparser.explicit_node def produce_dataset_inputs_fitting_covmat(self,theory_covmat_flag=False, - use_thcovmat_in_fitting=None, + use_thcovmat_in_fitting=False, use_t0_fitting=True, ): from validphys import covmats @@ -711,32 +717,32 @@ def produce_dataset_inputs_fitting_covmat(self,theory_covmat_flag=False, return covmats.dataset_inputs_total_covmat else: return covmats.dataset_inputs_exp_covmat + @configparser.explicit_node - def produce_dataset_inputs_sampling_covmat(self,theory_covmat_flag=False, - use_thcovmat_in_sampling=None, - use_t0_sampling=True, - separate_multiplicative=True + def produce_dataset_inputs_sampling_covmat(self,sep_mult,theory_covmat_flag=False, + use_thcovmat_in_sampling=False, + use_t0_sampling=False, ): from validphys import covmats if use_t0_sampling: if theory_covmat_flag and use_thcovmat_in_sampling: - if separate_multiplicative: + if sep_mult: return covmats.dataset_inputs_t0_total_covmat_separate else: return covmats.dataset_inputs_t0_total_covmat else: - if separate_multiplicative: + if sep_mult: return covmats.dataset_inputs_t0_exp_covmat_separate else: return covmats.dataset_inputs_t0_exp_covmat else: if theory_covmat_flag and use_thcovmat_in_sampling: - if separate_multiplicative: + if sep_mult: return 
covmats.dataset_inputs_total_covmat_separate else: return covmats.dataset_inputs_total_covmat else: - if separate_multiplicative: + if sep_mult: return covmats.dataset_inputs_exp_covmat_separate else: return covmats.dataset_inputs_exp_covmat @@ -985,13 +991,13 @@ def parse_use_t0(self, do_use_t0: bool): return do_use_t0 # TODO: Find a good name for this - def produce_t0set(self, use_t0=False, t0pdfset=None): + def produce_t0set(self, t0pdfset=None, use_t0_sampling=False, use_t0_fitting=True, ): """Return the t0set if use_t0 is True and None otherwise. Raises an error if t0 is requested but no t0set is given. """ - if use_t0: + if use_t0_sampling or use_t0_fitting: if not t0pdfset: - raise ConfigError("Setting use_t0 requires specifying a valid t0pdfset") + raise ConfigError("Setting use_t0_* requires specifying a valid t0pdfset") return t0pdfset return None diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 7cc72d724c..8885de78fc 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,7 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_sampling_covmat, separate_multiplicative, genrep=True, ): +def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_sampling_covmat, sep_mult, genrep=True, ): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. @@ -173,7 +173,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat pseudodatas.append(pseudodata) #Separation of multiplicative errors. 
If separate_multiplicative is True also the exp_covmat is produced # without multiplicative errors - if separate_multiplicative: + if sep_mult: mult_errors = cd.multiplicative_errors mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() mult_corr_errors = mult_errors.loc[:, mult_errors.columns == "CORR"].to_numpy() @@ -186,7 +186,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) #concatenating special multiplicative errors, pseudodatas and positive mask - if separate_multiplicative: + if sep_mult: special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() all_pseudodata = np.concatenate(pseudodatas, axis=0) full_mask=np.concatenate(check_positive_masks, axis=0) @@ -210,7 +210,7 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat #Additive shifts (if separate_multiplicative is True) or total shifts (if separate_multiplicative is False) shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) mult_part = 1. 
- if separate_multiplicative: + if sep_mult: special_mult = (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) mult_part = np.concatenate(mult_shifts, axis=0)*special_mult #Shifting pseudodata diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index cfffc124c4..ba79356b97 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -38,17 +38,7 @@ def test_read_pdf_pseudodata(): def test_recreate_fit_pseudodata(): - config = {} - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None - - fit_pseudodata = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT, **config) + fit_pseudodata = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) nrep = API.num_fitted_replicas(fit=PSEUDODATA_FIT) assert nrep == len(fit_pseudodata) @@ -58,17 +48,9 @@ def test_recreate_fit_pseudodata(): def test_recreate_pdf_pseudodata(): - config = {} - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None - - pdf_pseudodata = API.recreate_pdf_pseudodata(fit=PSEUDODATA_FIT, **config) + + + pdf_pseudodata = API.recreate_pdf_pseudodata(fit=PSEUDODATA_FIT) pdf = API.pdf(pdf=PSEUDODATA_FIT) # -1 because we ignore replica 0 @@ -89,18 +71,10 @@ def test_no_savepseudodata(): def test_read_matches_recreate(): - config = {} - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = 
None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None + reads = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) - recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT, **config) + recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT,) for read, recreate in zip(reads, recreates): # We ignore the absolute ordering of the dataframes and just check # that they contain identical elements. diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index 649cb94870..5d4f3cd576 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -12,7 +12,6 @@ from validphys.api import API from validphys.pseudodata import make_replica -from validphys.covmats import dataset_inputs_sampling_covmat from validphys.tests.conftest import DATA from validphys.tests.test_covmats import CORR_DATA @@ -40,14 +39,6 @@ def test_commondata_unchanged(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) # keep a copy of all dataframes/series pre make replica @@ -81,14 +72,6 @@ def test_pseudodata_seeding(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - 
config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None rep_1 = API.make_replica(**config) rep_2 = API.make_replica(**config) np.testing.assert_allclose(rep_1, rep_2) @@ -102,14 +85,6 @@ def test_pseudodata_has_correct_ndata(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) rep = API.make_replica(**config) ndata = np.sum([cd.ndata for cd in ld_cds]) @@ -124,14 +99,6 @@ def test_genrep_off(data_config, dataset_inputs, use_cuts): config["dataset_inputs"] = dataset_inputs config["use_cuts"] = use_cuts config["replica_mcseed"] = SEED - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None config["genrep"] = False ld_cds = API.dataset_inputs_loaded_cd_with_cuts(**config) not_replica = API.make_replica(**config) diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index 0a8b26e01e..ba67486337 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -57,14 +57,6 @@ def test_mcreplica(data_config): #### config = dict(data_config) 
config["dataset_inputs"] = CORR_DATA - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None seed = 123456 # Use no cuts because if filter rules change in the # future then this test will end up failing @@ -153,13 +145,6 @@ def test_art_rep_generation(data_config): config["mcseed"] = 123456 config["genrep"] = True config["nreplica"] = 1 - config["use_t0_sampling"] = False - config["separate_multiplicative"]= True - config["output_path"] = None - config["theory_covmat_flag"] = False - config["use_user_uncertainties"] = None - config["use_scalevar_uncertainties"] = None - config["use_thcovmat_in_sampling"]=None - config["dataset_t0_predictions"] = None + _, art_replicas, _,_ = API.art_rep_generation(**config) return pd.DataFrame(art_replicas.T, columns=['rep0']) From fce0ad872ff8b9fedec8ec77a4886cc59e0fb4fc Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 5 Apr 2022 17:45:09 +0200 Subject: [PATCH 52/64] Added datasets to pythonmakereplica tests --- validphys2/src/validphys/tests/test_pythonmakereplica.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index 5d4f3cd576..7a657091f6 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -23,7 +23,11 @@ {"dataset": "D0ZRAP", "cfac": ["QCD"]}, {"dataset": "NMC"}, {"dataset": "NMCPD"}, - {"dataset": "ATLASZPT8TEVMDIST", "cfac": ["QCD"], } + {"dataset": "ATLASZPT8TEVMDIST", "cfac": ["QCD"]}, + {"dataset": "ATLASWZRAP36PB"}, + {"dataset": "ATLASZHIGHMASS49FB"}, + {"dataset": "CMSWEASY840PB"}, + {"dataset": "CMSWMASY47FB"} ] From 
900732293648240aed0ed2d783c7feb9de64fd34 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 6 Apr 2022 11:16:33 +0200 Subject: [PATCH 53/64] Minor changes and docs --- validphys2/src/validphys/config.py | 73 +++++++--- validphys2/src/validphys/covmats.py | 127 ++++++++---------- validphys2/src/validphys/pseudodata.py | 7 +- .../src/validphys/tests/test_pseudodata.py | 3 +- .../validphys/tests/test_pythonmakereplica.py | 1 - .../src/validphys/tests/test_regressions.py | 5 +- 6 files changed, 112 insertions(+), 104 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 4a69d1316a..ed032111a9 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -6,6 +6,8 @@ """ import logging import pathlib +import numpy as np +import pandas as pd import functools import inspect import numbers @@ -55,9 +57,6 @@ import validphys.scalevariations -import numpy as np -import pandas as pd -import pathlib log = logging.getLogger(__name__) @@ -696,16 +695,28 @@ def produce_experiment_from_input( ) } - def produce_sep_mult(self,separate_multiplicative=None): + def produce_sep_mult(self, separate_multiplicative=None): + """ + Specifies whether to separate the multiplicative errors in the + experimental covmat construction. The default is True. + """ if separate_multiplicative is False: return False return True - @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self,theory_covmat_flag=False, - use_thcovmat_in_fitting=False, - use_t0_fitting=True, + @configparser.explicit_node + def produce_dataset_inputs_fitting_covmat( + self, + theory_covmat_flag=False, + use_thcovmat_in_fitting=False, + use_t0_fitting=True, ): + """ + Produces the correct covmat to be used in fitting_data_dict according + to some options: whether to include the theory covmat, whether to + separate the multiplcative errors and whether to compute the + experimental covmat using the t0 prescription. 
+ """ from validphys import covmats if use_t0_fitting: if theory_covmat_flag and use_thcovmat_in_fitting: @@ -719,10 +730,19 @@ def produce_dataset_inputs_fitting_covmat(self,theory_covmat_flag=False, return covmats.dataset_inputs_exp_covmat @configparser.explicit_node - def produce_dataset_inputs_sampling_covmat(self,sep_mult,theory_covmat_flag=False, - use_thcovmat_in_sampling=False, - use_t0_sampling=False, + def produce_dataset_inputs_sampling_covmat( + self, + sep_mult, + theory_covmat_flag=False, + use_thcovmat_in_sampling=False, + use_t0_sampling=False, ): + """ + Produces the correct covmat to be used in make_replica according + to some options: whether to include the theory covmat, whether to + separate the multiplcative errors and whether to compute the + experimental covmat using the t0 prescription. + """ from validphys import covmats if use_t0_sampling: if theory_covmat_flag and use_thcovmat_in_sampling: @@ -747,24 +767,28 @@ def produce_dataset_inputs_sampling_covmat(self,sep_mult,theory_covmat_flag=Fals else: return covmats.dataset_inputs_exp_covmat - def produce_loaded_theory_covmat(self,output_path, - data_input, - theory_covmat_flag=False, - use_user_uncertainties=False, - use_scalevar_uncertainties=True + def produce_loaded_theory_covmat( + self, + output_path, + data_input, + theory_covmat_flag=False, + use_user_uncertainties=False, + use_scalevar_uncertainties=True ): + """ + Loads the theory covmat from the correct file according to how it + was generated by vp-setupfit. 
+ """ if theory_covmat_flag is False: return np.array([]) #Load correct file according to how the thcovmat was generated by vp-setupfit - generic_path = None + generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" if use_user_uncertainties is True: if use_scalevar_uncertainties is True: generic_path = "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" else: generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" - else: - generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" - + theorypath = output_path/"tables"/generic_path theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) #change ordering according to exp_covmat (so according to runcard order) @@ -997,7 +1021,7 @@ def produce_t0set(self, t0pdfset=None, use_t0_sampling=False, use_t0_fitting=Tru """ if use_t0_sampling or use_t0_fitting: if not t0pdfset: - raise ConfigError("Setting use_t0_* requires specifying a valid t0pdfset") + raise ConfigError("Setting use_t0 requires specifying a valid t0pdfset") return t0pdfset return None @@ -1133,6 +1157,7 @@ def produce_nnfit_theory_covmat( # Only user uncertainties from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting + @functools.wraps(f) def res(*args, **kwargs): return f(*args, **kwargs) @@ -1587,7 +1612,11 @@ def produce_group_dataset_inputs_by_metadata( ] def produce_group_dataset_inputs_by_fitting_group(self, data_input, theory_covmat_flag): - if theory_covmat_flag is True: + """ + Groups datasets all together in a group called ALL if the theory covariance matrix + is used in the fit, otherwise it groups them by experiment. 
+ """ + if theory_covmat_flag: return self.produce_group_dataset_inputs_by_metadata(data_input, "ALL") return self.produce_group_dataset_inputs_by_metadata(data_input, "experiment") diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 4156fe5b38..27e391702b 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -344,26 +344,21 @@ def dataset_inputs_t0_covmat_from_systematics( ) -def dataset_inputs_t0_total_covmat_separate(dataset_inputs_loaded_cd_with_cuts, - *, - data_input, - use_weights_in_covmat=True, - norm_threshold=None, - dataset_inputs_t0_predictions, - loaded_theory_covmat, +def dataset_inputs_t0_total_covmat_separate( + dataset_inputs_t0_exp_covmat_separate, + loaded_theory_covmat ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica. + In this case the t0 prescription is used for the experimental covmat and the multiplicative + errors are separated. Moreover, the theory covmat is added to experimental covmat. 
""" - covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , True) + covmat = dataset_inputs_t0_exp_covmat_separate covmat += loaded_theory_covmat return covmat -def dataset_inputs_t0_exp_covmat_separate(dataset_inputs_loaded_cd_with_cuts, +def dataset_inputs_t0_exp_covmat_separate( + dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, @@ -371,68 +366,56 @@ def dataset_inputs_t0_exp_covmat_separate(dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica. + In this case the t0 prescription is used for the experimental covmat and the multiplicative + errors are separated. """ covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , True) return covmat -def dataset_inputs_total_covmat_separate(dataset_inputs_loaded_cd_with_cuts, - *, - data_input, - use_weights_in_covmat=True, - norm_threshold=None, +def dataset_inputs_total_covmat_separate( + dataset_inputs_exp_covmat_separate, loaded_theory_covmat, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. 
In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica. + In this case the t0 prescription is not used for the experimental covmat and the multiplicative + errors are separated. Moreover, the theory covmat is added to experimental covmat. """ - covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , True) + covmat = dataset_inputs_exp_covmat_separate covmat += loaded_theory_covmat return covmat -def dataset_inputs_exp_covmat_separate(dataset_inputs_loaded_cd_with_cuts, +def dataset_inputs_exp_covmat_separate( + dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, norm_threshold=None, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica. + In this case the t0 prescription is not used for the experimental covmat and the multiplicative + errors are separated. """ covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , True) return covmat -def dataset_inputs_t0_total_covmat(dataset_inputs_loaded_cd_with_cuts, - *, - data_input, - use_weights_in_covmat=True, - norm_threshold=None, - dataset_inputs_t0_predictions, +def dataset_inputs_t0_total_covmat( + dataset_inputs_t0_exp_covmat, loaded_theory_covmat, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. 
- Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica and for the chi2 + by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat + and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ - covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , False) + covmat = dataset_inputs_t0_exp_covmat covmat += loaded_theory_covmat return covmat -def dataset_inputs_t0_exp_covmat(dataset_inputs_loaded_cd_with_cuts, + +def dataset_inputs_t0_exp_covmat( + dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, @@ -440,51 +423,49 @@ def dataset_inputs_t0_exp_covmat(dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica and for the chi2 + by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat + and the multiplicative errors are included in it. 
""" covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, dataset_inputs_t0_predictions , False) return covmat -def dataset_inputs_total_covmat(dataset_inputs_loaded_cd_with_cuts, - *, - data_input, - use_weights_in_covmat=True, - norm_threshold=None, +def dataset_inputs_total_covmat( + dataset_inputs_exp_covmat, loaded_theory_covmat, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. + Function to compute the covmat to be used for the sampling by make_replica and for the chi2 + by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat + and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ - covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , False) + covmat = dataset_inputs_exp_covmat covmat += loaded_theory_covmat return covmat -def dataset_inputs_exp_covmat(dataset_inputs_loaded_cd_with_cuts, + +def dataset_inputs_exp_covmat( + dataset_inputs_loaded_cd_with_cuts, *, data_input, use_weights_in_covmat=True, norm_threshold=None, ): """ - Function to compute the total covmat to be used for the chi2 by fitting_data_dict. - Depending on the theory_covmat_flag value, the theory covmat will be added to the - experimental covmat or not. With the use_t0_fitting flag it is possible to choose - if using the t0 prescription in the covmat computation. In order to compute the covmat - only using the additive errors, you can set only_add to True. 
+ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 + by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat + and the multiplicative errors are included in it. """ covmat = generate_exp_covmat(dataset_inputs_loaded_cd_with_cuts, data_input, use_weights_in_covmat, norm_threshold, None , False) return covmat -def generate_exp_covmat(datasets_input, data, use_weights, norm_thre, _list_of_c_values, only_add): +def generate_exp_covmat(datasets_input, + data, + use_weights, + norm_thre, + _list_of_c_values, + only_add): """ - Function to generate the experimental covmat eventually using the t0 prescription. It is also + Function to generate the experimental covmat eventually using the t0 prescription. It is also possible to compute it only with the additive errors. Parameters diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 8885de78fc..d5b87fbb7b 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -207,11 +207,14 @@ def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dat mult_shifts.append(mult_shift) - #Additive shifts (if separate_multiplicative is True) or total shifts (if separate_multiplicative is False) + #If sep_mult is true then the multiplicative shifts were not included in the covmat shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) mult_part = 1. 
if sep_mult: - special_mult = (1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100).prod(axis=1) + special_mult = ( + 1 + special_mult_errors * rng.normal(size=(1, + special_mult_errors.shape[1])) / 100 + ).prod(axis=1) mult_part = np.concatenate(mult_shifts, axis=0)*special_mult #Shifting pseudodata shifted_pseudodata = (all_pseudodata + shifts)*mult_part diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index ba79356b97..b3070382a7 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -72,9 +72,8 @@ def test_no_savepseudodata(): def test_read_matches_recreate(): - reads = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) - recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT,) + recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) for read, recreate in zip(reads, recreates): # We ignore the absolute ordering of the dataframes and just check # that they contain identical elements. 
diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index 7a657091f6..ba001d4d79 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -50,7 +50,6 @@ def test_commondata_unchanged(data_config, dataset_inputs, use_cuts): pre_mkrep_sys_tabs = [deepcopy(cd.systematics_table) for cd in ld_cds] pre_mkrep_cd_tabs = [deepcopy(cd.commondata_table) for cd in ld_cds] - make_replica = API.make_replica(**config) for post_mkrep_cd, pre_mkrep_cv in zip(ld_cds, pre_mkrep_cvs): diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index ba67486337..88059462c8 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -54,7 +54,6 @@ def f_(*args, **kwargs): @make_table_comp(parse_data_cv) def test_mcreplica(data_config): - #### config = dict(data_config) config["dataset_inputs"] = CORR_DATA seed = 123456 @@ -139,12 +138,10 @@ def test_datasetchi2(data_singleexp_witht0_config): @make_table_comp(sane_load) def test_art_rep_generation(data_config): - ### config = dict(data_config) config["dataset_inputs"] = CORR_DATA config["mcseed"] = 123456 config["genrep"] = True config["nreplica"] = 1 - _, art_replicas, _,_ = API.art_rep_generation(**config) - return pd.DataFrame(art_replicas.T, columns=['rep0']) + return pd.DataFrame(art_replicas.T, columns=['rep0']) \ No newline at end of file From 99185dde456bd5958d5556b7cf57f13a359a8a85 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 6 Apr 2022 12:21:34 +0200 Subject: [PATCH 54/64] Minor changes --- validphys2/src/validphys/config.py | 5 +++-- validphys2/src/validphys/covmats.py | 2 +- validphys2/src/validphys/pseudodata.py | 8 +++++++- validphys2/src/validphys/tests/test_pseudodata.py | 1 - validphys2/src/validphys/tests/test_pythonmakereplica.py | 1 - 5 
files changed, 11 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index ed032111a9..598a2a7957 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -6,8 +6,6 @@ """ import logging import pathlib -import numpy as np -import pandas as pd import functools import inspect import numbers @@ -17,6 +15,9 @@ from collections import ChainMap, defaultdict from collections.abc import Mapping, Sequence +import numpy as np +import pandas as pd + from reportengine import configparser from reportengine.environment import Environment, EnvironmentError_ from reportengine.configparser import ( diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 27e391702b..d00d3fc6d8 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -230,7 +230,7 @@ def dataset_inputs_covmat_from_systematics( covmat = regularize_covmat( covmat, norm_threshold=norm_threshold - ) + ) return covmat diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index d5b87fbb7b..afe21aabcb 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -101,7 +101,13 @@ def read_replica_pseudodata(fit, context_index, replica): return DataTrValSpec(pseudodata.drop("type", axis=1), tr.index, val.index) -def make_replica(groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_sampling_covmat, sep_mult, genrep=True, ): +def make_replica( + groups_dataset_inputs_loaded_cd_with_cuts, + replica_mcseed, + dataset_inputs_sampling_covmat, + sep_mult, + genrep=True + ): """Function that takes in a list of :py:class:`validphys.coredata.CommonData` objects and returns a pseudodata replica accounting for possible correlations between systematic uncertainties. 
diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index b3070382a7..e0bd082f05 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -71,7 +71,6 @@ def test_no_savepseudodata(): def test_read_matches_recreate(): - reads = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) for read, recreate in zip(reads, recreates): diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index ba001d4d79..933269716f 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -49,7 +49,6 @@ def test_commondata_unchanged(data_config, dataset_inputs, use_cuts): pre_mkrep_cvs = [deepcopy(cd.central_values) for cd in ld_cds] pre_mkrep_sys_tabs = [deepcopy(cd.systematics_table) for cd in ld_cds] pre_mkrep_cd_tabs = [deepcopy(cd.commondata_table) for cd in ld_cds] - make_replica = API.make_replica(**config) for post_mkrep_cd, pre_mkrep_cv in zip(ld_cds, pre_mkrep_cvs): From 1d7b089b2ca1f8082653ec7885a913a9ab13b265 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 6 Apr 2022 12:27:41 +0200 Subject: [PATCH 55/64] Restored tests and commondata files to master version --- validphys2/src/validphys/commondata.py | 3 ++- validphys2/src/validphys/tests/test_pseudodata.py | 3 +-- validphys2/src/validphys/tests/test_regressions.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index 436cd9edbf..6f6445a466 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -34,4 +34,5 @@ def loaded_commondata_with_cuts(commondata, cuts): groups_dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", 
("group_dataset_inputs_by_metadata", "data_input") -) \ No newline at end of file +) + diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index e0bd082f05..413975af7c 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -39,6 +39,7 @@ def test_read_pdf_pseudodata(): def test_recreate_fit_pseudodata(): fit_pseudodata = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) + nrep = API.num_fitted_replicas(fit=PSEUDODATA_FIT) assert nrep == len(fit_pseudodata) @@ -48,8 +49,6 @@ def test_recreate_fit_pseudodata(): def test_recreate_pdf_pseudodata(): - - pdf_pseudodata = API.recreate_pdf_pseudodata(fit=PSEUDODATA_FIT) pdf = API.pdf(pdf=PSEUDODATA_FIT) diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py index 88059462c8..604b675e8a 100644 --- a/validphys2/src/validphys/tests/test_regressions.py +++ b/validphys2/src/validphys/tests/test_regressions.py @@ -144,4 +144,4 @@ def test_art_rep_generation(data_config): config["genrep"] = True config["nreplica"] = 1 _, art_replicas, _,_ = API.art_rep_generation(**config) - return pd.DataFrame(art_replicas.T, columns=['rep0']) \ No newline at end of file + return pd.DataFrame(art_replicas.T, columns=['rep0']) From 1d68e85dd13d59ca0a7e16524949c221def4404a Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 30 May 2022 16:50:06 +0200 Subject: [PATCH 56/64] Fixing conflict --- n3fit/src/n3fit/scripts/vp_setupfit.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index ea839b40cc..308bd7fa7c 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -153,11 +153,8 @@ def from_yaml(cls, o, *args, **kwargs): else: filter_action = 'datacuts::theory::fitting filter' check_n3fit_action = 
'datacuts::theory::fitting n3fit_checks_action' - - if kwargs["environment"].legacy: - SETUPFIT_FIXED_CONFIG['actions_'] += [filter_action] - else: - SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] + + SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] if file_content.get('theorycovmatconfig') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append( 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat') From a922e26e27ff87df1b10f5292921bfd4f4efa529 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 30 May 2022 16:53:36 +0200 Subject: [PATCH 57/64] fixed conflict again --- n3fit/src/n3fit/scripts/vp_setupfit.py | 1 + 1 file changed, 1 insertion(+) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 308bd7fa7c..dd8c9f0208 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -155,6 +155,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] + if file_content.get('theorycovmatconfig') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append( 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat') From 7d8f4e8f9a4f1787d148a8f891abf59914747a9c Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 30 May 2022 17:36:33 +0200 Subject: [PATCH 58/64] Rerunning the tests --- n3fit/src/n3fit/scripts/vp_setupfit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index dd8c9f0208..f980ee044e 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -153,7 +153,6 @@ def from_yaml(cls, o, *args, **kwargs): else: filter_action = 'datacuts::theory::fitting filter' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' - SETUPFIT_FIXED_CONFIG['actions_'] += 
[check_n3fit_action, filter_action] if file_content.get('theorycovmatconfig') is not None: From d84bb2f70ac7a95bbecd51837922cc0d10df5ea0 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 1 Jul 2022 11:43:00 +0200 Subject: [PATCH 59/64] Some minor corrections --- .../examples}/Fit_with_theory_covmat.yml | 26 +++++-------------- n3fit/src/n3fit/scripts/vp_setupfit.py | 1 - validphys2/src/validphys/config.py | 11 +++++--- validphys2/src/validphys/covmats.py | 8 +++--- validphys2/src/validphys/pseudodata.py | 3 +-- .../validphys/tests/test_pythonmakereplica.py | 4 +-- 6 files changed, 22 insertions(+), 31 deletions(-) rename {validphys2/examples/theory_covariance => n3fit/runcards/examples}/Fit_with_theory_covmat.yml (78%) diff --git a/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml b/n3fit/runcards/examples/Fit_with_theory_covmat.yml similarity index 78% rename from validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml rename to n3fit/runcards/examples/Fit_with_theory_covmat.yml index 7fc96563f4..ff5915f59e 100644 --- a/validphys2/examples/theory_covariance/Fit_with_theory_covmat.yml +++ b/n3fit/runcards/examples/Fit_with_theory_covmat.yml @@ -1,5 +1,5 @@ # -# Configuration file for NNPDF++ +# Configuration file for n3fit # ########################################################################################## description: "NNPDF4.0 methodology fit with theory covariance matrix (9pt prescription) like NNPDF31_nlo_as_0118_scalecov_9pt" @@ -59,15 +59,6 @@ datacuts: t0pdfset: 190310-tg-nlo-global # PDF set to generate t0 covmat q2min: 13.96 # Q2 minimum w2min: 12.5 # W2 minimum - combocuts: NNPDF31 # NNPDF3.0 final kin. cuts - jetptcut_tev: 0 # jet pt cut for tevatron - jetptcut_lhc: 0 # jet pt cut for lhc - wptcut_lhc: 30.0 # Minimum pT for W pT diff distributions - jetycut_tev: 1e30 # jet rap. cut for tevatron - jetycut_lhc: 1e30 # jet rap. cut for lhc - dymasscut_min: 0 # dy inv.mass. min cut - dymasscut_max: 1e30 # dy inv.mass. 
max cut - jetcfactcut: 1e30 # jet cfact. cut use_cuts: fromintersection cuts_intersection_spec: - theoryid: 163 @@ -123,15 +114,6 @@ fitting: - {fl: t3, trainable: false, smallx: [-0.3687, 1.459], largex: [1.664, 3.373]} - {fl: t8, trainable: false, smallx: [0.5357, 1.267], largex: [1.433, 2.866]} - {fl: t15, trainable: false, smallx: [1.073, 1.164], largex: [1.503, 3.636]} -# basis: -# - {fl: sng, trainable: false, smallx: [1.121, 1.154], largex: [1.498, 3.138]} -# - {fl: g, trainable: false, smallx: [0.9224, 1.149], largex: [3.266, 6.214]} -# - {fl: v, trainable: false, smallx: [0.5279, 0.8017], largex: [1.6, 3.588]} -# - {fl: v3, trainable: false, smallx: [0.2011, 0.4374], largex: [1.761, 3.427]} -# - {fl: v8, trainable: false, smallx: [0.5775, 0.8357], largex: [1.589, 3.378]} -# - {fl: t3, trainable: false, smallx: [-0.484, 1.0], largex: [1.763, 3.397]} -# - {fl: t8, trainable: false, smallx: [0.6714, 0.9197], largex: [1.572, 3.496]} -# - {fl: t15, trainable: false, smallx: [1.073, 1.164], largex: [1.503, 3.636]} ############################################################ positivity: @@ -144,6 +126,12 @@ positivity: - {dataset: POSDYD, maxlambda: 1e10} - {dataset: POSDYS, maxlambda: 1e10} +############################################################ +integrability: + integdatasets: + - {dataset: INTEGXT8, maxlambda: 1e2} + - {dataset: INTEGXT3, maxlambda: 1e2} + ############################################################ debug: False maxcores: 4 diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 2062f8f103..19722802af 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -154,7 +154,6 @@ def from_yaml(cls, o, *args, **kwargs): filter_action = 'datacuts::theory::fitting filter' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] - if file_content.get('theorycovmatconfig') is not 
None: SETUPFIT_FIXED_CONFIG['actions_'].append( 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat') diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index b97d1938d7..b4d905ed45 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -788,10 +788,15 @@ def produce_loaded_theory_covmat( if use_scalevar_uncertainties is True: generic_path = "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" else: - generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" - + generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" theorypath = output_path/"tables"/generic_path - theory_covmat = pd.read_csv(theorypath, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python").fillna(0) + theory_covmat = pd.read_csv( + theorypath, + index_col=[0, 1, 2], + header=[0, 1, 2], + sep="\t|,", + engine="python", + ).fillna(0) #change ordering according to exp_covmat (so according to runcard order) tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) bb = [str(i) for i in data_input] diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 91602df719..3804bbb476 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -230,7 +230,7 @@ def dataset_inputs_covmat_from_systematics( covmat = regularize_covmat( covmat, norm_threshold=norm_threshold - ) + ) return covmat @@ -461,7 +461,7 @@ def dataset_inputs_exp_covmat( def generate_exp_covmat(datasets_input, data, use_weights, - norm_thre, + norm_threshold, _list_of_c_values, only_add): """ @@ -480,7 +480,7 @@ def generate_exp_covmat(datasets_input, the returned covmat will be unmodified. use_weights: bool Whether to weight the covmat, True by default. 
- norm_thre: number + norm_threshold: number threshold used to regularize covariance matrix _list_of_c_values: None, list[np.array] list of 1-D arrays which contain alternative central values which are @@ -499,7 +499,7 @@ def generate_exp_covmat(datasets_input, datasets_input, data, use_weights, - norm_threshold=norm_thre, + norm_threshold=norm_threshold, _list_of_central_values=_list_of_c_values, _only_additive = only_add ) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index afe21aabcb..dae173d0ed 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd -from scipy import linalg as lin from validphys.covmats import INTRA_DATASET_SYS_NAME, sqrt_covmat @@ -195,7 +194,7 @@ def make_replica( if sep_mult: special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() all_pseudodata = np.concatenate(pseudodatas, axis=0) - full_mask=np.concatenate(check_positive_masks, axis=0) + full_mask = np.concatenate(check_positive_masks, axis=0) # The inner while True loop is for ensuring a positive definite # pseudodata replica while True: diff --git a/validphys2/src/validphys/tests/test_pythonmakereplica.py b/validphys2/src/validphys/tests/test_pythonmakereplica.py index 933269716f..c23b1f81da 100644 --- a/validphys2/src/validphys/tests/test_pythonmakereplica.py +++ b/validphys2/src/validphys/tests/test_pythonmakereplica.py @@ -11,13 +11,13 @@ import pytest from validphys.api import API -from validphys.pseudodata import make_replica from validphys.tests.conftest import DATA from validphys.tests.test_covmats import CORR_DATA SEED = 123456 +#Datasets to be tested SINGLE_SYS_DATASETS = [ {"dataset": "DYE886R"}, {"dataset": "D0ZRAP", "cfac": ["QCD"]}, @@ -49,7 +49,7 @@ def test_commondata_unchanged(data_config, dataset_inputs, use_cuts): pre_mkrep_cvs = [deepcopy(cd.central_values) for cd in ld_cds] pre_mkrep_sys_tabs = 
[deepcopy(cd.systematics_table) for cd in ld_cds] pre_mkrep_cd_tabs = [deepcopy(cd.commondata_table) for cd in ld_cds] - make_replica = API.make_replica(**config) + API.make_replica(**config) for post_mkrep_cd, pre_mkrep_cv in zip(ld_cds, pre_mkrep_cvs): assert_series_equal(post_mkrep_cd.central_values, pre_mkrep_cv) From e3d41bedd08a71ac947e244fbb96e6c968b2e6ed Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 1 Jul 2022 12:46:05 +0200 Subject: [PATCH 60/64] Other minor changes --- validphys2/src/validphys/config.py | 3 +-- validphys2/src/validphys/covmats.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index b4d905ed45..2d386ad42f 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -15,7 +15,6 @@ from collections import ChainMap, defaultdict from collections.abc import Mapping, Sequence -import numpy as np import pandas as pd from reportengine import configparser @@ -781,7 +780,7 @@ def produce_loaded_theory_covmat( was generated by vp-setupfit. """ if theory_covmat_flag is False: - return np.array([]) + return 0. 
#Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" if use_user_uncertainties is True: diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 3804bbb476..734e9b2e4f 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -23,7 +23,7 @@ from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.covmats_utils import construct_covmat, systematics_matrix from validphys.results import ThPredictionsResult -from validphys.commondata import loaded_commondata_with_cuts + log = logging.getLogger(__name__) INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") @@ -191,6 +191,7 @@ def dataset_inputs_covmat_from_systematics( special_corrs = [] block_diags = [] weights = [] + if _list_of_central_values is None: # want to just pass None to systematic_errors method _list_of_central_values = [None] * len(dataset_inputs_loaded_cd_with_cuts) From d4b4702ff131f5ea1ad915591a7a69fa729011bb Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 4 Jul 2022 10:37:42 +0200 Subject: [PATCH 61/64] Final fixes --- validphys2/src/validphys/config.py | 3 +++ validphys2/src/validphys/covmats.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 2d386ad42f..318acb5c0f 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -16,6 +16,7 @@ from collections.abc import Mapping, Sequence import pandas as pd +import glob from reportengine import configparser from reportengine.environment import Environment, EnvironmentError_ @@ -781,6 +782,8 @@ def produce_loaded_theory_covmat( """ if theory_covmat_flag is False: return 0. 
+ if len(glob.glob(output_path/"*theory_covmat*")) > 1: + raise ValueError("Too many theory_covmat files in folder tables") #Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" if use_user_uncertainties is True: diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 734e9b2e4f..da50f3f1df 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -23,6 +23,7 @@ from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.covmats_utils import construct_covmat, systematics_matrix from validphys.results import ThPredictionsResult +from validphys.commondata import loaded_commondata_with_cuts log = logging.getLogger(__name__) @@ -191,7 +192,7 @@ def dataset_inputs_covmat_from_systematics( special_corrs = [] block_diags = [] weights = [] - + if _list_of_central_values is None: # want to just pass None to systematic_errors method _list_of_central_values = [None] * len(dataset_inputs_loaded_cd_with_cuts) From 4d768f01003c76ef2d87db54795ef30e0e159de0 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 4 Jul 2022 11:55:28 +0200 Subject: [PATCH 62/64] Reformatting funcs --- validphys2/src/validphys/covmats.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index da50f3f1df..5d272c514c 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -349,7 +349,7 @@ def dataset_inputs_t0_covmat_from_systematics( def dataset_inputs_t0_total_covmat_separate( dataset_inputs_t0_exp_covmat_separate, loaded_theory_covmat - ): +): """ Function to compute the covmat to be used for the sampling by make_replica. 
In this case the t0 prescription is used for the experimental covmat and the multiplicative @@ -366,7 +366,7 @@ def dataset_inputs_t0_exp_covmat_separate( use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica. In this case the t0 prescription is used for the experimental covmat and the multiplicative @@ -378,7 +378,7 @@ def dataset_inputs_t0_exp_covmat_separate( def dataset_inputs_total_covmat_separate( dataset_inputs_exp_covmat_separate, loaded_theory_covmat, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica. In this case the t0 prescription is not used for the experimental covmat and the multiplicative @@ -394,7 +394,7 @@ def dataset_inputs_exp_covmat_separate( data_input, use_weights_in_covmat=True, norm_threshold=None, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica. In this case the t0 prescription is not used for the experimental covmat and the multiplicative @@ -406,7 +406,7 @@ def dataset_inputs_exp_covmat_separate( def dataset_inputs_t0_total_covmat( dataset_inputs_t0_exp_covmat, loaded_theory_covmat, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat @@ -423,7 +423,7 @@ def dataset_inputs_t0_exp_covmat( use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. 
In this case the t0 prescription is used for the experimental covmat @@ -435,7 +435,7 @@ def dataset_inputs_t0_exp_covmat( def dataset_inputs_total_covmat( dataset_inputs_exp_covmat, loaded_theory_covmat, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat @@ -451,7 +451,7 @@ def dataset_inputs_exp_covmat( data_input, use_weights_in_covmat=True, norm_threshold=None, - ): +): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat @@ -465,7 +465,8 @@ def generate_exp_covmat(datasets_input, use_weights, norm_threshold, _list_of_c_values, - only_add): + only_add +): """ Function to generate the experimental covmat eventually using the t0 prescription. It is also possible to compute it only with the additive errors. From 1c5205ab245f595ca5fe22f344df04edab42893e Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 6 Jul 2022 14:02:31 +0200 Subject: [PATCH 63/64] removed file control --- validphys2/src/validphys/config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 318acb5c0f..a67662e9a7 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -782,8 +782,6 @@ def produce_loaded_theory_covmat( """ if theory_covmat_flag is False: return 0. 
- if len(glob.glob(output_path/"*theory_covmat*")) > 1: - raise ValueError("Too many theory_covmat files in folder tables") #Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" if use_user_uncertainties is True: From a10450e7fd2ad8ed39d926845443080cff790303 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 6 Jul 2022 23:46:04 +0200 Subject: [PATCH 64/64] Implemented check of files --- validphys2/src/validphys/config.py | 202 +++++++++++++++++------------ 1 file changed, 118 insertions(+), 84 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index a67662e9a7..fedfbba707 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -65,13 +65,7 @@ class Environment(Environment): """Container for information to be filled at run time""" def __init__( - self, - *, - this_folder=None, - net=True, - upload=False, - dry=False, - **kwargs, + self, *, this_folder=None, net=True, upload=False, dry=False, **kwargs, ): if this_folder: self.this_folder = pathlib.Path(this_folder) @@ -79,8 +73,10 @@ def __init__( if not net: loader_class = Loader elif dry and net: - log.warning("The --dry flag overrides the --net flag. No resources will be downloaded " - "while executing a dry run") + log.warning( + "The --dry flag overrides the --net flag. 
No resources will be downloaded " + "while executing a dry run" + ) loader_class = Loader else: loader_class = FallbackLoader @@ -219,15 +215,18 @@ def parse_use_cuts(self, use_cuts: (bool, str)): def produce_replicas(self, nreplica: int): """Produce a replicas array""" - return NSList(range(1, nreplica+1), nskey="replica") + return NSList(range(1, nreplica + 1), nskey="replica") - def produce_inclusive_use_scalevar_uncertainties(self, use_scalevar_uncertainties: bool = False, - point_prescription: (str, None) = None): + def produce_inclusive_use_scalevar_uncertainties( + self, + use_scalevar_uncertainties: bool = False, + point_prescription: (str, None) = None, + ): """Whether to use a scale variation uncertainty theory covmat. Checks whether a point prescription is included in the runcard and if so assumes scale uncertainties are to be used.""" - if ((not use_scalevar_uncertainties) and (point_prescription is not None)): - use_scalevar_uncertainties = True + if (not use_scalevar_uncertainties) and (point_prescription is not None): + use_scalevar_uncertainties = True return use_scalevar_uncertainties # TODO: load fit config from here @@ -245,21 +244,21 @@ def produce_fitreplicas(self, fit): fit replica. """ num_replicas = num_fitted_replicas(fit) - return NSList(range(1, num_replicas + 1), nskey='replica') + return NSList(range(1, num_replicas + 1), nskey="replica") def produce_pdfreplicas(self, fitpdf): """Production rule mapping the ``replica`` key to each postfit replica. """ - pdf = fitpdf['pdf'] + pdf = fitpdf["pdf"] replicas = fitted_replica_indexes(pdf) - return NSList(replicas, nskey='replica') + return NSList(replicas, nskey="replica") def produce_fitcontextwithcuts(self, fit, fitinputcontext): """Like fitinputcontext but setting the cuts policy. 
""" - theoryid = fitinputcontext['theoryid'] - data_input = fitinputcontext['data_input'] + theoryid = fitinputcontext["theoryid"] + data_input = fitinputcontext["data_input"] return { "dataset_inputs": data_input, @@ -281,13 +280,13 @@ def produce_fitenvironment(self, fit, fitinputcontext): the fit runcard. """ log.warning(f"Using mcseed and trvlseed from fit: {fit}") - theoryid = fitinputcontext['theoryid'] - data_input = fitinputcontext['data_input'] + theoryid = fitinputcontext["theoryid"] + data_input = fitinputcontext["data_input"] runcard = fit.as_input() - trvlseed = runcard['trvlseed'] - mcseed = runcard['mcseed'] - genrep = runcard['genrep'] + trvlseed = runcard["trvlseed"] + mcseed = runcard["mcseed"] + genrep = runcard["genrep"] return { "dataset_inputs": data_input, @@ -308,9 +307,7 @@ def produce_fitinputcontext(self, fit): _, theory = self.parse_from_("fit", "theory", write=False) thid = theory["theoryid"] - data_input = self._parse_data_input_from_( - "fit", {"theoryid": thid} - ) + data_input = self._parse_data_input_from_("fit", {"theoryid": thid}) return {"theoryid": thid, "data_input": data_input} def produce_fitpdf(self, fit): @@ -334,20 +331,24 @@ def parse_hyperscan(self, hyperscan): try: return self.loader.check_hyperscan(hyperscan) except LoadFailedError as e: - raise ConfigError(str(e), hyperscan, self.loader.available_hyperscans) from e + raise ConfigError( + str(e), hyperscan, self.loader.available_hyperscans + ) from e def parse_hyperscan_config(self, hyperscan_config, hyperopt=None): """Configuration of the hyperscan """ if "from_hyperscan" in hyperscan_config: hyperscan = self.parse_hyperscan(hyperscan_config["from_hyperscan"]) - log.info("Using previous hyperscan: '%s' to generate the search space", hyperscan) + log.info( + "Using previous hyperscan: '%s' to generate the search space", hyperscan + ) return hyperscan.as_input().get("hyperscan_config") if "use_tries_from" in hyperscan_config: hyperscan = 
self.parse_hyperscan(hyperscan_config["use_tries_from"]) log.info("Reusing tries from: %s", hyperscan) - return {"parameters": hyperscan.sample_trials(n = hyperopt)} + return {"parameters": hyperscan.sample_trials(n=hyperopt)} return hyperscan_config @@ -382,7 +383,6 @@ def produce_multiclosure_underlyinglaw(self, fits): ) return self.parse_pdf(laws.pop()) - def produce_basisfromfit(self, fit): """Set the basis from fit config. In the fit config file the basis is set using the key ``fitbasis``, but it is exposed to validphys @@ -397,12 +397,10 @@ def produce_basisfromfit(self, fit): basis = fitting["fitbasis"] return {"basis": basis} - def produce_fitpdfandbasis(self, fitpdf, basisfromfit): """ Set the PDF and basis from the fit config. """ return {**fitpdf, **basisfromfit} - @element_of("dataset_inputs") def parse_dataset_input(self, dataset: Mapping): """The mapping that corresponds to the dataset specifications in the @@ -443,7 +441,7 @@ def parse_dataset_input(self, dataset: Mapping): cfac=cfac, frac=frac, weight=weight, - custom_group=custom_group + custom_group=custom_group, ) def parse_use_fitcommondata(self, do_use: bool): @@ -661,7 +659,6 @@ def parse_experiment(self, experiment: dict): dsinputs = [self.parse_dataset_input(ds) for ds in datasets] - return self.produce_data(group_name=name, data_input=dsinputs) @configparser.element_of("experiment_inputs") @@ -719,6 +716,7 @@ def produce_dataset_inputs_fitting_covmat( experimental covmat using the t0 prescription. """ from validphys import covmats + if use_t0_fitting: if theory_covmat_flag and use_thcovmat_in_fitting: return covmats.dataset_inputs_t0_total_covmat @@ -745,6 +743,7 @@ def produce_dataset_inputs_sampling_covmat( experimental covmat using the t0 prescription. 
""" from validphys import covmats + if use_t0_sampling: if theory_covmat_flag and use_thcovmat_in_sampling: if sep_mult: @@ -774,22 +773,47 @@ def produce_loaded_theory_covmat( data_input, theory_covmat_flag=False, use_user_uncertainties=False, - use_scalevar_uncertainties=True + use_scalevar_uncertainties=True, ): """ Loads the theory covmat from the correct file according to how it was generated by vp-setupfit. """ if theory_covmat_flag is False: - return 0. - #Load correct file according to how the thcovmat was generated by vp-setupfit + return 0.0 + # Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" if use_user_uncertainties is True: if use_scalevar_uncertainties is True: - generic_path = "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" + generic_path = ( + "datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" + ) else: - generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" - theorypath = output_path/"tables"/generic_path + generic_path = "datacuts_theory_theorycovmatconfig_user_covmat.csv" + # check if there are multiple files + files = glob.glob(str(output_path / "tables/*theorycovmat*")) + paths = [ + str( + output_path + / "tables/datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" + ), + str( + output_path + / "tables/datacuts_theory_theorycovmatconfig_total_theory_covmat.csv" + ), + str( + output_path + / "tables/datacuts_theory_theorycovmatconfig_user_covmat.csv" + ), + ] + paths.remove(str(output_path / "tables" / generic_path)) + for f in files: + for path in paths: + if f == path: + raise ValueError( + "More than one theory_covmat file in folder tables" + ) + theorypath = output_path / "tables" / generic_path theory_covmat = pd.read_csv( theorypath, index_col=[0, 1, 2], @@ -797,12 +821,11 @@ def produce_loaded_theory_covmat( sep="\t|,", engine="python", ).fillna(0) - #change ordering according to 
exp_covmat (so according to runcard order) + # change ordering according to exp_covmat (so according to runcard order) tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) bb = [str(i) for i in data_input] return tmp.reindex(index=bb, columns=bb, level=0).values - @configparser.explicit_node def produce_covmat_t0_considered(self, use_t0: bool = False): """Modifies which action is used as covariance_matrix depending on @@ -1021,7 +1044,9 @@ def parse_use_t0(self, do_use_t0: bool): return do_use_t0 # TODO: Find a good name for this - def produce_t0set(self, t0pdfset=None, use_t0_sampling=False, use_t0_fitting=True, ): + def produce_t0set( + self, t0pdfset=None, use_t0_sampling=False, use_t0_fitting=True, + ): """Return the t0set if use_t0 is True and None otherwise. Raises an error if t0 is requested but no t0set is given. """ @@ -1035,14 +1060,14 @@ def _parse_lagrange_multiplier(self, kind, theoryid, setdict): """ Lagrange multiplier constraints are mappings containing a `dataset` and a `maxlambda` argument which defines the maximum value allowed for the multiplier """ - bad_msg = ( - f"{kind} must be a mapping with a name ('dataset') and a float multiplier (maxlambda)" - ) + bad_msg = f"{kind} must be a mapping with a name ('dataset') and a float multiplier (maxlambda)" theoryno, _ = theoryid lambda_key = "maxlambda" - #BCH allow for old-style runcards with 'poslambda' instead of 'maxlambda' + # BCH allow for old-style runcards with 'poslambda' instead of 'maxlambda' if "poslambda" in setdict and "maxlambda" not in setdict: - log.warning("The `poslambda` argument has been deprecated in favour of `maxlambda`") + log.warning( + "The `poslambda` argument has been deprecated in favour of `maxlambda`" + ) lambda_key = "poslambda" try: name = setdict["dataset"] @@ -1124,8 +1149,9 @@ def parse_lumi_channel(self, ch: str): def produce_all_lumi_channels(self): return {"lumi_channels": self.parse_lumi_channels(list(LUMI_CHANNELS))} - def 
produce_loaded_user_covmat_path(self, user_covmat_path: str = "", - use_user_uncertainties: bool = False): + def produce_loaded_user_covmat_path( + self, user_covmat_path: str = "", use_user_uncertainties: bool = False + ): """ Path to the user covmat provided by user_covmat_path in the runcard. If no path is provided, returns None. @@ -1138,14 +1164,13 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = "", fileloc = l.check_vp_output_file(user_covmat_path) return fileloc - @configparser.explicit_node def produce_nnfit_theory_covmat( self, use_thcovmat_in_sampling: bool, use_thcovmat_in_fitting: bool, inclusive_use_scalevar_uncertainties, - use_user_uncertainties: bool = False + use_user_uncertainties: bool = False, ): """ Return the theory covariance matrix used in the fit. @@ -1153,20 +1178,28 @@ def produce_nnfit_theory_covmat( if inclusive_use_scalevar_uncertainties: if use_user_uncertainties: # Both scalevar and user uncertainties - from validphys.theorycovariance.construction import total_theory_covmat_fitting + from validphys.theorycovariance.construction import ( + total_theory_covmat_fitting, + ) + f = total_theory_covmat_fitting - else: + else: # Only scalevar uncertainties - from validphys.theorycovariance.construction import theory_covmat_custom_fitting + from validphys.theorycovariance.construction import ( + theory_covmat_custom_fitting, + ) + f = theory_covmat_custom_fitting elif use_user_uncertainties: # Only user uncertainties from validphys.theorycovariance.construction import user_covmat_fitting + f = user_covmat_fitting @functools.wraps(f) def res(*args, **kwargs): return f(*args, **kwargs) + # Set this to get the same filename regardless of the action. 
res.__name__ = "theory_covmat" return res @@ -1205,17 +1238,14 @@ def produce_fitthcovmat( if use_thcovmat_if_present and thcovmat_present: # Expected directory of theory covmat hardcoded - covmat_path = ( - fit.path - / "tables" - ) + covmat_path = fit.path / "tables" # All possible valid files covfiles = sorted(covmat_path.glob("*theory_covmat*.csv")) if not covfiles: raise ConfigError( "Fit appeared to use theory covmat in fit but the file was not at the " f"usual location: {covmat_path}." - ) + ) if len(covfiles) > 1: raise ConfigError( "More than one valid theory covmat file found at the " @@ -1447,10 +1477,7 @@ def produce_defaults( return filter_defaults def produce_data( - self, - data_input, - *, - group_name="data", + self, data_input, *, group_name="data", ): """A set of datasets where correlated systematics are taken into account @@ -1495,7 +1522,7 @@ def _parse_data_input_from_( """ with self.set_context(ns=self._curr_ns.new_child(additional_context)): - # new fits have dataset_inputs, old fits have experiments + # new fits have dataset_inputs, old fits have experiments data_key = "dataset_inputs" try: _, data_val = self.parse_from_(parse_from_value, data_key, write=False) @@ -1507,10 +1534,17 @@ def _parse_data_input_from_( ) # We need to make theoryid available if using experiments try: - _, experiments = self.parse_from_(parse_from_value, data_key, write=False) - data_val = NSList([ - dsinput for experiment in experiments for dsinput in experiment.dsinputs - ], nskey='dataset_input') + _, experiments = self.parse_from_( + parse_from_value, data_key, write=False + ) + data_val = NSList( + [ + dsinput + for experiment in experiments + for dsinput in experiment.dsinputs + ], + nskey="dataset_input", + ) except ConfigError as inner_error: log.error(inner_error) raise e from inner_error @@ -1580,7 +1614,6 @@ def produce_processed_metadata_group( return processed_data_grouping return metadata_group - def produce_group_dataset_inputs_by_metadata( self, 
data_input, processed_metadata_group, ): @@ -1593,7 +1626,7 @@ def produce_group_dataset_inputs_by_metadata( # special case of custom group, take the grouping from the dataset input if processed_metadata_group == "custom_group": group_name = str(dsinput.custom_group) - #special case of ALL, grouping everything together + # special case of ALL, grouping everything together if processed_metadata_group == "ALL": group_name = processed_metadata_group # otherwise try and take the key from the metadata. @@ -1616,8 +1649,10 @@ def produce_group_dataset_inputs_by_metadata( {"data_input": NSList(group, nskey="dataset_input"), "group_name": name} for name, group in res.items() ] - - def produce_group_dataset_inputs_by_fitting_group(self, data_input, theory_covmat_flag): + + def produce_group_dataset_inputs_by_fitting_group( + self, data_input, theory_covmat_flag + ): """ Groups datasets all together in a group called ALL if the theory covariance matrix is used in the fit, otherwise it groups them by experiment. 
@@ -1631,11 +1666,11 @@ def produce_fivetheories(self, point_prescription): return "bar" elif point_prescription == "5 point": return "nobar" - return None - + return None + def produce_seventheories(self, point_prescription): if point_prescription == "7 point": - #This is None because is the default choice + # This is None because is the default choice return None elif point_prescription == "7original point": return "original" @@ -1645,8 +1680,9 @@ def produce_group_dataset_inputs_by_experiment(self, data_input): return self.produce_group_dataset_inputs_by_metadata(data_input, "experiment") def produce_group_dataset_inputs_by_process(self, data_input): - return self.produce_group_dataset_inputs_by_metadata(data_input, "nnpdf31_process") - + return self.produce_group_dataset_inputs_by_metadata( + data_input, "nnpdf31_process" + ) def produce_scale_variation_theories(self, theoryid, point_prescription): """Produces a list of theoryids given a theoryid at central scales and a point @@ -1718,7 +1754,6 @@ def produce_scale_variation_theories(self, theoryid, point_prescription): # NSList needs to be used for theoryids to be recognised as a namespace return {"theoryids": NSList(theoryids, nskey="theoryid")} - @configparser.explicit_node def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None): """Set the action used to filter the data to filter either real or @@ -1726,12 +1761,12 @@ def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None): theory covariance matrix is not being closure tested then filter data by experiment for efficiency""" import validphys.filters + if not fakedata: return validphys.filters.filter_real_data else: - if ( - theorycovmatconfig is not None and - theorycovmatconfig.get("use_thcovmat_in_sampling") + if theorycovmatconfig is not None and theorycovmatconfig.get( + "use_thcovmat_in_sampling" ): # NOTE: By the time we run theory covmat closure tests, # hopefully the generation of pseudodata will be done 
in python. @@ -1764,7 +1799,6 @@ def produce_total_phi_data(self, fitthcovmat): return validphys.results.dataset_inputs_phi_data - class Config(report.Config, CoreConfig, ParamfitsConfig): """The effective configuration parser class."""