From 58942d01151e2761b264b67e24d2fb03288f14de Mon Sep 17 00:00:00 2001 From: Christopher_Madrid Date: Mon, 25 Jan 2021 12:41:56 -0600 Subject: [PATCH 1/5] Moving to uproot4 --- DataGetter.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/DataGetter.py b/DataGetter.py index a1a7957..eb8f41c 100644 --- a/DataGetter.py +++ b/DataGetter.py @@ -1,4 +1,4 @@ -import uproot +import uproot4 as uproot import numpy as np import pandas as pd from glob import glob @@ -38,7 +38,7 @@ def get_data(signalDataSet, backgroundDataSet, config, doBgWeight = False, doSgW trainData[key] = data[key][:minLen] # Randomly shuffle the signal and background - np.random.seed(config["seed"]) + np.random.seed(config["seed"]) perms = np.random.permutation(trainData["data"].shape[0]) for key in trainData: trainData[key] = trainData[key][perms] @@ -92,7 +92,7 @@ def getColumnHeaders(self, samplesToRun, treename): try: sample = samplesToRun[0] f = uproot.open(sample) - self.columnHeaders = f[treename].pandas.df().columns.tolist() + self.columnHeaders = f[treename].arrays(library="pd").columns.tolist() f.close() except IndexError as e: print(e) @@ -111,8 +111,7 @@ def getDataSets(self, samplesToRun, treename): for filename in samplesToRun: try: f = uproot.open(filename) - #dsets.append( f[treename].pandas.df(branches=variables) ) - dsets.append( f[treename].pandas.df() ) + dsets.append( f[treename].arrays(library="pd") ) f.close() except Exception as e: print("Warning: \"%s\" has issues" % filename, e) From 10b5716785103a336d646800f3e2407547446072 Mon Sep 17 00:00:00 2001 From: Christopher_Madrid Date: Mon, 25 Jan 2021 12:42:41 -0600 Subject: [PATCH 2/5] Now using LCG env instead of conda --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index 57ef3ae..5493289 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,36 @@ xrdcp -r root://cmseos.fnal.gov///store/user/cmadrid/trainingTuples/MVA_Training python 
train.py ``` +### Alternative LPC setup: Python Virtual Environment + +To begin the initial setup, run the following commands: +```bash +cd +git clone git@github.com:StealthStop/DeepESM.git +cd t-channel_Analysis +./setup.sh +``` +Remember to replace `` with the directory where you want your files/folders to appear. You can change the name of the virtual environment by using the `-n` option and you can use the development version of coffea by using the `-d` option. These commands only need to be run during the initial setup. When doing your day-to-day tasks, you can skip these. + +To activate the `coffeaenv` environment and set the Jupyter paths, run the command (every time): +```bash +cd /t-channel_Analysis +source init.sh +``` + +When you are done working and would like to "deactivate" the `coffeaenv` environment, run the command: +```bash +deactivate +``` +This shell function was given to you by the virtual environment. + +To remove the virtual environment and the associated files (i.e. inverse of the setup script), you can use the run the following command: +```bash +cd /t-channel_Analysis +./clean.sh +``` +The `clean.sh` script has the same `-n` and `-d` options as in the `setup.sh` script. + ### Plotting Input Variables A plotting script is provided to make pretty plots of NN inputs from the ntuple files. 
From 26af4438480950c2b1ef27c4bc06694c4d7d57c7 Mon Sep 17 00:00:00 2001
From: Christopher_Madrid
Date: Mon, 25 Jan 2021 12:43:33 -0600
Subject: [PATCH 3/5] Adding vm env setup scripts

---
 clean.sh | 52 +++++++++++++++++++++++++++++++++++++
 init.sh  | 11 ++++++++
 setup.sh | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100755 clean.sh
 create mode 100755 init.sh
 create mode 100755 setup.sh

diff --git a/clean.sh b/clean.sh
new file mode 100755
index 0000000..ad76ca8
--- /dev/null
+++ b/clean.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+case `uname` in
+    Linux) ECHO="echo -e" ;;  # -e so the \t and \n escapes in the messages below are interpreted
+    *) ECHO="echo" ;;
+esac
+
+usage(){  # print the option summary, then exit with the status passed as $1
+    EXIT=$1
+    $ECHO "clean.sh [options]"
+    $ECHO
+    $ECHO "Options:"
+    $ECHO "-d \tuse the developer branch of Coffea (default = 0)"
+    $ECHO "-h \tprint this message and exit"
+    $ECHO "-n [NAME] \toverride the name of the virtual environment (default = coffeaenv)"
+    exit $EXIT
+}
+
+NAME=coffeaenv
+DEV=0
+
+# check arguments
+while getopts ":dhn:" opt; do  # leading ':' = silent mode, so the ':' and '\?' arms below fire with OPTARG set
+    case "$opt" in
+        d) DEV=1
+            ;;
+        h) usage 0
+            ;;
+        n) NAME=$OPTARG
+            ;;
+        :) printf "missing argument for -%s\n" "$OPTARG" >&2
+            usage -1
+            ;;
+        \?) printf "illegal option: -%s\n" "$OPTARG" >&2
+            usage -2
+            ;;
+    esac
+done
+
+$ECHO "Removing the virtual environment ... "
+rm -rf "${NAME}" "${NAME}.tar.gz"  # quoted so a name with spaces is removed as one path
+
+if [[ "$DEV" == "1" ]]; then
+    $ECHO "\nRemoving the 'development' version of Coffea ... "
+    rm -rf coffea
+fi
+
+$ECHO "\nRemoving the ipython/jupyter kernel ... "
+storage_dir=$(readlink -f "$PWD")
+rm -rf "${storage_dir}/.local/share/jupyter/kernels/${NAME}"
+
+$ECHO "\nFINISHED"
diff --git a/init.sh b/init.sh
new file mode 100755
index 0000000..5ccaea4
--- /dev/null
+++ b/init.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# vars for jupyter
+storage_dir=$(readlink -f "$PWD")  # paths below are relative to the directory this file is sourced from
+export TCHANNEL_BASE=${storage_dir}
+export JUPYTER_PATH=${storage_dir}/.jupyter
+export JUPYTER_RUNTIME_DIR=${storage_dir}/.local/share/jupyter/runtime
+export JUPYTER_DATA_DIR=${storage_dir}/.local/share/jupyter
+export IPYTHONDIR=${storage_dir}/.ipython
+
+source coffeaenv/bin/activate  # NOTE: default env name only; an env made with 'setup.sh -n NAME' is not picked up here
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..195ef7d
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+
+case `uname` in
+    Linux) ECHO="echo -e" ;;  # -e so the \t and \n escapes in the messages below are interpreted
+    *) ECHO="echo" ;;
+esac
+
+usage(){  # print the option summary, then exit with the status passed as $1
+    EXIT=$1
+    $ECHO "setup.sh [options]"
+    $ECHO
+    $ECHO "Options:"
+    $ECHO "-d \tuse the developer branch of Coffea (default = 0)"
+    $ECHO "-h \tprint this message and exit"
+    $ECHO "-n [NAME] \toverride the name of the virtual environment (default = coffeaenv)"
+    exit $EXIT
+}
+
+NAME=coffeaenv
+LCG=/cvmfs/sft.cern.ch/lcg/views/LCG_99cuda/x86_64-centos7-gcc8-opt
+DEV=0
+
+# check arguments
+while getopts ":dhn:" opt; do  # leading ':' = silent mode, so the ':' and '\?' arms below fire with OPTARG set
+    case "$opt" in
+        d) DEV=1
+            ;;
+        h) usage 0
+            ;;
+        n) NAME=$OPTARG
+            ;;
+        :) printf "missing argument for -%s\n" "$OPTARG" >&2
+            usage -1
+            ;;
+        \?) printf "illegal option: -%s\n" "$OPTARG" >&2
+            usage -2
+            ;;
+    esac
+done
+
+# Setup the LCG environment
+$ECHO "Getting the LCG environment ... "
+source $LCG/setup.sh
+
+# Install most of the needed software in a virtual environment
+# following https://aarongorka.com/blog/portable-virtualenv/, an alternative is https://github.com/pantsbuild/pex
+$ECHO "\nMaking and activiating the virtual environment ... "
+python -m venv --copies "$NAME"
+source "$NAME/bin/activate"
+$ECHO "\nInstalling 'pip' packages ... "
+python -m pip install --no-cache-dir setuptools pip argparse --upgrade
+python -m pip install --no-cache-dir xxhash
+python -m pip install --no-cache-dir uproot4
+if [[ "$DEV" == "1" ]]; then
+    $ECHO "\nInstalling the 'development' version of Coffea ... "
+    python -m pip install --no-cache-dir flake8 pytest coverage
+    git clone https://github.com/CoffeaTeam/coffea
+    cd coffea
+    python -m pip install --no-cache-dir --editable .[dask,spark,parsl] 'uproot-methods<0.9.0,>=0.7.3' 'pillow>=7.1.0' 'mplhep==0.1.35'
+    cd ..
+else
+    $ECHO "Installing the 'production' version of Coffea ... "
+    python -m pip install --no-cache-dir coffea[dask,spark,parsl] 'uproot-methods<0.9.0,>=0.7.3' 'pillow>=7.1.0' 'mplhep==0.1.35'
+fi
+
+# Setup the activation script for the virtual environment
+$ECHO "\nSetting up the activation script for the virtual environment ... "
+sed -i '40s/.*/VIRTUAL_ENV="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}" )")" \&\& pwd)"/' "$NAME/bin/activate"  # presumably line 40 is the VIRTUAL_ENV= assignment; verify for the venv version in use
+find "$NAME/bin/" -type f -print0 | xargs -0 -P 4 sed -i '1s/#!.*python$/#!\/usr\/bin\/env python/'  # $NAME, not a fixed 'coffeaenv', so -n environments are patched too
+sed -i "2a source ${LCG}/setup.sh" "$NAME/bin/activate"
+sed -i "4a source ${LCG}/setup.csh" "$NAME/bin/activate.csh"
+
+$ECHO "\nSetting up the ipython/jupyter kernel ... "
+storage_dir=$(readlink -f "$PWD")
+ipython kernel install --prefix="${storage_dir}/.local" --name="$NAME"
+tar -zcf "${NAME}.tar.gz" "${NAME}"
+
+deactivate
+$ECHO "\nFINISHED"
From 997a44859c94eb2086e38d4ad4aaef193d359d2b Mon Sep 17 00:00:00 2001
From: Christopher_Madrid
Date: Mon, 25 Jan 2021 12:46:02 -0600
Subject: [PATCH 4/5] fixing readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5493289..55ef920 100644
--- a/README.md
+++ b/README.md
@@ -152,7 +152,7 @@ To begin the initial setup, run the following commands:
 ```bash
 cd
 git clone git@github.com:StealthStop/DeepESM.git
-cd t-channel_Analysis
+cd DeepESM
 ./setup.sh
 ```
 Remember to replace `` with the directory where you want your files/folders to appear. You can change the name of the virtual environment by using the `-n` option and you can use the development version of coffea by using the `-d` option. These commands only need to be run during the initial setup. When doing your day-to-day tasks, you can skip these.
From 58d5ec96faca4e70de25822ff3322b458512c144 Mon Sep 17 00:00:00 2001
From: Christopher_Madrid
Date: Mon, 25 Jan 2021 12:47:42 -0600
Subject: [PATCH 5/5] really fix readme this time

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 55ef920..54c15d0 100644
--- a/README.md
+++ b/README.md
@@ -159,7 +159,7 @@ Remember to replace `` with the directory where you want your
 
 To activate the `coffeaenv` environment and set the Jupyter paths, run the command (every time):
 ```bash
-cd /t-channel_Analysis
+cd /DeepESM
 source init.sh
 ```
 
@@ -171,7 +171,7 @@ This shell function was given to you by the virtual environment.
 
 To remove the virtual environment and the associated files (i.e. 
inverse of the setup script), you can use the run the following command: ```bash -cd /t-channel_Analysis +cd /DeepESM ./clean.sh ``` The `clean.sh` script has the same `-n` and `-d` options as in the `setup.sh` script.