diff --git a/DataGetter.py b/DataGetter.py index a1a7957..eb8f41c 100644 --- a/DataGetter.py +++ b/DataGetter.py @@ -1,4 +1,4 @@ -import uproot +import uproot4 as uproot import numpy as np import pandas as pd from glob import glob @@ -38,7 +38,7 @@ def get_data(signalDataSet, backgroundDataSet, config, doBgWeight = False, doSgW trainData[key] = data[key][:minLen] # Randomly shuffle the signal and background - np.random.seed(config["seed"]) + np.random.seed(config["seed"]) perms = np.random.permutation(trainData["data"].shape[0]) for key in trainData: trainData[key] = trainData[key][perms] @@ -92,7 +92,7 @@ def getColumnHeaders(self, samplesToRun, treename): try: sample = samplesToRun[0] f = uproot.open(sample) - self.columnHeaders = f[treename].pandas.df().columns.tolist() + self.columnHeaders = f[treename].arrays(library="pd").columns.tolist() f.close() except IndexError as e: print(e) @@ -111,8 +111,7 @@ def getDataSets(self, samplesToRun, treename): for filename in samplesToRun: try: f = uproot.open(filename) - #dsets.append( f[treename].pandas.df(branches=variables) ) - dsets.append( f[treename].pandas.df() ) + dsets.append( f[treename].arrays(library="pd") ) f.close() except Exception as e: print("Warning: \"%s\" has issues" % filename, e) diff --git a/README.md b/README.md index 57ef3ae..54c15d0 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,36 @@ xrdcp -r root://cmseos.fnal.gov///store/user/cmadrid/trainingTuples/MVA_Training python train.py ``` +### Alternative LPC setup: Python Virtual Environment + +To begin the initial setup, run the following commands: +```bash +cd <working_directory> +git clone git@github.com:StealthStop/DeepESM.git +cd DeepESM +./setup.sh +``` +Remember to replace `<working_directory>` with the directory where you want your files/folders to appear. You can change the name of the virtual environment by using the `-n` option and you can use the development version of coffea by using the `-d` option. These commands only need to be run during the initial setup. 
When doing your day-to-day tasks, you can skip these. + +To activate the `coffeaenv` environment and set the Jupyter paths, run the command (every time): +```bash +cd <working_directory>/DeepESM +source init.sh +``` + +When you are done working and would like to "de-activate" the `coffeaenv` environment, run the command: +```bash +deactivate +``` +This shell function was given to you by the virtual environment. + +To remove the virtual environment and the associated files (i.e. inverse of the setup script), you can run the following command: +```bash +cd <working_directory>/DeepESM +./clean.sh +``` +The `clean.sh` script has the same `-n` and `-d` options as in the `setup.sh` script. + ### Plotting Input Variables A plotting script is provided to make pretty plots of NN inputs from the ntuple files. diff --git a/clean.sh b/clean.sh new file mode 100755 index 0000000..ad76ca8 --- /dev/null +++ b/clean.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +case `uname` in + Linux) ECHO="echo -e" ;; + *) ECHO="echo" ;; +esac + +usage(){ + EXIT=$1 + $ECHO "clean.sh [options]" + $ECHO + $ECHO "Options:" + $ECHO "-d \tuse the developer branch of Coffea (default = 0)" + $ECHO "-h \tprint this message and exit" + $ECHO "-n [NAME] \toverride the name of the virtual environment (default = coffeaenv)" + exit $EXIT +} + +NAME=coffeaenv +DEV=0 + +# Parse options; the leading ':' selects getopts silent mode so the ':' and '?' branches below receive the offending option in OPTARG +while getopts ":dhn:" opt; do + case "$opt" in + d) DEV=1 + ;; + h) usage 0 + ;; + n) NAME=$OPTARG + ;; + :) printf "missing argument for -%s\n" "$OPTARG" >&2 + usage -1 + ;; + \?) printf "illegal option: -%s\n" "$OPTARG" >&2 + usage -2 + ;; + esac +done + +$ECHO "Removing the virtual environment ... " +rm -rf "${NAME}" "${NAME}.tar.gz" + +if [[ "$DEV" == "1" ]]; then + $ECHO "\nRemoving the 'development' version of Coffea ... " + rm -rf coffea +fi + +$ECHO "\nRemoving the ipython/jupyter kernel ... 
" +storage_dir=$(readlink -f "$PWD") +rm -rf "${storage_dir}/.local/share/jupyter/kernels/${NAME}" + +$ECHO "\nFINISHED" diff --git a/init.sh b/init.sh new file mode 100755 index 0000000..5ccaea4 --- /dev/null +++ b/init.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Keep the Jupyter/IPython config, data and runtime files under the current working directory (values quoted so paths with spaces survive) +storage_dir=$(readlink -f "$PWD") +export TCHANNEL_BASE="${storage_dir}" +export JUPYTER_PATH="${storage_dir}/.jupyter" +export JUPYTER_RUNTIME_DIR="${storage_dir}/.local/share/jupyter/runtime" +export JUPYTER_DATA_DIR="${storage_dir}/.local/share/jupyter" +export IPYTHONDIR="${storage_dir}/.ipython" + +source coffeaenv/bin/activate diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..195ef7d --- /dev/null +++ b/setup.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +case `uname` in + Linux) ECHO="echo -e" ;; + *) ECHO="echo" ;; +esac + +usage(){ + EXIT=$1 + $ECHO "setup.sh [options]" + $ECHO + $ECHO "Options:" + $ECHO "-d \tuse the developer branch of Coffea (default = 0)" + $ECHO "-h \tprint this message and exit" + $ECHO "-n [NAME] \toverride the name of the virtual environment (default = coffeaenv)" + exit $EXIT +} + +NAME=coffeaenv +LCG=/cvmfs/sft.cern.ch/lcg/views/LCG_99cuda/x86_64-centos7-gcc8-opt +DEV=0 + +# Parse options; the leading ':' selects getopts silent mode so the ':' and '?' branches below receive the offending option in OPTARG +while getopts ":dhn:" opt; do + case "$opt" in + d) DEV=1 + ;; + h) usage 0 + ;; + n) NAME=$OPTARG + ;; + :) printf "missing argument for -%s\n" "$OPTARG" >&2 + usage -1 + ;; + \?) printf "illegal option: -%s\n" "$OPTARG" >&2 + usage -2 + ;; + esac +done + +# Setup the LCG environment +$ECHO "Getting the LCG environment ... " +source "$LCG/setup.sh" + +# Install most of the needed software in a virtual environment +# following https://aarongorka.com/blog/portable-virtualenv/, an alternative is https://github.com/pantsbuild/pex +$ECHO "\nMaking and activiating the virtual environment ... " +python -m venv --copies "$NAME" +source "$NAME/bin/activate" +$ECHO "\nInstalling 'pip' packages ... 
" +python -m pip install --no-cache-dir setuptools pip argparse --upgrade +python -m pip install --no-cache-dir xxhash +python -m pip install --no-cache-dir uproot4 +if [[ "$DEV" == "1" ]]; then + $ECHO "\nInstalling the 'development' version of Coffea ... " + python -m pip install --no-cache-dir flake8 pytest coverage + git clone https://github.com/CoffeaTeam/coffea + cd coffea + python -m pip install --no-cache-dir --editable '.[dask,spark,parsl]' 'uproot-methods<0.9.0,>=0.7.3' 'pillow>=7.1.0' 'mplhep==0.1.35' + cd .. +else + $ECHO "Installing the 'production' version of Coffea ... " + python -m pip install --no-cache-dir 'coffea[dask,spark,parsl]' 'uproot-methods<0.9.0,>=0.7.3' 'pillow>=7.1.0' 'mplhep==0.1.35' +fi + +# Patch the venv scripts (honouring -n NAME): derive VIRTUAL_ENV from the activate script's own location, switch shebangs to /usr/bin/env python, and source the LCG setup on activation +$ECHO "\nSetting up the activation script for the virtual environment ... " +sed -i '40s/.*/VIRTUAL_ENV="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}" )")" \&\& pwd)"/' "$NAME/bin/activate" +find "$NAME/bin/" -type f -print0 | xargs -0 -P 4 sed -i '1s/#!.*python$/#!\/usr\/bin\/env python/' +sed -i "2a source ${LCG}/setup.sh" "$NAME/bin/activate" +sed -i "4a source ${LCG}/setup.csh" "$NAME/bin/activate.csh" + +$ECHO "\nSetting up the ipython/jupyter kernel ... " +storage_dir=$(readlink -f "$PWD") +ipython kernel install --prefix="${storage_dir}/.local" --name="$NAME" +tar -zcf "${NAME}.tar.gz" "${NAME}" + +deactivate +$ECHO "\nFINISHED"