Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions DataGetter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import uproot
import uproot4 as uproot
import numpy as np
import pandas as pd
from glob import glob
Expand Down Expand Up @@ -38,7 +38,7 @@ def get_data(signalDataSet, backgroundDataSet, config, doBgWeight = False, doSgW
trainData[key] = data[key][:minLen]

# Randomly shuffle the signal and background
np.random.seed(config["seed"])
np.random.seed(config["seed"])
perms = np.random.permutation(trainData["data"].shape[0])
for key in trainData:
trainData[key] = trainData[key][perms]
Expand Down Expand Up @@ -92,7 +92,7 @@ def getColumnHeaders(self, samplesToRun, treename):
try:
sample = samplesToRun[0]
f = uproot.open(sample)
self.columnHeaders = f[treename].pandas.df().columns.tolist()
self.columnHeaders = f[treename].arrays(library="pd").columns.tolist()
f.close()
except IndexError as e:
print(e)
Expand All @@ -111,8 +111,7 @@ def getDataSets(self, samplesToRun, treename):
for filename in samplesToRun:
try:
f = uproot.open(filename)
#dsets.append( f[treename].pandas.df(branches=variables) )
dsets.append( f[treename].pandas.df() )
dsets.append( f[treename].arrays(library="pd") )
f.close()
except Exception as e:
print("Warning: \"%s\" has issues" % filename, e)
Expand Down
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,36 @@ xrdcp -r root://cmseos.fnal.gov///store/user/cmadrid/trainingTuples/MVA_Training
python train.py
```

### Alternative LPC setup: Python Virtual Environment

To begin the initial setup, run the following commands:
```bash
cd <working_directory>
git clone git@github.com:StealthStop/DeepESM.git
cd DeepESM
./setup.sh
```
Remember to replace `<working_directory>` with the directory where you want your files/folders to appear. You can change the name of the virtual environment by using the `-n` option and you can use the development version of coffea by using the `-d` option. These commands only need to be run during the initial setup. When doing your day-to-day tasks, you can skip these.

To activate the `coffeaenv` environment and set the Jupyter paths, run the command (every time):
```bash
cd <working_directory>/DeepESM
source init.sh
```

When you are done working and would like to deactivate the `coffeaenv` environment, run the command:
```bash
deactivate
```
This shell function was given to you by the virtual environment.

To remove the virtual environment and the associated files (i.e. the inverse of the setup script), run the following command:
```bash
cd <working_directory>/DeepESM
./clean.sh
```
The `clean.sh` script has the same `-n` and `-d` options as in the `setup.sh` script.

### Plotting Input Variables

A plotting script is provided to make pretty plots of NN inputs from the ntuple files.
Expand Down
52 changes: 52 additions & 0 deletions clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash

# Choose the echo command used for all messages in this script: Linux gets
# `echo -e` (so the \t and \n escapes in the messages are expanded), every
# other platform gets plain `echo`.
if [[ "$(uname)" == "Linux" ]]; then
    ECHO="echo -e"
else
    ECHO="echo"
fi

# Print the command-line help for clean.sh and terminate the script.
#   $1 - exit status to terminate with
usage(){
    EXIT=$1
    $ECHO "clean.sh [options]"
    $ECHO
    $ECHO "Options:"
    # In this script -d controls whether the cloned 'coffea' directory is deleted.
    $ECHO "-d \talso remove the 'development' version of Coffea (default = 0)"
    $ECHO "-h \tprint this message and exit"
    $ECHO "-n [NAME] \toverride the name of the virtual environment (default = coffeaenv)"
    exit $EXIT
}

# Defaults, overridable from the command line.
NAME=coffeaenv   # -n NAME: name of the virtual environment to remove
DEV=0            # -d: set to 1 to also remove the cloned 'coffea' directory

# check arguments
# The leading ':' in the optstring selects getopts' silent error reporting:
# a missing option argument is reported as ':' and an unknown option as '?',
# and in both cases OPTARG holds the offending option letter.  Without the
# leading ':' the ':' branch is never taken and OPTARG is unset in the '?'
# branch, so the messages below could not name the option.
while getopts ":dhn:" opt; do
    case "$opt" in
        d) DEV=1
        ;;
        h) usage 0
        ;;
        n) NAME=$OPTARG
        ;;
        :) printf "missing argument for -%s\n" "$OPTARG" >&2
           usage -1
        ;;
        \?) printf "illegal option: -%s\n" "$OPTARG" >&2
           usage -2
        ;;
    esac
done

# Refuse to run with an empty environment name: the kernel path below would
# then resolve to the whole 'kernels' directory and rm -rf would wipe it.
if [[ -z "$NAME" ]]; then
    printf "the virtual environment name must not be empty\n" >&2
    exit 1
fi

# Remove the virtual environment directory and its tarball.
# Expansions are quoted (and '--' ends option parsing) so that a name with
# whitespace, glob characters or a leading '-' cannot make rm -rf act on
# anything other than the intended paths.
$ECHO "Removing the virtual environment ... "
rm -rf -- "${NAME}" "${NAME}.tar.gz"

# Only remove the cloned Coffea source tree when -d was given.
if [[ "$DEV" == "1" ]]; then
    $ECHO "\nRemoving the 'development' version of Coffea ... "
    rm -rf coffea
fi

# Remove the kernel directory stored under the current directory's .local tree.
$ECHO "\nRemoving the ipython/jupyter kernel ... "
storage_dir=$(readlink -f "$PWD")
rm -rf -- "${storage_dir}/.local/share/jupyter/kernels/${NAME}"

$ECHO "\nFINISHED"
11 changes: 11 additions & 0 deletions init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# vars for jupyter
# Every path is rooted at the symlink-resolved current directory, so this
# script has to be sourced from the directory that holds the virtual
# environment.  Expansions are quoted so a directory containing whitespace
# does not get word-split.
storage_dir=$(readlink -f "$PWD")
export TCHANNEL_BASE="${storage_dir}"
export JUPYTER_PATH="${storage_dir}/.jupyter"
export JUPYTER_RUNTIME_DIR="${storage_dir}/.local/share/jupyter/runtime"
export JUPYTER_DATA_DIR="${storage_dir}/.local/share/jupyter"
export IPYTHONDIR="${storage_dir}/.ipython"

# NOTE: the environment name is fixed to 'coffeaenv' here; an environment
# created under a different name has to be activated by editing this path.
source coffeaenv/bin/activate
79 changes: 79 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash

# Choose the echo command used for all messages in this script: Linux gets
# `echo -e` (so the \t and \n escapes in the messages are expanded), every
# other platform gets plain `echo`.
if [[ "$(uname)" == "Linux" ]]; then
    ECHO="echo -e"
else
    ECHO="echo"
fi

# Show the setup.sh help text, then terminate the script.
#   $1 - status code handed to exit
usage(){
    status=$1
    # One entry per output line; the empty entry produces the blank line
    # between the synopsis and the option list.
    for help_line in \
        "setup.sh [options]" \
        "" \
        "Options:" \
        "-d \tuse the developer branch of Coffea (default = 0)" \
        "-h \tprint this message and exit" \
        "-n [NAME] \toverride the name of the virtual environment (default = coffeaenv)"
    do
        $ECHO "$help_line"
    done
    exit $status
}

# Defaults, overridable from the command line.
NAME=coffeaenv   # -n NAME: name of the virtual environment to create
# LCG view whose setup script is sourced before the virtual environment is made
LCG=/cvmfs/sft.cern.ch/lcg/views/LCG_99cuda/x86_64-centos7-gcc8-opt
DEV=0            # -d: set to 1 to install Coffea from a git checkout

# check arguments
# The leading ':' in the optstring selects getopts' silent error reporting:
# a missing option argument is reported as ':' and an unknown option as '?',
# and in both cases OPTARG holds the offending option letter.  Without the
# leading ':' the ':' branch is never taken and OPTARG is unset in the '?'
# branch, so the messages below could not name the option.
while getopts ":dhn:" opt; do
    case "$opt" in
        d) DEV=1
        ;;
        h) usage 0
        ;;
        n) NAME=$OPTARG
        ;;
        :) printf "missing argument for -%s\n" "$OPTARG" >&2
           usage -1
        ;;
        \?) printf "illegal option: -%s\n" "$OPTARG" >&2
           usage -2
        ;;
    esac
done

# Setup the LCG environment
$ECHO "Getting the LCG environment ... "
source "$LCG/setup.sh"

# Install most of the needed software in a virtual environment
# following https://aarongorka.com/blog/portable-virtualenv/, an alternative is https://github.com/pantsbuild/pex
$ECHO "\nMaking and activiating the virtual environment ... "
python -m venv --copies "$NAME"
source "$NAME/bin/activate"
$ECHO "\nInstalling 'pip' packages ... "
python -m pip install --no-cache-dir setuptools pip argparse --upgrade
python -m pip install --no-cache-dir xxhash
python -m pip install --no-cache-dir uproot4
# The requirement specs with [extras] are quoted: unquoted square brackets are
# a shell glob pattern and could be expanded against files in the current directory.
if [[ "$DEV" == "1" ]]; then
    $ECHO "\nInstalling the 'development' version of Coffea ... "
    python -m pip install --no-cache-dir flake8 pytest coverage
    git clone https://github.com/CoffeaTeam/coffea
    # Run the editable install in a subshell: if the checkout is missing the
    # install is skipped instead of being run against the wrong directory, and
    # the working directory of the rest of the script is never changed.
    (
        cd coffea || exit 1
        python -m pip install --no-cache-dir --editable '.[dask,spark,parsl]' 'uproot-methods<0.9.0,>=0.7.3' 'pillow>=7.1.0' 'mplhep==0.1.35'
    )
else
    $ECHO "Installing the 'production' version of Coffea ... "
    python -m pip install --no-cache-dir 'coffea[dask,spark,parsl]' 'uproot-methods<0.9.0,>=0.7.3' 'pillow>=7.1.0' 'mplhep==0.1.35'
fi

# Setup the activation script for the virtual environment
$ECHO "\nSetting up the activation script for the virtual environment ... "
# Replace line 40 of the activate script with a VIRTUAL_ENV that is computed
# from the script's own location (assumes line 40 is the VIRTUAL_ENV
# assignment in the generated file -- TODO confirm for the Python version in use).
sed -i '40s/.*/VIRTUAL_ENV="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}" )")" \&\& pwd)"/' "$NAME/bin/activate"
# Rewrite absolute python shebangs to '/usr/bin/env python'.  The search
# path uses $NAME so that an environment created with -n is patched as well.
find "$NAME/bin/" -type f -print0 | xargs -0 -P 4 sed -i '1s/#!.*python$/#!\/usr\/bin\/env python/'
# Make activation also source the LCG setup (inserted after line 2 / line 4).
sed -i "2a source ${LCG}/setup.sh" "$NAME/bin/activate"
sed -i "4a source ${LCG}/setup.csh" "$NAME/bin/activate.csh"

$ECHO "\nSetting up the ipython/jupyter kernel ... "
storage_dir=$(readlink -f "$PWD")
ipython kernel install --prefix="${storage_dir}/.local" --name="$NAME"
# Archive the finished environment next to it.
tar -zcf "${NAME}.tar.gz" "${NAME}"

deactivate
$ECHO "\nFINISHED"