5 changes: 1 addition & 4 deletions .circleci/config.yml
@@ -3,12 +3,9 @@ version: 2.1
jobs:
build-and-test:
docker:
- image: humancompatibleai/il-representations:2020.08.03-r3
- image: humancompatibleai/il-representations:2020.10.07-r1
steps:
- checkout
- run:
command: pip install -r requirements.txt
name: Install dependencies
- run:
command: curl -so ~/.mujoco/mjkey.txt "${MUJOCO_KEY}"
name: Set up MuJoCo
19 changes: 19 additions & 0 deletions reformat.sh
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

# Reformats imports and source code so that you don't have to

set -xe

SRC_FILES=(src/ tests/ setup.py)

# sometimes we need a couple of runs to get to a setting that all the tools are
# happy with
n_runs=2
for run in $(seq 1 $n_runs); do
echo "Reformatting source code (run $run/$n_runs)"
yapf -ir ${SRC_FILES[@]}
echo "Sorting imports (repeat $run/$n_runs)"
isort ${SRC_FILES[@]}
echo "Removing unused imports (run $run/$n_runs)"
autoflake --in-place --expand-star-imports --remove-all-unused-imports -r ${SRC_FILES[@]}
done
21 changes: 0 additions & 21 deletions requirements.txt

This file was deleted.

17 changes: 15 additions & 2 deletions setup.cfg
@@ -1,13 +1,26 @@
[isort]
line_length=79
line_length=100
known_first_party=il_representations
default_section=THIRDPARTY
multi_line_output=0
force_sort_within_sections=True

[yapf]
based_on_style=pep8
column_limit=100

[flake8]
max-line-length=100
ignore=E266,E261,W504

[tool:pytest]
# adding all these to testpaths is necessary to make flake8 and isort run on
# everything
testpaths=
tests/
src/
addopts=--isort --flake8
filterwarnings=
ignore:.*importing the ABCs from 'collections' instead of from 'collections.abc'.*:DeprecationWarning
ignore:.*Box bound precision lowered by casting to float32.*:UserWarning
ignore:.*The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors.*:UserWarning
testpaths=tests
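
Editor's note on the [tool:pytest] block above: `addopts=--isort --flake8` makes a bare `pytest` run also execute the import-order and style checks (provided by the pytest-isort and pytest-flake8 plugins added to setup.py below), and listing both `tests/` and `src/` under `testpaths` is what extends those checks to the library code as well as the test suite, as the in-file comment says.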
47 changes: 39 additions & 8 deletions setup.py
@@ -1,6 +1,5 @@
from setuptools import find_packages, setup


setup(
name="il-representations",
version="0.0.1",
@@ -9,11 +8,43 @@
python_requires=">=3.7.0",
packages=find_packages("src"),
package_dir={"": "src"},
# FIXME(sam): move from requirements.txt to setup.py once merge is done
install_requires=[],
# FIXME(sam): keeping this as reminder to add all experiment scripts as
# console_scripts
# entry_points={
# "console_scripts": [],
# },
install_requires=[
"numpy~=1.19.0",
"gym[atari]==0.17.*",
"sacred~=0.8.1",
"torch==1.6.*",
"torchvision==0.7.*",
"opencv-python~=4.3.0.36",
"pyyaml~=5.3.1",
"sacred~=0.8.1",
"tensorboard~=2.2.0",

# testing/dev utils
"pytest~=5.4.3",
"isort~=5.0",
"yapf~=0.30.0",
"flake8~=3.8.3",
"autoflake~=1.3.1",
"pytest-flake8~=1.0.6",
"pytest-isort~=1.1.0",

# imitation needs special branch as of 2020-10-07
("imitation @ git+git://github.com/HumanCompatibleAI/imitation"
"@image-env-changes#egg=imitation"),
("stable_baselines3 @ git+https://github.com/HumanCompatibleAI/stable-baselines3.git"
"@imitation#egg=stable-baselines3"),

# environments
"magical @ git+https://github.com/qxcv/magical@pyglet1.5",
"dm_control~=0.0.319497192",
("dmc2gym @ git+git://github.com/denisyarats/dmc2gym"
"@6e34d8acf18e92f0ea0a38ecee9564bdf2549076"),
],
entry_points={
"console_scripts": [
"run_rep_learner=il_representations.scripts.run_rep_learner:main",
"il_train=il_representations.scripts.il_train:main",
"il_test=il_representations.scripts.il_test:main",
],
},
)
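
Editor's note on the console_scripts entries above: after an install (e.g. `pip install -e .`), setuptools exposes `run_rep_learner`, `il_train`, and `il_test` as command-line tools, each dispatching to the `main` callable of the corresponding module under `il_representations.scripts`.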
109 changes: 65 additions & 44 deletions src/il_representations/algos/__init__.py
@@ -1,24 +1,31 @@
from il_representations.algos.representation_learner import RepresentationLearner, DEFAULT_HARDCODED_PARAMS
from il_representations.algos.encoders import MomentumEncoder, InverseDynamicsEncoder, DynamicsEncoder, RecurrentEncoder, StochasticEncoder, DeterministicEncoder
from il_representations.algos.decoders import ProjectionHead, NoOp, MomentumProjectionHead, BYOLProjectionHead, ActionConditionedVectorDecoder, TargetProjection
from il_representations.algos.losses import SymmetricContrastiveLoss, AsymmetricContrastiveLoss, MSELoss, CEBLoss, \
QueueAsymmetricContrastiveLoss, BatchAsymmetricContrastiveLoss

from il_representations.algos.augmenters import AugmentContextAndTarget, AugmentContextOnly, NoAugmentation
from il_representations.algos.pair_constructors import IdentityPairConstructor, TemporalOffsetPairConstructor
from il_representations.algos.augmenters import (AugmentContextAndTarget, AugmentContextOnly,
NoAugmentation)
from il_representations.algos.batch_extenders import QueueBatchExtender
from il_representations.algos.optimizers import LARS
from il_representations.algos.decoders import (ActionConditionedVectorDecoder, BYOLProjectionHead,
MomentumProjectionHead, NoOp, ProjectionHead,
TargetProjection)
from il_representations.algos.encoders import (DeterministicEncoder, DynamicsEncoder,
InverseDynamicsEncoder, MomentumEncoder,
RecurrentEncoder, StochasticEncoder)
from il_representations.algos.losses import (BatchAsymmetricContrastiveLoss, CEBLoss, MSELoss,
QueueAsymmetricContrastiveLoss,
SymmetricContrastiveLoss)
from il_representations.algos.pair_constructors import (IdentityPairConstructor,
TemporalOffsetPairConstructor)
from il_representations.algos.representation_learner import (DEFAULT_HARDCODED_PARAMS,
RepresentationLearner)


class SimCLR(RepresentationLearner):
"""
Implementation of SimCLR: A Simple Framework for Contrastive Learning of Visual Representations
https://arxiv.org/abs/2002.05709

This method works by using a contrastive loss to push together representations of two differently-augmented
versions of the same image. In particular, it uses a symmetric contrastive loss, which compares the
(target, context) similarity against similarity of context with all other targets, and also similarity
of target with all other contexts.
This method works by using a contrastive loss to push together
representations of two differently-augmented versions of the same image. In
particular, it uses a symmetric contrastive loss, which compares the
(target, context) similarity against similarity of context with all other
targets, and also similarity of target with all other contexts.
"""
def __init__(self, env, log_dir, **kwargs):
kwargs = self.validate_and_update_kwargs(kwargs)
@@ -39,7 +46,8 @@ def __init__(self, env, log_dir, temporal_offset=1, **kwargs):
Implementation of a non-recurrent version of CPC: Contrastive Predictive Coding
https://arxiv.org/abs/1807.03748

By default, augments only the context, but can be modified to augment both context and target.
By default, augments only the context, but can be modified to augment
both context and target.
"""
kwargs_updates = {'target_pair_constructor_kwargs': {'temporal_offset': temporal_offset}}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)
@@ -58,8 +66,9 @@ class RecurrentCPC(RepresentationLearner):
Implementation of a recurrent version of CPC: Contrastive Predictive Coding
https://arxiv.org/abs/1807.03748

The encoder first encodes individual frames for both context and target, and then, for the context,
builds up a recurrent representation of all prior frames in the same trajectory, to use to predict the target.
The encoder first encodes individual frames for both context and target,
and then, for the context, builds up a recurrent representation of all
prior frames in the same trajectory, to use to predict the target.

By default, augments only the context, but can be modified to augment both context and target.
"""
@@ -100,7 +109,6 @@ class MoCoWithProjection(RepresentationLearner):

Includes an additional projection head atop the representation and before the prediction
"""

def __init__(self, env, log_dir, **kwargs):
hardcoded_params = DEFAULT_HARDCODED_PARAMS + ['batch_extender']
kwargs = self.validate_and_update_kwargs(kwargs, hardcoded_params=hardcoded_params)
@@ -119,15 +127,16 @@ class DynamicsPrediction(RepresentationLearner):
def __init__(self, env, log_dir, **kwargs):
kwargs_updates = {'target_pair_constructor_kwargs': {'mode': 'dynamics'}}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)
super().__init__(env=env,
log_dir=log_dir,
encoder=DynamicsEncoder,
# Should be a pixel decoder that takes in action, currently errors
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)
super().__init__(
env=env,
log_dir=log_dir,
encoder=DynamicsEncoder,
# Should be a pixel decoder that takes in action, currently errors
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)

def learn(self, dataset, training_epochs):
raise NotImplementedError("DynamicsPrediction is not yet fully implemented")
@@ -138,15 +147,16 @@ def __init__(self, env, log_dir, **kwargs):
kwargs_updates = {'target_pair_constructor_kwargs': {'mode': 'inverse_dynamics'}}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)

super().__init__(env=env,
log_dir=log_dir,
encoder=InverseDynamicsEncoder,
# Should be an action decoder that takes in next obs representation
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)
super().__init__(
env=env,
log_dir=log_dir,
encoder=InverseDynamicsEncoder,
# Should be an action decoder that takes in next obs representation
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)

def learn(self, dataset, training_epochs):
raise NotImplementedError("InverseDynamicsPrediction is not yet fully implemented")
@@ -186,6 +196,7 @@ def __init__(self, env, log_dir, **kwargs):
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)


class FixedVarianceCEB(RepresentationLearner):
"""
CEB with fixed rather than learned variance
@@ -201,6 +212,7 @@ def __init__(self, env, log_dir, **kwargs):
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)


class FixedVarianceTargetProjectedCEB(RepresentationLearner):
"""
"""
@@ -218,17 +230,25 @@ def __init__(self, env, log_dir, **kwargs):

class ActionConditionedTemporalCPC(RepresentationLearner):
"""
Implementation of reinforcement-learning-specific variant of Temporal CPC which adds a projection layer on top
of the learned representation which integrates an encoding of the actions taken between time (t) and whatever
time (t+k) is specified in temporal_offset and used for pulling out the target frame. This, notionally, allows
the algorithm to construct frame representations that are action-independent, rather than marginalizing over an
expected policy, as might need to happen if the algorithm needed to predict the frame at time (t+k) over any
possible action distribution.
Implementation of reinforcement-learning-specific variant of Temporal CPC
which adds a projection layer on top of the learned representation which
integrates an encoding of the actions taken between time (t) and whatever
time (t+k) is specified in temporal_offset and used for pulling out the
target frame. This, notionally, allows the algorithm to construct frame
representations that are action-independent, rather than marginalizing over
an expected policy, as might need to happen if the algorithm needed to
predict the frame at time (t+k) over any possible action distribution.
"""
def __init__(self, env, log_dir, **kwargs):
kwargs_updates = {'preprocess_extra_context': False,
'target_pair_constructor_kwargs': {"mode": "dynamics"},
'decoder_kwargs': {'action_space': env.action_space}}
kwargs_updates = {
'preprocess_extra_context': False,
'target_pair_constructor_kwargs': {
"mode": "dynamics"
},
'decoder_kwargs': {
'action_space': env.action_space
}
}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)

super().__init__(env=env,
@@ -239,5 +259,6 @@ def __init__(self, env, log_dir, **kwargs):
loss_calculator=BatchAsymmetricContrastiveLoss,
**kwargs)


## Algos that should not be run in all-algo test because they are not yet finished
WIP_ALGOS = [DynamicsPrediction, InverseDynamicsPrediction]
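
Editor's note: the SimCLR docstring above describes a symmetric contrastive loss that scores each (context, target) pair against all other targets and against all other contexts. The snippet below is a minimal, self-contained sketch of that idea only; it is not the repo's `SymmetricContrastiveLoss` API, and the function name, L2 normalization, and `temperature` default are assumptions made for illustration.

```python
# Sketch of a symmetric contrastive (InfoNCE-style) loss, assuming paired
# per-row embeddings; illustrative only, not the repo's implementation.
import torch
import torch.nn.functional as F


def symmetric_contrastive_loss(context: torch.Tensor,
                               target: torch.Tensor,
                               temperature: float = 0.1) -> torch.Tensor:
    """context, target: (batch, dim) embeddings of two differently-augmented
    views, where row i of each corresponds to the same underlying frame."""
    context = F.normalize(context, dim=1)
    target = F.normalize(target, dim=1)
    # (batch, batch) similarity matrix; diagonal entries are the positive pairs
    logits = context @ target.t() / temperature
    labels = torch.arange(context.shape[0], device=context.device)
    # context-to-target direction: each context must pick out its own target
    loss_c2t = F.cross_entropy(logits, labels)
    # target-to-context direction: each target must pick out its own context
    loss_t2c = F.cross_entropy(logits.t(), labels)
    return 0.5 * (loss_c2t + loss_t2c)
```

In the repo, the analogous computation is presumably what the imported `SymmetricContrastiveLoss` provides, wired in via the `loss_calculator` argument that the constructors above pass to `RepresentationLearner`.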
15 changes: 5 additions & 10 deletions src/il_representations/algos/augmenters.py
@@ -1,21 +1,16 @@
import enum
from torchvision import transforms
from imitation.augment.color import ColorSpace # noqa: F401
from imitation.augment.convenience import StandardAugmentations
from il_representations.algos.utils import gaussian_blur
import torch
from abc import ABC, abstractmethod
import PIL
"""
These are pretty basic: when constructed, they take in a list of augmentations, and
either augment just the context, or both the context and the target, depending on the algorithm.
"""
from abc import ABC, abstractmethod

from imitation.augment.color import ColorSpace # noqa: F401
from imitation.augment.convenience import StandardAugmentations


class Augmenter(ABC):
def __init__(self, augmenter_spec, color_space):
augment_op = StandardAugmentations.from_string_spec(
augmenter_spec, color_space)
augment_op = StandardAugmentations.from_string_spec(augmenter_spec, color_space)
self.augment_op = augment_op

@abstractmethod
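
Editor's note: the module docstring above says each augmenter transforms either only the context or both the context and the target. Below is a hedged, self-contained sketch of that split; the `augment` method name and its `(context, target)` signature are guesses (the abstract method is cut off in this diff), and the real classes wrap imitation's `StandardAugmentations` rather than a bare callable.

```python
# Illustrative sketch of the context-only vs. context-and-target split;
# class and method names are assumptions, not the repo's actual API.
from abc import ABC, abstractmethod

import torch


class SketchAugmenter(ABC):
    def __init__(self, augment_op):
        # augment_op: any callable mapping a batch of images to augmented images
        self.augment_op = augment_op

    @abstractmethod
    def augment(self, context, target):
        ...


class SketchAugmentContextOnly(SketchAugmenter):
    def augment(self, context, target):
        # only the context frames are perturbed; targets pass through unchanged
        return self.augment_op(context), target


class SketchAugmentContextAndTarget(SketchAugmenter):
    def augment(self, context, target):
        # both halves of each pair receive (independent) augmentations
        return self.augment_op(context), self.augment_op(target)


if __name__ == "__main__":
    noise = lambda x: x + 0.01 * torch.randn_like(x)
    batch = torch.rand(8, 3, 64, 64)
    ctx, tgt = SketchAugmentContextOnly(noise).augment(batch, batch.clone())
```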
7 changes: 3 additions & 4 deletions src/il_representations/algos/base_learner.py
@@ -1,4 +1,5 @@
import gym

from il_representations.algos.utils import set_global_seeds


@@ -13,12 +14,10 @@ def __init__(self, env):
# if EncoderSimplePolicyHead is refactored.
if isinstance(self.action_space, gym.spaces.Discrete):
self.action_size = env.action_space.n
elif (isinstance(self.action_space, gym.spaces.Box)
and len(self.action_space.shape) == 1):
elif (isinstance(self.action_space, gym.spaces.Box) and len(self.action_space.shape) == 1):
self.action_size, = self.action_space.shape
else:
raise NotImplementedError(
f"can't handle action space {self.action_space}")
raise NotImplementedError(f"can't handle action space {self.action_space}")

def set_random_seed(self, seed):
if seed is None:
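
Editor's note on the action-space branch above: a `gym.spaces.Discrete(4)` action space yields `action_size = 4` (the number of discrete actions), a one-dimensional `gym.spaces.Box` with `shape=(3,)` yields `action_size = 3`, and any other space still raises `NotImplementedError`.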