5 changes: 1 addition & 4 deletions .circleci/config.yml
@@ -3,12 +3,9 @@ version: 2.1
jobs:
build-and-test:
docker:
- image: humancompatibleai/il-representations:2020.08.03-r3
- image: humancompatibleai/il-representations:2020.10.07-r1
steps:
- checkout
- run:
command: pip install -r requirements.txt
name: Install dependencies
- run:
command: curl -so ~/.mujoco/mjkey.txt "${MUJOCO_KEY}"
name: Set up MuJoCo
19 changes: 19 additions & 0 deletions reformat.sh
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

# Reformats imports and source code so that you don't have to

set -xe

SRC_FILES=(src/ tests/ setup.py)

# sometimes we need a couple of runs to get to a setting that all the tools are
# happy with
n_runs=2
for run in $(seq 1 $n_runs); do
echo "Reformatting source code (run $run/$n_runs)"
yapf -ir ${SRC_FILES[@]}
echo "Sorting imports (repeat $run/$n_runs)"
isort ${SRC_FILES[@]}
echo "Removing unused imports (run $run/$n_runs)"
autoflake --in-place --expand-star-imports --remove-all-unused-imports -r ${SRC_FILES[@]}
done
21 changes: 0 additions & 21 deletions requirements.txt

This file was deleted.

17 changes: 15 additions & 2 deletions setup.cfg
@@ -1,13 +1,26 @@
[isort]
line_length=79
line_length=100
known_first_party=il_representations
default_section=THIRDPARTY
multi_line_output=0
force_sort_within_sections=True

[yapf]
based_on_style=pep8
column_limit=100

[flake8]
max-line-length=100
ignore=E266,E261,W504

[tool:pytest]
# adding all these to testpaths is necessary to make flake8 and isort run on
# everything
testpaths=
tests/
src/
addopts=--isort --flake8
filterwarnings=
ignore:.*importing the ABCs from 'collections' instead of from 'collections.abc'.*:DeprecationWarning
ignore:.*Box bound precision lowered by casting to float32.*:UserWarning
ignore:.*The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors.*:UserWarning
testpaths=tests
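
Editor's note on the [tool:pytest] block above: `addopts=--isort --flake8` makes a bare `pytest` run also execute the import-order and style checks (provided by the pytest-isort and pytest-flake8 plugins added to setup.py below), and listing both `tests/` and `src/` under `testpaths` is what extends those checks to the library code as well as the test suite, as the in-file comment says.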
47 changes: 39 additions & 8 deletions setup.py
@@ -1,6 +1,5 @@
from setuptools import find_packages, setup


setup(
name="il-representations",
version="0.0.1",
@@ -9,11 +8,43 @@
python_requires=">=3.7.0",
packages=find_packages("src"),
package_dir={"": "src"},
# FIXME(sam): move from requirements.txt to setup.py once merge is done
install_requires=[],
# FIXME(sam): keeping this as reminder to add all experiment scripts as
# console_scripts
# entry_points={
# "console_scripts": [],
# },
install_requires=[
"numpy~=1.19.0",
"gym[atari]==0.17.*",
"sacred~=0.8.1",
"torch==1.6.*",
"torchvision==0.7.*",
"opencv-python~=4.3.0.36",
"pyyaml~=5.3.1",
"sacred~=0.8.1",
"tensorboard~=2.2.0",

# testing/dev utils
"pytest~=5.4.3",
"isort~=5.0",
"yapf~=0.30.0",
"flake8~=3.8.3",
"autoflake~=1.3.1",
"pytest-flake8~=1.0.6",
"pytest-isort~=1.1.0",

# imitation needs special branch as of 2020-10-07
("imitation @ git+git://github.com/HumanCompatibleAI/imitation"
"@image-env-changes#egg=imitation"),
("stable_baselines3 @ git+https://github.com/HumanCompatibleAI/stable-baselines3.git"
"@imitation#egg=stable-baselines3"),

# environments
"magical @ git+https://github.com/qxcv/magical@pyglet1.5",
"dm_control~=0.0.319497192",
("dmc2gym @ git+git://github.com/denisyarats/dmc2gym"
"@6e34d8acf18e92f0ea0a38ecee9564bdf2549076"),
],
entry_points={
"console_scripts": [
"run_rep_learner=il_representations.scripts.run_rep_learner:main",
"il_train=il_representations.scripts.il_train:main",
"il_test=il_representations.scripts.il_test:main",
],
},
)
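
Editor's note on the console_scripts entries above: after an install (e.g. `pip install -e .`), setuptools exposes `run_rep_learner`, `il_train`, and `il_test` as command-line tools, each dispatching to the `main` callable of the corresponding module under `il_representations.scripts`.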
109 changes: 65 additions & 44 deletions src/il_representations/algos/__init__.py
@@ -1,24 +1,31 @@
from il_representations.algos.representation_learner import RepresentationLearner, DEFAULT_HARDCODED_PARAMS
from il_representations.algos.encoders import MomentumEncoder, InverseDynamicsEncoder, DynamicsEncoder, RecurrentEncoder, StochasticEncoder, DeterministicEncoder
from il_representations.algos.decoders import ProjectionHead, NoOp, MomentumProjectionHead, BYOLProjectionHead, ActionConditionedVectorDecoder, TargetProjection
from il_representations.algos.losses import SymmetricContrastiveLoss, AsymmetricContrastiveLoss, MSELoss, CEBLoss, \
QueueAsymmetricContrastiveLoss, BatchAsymmetricContrastiveLoss

from il_representations.algos.augmenters import AugmentContextAndTarget, AugmentContextOnly, NoAugmentation
from il_representations.algos.pair_constructors import IdentityPairConstructor, TemporalOffsetPairConstructor
from il_representations.algos.augmenters import (AugmentContextAndTarget, AugmentContextOnly,
NoAugmentation)
from il_representations.algos.batch_extenders import QueueBatchExtender
from il_representations.algos.optimizers import LARS
from il_representations.algos.decoders import (ActionConditionedVectorDecoder, BYOLProjectionHead,
MomentumProjectionHead, NoOp, ProjectionHead,
TargetProjection)
from il_representations.algos.encoders import (DeterministicEncoder, DynamicsEncoder,
InverseDynamicsEncoder, MomentumEncoder,
RecurrentEncoder, StochasticEncoder)
from il_representations.algos.losses import (BatchAsymmetricContrastiveLoss, CEBLoss, MSELoss,
QueueAsymmetricContrastiveLoss,
SymmetricContrastiveLoss)
from il_representations.algos.pair_constructors import (IdentityPairConstructor,
TemporalOffsetPairConstructor)
from il_representations.algos.representation_learner import (DEFAULT_HARDCODED_PARAMS,
RepresentationLearner)


class SimCLR(RepresentationLearner):
"""
Implementation of SimCLR: A Simple Framework for Contrastive Learning of Visual Representations
https://arxiv.org/abs/2002.05709

This method works by using a contrastive loss to push together representations of two differently-augmented
versions of the same image. In particular, it uses a symmetric contrastive loss, which compares the
(target, context) similarity against similarity of context with all other targets, and also similarity
of target with all other contexts.
This method works by using a contrastive loss to push together
representations of two differently-augmented versions of the same image. In
particular, it uses a symmetric contrastive loss, which compares the
(target, context) similarity against similarity of context with all other
targets, and also similarity of target with all other contexts.
"""
def __init__(self, env, log_dir, **kwargs):
kwargs = self.validate_and_update_kwargs(kwargs)
@@ -39,7 +46,8 @@ def __init__(self, env, log_dir, temporal_offset=1, **kwargs):
Implementation of a non-recurrent version of CPC: Contrastive Predictive Coding
https://arxiv.org/abs/1807.03748

By default, augments only the context, but can be modified to augment both context and target.
By default, augments only the context, but can be modified to augment
both context and target.
"""
kwargs_updates = {'target_pair_constructor_kwargs': {'temporal_offset': temporal_offset}}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)
@@ -58,8 +66,9 @@ class RecurrentCPC(RepresentationLearner):
Implementation of a recurrent version of CPC: Contrastive Predictive Coding
https://arxiv.org/abs/1807.03748

The encoder first encodes individual frames for both context and target, and then, for the context,
builds up a recurrent representation of all prior frames in the same trajectory, to use to predict the target.
The encoder first encodes individual frames for both context and target,
and then, for the context, builds up a recurrent representation of all
prior frames in the same trajectory, to use to predict the target.

By default, augments only the context, but can be modified to augment both context and target.
"""
@@ -100,7 +109,6 @@ class MoCoWithProjection(RepresentationLearner):

Includes an additional projection head atop the representation and before the prediction
"""

def __init__(self, env, log_dir, **kwargs):
hardcoded_params = DEFAULT_HARDCODED_PARAMS + ['batch_extender']
kwargs = self.validate_and_update_kwargs(kwargs, hardcoded_params=hardcoded_params)
@@ -119,15 +127,16 @@ class DynamicsPrediction(RepresentationLearner):
def __init__(self, env, log_dir, **kwargs):
kwargs_updates = {'target_pair_constructor_kwargs': {'mode': 'dynamics'}}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)
super().__init__(env=env,
log_dir=log_dir,
encoder=DynamicsEncoder,
# Should be a pixel decoder that takes in action, currently errors
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)
super().__init__(
env=env,
log_dir=log_dir,
encoder=DynamicsEncoder,
# Should be a pixel decoder that takes in action, currently errors
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)

def learn(self, dataset, training_epochs):
raise NotImplementedError("DynamicsPrediction is not yet fully implemented")
@@ -138,15 +147,16 @@ def __init__(self, env, log_dir, **kwargs):
kwargs_updates = {'target_pair_constructor_kwargs': {'mode': 'inverse_dynamics'}}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)

super().__init__(env=env,
log_dir=log_dir,
encoder=InverseDynamicsEncoder,
# Should be an action decoder that takes in next obs representation
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)
super().__init__(
env=env,
log_dir=log_dir,
encoder=InverseDynamicsEncoder,
# Should be an action decoder that takes in next obs representation
decoder=NoOp,
loss_calculator=MSELoss,
augmenter=AugmentContextOnly,
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)

def learn(self, dataset, training_epochs):
raise NotImplementedError("InverseDynamicsPrediction is not yet fully implemented")
@@ -186,6 +196,7 @@ def __init__(self, env, log_dir, **kwargs):
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)


class FixedVarianceCEB(RepresentationLearner):
"""
CEB with fixed rather than learned variance
@@ -201,6 +212,7 @@ def __init__(self, env, log_dir, **kwargs):
target_pair_constructor=TemporalOffsetPairConstructor,
**kwargs)


class FixedVarianceTargetProjectedCEB(RepresentationLearner):
"""
"""
@@ -218,17 +230,25 @@ def __init__(self, env, log_dir, **kwargs):

class ActionConditionedTemporalCPC(RepresentationLearner):
"""
Implementation of reinforcement-learning-specific variant of Temporal CPC which adds a projection layer on top
of the learned representation which integrates an encoding of the actions taken between time (t) and whatever
time (t+k) is specified in temporal_offset and used for pulling out the target frame. This, notionally, allows
the algorithm to construct frame representations that are action-independent, rather than marginalizing over an
expected policy, as might need to happen if the algorithm needed to predict the frame at time (t+k) over any
possible action distribution.
Implementation of reinforcement-learning-specific variant of Temporal CPC
which adds a projection layer on top of the learned representation which
integrates an encoding of the actions taken between time (t) and whatever
time (t+k) is specified in temporal_offset and used for pulling out the
target frame. This, notionally, allows the algorithm to construct frame
representations that are action-independent, rather than marginalizing over
an expected policy, as might need to happen if the algorithm needed to
predict the frame at time (t+k) over any possible action distribution.
"""
def __init__(self, env, log_dir, **kwargs):
kwargs_updates = {'preprocess_extra_context': False,
'target_pair_constructor_kwargs': {"mode": "dynamics"},
'decoder_kwargs': {'action_space': env.action_space}}
kwargs_updates = {
'preprocess_extra_context': False,
'target_pair_constructor_kwargs': {
"mode": "dynamics"
},
'decoder_kwargs': {
'action_space': env.action_space
}
}
kwargs = self.validate_and_update_kwargs(kwargs, kwargs_updates=kwargs_updates)

super().__init__(env=env,
@@ -239,5 +259,6 @@ def __init__(self, env, log_dir, **kwargs):
loss_calculator=BatchAsymmetricContrastiveLoss,
**kwargs)


## Algos that should not be run in all-algo test because they are not yet finished
WIP_ALGOS = [DynamicsPrediction, InverseDynamicsPrediction]
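
Editor's note: the SimCLR docstring above describes a symmetric contrastive loss that scores each (context, target) pair against all other targets and against all other contexts. The snippet below is a minimal, self-contained sketch of that idea only; it is not the repo's `SymmetricContrastiveLoss` API, and the function name, L2 normalization, and `temperature` default are assumptions made for illustration.

```python
# Sketch of a symmetric contrastive (InfoNCE-style) loss, assuming paired
# per-row embeddings; illustrative only, not the repo's implementation.
import torch
import torch.nn.functional as F


def symmetric_contrastive_loss(context: torch.Tensor,
                               target: torch.Tensor,
                               temperature: float = 0.1) -> torch.Tensor:
    """context, target: (batch, dim) embeddings of two differently-augmented
    views, where row i of each corresponds to the same underlying frame."""
    context = F.normalize(context, dim=1)
    target = F.normalize(target, dim=1)
    # (batch, batch) similarity matrix; diagonal entries are the positive pairs
    logits = context @ target.t() / temperature
    labels = torch.arange(context.shape[0], device=context.device)
    # context-to-target direction: each context must pick out its own target
    loss_c2t = F.cross_entropy(logits, labels)
    # target-to-context direction: each target must pick out its own context
    loss_t2c = F.cross_entropy(logits.t(), labels)
    return 0.5 * (loss_c2t + loss_t2c)
```

In the repo, the analogous computation is presumably what the imported `SymmetricContrastiveLoss` provides, wired in via the `loss_calculator` argument that the constructors above pass to `RepresentationLearner`.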
15 changes: 5 additions & 10 deletions src/il_representations/algos/augmenters.py
@@ -1,21 +1,16 @@
import enum
from torchvision import transforms
from imitation.augment.color import ColorSpace # noqa: F401
from imitation.augment.convenience import StandardAugmentations
from il_representations.algos.utils import gaussian_blur
import torch
from abc import ABC, abstractmethod
import PIL
"""
These are pretty basic: when constructed, they take in a list of augmentations, and
either augment just the context, or both the context and the target, depending on the algorithm.
"""
from abc import ABC, abstractmethod

from imitation.augment.color import ColorSpace # noqa: F401
from imitation.augment.convenience import StandardAugmentations


class Augmenter(ABC):
def __init__(self, augmenter_spec, color_space):
augment_op = StandardAugmentations.from_string_spec(
augmenter_spec, color_space)
augment_op = StandardAugmentations.from_string_spec(augmenter_spec, color_space)
self.augment_op = augment_op

@abstractmethod
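
Editor's note: the module docstring above says each augmenter transforms either only the context or both the context and the target. Below is a hedged, self-contained sketch of that split; the `augment` method name and its `(context, target)` signature are guesses (the abstract method is cut off in this diff), and the real classes wrap imitation's `StandardAugmentations` rather than a bare callable.

```python
# Illustrative sketch of the context-only vs. context-and-target split;
# class and method names are assumptions, not the repo's actual API.
from abc import ABC, abstractmethod

import torch


class SketchAugmenter(ABC):
    def __init__(self, augment_op):
        # augment_op: any callable mapping a batch of images to augmented images
        self.augment_op = augment_op

    @abstractmethod
    def augment(self, context, target):
        ...


class SketchAugmentContextOnly(SketchAugmenter):
    def augment(self, context, target):
        # only the context frames are perturbed; targets pass through unchanged
        return self.augment_op(context), target


class SketchAugmentContextAndTarget(SketchAugmenter):
    def augment(self, context, target):
        # both halves of each pair receive (independent) augmentations
        return self.augment_op(context), self.augment_op(target)


if __name__ == "__main__":
    noise = lambda x: x + 0.01 * torch.randn_like(x)
    batch = torch.rand(8, 3, 64, 64)
    ctx, tgt = SketchAugmentContextOnly(noise).augment(batch, batch.clone())
```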
7 changes: 3 additions & 4 deletions src/il_representations/algos/base_learner.py
@@ -1,4 +1,5 @@
import gym

from il_representations.algos.utils import set_global_seeds


@@ -13,12 +14,10 @@ def __init__(self, env):
# if EncoderSimplePolicyHead is refactored.
if isinstance(self.action_space, gym.spaces.Discrete):
self.action_size = env.action_space.n
elif (isinstance(self.action_space, gym.spaces.Box)
and len(self.action_space.shape) == 1):
elif (isinstance(self.action_space, gym.spaces.Box) and len(self.action_space.shape) == 1):
self.action_size, = self.action_space.shape
else:
raise NotImplementedError(
f"can't handle action space {self.action_space}")
raise NotImplementedError(f"can't handle action space {self.action_space}")

def set_random_seed(self, seed):
if seed is None:
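
Editor's note on the action-space branch above: a `gym.spaces.Discrete(4)` action space yields `action_size = 4` (the number of discrete actions), a one-dimensional `gym.spaces.Box` with `shape=(3,)` yields `action_size = 3`, and any other space still raises `NotImplementedError`.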