185 changes: 185 additions & 0 deletions ci_test/unit_tests/test_unit_layer_layer_norm_distconv.py
@@ -0,0 +1,185 @@
import os
import os.path
import sys
import math
import numpy as np

# Bamboo utilities
current_file = os.path.realpath(__file__)
current_dir = os.path.dirname(current_file)
sys.path.insert(0, os.path.join(os.path.dirname(current_dir), 'common_python'))
import tools

# ==============================================
# Objects for Python data reader
# ==============================================
# Note: The Python data reader imports this file as a module and calls
# the functions below to ingest data.

# Data
np.random.seed(20191114)
_num_samples = 31
_sample_shape = (17, 4, 2)
_sample_size = math.prod(_sample_shape)
_samples = np.random.normal(size=(_num_samples, _sample_size)).astype(np.float32)

# Sample access functions
def get_sample(index):
    return _samples[index,:]
def num_samples():
    return _num_samples
def sample_dims():
    return (_sample_size,)

# ==============================================
# NumPy layer norm
# ==============================================

def numpy_layer_norm(x, epsilon=1e-5):
    if x.dtype != np.float64:
        x = x.astype(np.float64)
    mean = np.mean(x)
    var = np.var(x, ddof=0)
    return (x - mean) / np.sqrt(var + epsilon)
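
# Illustrative sanity check (not executed by the test harness): the
# reference normalization should give near-zero mean and, up to the
# epsilon term, unit variance.
#   >>> y = numpy_layer_norm(np.random.randn(64))
#   >>> np.allclose(y.mean(), 0.0, atol=1e-12)
#   True
#   >>> np.allclose(y.var(), 1.0, atol=1e-4)
#   True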

# ==============================================
# Setup LBANN experiment
# ==============================================

def setup_experiment(lbann, weekly):
    """Construct LBANN experiment.

    Args:
        lbann (module): Module for LBANN Python frontend
        weekly (bool): Whether this is a weekly CI run (unused here)

    """
    mini_batch_size = num_samples() // 2
    trainer = lbann.Trainer(mini_batch_size)
    model = construct_model(lbann)
    data_reader = construct_data_reader(lbann)
    optimizer = lbann.NoOptimizer()
    return trainer, model, data_reader, optimizer, None # Don't request any specific number of nodes

def create_parallel_strategy(num_groups):
    return {"channel_groups": num_groups}
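
# Note: this dictionary is passed straight through to the layer's
# `parallel_strategy` field below; `channel_groups` is the number of
# groups distconv partitions the channel dimension into (here, one per
# GPU on the node), e.g. create_parallel_strategy(4) == {"channel_groups": 4}.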

def construct_model(lbann):
    """Construct LBANN model.

    Args:
        lbann (module): Module for LBANN Python frontend

    """

    # Input data
    # Note: Sum with a weights layer so that gradient checking will
    # verify that error signals are correct.
    x_weights = lbann.Weights(optimizer=lbann.SGD(),
                              initializer=lbann.ConstantInitializer(value=0.0),
                              name='input_weights')
    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                dims=_sample_size),
                  lbann.WeightsLayer(weights=x_weights,
                                     dims=_sample_size))
    x_lbann = x

    # Objects for LBANN model
    obj = []
    metrics = []
    callbacks = []

    # ------------------------------------------
    # Data-parallel layout
    # ------------------------------------------

    num_groups = tools.gpus_per_node(lbann)

    # LBANN implementation
    x = x_lbann
    # The input tensor must be 4-D for distconv, so reshape to add an
    # extra trailing axis
    x = lbann.Reshape(x, dims=_sample_shape+(1, ))
    y = lbann.LayerNorm(x,
                        data_layout='data_parallel',
                        parallel_strategy=create_parallel_strategy(
                            num_groups))
    z = lbann.L2Norm2(y)
    obj.append(z)
    metrics.append(lbann.Metric(z, name='data-parallel layout'))

    # NumPy implementation
    vals = []
    for i in range(num_samples()):
        x = get_sample(i).astype(np.float64)
        y = numpy_layer_norm(x)
        z = tools.numpy_l2norm2(y)
        vals.append(z)
    val = np.mean(vals)
    tol = 8 * val * np.finfo(np.float32).eps
    callbacks.append(lbann.CallbackCheckMetric(
        metric=metrics[-1].name,
        lower_bound=val-tol,
        upper_bound=val+tol,
        error_on_failure=True,
        execution_modes='test'))

    # ------------------------------------------
    # Gradient checking
    # ------------------------------------------

    callbacks.append(lbann.CallbackCheckGradients(error_on_failure=True))

    # ------------------------------------------
    # Construct model
    # ------------------------------------------

    num_epochs = 0
    return lbann.Model(num_epochs,
                       layers=lbann.traverse_layer_graph(x_lbann),
                       objective_function=obj,
                       metrics=metrics,
                       callbacks=callbacks)

def construct_data_reader(lbann):
    """Construct Protobuf message for Python data reader.

    The Python data reader will import the current Python file to
    access the sample access functions.

    Args:
        lbann (module): Module for LBANN Python frontend

    """

    # Note: The training data reader should be removed when
    # https://github.com/LLNL/lbann/issues/1098 is resolved.
    message = lbann.reader_pb2.DataReader()
    message.reader.extend([
        tools.create_python_data_reader(
            lbann,
            current_file,
            'get_sample',
            'num_samples',
            'sample_dims',
            'train'
        )
    ])
    message.reader.extend([
        tools.create_python_data_reader(
            lbann,
            current_file,
            'get_sample',
            'num_samples',
            'sample_dims',
            'test'
        )
    ])
    return message

# ==============================================
# Setup PyTest
# ==============================================

# Create test functions that can interact with PyTest
for _test_func in tools.create_tests(setup_experiment, __file__):
    globals()[_test_func.__name__] = _test_func
4 changes: 4 additions & 0 deletions include/lbann/layers/regularizers/CMakeLists.txt
@@ -34,5 +34,9 @@ set_full_path(THIS_DIR_HEADERS
selu_dropout.hpp
)

if (LBANN_HAS_DISTCONV)
add_subdirectory(distconv)
endif()

# Propagate the files up the tree
set(HEADERS "${HEADERS}" "${THIS_DIR_HEADERS}" PARENT_SCOPE)
31 changes: 31 additions & 0 deletions include/lbann/layers/regularizers/distconv/CMakeLists.txt
@@ -0,0 +1,31 @@
################################################################################
## Copyright (c) 2014-2022, Lawrence Livermore National Security, LLC.
## Produced at the Lawrence Livermore National Laboratory.
## Written by the LBANN Research Team (B. Van Essen, et al.) listed in
## the CONTRIBUTORS file. <lbann-dev@llnl.gov>
##
## LLNL-CODE-697807.
## All rights reserved.
##
## This file is part of LBANN: Livermore Big Artificial Neural Network
## Toolkit. For details, see http://software.llnl.gov/LBANN or
## https://github.com/LLNL/LBANN.
##
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License. You may
## obtain a copy of the License at:
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
## implied. See the License for the specific language governing
## permissions and limitations under the License.
################################################################################
set_full_path(THIS_DIR_HEADERS
distconv_layer_norm.hpp
)

# Propagate the files up the tree
set(HEADERS "${HEADERS}" "${THIS_DIR_HEADERS}" PARENT_SCOPE)
78 changes: 78 additions & 0 deletions include/lbann/layers/regularizers/distconv/distconv_layer_norm.hpp
@@ -0,0 +1,78 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2022, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////

#ifndef LBANN_LAYERS_REGULARIZERS_DISTCONV_LAYER_NORM
#define LBANN_LAYERS_REGULARIZERS_DISTCONV_LAYER_NORM

#ifdef LBANN_HAS_DISTCONV

namespace distconv {
template <typename Backend, typename DataType>
class LayerNormalization
{
  using LocaleMPI = tensor::LocaleMPI;

  template <typename Allocator>
  using DCTensor = tensor::Tensor<DataType, LocaleMPI, Allocator>;

public:
  LayerNormalization(Backend& backend, DataType epsilon)
    : m_backend(backend), m_epsilon(epsilon)
  {}

  template <typename Allocator>
  void calculate_forward_stats(const DCTensor<Allocator>& input,
                               DCTensor<Allocator>& statistics);

  template <typename Allocator>
  void apply_normalization(const DCTensor<Allocator>& input,
                           DCTensor<Allocator>& statistics,
                           DCTensor<Allocator>& output);

  template <typename Allocator>
  void calculate_backward_stats(const DCTensor<Allocator>& input,
                                const DCTensor<Allocator>& output_grad,
                                const DCTensor<Allocator>& statistics,
                                DCTensor<Allocator>& statistics_grad);

  template <typename Allocator>
  void apply_grad(const DCTensor<Allocator>& input,
                  const DCTensor<Allocator>& output_grad,
                  const DCTensor<Allocator>& statistics,
                  const DCTensor<Allocator>& statistics_grad,
                  DCTensor<Allocator>& input_grad);

protected:
  Backend& m_backend;

private:
  DataType m_epsilon;

}; // class LayerNormalization
} // namespace distconv

#endif // LBANN_HAS_DISTCONV
#endif // LBANN_LAYERS_REGULARIZERS_DISTCONV_LAYER_NORM
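
For orientation, here is a minimal NumPy sketch of the per-sample math that the
four methods above appear to split into reduction and elementwise phases. This
is an inference from the method signatures (the names layer_norm_forward and
layer_norm_backward are hypothetical), not the distconv implementation itself:

import numpy as np

def layer_norm_forward(x, eps=1e-5):
    # calculate_forward_stats: the reduced per-sample statistics
    mean, var = x.mean(), x.var()
    # apply_normalization: purely elementwise once statistics are known
    return (x - mean) / np.sqrt(var + eps), (mean, var)

def layer_norm_backward(x, dy, stats, eps=1e-5):
    mean, var = stats
    inv_std = 1.0 / np.sqrt(var + eps)
    xhat = (x - mean) * inv_std
    # calculate_backward_stats: two more reduced per-sample scalars
    dy_mean = dy.mean()
    dy_xhat_mean = (dy * xhat).mean()
    # apply_grad: elementwise gradient of the normalization
    return inv_std * (dy - dy_mean - xhat * dy_xhat_mean)

In a distributed run, the reductions above are the natural points for a
cross-rank allreduce over the channel groups, which is presumably why the
statistics are kept in their own tensors between phases.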