google · MalcolmSlaney · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,8 @@
+bazel*
+MODULE*
+**.mat
+**.pyc
+notes.txt
+tmp/*
+**junk**
+**/.DS_Store
diff --git a/BUILD.bazel b/BUILD.bazel
@@ -45,6 +45,7 @@ py_test(
         "test_data/meg/subj02_1ksamples.tfrecords",
         "test_data/meg/subj03_1ksamples.tfrecords",
     ],
+    timeout = "eternal",
 )
 
 py_test(
@@ -81,10 +82,13 @@ py_test(
     srcs = ["test/decoding_test.py"],
     data = [
         "test_data/meg/subj01_1ksamples.tfrecords",
+        "test_data/meg/subj02_1ksamples.tfrecords",
+        "test_data/meg/subj03_1ksamples.tfrecords",
     ],
     deps = [
         ":decoding_lib",
     ],
+    timeout = "eternal",
 )
 
 py_test(

diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 # The telluride_decoding Library
 
-(This is not an official Google product!)
+[This is a fork of the original project, as Google is no longer
+contributing to this project. This should be considered the official version.]
 
 This repository contains Python/Tensorflow code to decode perceptual signals
 from brain data.  The perceptual signals we are using are generally audio
@@ -55,6 +56,9 @@ install the necessary prerequisites:
 pip install telluride-decoding
 ```
 
+This code builds and tests with the [Bazel](https://bazel.build/) build software.
+All tests pass on MacOSX as of March 22, 2024.
+
 ## Using this code
 This library is written in Python3 and uses Tensorflow2. The
 decoding code can be run as a standalone program, or as a library, or in 

diff --git a/telluride_decoding/add_trigger.py b/telluride_decoding/add_trigger.py
@@ -41,7 +41,7 @@
 import six
 from six.moves import range
 
-from google3.pyglib import gfile
+from tensorflow.io.gfile import GFile
 
 FLAGS = flags.FLAGS
 
@@ -155,9 +155,7 @@ def read_audio_wave_file(audio_filename):
   if not isinstance(audio_filename, six.string_types):
     raise TypeError('audio_filename must be a string.')
 
-  # Use gfile.Open so we can read files from all sorts of file systems.
-  with gfile.Open(audio_filename) as fp:
-    [fs, audio_signal] = scipy.io.wavfile.read(fp)
+  [fs, audio_signal] = scipy.io.wavfile.read(audio_filename)
   logging.info('Read_audio_file: Read %s samples from %s at %gHz.',
                audio_signal.shape, audio_filename, fs)
   assert audio_signal.dtype == np.int16
@@ -170,9 +168,7 @@ def write_audio_wave_file(audio_filename, audio_signal, fs):
   if not isinstance(audio_signal, np.ndarray):
     raise TypeError('audio_signal must be an np.ndarray')
 
-  # Use gfile.Open so we can read files from all sorts of file systems.
-  with gfile.Open(audio_filename, 'w') as fp:
-    scipy.io.wavfile.write(fp, fs, audio_signal)
+  scipy.io.wavfile.write(audio_filename, fs, audio_signal)
   logging.info('Write_audio_file: wrote %s samples to %s at %gHz.',
                audio_signal.shape, audio_filename, fs)
 

diff --git a/telluride_decoding/brain_data.py b/telluride_decoding/brain_data.py
@@ -279,7 +279,7 @@ def filter_file_names(self, mode: str) -> List[str]:
     if not isinstance(filename_list, list):
       raise TypeError('Filename_list is a %s, not a list.' %
                       type(filename_list))
-    logging.info('Filter_file_names: filename_list: %s', filename_list)
+    logging.info('Filter_file_names: All files to consider: %s', filename_list)
     logging.info('Filter_file_names: train_file_pattern: %s',
                  self.train_file_pattern)
     logging.info('Filter_file_names: validate_file_pattern: %s',
@@ -440,11 +440,11 @@ def window_one_stream_new(x: tf.Tensor,
         A tf.dataset with shape N' x (pre_context+1+post_context)*C, where N' is
         shortened to account for the frames where there is not enough context.
       """
-      logging.info(' Window_one_stream: adding %d and %d frames of context '
-                   'to stream.', pre_context, post_context)
-      total_context = pre_context + 1 + post_context
       channels = x.shape[1]
-      logging.info(' Window_one_stream: %s channels.', channels)
+      logging.info(f'Window_one_stream: adding {pre_context} before '
+                   f'and {post_context} after frames of context to stream'
+                   f' with {channels} channels')
+      total_context = pre_context + 1 + post_context
       padded_x = tf.concat((tf.zeros((pre_context, channels), dtype=x.dtype),
                             x,
                             tf.zeros((post_context, channels),
@@ -668,15 +668,30 @@ def _get_data_file_names(self):
                       (type(self.data_dir), self.data_dir))
     self._cached_file_names = []
     exp_data_dir = self.data_dir
-    for (path, _, files) in tf.io.gfile.walk(exp_data_dir):
-      # pylint: disable=g-complex-comprehension
-      self._cached_file_names += [
-          os.path.join(path, f)
-          for f in files
-          if (f.endswith('.tfrecords') and
-              '-bad-' not in f and
+
+    def on_error(e):
+      """Ignore errors.  It seems the Mac's tmpdir contains some directories
+      that can't be walked."""
+      logging.info(f'Walk error: {e}')
+
+    def good_file(f):
+      return (f.endswith('.tfrecords') and 
+              '-bad-' not in f and 
               self.data_pattern in f)
-      ]
+
+    try:
+      for (path, _, files) in tf.io.gfile.walk(exp_data_dir, onerror=on_error):
+        self._cached_file_names += [
+            os.path.join(path, f)
+            for f in files if good_file(f)
+        ]
+    except:
+      # The tf.io.gfile.walk fails on Mac with a temporary directory.
+      for (path, _, files) in os.walk(exp_data_dir, onerror=on_error):
+        self._cached_file_names += [
+            os.path.join(path, f)
+            for f in files if good_file(f)
+        ]
     logging.info('_get_data_file_names found %d files for TFExample data '
                  'analysis.', len(self._cached_file_names))
     if not self._cached_file_names:

diff --git a/telluride_decoding/brain_model.py b/telluride_decoding/brain_model.py
@@ -27,7 +27,7 @@
 from absl import logging
 import numpy as np
 
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 # User should call tf.compat.v1.enable_v2_behavior()
 
 

diff --git a/telluride_decoding/cca.py b/telluride_decoding/cca.py
@@ -24,8 +24,7 @@
 
 import numpy as np
 from telluride_decoding import brain_model
-import tensorflow.compat.v2 as tf
-# User should call tf.compat.v1.enable_v2_behavior()
+import tensorflow as tf
 
 
 def rmss(x):
@@ -330,6 +329,11 @@ def calculate_cca_parameters_from_dataset(dataset, dim, regularization=0.1,
     num_mini_batches += 1
     if mini_batch_count and num_mini_batches >= mini_batch_count:
       break
+  assert np.sum(~np.isfinite(cov_xx)) == 0
+  assert np.sum(~np.isfinite(cov_yy)) == 0
+  assert np.sum(~np.isfinite(cov_xy)) == 0
+  assert np.sum(~np.isfinite(sum_x)) == 0
+  assert np.sum(~np.isfinite(sum_y)) == 0
   logging.info('Calculating the CCA parameters from %d minibatches',
                num_mini_batches)
   if not num_mini_batches:
@@ -366,6 +370,12 @@ def calculate_cca_parameters_from_dataset(dataset, dim, regularization=0.1,
   rot_y = np.matmul(k22, v[:, 0:dim])
   e = e[0:dim]
 
+  assert np.sum(~np.isfinite(rot_x)) == 0
+  assert np.sum(~np.isfinite(rot_y)) == 0
+  assert np.sum(~np.isfinite(mean_x)) == 0
+  assert np.sum(~np.isfinite(mean_y)) == 0
+  assert np.sum(~np.isfinite(e)) == 0
+
   return rot_x, rot_y, mean_x, mean_y, e
 
 

diff --git a/telluride_decoding/csv_util.py b/telluride_decoding/csv_util.py
@@ -24,12 +24,10 @@
 import csv
 import os
 import numpy as np
+import tensorflow as tf
 
 from telluride_decoding import plot_util
 
-import tensorflow.compat.v2 as tf
-# User should call tf.compat.v1.enable_v2_behavior()
-
 
 def write_results(file_name, regularization_list, all_results):
   """"Writes results to a CSV file.

diff --git a/telluride_decoding/decoding.py b/telluride_decoding/decoding.py
@@ -469,8 +469,8 @@ def train_lda_model(brain_dataset: brain_data.BrainData,
     raise TypeError('Train_lda_model needs a DecodingOptions object, not %s.' %
                     type(my_flags))
 
+  # Get two copies of the dataset, one regular and one mixed up for comparison.
   attended_data = brain_dataset.create_dataset('test', mixup_batch=False)
-
   unattended_data = brain_dataset.create_dataset('test', mixup_batch=True)
 
   decoder = infer_decoder.create_decoder(my_flags.dnn_regressor,

diff --git a/telluride_decoding/infer.py b/telluride_decoding/infer.py
@@ -32,7 +32,7 @@
 # The next change breaks colab, so add "%matplotlib inline" after importing
 # this file.
 # pylint: disable=g-import-not-at-top
-matplotlib.use('Agg')    # Needed for plotting to a file, before the next import
+# matplotlib.use('Agg')    # Needed for plotting to a file, before the next import
 import matplotlib.pyplot as plt
 
 import numpy as np

diff --git a/telluride_decoding/infer_decoder.py b/telluride_decoding/infer_decoder.py
@@ -305,9 +305,12 @@ def add_data_correlator(self, x: np.ndarray, y: np.ndarray):
     # Update the means and power so they are ready for use.
     self._mean_x = self._sum_x / self._count
     self._mean_y = self._sum_y / self._count
-    self._power = (np.sqrt((self._sum_x2 - self._sum_x**2/self._count) *
-                           (self._sum_y2 - self._sum_y**2/self._count)) /
-                   self._count)
+
+    # Make sure that we're taking the sqrt of a non-negative number.  (Could go
+    # negative for silent audio, due to roundoff errors?)
+    term = ((self._sum_x2 - self._sum_x**2/self._count) * 
+            (self._sum_y2 - self._sum_y**2/self._count))
+    self._power = np.sqrt(np.maximum(term, 0.0))/self._count
 
   def compute_correlation(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
     """Computes multidimensional correlation and scaling without the final sum.
@@ -324,8 +327,12 @@ def compute_correlation(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
       The normalized cross product (num_frames x num_features).
     """
     # From: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
-    return ((x - np.broadcast_to(self._mean_x, x.shape)) *
-            (y - np.broadcast_to(self._mean_y, y.shape))/ self._power)
+    self._power = np.asarray(self._power)  # Hack.. not sure why this is needed.
+    assert np.sum(~np.isfinite(self._power)) == 0
+    assert np.sum(self._power <= 0) == 0, f'ComputeCorrelation: Power is {self._power}, and count is {self._count}'
+    result = ((x - np.broadcast_to(self._mean_x, x.shape)) *
+              (y - np.broadcast_to(self._mean_y, y.shape))/ self._power)
+    return result
 
   def train(self,
             data0: tf.data.Dataset,
@@ -523,6 +530,9 @@ def compute_lda_model(self, d1: np.ndarray, d2: np.ndarray):
       raise TypeError('Input d1 must be an numpy array, not %s.' % type(d1))
     if not isinstance(d2, np.ndarray):
       raise TypeError('Input d2 must be an numpy array, not %s.' % type(d2))
+    assert np.sum(~np.isfinite(d1)) == 0
+    assert np.sum(~np.isfinite(d2)) == 0
+
     data = np.concatenate((d1, d2), axis=0)
     labels = np.concatenate((1*np.ones(d1.shape[0],),
                              2*np.ones(d2.shape[0],)))

diff --git a/telluride_decoding/ingest.py b/telluride_decoding/ingest.py
@@ -1156,7 +1156,7 @@ def _float_feature(value):
         data = data_dict[k]
         feature = None
         # if type(data[row, 0]) == np.str or type[data[row, 0]:
-        if data.dtype == np.str or data.dtype == '|S1':
+        if data.dtype == str or data.dtype == '|S1':
           feature = _bytes_feature(data[row])
         elif isinstance(data, np.ndarray):
           if data.dtype == np.float64 or data.dtype == np.float32:

diff --git a/telluride_decoding/ingest_brainvision.py b/telluride_decoding/ingest_brainvision.py
@@ -30,8 +30,7 @@
 
 import numpy as np
 from telluride_decoding import ingest
-import tensorflow.compat.v2 as tf
-# User should call tf.compat.v1.enable_v2_behavior()
+import tensorflow as tf
 
 
 def parse_bv_keywords(section):

diff --git a/telluride_decoding/plot_util.py b/telluride_decoding/plot_util.py
@@ -18,9 +18,8 @@
 # To prevent tkinter errors as per: https://stackoverflow.com/a/37605654
 import os
 import matplotlib
-matplotlib.use('Agg')
-import tensorflow.compat.v2 as tf  # pylint: disable=g-import-not-at-top
-# User should call tf.compat.v1.enable_v2_behavior()
+# matplotlib.use('Agg')
+import tensorflow as tf  # pylint: disable=g-import-not-at-top
 
 
 def matplotlib_pyplot():