ChristopherLucas · tanxpyox · Aug 18, 2022 · Aug 18, 2022 · Aug 18, 2022 · Aug 18, 2022
diff --git a/.gitignore b/.gitignore
@@ -10,4 +10,5 @@ src/symbols.rds
 .Rproj.user
 .RData
 *smile.log
-src/conf/config.h
+src/conf/config.h
+.vscode/*
diff --git a/NAMESPACE b/NAMESPACE
@@ -18,6 +18,7 @@ export(getEdges)
 export(hmm)
 export(intensity)
 export(labelUtterances)
+export(llh)
 export(loudness)
 export(lpc)
 export(magPhase)

diff --git a/R/extract.features.R b/R/extract.features.R
@@ -2,56 +2,94 @@
 #' @description Extract features from a file and return audio class with these data
 #' @importFrom magrittr %>%
 #' @importFrom purrr map
+#' @importFrom purrr transpose
 #' @param files \code{character}. Vector of path and file name. For example, c("folder/1.wav", "folder/2.wav")
 #' @param config \code{audio_config}. An object of class 'audio_config' with parameters for extraction.
 #' @param use.full.names \code{boolean}. Whether to use full file path as names of returned list elements
 #' @param use.exts \code{boolean}. Whether to use file extensions in names of returned list elements.
+#' @param raw.data \code{boolean}. Whether to include raw data in the output list.
+#' @param timestamp \code{boolean}. Whether to include timestamps in the output list.
+#' @param standardize \code{boolean}. Whether to standardize the extracted features. Only works when both raw.data and timestamp are set to false.
 #'   Only applicable if use.full.names is FALSE
 #' @return audio class
 #' @export
-#' 
-extractFeatures <- function(files,  config = loudness(createConfig()), use.full.names=T, use.exts=T) {
-    features <- purrr::map(files, function(x) {
-        result <- extractFeature(x, config)
-        attr(result, "filename") <- x
-        result
-        })
-
-    list_names <- files
-    if (!use.full.names) list_names <- basename(list_names)
-    if (!use.exts) list_names <- tools::file_path_sans_ext(list_names)
-
-    purrr::set_names(features, list_names)
+
+extractFeatures <- function(files,  config = loudness(createConfig()), 
+                            use.full.names = T, 
+                            use.exts = T,
+                            raw.data = F,
+                            timestamp = F,
+                            standardize = F
+) {
+  features <- purrr::map(files, function(x) {
+    result <- extractFeature(x, config, raw.data, timestamp) 
+    attr(result, "filename") <- x
+    result
+  }) %>% purrr::transpose()
+
+  out = NULL
+
+  list_names <- files
+  if (!use.full.names) list_names <- basename(list_names)
+  if (!use.exts) list_names <- tools::file_path_sans_ext(list_names)
+
+  out <- purrr::map(features, purrr::set_names, list_names)
+
+  if (standardize) {
+      out$data <- out$data %>% communication:::standardizeFeatures()
+  }
+
+  out$files$fname <- list_names
+  out$files$duration <- sapply(out$data, nrow) 
+
+  return(out)
 }
 
 
-extractFeature <- function(filename, config = config) {
-    audio_nfeatures <- create_audio_object(filename, config)
-    audio <- audio_nfeatures$audio
-    n_features <- audio_nfeatures$n_features
-    raw_data <- add_raw_data(filename, audio$timestamps)
+extractFeature <- function(filename, config = config,
+                           raw.data = F,
+                           timestamp = F) {
+
+  out = NULL
+  audio_nfeatures <- create_audio_object(filename, config)
+  audio <- audio_nfeatures$audio %>% as.matrix
+  n_features <- audio_nfeatures$n_features
+  timestamps <- audio_nfeatures$timestamps
+
+  out$data <- audio
+
+  if(raw.data){
+    raw_data <- add_raw_data(filename, timestamps)
 
     # For some reasons, length of audio$timestamps is not the same as raw_data (returning 
     # more data - I have to check it)
-    audio$raw_data <- raw_data$cut_raw_data[1:length(audio$timestamps)]
-    attr(audio, "header") <- raw_data$header
+    raw_data_out <- raw_data$cut_raw_data[1:length(timestamps)]
+    attr(raw_data_out, "header") <- raw_data$header  
 
-    # If the no. of columns supplied by the config object is not the same as the no. of features
-    # we got, then most likely the config does not know how many features are created
-    # In this case, we name the feature columns based on the output name of the components
-    # But we can do so reliably only if there is a single component that connects to the sink.
-    column_names <- strsplit(attr(config, "columns"), ":")[[1]]
-    if (length(column_names) < n_features) {
-        edges <- attr(config, 'edges')
-        outputs <- subset(edges, 'explicit_output')$output
-        if (length(outputs) > 1) {
-            warning('Unable to determine column names for features. Please correctly specify the no. of features of each component.')
-        } else {
-            column_names <- paste0(outputs[1], '_', seq(n_features))
-        }
+    out$raw_data <- raw_data_out
+  }
+
+  if(timestamp){
+    out$timestamps <- timestamps
+  }
+
+  # If the no. of columns supplied by the config object is not the same as the no. of features
+  # we got, then most likely the config does not know how many features are created
+  # In this case, we name the feature columns based on the output name of the components
+  # But we can do so reliably only if there is a single component that connects to the sink.
+  column_names <- strsplit(attr(config, "columns"), ":")[[1]]
+  if (length(column_names) < n_features) {
+    edges <- attr(config, 'edges')
+    outputs <- subset(edges, 'explicit_output')$output
+    if (length(outputs) > 1) {
+      warning('Unable to determine column names for features. Please correctly specify the no. of features of each component.')
+    } else {
+      column_names <- paste0(outputs[1], '_', seq(n_features))
     }
-    colnames(audio) <- c(column_names, "timestamps", "raw_data")
-    audio
+  }
+  colnames(out$data) <- c(column_names)
+
+  return(out)
 }
 
 
@@ -62,13 +100,13 @@ extractFeature <- function(filename, config = config) {
 #' @return speech object
 #' @export
 labelUtterances <- function(features, labels) {
-    if ( length(features) != length(labels) )
-        stop("Verify the number of labels and different speech units")
-    
-    purrr::map2(features, labels, function(x, y) {
-        attr(x, "label") <- y
-        x
-    })
+  if ( length(features) != length(labels) )
+    stop("Verify the number of labels and different speech units")
+
+  purrr::map2(features, labels, function(x, y) {
+    attr(x, "label") <- y
+    x
+  })
 }
 
 
@@ -79,63 +117,60 @@ labelUtterances <- function(features, labels) {
 #' @return speech class
 #' @export
 play <- function(x, ...) {
-    UseMethod("play", x)
+  UseMethod("play", x)
 }
 
 
 play.speech <- function(x, start = NULL, end = NULL) {
-    n <- 1:nrow(x)
-    if ( is.null(start) || is.null(end)) {
-        rcpp_playWavFile(purrr::as_vector(x$raw_data[n]),
-                         attr(x, "header"))
-    }
-    else {
-        rcpp_playWavFileSubset(purrr::as_vector(x$raw_data[n]),
-                            attr(x, "header"), start, end)
-    }
+  n <- 1:nrow(x)
+  if ( is.null(start) || is.null(end)) {
+    rcpp_playWavFile(purrr::as_vector(x$raw_data[n]),
+                     attr(x, "header"))
+  }
+  else {
+    rcpp_playWavFileSubset(purrr::as_vector(x$raw_data[n]),
+                           attr(x, "header"), start, end)
+  }
 }
 
 
 
 #' @export
 print.speech <- function(x, ...) {
-    base::print(format(x[1:(ncol(x)-1)]))
-#    invisible(x)
+  base::print(format(x[1:(ncol(x)-1)]))
+  #    invisible(x)
 }
 
 
 head.speech <- function(x, ...) {
-    head(print(x))
+  head(print(x))
 }
 
 
 
 tail.speech <- function(x, ...) {
-    tail(print(x))
+  tail(print(x))
 }
 
 create_audio_object <- function(filename, config) {
-    config_string <- generate_config_string(config)
-    extracted_data <- rcpp_openSmileGetFeatures(filename, config_string_in = config_string)
-    audio <- as.data.frame(extracted_data$audio_features_0)
-    n_features <- ncol(audio)
-    timestamps <- as.vector(extracted_data$audio_timestamps_0)
-    audio$timestamps <- timestamps
-    class(audio) <- c("speech", "data.frame")
-
-    list(audio=audio, n_features=n_features)
+  config_string <- communication:::generate_config_string(config)
+  extracted_data <- communication:::rcpp_openSmileGetFeatures(filename, config_string_in = config_string)
+  audio <- as.data.frame(extracted_data$audio_features_0)
+  n_features <- ncol(audio)
+
+  timestamps <- as.vector(extracted_data$audio_timestamps_0)
+
+  list(audio=audio, timestamps = timestamps, n_features=n_features)
 }
 
 add_raw_data <- function(filename, timestamps) {
-    raw_data <- rcpp_parseAudioFile(filename)
-    # Timemust must be cut in time intervals of same size
-    # We need to create a test for that
-    timestamp_interval <- timestamps[2]
-    
-    # Split raw_data in intervals of sampleRate * timestamp_interval
-    cut_raw_data <- split(raw_data[[2]], 
-                          ceiling(seq_along(raw_data[[2]])/(raw_data[[1]]$sampleRate*timestamp_interval)))
-    list(cut_raw_data = cut_raw_data, header = raw_data[[1]])
+  raw_data <- communication:::rcpp_parseAudioFile(filename)
+  # Timemust must be cut in time intervals of same size
+  # We need to create a test for that
+  timestamp_interval <- timestamps[2]
+
+  # Split raw_data in intervals of sampleRate * timestamp_interval
+  cut_raw_data <- split(raw_data[[2]], 
+                        ceiling(seq_along(raw_data[[2]])/(raw_data[[1]]$sampleRate*timestamp_interval)))
+  list(cut_raw_data = cut_raw_data, header = raw_data[[1]])
 }
-
-
diff --git a/R/hmm.R b/R/hmm.R
@@ -1,7 +1,3 @@
-# dk when derivatives > 0 delete first leading rows from each Xs before
-# calculating Ts, passing to hmm
-# then add leading rows of NAs back into results
-
 #########################
 # R wrapper for hmm_cpp #
 #########################
@@ -113,22 +109,19 @@
 #'   }
 #' }
 #' @export
-hmm <- function(Xs,              # data
+hmm = function(Xs,              # data
                weights=NULL,    # weight on each element of Xs
                nstates,         # number of states
                par=list(),      # initialization
                control=list(),  # EM control parameters
                labels=list()    # labels for supervised training
-)
-  {
-
-  Xs
+){
 
   if (class(Xs) == 'matrix'){
       Xs = list(Xs)
   }
 
-  if (any(sapply(Xs, function(X) class(X)!='matrix'))){
+  if (any(sapply(Xs, function(X) !is.matrix(X)))){
       stop('Xs must be matrix or list of matrices')
   }
 
@@ -265,7 +258,7 @@ hmm <- function(Xs,              # data
 
   if (control$standardize){
 
-    Xs = standardizeFeatures(Xs)
+    Xs = standardizeFeatures(Xs, verbose=control$verbose)
 
     feature_means = attr(Xs[[1]], 'scaled:center')
     feature_sds = attr(Xs[[1]], 'scaled:scale')
@@ -519,7 +512,7 @@ hmm <- function(Xs,              # data
   if (iters >= control$maxiter){
     if (diff(out$llh_seq[-1:0 + length(out$llh_seq)]) >= control$tol){
       converged = FALSE
-      warning('failed to converge by maxiter = ', control$maxiter, ' iterations (tol = ', control$tol, ')')
+      if (control$verbose > 0) warning('failed to converge by maxiter = ', control$maxiter, ' iterations (tol = ', control$tol, ')')
     }
   }
 

diff --git a/R/llh.R b/R/llh.R
@@ -9,6 +9,7 @@
 #'   sequence, a single matrix can be provided
 #' @param mod Model object of class 'feelr.hmm', as output by \code{hmm}
 #' @param control list
+#' 
 #' @return List with two components. \code{llhs} is a numeric vector of
 #'   log-likelihoods of each observation sequence in \code{Xs}. \code{llh_total}
 #'   is the log-likelihood of all observation sequences together, i.e.
@@ -17,7 +18,8 @@
 #'   \code{mod$llhs}. This is because \code{hmm} estimates the starting state of
 #'   each sequence, whereas here it is assumed that the starting state is drawn
 #'   from the stationary distribution \code{mod$delta}.
-#'
+#' @export
+
 llh = function(Xs,   # data
                mod,  # fitted feelr.hmm model
                control=list()
@@ -29,7 +31,7 @@ llh = function(Xs,   # data
   if (class(Xs) == 'matrix')
     Xs = list(Xs)
 
-  if (any(sapply(Xs, function(X) class(X)!='matrix')))
+  if (any(sapply(Xs, function(X) !is.matrix(X))))
     stop('Xs must be matrix or list of matrices')
 
   if (any(sapply(Xs, ncol) != length(mod$mus[[1]])))