diff --git a/.gitignore b/.gitignore index fce20f0..87def04 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ src/symbols.rds .Rproj.user .RData *smile.log -src/conf/config.h \ No newline at end of file +src/conf/config.h +.vscode/* diff --git a/NAMESPACE b/NAMESPACE index d9bba6b..01ca405 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(getEdges) export(hmm) export(intensity) export(labelUtterances) +export(llh) export(loudness) export(lpc) export(magPhase) diff --git a/R/extract.features.R b/R/extract.features.R index c276beb..8b35650 100644 --- a/R/extract.features.R +++ b/R/extract.features.R @@ -2,56 +2,94 @@ #' @description Extract features from a file and return audio class with these data #' @importFrom magrittr %>% #' @importFrom purrr map +#' @importFrom purrr transpose #' @param files \code{character}. Vector of path and file name. For example, c("folder/1.wav", "folder/2.wav") #' @param config \code{audio_config}. An object of class 'audio_config' with parameters for extraction. #' @param use.full.names \code{boolean}. Whether to use full file path as names of returned list elements #' @param use.exts \code{boolean}. Whether to use file extensions in names of returned list elements. +#' @param raw.data \code{boolean}. Whether to include raw data in the output list. +#' @param timestamp \code{boolean}. Whether to include timestamps in the output list. +#' @param standardize \code{boolean}. Whether to standardize the extracted features. Only works when both raw.data and timestamp are set to false. #' Only applicable if use.full.names is FALSE #' @return audio class #' @export -#' -extractFeatures <- function(files, config = loudness(createConfig()), use.full.names=T, use.exts=T) { - features <- purrr::map(files, function(x) { - result <- extractFeature(x, config) - attr(result, "filename") <- x - result - }) - - list_names <- files - if (!use.full.names) list_names <- basename(list_names) - if (!use.exts) list_names <- tools::file_path_sans_ext(list_names) - - purrr::set_names(features, list_names) + +extractFeatures <- function(files, config = loudness(createConfig()), + use.full.names = T, + use.exts = T, + raw.data = F, + timestamp = F, + standardize = F +) { + features <- purrr::map(files, function(x) { + result <- extractFeature(x, config, raw.data, timestamp) + attr(result, "filename") <- x + result + }) %>% purrr::transpose() + + out = NULL + + list_names <- files + if (!use.full.names) list_names <- basename(list_names) + if (!use.exts) list_names <- tools::file_path_sans_ext(list_names) + + out <- purrr::map(features, purrr::set_names, list_names) + + if (standardize) { + out$data <- out$data %>% communication:::standardizeFeatures() + } + + out$files$fname <- list_names + out$files$duration <- sapply(out$data, nrow) + + return(out) } -extractFeature <- function(filename, config = config) { - audio_nfeatures <- create_audio_object(filename, config) - audio <- audio_nfeatures$audio - n_features <- audio_nfeatures$n_features - raw_data <- add_raw_data(filename, audio$timestamps) +extractFeature <- function(filename, config = config, + raw.data = F, + timestamp = F) { + + out = NULL + audio_nfeatures <- create_audio_object(filename, config) + audio <- audio_nfeatures$audio %>% as.matrix + n_features <- audio_nfeatures$n_features + timestamps <- audio_nfeatures$timestamps + + out$data <- audio + + if(raw.data){ + raw_data <- add_raw_data(filename, timestamps) # For some reasons, length of audio$timestamps is not the same as raw_data (returning # more data - I have to check it) - audio$raw_data <- raw_data$cut_raw_data[1:length(audio$timestamps)] - attr(audio, "header") <- raw_data$header + raw_data_out <- raw_data$cut_raw_data[1:length(timestamps)] + attr(raw_data_out, "header") <- raw_data$header - # If the no. of columns supplied by the config object is not the same as the no. of features - # we got, then most likely the config does not know how many features are created - # In this case, we name the feature columns based on the output name of the components - # But we can do so reliably only if there is a single component that connects to the sink. - column_names <- strsplit(attr(config, "columns"), ":")[[1]] - if (length(column_names) < n_features) { - edges <- attr(config, 'edges') - outputs <- subset(edges, 'explicit_output')$output - if (length(outputs) > 1) { - warning('Unable to determine column names for features. Please correctly specify the no. of features of each component.') - } else { - column_names <- paste0(outputs[1], '_', seq(n_features)) - } + out$raw_data <- raw_data_out + } + + if(timestamp){ + out$timestamps <- timestamps + } + + # If the no. of columns supplied by the config object is not the same as the no. of features + # we got, then most likely the config does not know how many features are created + # In this case, we name the feature columns based on the output name of the components + # But we can do so reliably only if there is a single component that connects to the sink. + column_names <- strsplit(attr(config, "columns"), ":")[[1]] + if (length(column_names) < n_features) { + edges <- attr(config, 'edges') + outputs <- subset(edges, 'explicit_output')$output + if (length(outputs) > 1) { + warning('Unable to determine column names for features. Please correctly specify the no. of features of each component.') + } else { + column_names <- paste0(outputs[1], '_', seq(n_features)) } - colnames(audio) <- c(column_names, "timestamps", "raw_data") - audio + } + colnames(out$data) <- c(column_names) + + return(out) } @@ -62,13 +100,13 @@ extractFeature <- function(filename, config = config) { #' @return speech object #' @export labelUtterances <- function(features, labels) { - if ( length(features) != length(labels) ) - stop("Verify the number of labels and different speech units") - - purrr::map2(features, labels, function(x, y) { - attr(x, "label") <- y - x - }) + if ( length(features) != length(labels) ) + stop("Verify the number of labels and different speech units") + + purrr::map2(features, labels, function(x, y) { + attr(x, "label") <- y + x + }) } @@ -79,63 +117,60 @@ labelUtterances <- function(features, labels) { #' @return speech class #' @export play <- function(x, ...) { - UseMethod("play", x) + UseMethod("play", x) } play.speech <- function(x, start = NULL, end = NULL) { - n <- 1:nrow(x) - if ( is.null(start) || is.null(end)) { - rcpp_playWavFile(purrr::as_vector(x$raw_data[n]), - attr(x, "header")) - } - else { - rcpp_playWavFileSubset(purrr::as_vector(x$raw_data[n]), - attr(x, "header"), start, end) - } + n <- 1:nrow(x) + if ( is.null(start) || is.null(end)) { + rcpp_playWavFile(purrr::as_vector(x$raw_data[n]), + attr(x, "header")) + } + else { + rcpp_playWavFileSubset(purrr::as_vector(x$raw_data[n]), + attr(x, "header"), start, end) + } } #' @export print.speech <- function(x, ...) { - base::print(format(x[1:(ncol(x)-1)])) -# invisible(x) + base::print(format(x[1:(ncol(x)-1)])) + # invisible(x) } head.speech <- function(x, ...) { - head(print(x)) + head(print(x)) } tail.speech <- function(x, ...) { - tail(print(x)) + tail(print(x)) } create_audio_object <- function(filename, config) { - config_string <- generate_config_string(config) - extracted_data <- rcpp_openSmileGetFeatures(filename, config_string_in = config_string) - audio <- as.data.frame(extracted_data$audio_features_0) - n_features <- ncol(audio) - timestamps <- as.vector(extracted_data$audio_timestamps_0) - audio$timestamps <- timestamps - class(audio) <- c("speech", "data.frame") - - list(audio=audio, n_features=n_features) + config_string <- communication:::generate_config_string(config) + extracted_data <- communication:::rcpp_openSmileGetFeatures(filename, config_string_in = config_string) + audio <- as.data.frame(extracted_data$audio_features_0) + n_features <- ncol(audio) + + timestamps <- as.vector(extracted_data$audio_timestamps_0) + + list(audio=audio, timestamps = timestamps, n_features=n_features) } add_raw_data <- function(filename, timestamps) { - raw_data <- rcpp_parseAudioFile(filename) - # Timemust must be cut in time intervals of same size - # We need to create a test for that - timestamp_interval <- timestamps[2] - - # Split raw_data in intervals of sampleRate * timestamp_interval - cut_raw_data <- split(raw_data[[2]], - ceiling(seq_along(raw_data[[2]])/(raw_data[[1]]$sampleRate*timestamp_interval))) - list(cut_raw_data = cut_raw_data, header = raw_data[[1]]) + raw_data <- communication:::rcpp_parseAudioFile(filename) + # Timemust must be cut in time intervals of same size + # We need to create a test for that + timestamp_interval <- timestamps[2] + + # Split raw_data in intervals of sampleRate * timestamp_interval + cut_raw_data <- split(raw_data[[2]], + ceiling(seq_along(raw_data[[2]])/(raw_data[[1]]$sampleRate*timestamp_interval))) + list(cut_raw_data = cut_raw_data, header = raw_data[[1]]) } - - diff --git a/R/hmm.R b/R/hmm.R index 8f4e3d4..707d57d 100644 --- a/R/hmm.R +++ b/R/hmm.R @@ -1,7 +1,3 @@ -# dk when derivatives > 0 delete first leading rows from each Xs before -# calculating Ts, passing to hmm -# then add leading rows of NAs back into results - ######################### # R wrapper for hmm_cpp # ######################### @@ -113,22 +109,19 @@ #' } #' } #' @export -hmm <- function(Xs, # data +hmm = function(Xs, # data weights=NULL, # weight on each element of Xs nstates, # number of states par=list(), # initialization control=list(), # EM control parameters labels=list() # labels for supervised training -) - { - - Xs +){ if (class(Xs) == 'matrix'){ Xs = list(Xs) } - if (any(sapply(Xs, function(X) class(X)!='matrix'))){ + if (any(sapply(Xs, function(X) !is.matrix(X)))){ stop('Xs must be matrix or list of matrices') } @@ -265,7 +258,7 @@ hmm <- function(Xs, # data if (control$standardize){ - Xs = standardizeFeatures(Xs) + Xs = standardizeFeatures(Xs, verbose=control$verbose) feature_means = attr(Xs[[1]], 'scaled:center') feature_sds = attr(Xs[[1]], 'scaled:scale') @@ -519,7 +512,7 @@ hmm <- function(Xs, # data if (iters >= control$maxiter){ if (diff(out$llh_seq[-1:0 + length(out$llh_seq)]) >= control$tol){ converged = FALSE - warning('failed to converge by maxiter = ', control$maxiter, ' iterations (tol = ', control$tol, ')') + if (control$verbose > 0) warning('failed to converge by maxiter = ', control$maxiter, ' iterations (tol = ', control$tol, ')') } } diff --git a/R/llh.R b/R/llh.R index 9f85312..40539d6 100644 --- a/R/llh.R +++ b/R/llh.R @@ -9,6 +9,7 @@ #' sequence, a single matrix can be provided #' @param mod Model object of class 'feelr.hmm', as output by \code{hmm} #' @param control list +#' #' @return List with two components. \code{llhs} is a numeric vector of #' log-likelihoods of each observation sequence in \code{Xs}. \code{llh_total} #' is the log-likelihood of all observation sequences together, i.e. @@ -17,7 +18,8 @@ #' \code{mod$llhs}. This is because \code{hmm} estimates the starting state of #' each sequence, whereas here it is assumed that the starting state is drawn #' from the stationary distribution \code{mod$delta}. -#' +#' @export + llh = function(Xs, # data mod, # fitted feelr.hmm model control=list() @@ -29,7 +31,7 @@ llh = function(Xs, # data if (class(Xs) == 'matrix') Xs = list(Xs) - if (any(sapply(Xs, function(X) class(X)!='matrix'))) + if (any(sapply(Xs, function(X) !is.matrix(X)))) stop('Xs must be matrix or list of matrices') if (any(sapply(Xs, ncol) != length(mod$mus[[1]]))) diff --git a/R/standardizeFeatures.R b/R/standardizeFeatures.R new file mode 100644 index 0000000..352d2e6 --- /dev/null +++ b/R/standardizeFeatures.R @@ -0,0 +1,330 @@ +#' Title +#' +#' @param Xs Data +#' @param feature_means Numeric vector corresponding to columns of elements in Xs +#' @param feature_sds Numeric vector corresponding to columns of elements in Xs. If not supplied, will be computed from Xs. +#' @param verbose Verbose printing +#' +#' @return Standardizes `data` of objects of class +#' `preppedAudio`. Maintains structure of original object +#' otherwise. Is used to standardize data where the recording +#' environment systematically shifts audio features. +#' +#' @details \code{feature_means} and \code{feature_sds} are provided to allow +#' alignment of new datasets. For example, after a model is trained, new data +#' for prediction must be transformed in the same way as the training data to +#' ensure predictions are valid. If either is \code{NULL}, both will be +#' computed from \code{Xs} and the output will be internally standardized +#' (i.e., columns of \code{do.call(rbind, standardizeFeatures(Xs))} will be +#' have a mean of 0 and a standard deviation of 1). +#' +#' @examples +#' data('audio') +#' audio$data <- standardizeFeatures( +#' lapply(audio$data, function(x) na.omit(x)) +#' ) + +standardizeFeatures = function(Xs, feature_means=NULL, feature_sds=NULL, verbose=1){ + + if (class(Xs) == 'matrix') + Xs = list(Xs) + + if (any(unique(diff(sapply(Xs, ncol))) != 0)) + stop('all observation sequences must have same number of observed features') + + if (length(Xs) > 1){ + for (i in 1:length(Xs)){ + if (!all.equal(colnames(Xs[[i]]), colnames(Xs[[1]]))) + stop('colnames differ across observation sequences') + } + } + + if (!is.null(feature_means)){ + if (!is.numeric(feature_means) | !length(feature_means) == ncol(Xs[[1]])) + stop('"feature_means" must be numeric vector of length ncol(Xs[[i]]) for all i') + } + + if (!is.null(feature_sds)){ + if (!is.numeric(feature_sds) | !length(feature_sds) == ncol(Xs[[1]])) + stop('"feature_sds" must be numeric vector of length ncol(Xs[[i]]) for all i') + } + + internal = TRUE + if (xor(is.null(feature_means), is.null(feature_sds))){ + warning('both "feature_means" and "feature_sds" must be supplied for external alignment; ignoring') + feature_means = NULL + feature_sds = NULL + } + if (!(is.null(feature_means) | is.null(feature_sds))) { + internal = FALSE + } + + if (verbose == 1) + cat('standardizing features\n') + + # indices + + N = length(Xs) + N_digits = ceiling(log(N+1, 10)) + M = ncol(Xs[[1]]) + Ts = sapply(Xs, nrow) # number of obs in each sequence + + ind_d0 = grep('_d\\d$', colnames(Xs[[1]]), invert=TRUE) # base features + ind_d1 = grep('_d1$', colnames(Xs[[1]])) # 1st deriv + ind_d2 = grep('_d2$', colnames(Xs[[1]])) # 2nd deriv + + M_d0 = length(ind_d0) + M_d1 = length(ind_d1) + M_d2 = length(ind_d2) + + derivatives = 0 + if (M_d1 > 0) + derivatives = 1 + if (M_d2 > 0) + derivatives = 2 + + if (derivatives >= 1){ + if (!all.equal(colnames(Xs[[1]])[ind_d0], gsub('_d1$', '', colnames(Xs[[1]])[ind_d1]))){ + stop('failed to align base features with first derivatives') + } + } + + if (derivatives == 2){ + if (!all.equal(colnames(Xs[[1]])[ind_d0], gsub('_d2$', '', colnames(Xs[[1]])[ind_d2]))){ + stop('failed to align base features with second derivatives') + } + } + + # identify patterns of missingness + + for (i in 1:N){ + Xs[[i]][!is.finite(Xs[[i]])] = NA + } + + missingness = lapply(Xs, is.na) + missingness_binary = lapply(missingness, function(x){ + for (m in 1:M){ + x[,m] = 2^(M-m) * x[,m] # missingness -> binary sequence -> decimal + } + return(rowSums(x)) + }) + nonmissing = lapply(missingness_binary, function(x){ + x == 0 + }) + complete_obs = sum(sapply(nonmissing, sum)) + if (complete_obs <= M && internal){ + stop('only ', complete_obs, ' complete observations (no missing features) found; ', + 'provide at least nfeatures complete observations to estimate model') + } else if (complete_obs < 1000 & complete_obs < sum(Ts)){ + warning('only ', complete_obs, ' complete observations (no missing features) found; ', + 'partially censored observations are used for likelihood calculations but ', + 'dropped for feature standardization and estimation of state distributions.') + } + + missingness_modes = unique(do.call(rbind, missingness)) + missingness_modes_binary = unique(do.call(c, missingness_binary)) + + nonmissing_features = lapply(1:nrow(missingness_modes), function(i){ + which(!missingness_modes[i,]) + }) + missingness_labels = lapply(missingness_binary, function(x){ + match(x, missingness_modes_binary) + }) + + attr(Xs, 'nonmissing') = lapply(nonmissing, which) + attr(Xs, 'missingness_labels') = missingness_labels + attr(Xs, 'nonmissing_features') = nonmissing_features + + # locate zeroes and contaminated adjacent derivatives + zeroes = lapply(Xs, function(X) which(X[,ind_d0]==0, arr.ind=T)) + + if (derivatives >= 1){ # indices of contaminated 1st derivatives + zeroes_back1 = zeroes_fwd1 = zeroes + for (i in 1:N){ + zeroes_back1[[i]][,'row'] = zeroes_back1[[i]][,'row'] - 1 + zeroes_back1[[i]] = zeroes_back1[[i]][zeroes_back1[[i]][,'row'] > 0,] + zeroes_fwd1[[i]][,'row'] = zeroes_fwd1[[i]][,'row'] + 1 + zeroes_fwd1[[i]] = zeroes_fwd1[[i]][zeroes_fwd1[[i]][,'row'] <= Ts[i],] + } + } + + if (derivatives == 2){ # indices of contaminated 2nd derivatives + zeroes_back2 = zeroes_fwd2 = zeroes + for (i in 1:N){ + zeroes_back2[[i]][,'row'] = zeroes_back2[[i]][,'row'] - 2 + zeroes_back2[[i]] = zeroes_back2[[i]][zeroes_back2[[i]][,'row'] > 0,] + zeroes_fwd2[[i]][,'row'] = zeroes_fwd2[[i]][,'row'] + 2 + zeroes_fwd2[[i]] = zeroes_fwd2[[i]][zeroes_fwd2[[i]][,'row'] <= Ts[i],] + } + } + + # standardize + + if (verbose == 1) + cat(' centering obs seq ', rep(' ', N_digits), sep='') + + if (internal){ # calculate internal means of features (as opposed to using means from external source) + running_sum = rep(0, M) + running_count = rep(0, M) + for (i in 1:N){ + Xs.zerotoNA <- Xs[[i]] + Xs.zerotoNA[zeroes[[i]]] <- NA + running_sum <- running_sum + colSums(Xs.zerotoNA, na.rm = TRUE) + running_count <- running_count + colSums(!is.na(Xs.zerotoNA)) + } + feature_means <- running_sum / running_count + ## rowSums(sapply(1:N, function(i){ + ## colSums(Xs[[i]][nonmissing[[i]],]), na.rm = TRUE) + ## }) / + ## sum(sapply(nonmissing, sum)) + } + + for (i in 1:N){ + if (verbose == 1){ + cat(rep('\b', N_digits), sprintf(paste('%', N_digits, 'd', sep=''), i), sep='') + } + Xs[[i]] = sweep(Xs[[i]], 2, feature_means) + } + if (verbose == 1){ + cat('\n') + } + if (verbose == 1){ + cat(' scaling obs seq ', rep(' ', N_digits), sep='') + } + + if (internal){ # calculate internal sds of features (as opposed to using sds from external source) + running_sum_sq = rep(0, M) + for (i in 1:N){ + Xs.zerotoNA <- Xs[[i]] + Xs.zerotoNA[zeroes[[i]]] <- NA + running_sum_sq <- running_sum_sq + colSums(Xs.zerotoNA^2, na.rm = TRUE) + } + feature_sds = sqrt(running_sum_sq / (running_count - 1)) + ## feature_sds = sqrt( + ## rowSums(sapply(1:N, function(i){ + ## colSums(Xs[[i]][nonmissing[[i]],]^2)) + ## }) / + ## (sum(sapply(nonmissing, sum)) - 1) + ## ) + } + if (any(feature_sds == 0 | is.na(feature_sds))){ + stop('zero variance in the following features ', + 'after dropping incomplete obs (missing features):\n ', + paste(colnames(Xs[[1]])[feature_sds == 0], collapse=', '), + '\npartially censored obs are used in likelihood calculations ', + 'but not feature standardization or estimation of state distributions' + ) + } + for (i in 1:N){ + if (verbose == 1){ + cat(rep('\b', N_digits), sprintf(paste('%', N_digits, 'd', sep=''), i), sep='') + } + Xs[[i]] = scale(Xs[[i]], center=F, scale=feature_sds) + attr(Xs[[i]], 'scaled:center') = feature_means + } + if (verbose == 1){ + cat('\n') + } + + # + + + + + # handle zeroed-out feature-observations + seqs_with_zeroes = which(sapply(zeroes, length) > 0) + if (length(seqs_with_zeroes) > 0){ + + if (verbose == 1){ + cat(' handling zeroed-out obs in seq ', rep(' ', N_digits), sep='') + } + + for (i in seqs_with_zeroes){ + + if (verbose == 1) + cat(rep('\b', N_digits), sprintf(paste('%', N_digits, 'd', sep=''), i), sep='') + + # replace zeroed-out feature-obs with -2 sd + small noise + Xs[[i]][zeroes[[i]]] = stats::rnorm(nrow(zeroes[[i]]), + mean = -3, + sd = .1) + + } + if (verbose == 1){ + cat('\n') + } + + # handle 1st derivative of zeroed-out feature-observations + if (derivatives >= 1){ + + if (verbose == 1) + cat(' handling 1st derivatives at zeroed-out obs in seq ', rep(' ', N_digits), sep='') + + for (i in seqs_with_zeroes){ + + if (verbose == 1) + cat(rep('\b', N_digits), sprintf(paste('%', N_digits, 'd', sep=''), i), sep='') + + zeroes_back1[[i]][,'col'] = zeroes_back1[[i]][,'col'] + M_d0 + zeroes[[i]][,'col'] = zeroes[[i]][,'col'] + M_d0 + zeroes_fwd1[[i]][,'col'] = zeroes_fwd1[[i]][,'col'] + M_d0 + + # pull contaminated derivatives back to zero and add small noise + Xs[[i]][zeroes_back1[[i]]] =stats::rnorm(nrow(zeroes_back1[[i]]), 0, sd=.01) + Xs[[i]][zeroes[[i]]] =stats::rnorm(nrow(zeroes[[i]]), 0, sd=.01) + Xs[[i]][zeroes_fwd1[[i]]] =stats::rnorm(nrow(zeroes_fwd1[[i]]), 0, sd=.01) + + } + if (verbose == 1){ + cat('\n') + } + + } + + # handle 2nd derivative of zeroed-out feature-observations + if (derivatives == 2){ + + if (verbose == 1) + cat(' handling 2nd derivatives at zeroed-out obs in seq ', rep(' ', N_digits), sep='') + + for (i in seqs_with_zeroes){ + + if (verbose == 1) + cat(rep('\b', N_digits), sprintf(paste('%', N_digits, 'd', sep=''), i), sep='') + + zeroes_back2[[i]][,'col'] = zeroes_back2[[i]][,'col'] + M_d0 + M_d1 + zeroes_back1[[i]][,'col'] = zeroes_back1[[i]][,'col'] + M_d0 + zeroes[[i]][,'col'] = zeroes[[i]][,'col'] + M_d0 + zeroes_fwd1[[i]][,'col'] = zeroes_fwd1[[i]][,'col'] + M_d0 + zeroes_fwd2[[i]][,'col'] = zeroes_fwd2[[i]][,'col'] + M_d0 + M_d1 + + # pull contaminated derivatives back to zero and add small noise + Xs[[i]][zeroes_back2[[i]]] =stats::rnorm(nrow(zeroes_back2[[i]]), 0, sd=.01) + Xs[[i]][zeroes_back1[[i]]] =stats::rnorm(nrow(zeroes_back1[[i]]), 0, sd=.01) + Xs[[i]][zeroes[[i]]] =stats::rnorm(nrow(zeroes[[i]]), 0, sd=.01) + Xs[[i]][zeroes_fwd1[[i]]] =stats::rnorm(nrow(zeroes_fwd1[[i]]), 0, sd=.01) + Xs[[i]][zeroes_fwd2[[i]]] =stats::rnorm(nrow(zeroes_fwd2[[i]]), 0, sd=.01) + + attr(Xs[[i]], 'derivatives') = derivatives + + } + if (verbose == 1){ + cat('\n') + } + + } + + } + + if (derivatives > 0){ + for (i in 1:N){ + ## drop leading frames if they have missing derivatives + if (sum(is.na(Xs[[i]][derivatives,])) >= M / (derivatives + 1)){ + Xs[[i]][1:derivatives,] = NA + } + } + } + + return(Xs) + +}