From b8f67c3457cb546045cc10fc75475df1e0c36a53 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Feb 2026 11:34:47 -0500 Subject: [PATCH 1/5] scRNA-seq celltype notebook: update from restrict -> test.genes, including text --- scRNA-seq/06-celltype_annotation.Rmd | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scRNA-seq/06-celltype_annotation.Rmd b/scRNA-seq/06-celltype_annotation.Rmd index 82219b58..256d55e4 100644 --- a/scRNA-seq/06-celltype_annotation.Rmd +++ b/scRNA-seq/06-celltype_annotation.Rmd @@ -490,12 +490,12 @@ We will also take this time to dive a bit deeper into the steps that `SingleR` p As mentioned, the first step is training the model, during which we identify the genes that will be used for the correlation analysis later. While this step is not particularly slow, if we were classifying multiple samples, we would not want to have to repeat it for every sample. -To do the training, we will use the `trainSingleR()` function. +To do the training, we will use the `SingleR::trainSingleR()` function. For this we will start with our reference and the labels we want to train the model with. We can then specify the method used to select the genes that will be used for classification. The default method is `"de"`, which performs a differential expression analysis for each pair of labels, but we could also use `"sd"` to select the genes which are most variable across labels, or `"all"` to use all genes. -If we want to get really fancy, we could even provide a specific list of genes to use. +If we wanted to get really fancy, we could even provide a specific list of genes to use with the `restrict` argument. We should note here that the reference dataset for `SingleR` does not need to be from a compendium like `celldex`! If you have any well-classified dataset that you want to use as a reference, you can, as long as you can create a gene by sample expression matrix and a vector of cell types for each sample. @@ -503,7 +503,8 @@ You will want to ensure that the cell types you expect to see in your sample are You can even use a previously-annotated `SingleCellExperiment` as a reference for a new dataset. For more details about custom references, see the [OSCA chapter on cell type annotation](http://bioconductor.org/books/3.19/OSCA.basic/cell-type-annotation.html#using-custom-references) -We do want to be sure that the genes selected for the model will be among those present in our SCE object, so we will use the `restrict` argument with a vector of the genes in our SCE. +We do want to be sure that the genes included in the model will be among those present in our SCE object, so we will use the `test.genes` argument with a vector of the genes in our SCE. +Otherwise, the function will assume that both the reference and test data set have the same genes in the same order (which they don't!), and the next step of actually classifying cell types wouldn't work. This step would happen automatically with the `SingleR::SingleR()` function, but we need to add it manually for this use case. @@ -512,10 +513,10 @@ This step would happen automatically with the `SingleR::SingleR()` function, but singler_finemodel <- SingleR::trainSingleR( monaco_ref, # reference dataset labels = monaco_ref$label.fine, # labels for training dataset + # consider only genes in the sce object + test.genes = rownames(sce), # use DE to select genes (default) genes = "de", - # only use genes in the sce object - restrict = rownames(sce), # parallel processing BPPARAM = BiocParallel::MulticoreParam(4) ) From 4652281847c7db29e6773e4b72e66cacda87459a Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 11 Feb 2026 09:20:33 -0500 Subject: [PATCH 2/5] some spacing while we're here --- scRNA-seq/06-celltype_annotation.Rmd | 41 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/scRNA-seq/06-celltype_annotation.Rmd b/scRNA-seq/06-celltype_annotation.Rmd index 256d55e4..76bb8a61 100644 --- a/scRNA-seq/06-celltype_annotation.Rmd +++ b/scRNA-seq/06-celltype_annotation.Rmd @@ -61,24 +61,32 @@ We aren't planning any significant modifications of the underlying data, so we w ```{r filepaths, live=TRUE} # directory for the input data -data_dir <- file.path("data", - "PBMC-TotalSeqB", - "normalized") +data_dir <- file.path( + "data", + "PBMC-TotalSeqB", + "normalized" +) # the input file itself -sce_file <- file.path(data_dir, - "PBMC_TotalSeqB_normalized_sce.rds") +sce_file <- file.path( + data_dir, + "PBMC_TotalSeqB_normalized_sce.rds" +) # A directory to store outputs -analysis_dir <- file.path("analysis", - "PBMC-TotalSeqB") +analysis_dir <- file.path( + "analysis", + "PBMC-TotalSeqB" +) # Create directory if it doesn't exist fs::dir_create(analysis_dir) # output table path -cellinfo_file <- file.path(analysis_dir, - "PBMC_TotalSeqB_cellinfo.tsv") +cellinfo_file <- file.path( + analysis_dir, + "PBMC_TotalSeqB_cellinfo.tsv" +) ``` @@ -198,14 +206,12 @@ Let's plot the ADT results for those two markers as well below: ```{r plot CD4, live=TRUE} # plot CD4 marker -scater::plotUMAP(sce, - color_by = "CD4") +scater::plotUMAP(sce, color_by = "CD4") ``` ```{r plot CD8, live=TRUE} # plot CD8 marker -scater::plotUMAP(sce, - color_by = "CD8") +scater::plotUMAP(sce, color_by = "CD8") ``` @@ -306,8 +312,7 @@ Creating and assigning values to this column can be done with the `$` shortcut, ```{r plot thresholds} sce$threshold_celltype <- adt_df$celltype -scater::plotUMAP(sce, - color_by = "threshold_celltype") + +scater::plotUMAP(sce, color_by = "threshold_celltype") + guides(color = guide_legend(title = "Cell type")) ``` @@ -612,8 +617,10 @@ Now that we have that set up, we can plot using our collapsed and ordered cell t ```{r plot collapsed, live=TRUE} sce$celltype_collapsed <- collapsed_labels -scater::plotUMAP(sce, - color_by = "celltype_collapsed") +scater::plotUMAP( + sce, + color_by = "celltype_collapsed" +) ``` From 2f913e72cc5a26e3f0a2a9e5596b77230aafe6a9 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 14:09:03 -0500 Subject: [PATCH 3/5] temp comment out notebooks not to render, see if this one makes it through disk-space wise --- scripts/render-live.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/render-live.sh b/scripts/render-live.sh index 88c54a40..4149f84a 100644 --- a/scripts/render-live.sh +++ b/scripts/render-live.sh @@ -35,15 +35,15 @@ files=( scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd scRNA-seq-advanced/02-dataset_integration.Rmd scRNA-seq-advanced/03-differential_expression.Rmd - scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd - scRNA-seq-advanced/05-aucell.Rmd + # scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd # TEMPORARY + # scRNA-seq-advanced/05-aucell.Rmd # TEMPORARY # machine-learning/01-openpbta_heatmap.Rmd # machine-learning/02-openpbta_consensus_clustering.Rmd # machine-learning/03-openpbta_PLIER.Rmd # machine-learning/04-openpbta_plot_LV.Rmd - pathway-analysis/01-overrepresentation_analysis.Rmd - pathway-analysis/02-gene_set_enrichment_analysis.Rmd - pathway-analysis/03-gene_set_variation_analysis.Rmd + # pathway-analysis/01-overrepresentation_analysis.Rmd # TEMPORARY + # pathway-analysis/02-gene_set_enrichment_analysis.Rmd # TEMPORARY + # pathway-analysis/03-gene_set_variation_analysis.Rmd # TEMPORARY ) for file in ${files[@]} do From 88e347170ca639e584897c88f31ccaae69ddd34b Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 14:44:54 -0500 Subject: [PATCH 4/5] might as well try same step we use in exercises repo --- .github/workflows/render-rmds.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/render-rmds.yml b/.github/workflows/render-rmds.yml index 3b3cd56c..a234e62d 100644 --- a/.github/workflows/render-rmds.yml +++ b/.github/workflows/render-rmds.yml @@ -19,10 +19,22 @@ jobs: container: image: ccdl/training_rstudio:edge + steps: - name: Checkout code uses: actions/checkout@v4 + # As in: https://github.com/AlexsLemonade/exercise-notebook-answers/blob/2139d011b0aaf0a7ff0cdfc96e89c55bf6bb9215/.github/workflows/render-exercises.yml#L17 + - name: Clear disk space + id: clear-space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + df -h + - name: Load 1Password secrets uses: 1password/load-secrets-action@v2 with: From c3deda25a4b01b75fdf93e4c41ebff6b80d8e95c Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 14:52:30 -0500 Subject: [PATCH 5/5] Revert "might as well try same step we use in exercises repo" This reverts commit 88e347170ca639e584897c88f31ccaae69ddd34b. --- .github/workflows/render-rmds.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/render-rmds.yml b/.github/workflows/render-rmds.yml index a234e62d..3b3cd56c 100644 --- a/.github/workflows/render-rmds.yml +++ b/.github/workflows/render-rmds.yml @@ -19,22 +19,10 @@ jobs: container: image: ccdl/training_rstudio:edge - steps: - name: Checkout code uses: actions/checkout@v4 - # As in: https://github.com/AlexsLemonade/exercise-notebook-answers/blob/2139d011b0aaf0a7ff0cdfc96e89c55bf6bb9215/.github/workflows/render-exercises.yml#L17 - - name: Clear disk space - id: clear-space - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/local/share/boost - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - df -h - - name: Load 1Password secrets uses: 1password/load-secrets-action@v2 with: