From e8952da4800c6b24763fb802b599754ff3cfe922 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Feb 2026 12:46:18 -0500 Subject: [PATCH 1/8] advanced GSEA notebook: spacing in general, and update the msigdbr argument to collection instead of category --- .../04-gene_set_enrichment_analysis.Rmd | 68 +++++++++++-------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd b/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd index e22afb81..531bd5fe 100644 --- a/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd +++ b/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd @@ -69,9 +69,11 @@ fs::dir_create(results_dir) ```{r input_files} -input_file <- file.path(rms_analysis_dir, - "deseq", - "rms_myoblast_deseq_results.tsv") +input_file <- file.path( + rms_analysis_dir, + "deseq", + "rms_myoblast_deseq_results.tsv" +) ``` #### Output files @@ -79,8 +81,10 @@ input_file <- file.path(rms_analysis_dir, We'll save our table of GSEA results as a TSV. ```{r output_files} -output_file <- file.path(results_dir, - "rms_myoblast_gsea_results.tsv") +output_file <- file.path( + results_dir, + "rms_myoblast_gsea_results.tsv" +) ``` ## Gene sets @@ -116,8 +120,10 @@ The fewer gene sets we test, the lower our multiple hypothesis testing burden. We can retrieve only the Hallmark gene sets by specifying `category = "H"` to the `msigdbr()` function. ```{r immunologic_sets, live = TRUE} -hs_hallmarks_df <- msigdbr(species = "Homo sapiens", - category = "H") +hs_hallmarks_df <- msigdbr( + species = "Homo sapiens", + collection = "H" +) ``` ## Gene Set Enrichment Analysis @@ -211,14 +217,16 @@ Now for the analysis! We can use the `GSEA()` function to perform GSEA with any generic set of gene sets, but there are several functions for using specific, commonly used gene sets (e.g., `gseKEGG()`). ```{r run_gsea} -gsea_results <- GSEA(geneList = lfc_vector, # ordered ranked gene list - minGSSize = 25, # minimum gene set size - maxGSSize = 500, # maximum gene set set - pvalueCutoff = 0.05, - pAdjustMethod = "BH", # correction for multiple hypothesis testing - TERM2GENE = dplyr::select(hs_hallmarks_df, - gs_name, - ensembl_gene)) # pass the correct identifier column +gsea_results <- GSEA( + geneList = lfc_vector, # ordered ranked gene list + minGSSize = 25, # minimum gene set size + maxGSSize = 500, # maximum gene set set + palueCutoff = 0.05, + pAdjustMethod = "BH", # correction for multiple hypothesis testing + TERM2GENE = dplyr::select(hs_hallmarks_df, + gs_name, + ensembl_gene) # pass the correct identifier column +) ``` Let's take a look at the GSEA results. @@ -250,10 +258,12 @@ Let's take a look at 3 different pathways -- one with a highly positive NES, one Let's take look at a pathway with a highly positive NES (`HALLMARK_MYC_TARGETS_V2`) using a GSEA plot. ```{r highly_pos} -enrichplot::gseaplot(gsea_results, - geneSetID = "HALLMARK_MYC_TARGETS_V2", - title = "HALLMARK_MYC_TARGETS_V2", - color.line = "#0066FF") +enrichplot::gseaplot( + gsea_results, + geneSetID = "HALLMARK_MYC_TARGETS_V2", + title = "HALLMARK_MYC_TARGETS_V2", + color.line = "#0066FF" +) ``` Notice how the genes that are in the gene set, indicated by the black bars, tend to be on the left side of the graph indicating that they have positive gene-level scores. @@ -263,10 +273,12 @@ Notice how the genes that are in the gene set, indicated by the black bars, tend The gene set `HALLMARK_MYOGENESIS` had a highly negative NES. ```{r highly_neg} -enrichplot::gseaplot(gsea_results, - geneSetID = "HALLMARK_MYOGENESIS", - title = "HALLMARK_MYOGENESIS", - color.line = "#0066FF") +enrichplot::gseaplot( + gsea_results, + geneSetID = "HALLMARK_MYOGENESIS", + title = "HALLMARK_MYOGENESIS", + color.line = "#0066FF" +) ``` This gene set shows the opposite pattern -- genes in the pathway tend to be on the right side of the graph. @@ -277,10 +289,12 @@ The `@results` slot will only show gene sets that pass the `pvalueCutoff` thresh Let's look at `HALLMARK_P53_PATHWAY`, which was not in the results we viewed earlier. ```{r p53, live = TRUE} -enrichplot::gseaplot(gsea_results, - geneSetID = "HALLMARK_P53_PATHWAY", - title = "HALLMARK_P53_PATHWAY", - color.line = "#0066FF") +enrichplot::gseaplot( + gsea_results, + geneSetID = "HALLMARK_P53_PATHWAY", + title = "HALLMARK_P53_PATHWAY", + color.line = "#0066FF" +) ``` Genes in the pathway are distributed more evenly throughout the ranked list, resulting in a more "middling" score. From 0a6ee2b4e4d1f6ef563467ce339301304008d2f2 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 11:04:41 -0500 Subject: [PATCH 2/8] fix typo and one more wording spot --- scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd b/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd index 531bd5fe..513c4ea7 100644 --- a/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd +++ b/scRNA-seq-advanced/04-gene_set_enrichment_analysis.Rmd @@ -117,7 +117,7 @@ Here's an excerpt of the [collection description](https://www.gsea-msigdb.org/gs Notably, there are only 50 gene sets included in this collection. The fewer gene sets we test, the lower our multiple hypothesis testing burden. -We can retrieve only the Hallmark gene sets by specifying `category = "H"` to the `msigdbr()` function. +We can retrieve only the Hallmark gene sets by specifying `collection = "H"` to the `msigdbr()` function. ```{r immunologic_sets, live = TRUE} hs_hallmarks_df <- msigdbr( @@ -221,7 +221,7 @@ gsea_results <- GSEA( geneList = lfc_vector, # ordered ranked gene list minGSSize = 25, # minimum gene set size maxGSSize = 500, # maximum gene set set - palueCutoff = 0.05, + pvalueCutoff = 0.05, pAdjustMethod = "BH", # correction for multiple hypothesis testing TERM2GENE = dplyr::select(hs_hallmarks_df, gs_name, From 686bcdd0f8e82ab61c67740d2c390f849b92fa15 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Feb 2026 13:06:24 -0500 Subject: [PATCH 3/8] advanced AUCell notebook: spacing all around, update AUCell to use a gene list instead of genesetcollection, and update msigdbr arguments --- scRNA-seq-advanced/05-aucell.Rmd | 147 ++++++++++++++++++------------- 1 file changed, 84 insertions(+), 63 deletions(-) diff --git a/scRNA-seq-advanced/05-aucell.Rmd b/scRNA-seq-advanced/05-aucell.Rmd index 4f185439..94a46700 100644 --- a/scRNA-seq-advanced/05-aucell.Rmd +++ b/scRNA-seq-advanced/05-aucell.Rmd @@ -77,16 +77,20 @@ fs::dir_create(analysis_dir) The input will be a `SingleCellExperiment` for an individual Ewing sarcoma library. ```{r setup_input_files} -sce_file <- fs::path(processed_dir, - "SCPCS000490", - "SCPCL000822_processed.rds") +sce_file <- fs::path( + processed_dir, + "SCPCS000490", + "SCPCL000822_processed.rds" +) ``` We will save the `AUCell` results as a table in the analysis directory. ```{r setup_output_files, live = TRUE} -output_file <- fs::path(analysis_dir, - "ewing_sarcoma_aucell_results.tsv") +output_file <- fs::path( + analysis_dir, + "ewing_sarcoma_aucell_results.tsv" +) ``` @@ -112,8 +116,10 @@ We would expect both of these gene sets to have high expression in tumor cells. ```{r genesets} # Create a named vector with the relevant gene set names -ewing_gene_set_names <- c(zhang = "ZHANG_TARGETS_OF_EWSR1_FLI1_FUSION", - riggi = "RIGGI_EWING_SARCOMA_PROGENITOR_UP") +ewing_gene_set_names <- c( + zhang = "ZHANG_TARGETS_OF_EWSR1_FLI1_FUSION", + riggi = "RIGGI_EWING_SARCOMA_PROGENITOR_UP" +) ewing_gene_set_names ``` @@ -122,32 +128,25 @@ These gene sets come from the C2 gene set collection from MSigDB. Let's retrieve them using `msigdbr()`. ```{r extract_genesets, live = TRUE} -ewing_gene_sets_df <- msigdbr(species = "Homo sapiens", - category = "C2", - subcategory = "CGP") |> - dplyr::filter(gs_name %in% ewing_gene_set_names) +ewing_gene_sets_df <- msigdbr( + species = "Homo sapiens", + collection = "C2", + subcollection = "CGP" +) |> + dplyr::filter(gs_name %in% ewing_gene_set_names) |> + dplyr::select(gs_name, ensembl_gene) ``` -`AUCell` uses gene sets in a particular format that comes from the `GSEABase` package. -We need to create a `GeneSetCollection`. +We can provide these gene sets as a named list to `AUCell`, so let's create that now. -```{r gene_set_collection} -ewing_gene_set_collection <- ewing_gene_set_names |> - purrr::map( - # For each gene set - \(gene_set_name) { - ewing_gene_sets_df |> - # Subset to the rows in that gene set - dplyr::filter(gs_name == gene_set_name) |> - # Grab the Ensembl gene identifiers - dplyr::pull(ensembl_gene) |> - # Create a GeneSet object - GeneSet(setName = gene_set_name, - geneIdType = ENSEMBLIdentifier()) - } - ) |> - # Turn the list of GeneSet objects into a GeneSet collection - GeneSetCollection() +```{r create_gene_list} +ewing_gene_set_list <- split( + ewing_gene_sets_df$ensembl_gene, + ewing_gene_sets_df$gs_name +) + +# briefly, show the resulting list +purrr::map(ewing_gene_set_list, head) ``` ## Read in and prepare the `SingleCellExperiment` @@ -176,13 +175,16 @@ We can extract a counts matrix in sparse format for use with `AUCell`. counts_matrix <- counts(sce) ``` -There may be genes in our gene set that do not appear in the `SingleCellExperiment` object. -We can remove them using the `subsetGeneSets()` function. +There may be genes in our gene set that do not appear in the `SingleCellExperiment` object which we should remove before proceeding. ```{r subset_gene_sets, live = TRUE} -# Remove genes from gene sets if they are not in the SCE -ewing_gene_set_collection <- subsetGeneSets(ewing_gene_set_collection, - rownames(counts_matrix)) +# Remove genes from list if they are not in the SCE +ewing_gene_set_list <- ewing_gene_set_list |> + purrr::map( + \(gene_list) { + intersect(gene_list, rownames(sce)) # keep only the intersection with our genes + } +) ``` ## `AUCell` @@ -248,11 +250,13 @@ First, we'll start with a cell with a high AUC. We picked this barcode ahead of time when we wrote the notebook. ```{r high_recovery_curve} -plot_recovery_curve(cell_rankings, - ewing_gene_set_collection, - gene_set_name = "ZHANG_TARGETS_OF_EWSR1_FLI1_FUSION", - barcode = "CTGAGCGGTCTTTATC", - auc_max_rank = auc_max_rank) # 1% threshold +plot_recovery_curve( + cell_rankings, + ewing_gene_set_list, + gene_set_name = "ZHANG_TARGETS_OF_EWSR1_FLI1_FUSION", + barcode = "CTGAGCGGTCTTTATC", + auc_max_rank = auc_max_rank # 1% threshold +) ``` The x-axis is the gene ranks for all genes. @@ -262,11 +266,13 @@ The AUC is the area under this recovery curve at the max rank threshold chosen f Now, let's look at an example with a low AUC. ```{r low_recovery_curve} -plot_recovery_curve(cell_rankings, - ewing_gene_set_collection, - gene_set_name = "ZHANG_TARGETS_OF_EWSR1_FLI1_FUSION", - barcode = "AGATAGAGTCACAATC", - auc_max_rank = auc_max_rank) # 1% threshold +plot_recovery_curve( + cell_rankings, + ewing_gene_set_list, + gene_set_name = "ZHANG_TARGETS_OF_EWSR1_FLI1_FUSION", + barcode = "AGATAGAGTCACAATC", + auc_max_rank = auc_max_rank # 1% threshold +) ``` Far fewer genes in the gene set are ranked above the threshold, yielding a lower AUC value. @@ -276,9 +282,11 @@ Far fewer genes in the gene set are ranked above the threshold, yielding a lower Once we have the rankings, we can calculate the AUC scores for both gene sets in all cells with the `AUCell_calcAUC()` function. ```{r calc_auc, live = TRUE} -cell_auc <- AUCell_calcAUC(geneSets = ewing_gene_set_collection, - rankings = cell_rankings, - aucMaxRank = auc_max_rank) +cell_auc <- AUCell_calcAUC( + geneSets = ewing_gene_set_list, + rankings = cell_rankings, + aucMaxRank = auc_max_rank +) ``` This function returns an `aucellResults` object. @@ -309,18 +317,22 @@ head(auc_df) We'll explore these in a later plot, but for now, let's calculate the threshold and assign cells with `AUCell_exploreThresholds()`. ```{r auc_assignments, live = TRUE} -auc_assignments <- AUCell_exploreThresholds(cell_auc, - plotHist = FALSE, - assignCells = TRUE) +auc_assignments <- AUCell_exploreThresholds( + cell_auc, + plotHist = FALSE, + assignCells = TRUE +) ``` We're going to plot the distribution of AUC values with `ggplot2`, so we will want the AUC values in a longer format. ```{r auc_plotting_df} auc_plotting_df <- auc_df |> - tidyr::pivot_longer(!barcodes, - names_to = "gene_set", - values_to = "auc") |> + tidyr::pivot_longer( + !barcodes, + names_to = "gene_set", + values_to = "auc" + ) |> dplyr::mutate( # Create a new logical column called assigned assigned = dplyr::case_when( @@ -396,9 +408,10 @@ First, let's rename the gene set columns to something more easily typed. ```{r rename_gene_set} auc_df <- auc_df |> # Use shorter names - dplyr::rename(zhang_auc = ewing_gene_set_names[["zhang"]], - riggi_auc = ewing_gene_set_names[["riggi"]]) - + dplyr::rename( + zhang_auc = ewing_gene_set_names[["zhang"]], + riggi_auc = ewing_gene_set_names[["riggi"]] + ) ``` And join it to the existing `colData`. @@ -431,18 +444,26 @@ We can use the `plotUMAP()` function from the `scater` package to plot a UMAP wi ```{r plot_umap_zhang} scater::plotUMAP(sce, colour_by = "zhang_auc") + # Use the gene set name, replacing underscores with spaces - ggplot2::ggtitle(stringr::str_replace_all(ewing_gene_set_names[["zhang"]], - "\\_", - " ")) + ggplot2::ggtitle( + stringr::str_replace_all( + ewing_gene_set_names[["zhang"]], + "\\_", + " " + ) + ) ``` Let's color the points by the AUC values for the other gene set. ```{r plot_umap_riggi, live = TRUE} scater::plotUMAP(sce, colour_by = "riggi_auc") + - ggplot2::ggtitle(stringr::str_replace_all(ewing_gene_set_names[["riggi"]], - "\\_", - " ")) + ggplot2::ggtitle( + stringr::str_replace_all( + ewing_gene_set_names[["riggi"]], + "\\_", + " " + ) + ) ``` We would want to do something more formal to confirm, but it seems like the same cells have high AUC values for both gene sets! From 015fb8f0b2f6ae3384af4b18e49e0ee11600101c Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Feb 2026 13:06:51 -0500 Subject: [PATCH 4/8] advanced aucell util: update plot_recovery_curve function to use list instead of genesetcollection --- scRNA-seq-advanced/util/aucell_functions.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scRNA-seq-advanced/util/aucell_functions.R b/scRNA-seq-advanced/util/aucell_functions.R index 16f3482f..44dc2db1 100644 --- a/scRNA-seq-advanced/util/aucell_functions.R +++ b/scRNA-seq-advanced/util/aucell_functions.R @@ -3,8 +3,8 @@ #' Adapted from https://github.com/aertslab/AUCell/blob/91753b327a39dc7a4bbed46408ec2271c485f2f0/vignettes/AUCell.Rmd#L295-L316 #' #' @param cell_rankings Cell rankings; the output of AUCell::AUCell_buildRankings() -#' @param gene_set_collection A GSEABase GeneSetCollection object -#' @param gene_set_name The name of the gene set from the GeneSetCollection +#' @param gene_set_list A list of gene sets +#' @param gene_set_name The name of the gene set the gene_set_list #' that you would like to plot a recovery curve for #' @param barcode The cell barcode that you would like to plot a recovery #' curve for @@ -14,7 +14,7 @@ #' #' @return Outputs a recovery curve plot with the plot_recovery_curve <- function(cell_rankings, - gene_set_collection, + gene_set_list, gene_set_name, barcode, auc_max_rank, @@ -24,8 +24,8 @@ plot_recovery_curve <- function(cell_rankings, # Pull out the gene set and identify where in the cell ranks those genes # lie - gene_set <- gene_set_collection[[gene_set_name]] - gene_set_ranks <- cell_rankings[geneIds(gene_set), ] + gene_set <- ewing_gene_set_list[[gene_set_name]] + gene_set_ranks <- cell_rankings[gene_set, ] # Index of the cell with the barcode barcode_index <- which(colnames(gene_set_ranks) == barcode) @@ -35,7 +35,7 @@ plot_recovery_curve <- function(cell_rankings, plot(aucCurve, type="s", col="darkblue", lwd=1, xlab="Gene rank", ylab="# genes in the gene set", - xlim=c(0, auc_max_rank*3), ylim=c(0, nGenes(gene_set)), + xlim=c(0, auc_max_rank*3), ylim=c(0, length(gene_set)), main="Recovery curve", sub=paste("Cell:", colnames(gene_set_ranks)[barcode_index])) aucShade <- aucCurve[which(aucCurve[,1] < auc_max_rank),] From 17fba6107ab4c3d4130e1440b65b2f954fa6f065 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 11:27:00 -0500 Subject: [PATCH 5/8] fix typo --- scRNA-seq-advanced/util/aucell_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scRNA-seq-advanced/util/aucell_functions.R b/scRNA-seq-advanced/util/aucell_functions.R index 44dc2db1..e4bbfa98 100644 --- a/scRNA-seq-advanced/util/aucell_functions.R +++ b/scRNA-seq-advanced/util/aucell_functions.R @@ -24,7 +24,7 @@ plot_recovery_curve <- function(cell_rankings, # Pull out the gene set and identify where in the cell ranks those genes # lie - gene_set <- ewing_gene_set_list[[gene_set_name]] + gene_set <- gene_set_list[[gene_set_name]] gene_set_ranks <- cell_rankings[gene_set, ] # Index of the cell with the barcode From 265a4d383068e39a28c624e57c6b0adf83380c47 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 12:23:04 -0500 Subject: [PATCH 6/8] comment out unfixed notebooks with TODO linking #911 --- scripts/render-live.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/render-live.sh b/scripts/render-live.sh index 88c54a40..bc09ea7d 100644 --- a/scripts/render-live.sh +++ b/scripts/render-live.sh @@ -31,7 +31,7 @@ files=( scRNA-seq/03-normalizing_scRNA.Rmd scRNA-seq/04-dimension_reduction_scRNA.Rmd scRNA-seq/05-clustering_markers_scRNA.Rmd - scRNA-seq/06-celltype_annotation.Rmd + # scRNA-seq/06-celltype_annotation.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd scRNA-seq-advanced/02-dataset_integration.Rmd scRNA-seq-advanced/03-differential_expression.Rmd @@ -41,9 +41,9 @@ files=( # machine-learning/02-openpbta_consensus_clustering.Rmd # machine-learning/03-openpbta_PLIER.Rmd # machine-learning/04-openpbta_plot_LV.Rmd - pathway-analysis/01-overrepresentation_analysis.Rmd - pathway-analysis/02-gene_set_enrichment_analysis.Rmd - pathway-analysis/03-gene_set_variation_analysis.Rmd + # pathway-analysis/01-overrepresentation_analysis.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 + # pathway-analysis/02-gene_set_enrichment_analysis.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 + # pathway-analysis/03-gene_set_variation_analysis.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 ) for file in ${files[@]} do From 40cbf57abb0d257b8ebf9fd55b5d1184c981ee9e Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 12 Feb 2026 17:18:08 -0500 Subject: [PATCH 7/8] fix the bad merge --- scripts/render-live.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/render-live.sh b/scripts/render-live.sh index 9188b287..fe0cd425 100644 --- a/scripts/render-live.sh +++ b/scripts/render-live.sh @@ -41,15 +41,9 @@ files=( # machine-learning/02-openpbta_consensus_clustering.Rmd # machine-learning/03-openpbta_PLIER.Rmd # machine-learning/04-openpbta_plot_LV.Rmd -<<<<<<< HEAD # pathway-analysis/01-overrepresentation_analysis.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 # pathway-analysis/02-gene_set_enrichment_analysis.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 # pathway-analysis/03-gene_set_variation_analysis.Rmd # TODO: Uncomment for https://github.com/AlexsLemonade/training-modules/issues/911 -======= - # pathway-analysis/01-overrepresentation_analysis.Rmd # TEMPORARY - # pathway-analysis/02-gene_set_enrichment_analysis.Rmd # TEMPORARY - # pathway-analysis/03-gene_set_variation_analysis.Rmd # TEMPORARY ->>>>>>> master ) for file in ${files[@]} do From b78715869bcf4084ae2d1e1bef1de4e5774689ab Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Fri, 13 Feb 2026 09:13:07 -0500 Subject: [PATCH 8/8] remove extra period --- scRNA-seq-advanced/05-aucell.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scRNA-seq-advanced/05-aucell.Rmd b/scRNA-seq-advanced/05-aucell.Rmd index 94a46700..55d3891b 100644 --- a/scRNA-seq-advanced/05-aucell.Rmd +++ b/scRNA-seq-advanced/05-aucell.Rmd @@ -18,7 +18,7 @@ date: 2024 --- -In this notebook, we'll demonstrate how to use the `AUCell` method, introduced in [Aibar _et al_. 2017.](https://doi.org/10.1038/nmeth.4463). +In this notebook, we'll demonstrate how to use the `AUCell` method, introduced in [Aibar _et al_. 2017](https://doi.org/10.1038/nmeth.4463). We can use `AUCell` when we are interested in a gene set's relative expression or activity in an individual cell. Gene sets can come from a curated collection of prior knowledge, like the Hallmark collection we used in the last notebook, or we can use our own custom gene sets (e.g., a set of marker genes for a cell type of interest).