From 920d672f814efd6c1941a2d41755e07d5b924962 Mon Sep 17 00:00:00 2001 From: angieh811 Date: Mon, 16 Aug 2021 14:49:56 -0400 Subject: [PATCH 1/3] Update requirements.txt --- .../Augmented_Pathway_Enrichment_Analysis/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/appyters/Augmented_Pathway_Enrichment_Analysis/requirements.txt b/appyters/Augmented_Pathway_Enrichment_Analysis/requirements.txt index d643228c..6ff60661 100644 --- a/appyters/Augmented_Pathway_Enrichment_Analysis/requirements.txt +++ b/appyters/Augmented_Pathway_Enrichment_Analysis/requirements.txt @@ -8,4 +8,5 @@ bokeh jinja2>=3.0,<3.1 pybase64 statsmodels -requests \ No newline at end of file +requests +networkx \ No newline at end of file From 2890c8fb50fe0cd373c9e8039bc051e29e2591d9 Mon Sep 17 00:00:00 2001 From: angieh811 Date: Mon, 16 Aug 2021 14:57:16 -0400 Subject: [PATCH 2/3] Tiebow --- ...ugmented_Pathway_Enrichment_Analysis.ipynb | 463 +++++++++++------- .../README.md | 2 +- 2 files changed, 282 insertions(+), 183 deletions(-) diff --git a/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb b/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb index 5cab5215..319851f4 100644 --- a/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb +++ b/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb @@ -17,8 +17,8 @@ "id": "7f426231", "metadata": {}, "source": [ - "# Augmented Pathway Enrichment Analysis\n", - "This Appyter performs enrichment analysis given an input set of genes, and a library of pathways. The Appyter performs the Fisher exact test to compute enrichment p-value and q-values, and reports the results in a bar graph and sorted table." + "# Augmented Pathway Enrichment Analysis (APEA)\n", + "The APEA Appyter performs enrichment analysis against 4 major pathway databases given an input set of genes. 
The APEA Appyter performs enrichment analysis with the Fisher's exact test, and reports the results as bar graphs and sorted tables. By augmenting the pathway databases with gene-gene similarity matrices, APEA improves pathway enrichment analysis while also providing additional genes that are likely members of the enriched pathways and are relevant to the biological process under investigation" ] }, { @@ -30,6 +30,7 @@ "source": [ "from maayanlab_bioinformatics.enrichment.crisp import enrich_crisp, fisher_overlap\n", "\n", + "%matplotlib inline\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", @@ -39,24 +40,16 @@ "from statsmodels.stats.multitest import multipletests\n", "from collections import OrderedDict\n", "import urllib\n", - "# Manhattan Plot Imports\n", - "import matplotlib.patches as mpatches\n", - "import matplotlib.cm as cm\n", - "\n", - "# Bokeh\n", - "from bokeh.io import output_notebook\n", - "from bokeh.plotting import figure, show\n", - "from bokeh.models import HoverTool, CustomJS, ColumnDataSource, Span, Select, PreText, Paragraph\n", - "from bokeh.layouts import layout, row, column, gridplot\n", - "from bokeh.palettes import all_palettes\n", "\n", + "import ipywidgets as widgets\n", "import base64\n", "import csv\n", "import itertools\n", "import json\n", "import requests\n", "import time\n", - "import urllib" + "import urllib\n", + "import networkx as nx" ] }, { @@ -75,9 +68,9 @@ " \n", ") %}\n", "{% do SectionField(\n", - " name='Library_Section',\n", - " title='Submit Your Library',\n", - " subtitle='Choose a library for your augmented pathway enrichment analysis.',\n", + " name='Similarity_Section',\n", + " title='Choose Your Similarity',\n", + " subtitle='Choose a similarity for your augmented pathway enrichment analysis.',\n", " img='analysis.png'\n", " \n", ") %}" @@ -130,7 +123,15 @@ " \n", " },\n", " section = 'Set_Section',\n", - ") %}" + ") %}\n", + "\n", + "{% set input_name = 
StringField(\n", + " name='input_name', \n", + " label='Input Set Name', \n", + " default='M1 Genes', \n", + " description='Input set name', \n", + " section='Set_Section')\n", + "%}" ] }, { @@ -141,52 +142,32 @@ "outputs": [], "source": [ "%%appyter code_exec\n", - "{% set library_kind = TabField(\n", - " name='library_kind',\n", - " label='Library',\n", - " default='Select a library',\n", + "{% set similarity_kind = TabField(\n", + " name='similarity_kind',\n", + " label='Similarity',\n", + " default='Select a Similarity',\n", " description='',\n", " choices={\n", - " 'Select a library': [\n", + " 'Select a Similarity': [\n", " ChoiceField(\n", - " name='enrichr_library', \n", - " description='Select one library for enrichment analysis.', \n", - " label='Library', \n", - " default='- ARCHS4_mRNA_Coexpression', \n", - " section = 'Library_Section',\n", + " name='similarity', \n", + " description='Select one similarity for enrichment analysis.', \n", + " label='Similarity', \n", + " default='ARCHS4_mRNA_Coexpression', \n", + " section = 'Similarity_Section',\n", " choices=[\n", - " '- ARCHS4_mRNA_Coexpression',\n", - " 'KEGG_Augmented_with_ARCHS4',\n", - " 'PANTHER_Augmented_with_ARCHS4',\n", - " 'Reactome_Augmented_with_ARCHS4',\n", - " 'WikiPathways_Augmented_with_ARCHS4',\n", - " '- Enrichr_Gene_Coocurrence',\n", - " 'KEGG_Augmented_with_Enrichr',\n", - " 'PANTHER_Augmented_with_Enrichr',\n", - " 'Reactome_Augmented_with_Enrichr',\n", - " 'WikiPathways_Augmented_with_Enrichr',\n", - " '- GeneRIF_Coocurrence',\n", - " 'KEGG_Augmented_with_GeneRIF',\n", - " 'PANTHER_Augmented_with_GeneRIF',\n", - " 'Reactome_Augmented_with_GeneRIF',\n", - " 'WikiPathways_Augmented_with_GeneRIF',\n", - " '- Tagger_Cooccurence',\n", - " 'KEGG_Augmented_with_Tagger',\n", - " 'PANTHER_Augmented_with_Tagger',\n", - " 'Reactome_Augmented_with_Tagger',\n", - " 'WikiPathways_Augmented_with_Tagger',\n", - " '- Proteomics_Coexpression',\n", - " 'KEGG_Augmented_with_Proteomics',\n", - " 
'PANTHER_Augmented_with_Proteomics',\n", - " 'Reactome_Augmented_with_Proteomics',\n", - " 'WikiPathways_Augmented_with_Proteomics',\n", + " 'ARCHS4_mRNA_Coexpression',\n", + " 'Enrichr_Gene_Coocurrence',\n", + " 'GeneRIF_Coocurrence',\n", + " 'Tagger_Cooccurence',\n", + " 'Proteomics_Coexpression',\n", " ]\n", " )\n", " ],\n", " \n", " \n", " },\n", - " section = 'Library_Section',\n", + " section = 'Similarity_Section',\n", ") %}" ] }, @@ -206,14 +187,10 @@ "set_filename = {{ set_kind.value[0] }}\n", "{%- endif %}\n", "\n", - "library_kind = \"Select a library\"\n", - "library_name = \"{{ library_kind.value[0] }}\"\n", - "library_combined = False\n", - "data_dir=\"https://appyters.maayanlab.cloud/storage/Augmented_Pathway_Enrichment_Analysis/\"\n", - "\n", - "if '-' in library_name:\n", - " library_combined = True\n", - " library_name = library_name[2:]" + "input_name = \"{{ input_name.value }}\"\n", + "similarity_kind = \"Select a Similarity\"\n", + "similarity_name = \"{{ similarity_kind.value[0] }}\"\n", + "data_dir=\"https://appyters.maayanlab.cloud/storage/Augmented_Pathway_Enrichment_Analysis/\"" ] }, { @@ -223,18 +200,17 @@ "metadata": {}, "outputs": [], "source": [ - "output_notebook()\n", - "\n", "# Table Parameters\n", "significance_value = 0.05\n", "display_topk = 20\n", "\n", "# Bar Chart Parameters\n", "figure_file_format = ['png', 'svg']\n", - "output_file_name = 'Enrichment_analysis_results_bar'\n", "color = 'deepskyblue'\n", - "final_output_file_names = ['{0}.{1}'.format(output_file_name, file_type) for file_type in figure_file_format]\n", - "topk = 10" + "topk = 10\n", + "\n", + "# Tiebow Parameters\n", + "# display_num = 5" ] }, { @@ -268,26 +244,23 @@ "outputs": [], "source": [ "#loading and validating data\n", - "def load(library_name, items, separate=False):\n", - " \n", - " #load selected library and pathways\n", - "# if library_combined:\n", - " \n", - " library_data, pathways = load_library(library_name, separate)\n", + "def 
load(similarity_name, items, separate=False):\n", + "\n", + " similarity_data, pathways = load_similarity(similarity_name, separate)\n", " # to upper case\n", " items = [x.upper() for x in items]\n", - " validate_inputs(items, library_data)\n", + " validate_inputs(items, similarity_data)\n", "\n", - " return library_data, pathways, items\n", + " return similarity_data, pathways, items\n", "\n", - "def load_library(library_name, separate=False):\n", - " library_filename = library_name+'.csv'\n", - " pathway_filename = library_name.split('_')[0]+'_Pathways.txt'\n", + "def load_similarity(similarity_name, separate=False):\n", + " similarity_filename = similarity_name+'.csv'\n", + " pathway_filename = similarity_name.split('_')[0]+'_Pathways.txt'\n", " \n", - " if separate == False and library_combined:\n", + " if separate == False:\n", " pathway_filename = 'All_Pathways.txt'\n", " \n", - " library_data = dict()\n", + " similarity_data = dict()\n", " pathways = []\n", " lib=[]\n", "\n", @@ -296,21 +269,21 @@ " decoded_line = line.decode(\"utf-8\")[:-1]\n", " pathways.append(decoded_line)\n", " \n", - " response = urllib.request.urlopen(data_dir+library_filename)\n", + " response = urllib.request.urlopen(data_dir+similarity_filename)\n", " lines = [l.decode('utf-8') for l in response.readlines()]\n", " cr = csv.reader(lines)\n", " for row in cr:\n", " lib.append(row)\n", - " library_data=lib\n", + " similarity_data=lib\n", " \n", " \n", - " return library_data, pathways\n", + " return similarity_data, pathways\n", "\n", - "def validate_inputs(items, library_data):\n", + "def validate_inputs(items, similarity_data):\n", " if len(items) == 0:\n", " raise Exception('No items in the input set. 
Please check the background information.') \n", - " if not bool(set(items) & set(itertools.chain(*library_data))):\n", - " raise Exception('No matches in the input set and library.') " + " if not bool(set(items) & set(itertools.chain(*similarity_data))):\n", + " raise Exception('No matches in the input set and similarity.') " ] }, { @@ -321,12 +294,12 @@ "outputs": [], "source": [ "#Enrichment analysis\n", - "def get_library_iter(library_data, pathways):\n", - " for i, gset in enumerate(library_data):\n", + "def get_similarity_iter(similarity_data, pathways):\n", + " for i, gset in enumerate(similarity_data):\n", " yield pathways[i], gset\n", "\n", - "def get_enrichment_results(items, library_data, pathways):\n", - " return sorted(enrich_crisp(items, get_library_iter(library_data, pathways), 20000, True), key=lambda r: r[1].pvalue)\n", + "def get_enrichment_results(items, similarity_data, pathways):\n", + " return sorted(enrich_crisp(items, get_similarity_iter(similarity_data, pathways), 20000, True), key=lambda r: r[1].pvalue)\n", "\n", "\n", "def get_pvalue(row, unzipped_results, all_results):\n", @@ -352,8 +325,8 @@ " overlap.append(i.overlap)\n", " return pvals, odds_ratio, n_overlap, overlap\n", " \n", - "def enrichment_analysis(items, library_data, pathways): \n", - " all_results = get_enrichment_results(items, library_data, pathways)\n", + "def enrichment_analysis(items, similarity_data, pathways): \n", + " all_results = get_enrichment_results(items, similarity_data, pathways)\n", " unzipped_results = list(zip(*all_results))\n", " pvals, odds_ratio, n_overlap, overlap = get_values(unzipped_results[1])\n", " df = pd.DataFrame({\"Name\":unzipped_results[0], \"p value\": pvals, \\\n", @@ -385,7 +358,7 @@ " ol = ', '.join(row['overlap'])\n", " ol2 = ol\n", " for g in row['overlap']:\n", - " if not g in library_data[pathway][:-100]:\n", + " if not g in similarity_data[pathway][:-100]:\n", " ol2 = ol2.replace(g,''+g+'*')\n", " h = h.replace(ol, ol2)\n", " 
display(HTML(h))\n", @@ -400,7 +373,7 @@ "outputs": [], "source": [ "# Bar Chart Functions\n", - "def enrichr_figure(all_terms, all_pvalues, all_qvalues, plot_names, all_libraries, bar_color, topk=10): \n", + "def enrichr_figure(all_terms, all_pvalues, all_qvalues, all_libraries, bar_color, topk=10): \n", " all_terms = [all_terms[0][:topk]]\n", " all_pvalues = [all_pvalues[0][:topk]]\n", " all_qvalues = [all_qvalues[:topk]]\n", @@ -437,11 +410,15 @@ " fig.spines['right'].set_visible(False)\n", " fig.spines['top'].set_visible(False)\n", " # Save results \n", - " for plot_name in plot_names:\n", - " plt.savefig(plot_name, bbox_inches = 'tight')\n", + " output_file_names = ['{0}_bar.{1}'.format(all_libraries[i], file_type) for file_type in figure_file_format]\n", + " for f in output_file_names:\n", + " plt.savefig(f, bbox_inches = 'tight')\n", " \n", " # Show plot \n", - " plt.show()" + " plt.show()\n", + " \n", + " \n", + " return output_file_names" ] }, { @@ -451,7 +428,106 @@ "metadata": {}, "outputs": [], "source": [ - "# Network\n" + "# Tiebow\n", + "def tiebow(df, augmented=True):\n", + " \n", + "# sub_tab=[widgets.Output() for i in range(display_num)]\n", + "# tab = widgets.Tab(sub_tab)\n", + "# for i in range(display_num):\n", + "# tab.set_title(i,\"Tab {}\".format(i+1))\n", + " \n", + "# with sub_tab[i]:\n", + " i=0\n", + " p=df['Name'][i]\n", + " pathway = pathways.index(p)\n", + " if(len(p.split())>4):\n", + " p=add_breaks3(p)\n", + " \n", + " if len(items)>len(similarity_data[pathway]):\n", + " b=5000\n", + " a=5000*(len(items)/len(similarity_data[pathway]))\n", + " else:\n", + " a=5000\n", + " b=5000*(len(similarity_data[pathway])/len(items))\n", + " d=b*len(df['overlap'][i])/len(similarity_data[pathway])\n", + "\n", + " plt.figure(figsize=(12,12))\n", + " G = nx.Graph()\n", + " G.add_node('Input\\n Set', size=a, pos=(-25,25), color='mediumorchid', s='s')\n", + " G.add_node('Pathway\\n Set', size=b, pos=(25,25), color='salmon', s='s')\n", + " 
G.add_node('', size=d, pos=(25,25), color='mediumorchid', s='s')\n", + " l=len(df['overlap'][0])\n", + " for j,g in enumerate(df['overlap'][i]):\n", + " c = 'lightskyblue'\n", + " if augmented and not g in similarity_data[pathway][:-100]:\n", + " c = 'lightgreen'\n", + " G.add_node(g, size= 15000, pos=(0,j*50/(l-1)), color=c, s='_')\n", + " G.add_edge(g, 'Input\\n Set')\n", + " G.add_edge(g, 'Pathway\\n Set')\n", + "\n", + " # # bbox=dict(facecolor='none', edgecolor='black', boxstyle='round,pad=0.2'),\n", + " # alpha=0.5, with_labels=True, node_shape='_')\n", + "\n", + " nodeShapes = set((aShape[1]['s'] for aShape in G.nodes(data = True)))\n", + " #For each node class...\n", + " for aShape in nodeShapes:\n", + " if aShape=='_':\n", + " l=50\n", + " else:\n", + " l=1\n", + " #...filter and draw the subset of nodes with the same symbol in the positions that are now known through the use of the layout.\n", + " nx.draw_networkx_nodes(G,nx.get_node_attributes(G,'pos'),\n", + " node_shape = aShape, alpha = 1,\n", + " linewidths = l,\n", + " node_size = [sNode[1]['size'] for sNode in filter(lambda x: x[1][\"s\"]==aShape,G.nodes(data = True))],\n", + " node_color = [sNode[1]['color'] for sNode in filter(lambda x: x[1][\"s\"]==aShape,G.nodes(data = True))],\n", + " nodelist = [sNode[0] for sNode in filter(lambda x: x[1][\"s\"]==aShape,G.nodes(data = True))])\n", + "\n", + " #Finally, draw the edges between the nodes\n", + " nx.draw_networkx_edges(G,nx.get_node_attributes(G,'pos')) \n", + " nx.draw_networkx_labels(G,nx.get_node_attributes(G,'pos'), font_size=20)\n", + "\n", + " \n", + "\n", + " # Legend\n", + " if augmented:\n", + " plt.text(-30,-4.5, 'P-value for augmented pathway: {:.2e}\\nQ-value for augmented pathway: {:.3e}'.format(df['p value'][i],df['q value'][i]), fontsize=18)\n", + " plt.scatter([],[], c='lightskyblue', label='Original Pathway Member')\n", + " plt.scatter([],[], c='lightgreen', label='Augmented Pathway Member')\n", + " l=plt.legend(loc=4, 
scatterpoints=1, fontsize=15)\n", + " l.legendHandles[0]._sizes= [100]\n", + " l.legendHandles[1]._sizes= [100]\n", + " else:\n", + " plt.text(-30,-4.5, 'P-value for unaugmented pathway: {:.2e}\\nQ-value for unaugmented pathway: {:.3e}'.format(df['p value'][i],df['q value'][i]), fontsize=18)\n", + " \n", + " plt.text(-25,40, input_name+'\\n\\u2193', fontsize=20,\n", + " horizontalalignment=\"center\")\n", + " plt.text(25,40, p+'\\n\\u2193', fontsize=20,\n", + " horizontalalignment=\"center\")\n", + " \n", + " plt.axis('off')\n", + " axis = plt.gca()\n", + " axis.set_xlim([1.3*x for x in axis.get_xlim()])\n", + "# axis.set_ylim([1.2*y for y in axis.get_ylim()])\n", + " plt.tight_layout() \n", + " \n", + " # Save results \n", + " output_file_names = ['{0}_tiebow.{1}'.format(df['Name'][i], file_type) for file_type in figure_file_format]\n", + " for f in output_file_names:\n", + " plt.savefig(f, bbox_inches = 'tight')\n", + " \n", + " plt.show()\n", + " \n", + " return output_file_names\n", + " \n", + " \n", + "# display(tab)\n", + "\n", + "\n", + "def add_breaks3(string):\n", + " words = string.split()\n", + " grouped_words = [' '.join(words[i: i + 3]) for i in range(0, len(words), 3)]\n", + " return '\\n'.join(grouped_words)" ] }, { @@ -462,8 +538,9 @@ "outputs": [], "source": [ "%%appyter code_exec \n", - "display(Markdown(f\"The bar charts show the top {topk} enriched terms for the chosen library based on their p-values, along with their q-values written. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05). The bar chart is downloadable as an image in the PNG and SVG formats. \"))\n", - "display(Markdown(f\"The tables display the top {display_topk} enrichment analysis results for the chosen library. The table contains the pathway name, p-value, odds ratio, the number of overlapping items, overlapping items, and -log(p-value). 
The bolded genes with an asterisk in the overlap column are from {library_name.replace('_', ' ')} augmentation. The table is sorted by p-values in ascending order. The full results are downloadable in CSV format.\"))" + "display(Markdown(f\"The bar charts display the top {topk} enriched terms for the chosen pathway database similarity based on their p-values, along with their q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05). The bar chart is downloadable as an image in the PNG and SVG formats. \"))\n", + "display(Markdown(f\"The tables display the top {display_topk} enrichment analysis matching pathways for the chosen database. The table contains the pathway name, p-value, odds ratio, the number of overlapping genes, the overlapping genes, and -log(p-value). The bolded genes with an asterisk in the overlap column are the additional genes added to the pathway via the augmentation. The table is sorted by p-values in ascending order. The full results are downloadable in CSV format.\"))\n", + "display(Markdown(f\"The tiebow graphs show the overlapping genes between the input set and the top pathway result based on p-values. For the augmented results, the genes are colored by whether they were in the original pathway set or from augmentation. The set squares are sized proportionally, and the purple square within the pathway set node indicates the size of overlap relative to the pathway set. 
The tiebow graph is downloadable as an image in the PNG and SVG formats.\"))" ] }, { @@ -474,32 +551,37 @@ "outputs": [], "source": [ "%%appyter code_exec \n", - "if library_combined:\n", - " plasma = mpl.cm.get_cmap('plasma')\n", - " display(Markdown(f\"# Enrichment Analysis by Unaugmented Pathway Database\"))\n", - " display(Markdown(f\"Enrichment Analysis separated for each unaugmented database.\"))\n", - " db = ['KEGG', 'PANTHER', 'Reactome', 'WikiPathways']\n", - " for i,l in enumerate(db):\n", - " file = l+'_Unaugmented'\n", - " display(Markdown(f\"### {l+' Unaugmented'}\"))\n", - " library_data, pathways, items = load(file, items, separate=True)\n", - " results, pvals, results_df = enrichment_analysis(items, library_data, pathways)\n", - " \n", - " enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), final_output_file_names, [file], plasma(i*0.25), topk)\n", - " display(Markdown(f\"*Figure {i+1}. Bar chart of the top {topk} enriched terms in {file.replace('_', ' ')} based on p-values, along with their corresponding q-values (written). Colored bars correspond to terms with significant p-values (<0.05). 
An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", - " \n", - " # Download Bar Chart\n", - " for k, file in enumerate(final_output_file_names):\n", - " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))\n", + "plasma = mpl.cm.get_cmap('plasma')\n", + "display(Markdown(f\"# Enrichment Analysis with Unaugmented Pathway Databases\"))\n", + "display(Markdown(f\"Enrichment analysis results are separated for each unaugmented database.\"))\n", + "db = ['KEGG', 'PANTHER', 'Reactome', 'WikiPathways']\n", + "for i,l in enumerate(db):\n", + " file = l+'_Unaugmented'\n", + " display(Markdown(f\"### {l+' Unaugmented'}\"))\n", + " similarity_data, pathways, items = load(file, items, separate=True)\n", + " results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)\n", + "\n", + " output_file_names = enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), [file], plasma(i*0.25), topk)\n", + " display(Markdown(f\"*Figure {2*i+1}. Bar chart of the top {topk} enriched terms from the {file.replace('_', ' ')} similarity based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", + "\n", + " # Download Bar Chart\n", + " for k, file in enumerate(output_file_names):\n", + " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))\n", + "\n", + " if 'p value' in results_df.columns:\n", + " sorted_df = results_df.sort_values(by = ['p value'])\n", + " filtered_df = sorted_df.iloc[:display_topk]\n", + " if len(filtered_df) != 0:\n", + " display(HTML(filtered_df.to_html(index = False)))\n", + " display(Markdown(f\"*Table {i+1}. 
Enrichment analysis results from the {l.replace('_', ' ')} similarity.*\")) \n", + " display(create_download_link(sorted_df))\n", " \n", - " if 'p value' in results_df.columns:\n", - " sorted_df = results_df.sort_values(by = ['p value'])\n", - " filtered_df = sorted_df.iloc[:display_topk]\n", - " if len(filtered_df) != 0:\n", - " display(HTML(filtered_df.to_html(index = False)))\n", - " display(Markdown(f\"*Table {i+1}. Enrichment analysis results of {l.replace('_', ' ')}*\")) \n", - " display(create_download_link(sorted_df))\n", - " " + " output_file_names = tiebow(sorted_df, augmented=False)\n", + " display(Markdown(f\"*Figure {2*i+2}. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. The purple square within the pathway set indicates the size of overlap relative to the pathway set.*\")) \n", + "\n", + " # Download Tiebow Graph\n", + " for i, file in enumerate(output_file_names):\n", + " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': '))) " ] }, { @@ -510,33 +592,38 @@ "outputs": [], "source": [ "%%appyter code_exec \n", - "if library_combined:\n", - " plasma = mpl.cm.get_cmap('plasma')\n", - " display(Markdown(f\"# Enrichment Analysis by Pathway Database Augmented with {library_name.replace('_', ' ')}\"))\n", - " display(Markdown(f\"Enrichment Analysis separated for each augmented database.\"))\n", - " suf = library_name.split('_')[0]\n", - " db = ['KEGG', 'PANTHER', 'Reactome', 'WikiPathways']\n", - " for i,l in enumerate(db):\n", - " file = l+'_Augmented_with_'+suf\n", - " display(Markdown(f\"### {l+' Augmented'}\"))\n", - " library_data, pathways, items = load(file, items, separate=True)\n", - " results, pvals, results_df = enrichment_analysis(items, library_data, pathways)\n", - " \n", - " enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), final_output_file_names, [l+'_Augmented_with_'+library_name], plasma(i*0.25), topk)\n", - " 
display(Markdown(f\"*Figure {i+5}. Bar chart of the top {topk} enriched terms in {l+' Augmented with '+library_name.replace('_', ' ')} based on p-values, along with their corresponding q-values (written). Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", - " \n", - " # Download Bar Chart\n", - " for k, file in enumerate(final_output_file_names):\n", - " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))\n", + "plasma = mpl.cm.get_cmap('plasma')\n", + "display(Markdown(f\"# Enrichment Analysis by Pathway Database Augmented with {similarity_name.replace('_', ' ')}\"))\n", + "display(Markdown(f\"Enrichment analysis results are separated for each augmented pathway database.\"))\n", + "suf = similarity_name.split('_')[0]\n", + "db = ['KEGG', 'PANTHER', 'Reactome', 'WikiPathways']\n", + "for i,l in enumerate(db):\n", + " file = l+'_Augmented_with_'+suf\n", + " display(Markdown(f\"### {l+' Augmented'}\"))\n", + " similarity_data, pathways, items = load(file, items, separate=True)\n", + " results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)\n", + "\n", + " output_file_names = enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), [l+'_Augmented_with_'+similarity_name], plasma(i*0.25), topk)\n", + " display(Markdown(f\"*Figure {2*i+9}. Bar chart of the top {topk} enriched terms from the {l+' similarity augmented with '+similarity_name.replace('_', ' ')} based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). 
An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", + "\n", + " # Download Bar Chart\n", + " for k, file in enumerate(output_file_names):\n", + " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))\n", + "\n", + " if 'p value' in results_df.columns:\n", + " sorted_df = results_df.sort_values(by = ['p value'])\n", + " filtered_df = sorted_df.iloc[:display_topk]\n", + " if len(filtered_df) != 0:\n", + " bold_augmented(filtered_df)\n", + " display(Markdown(f\"*Table {i+5}. Enrichment analysis results from the augmented {l} similarity. Bolded terms with an asterisk are genes from augmenting the {l} pathways with {similarity_name.replace('_', ' ')}.*\")) \n", + " display(create_download_link(sorted_df))\n", " \n", - " if 'p value' in results_df.columns:\n", - " sorted_df = results_df.sort_values(by = ['p value'])\n", - " filtered_df = sorted_df.iloc[:display_topk]\n", - " if len(filtered_df) != 0:\n", - " bold_augmented(filtered_df)\n", - " display(Markdown(f\"*Table {i+5}. Enrichment analysis results of {l.replace('_', ' ')}. Bolded terms with an asterisk are genes from augmenting the pathways with {library_name.replace('_', ' ')}.*\")) \n", - " display(create_download_link(sorted_df))\n", - " " + " output_file_names = tiebow(sorted_df)\n", + " display(Markdown(f\"*Figure {2*i+10}. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. 
The purple square within the pathway set indicates the size of overlap relative to the pathway set.*\")) \n", + "\n", + " # Download Tiebow Graph\n", + " for i, file in enumerate(output_file_names):\n", + " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))" ] }, { @@ -547,27 +634,18 @@ "outputs": [], "source": [ "%%appyter code_exec\n", - "if library_combined:\n", - " display(Markdown(f\"# Combined Enrichment Analysis with All 4 Unaugmented Databases\"))\n", - "else:\n", - " display(Markdown(f\"# Unaugmented Enrichment Analysis\"))\n", + "display(Markdown(f\"# Combined Enrichment Analysis with All 4 Unaugmented Databases\"))\n", + "display(Markdown(f\"Enrichment analysis based on the unaugmented data.\"))\n", "\n", - "display(Markdown(f\"Enrichment Analysis based on the Unaugmented Data from the chosen library to compare with the augmented analysis.\"))\n", - "if library_combined:\n", - " lib = 'Unaugmented'\n", - " k=9\n", - "else:\n", - " lib = library_name.split('_')[0]+'_Unaugmented'\n", - " k=1\n", - "library_data, pathways, items = load(lib, items)\n", - "results, pvals, results_df = enrichment_analysis(items, library_data, pathways)\n", + "lib = 'Unaugmented'\n", + "similarity_data, pathways, items = load(lib, items)\n", + "results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)\n", "\n", "display(Markdown(f\"## Bar Chart\"))\n", - "enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), final_output_file_names, [lib], color, topk)\n", - "display(Markdown(f\"*Figure {k}. Bar chart of the top {topk} enriched terms in {lib.replace('_', ' ')} based on p-values, along with their corresponding q-values (written). Colored bars correspond to terms with significant p-values (<0.05). 
An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", - "\n", + "output_file_names = enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), ['Combined Unaugmented Databases'], color, topk)\n", + "display(Markdown(f\"*Figure 17. Bar chart of the top {topk} enriched terms across all 4 pathway databases based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", "# Download Bar Chart\n", - "for i, file in enumerate(final_output_file_names):\n", + "for i, file in enumerate(output_file_names):\n", " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))\n", "\n", "display(Markdown(f\"## Results Table\"))\n", @@ -576,9 +654,15 @@ " filtered_df = sorted_df.iloc[:display_topk]\n", " if len(filtered_df) != 0:\n", " display(HTML(filtered_df.to_html(index = False)))\n", - " display(Markdown(f\"*Table {k}. Enrichment analysis results of {lib.replace('_', ' ')}*\")) \n", + " display(Markdown(f\"*Table 9. Enrichment analysis results of {lib.replace('_', ' ')}*\")) \n", " display(create_download_link(sorted_df)) \n", + " \n", + "output_file_names = tiebow(sorted_df, augmented=False)\n", + "display(Markdown(f\"*Figure 18. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. 
The purple square within the pathway set indicates the size of overlap relative to the pathway set.*\")) \n", "\n", + "# Download Tiebow Graph\n", + "for i, file in enumerate(output_file_names):\n", + " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))\n", "\n" ] }, @@ -590,10 +674,7 @@ "outputs": [], "source": [ "%%appyter code_exec\n", - "if library_combined:\n", - " display(Markdown(f\"# Combined Enrichment Analysis with All 4 Pathway Databases Augmented with {library_name.replace('_', ' ')}\"))\n", - "else:\n", - " display(Markdown(f\"# Enrichment Analysis Augmented with {library_name.replace('_', ' ')}\"))" + "display(Markdown(f\"# Combined Enrichment Analysis with All 4 Pathway Databases Augmented with {similarity_name.replace('_', ' ')}\"))" ] }, { @@ -615,14 +696,14 @@ "outputs": [], "source": [ "%%appyter code_exec\n", - "library_data, pathways, items = load(library_name, items)\n", - "results, pvals, results_df = enrichment_analysis(items, library_data, pathways)\n", + "similarity_data, pathways, items = load(similarity_name, items)\n", + "results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)\n", "\n", - "enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), final_output_file_names, [library_name], color, topk)\n", - "display(Markdown(f\"*Figure {k+1}. Bar chart of the top {topk} enriched terms in {library_name.replace('_', ' ')} based on p-values, along with their corresponding q-values (written). Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", + "output_file_names = enrichr_figure(results, pvals, results_df[\"q value\"].tolist(), ['Combined Databases Augmented with '+similarity_name], color, topk)\n", + "display(Markdown(f\"*Figure 19. 
Bar chart of the top {topk} enriched terms across all 4 pathway augmented databases based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*\")) \n", "\n", "# Download Bar Chart\n", - "for i, file in enumerate(final_output_file_names):\n", + "for i, file in enumerate(output_file_names):\n", " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))" ] }, @@ -640,18 +721,36 @@ " filtered_df = sorted_df.iloc[:display_topk]\n", " if len(filtered_df) != 0:\n", " bold_augmented(filtered_df)\n", - " display(Markdown(f\"*Table {k+1}. Enrichment analysis results of {library_name.replace('_', ' ')}. Bolded terms with an asterisk are genes from augmenting the pathways with {library_name.replace('_', ' ')}.*\")) \n", - "\n", + " display(Markdown(f\"*Table 10. Enrichment analysis results from the augmented combined similarity. Bolded terms with an asterisk are genes from augmenting the pathways with {similarity_name.replace('_', ' ')}.*\")) \n", " display(create_download_link(sorted_df))" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb81a64b", + "metadata": {}, + "outputs": [], + "source": [ + "%%appyter markdown\n", + "## Tiebow Graph" + ] + }, { "cell_type": "code", "execution_count": null, "id": "92883721", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "%%appyter code_exec \n", + "output_file_names = tiebow(sorted_df)\n", + "display(Markdown(f\"*Figure 20. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. 
The purple square within the pathway set indicates the size of overlap relative to the pathway set.*\")) \n", + "\n", + "# Download Tiebow Graph\n", + "for i, file in enumerate(output_file_names):\n", + " display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))" + ] }, { "cell_type": "markdown", @@ -688,10 +787,10 @@ "\n", "if short_id:\n", " url = 'https://maayanlab.cloud/Enrichr/enrich?dataset=' + short_id\n", - " display(HTML(f\"
Access the complete enrichment analysis results on the Enrichr website.
\"))\n", + " display(HTML(f\"
Access additional enrichment analysis results for the input set from the Enrichr website.
\"))\n", "else:\n", " display(HTML(\"
No Enrichr results available for the current query
\"))\n", - " display(HTML(\"
Please try again with a different input list and/or library.\"))" + " display(HTML(\"
Please try again with a different input list and/or similarity.\"))" ] } ], diff --git a/appyters/Augmented_Pathway_Enrichment_Analysis/README.md b/appyters/Augmented_Pathway_Enrichment_Analysis/README.md index 5ff843f3..794bccbd 100644 --- a/appyters/Augmented_Pathway_Enrichment_Analysis/README.md +++ b/appyters/Augmented_Pathway_Enrichment_Analysis/README.md @@ -1,6 +1,6 @@ # Augmented Pathway Enrichment Analysis -Augmented Pathway Enrichment Analysis Appyter performs enrichment analysis with augmented pathway libraries. The required input is a gene set. The Appyter performs the Fisher exact test to compute enrichment p-value and q-values, and reports the results as a bar graph along with a sorted table. If a combined library is chosen, the results for separate pathway databases are shown on the bottom as well. Unaugmented and augmented results are shown. +Augmented Pathway Enrichment Analysis Appyter performs enrichment analysis with augmented pathway libraries. The required input is a gene set. The Appyter performs the Fisher exact test to compute enrichment p-values and q-values, and reports the results as a bar graph along with a sorted table and a tiebow graph. If a combined library is chosen, the results for separate pathway databases are shown at the bottom as well. Unaugmented and augmented results are shown. ## **File Format** To run the Appyter, a set of genes is required as input. There is an example gene set for use as well. 
From f74fd4763410642ad07836102a3e08290d7fc353 Mon Sep 17 00:00:00 2001 From: ekropiwnicki Date: Thu, 2 Sep 2021 12:02:32 -0400 Subject: [PATCH 3/3] public field set to false --- ...ugmented_Pathway_Enrichment_Analysis.ipynb | 38 +------------------ .../appyter.json | 3 +- 2 files changed, 4 insertions(+), 37 deletions(-) diff --git a/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb b/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb index 319851f4..63ddd75e 100644 --- a/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb +++ b/appyters/Augmented_Pathway_Enrichment_Analysis/Augmented_Pathway_Enrichment_Analysis.ipynb @@ -3,7 +3,6 @@ { "cell_type": "code", "execution_count": null, - "id": "ddb0a4dd", "metadata": {}, "outputs": [], "source": [ @@ -14,7 +13,6 @@ }, { "cell_type": "markdown", - "id": "7f426231", "metadata": {}, "source": [ "# Augmented Pathway Enrichment Analysis (APEA)\n", @@ -24,7 +22,6 @@ { "cell_type": "code", "execution_count": null, - "id": "3be8da4e", "metadata": {}, "outputs": [], "source": [ @@ -41,7 +38,6 @@ "from collections import OrderedDict\n", "import urllib\n", "\n", - "import ipywidgets as widgets\n", "import base64\n", "import csv\n", "import itertools\n", @@ -55,7 +51,6 @@ { "cell_type": "code", "execution_count": null, - "id": "a783a1f2", "metadata": {}, "outputs": [], "source": [ @@ -79,7 +74,6 @@ { "cell_type": "code", "execution_count": null, - "id": "53f9a933", "metadata": {}, "outputs": [], "source": [ @@ -137,7 +131,6 @@ { "cell_type": "code", "execution_count": null, - "id": "290d7db9", "metadata": {}, "outputs": [], "source": [ @@ -174,7 +167,6 @@ { "cell_type": "code", "execution_count": null, - "id": "1ecd91dc", "metadata": {}, "outputs": [], "source": [ @@ -187,7 +179,7 @@ "set_filename = {{ set_kind.value[0] }}\n", "{%- endif %}\n", "\n", - "input_name = \"{{ input_name.value }}\"\n", + "input_name = 
\"{{ input_name }}\"\n", "similarity_kind = \"Select a Similarity\"\n", "similarity_name = \"{{ similarity_kind.value[0] }}\"\n", "data_dir=\"https://appyters.maayanlab.cloud/storage/Augmented_Pathway_Enrichment_Analysis/\"" @@ -196,7 +188,6 @@ { "cell_type": "code", "execution_count": null, - "id": "c48fe630", "metadata": {}, "outputs": [], "source": [ @@ -216,7 +207,6 @@ { "cell_type": "code", "execution_count": null, - "id": "b814f8b4", "metadata": {}, "outputs": [], "source": [ @@ -239,7 +229,6 @@ { "cell_type": "code", "execution_count": null, - "id": "9eacb435", "metadata": {}, "outputs": [], "source": [ @@ -289,7 +278,6 @@ { "cell_type": "code", "execution_count": null, - "id": "d5754b30", "metadata": {}, "outputs": [], "source": [ @@ -347,7 +335,6 @@ { "cell_type": "code", "execution_count": null, - "id": "1f27f65c", "metadata": {}, "outputs": [], "source": [ @@ -368,7 +355,6 @@ { "cell_type": "code", "execution_count": null, - "id": "4b65bc02", "metadata": {}, "outputs": [], "source": [ @@ -424,19 +410,11 @@ { "cell_type": "code", "execution_count": null, - "id": "7e6edc12", "metadata": {}, "outputs": [], "source": [ "# Tiebow\n", "def tiebow(df, augmented=True):\n", - " \n", - "# sub_tab=[widgets.Output() for i in range(display_num)]\n", - "# tab = widgets.Tab(sub_tab)\n", - "# for i in range(display_num):\n", - "# tab.set_title(i,\"Tab {}\".format(i+1))\n", - " \n", - "# with sub_tab[i]:\n", " i=0\n", " p=df['Name'][i]\n", " pathway = pathways.index(p)\n", @@ -533,7 +511,6 @@ { "cell_type": "code", "execution_count": null, - "id": "bd3e7c1c", "metadata": {}, "outputs": [], "source": [ @@ -546,7 +523,6 @@ { "cell_type": "code", "execution_count": null, - "id": "e3df4a03", "metadata": {}, "outputs": [], "source": [ @@ -587,7 +563,6 @@ { "cell_type": "code", "execution_count": null, - "id": "75ca5e21", "metadata": {}, "outputs": [], "source": [ @@ -629,7 +604,6 @@ { "cell_type": "code", "execution_count": null, - "id": "41f665ea", "metadata": {}, "outputs": 
[], "source": [ @@ -669,7 +643,6 @@ { "cell_type": "code", "execution_count": null, - "id": "428f6a3b", "metadata": {}, "outputs": [], "source": [ @@ -680,7 +653,6 @@ { "cell_type": "code", "execution_count": null, - "id": "48569fda", "metadata": {}, "outputs": [], "source": [ @@ -691,7 +663,6 @@ { "cell_type": "code", "execution_count": null, - "id": "1cfe7953", "metadata": {}, "outputs": [], "source": [ @@ -710,7 +681,6 @@ { "cell_type": "code", "execution_count": null, - "id": "04a69353", "metadata": {}, "outputs": [], "source": [ @@ -728,7 +698,6 @@ { "cell_type": "code", "execution_count": null, - "id": "eb81a64b", "metadata": {}, "outputs": [], "source": [ @@ -739,7 +708,6 @@ { "cell_type": "code", "execution_count": null, - "id": "92883721", "metadata": {}, "outputs": [], "source": [ @@ -754,7 +722,6 @@ }, { "cell_type": "markdown", - "id": "cdb54dd8", "metadata": {}, "source": [ "# Link to Enrichr" @@ -763,7 +730,6 @@ { "cell_type": "code", "execution_count": null, - "id": "782c6ed2", "metadata": {}, "outputs": [], "source": [ @@ -810,7 +776,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.3" } }, "nbformat": 4, diff --git a/appyters/Augmented_Pathway_Enrichment_Analysis/appyter.json b/appyters/Augmented_Pathway_Enrichment_Analysis/appyter.json index 07c6ae5f..b77ceeba 100644 --- a/appyters/Augmented_Pathway_Enrichment_Analysis/appyter.json +++ b/appyters/Augmented_Pathway_Enrichment_Analysis/appyter.json @@ -2,7 +2,7 @@ "$schema": "https://raw.githubusercontent.com/MaayanLab/appyter-catalog/main/schema/appyter-validator.json", "name": "Augmented_Pathway_Enrichment_Analysis", "title": "Augmented Pathway Enrichment Analysis", - "version": "0.0.1", + "version": "0.0.2", "description": "This Appyter performs enrichment analysis with augmented pathway libraries. 
The required input is a gene set", "image": "logo.png", "authors": [ @@ -17,6 +17,7 @@ "Enrichr" ], "license": "CC-BY-NC-SA-4.0", + "public": false, "appyter": { "file": "Augmented_Pathway_Enrichment_Analysis.ipynb", "profile": "biojupies",