From a05f0380bda85dcbed4e1fb116318a295e022af7 Mon Sep 17 00:00:00 2001 From: "Daniel J. B. Clarke" Date: Tue, 17 Feb 2026 13:20:34 -0500 Subject: [PATCH 1/3] Patch some of the issues --- .dockerignore | 1 + app/app.py | 52 +++++++++++++++++++++++++------------------------- app/helpers.py | 16 +++++++--------- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/.dockerignore b/.dockerignore index b0d487e..4b893dc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,5 +4,6 @@ env .DS_Store app/__pycache__/ *.env +app/.env ETL predictions \ No newline at end of file diff --git a/app/app.py b/app/app.py index f6d5704..ee329ac 100644 --- a/app/app.py +++ b/app/app.py @@ -12,6 +12,7 @@ from functools import lru_cache import pickle import anndata +import urllib.parse from dotenv import load_dotenv load_dotenv() @@ -86,7 +87,7 @@ def downloads(): @app.route(f'{ROOT_PATH}/queryexpression', methods=['GET','POST']) def query_expression(): - gene = request.form['gene'] + gene = urllib.parse.urlencode(request.form['gene']) result = query_generanger(gene) @@ -95,7 +96,7 @@ def query_expression(): @app.route(f'{ROOT_PATH}/getgwas', methods=['GET','POST']) def get_gwas(): - gene = request.form['gene'] + gene = urllib.parse.urlencode(request.form['gene']) if gene == '': return {'GWAS_Catalog':[]} @@ -106,7 +107,7 @@ def get_gwas(): @app.route(f'{ROOT_PATH}/getkomp', methods=['GET','POST']) def get_mgi(): - gene = request.form['gene'] + gene = urllib.parse.urlencode(request.form['gene']) result = query_mgi(gene) return result @@ -154,7 +155,7 @@ def getchea3(): @app.route(f'{ROOT_PATH}/gettfs', methods=['GET','POST']) def gettfs(): - gene = request.form['gene'] + gene = urllib.parse.urlencode(request.form['gene']) if gene.strip() == '': return {'data': []} @@ -258,8 +259,8 @@ def dge(): perturb = response_json['perturb'] control = response_json['control'] method = response_json['method'] - gse = response_json['gse'] - species = response_json['species'] + gse = urllib.parse.urlencode(response_json['gse']) + species = urllib.parse.urlencode(response_json['species']) norms = response_json['norms'] expr_file = '{base_url}/{species}/{gse}/{gse}_Expression.tsv'.format(species=species, gse=gse, base_url=base_url) meta_file = '{base_url}/{species}/{gse}/{gse}_Metadata.tsv'.format(species=species, gse=gse, base_url=base_url) @@ -276,14 +277,14 @@ def dge(): def dgesingle(): response_json = request.get_json() method = response_json['method'] - gse = response_json['gse'] - species = response_json['species'] + gse = urllib.parse.urlencode(response_json['gse']) + species = urllib.parse.urlencode(response_json['species']) condition_group = response_json['conditiongroup'] cluster_group = response_json['diffcluster'] metajson = s3.open('{base_url}/{species}/{gse}/{gse}_metasep.json'.format(species=species, gse=gse, base_url=base_url),'r') metadict = json.load(metajson) - base_expression_filename = metadict[condition_group]['filename'] + base_expression_filename = urllib.parse.urlencode(metadict[condition_group]['filename']) expr_file = '{base_url}/{species}/{gse}/{file}'.format(species=species, gse=gse, base_url=base_url, file=base_expression_filename) data_dict = compute_dge_single(expr_file, method, 'Cluster', 'Cell_types',cluster_group, True) @@ -304,16 +305,16 @@ def dgesingle(): @app.route('/api/precomputed_dge', methods=['GET','POST']) def fetch_precomputed_dge(): response_json = request.get_json() - sig = response_json['sig'] - species = response_json['species'] + sig = urllib.parse.urlencode(response_json['sig']) + species = urllib.parse.urlencode(response_json['species']) dge_tab = get_precomputed_dge(sig, species) return dge_tab.to_json(orient='index') @app.route('/api/precomputed_dge_options', methods=['GET','POST']) def fetch_precomputed_dge_options(): response_json = request.get_json() - gse = response_json['gse'] - species = response_json['species'] + gse = urllib.parse.urlencode(response_json['gse']) + species = urllib.parse.urlencode(response_json['species']) return get_precomputed_dge_options(gse, species) @@ -322,15 +323,15 @@ def fetch_precomputed_dge_options(): @app.route('/singleplots', methods=['GET','POST']) def makesingleplots(): response_json = request.get_json() - gse = response_json['gse'] - species = response_json['species'] + gse = urllib.parse.urlencode(response_json['gse']) + species = urllib.parse.urlencode(response_json['species']) condition_group = response_json['conditiongroup'] print('in pca, tsne, umap singleplots function') print(condition_group) #metajson file that stores the group/condition pairing to point to the expression h5 file metajson = s3.open('{base_url}/{species}/{gse}/{gse}_metasep.json'.format(species=species, gse=gse, base_url=base_url),'r') metadict = json.load(metajson) - base_expression_filename = metadict[condition_group]['filename'] + base_expression_filename =urllib.parse.urlencode(metadict[condition_group]['filename']) #image path for pulling the distribution plot from s3 base_name_for_cell_type_dist = base_expression_filename.split('.h5')[0] base_name_for_cell_type_dist = base_name_for_cell_type_dist + '.png' @@ -375,8 +376,8 @@ def makesingleplots(): def getclusterinfo(): #The json below holds information about the conditiongroup that we are looking at for this data as well the specific species. response_json = request.get_json() - gse = response_json['gse'] - species = response_json['species'] + gse = urllib.parse.urlencode(response_json['gse']) + species = urllib.parse.urlencode(response_json['species']) condition_group = response_json['conditiongroup'] metajson = s3.open('{base_url}/{species}/{gse}/{gse}_metasep.json'.format(species=species, gse=gse, base_url=base_url),'r') metadict = json.load(metajson) @@ -857,9 +858,8 @@ def plot_api(geo_accession): @app.route(f'{ROOT_PATH}/api/volcano', methods=['GET', 'POST']) def plot_volcano_api(): - request.form - gene = request.form["gene"] - species = request.form["species"] + gene = urllib.parse.urlencode(request.form["gene"]) + species = urllib.parse.urlencode(request.form["species"]) try: json_item_plot = send_plot(species, gene) except Exception as e: @@ -913,10 +913,10 @@ def samples_api(geo_accession): def get_study_data(): response_json = request.get_json() - geo_accession = response_json['gse'] + geo_accession = urllib.parse.urlencode(response_json['gse']) control = response_json['control'] perturb = response_json['perturb'] - species = response_json['species'] + species = urllib.parse.urlencode(response_json['species']) metadata_file = base_url + '/' + species + '/' + geo_accession + '/' + geo_accession + '_Metadata.tsv' expression_file = base_url + '/' + species + '/' + geo_accession + '/' + geo_accession + '_Expression.tsv' @@ -950,8 +950,8 @@ def get_study_data(): @app.route(f'{ROOT_PATH}/api/bulksampvis', methods=['GET', 'POST']) def visualize_samps(): response_json = request.get_json() - geo_accession = response_json['gse'] - species = response_json['species'] + geo_accession = urllib.parse.urlencode(response_json['gse']) + species = urllib.parse.urlencode(response_json['species']) meta_df = base_url + '/' + species + '/' + geo_accession + '/' + geo_accession + '_Metadata.tsv' meta_df = pd.read_csv(s3.open(meta_df), sep='\t', index_col=0) @@ -984,7 +984,7 @@ def query_options(): @app.route(f'{ROOT_PATH}/api/query_genes', methods=['GET', 'POST']) def query_genes(): response_json = request.get_json() - g = response_json['gene'] + g = urllib.parse.urlencode(response_json['gene']) res = infer_gene(g) return res diff --git a/app/helpers.py b/app/helpers.py index 52bb88a..0919a02 100644 --- a/app/helpers.py +++ b/app/helpers.py @@ -86,9 +86,9 @@ def enrichr_id(genes, desc=''): @lru_cache() def query_enricher(gene): ENRICHR_URL = 'https://maayanlab.cloud/Enrichr/genemap' - query_string = '?json=true&setup=true&gene=%s' + params = dict(json='true', setup='true', gene=gene) - response = requests.get(ENRICHR_URL + query_string % gene) + response = requests.get(ENRICHR_URL, params=params) if not response.ok: raise Exception('Error finding gene') @@ -126,12 +126,9 @@ def query_enricher_diabetes(genelist, description): listid = data["userListId"] ENRICHR_URL = 'https://maayanlab.cloud/Enrichr/enrich' - query_string = '?userListId=%s&backgroundType=%s' - user_list_id = listid gene_set_library = 'Diabetes_Perturbations_GEO_2022' - response = requests.get( - ENRICHR_URL + query_string % (user_list_id, gene_set_library) - ) + params = dict(userListId=listid, backgroundType=gene_set_library) + response = requests.get(ENRICHR_URL, params=params) if not response.ok: raise Exception('Error fetching enrichment results') @@ -147,8 +144,9 @@ def query_enricher_diabetes(genelist, description): def query_komp(gene: str): gene = gene[0].upper() + (gene[1:]).lower() - KOMP_URL = "https://www.ebi.ac.uk/mi/impc/solr/genotype-phenotype/select?q=marker_symbol:" + gene - response = requests.get(KOMP_URL) + KOMP_URL = "https://www.ebi.ac.uk/mi/impc/solr/genotype-phenotype/select" + params = dict(q="marker_symbol:" + gene) + response = requests.get(KOMP_URL, params=params) if not response.ok: raise Exception('Error analyzing retrieving information') data = json.loads(response.text) From 008f38837785a9f818f22ac042048c1d28abf17d Mon Sep 17 00:00:00 2001 From: "Daniel J. B. Clarke" Date: Tue, 17 Feb 2026 14:41:49 -0500 Subject: [PATCH 2/3] Fixup patch --- app/app.py | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/app/app.py b/app/app.py index ee329ac..a5e6a15 100644 --- a/app/app.py +++ b/app/app.py @@ -87,7 +87,7 @@ def downloads(): @app.route(f'{ROOT_PATH}/queryexpression', methods=['GET','POST']) def query_expression(): - gene = urllib.parse.urlencode(request.form['gene']) + gene = urllib.parse.quote(request.form['gene'], safe='') result = query_generanger(gene) @@ -96,7 +96,7 @@ def query_expression(): @app.route(f'{ROOT_PATH}/getgwas', methods=['GET','POST']) def get_gwas(): - gene = urllib.parse.urlencode(request.form['gene']) + gene = urllib.parse.quote(request.form['gene'], safe='') if gene == '': return {'GWAS_Catalog':[]} @@ -107,7 +107,7 @@ def get_gwas(): @app.route(f'{ROOT_PATH}/getkomp', methods=['GET','POST']) def get_mgi(): - gene = urllib.parse.urlencode(request.form['gene']) + gene = urllib.parse.quote(request.form['gene'], safe='') result = query_mgi(gene) return result @@ -155,7 +155,7 @@ def getchea3(): @app.route(f'{ROOT_PATH}/gettfs', methods=['GET','POST']) def gettfs(): - gene = urllib.parse.urlencode(request.form['gene']) + gene = urllib.parse.quote(request.form['gene'], safe='') if gene.strip() == '': return {'data': []} @@ -259,8 +259,8 @@ def dge(): perturb = response_json['perturb'] control = response_json['control'] method = response_json['method'] - gse = urllib.parse.urlencode(response_json['gse']) - species = urllib.parse.urlencode(response_json['species']) + gse = urllib.parse.quote(response_json['gse'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') norms = response_json['norms'] expr_file = '{base_url}/{species}/{gse}/{gse}_Expression.tsv'.format(species=species, gse=gse, base_url=base_url) meta_file = '{base_url}/{species}/{gse}/{gse}_Metadata.tsv'.format(species=species, gse=gse, base_url=base_url) @@ -277,14 +277,14 @@ def dge(): def dgesingle(): response_json = request.get_json() method = response_json['method'] - gse = urllib.parse.urlencode(response_json['gse']) - species = urllib.parse.urlencode(response_json['species']) + gse = urllib.parse.quote(response_json['gse'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') condition_group = response_json['conditiongroup'] cluster_group = response_json['diffcluster'] metajson = s3.open('{base_url}/{species}/{gse}/{gse}_metasep.json'.format(species=species, gse=gse, base_url=base_url),'r') metadict = json.load(metajson) - base_expression_filename = urllib.parse.urlencode(metadict[condition_group]['filename']) + base_expression_filename = urllib.parse.quote(metadict[condition_group]['filename'], safe='') expr_file = '{base_url}/{species}/{gse}/{file}'.format(species=species, gse=gse, base_url=base_url, file=base_expression_filename) data_dict = compute_dge_single(expr_file, method, 'Cluster', 'Cell_types',cluster_group, True) @@ -305,16 +305,16 @@ def dgesingle(): @app.route('/api/precomputed_dge', methods=['GET','POST']) def fetch_precomputed_dge(): response_json = request.get_json() - sig = urllib.parse.urlencode(response_json['sig']) - species = urllib.parse.urlencode(response_json['species']) + sig = urllib.parse.quote(response_json['sig'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') dge_tab = get_precomputed_dge(sig, species) return dge_tab.to_json(orient='index') @app.route('/api/precomputed_dge_options', methods=['GET','POST']) def fetch_precomputed_dge_options(): response_json = request.get_json() - gse = urllib.parse.urlencode(response_json['gse']) - species = urllib.parse.urlencode(response_json['species']) + gse = urllib.parse.quote(response_json['gse'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') return get_precomputed_dge_options(gse, species) @@ -323,15 +323,15 @@ def fetch_precomputed_dge_options(): @app.route('/singleplots', methods=['GET','POST']) def makesingleplots(): response_json = request.get_json() - gse = urllib.parse.urlencode(response_json['gse']) - species = urllib.parse.urlencode(response_json['species']) + gse = urllib.parse.quote(response_json['gse'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') condition_group = response_json['conditiongroup'] print('in pca, tsne, umap singleplots function') print(condition_group) #metajson file that stores the group/condition pairing to point to the expression h5 file metajson = s3.open('{base_url}/{species}/{gse}/{gse}_metasep.json'.format(species=species, gse=gse, base_url=base_url),'r') metadict = json.load(metajson) - base_expression_filename =urllib.parse.urlencode(metadict[condition_group]['filename']) + base_expression_filename =urllib.parse.quote(metadict[condition_group]['filename'], safe='') #image path for pulling the distribution plot from s3 base_name_for_cell_type_dist = base_expression_filename.split('.h5')[0] base_name_for_cell_type_dist = base_name_for_cell_type_dist + '.png' @@ -376,8 +376,8 @@ def makesingleplots(): def getclusterinfo(): #The json below holds information about the conditiongroup that we are looking at for this data as well the specific species. response_json = request.get_json() - gse = urllib.parse.urlencode(response_json['gse']) - species = urllib.parse.urlencode(response_json['species']) + gse = urllib.parse.quote(response_json['gse'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') condition_group = response_json['conditiongroup'] metajson = s3.open('{base_url}/{species}/{gse}/{gse}_metasep.json'.format(species=species, gse=gse, base_url=base_url),'r') metadict = json.load(metajson) @@ -858,8 +858,8 @@ def plot_api(geo_accession): @app.route(f'{ROOT_PATH}/api/volcano', methods=['GET', 'POST']) def plot_volcano_api(): - gene = urllib.parse.urlencode(request.form["gene"]) - species = urllib.parse.urlencode(request.form["species"]) + gene = urllib.parse.quote(request.form["gene"], safe='') + species = urllib.parse.quote(request.form["species"], safe='') try: json_item_plot = send_plot(species, gene) except Exception as e: @@ -913,10 +913,10 @@ def samples_api(geo_accession): def get_study_data(): response_json = request.get_json() - geo_accession = urllib.parse.urlencode(response_json['gse']) + geo_accession = urllib.parse.quote(response_json['gse'], safe='') control = response_json['control'] perturb = response_json['perturb'] - species = urllib.parse.urlencode(response_json['species']) + species = urllib.parse.quote(response_json['species'], safe='') metadata_file = base_url + '/' + species + '/' + geo_accession + '/' + geo_accession + '_Metadata.tsv' expression_file = base_url + '/' + species + '/' + geo_accession + '/' + geo_accession + '_Expression.tsv' @@ -950,8 +950,8 @@ def get_study_data(): @app.route(f'{ROOT_PATH}/api/bulksampvis', methods=['GET', 'POST']) def visualize_samps(): response_json = request.get_json() - geo_accession = urllib.parse.urlencode(response_json['gse']) - species = urllib.parse.urlencode(response_json['species']) + geo_accession = urllib.parse.quote(response_json['gse'], safe='') + species = urllib.parse.quote(response_json['species'], safe='') meta_df = base_url + '/' + species + '/' + geo_accession + '/' + geo_accession + '_Metadata.tsv' meta_df = pd.read_csv(s3.open(meta_df), sep='\t', index_col=0) @@ -984,7 +984,7 @@ def query_options(): @app.route(f'{ROOT_PATH}/api/query_genes', methods=['GET', 'POST']) def query_genes(): response_json = request.get_json() - g = urllib.parse.urlencode(response_json['gene']) + g = urllib.parse.quote(response_json['gene'], safe='') res = infer_gene(g) return res From e6f331db7f816d10659ab8d92ab340c7208e73e2 Mon Sep 17 00:00:00 2001 From: "Daniel J. B. Clarke" Date: Tue, 17 Feb 2026 14:43:48 -0500 Subject: [PATCH 3/3] Fix env default issues --- app/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/app.py b/app/app.py index a5e6a15..a3fce8d 100644 --- a/app/app.py +++ b/app/app.py @@ -29,8 +29,8 @@ base_url = os.environ.get('BASE_URL', 'data') ROOT_PATH = os.environ.get('ROOT_PATH', '/') BASE_PATH = os.environ.get('BASE_PATH', 'maayanlab.cloud') -DEBUG = os.environ.get('DEBUG', True).lower() in ('true', '1', 't') -UPDATE_STUDIES = os.environ.get('UPDATE_STUDIES', False).lower() in ('true', '1', 't') +DEBUG = os.environ.get('DEBUG', 'true').lower() in ('true', '1', 't') +UPDATE_STUDIES = os.environ.get('UPDATE_STUDIES', 'false').lower() in ('true', '1', 't') s3 = s3fs.S3FileSystem(anon=True, client_kwargs={'endpoint_url': endpoint})