From 4f0a0638aa370fbb07dca8d4500b7b2aa264e04b Mon Sep 17 00:00:00 2001
From: Sujay Patil
Date: Thu, 7 Jul 2022 13:11:50 -0700
Subject: [PATCH 1/4] add gold client methods that use GOLD project ids

---
Layout note: this series was recovered from a copy whose line breaks had been
collapsed. Added lines are reproduced as found; the indentation of unchanged
context lines is reconstructed and should be checked against the base blobs.

 sample_annotator/clients/gold_client.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/sample_annotator/clients/gold_client.py b/sample_annotator/clients/gold_client.py
index ac600b3..702e550 100644
--- a/sample_annotator/clients/gold_client.py
+++ b/sample_annotator/clients/gold_client.py
@@ -251,6 +251,28 @@ def fetch_studies_from_file(self, path: str, **kwargs) -> List[StudyDict]:
                 ids.append(line.strip())
         return self.fetch_studies(ids, **kwargs)
 
+    def fetch_biosamples_by_project(self, id: str) -> List[SampleDict]:
+        """Fetch the biosample from which the sequencing project
+        was generated.
+
+        :param id: GOLD project id. Ex.: Gp0503330
+        :return: List of SampleDict objects
+        """
+        id = self._normalize_id(id)
+        results = self._call("biosamples", {"projectGoldId": id})
+        return results
+
+    def fetch_study_by_project(self, id: str) -> List[SampleDict]:
+        """Fetch the study for which the sequencing project
+        was performed.
+
+        :param id: GOLD project id. Ex.: Gp0503330
+        :return: List of SampleDict objects
+        """
+        id = self._normalize_id(id)
+        results = self._call("studies", {"projectGoldId": id})
+        return results
+
 
 @click.group()
 @click.option("-v", "--verbose", count=True)

From 7b3c3a1cdf1437ccc132c45d8fb63dd28afac508 Mon Sep 17 00:00:00 2001
From: Sujay Patil
Date: Thu, 14 Jul 2022 17:50:38 -0700
Subject: [PATCH 2/4] add test cases for fetching by project id

---
 tests/test_gold.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/test_gold.py b/tests/test_gold.py
index b7b8e6a..941a32c 100644
--- a/tests/test_gold.py
+++ b/tests/test_gold.py
@@ -21,6 +21,7 @@
     'Gb0011929',
     'Gb0051032' ## sample with no study
 ]
+TEST_PROJECT_ID = 'Gp0503317'
 
 #logging.basicConfig(level=logging.DEBUG)
 
@@ -88,3 +89,25 @@ def test_get_biosamples(self):
         else:
             print(f'Skipping sample tests')
             print(f'To enable these, add your apikey to {KEYPATH}')
+
+    def test_fetches_by_project(self):
+        """Tests for all methods in the library that seek to fetch
+        biosample and study information from gold database based on
+        supplied project ids.
+        """
+        gc = GoldClient()
+        gc.clear_cache()
+
+        if os.path.exists(KEYPATH):
+            gc.load_key(KEYPATH)
+
+        expected_biosample_id = 'Gb0258249'
+        actual_biosample_id = gc.fetch_biosample_by_project(TEST_PROJECT_ID)
+
+        self.assertEqual(expected_biosample_id, actual_biosample_id)
+
+        expected_study_id = 'Gs0149396'
+        actual_study_id = gc.fetch_study_by_project(TEST_PROJECT_ID)
+
+        self.assertEqual(expected_study_id, actual_study_id)
+        
\ No newline at end of file

From 7461475e4003b588b0663969e1df180a4b62150d Mon Sep 17 00:00:00 2001
From: Sujay Patil
Date: Thu, 14 Jul 2022 17:59:04 -0700
Subject: [PATCH 3/4] add methods to filter by analysis ids

---
 sample_annotator/clients/gold_client.py | 30 +++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/sample_annotator/clients/gold_client.py b/sample_annotator/clients/gold_client.py
index 702e550..33448ef 100644
--- a/sample_annotator/clients/gold_client.py
+++ b/sample_annotator/clients/gold_client.py
@@ -272,6 +272,36 @@ def fetch_study_by_project(self, id: str) -> List[SampleDict]:
         id = self._normalize_id(id)
         results = self._call("studies", {"projectGoldId": id})
         return results
+
+    def fetch_study_by_analysis_id(self, id: str) -> List[SampleDict]:
+        """Fetch the study id for which the informatics processing
+        of a sequencing project was performed.
+        :param id: GOLD Analysis id. Ex.: Ga0466468
+        :return: List of SampleDict objects
+        """
+        id = self._normalize_id(id)
+        results = self._call("studies", {"analysisGoldId": id})
+        return results
+
+    def fetch_biosample_by_analysis_id(self, id: str) -> List[SampleDict]:
+        """Fetch the biosample id for which the informatics processing
+        of a sequencing project was performed.
+        :param id: GOLD Analysis id. Ex.: Ga0466468
+        :return: List of SampleDict objects
+        """
+        id = self._normalize_id(id)
+        results = self._call("biosamples", {"analysisGoldId": id})
+        return results
+
+    def fetch_project_by_analysis_id(self, id: str) -> List[SampleDict]:
+        """Fetch the project id for which the informatics processing
+        of a sequencing project was performed.
+        :param id: GOLD Analysis id. Ex.: Ga0466468
+        :return: List of SampleDict objects
+        """
+        id = self._normalize_id(id)
+        results = self._call("projects", {"analysisGoldId": id})
+        return results
 
 
 @click.group()

From 90ea2a8abb498dc18082342caa096905f1fbc64d Mon Sep 17 00:00:00 2001
From: Sujay Patil
Date: Fri, 15 Jul 2022 15:42:14 -0700
Subject: [PATCH 4/4] modify fetch by project and analysis id methods and tests

---
Reviewer amendment: after this commit the five fetch_*_by_project and
fetch_*_by_analysis_id methods return a single GOLD id taken from the first
record (results[0][...]), not the raw result list, so their return annotations
and :return: docstrings are corrected from List[SampleDict] to str here.
Runtime behaviour is unchanged. The gold_client.py diff was regenerated as a
single hunk for this; its blob "index" line is omitted because the post-image
hash is no longer the recorded one.

 sample_annotator/clients/gold_client.py | 46 +++++++++++++++----------
 tests/test_gold.py                      | 27 +++++++++++++++
 2 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/sample_annotator/clients/gold_client.py b/sample_annotator/clients/gold_client.py
--- a/sample_annotator/clients/gold_client.py
+++ b/sample_annotator/clients/gold_client.py
@@ -251,57 +251,67 @@ def fetch_studies_from_file(self, path: str, **kwargs) -> List[StudyDict]:
                 ids.append(line.strip())
         return self.fetch_studies(ids, **kwargs)
 
-    def fetch_biosamples_by_project(self, id: str) -> List[SampleDict]:
+    def fetch_biosample_by_project(self, id: str) -> str:
         """Fetch the biosample from which the sequencing project
         was generated.
 
         :param id: GOLD project id. Ex.: Gp0503330
-        :return: List of SampleDict objects
+        :return: GOLD biosample id (biosampleGoldId) of the first record returned
         """
         id = self._normalize_id(id)
         results = self._call("biosamples", {"projectGoldId": id})
-        return results
+
+        biosample_id = results[0]["biosampleGoldId"]
+        return biosample_id
 
-    def fetch_study_by_project(self, id: str) -> List[SampleDict]:
+    def fetch_study_by_project(self, id: str) -> str:
         """Fetch the study for which the sequencing project
         was performed.
 
         :param id: GOLD project id. Ex.: Gp0503330
-        :return: List of SampleDict objects
+        :return: GOLD study id (studyGoldId) of the first record returned
         """
         id = self._normalize_id(id)
         results = self._call("studies", {"projectGoldId": id})
-        return results
+
+        study_id = results[0]["studyGoldId"]
+        return study_id
 
-    def fetch_study_by_analysis_id(self, id: str) -> List[SampleDict]:
+    def fetch_study_by_analysis_id(self, id: str) -> str:
         """Fetch the study id for which the informatics processing
         of a sequencing project was performed.
         :param id: GOLD Analysis id. Ex.: Ga0466468
-        :return: List of SampleDict objects
+        :return: GOLD study id (studyGoldId) of the first record returned
         """
         id = self._normalize_id(id)
-        results = self._call("studies", {"analysisGoldId": id})
-        return results
+        results = self._call("studies", {"apGoldId": id})
+
+        study_id = results[0]["studyGoldId"]
+        return study_id
 
-    def fetch_biosample_by_analysis_id(self, id: str) -> List[SampleDict]:
+    def fetch_biosample_by_analysis_id(self, id: str) -> str:
         """Fetch the biosample id for which the informatics processing
         of a sequencing project was performed.
         :param id: GOLD Analysis id. Ex.: Ga0466468
-        :return: List of SampleDict objects
+        :return: GOLD biosample id (biosampleGoldId) of the first record returned
         """
         id = self._normalize_id(id)
-        results = self._call("biosamples", {"analysisGoldId": id})
-        return results
+        results = self._call("biosamples", {"apGoldId": id})
+
+        biosample_id = results[0]["biosampleGoldId"]
+        return biosample_id
 
-    def fetch_project_by_analysis_id(self, id: str) -> List[SampleDict]:
+    def fetch_project_by_analysis_id(self, id: str) -> str:
         """Fetch the project id for which the informatics processing
         of a sequencing project was performed.
         :param id: GOLD Analysis id. Ex.: Ga0466468
-        :return: List of SampleDict objects
+        :return: GOLD project id (projectGoldId) of the first record returned
         """
         id = self._normalize_id(id)
-        results = self._call("projects", {"analysisGoldId": id})
-        return results
+        results = self._call("projects", {"apGoldId": id})
+
+        project_id = results[0]["projectGoldId"]
+        return project_id
 
 
 @click.group()
diff --git a/tests/test_gold.py b/tests/test_gold.py
index 941a32c..a5ccb12 100644
--- a/tests/test_gold.py
+++ b/tests/test_gold.py
@@ -22,6 +22,7 @@
     'Gb0051032' ## sample with no study
 ]
 TEST_PROJECT_ID = 'Gp0503317'
+TEST_ANALYSIS_ID = 'Ga0451502'
 
 #logging.basicConfig(level=logging.DEBUG)
 
@@ -110,4 +111,30 @@ def test_fetches_by_project(self):
         actual_study_id = gc.fetch_study_by_project(TEST_PROJECT_ID)
 
         self.assertEqual(expected_study_id, actual_study_id)
+
+    def test_fetches_by_analysis(self):
+        """Tests for all methods in the library that seek to fetch
+        biosample, study and project information from gold database based
+        on supplied project analysis ids.
+        """
+        gc = GoldClient()
+        gc.clear_cache()
+
+        if os.path.exists(KEYPATH):
+            gc.load_key(KEYPATH)
+
+        expected_biosample_id = 'Gb0258249'
+        actual_biosample_id = gc.fetch_biosample_by_analysis_id(TEST_ANALYSIS_ID)
+
+        self.assertEqual(expected_biosample_id, actual_biosample_id)
+
+        expected_study_id = 'Gs0149396'
+        actual_study_id = gc.fetch_study_by_analysis_id(TEST_ANALYSIS_ID)
+
+        self.assertEqual(expected_study_id, actual_study_id)
+
+        expected_project_id = TEST_PROJECT_ID
+        actual_project_id = gc.fetch_project_by_analysis_id(TEST_ANALYSIS_ID)
+
+        self.assertEqual(expected_project_id, actual_project_id)
         
\ No newline at end of file