From b09d8e803dfe6ffd7c9c1bc594ba36f1c0805977 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 00:23:03 +0800
Subject: [PATCH 01/38] regenerate ontology
---
ftx/medical/clinical_ontology.py | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/ftx/medical/clinical_ontology.py b/ftx/medical/clinical_ontology.py
index 52019a75..6f782423 100644
--- a/ftx/medical/clinical_ontology.py
+++ b/ftx/medical/clinical_ontology.py
@@ -47,6 +47,8 @@
"MedicalArticle",
"Abbreviation",
"Hyponym",
+ "Disease",
+ "Chemical"
]
@@ -492,3 +494,23 @@ class Hyponym(Link):
def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None):
super().__init__(pack, parent, child)
self.hyponym_link: Optional[str] = None
+
+
+@dataclass
+class Disease(Annotation):
+ """
+ A span based annotation `Disease`, used to represent the diseases in a piece of clinical text.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Chemical(Annotation):
+ """
+ A span based annotation `Chemical`, used to represent the chemicals in a piece of clinical text.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
From 4e5893aefc227b3c79c905c30beee39a0b388447 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 00:24:10 +0800
Subject: [PATCH 02/38] Add Disease and Chemical in ontology
---
.../ontology_specs/clinical_ontology.json | 828 +++++++++---------
1 file changed, 419 insertions(+), 409 deletions(-)
diff --git a/fortex/health/ontology_specs/clinical_ontology.json b/fortex/health/ontology_specs/clinical_ontology.json
index a9269abd..ed8df678 100644
--- a/fortex/health/ontology_specs/clinical_ontology.json
+++ b/fortex/health/ontology_specs/clinical_ontology.json
@@ -1,407 +1,407 @@
-{
- "name": "clinical_ontology",
- "imports": [
- "base_ontology.json"
- ],
- "additional_prefixes": [
- "ftx.medical.clinical_ontology"
- ],
- "definitions": [
- {
- "entry_name": "ftx.medical.clinical_ontology.ClinicalEntityMention",
- "parent_entry": "ft.onto.base_ontology.EntityMention",
- "description": "A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.Description",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "A span based annotation `Description`, used to represent the description in a piece of clinical note."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.Body",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "A span based annotation `Body`, used to represent the actual content in a piece of clinical note."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.FrequencyAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The frequency determination for the Drug NER profile."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.DurationAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The duration determination for the Drug NER profile."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.RouteAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The route determination for the Drug NER profile.",
- "attributes": [
- {
- "name": "in_take_method",
- "type": "str"
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.SuffixStrengthAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The suffix portion of dosage strength determination for the Drug NER profile."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.FractionStrengthAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The fraction portion of dosages strength determination for the Drug NER profile."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.RangeStrengthAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The range portion of dosages stength determination for the Drug NER profile."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.DecimalStrengthAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The decimal portion of dosages stength determination for the Drug NER profile"
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.DrugChangeStatusAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The change status of dosages determination for the Drug NER profile.",
- "attributes": [
- {
- "name": "change_status",
- "type": "str",
- "description": "Indicates the drug change status of 'stop', 'start', 'increase', 'decrease', or 'noChange'."
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.DosagesAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The dosage determination for the Drug NER profile."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.StrengthAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "Holds the value representing the unit of the drug dosage."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.StrengthUnitAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": ""
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.FrequencyUnitAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The value represents the unit portion of the drug frequency.",
- "attributes": [
- {
- "name": "period",
- "type": "float",
- "description": "The periodic unit used, e.g day, month, hour, etc."
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.FormAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The value represents the form portion of the drug mention."
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.SubSectionAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "",
- "attributes": [
- {
- "name": "sub_ssection_body_begin",
- "type": "int",
- "description": "Sub-section body begin offset."
- },
- {
- "name": "sub_section_body_end",
- "type": "int",
- "description": "Sub-section body end offset."
- },
- {
- "name": "status",
- "type": "int",
- "description": "Status of 'possible', 'history of', or 'family history of'."
- },
- {
- "name": "sub_section_header_begin",
- "type": "int",
- "description": "Begin offset of subSection header"
- },
- {
- "name": "sub_section_header_end",
- "type": "int",
- "description": "Ending offset of subsection header"
- },
- {
- "name": "parent_section_id",
- "type": "str",
- "description": "The section in which the subsection was found."
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.DrugMentionAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "",
- "attributes": [
- {
- "name": "status",
- "type": "int",
- "description": ""
- },
- {
- "name": "confidence",
- "type": "float",
- "description": "The confidence of the annotation."
- },
- {
- "name": "frequency",
- "type": "str",
- "description": "Frequency refers to how often the patient needs to take the drug. Frequency is divided into frequency number and frequency unit. E.g. twice daily"
- },
- {
- "name": "frequency_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "frequency_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "duration",
- "type": "str",
- "description": "Duration refers to for how long the patient is expected to take the drug. E.g. 'for 2 weeks' Strongly encouraged to use bold text"
- },
- {
- "name": "duration_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "duration_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "route",
- "type": "str",
- "description": "Medication route refers to the way that a drug is introduced into the body. E.g oral Strongly encouraged to use bold text"
- },
- {
- "name": "route_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "route_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "drug_change_status",
- "type": "str",
- "description": "Status refers to the whether the medication is currently being taken or not."
- },
- {
- "name": "dosage",
- "type": "str",
- "description": "Dosage refers to how many of each drug the patient is taking. E.g. 5 mg"
- },
- {
- "name": "dosage_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "dosage_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "strength",
- "type": "str",
- "description": ""
- },
- {
- "name": "strength_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "strength_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "strength_unit",
- "type": "str",
- "description": ""
- },
- {
- "name": "su_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "su_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "form",
- "type": "str",
- "description": "Form refers to the physical appearance of the drug. E.g. cream"
- },
- {
- "name": "form_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "form_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "frequency_unit",
- "type": "str",
- "description": ""
- },
- {
- "name": "fu_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "fu_end",
- "type": "int",
- "description": ""
- },
- {
- "name": "start_date",
- "type": "str",
- "description": ""
- },
- {
- "name": "reason",
- "type": "Dict",
- "key_type": "str",
- "value_type": "int"
- },
- {
- "name": "change_status_begin",
- "type": "int",
- "description": ""
- },
- {
- "name": "change_status_end",
- "type": "int",
- "description": ""
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.ChunkAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "The value represents the unit portion of the drug frequency.",
- "attributes": [
- {
- "name": "sentence_id",
- "type": "str",
- "description": ""
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.DrugLookupWindowAnnotation",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "Similar to LookupWindowAnnotation however, these annotations are restricted to the segments/sections specified in the parameter - sectionOverrideSet - in DrugCNP2LookupWindow"
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.NegationContext",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
- "attributes": [
- {
- "name": "polarity",
- "type": "bool"
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.UMLSConceptLink",
- "parent_entry": "forte.data.ontology.top.Generics",
- "description": "A umls concept entity, used to represent basic information of a umls concept",
- "attributes": [
- {
- "name": "cui",
- "type": "str"
- },
- {
- "name": "name",
- "type": "str"
- },
- {
- "name": "definition",
- "type": "str"
- },
- {
- "name": "tuis",
- "type": "List",
- "item_type": "str"
- },
- {
- "name": "aliases",
- "type": "List",
- "item_type": "str"
- },
- {
- "name": "score",
- "type": "str"
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
- "parent_entry": "ft.onto.base_ontology.EntityMention",
- "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
- "attributes": [
- {
- "name": "umls_link",
- "type": "str"
- },
- {
- "name": "umls_entities",
- "type": "List",
- "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
- }
- ]
- },
- {
- "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
- "parent_entry": "forte.data.ontology.top.Annotation",
- "description": "An annotation which represents the whole medical text chunk/document",
- "attributes": [
- {
- "name": "icd_version",
- "type": "int",
- "description": "The version of ICD-Coding being used."
- },
- {
- "name": "icd_code",
- "type": "str",
- "description": "The ICD code assigned to current medical article."
- }
- ]
- },
- {
+{
+ "name": "clinical_ontology",
+ "imports": [
+ "base_ontology.json"
+ ],
+ "additional_prefixes": [
+ "ftx.medical.clinical_ontology"
+ ],
+ "definitions": [
+ {
+ "entry_name": "ftx.medical.clinical_ontology.ClinicalEntityMention",
+ "parent_entry": "ft.onto.base_ontology.EntityMention",
+ "description": "A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.Description",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Description`, used to represent the description in a piece of clinical note."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.Body",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Body`, used to represent the actual content in a piece of clinical note."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.FrequencyAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The frequency determination for the Drug NER profile."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.DurationAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The duration determination for the Drug NER profile."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.RouteAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The route determination for the Drug NER profile.",
+ "attributes": [
+ {
+ "name": "in_take_method",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.SuffixStrengthAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The suffix portion of dosage strength determination for the Drug NER profile."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.FractionStrengthAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The fraction portion of dosages strength determination for the Drug NER profile."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.RangeStrengthAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The range portion of dosages stength determination for the Drug NER profile."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.DecimalStrengthAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The decimal portion of dosages stength determination for the Drug NER profile"
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.DrugChangeStatusAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The change status of dosages determination for the Drug NER profile.",
+ "attributes": [
+ {
+ "name": "change_status",
+ "type": "str",
+ "description": "Indicates the drug change status of 'stop', 'start', 'increase', 'decrease', or 'noChange'."
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.DosagesAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The dosage determination for the Drug NER profile."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.StrengthAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "Holds the value representing the unit of the drug dosage."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.StrengthUnitAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": ""
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.FrequencyUnitAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The value represents the unit portion of the drug frequency.",
+ "attributes": [
+ {
+ "name": "period",
+ "type": "float",
+ "description": "The periodic unit used, e.g day, month, hour, etc."
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.FormAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The value represents the form portion of the drug mention."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.SubSectionAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "",
+ "attributes": [
+ {
+ "name": "sub_ssection_body_begin",
+ "type": "int",
+ "description": "Sub-section body begin offset."
+ },
+ {
+ "name": "sub_section_body_end",
+ "type": "int",
+ "description": "Sub-section body end offset."
+ },
+ {
+ "name": "status",
+ "type": "int",
+ "description": "Status of 'possible', 'history of', or 'family history of'."
+ },
+ {
+ "name": "sub_section_header_begin",
+ "type": "int",
+ "description": "Begin offset of subSection header"
+ },
+ {
+ "name": "sub_section_header_end",
+ "type": "int",
+ "description": "Ending offset of subsection header"
+ },
+ {
+ "name": "parent_section_id",
+ "type": "str",
+ "description": "The section in which the subsection was found."
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.DrugMentionAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "",
+ "attributes": [
+ {
+ "name": "status",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "confidence",
+ "type": "float",
+ "description": "The confidence of the annotation."
+ },
+ {
+ "name": "frequency",
+ "type": "str",
+ "description": "Frequency refers to how often the patient needs to take the drug. Frequency is divided into frequency number and frequency unit. E.g. twice daily"
+ },
+ {
+ "name": "frequency_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "frequency_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "duration",
+ "type": "str",
+ "description": "Duration refers to for how long the patient is expected to take the drug. E.g. 'for 2 weeks' Strongly encouraged to use bold text"
+ },
+ {
+ "name": "duration_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "duration_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "route",
+ "type": "str",
+ "description": "Medication route refers to the way that a drug is introduced into the body. E.g oral Strongly encouraged to use bold text"
+ },
+ {
+ "name": "route_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "route_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "drug_change_status",
+ "type": "str",
+ "description": "Status refers to the whether the medication is currently being taken or not."
+ },
+ {
+ "name": "dosage",
+ "type": "str",
+ "description": "Dosage refers to how many of each drug the patient is taking. E.g. 5 mg"
+ },
+ {
+ "name": "dosage_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "dosage_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "strength",
+ "type": "str",
+ "description": ""
+ },
+ {
+ "name": "strength_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "strength_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "strength_unit",
+ "type": "str",
+ "description": ""
+ },
+ {
+ "name": "su_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "su_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "form",
+ "type": "str",
+ "description": "Form refers to the physical appearance of the drug. E.g. cream"
+ },
+ {
+ "name": "form_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "form_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "frequency_unit",
+ "type": "str",
+ "description": ""
+ },
+ {
+ "name": "fu_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "fu_end",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "start_date",
+ "type": "str",
+ "description": ""
+ },
+ {
+ "name": "reason",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "int"
+ },
+ {
+ "name": "change_status_begin",
+ "type": "int",
+ "description": ""
+ },
+ {
+ "name": "change_status_end",
+ "type": "int",
+ "description": ""
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.ChunkAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "The value represents the unit portion of the drug frequency.",
+ "attributes": [
+ {
+ "name": "sentence_id",
+ "type": "str",
+ "description": ""
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.DrugLookupWindowAnnotation",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "Similar to LookupWindowAnnotation however, these annotations are restricted to the segments/sections specified in the parameter - sectionOverrideSet - in DrugCNP2LookupWindow"
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+ "attributes": [
+ {
+ "name": "polarity",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.UMLSConceptLink",
+ "parent_entry": "forte.data.ontology.top.Generics",
+ "description": "A umls concept entity, used to represent basic information of a umls concept",
+ "attributes": [
+ {
+ "name": "cui",
+ "type": "str"
+ },
+ {
+ "name": "name",
+ "type": "str"
+ },
+ {
+ "name": "definition",
+ "type": "str"
+ },
+ {
+ "name": "tuis",
+ "type": "List",
+ "item_type": "str"
+ },
+ {
+ "name": "aliases",
+ "type": "List",
+ "item_type": "str"
+ },
+ {
+ "name": "score",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
+ "parent_entry": "ft.onto.base_ontology.EntityMention",
+ "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
+ "attributes": [
+ {
+ "name": "umls_link",
+ "type": "str"
+ },
+ {
+ "name": "umls_entities",
+ "type": "List",
+ "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "An annotation which represents the whole medical text chunk/document",
+ "attributes": [
+ {
+ "name": "icd_version",
+ "type": "int",
+ "description": "The version of ICD-Coding being used."
+ },
+ {
+ "name": "icd_code",
+ "type": "str",
+ "description": "The ICD code assigned to current medical article."
+ }
+ ]
+ },
+ {
"entry_name": "ftx.medical.clinical_ontology.Abbreviation",
"parent_entry": "forte.data.ontology.top.Annotation",
"description": "A span based annotation `Abbreviation`, used to represent an abbreviated token..",
@@ -411,8 +411,8 @@
"type": "str"
}
]
- },
- {
+ },
+ {
"entry_name": "ftx.medical.clinical_ontology.Hyponym",
"parent_entry": "forte.data.ontology.top.Link",
"description": "A `Link` type entry which represent a hyponym pair.",
@@ -425,6 +425,16 @@
],
"parent_type": "ft.onto.base_ontology.Phrase",
"child_type": "ft.onto.base_ontology.Phrase"
- }
- ]
- }
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.Disease",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Disease`, used to represent the diseases in a piece of clinical text."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.Chemical",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Chemical`, used to represent the chemicals in a piece of clinical text."
+ }
+ ]
+}
\ No newline at end of file
From 266d64846aa9c348aaaa6accfd46d53f2bf1b109 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 00:52:58 +0800
Subject: [PATCH 03/38] Add NER Label Processor
---
.../health/processors/ner_label_processor.py | 109 ++++++++++++++++++
1 file changed, 109 insertions(+)
create mode 100644 fortex/health/processors/ner_label_processor.py
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
new file mode 100644
index 00000000..92dbfcd5
--- /dev/null
+++ b/fortex/health/processors/ner_label_processor.py
@@ -0,0 +1,109 @@
+# Copyright 2022 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NER Labeling Processor
+"""
+
+import spacy
+from typing import Dict, Set
+from forte.data.data_pack import DataPack
+from forte.processors.base import PackProcessor
+from forte.common.configuration import Config
+from forte.common.resources import Resources
+from forte.common import ProcessExecutionException
+
+
+from ftx.medical.clinical_ontology import Disease, Chemical
+
+
+__all__ = [
+ "NERLabelProcessor",
+]
+
+
+class NERLabelProcessor(PackProcessor):
+    r"""
+    Labels disease and chemical mentions in a data pack with the spaCy
+    pretrained pipeline ``en_ner_bc5cdr_md``. A rendition of it that
+    exists on github has been referred to as well.
+
+    Referred repository link:
+    https://github.com/explosion/spaCy
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.nlp = None
+
+    def initialize(self, resources: Resources, configs: Config):
+        super().initialize(resources, configs)
+        self.nlp = spacy.load("en_ner_bc5cdr_md")
+
+    def _process(self, input_pack: DataPack):
+        r"""
+        Add `Disease` / `Chemical` annotations for the configured labels.
+        """
+        labels = self.configs.labels
+
+        doc = input_pack.text
+
+        # The pipeline is only loaded in `initialize`; fail loudly otherwise.
+        if self.nlp is None:
+            raise ProcessExecutionException(
+                "The SpaCy pipeline is not initialized, maybe you "
+                "haven't called the initialization function."
+            )
+        result = self.nlp(doc)
+
+        if "disease" in labels:
+            for ent in result.ents:
+                if ent.label_ == "DISEASE":
+                    Disease(
+                        pack=input_pack,
+                        begin=ent.start_char,
+                        end=ent.end_char
+                    )
+
+        if "chemical" in labels:
+            for ent in result.ents:
+                if ent.label_ == "CHEMICAL":
+                    Chemical(
+                        pack=input_pack,
+                        begin=ent.start_char,
+                        end=ent.end_char
+                    )
+
+    @classmethod
+    def default_configs(cls):
+        r"""
+        This defines a basic config structure for `NERLabelProcessor`.
+
+        Following are the keys for this dictionary:
+            - `labels`: entity types to annotate ("disease", "chemical")
+
+        Returns: A dictionary with the default config for this processor.
+        """
+        return {
+            "labels": ["disease", "chemical"]
+        }
+
+    def record(self, record_meta: Dict[str, Set[str]]):
+        r"""Register the entry types that this processor adds to the pack.
+
+        Args:
+            record_meta: the field in the datapack for type record that
+                needs to be filled in for consistency checking.
+        """
+        record_meta["ftx.medical.clinical_ontology.Disease"] = set()
+        record_meta["ftx.medical.clinical_ontology.Chemical"] = set()
From 3c9865e74201f2b60cc7fb0544f95eb5e0e93556 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:01:37 +0800
Subject: [PATCH 04/38] Add mimic iii reader
---
examples/label_example/mimic3_note_reader.py | 80 ++++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 examples/label_example/mimic3_note_reader.py
diff --git a/examples/label_example/mimic3_note_reader.py b/examples/label_example/mimic3_note_reader.py
new file mode 100644
index 00000000..b3f02de6
--- /dev/null
+++ b/examples/label_example/mimic3_note_reader.py
@@ -0,0 +1,80 @@
+# Copyright 2021 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import logging
+from pathlib import Path
+from typing import Any, Iterator, Union, List
+
+from smart_open import open
+
+from demo.clinical import Description, Body
+from forte.data.data_pack import DataPack
+from forte.data.base_reader import PackReader
+from ft.onto.base_ontology import Document
+
+
+class Mimic3DischargeNoteReader(PackReader):
+    """Reads discharge notes of the MIMIC3 dataset from a CSV file and
+    emits each note as a plain text pack.
+
+    For more information for the dataset, visit:
+    https://mimic.physionet.org/
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.headers: List[str] = []
+        self.text_col = -1  # Falls back to the last column.
+        self.description_col = 0  # Falls back to the first column.
+        self.__note_count = 0  # Number of notes turned into packs so far.
+
+    def _collect(  # type: ignore
+        self, mimic3_path: Union[Path, str]
+    ) -> Iterator[Any]:
+        with open(mimic3_path) as csv_file:
+            for record in csv.reader(csv_file):
+                if 0 < self.configs.max_num_notes <= self.__note_count:
+                    break
+                yield record
+
+    def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
+        if not self.headers:
+            self.headers.extend(row)
+            for idx, name in enumerate(self.headers):
+                if name == "TEXT":
+                    self.text_col = idx
+                    logging.info("Text Column is %d", idx)
+                if name == "DESCRIPTION":
+                    self.description_col = idx
+                    logging.info("Description Column is %d", idx)
+            return  # The header row itself produces no pack.
+
+        pack: DataPack = DataPack()
+        description: str = row[self.description_col]
+        text: str = row[self.text_col]
+        delimiter = "\n-----------------\n"
+        full_text = description + delimiter + text
+        pack.set_text(full_text)
+        Description(pack, 0, len(description))
+        Body(pack, len(description) + len(delimiter), len(full_text))
+        Document(pack, 0, len(pack.text))
+        self.__note_count += 1
+        yield pack
+
+    @classmethod
+    def default_configs(cls):
+        # A positive value caps how many notes are read;
+        # the default of -1 reads everything.
+        return {"max_num_notes": -1}
From 745d3544a93b6a50c899428bb507b4a13e016a8c Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:02:07 +0800
Subject: [PATCH 05/38] Add demo
---
examples/label_example/demo/__init__.py | 1 +
examples/label_example/demo/clinical.py | 49 +++++++++++++++++++++++++
2 files changed, 50 insertions(+)
create mode 100644 examples/label_example/demo/__init__.py
create mode 100644 examples/label_example/demo/clinical.py
diff --git a/examples/label_example/demo/__init__.py b/examples/label_example/demo/__init__.py
new file mode 100644
index 00000000..49ecbbf8
--- /dev/null
+++ b/examples/label_example/demo/__init__.py
@@ -0,0 +1 @@
+# ***automatically_generated***
diff --git a/examples/label_example/demo/clinical.py b/examples/label_example/demo/clinical.py
new file mode 100644
index 00000000..68541b46
--- /dev/null
+++ b/examples/label_example/demo/clinical.py
@@ -0,0 +1,49 @@
+# ***automatically_generated***
+# ***source json:examples/clinical_pipeline/clinical_onto.json***
+# flake8: noqa
+# mypy: ignore-errors
+# pylint: skip-file
+"""
+Automatically generated ontology clinical. Do not change manually.
+"""
+
+from dataclasses import dataclass
+from forte.data.data_pack import DataPack
+from forte.data.ontology.top import Annotation
+from ft.onto.base_ontology import EntityMention
+
+__all__ = [
+ "ClinicalEntityMention",
+ "Description",
+ "Body",
+]
+
+
+@dataclass
+class ClinicalEntityMention(EntityMention):
+ """
+ A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Description(Annotation):
+ """
+ A span based annotation `Description`, used to represent the description in a piece of clinical note.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Body(Annotation):
+ """
+ A span based annotation `Body`, used to represent the actual content in a piece of clinical note.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
From c18184ee74999595c3f7e18dd27cfc30b8cfa66b Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:23:09 +0800
Subject: [PATCH 06/38] design clinical pipeline
---
examples/label_example/clinical_pipeline.py | 63 +++++++++++++++++++++
1 file changed, 63 insertions(+)
create mode 100644 examples/label_example/clinical_pipeline.py
diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
new file mode 100644
index 00000000..e4644c45
--- /dev/null
+++ b/examples/label_example/clinical_pipeline.py
@@ -0,0 +1,63 @@
+import sys
+import time
+
+sys.path.insert(0,"E:\\NLP\\Forte\\ForteHealthBranches\\53\\ForteHealth")
+print(sys.path)
+
+from forte.data.data_pack import DataPack
+from forte.data.readers import PlainTextReader
+from forte.pipeline import Pipeline
+from forte.processors.writers import PackIdJsonPackWriter
+from fortex.elastic import ElasticSearchPackIndexProcessor
+from fortex.health.processors.ner_label_processor import NERLabelProcessor
+# from ner_label_processor import NERLabelProcessor
+
+from mimic3_note_reader import Mimic3DischargeNoteReader
+
+# from stave_backend.lib.stave_session import StaveSession
+
+
+def main(
+ input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
+ ):
+
+ pl = Pipeline[DataPack]()
+ if use_mimiciii_reader == 1:
+ pl.set_reader(
+ Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
+ )
+ else:
+ pl.set_reader(PlainTextReader())
+
+ config_for_ner = {
+ "labels": ["disease", "chemical"]
+ }
+ pl.add(NERLabelProcessor(), config=config_for_ner)
+
+ pl.add(
+ ElasticSearchPackIndexProcessor(),
+ {
+ "indexer": {
+ "other_kwargs": {"refresh": True},
+ }
+ },
+ )
+ pl.add(
+ PackIdJsonPackWriter(),
+ {
+ "output_dir": output_path,
+ "indent": 2,
+ "overwrite": True,
+ "drop_record": True,
+ "zip_pack": False,
+ },
+ )
+
+ pl.initialize()
+
+ for idx, pack in enumerate(pl.process_dataset(input_path)):
+ if (idx + 1) % 50 == 0:
+ print(f"{time.strftime('%m-%d %H:%M')}: Processed {idx + 1} packs")
+
+
+main(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))
From 451c4882342576d54e4a696901ccca6cc85a1f58 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:26:23 +0800
Subject: [PATCH 07/38] Search engine and related utils
---
examples/label_example/search_engine.py | 93 +++++++++++++++++++++++++
examples/label_example/search_utils.py | 57 +++++++++++++++
examples/label_example/sqlite_utils.py | 79 +++++++++++++++++++++
examples/label_example/templates.py | 22 ++++++
4 files changed, 251 insertions(+)
create mode 100644 examples/label_example/search_engine.py
create mode 100644 examples/label_example/search_utils.py
create mode 100644 examples/label_example/sqlite_utils.py
create mode 100644 examples/label_example/templates.py
diff --git a/examples/label_example/search_engine.py b/examples/label_example/search_engine.py
new file mode 100644
index 00000000..03d58640
--- /dev/null
+++ b/examples/label_example/search_engine.py
@@ -0,0 +1,93 @@
+import sqlite3
+from typing import List
+import streamlit as st
+from forte.common.configuration import Config
+import yaml
+from elasticsearch import Elasticsearch
+from search_utils import all_search, index_search
+from sqlite_utils import create_links, sqlite_insert, get_json, update_stave_db
+import templates
+
+
+st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
+
+es = Elasticsearch(hosts=["http://localhost:9200/"])
+INDEX = "elastic_indexer"
+
+config = yaml.safe_load(open("stave_config.yml", "r"))
+config = Config(config, default_hparams=None)
+
+default_project_json = get_json("default_onto_project.json")
+
+base_project_id = update_stave_db(
+ default_project_json, config
+ )
+
+st.title("Search the MIMIC III Data...")
+search = st.text_input("Enter search words:")
+
+if not search:
+ records = {}
+ results = all_search(es, INDEX)
+ hits = results["hits"]["hits"]
+
+ conn = sqlite3.connect(config.Stave.stave_db_path)
+ answers = []
+ for idx, hit in enumerate(hits):
+ source = hit["_source"]
+ # The raw pack string and pack id (not database id)
+ raw_pack_str: str = source["pack_info"]
+ pack_id: str = source["doc_id"]
+
+ # Now you can write the pack into the database and generate url.
+ item = {
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": base_project_id,
+ }
+
+ db_id = sqlite_insert(conn, "stave_backend_document", item)
+ answers += [db_id]
+ print(pack_id, db_id)
+
+ links: List[str] = create_links(config.Stave.url, answers)
+
+ for link in links:
+ st.write(link, unsafe_allow_html=True)
+
+if search:
+ results = index_search(es, INDEX, search)
+ hits = results["hits"]["hits"]
+
+ conn = sqlite3.connect(config.Stave.stave_db_path)
+ answers = []
+ docs = []
+ for idx, hit in enumerate(hits):
+ source = hit["_source"]
+ # The raw pack string and pack id (not database id)
+ raw_pack_str: str = source["pack_info"]
+ pack_id: str = source["doc_id"]
+ highlight = "...".join(hit["highlight"]["content"])
+ # Now you can write the pack into the database and generate url.
+ item = {
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": base_project_id,
+ }
+
+ db_id = sqlite_insert(conn, "stave_backend_document", item)
+ answers += [db_id]
+
+ docs.append(highlight)
+
+ links: List[str] = create_links(config.Stave.url, answers)
+
+ for i in range(len(links)):
+ st.write(links[i], unsafe_allow_html=True)
+ st.write(
+ templates.search_result(
+ docs[i]
+ .replace("\n", " ")
+ ),
+ unsafe_allow_html=True,
+ )
diff --git a/examples/label_example/search_utils.py b/examples/label_example/search_utils.py
new file mode 100644
index 00000000..eb85da10
--- /dev/null
+++ b/examples/label_example/search_utils.py
@@ -0,0 +1,57 @@
+'''
+this file defines search functions for searching data in elasticsearch.
+'''
+
+
+def all_search(es, index: str) -> dict:
+ """
+ Args:
+ es: Elasticsearch client instance.
+ index: Name of the index we are going to use.
+ size: Number of results returned in each search.
+ """
+ # search query
+ body = {"query": {"match_all": {}}}
+
+ res = es.search(index=index, body=body)
+
+ return res
+
+
+def index_search(es, index: str, keywords: str) -> dict:
+ """
+ Args:
+ es: Elasticsearch client instance.
+ index: Name of the index we are going to use.
+ keywords: Search keywords.
+ from_i: Start index of the results for pagination.
+ size: Number of results returned in each search.
+ """
+ # search query
+ body = {
+ "query": {
+ "bool": {
+ "must": [
+ {
+ "query_string": {
+ "query": keywords,
+ "fields": ["content"],
+ "default_operator": "AND",
+ }
+ }
+ ],
+ }
+ },
+ "highlight": {
+ "pre_tags": [' '],
+ "post_tags": [""],
+ "fields": {"content": {}},
+ },
+ # "from": from_i,
+ # "size": size,
+ "aggs": {"match_count": {"value_count": {"field": "_id"}}},
+ }
+
+ res = es.search(index=index, body=body)
+
+ return res
diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
new file mode 100644
index 00000000..6cc7c036
--- /dev/null
+++ b/examples/label_example/sqlite_utils.py
@@ -0,0 +1,79 @@
+"""
+this file defines sqlite3 related utils for inserting data to the database of stave.
+"""
+import json
+from typing import List
+from stave_backend.lib.stave_session import StaveSession
+import sqlite3
+
+
+def sqlite_insert(conn, table, row):
+ """
+ Args:
+ conn: connection
+ table: table name
+ row: inserted item
+ """
+ cols: str = ", ".join('"{}"'.format(col) for col in row.keys())
+ vals: str = ", ".join(":{}".format(col) for col in row.keys())
+ sql: str = 'INSERT INTO "{0}" ({1}) VALUES ({2})'.format(table, cols, vals)
+ cursor = conn.cursor()
+ cursor.execute(sql, row)
+ conn.commit()
+ return cursor.lastrowid
+
+
+def create_links(url_stub: str, ids: List[int]) -> List[str]:
+ """
+ Args:
+ url_stub: url of stave
+ ids: the doc ids of the reports
+ """
+ links: List[str] = []
+
+ url_stub: str = url_stub.strip("/")
+ for temp_idm in ids:
+ links.append(
+ f"Report #{temp_idm}"
+ )
+ return links
+
+
+def get_json(path: str):
+ """
+ Args:
+ path: the file path of the json file
+ """
+ file_obj = open(path)
+ data = json.load(file_obj)
+ file_obj.close()
+ return data
+
+
+def update_stave_db(default_project_json, config):
+ """
+ Args:
+ default_project_json: the ontology configuration file
+ config: the configuration of Stave, including url, name, password, etc.
+ """
+ project_id_base = 0
+ with StaveSession(url=config.Stave.url) as session:
+ session.login(username=config.Stave.username, password=config.Stave.pw)
+
+ projects = session.get_project_list().json()
+ project_names = [project["name"] for project in projects]
+
+ if (
+ default_project_json["name"] in project_names
+ ):
+
+ base_project = [
+ proj
+ for proj in projects
+ if proj["name"] == default_project_json["name"]
+ ][0]
+ return base_project["id"]
+
+ resp1 = session.create_project(default_project_json)
+ project_id_base = json.loads(resp1.text)["id"]
+ return project_id_base
diff --git a/examples/label_example/templates.py b/examples/label_example/templates.py
new file mode 100644
index 00000000..63bf9aa2
--- /dev/null
+++ b/examples/label_example/templates.py
@@ -0,0 +1,22 @@
+"""
+This file defines some HTML templates
+"""
+
+
+def number_of_results(total_hits: int, duration: float) -> str:
+ """Format the number of results and the search duration for display.
+
+ Args:
+ total_hits: Number of results to report.
+ duration: Search duration in seconds; rendered with two decimals.
+
+ Returns:
+ The formatted multi-line string.
+
+ NOTE(review): this is described as an HTML template, yet no markup is
+ visible in the returned string here -- confirm the tags were not lost.
+ """
+ return f"""
+
+ {total_hits} results ({duration:.2f} seconds)
+
+ """
+
+
+def search_result(highlights: str) -> str:
+ """Wrap the highlight text of one search hit for display.
+
+ Args:
+ highlights: Highlight text to embed in the returned string.
+
+ Returns:
+ A multi-line string containing `highlights`.
+
+ NOTE(review): this is described as an HTML template, yet no markup is
+ visible in the returned string here -- confirm the tags were not lost.
+ """
+ return f"""
+
+
+ {highlights}
+
+ """
From f201997991bf3a52b54d5048f9c88e965bff7790 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:26:45 +0800
Subject: [PATCH 08/38] add stave config
---
examples/label_example/stave_config.yml | 5 +++++
1 file changed, 5 insertions(+)
create mode 100644 examples/label_example/stave_config.yml
diff --git a/examples/label_example/stave_config.yml b/examples/label_example/stave_config.yml
new file mode 100644
index 00000000..f9ff6f02
--- /dev/null
+++ b/examples/label_example/stave_config.yml
@@ -0,0 +1,5 @@
+Stave:
+ stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+ url: "http://localhost:8899"
+ username: admin
+ pw: admin
From 4642690f12abcd78310cff20aca97eb3657bbea8 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:29:02 +0800
Subject: [PATCH 09/38] add stave project ontology file
---
.../label_example/default_onto_project.json | 751 ++++++++++++++++++
1 file changed, 751 insertions(+)
create mode 100644 examples/label_example/default_onto_project.json
diff --git a/examples/label_example/default_onto_project.json b/examples/label_example/default_onto_project.json
new file mode 100644
index 00000000..901ce4f1
--- /dev/null
+++ b/examples/label_example/default_onto_project.json
@@ -0,0 +1,751 @@
+{
+ "name": "clinical_pipeline_base",
+ "ontology": {
+ "name": "base_ontology",
+ "definitions": [
+ {
+ "entry_name": "ft.onto.base_ontology.Token",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation :class:`Token`, used to represent a token or a word.",
+ "attributes": [
+ {
+ "name": "pos",
+ "type": "str"
+ },
+ {
+ "name": "ud_xpos",
+ "type": "str",
+ "description": "Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html"
+ },
+ {
+ "name": "lemma",
+ "type": "str",
+ "description": "Lemma or stem of word form."
+ },
+ {
+ "name": "chunk",
+ "type": "str"
+ },
+ {
+ "name": "ner",
+ "type": "str"
+ },
+ {
+ "name": "sense",
+ "type": "str"
+ },
+ {
+ "name": "is_root",
+ "type": "bool"
+ },
+ {
+ "name": "ud_features",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "str"
+ },
+ {
+ "name": "ud_misc",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Subword",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "Used to represent subword tokenization results.",
+ "attributes": [
+ {
+ "name": "is_first_segment",
+ "type": "bool"
+ },
+ {
+ "name": "is_unk",
+ "type": "bool"
+ },
+ {
+ "name": "vocab_id",
+ "type": "int"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Classification",
+ "parent_entry": "forte.data.ontology.top.Generics",
+ "description": "Used to store values for classification prediction",
+ "attributes": [
+ {
+ "name": "classification_result",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Document",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Document`, normally used to represent a document.",
+ "attributes": [
+ {
+ "name": "document_class",
+ "type": "List",
+ "item_type": "str",
+ "description": "A list of class names that the document belongs to."
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classifications",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "ft.onto.base_ontology.Classification",
+ "description": "Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Sentence",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Sentence`, normally used to represent a sentence.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ },
+ {
+ "name": "part_id",
+ "type": "int"
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classification",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classifications",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "ft.onto.base_ontology.Classification",
+ "description": "Stores the classification results for this sentence. The key is the name/task of the classification, the value is an classification object storing the results."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Phrase",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Phrase`.",
+ "attributes": [
+ {
+ "name": "phrase_type",
+ "type": "str"
+ },
+ {
+ "name": "headword",
+ "type": "ft.onto.base_ontology.Token"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.UtteranceContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "`UtteranceContext` represents the context part in dialogue."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Utterance",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Utterance`, normally used to represent an utterance in dialogue.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateArgument",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.",
+ "attributes": [
+ {
+ "name": "ner_type",
+ "type": "str"
+ },
+ {
+ "name": "predicate_lemma",
+ "type": "str"
+ },
+ {
+ "name": "is_verb",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EntityMention",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.",
+ "attributes": [
+ {
+ "name": "ner_type",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EventMention",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `EventMention`, used to refer to a mention of an event.",
+ "attributes": [
+ {
+ "name": "event_type",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateMention",
+ "parent_entry": "ft.onto.base_ontology.Phrase",
+ "description": "A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.",
+ "attributes": [
+ {
+ "name": "predicate_lemma",
+ "type": "str"
+ },
+ {
+ "name": "framenet_id",
+ "type": "str"
+ },
+ {
+ "name": "is_verb",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateLink",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a semantic role link between a predicate and its argument.",
+ "attributes": [
+ {
+ "name": "arg_type",
+ "type": "str",
+ "description": "The predicate link type."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.PredicateMention",
+ "child_type": "ft.onto.base_ontology.PredicateArgument"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Dependency",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a syntactic dependency.",
+ "attributes": [
+ {
+ "name": "dep_label",
+ "type": "str",
+ "description": "The dependency label."
+ },
+ {
+ "name": "rel_type",
+ "type": "str"
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.Token",
+ "child_type": "ft.onto.base_ontology.Token"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EnhancedDependency",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a enhanced dependency: \n https://universaldependencies.org/u/overview/enhanced-syntax.html",
+ "attributes": [
+ {
+ "name": "dep_label",
+ "type": "str",
+ "description": "The enhanced dependency label in Universal Dependency."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.Token",
+ "child_type": "ft.onto.base_ontology.Token"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.RelationLink",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a relation between two entity mentions",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EntityMention",
+ "child_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CrossDocEntityRelation",
+ "parent_entry": "forte.data.ontology.top.MultiPackLink",
+ "description": "A `Link` type entry which represent a relation between two entity mentions across the packs.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EntityMention",
+ "child_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CoreferenceGroup",
+ "parent_entry": "forte.data.ontology.top.Group",
+ "description": "A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.",
+ "member_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EventRelation",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a relation between two event mentions.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EventMention",
+ "child_type": "ft.onto.base_ontology.EventMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CrossDocEventRelation",
+ "parent_entry": "forte.data.ontology.top.MultiPackLink",
+ "description": "A `Link` type entry which represent a relation between two event mentions across the packs.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EventMention",
+ "child_type": "ft.onto.base_ontology.EventMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.ConstituentNode",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also sentiment values annotated on the nodes.",
+ "attributes": [
+ {
+ "name": "label",
+ "type": "str"
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "is_root",
+ "type": "bool"
+ },
+ {
+ "name": "is_leaf",
+ "type": "bool"
+ },
+ {
+ "name": "parent_node",
+ "type": "ft.onto.base_ontology.ConstituentNode"
+ },
+ {
+ "name": "children_nodes",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.ConstituentNode"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Title",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Title`, normally used to represent a title."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Body",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Body`, normally used to represent a document body."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MCOption",
+ "parent_entry": "forte.data.ontology.top.Annotation"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MCQuestion",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "attributes": [
+ {
+ "name": "options",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.MCOption"
+ },
+ {
+ "name": "answers",
+ "type": "List",
+ "item_type": "int"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MRCQuestion",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "An `Annotation` type which represents an MRC question.",
+ "attributes": [
+ {
+ "name": "qid",
+ "type": "int"
+ },
+ {
+ "name": "answers",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.Phrase"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Recording",
+ "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+ "description": "A span based annotation `Recording`, normally used to represent a recording.",
+ "attributes": [
+ {
+ "name": "recording_class",
+ "type": "List",
+ "item_type": "str",
+ "description": "A list of class names that the recording belongs to."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.AudioUtterance",
+ "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+ "description": "A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+ "attributes": [
+ {
+ "name": "polarity",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
+ "parent_entry": "ft.onto.base_ontology.EntityMention",
+ "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
+ "attributes": [
+ {
+ "name": "umls_link",
+ "type": "str"
+ },
+ {
+ "name": "umls_entities",
+ "type": "List",
+ "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "An annotation based representation for the whole medical text chunk/document",
+ "attributes": [
+ {
+ "name": "icd_version",
+ "type": "int",
+ "description": "The version of ICD-Coding being used."
+ },
+ {
+ "name": "icd_code",
+ "type": "str",
+ "description": "The ICD code assigned to current medical article."
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.Disease",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Disease`, used to represent the diseases in a piece of clinical text."
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.Chemical",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Chemical`, used to represent the chemical in a piece of clinical text."
+ }
+ ]
+ },
+ "config": {
+ "legendConfigs": {
+ "ft.onto.base_ontology.Token": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "pos": false,
+ "ud_xpos": false,
+ "lemma": false,
+ "chunk": false,
+ "ner": false,
+ "sense": false
+ }
+ },
+ "ft.onto.base_ontology.Subword": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Classification": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Document": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Sentence": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ft.onto.base_ontology.Phrase": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "phrase_type": false
+ }
+ },
+ "ft.onto.base_ontology.UtteranceContext": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.Utterance": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateArgument": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "ner_type": false,
+ "predicate_lemma": false
+ }
+ },
+ "ft.onto.base_ontology.EntityMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "ner_type": false
+ }
+ },
+ "ft.onto.base_ontology.EventMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "event_type": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "predicate_lemma": false,
+ "framenet_id": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateLink": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "arg_type": false
+ }
+ },
+ "ft.onto.base_ontology.Dependency": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "dep_label": false,
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.EnhancedDependency": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "dep_label": false
+ }
+ },
+ "ft.onto.base_ontology.RelationLink": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CrossDocEntityRelation": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CoreferenceGroup": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.EventRelation": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CrossDocEventRelation": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.ConstituentNode": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "label": false
+ }
+ },
+ "ft.onto.base_ontology.Title": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.Body": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.MCOption": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.MCQuestion": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.MRCQuestion": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Recording": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.AudioUtterance": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ftx.medical.clinical_ontology.NegationContext": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ftx.medical.clinical_ontology.MedicalEntityMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "umls_link": false
+ }
+ },
+ "ftx.medical.clinical_ontology.MedicalArticle": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "icd_code": false
+ }
+ },
+ "ftx.medical.clinical_ontology.Disease": {
+ "is_selected": false,
+ "is_shown": true
+ },
+ "ftx.medical.clinical_ontology.Chemical": {
+ "is_selected": false,
+ "is_shown": true
+ }
+ },
+ "scopeConfigs": {
+ "ft.onto.base_ontology.Token": false,
+ "ft.onto.base_ontology.Subword": false,
+ "ft.onto.base_ontology.Document": false,
+ "ft.onto.base_ontology.Sentence": false,
+ "ft.onto.base_ontology.Phrase": false,
+ "ft.onto.base_ontology.UtteranceContext": false,
+ "ft.onto.base_ontology.Utterance": false,
+ "ft.onto.base_ontology.PredicateArgument": false,
+ "ft.onto.base_ontology.EntityMention": false,
+ "ft.onto.base_ontology.EventMention": false,
+ "ft.onto.base_ontology.PredicateMention": false,
+ "ft.onto.base_ontology.ConstituentNode": false,
+ "ft.onto.base_ontology.Title": false,
+ "ft.onto.base_ontology.Body": false,
+ "ft.onto.base_ontology.MCOption": false,
+ "ft.onto.base_ontology.MCQuestion": false,
+ "ft.onto.base_ontology.MRCQuestion": false,
+ "ftx.medical.clinical_ontology.NegationContext": false,
+ "ftx.medical.clinical_ontology.MedicalEntityMention": false,
+ "ftx.medical.clinical_ontology.MedicalArticle": false,
+ "ftx.medical.clinical_ontology.Disease": false,
+ "ftx.medical.clinical_ontology.Chemical": false
+ },
+ "layoutConfigs": {
+ "center-middle": "default-nlp",
+ "left": "default-meta",
+ "right": "default-attribute",
+ "center-bottom": "disable"
+ },
+ "remoteConfigs": {
+ "pipelineUrl": "",
+ "doValidation": false,
+ "expectedName": "",
+ "inputFormat": "string",
+ "expectedRecords": {}
+ }
+ }
+}
\ No newline at end of file
From f6dea81cd070a4652727b70d6c037bc20bf7d20d Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:48:34 +0800
Subject: [PATCH 10/38] add some CRUD to temporarily fix a bug
---
examples/label_example/sqlite_utils.py | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
index 6cc7c036..3d223078 100644
--- a/examples/label_example/sqlite_utils.py
+++ b/examples/label_example/sqlite_utils.py
@@ -2,8 +2,10 @@
this file defines sqlite3 related utils for inserting data to the database of stave.
"""
import json
+import yaml
from typing import List
from stave_backend.lib.stave_session import StaveSession
+from forte.common import Config
import sqlite3
@@ -76,4 +78,23 @@ def update_stave_db(default_project_json, config):
resp1 = session.create_project(default_project_json)
project_id_base = json.loads(resp1.text)["id"]
+
+ config = yaml.safe_load(open("stave_config.yml", "r"))
+ config = Config(config, default_hparams=None)
+ con = sqlite3.connect(config.Stave.stave_db_path)
+
+ cursorObj = con.cursor()
+ cursorObj.execute('SELECT ontology, config FROM stave_backend_project WHERE id = {0}'.format(project_id_base))
+ results = cursorObj.fetchall()
+ onto = results[0][0]
+ conf = results[0][1]
+
+ onto_new = onto.replace("\'","\"")
+ conf_new = conf.replace("\'", "\"").replace("True", "true").replace("False", "false")
+
+ cursorObj.execute("UPDATE stave_backend_project SET ontology ='" + onto_new + "' WHERE id = {0}".format(project_id_base))
+ cursorObj.execute("UPDATE stave_backend_project SET config ='" + conf_new + "' WHERE id = {0}".format(project_id_base))
+
+ con.commit()
+
return project_id_base
From 62328123008fdcb31797961734992aa1c423b4fc Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:20:16 +0800
Subject: [PATCH 11/38] black and pylint
---
examples/label_example/clinical_pipeline.py | 21 +++----------
examples/label_example/search_engine.py | 31 ++++++++----------
examples/label_example/sqlite_utils.py | 35 ++++++++++++++-------
3 files changed, 40 insertions(+), 47 deletions(-)
diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
index e4644c45..43047d7f 100644
--- a/examples/label_example/clinical_pipeline.py
+++ b/examples/label_example/clinical_pipeline.py
@@ -1,37 +1,24 @@
import sys
import time
-sys.path.insert(0,"E:\\NLP\\Forte\\ForteHealthBranches\\53\\ForteHealth")
-print(sys.path)
-
from forte.data.data_pack import DataPack
from forte.data.readers import PlainTextReader
from forte.pipeline import Pipeline
from forte.processors.writers import PackIdJsonPackWriter
+from mimic3_note_reader import Mimic3DischargeNoteReader
from fortex.elastic import ElasticSearchPackIndexProcessor
from fortex.health.processors.ner_label_processor import NERLabelProcessor
-# from ner_label_processor import NERLabelProcessor
-
-from mimic3_note_reader import Mimic3DischargeNoteReader
-
-# from stave_backend.lib.stave_session import StaveSession
-def main(
- input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
- ):
+def main(input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1):
pl = Pipeline[DataPack]()
if use_mimiciii_reader == 1:
- pl.set_reader(
- Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
- )
+ pl.set_reader(Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs})
else:
pl.set_reader(PlainTextReader())
- config_for_ner = {
- "labels": ["disease", "chemical"]
- }
+ config_for_ner = {"labels": ["disease", "chemical"]}
pl.add(NERLabelProcessor(), config=config_for_ner)
pl.add(
diff --git a/examples/label_example/search_engine.py b/examples/label_example/search_engine.py
index 03d58640..b1be57c2 100644
--- a/examples/label_example/search_engine.py
+++ b/examples/label_example/search_engine.py
@@ -19,9 +19,7 @@
default_project_json = get_json("default_onto_project.json")
-base_project_id = update_stave_db(
- default_project_json, config
- )
+base_project_id = update_stave_db(default_project_json, config)
st.title("Search the MIMIC III Data...")
search = st.text_input("Enter search words:")
@@ -41,10 +39,10 @@
# Now you can write the pack into the database and generate url.
item = {
- "name": f"clinical_results_{idx}",
- "textPack": raw_pack_str,
- "project_id": base_project_id,
- }
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": base_project_id,
+ }
db_id = sqlite_insert(conn, "stave_backend_document", item)
answers += [db_id]
@@ -70,10 +68,10 @@
highlight = "...".join(hit["highlight"]["content"])
# Now you can write the pack into the database and generate url.
item = {
- "name": f"clinical_results_{idx}",
- "textPack": raw_pack_str,
- "project_id": base_project_id,
- }
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": base_project_id,
+ }
db_id = sqlite_insert(conn, "stave_backend_document", item)
answers += [db_id]
@@ -82,12 +80,9 @@
links: List[str] = create_links(config.Stave.url, answers)
- for i in range(len(links)):
+ for i, _ in enumerate(links):
st.write(links[i], unsafe_allow_html=True)
st.write(
- templates.search_result(
- docs[i]
- .replace("\n", " ")
- ),
- unsafe_allow_html=True,
- )
+ templates.search_result(docs[i].replace("\n", " ")),
+ unsafe_allow_html=True,
+ )
diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
index 3d223078..da82e0ed 100644
--- a/examples/label_example/sqlite_utils.py
+++ b/examples/label_example/sqlite_utils.py
@@ -1,12 +1,13 @@
"""
-this file defines sqlite3 related utils for inserting data to the database of stave.
+this file defines sqlite3 related utils for inserting data to
+the database of stave.
"""
import json
-import yaml
from typing import List
+import sqlite3
+import yaml
from stave_backend.lib.stave_session import StaveSession
from forte.common import Config
-import sqlite3
def sqlite_insert(conn, table, row):
@@ -18,7 +19,7 @@ def sqlite_insert(conn, table, row):
"""
cols: str = ", ".join('"{}"'.format(col) for col in row.keys())
vals: str = ", ".join(":{}".format(col) for col in row.keys())
- sql: str = 'INSERT INTO "{0}" ({1}) VALUES ({2})'.format(table, cols, vals)
+ sql: str = f'INSERT INTO "{table}" ({cols}) VALUES ({vals})'
cursor = conn.cursor()
cursor.execute(sql, row)
conn.commit()
@@ -65,9 +66,7 @@ def update_stave_db(default_project_json, config):
projects = session.get_project_list().json()
project_names = [project["name"] for project in projects]
- if (
- default_project_json["name"] in project_names
- ):
+ if default_project_json["name"] in project_names:
base_project = [
proj
@@ -84,16 +83,28 @@ def update_stave_db(default_project_json, config):
con = sqlite3.connect(config.Stave.stave_db_path)
cursorObj = con.cursor()
- cursorObj.execute('SELECT ontology, config FROM stave_backend_project WHERE id = {0}'.format(project_id_base))
+ cursorObj.execute(
+ f"SELECT ontology, config FROM stave_backend_project WHERE id = {project_id_base}"
+ )
results = cursorObj.fetchall()
onto = results[0][0]
conf = results[0][1]
- onto_new = onto.replace("\'","\"")
- conf_new = conf.replace("\'", "\"").replace("True", "true").replace("False", "false")
+ onto_new = onto.replace("'", '"')
+ conf_new = (
+ conf.replace("'", '"').replace("True", "true").replace("False", "false")
+ )
- cursorObj.execute("UPDATE stave_backend_project SET ontology ='" + onto_new + "' WHERE id = {0}".format(project_id_base))
- cursorObj.execute("UPDATE stave_backend_project SET config ='" + conf_new + "' WHERE id = {0}".format(project_id_base))
+ cursorObj.execute(
+ "UPDATE stave_backend_project SET ontology ='"
+ + onto_new
+ + f"' WHERE id = {project_id_base}"
+ )
+ cursorObj.execute(
+ "UPDATE stave_backend_project SET config ='"
+ + conf_new
+ + f"' WHERE id = {project_id_base}"
+ )
con.commit()
From a8e880368cd0269e22c8c1dbb3286590f2dc701a Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:20:59 +0800
Subject: [PATCH 12/38] solve pylint issues
---
fortex/health/processors/ner_label_processor.py | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 92dbfcd5..3e52f357 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -14,9 +14,8 @@
"""
NER Labeling Processor
"""
-
-import spacy
from typing import Dict, Set
+import spacy
from forte.data.data_pack import DataPack
from forte.processors.base import PackProcessor
from forte.common.configuration import Config
@@ -94,9 +93,7 @@ def default_configs(cls):
Returns: A dictionary with the default config for this processor.
"""
- return {
- "labels":["disease","chemical"]
- }
+ return {"labels": ["disease", "chemical"]}
def record(self, record_meta: Dict[str, Set[str]]):
r"""
From e5418512e0080087431c3768fae88e20d1924e45 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:21:15 +0800
Subject: [PATCH 13/38] normalize the config
---
examples/label_example/stave_config.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/label_example/stave_config.yml b/examples/label_example/stave_config.yml
index f9ff6f02..1b5e0c7f 100644
--- a/examples/label_example/stave_config.yml
+++ b/examples/label_example/stave_config.yml
@@ -1,5 +1,5 @@
Stave:
- stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+ stave_db_path: "$HOME//.stave//db.sqlite3"
url: "http://localhost:8899"
username: admin
pw: admin
From 9c7d6e2d8b9c474d32a67b9a91ebb32fedc04128 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:29:34 +0800
Subject: [PATCH 14/38] solve black issue
---
fortex/health/processors/ner_label_processor.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 3e52f357..939cae69 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -69,18 +69,14 @@ def _process(self, input_pack: DataPack):
for ent in result.ents:
if ent.label_ == "DISEASE":
Disease(
- pack=input_pack,
- begin=ent.start_char,
- end=ent.end_char
+ pack=input_pack, begin=ent.start_char, end=ent.end_char
)
if "chemical" in labels:
for ent in result.ents:
if ent.label_ == "CHEMICAL":
Chemical(
- pack=input_pack,
- begin=ent.start_char,
- end=ent.end_char
+ pack=input_pack, begin=ent.start_char, end=ent.end_char
)
@classmethod
From c7ea7e16c298057e3da63fac7a6f9272ee8ad6f9 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:36:10 +0800
Subject: [PATCH 15/38] solve pylint issue
---
examples/label_example/clinical_pipeline.py | 14 +++-
examples/label_example/mimic3_note_reader.py | 80 --------------------
2 files changed, 11 insertions(+), 83 deletions(-)
delete mode 100644 examples/label_example/mimic3_note_reader.py
diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
index 43047d7f..3d4939b3 100644
--- a/examples/label_example/clinical_pipeline.py
+++ b/examples/label_example/clinical_pipeline.py
@@ -5,16 +5,24 @@
from forte.data.readers import PlainTextReader
from forte.pipeline import Pipeline
from forte.processors.writers import PackIdJsonPackWriter
-from mimic3_note_reader import Mimic3DischargeNoteReader
+from fortex.health.readers import Mimic3DischargeNoteReader
from fortex.elastic import ElasticSearchPackIndexProcessor
from fortex.health.processors.ner_label_processor import NERLabelProcessor
-def main(input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1):
+def main(
+ input_path: str,
+ output_path: str,
+ max_packs: int = -1,
+ use_mimiciii_reader=1
+ ):
pl = Pipeline[DataPack]()
if use_mimiciii_reader == 1:
- pl.set_reader(Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs})
+ pl.set_reader(
+ Mimic3DischargeNoteReader(),
+ config={"max_num_notes": max_packs}
+ )
else:
pl.set_reader(PlainTextReader())
diff --git a/examples/label_example/mimic3_note_reader.py b/examples/label_example/mimic3_note_reader.py
deleted file mode 100644
index b3f02de6..00000000
--- a/examples/label_example/mimic3_note_reader.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright 2021 The Forte Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import csv
-import logging
-from pathlib import Path
-from typing import Any, Iterator, Union, List
-
-from smart_open import open
-
-from demo.clinical import Description, Body
-from forte.data.data_pack import DataPack
-from forte.data.base_reader import PackReader
-from ft.onto.base_ontology import Document
-
-
-class Mimic3DischargeNoteReader(PackReader):
- """This class is designed to read the discharge notes from MIMIC3 dataset
- as plain text packs.
-
- For more information for the dataset, visit:
- https://mimic.physionet.org/
- """
-
- def __init__(self):
- super().__init__()
- self.headers: List[str] = []
- self.text_col = -1 # Default to be last column.
- self.description_col = 0 # Default to be first column.
- self.__note_count = 0 # Count number of notes processed.
-
- def _collect( # type: ignore
- self, mimic3_path: Union[Path, str]
- ) -> Iterator[Any]:
- with open(mimic3_path) as f:
- for r in csv.reader(f):
- if 0 < self.configs.max_num_notes <= self.__note_count:
- break
- yield r
-
- def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
- if len(self.headers) == 0:
- self.headers.extend(row)
- for i, h in enumerate(self.headers):
- if h == "TEXT":
- self.text_col = i
- logging.info("Text Column is %d", i)
- if h == "DESCRIPTION":
- self.description_col = i
- logging.info("Description Column is %d", i)
- else:
- pack: DataPack = DataPack()
- description: str = row[self.description_col]
- text: str = row[self.text_col]
- delimiter = "\n-----------------\n"
- full_text = description + delimiter + text
- pack.set_text(full_text)
-
- Description(pack, 0, len(description))
- Body(pack, len(description) + len(delimiter), len(full_text))
- Document(pack, 0, len(pack.text))
- self.__note_count += 1
- yield pack
-
- @classmethod
- def default_configs(cls):
- # If this is set (>0), the reader will only read up to
- # the number specified.
- return {'max_num_notes':-1}
From 557b8276bc1fbf4806ccf3e9afb6520715a9bc8a Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:40:24 +0800
Subject: [PATCH 16/38] solve pylint issue: import itself
---
fortex/health/readers/__init__.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/fortex/health/readers/__init__.py b/fortex/health/readers/__init__.py
index 076a48e7..d3745f4b 100644
--- a/fortex/health/readers/__init__.py
+++ b/fortex/health/readers/__init__.py
@@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
-from fortex.health.readers.mimic3_note_reader import *
From 2e2dcd50676d84d04c0c80f0a130fb12ba9d9fa4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 03:20:08 +0800
Subject: [PATCH 17/38] add a README file
---
examples/label_example/README.md | 89 ++++++++++++++++++++++++++++++++
1 file changed, 89 insertions(+)
create mode 100644 examples/label_example/README.md
diff --git a/examples/label_example/README.md b/examples/label_example/README.md
new file mode 100644
index 00000000..dbcb9bee
--- /dev/null
+++ b/examples/label_example/README.md
@@ -0,0 +1,89 @@
+## NER Label Example
+
+This example shows how we start a search engine in streamlit and link the search results to stave.
+
+## Install extra dependencies
+
+To install from PyPI,
+```bash
+pip install forte.elastic
+pip install forte.health
+pip install stave
+pip install streamlit
+```
+
+## Download spaCy model
+
+run the following command to download the model
+```bash
+pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz
+```
+
+## Set up the configuration
+Before running Elasticsearch and Stave, we need to ensure that the current configuration is compatible with the environment of our computer.
+
+Please check and change the following configurations in `stave_config.yml`:
+
+1. Ensure `Stave.stave_db_path` points to the Stave database file in your home directory -> `$HOME/.stave/db.sqlite3`, e.g., `"/home/name/.stave/db.sqlite3"`.
+2. Ensure `Stave.username` and `Stave.pw` are `"admin"` and `"admin"`.
+
+## Prepare elastic searcher
+Download corresponding elasticsearch archive from https://www.elastic.co/downloads/past-releases/elasticsearch-7-17-2, unzip it and run `elasticsearch-7-17-2/bin/elasticsearch` to start the service.
+
+Run the following to check if elasticsearch is running properly:
+```bash
+curl -XGET localhost:9200/_cluster/health?pretty
+```
+
+Make sure you create index 'elastic_indexer' in the cluster before working with this example, you can run the following command:
+```bash
+curl -X PUT localhost:9200/elastic_indexer
+```
+
+You can also follow the online blog for more information:
+
+https://www.elastic.co/guide/en/elasticsearch/reference/current/starting-elasticsearch.html
+
+## Run pipeline
+First, you should start an Elastic Indexer backend.
+
+Now, open a terminal. You can run the following command to parse some files and index them.
+```bash
+python clinical_pipeline.py path_to_mimiciii/1.4/NOTEEVENTS.csv.gz path_to_mimiciii_output 10 1
+```
+
+Here, we write out the raw data packs to `path_to_mimiciii_output`, and only index the first 10 notes. You can change the number to whatever you want in the above command.
+
+Also, we write the data into Elasticsearch. You can run the following command to check whether the 10 notes were written into the index:
+
+```bash
+curl -X GET localhost:9200/elastic_indexer/_search
+```
+
+## Run indexer and Stave
+Again, you should start an Elastic Indexer backend.
+
+Then, to start the Stave server that our pipeline will connect to for visualization purposes, run
+```bash
+stave -s start -o -l -n 8899
+```
+Then, login with username (admin) and password (admin).
+
+Here, you need to make sure `Stave.url` in `stave_config.yml` is `"http://localhost:8899"`. Or you can change the port 8899 to any port you like.
+
+## Run streamlit
+
+To run streamlit, the python version should be >= 3.7.2.
+
+Now, open the terminal. Run the following command to start the streamlit.
+```bash
+streamlit run search_engine.py
+```
+
+Now open `http://localhost:8501` on your browser to access the streamlit interface.
+
+Next, you will see the reports shown on the interface. You can also search with the search engine.
+
+Click the link of a report to open it in Stave, the visualization and annotation page.
+
+Select the radio buttons (Disease and Chemical) on the sidebar to see the annotations in the UI.
From 8ba0d6f527a63d264922f5c6dd7bef7d27f0a874 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 03:56:10 +0800
Subject: [PATCH 18/38] add ner label test
---
.../processors/ner_label_processor_test.py | 62 +++++++++++++++++++
1 file changed, 62 insertions(+)
create mode 100644 tests/fortex/health/processors/ner_label_processor_test.py
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
new file mode 100644
index 00000000..a91f1356
--- /dev/null
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -0,0 +1,62 @@
+# Copyright 2022 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Unit tests for NERLabelProcessor
+"""
+
+import unittest
+
+from ddt import data, ddt
+from forte.data.data_pack import DataPack
+from forte.data.readers import StringReader
+from forte.pipeline import Pipeline
+from fortex.health.processors.ner_label_processor import NERLabelProcessor
+from ftx.medical.clinical_ontology import Chemical, Disease
+
+
+@ddt
+class TestNERLabelProcessor(unittest.TestCase):
+
+ @data(
+ "He got cancer, and he needs oxygen."
+ )
+ def test_ner_label_processor(self, input_data):
+ self.nlp = Pipeline[DataPack]()
+ self.nlp.set_reader(StringReader())
+ config = {
+ "labels": ["disease", "chemical"]
+ }
+
+ self.nlp.add(NERLabelProcessor(), config=config)
+ self.nlp.initialize()
+ pack = self.nlp.process(input_data)
+
+ exp_disease = ["cancer"]
+ disease = []
+
+ for idx, d in enumerate(pack.get(Disease)):
+ disease.append(d.text)
+
+ assert exp_disease == disease
+
+ exp_chemical = ["oxygen"]
+ chemical = []
+ for idx, c in enumerate(pack.get(Chemical)):
+ chemical.append(c.text)
+
+ assert exp_chemical == chemical
+
+
+if __name__ == "__main__":
+ unittest.main()
From 555610bcb0e52b9f8e51fafdebee0be00d173fc2 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:01:52 +0800
Subject: [PATCH 19/38] solve black issue
---
tests/fortex/health/processors/ner_label_processor_test.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index a91f1356..dc24cc97 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -21,7 +21,9 @@
from forte.data.data_pack import DataPack
from forte.data.readers import StringReader
from forte.pipeline import Pipeline
-from fortex.health.processors.ner_label_processor import NERLabelProcessor
+from fortex.health.processors.ner_label_processor import (
+ NERLabelProcessor
+)
from ftx.medical.clinical_ontology import Chemical, Disease
From 94ae23049d56ed7ae37d4192fa5766f4a197741f Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:06:05 +0800
Subject: [PATCH 20/38] black check
---
.../health/processors/ner_label_processor_test.py | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index dc24cc97..b89b5806 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -21,24 +21,17 @@
from forte.data.data_pack import DataPack
from forte.data.readers import StringReader
from forte.pipeline import Pipeline
-from fortex.health.processors.ner_label_processor import (
- NERLabelProcessor
-)
+from fortex.health.processors.ner_label_processor import NERLabelProcessor
from ftx.medical.clinical_ontology import Chemical, Disease
@ddt
class TestNERLabelProcessor(unittest.TestCase):
-
- @data(
- "He got cancer, and he needs oxygen."
- )
+ @data("He got cancer, and he needs oxygen.")
def test_ner_label_processor(self, input_data):
self.nlp = Pipeline[DataPack]()
self.nlp.set_reader(StringReader())
- config = {
- "labels": ["disease", "chemical"]
- }
+ config = {"labels": ["disease", "chemical"]}
self.nlp.add(NERLabelProcessor(), config=config)
self.nlp.initialize()
From e21b7a2b93ed95d1fc4ca590983ccd2e03f59480 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:11:44 +0800
Subject: [PATCH 21/38] remove main
---
tests/fortex/health/processors/ner_label_processor_test.py | 4 ----
1 file changed, 4 deletions(-)
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index b89b5806..23a7bbe0 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -51,7 +51,3 @@ def test_ner_label_processor(self, input_data):
chemical.append(c.text)
assert exp_chemical == chemical
-
-
-if __name__ == "__main__":
- unittest.main()
From 42b2b063a0f8f4092a6eb2cb5708ec7f980e3a18 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:28:52 +0800
Subject: [PATCH 22/38] add set_up
---
fortex/health/processors/ner_label_processor.py | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 939cae69..0812bcf1 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -15,6 +15,9 @@
NER Labeling Processor
"""
from typing import Dict, Set
+import subprocess
+import sys
+import os
import spacy
from forte.data.data_pack import DataPack
from forte.processors.base import PackProcessor
@@ -45,6 +48,13 @@ def __init__(self):
super().__init__()
self.nlp = None
+ def set_up(self):
+ download_url = "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"
+ command = [sys.executable, "-m", "pip", "install"] + [download_url]
+ subprocess.run(
+ command, env=os.environ.copy(), encoding="utf8", check=False
+ )
+
def initialize(self, resources: Resources, configs: Config):
super().initialize(resources, configs)
self.nlp = spacy.load("en_ner_bc5cdr_md")
From 7015b8412cb84142048dcfa2209c7ff20a8a78b3 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:30:40 +0800
Subject: [PATCH 23/38] shorten the string
---
fortex/health/processors/ner_label_processor.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 0812bcf1..97b6ebd6 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -49,7 +49,8 @@ def __init__(self):
self.nlp = None
def set_up(self):
- download_url = "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"
+ download_url = """https://s3-us-west-2.amazonaws.com/
+ ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"""
command = [sys.executable, "-m", "pip", "install"] + [download_url]
subprocess.run(
command, env=os.environ.copy(), encoding="utf8", check=False
From 560631f53a40be5eee10dc8e99b51ff9e52328e2 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:42:20 +0800
Subject: [PATCH 24/38] fix test bug
---
fortex/health/processors/ner_label_processor.py | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 97b6ebd6..a25cb253 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -14,11 +14,13 @@
"""
NER Labeling Processor
"""
+import importlib
from typing import Dict, Set
import subprocess
import sys
import os
import spacy
+from spacy.cli.download import download
from forte.data.data_pack import DataPack
from forte.processors.base import PackProcessor
from forte.common.configuration import Config
@@ -49,12 +51,7 @@ def __init__(self):
self.nlp = None
def set_up(self):
- download_url = """https://s3-us-west-2.amazonaws.com/
- ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"""
- command = [sys.executable, "-m", "pip", "install"] + [download_url]
- subprocess.run(
- command, env=os.environ.copy(), encoding="utf8", check=False
- )
+ download("en_ner_bc5cdr_md")
def initialize(self, resources: Resources, configs: Config):
super().initialize(resources, configs)
From b83e4aefa80a96f74faefe8b8261cfbf92a8fda0 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:46:25 +0800
Subject: [PATCH 25/38] remove unused import
---
fortex/health/processors/ner_label_processor.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index a25cb253..1bd37df2 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -14,11 +14,8 @@
"""
NER Labeling Processor
"""
-import importlib
+
from typing import Dict, Set
-import subprocess
-import sys
-import os
import spacy
from spacy.cli.download import download
from forte.data.data_pack import DataPack
From 3634a07a36ad61de8fc36e172e1321be486802d4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:53:01 +0800
Subject: [PATCH 26/38] fix pytest issue
---
fortex/health/processors/ner_label_processor.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 1bd37df2..fffe00da 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -47,11 +47,9 @@ def __init__(self):
super().__init__()
self.nlp = None
- def set_up(self):
- download("en_ner_bc5cdr_md")
-
def initialize(self, resources: Resources, configs: Config):
super().initialize(resources, configs)
+ download("en_ner_bc5cdr_md")
self.nlp = spacy.load("en_ner_bc5cdr_md")
def _process(self, input_pack: DataPack):
From 0ccdfa44b5591cb5fe9c658b12cb93b147be591e Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 05:03:40 +0800
Subject: [PATCH 27/38] fix pytest bug
---
.../health/processors/ner_label_processor.py | 46 ++++++++++++++++++-
1 file changed, 44 insertions(+), 2 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index fffe00da..970a0b1b 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -32,6 +32,49 @@
"NERLabelProcessor",
]
+CUSTOM_SPACYMODEL_URL = {
+ "en_core_sci_sm": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+ "/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz",
+ "en_core_sci_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+ "/releases/v0.3.0/en_core_sci_md-0.3.0.tar.gz",
+ "en_core_sci_lg": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+ "/releases/v0.3.0/en_core_sci_lg-0.3.0.tar.gz",
+ "en_ner_craft_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+ "/releases/v0.3.0/en_ner_craft_md-0.3.0.tar.gz",
+ "en_ner_jnlpba_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+ "/releases/v0.3.0/en_ner_jnlpba_md-0.3.0.tar.gz",
+ "en_ner_bc5cdr_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+ "/releases/v0.3.0/en_ner_bc5cdr_md-0.3.0.tar.gz",
+ "en_ner_bionlp13cg_md": "https://s3-us-west-2.amazonaws.com/ai2-s2"
+ "-scispacy/releases/v0.3.0/en_ner_bionlp13cg_md-0"
+ ".3.0.tar.gz",
+}
+
+def load_lang_model(lang_model):
+ # pylint: disable=import-outside-toplevel
+ if lang_model in CUSTOM_SPACYMODEL_URL:
+ # download ScispaCy model using URL
+ import subprocess
+ import sys
+ import os
+ import importlib
+
+ download_url = CUSTOM_SPACYMODEL_URL[lang_model]
+ command = [sys.executable, "-m", "pip", "install"] + [download_url]
+ subprocess.run(
+ command, env=os.environ.copy(), encoding="utf8", check=False
+ )
+ cls = importlib.import_module(lang_model)
+ return cls.load() # type: ignore
+ else:
+ # Use spaCy download
+ try:
+ nlp = spacy.load(lang_model) # type: ignore
+ except OSError:
+ download(lang_model)
+ nlp = spacy.load(lang_model) # type: ignore
+ return nlp
+
class NERLabelProcessor(PackProcessor):
r"""
@@ -49,8 +92,7 @@ def __init__(self):
def initialize(self, resources: Resources, configs: Config):
super().initialize(resources, configs)
- download("en_ner_bc5cdr_md")
- self.nlp = spacy.load("en_ner_bc5cdr_md")
+ self.nlp = load_lang_model("en_ner_bc5cdr_md")
def _process(self, input_pack: DataPack):
r"""
From b9d08cf6f455873c1a8f2f9e75e3633607d44138 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 11:52:57 +0800
Subject: [PATCH 28/38] black reformat
---
fortex/health/processors/ner_label_processor.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 970a0b1b..d9d48694 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -50,6 +50,7 @@
".3.0.tar.gz",
}
+
def load_lang_model(lang_model):
# pylint: disable=import-outside-toplevel
if lang_model in CUSTOM_SPACYMODEL_URL:
From deefc849b37dc10f7bb7d3093626987b954f43f1 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 12:00:12 +0800
Subject: [PATCH 29/38] remove unused comment
---
fortex/health/processors/ner_label_processor.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index d9d48694..54ebe68e 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -66,14 +66,14 @@ def load_lang_model(lang_model):
command, env=os.environ.copy(), encoding="utf8", check=False
)
cls = importlib.import_module(lang_model)
- return cls.load() # type: ignore
+ return cls.load()
else:
# Use spaCy download
try:
- nlp = spacy.load(lang_model) # type: ignore
+ nlp = spacy.load(lang_model)
except OSError:
download(lang_model)
- nlp = spacy.load(lang_model) # type: ignore
+ nlp = spacy.load(lang_model)
return nlp
From 7625dd95f5deb5c38879945de1bafd4a128469ec Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Mon, 25 Jul 2022 17:51:24 +0800
Subject: [PATCH 30/38] add json dumps and remove SQL statements
---
examples/label_example/sqlite_utils.py | 26 +-------------------------
1 file changed, 1 insertion(+), 25 deletions(-)
diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
index da82e0ed..38d61dce 100644
--- a/examples/label_example/sqlite_utils.py
+++ b/examples/label_example/sqlite_utils.py
@@ -75,37 +75,13 @@ def update_stave_db(default_project_json, config):
][0]
return base_project["id"]
- resp1 = session.create_project(default_project_json)
+ resp1 = session.create_project(json.dumps(default_project_json))
project_id_base = json.loads(resp1.text)["id"]
config = yaml.safe_load(open("stave_config.yml", "r"))
config = Config(config, default_hparams=None)
con = sqlite3.connect(config.Stave.stave_db_path)
- cursorObj = con.cursor()
- cursorObj.execute(
- f"SELECT ontology, config FROM stave_backend_project WHERE id = {project_id_base}"
- )
- results = cursorObj.fetchall()
- onto = results[0][0]
- conf = results[0][1]
-
- onto_new = onto.replace("'", '"')
- conf_new = (
- conf.replace("'", '"').replace("True", "true").replace("False", "false")
- )
-
- cursorObj.execute(
- "UPDATE stave_backend_project SET ontology ='"
- + onto_new
- + f"' WHERE id = {project_id_base}"
- )
- cursorObj.execute(
- "UPDATE stave_backend_project SET config ='"
- + conf_new
- + f"' WHERE id = {project_id_base}"
- )
-
con.commit()
return project_id_base
From b9af6dfa9b3d6eca7af55a28ac16cec492a762f4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:03:46 +0800
Subject: [PATCH 31/38] wrap search engine code in method
---
examples/label_example/search_engine.py | 159 ++++++++++++------------
1 file changed, 82 insertions(+), 77 deletions(-)
diff --git a/examples/label_example/search_engine.py b/examples/label_example/search_engine.py
index b1be57c2..4c9bc98f 100644
--- a/examples/label_example/search_engine.py
+++ b/examples/label_example/search_engine.py
@@ -9,80 +9,85 @@
import templates
-st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
-
-es = Elasticsearch(hosts=["http://localhost:9200/"])
-INDEX = "elastic_indexer"
-
-config = yaml.safe_load(open("stave_config.yml", "r"))
-config = Config(config, default_hparams=None)
-
-default_project_json = get_json("default_onto_project.json")
-
-base_project_id = update_stave_db(default_project_json, config)
-
-st.title("Search the MIMIC III Data...")
-search = st.text_input("Enter search words:")
-
-if not search:
- records = {}
- results = all_search(es, INDEX)
- hits = results["hits"]["hits"]
-
- conn = sqlite3.connect(config.Stave.stave_db_path)
- answers = []
- for idx, hit in enumerate(hits):
- source = hit["_source"]
- # The raw pack string and pack id (not database id)
- raw_pack_str: str = source["pack_info"]
- pack_id: str = source["doc_id"]
-
- # Now you can write the pack into the database and generate url.
- item = {
- "name": f"clinical_results_{idx}",
- "textPack": raw_pack_str,
- "project_id": base_project_id,
- }
-
- db_id = sqlite_insert(conn, "stave_backend_document", item)
- answers += [db_id]
- print(pack_id, db_id)
-
- links: List[str] = create_links(config.Stave.url, answers)
-
- for link in links:
- st.write(link, unsafe_allow_html=True)
-
-if search:
- results = index_search(es, INDEX, search)
- hits = results["hits"]["hits"]
-
- conn = sqlite3.connect(config.Stave.stave_db_path)
- answers = []
- docs = []
- for idx, hit in enumerate(hits):
- source = hit["_source"]
- # The raw pack string and pack id (not database id)
- raw_pack_str: str = source["pack_info"]
- pack_id: str = source["doc_id"]
- highlight = "...".join(hit["highlight"]["content"])
- # Now you can write the pack into the database and generate url.
- item = {
- "name": f"clinical_results_{idx}",
- "textPack": raw_pack_str,
- "project_id": base_project_id,
- }
-
- db_id = sqlite_insert(conn, "stave_backend_document", item)
- answers += [db_id]
-
- docs.append(highlight)
-
- links: List[str] = create_links(config.Stave.url, answers)
-
- for i, _ in enumerate(links):
- st.write(links[i], unsafe_allow_html=True)
- st.write(
- templates.search_result(docs[i].replace("\n", " ")),
- unsafe_allow_html=True,
- )
+def main():
+ st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
+
+ es = Elasticsearch(hosts=["http://localhost:9200/"])
+ INDEX = "elastic_indexer"
+
+ config = yaml.safe_load(open("stave_config.yml", "r"))
+ config = Config(config, default_hparams=None)
+
+ default_project_json = get_json("default_onto_project.json")
+
+ base_project_id = update_stave_db(default_project_json, config)
+
+ st.title("Search the MIMIC III Data...")
+ search = st.text_input("Enter search words:")
+
+ if not search:
+ records = {}
+ results = all_search(es, INDEX)
+ hits = results["hits"]["hits"]
+
+ conn = sqlite3.connect(config.Stave.stave_db_path)
+ answers = []
+ for idx, hit in enumerate(hits):
+ source = hit["_source"]
+ # The raw pack string and pack id (not database id)
+ raw_pack_str: str = source["pack_info"]
+ pack_id: str = source["doc_id"]
+
+ # Now you can write the pack into the database and generate url.
+ item = {
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": base_project_id,
+ }
+
+ db_id = sqlite_insert(conn, "stave_backend_document", item)
+ answers += [db_id]
+ print(pack_id, db_id)
+
+ links: List[str] = create_links(config.Stave.url, answers)
+
+ for link in links:
+ st.write(link, unsafe_allow_html=True)
+
+ if search:
+ results = index_search(es, INDEX, search)
+ hits = results["hits"]["hits"]
+
+ conn = sqlite3.connect(config.Stave.stave_db_path)
+ answers = []
+ docs = []
+ for idx, hit in enumerate(hits):
+ source = hit["_source"]
+ # The raw pack string and pack id (not database id)
+ raw_pack_str: str = source["pack_info"]
+ pack_id: str = source["doc_id"]
+ highlight = "...".join(hit["highlight"]["content"])
+ # Now you can write the pack into the database and generate url.
+ item = {
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": base_project_id,
+ }
+
+ db_id = sqlite_insert(conn, "stave_backend_document", item)
+ answers += [db_id]
+
+ docs.append(highlight)
+
+ links: List[str] = create_links(config.Stave.url, answers)
+
+ for i, _ in enumerate(links):
+ st.write(links[i], unsafe_allow_html=True)
+ st.write(
+ templates.search_result(docs[i].replace("\n", " ")),
+ unsafe_allow_html=True,
+ )
+
+
+if __name__ == '__main__':
+ main()
From 8ffa0fac22e5434bd5eba2e6db918f48fcb09d51 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:04:49 +0800
Subject: [PATCH 32/38] remove unnecessary comments
---
examples/label_example/search_utils.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/examples/label_example/search_utils.py b/examples/label_example/search_utils.py
index eb85da10..35f1e6c4 100644
--- a/examples/label_example/search_utils.py
+++ b/examples/label_example/search_utils.py
@@ -47,8 +47,6 @@ def index_search(es, index: str, keywords: str) -> dict:
"post_tags": [""],
"fields": {"content": {}},
},
- # "from": from_i,
- # "size": size,
"aggs": {"match_count": {"value_count": {"field": "_id"}}},
}
From 9532d136408752cb991c3fbb1c873a606d887ce4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:20:08 +0800
Subject: [PATCH 33/38] remove extra empty lines
---
examples/label_example/templates.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/examples/label_example/templates.py b/examples/label_example/templates.py
index 63bf9aa2..5278a33a 100644
--- a/examples/label_example/templates.py
+++ b/examples/label_example/templates.py
@@ -15,7 +15,6 @@ def number_of_results(total_hits: int, duration: float) -> str:
def search_result(highlights: str) -> str:
"""HTML scripts to display search results."""
return f"""
-
{highlights}
From 24dca3e8ab6b9ce7e57ea9b835dcaf38f082db77 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:40:09 +0800
Subject: [PATCH 34/38] add lang model as config
---
fortex/health/processors/ner_label_processor.py | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 54ebe68e..cbb938c0 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -93,7 +93,7 @@ def __init__(self):
def initialize(self, resources: Resources, configs: Config):
super().initialize(resources, configs)
- self.nlp = load_lang_model("en_ner_bc5cdr_md")
+ self.nlp = load_lang_model(configs.lang)
def _process(self, input_pack: DataPack):
r"""
@@ -111,15 +111,13 @@ def _process(self, input_pack: DataPack):
)
result = self.nlp(doc)
- if "disease" in labels:
- for ent in result.ents:
+ for ent in result.ents:
+ if "disease" in labels:
if ent.label_ == "DISEASE":
Disease(
pack=input_pack, begin=ent.start_char, end=ent.end_char
)
-
- if "chemical" in labels:
- for ent in result.ents:
+ if "chemical" in labels:
if ent.label_ == "CHEMICAL":
Chemical(
pack=input_pack, begin=ent.start_char, end=ent.end_char
@@ -135,7 +133,10 @@ def default_configs(cls):
Returns: A dictionary with the default config for this processor.
"""
- return {"labels": ["disease", "chemical"]}
+ return {
+ "labels": ["disease", "chemical"],
+ "lang": "en_ner_bc5cdr_md"
+ }
def record(self, record_meta: Dict[str, Set[str]]):
r"""
From 9d98979e980925867f96ce05db7bc670d8fa92dc Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:41:08 +0800
Subject: [PATCH 35/38] fix dependency
---
examples/label_example/clinical_pipeline.py | 5 ++++-
tests/fortex/health/processors/ner_label_processor_test.py | 5 ++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
index 3d4939b3..24df20de 100644
--- a/examples/label_example/clinical_pipeline.py
+++ b/examples/label_example/clinical_pipeline.py
@@ -26,7 +26,10 @@ def main(
else:
pl.set_reader(PlainTextReader())
- config_for_ner = {"labels": ["disease", "chemical"]}
+ config_for_ner = {
+ "labels": ["disease", "chemical"],
+ "lang": "en_ner_bc5cdr_md"
+ }
pl.add(NERLabelProcessor(), config=config_for_ner)
pl.add(
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index 23a7bbe0..4271f31f 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -31,7 +31,10 @@ class TestNERLabelProcessor(unittest.TestCase):
def test_ner_label_processor(self, input_data):
self.nlp = Pipeline[DataPack]()
self.nlp.set_reader(StringReader())
- config = {"labels": ["disease", "chemical"]}
+ config = {
+ "labels": ["disease", "chemical"],
+ "lang": "en_ner_bc5cdr_md"
+ }
self.nlp.add(NERLabelProcessor(), config=config)
self.nlp.initialize()
From 1b2263ed627225b895214c4292065631cdc09356 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:46:43 +0800
Subject: [PATCH 36/38] black reformat
---
fortex/health/processors/ner_label_processor.py | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index cbb938c0..30c8e1bd 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -62,9 +62,7 @@ def load_lang_model(lang_model):
download_url = CUSTOM_SPACYMODEL_URL[lang_model]
command = [sys.executable, "-m", "pip", "install"] + [download_url]
- subprocess.run(
- command, env=os.environ.copy(), encoding="utf8", check=False
- )
+ subprocess.run(command, env=os.environ.copy(), encoding="utf8", check=False)
cls = importlib.import_module(lang_model)
return cls.load()
else:
@@ -114,14 +112,10 @@ def _process(self, input_pack: DataPack):
for ent in result.ents:
if "disease" in labels:
if ent.label_ == "DISEASE":
- Disease(
- pack=input_pack, begin=ent.start_char, end=ent.end_char
- )
+ Disease(pack=input_pack, begin=ent.start_char, end=ent.end_char)
if "chemical" in labels:
if ent.label_ == "CHEMICAL":
- Chemical(
- pack=input_pack, begin=ent.start_char, end=ent.end_char
- )
+ Chemical(pack=input_pack, begin=ent.start_char, end=ent.end_char)
@classmethod
def default_configs(cls):
@@ -133,10 +127,7 @@ def default_configs(cls):
Returns: A dictionary with the default config for this processor.
"""
- return {
- "labels": ["disease", "chemical"],
- "lang": "en_ner_bc5cdr_md"
- }
+ return {"labels": ["disease", "chemical"], "lang": "en_ner_bc5cdr_md"}
def record(self, record_meta: Dict[str, Set[str]]):
r"""
From 2694aec7462d7c0686dff0c602cf69a3b636e55c Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:47:27 +0800
Subject: [PATCH 37/38] black reformat
---
tests/fortex/health/processors/ner_label_processor_test.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index 4271f31f..aba91a09 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -31,10 +31,7 @@ class TestNERLabelProcessor(unittest.TestCase):
def test_ner_label_processor(self, input_data):
self.nlp = Pipeline[DataPack]()
self.nlp.set_reader(StringReader())
- config = {
- "labels": ["disease", "chemical"],
- "lang": "en_ner_bc5cdr_md"
- }
+ config = {"labels": ["disease", "chemical"], "lang": "en_ner_bc5cdr_md"}
self.nlp.add(NERLabelProcessor(), config=config)
self.nlp.initialize()
From a8494cf826ea4ef72089d94ebbf2db52c5b8a5e3 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:52:04 +0800
Subject: [PATCH 38/38] black line len 80 format
---
fortex/health/processors/ner_label_processor.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 30c8e1bd..55932867 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -62,7 +62,9 @@ def load_lang_model(lang_model):
download_url = CUSTOM_SPACYMODEL_URL[lang_model]
command = [sys.executable, "-m", "pip", "install"] + [download_url]
- subprocess.run(command, env=os.environ.copy(), encoding="utf8", check=False)
+ subprocess.run(
+ command, env=os.environ.copy(), encoding="utf8", check=False
+ )
cls = importlib.import_module(lang_model)
return cls.load()
else:
@@ -112,10 +114,14 @@ def _process(self, input_pack: DataPack):
for ent in result.ents:
if "disease" in labels:
if ent.label_ == "DISEASE":
- Disease(pack=input_pack, begin=ent.start_char, end=ent.end_char)
+ Disease(
+ pack=input_pack, begin=ent.start_char, end=ent.end_char
+ )
if "chemical" in labels:
if ent.label_ == "CHEMICAL":
- Chemical(pack=input_pack, begin=ent.start_char, end=ent.end_char)
+ Chemical(
+ pack=input_pack, begin=ent.start_char, end=ent.end_char
+ )
@classmethod
def default_configs(cls):