From b09d8e803dfe6ffd7c9c1bc594ba36f1c0805977 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 00:23:03 +0800
Subject: [PATCH 01/38] regenerate ontology

---
 ftx/medical/clinical_ontology.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/ftx/medical/clinical_ontology.py b/ftx/medical/clinical_ontology.py
index 52019a75..6f782423 100644
--- a/ftx/medical/clinical_ontology.py
+++ b/ftx/medical/clinical_ontology.py
@@ -47,6 +47,8 @@
     "MedicalArticle",
     "Abbreviation",
     "Hyponym",
+    "Disease",
+    "Chemical"
 ]
 
 
@@ -492,3 +494,23 @@ class Hyponym(Link):
     def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None):
         super().__init__(pack, parent, child)
         self.hyponym_link: Optional[str] = None
+
+
+@dataclass
+class Disease(Annotation):
+    """
+    A span based annotation `Disease`, used to represent the diseases in a piece of clinical text.
+    """
+
+    def __init__(self, pack: DataPack, begin: int, end: int):
+        super().__init__(pack, begin, end)
+
+
+@dataclass
+class Chemical(Annotation):
+    """
+    A span based annotation `Chemical`, used to represent the chemicals in a piece of clinical text.
+    """
+
+    def __init__(self, pack: DataPack, begin: int, end: int):
+        super().__init__(pack, begin, end)

From 4e5893aefc227b3c79c905c30beee39a0b388447 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 00:24:10 +0800
Subject: [PATCH 02/38] Add Disease and Chemical in ontology

---
 .../ontology_specs/clinical_ontology.json     | 828 +++++++++---------
 1 file changed, 419 insertions(+), 409 deletions(-)

diff --git a/fortex/health/ontology_specs/clinical_ontology.json b/fortex/health/ontology_specs/clinical_ontology.json
index a9269abd..ed8df678 100644
--- a/fortex/health/ontology_specs/clinical_ontology.json
+++ b/fortex/health/ontology_specs/clinical_ontology.json
@@ -1,407 +1,407 @@
-{ 
-    "name": "clinical_ontology",
-    "imports": [
-        "base_ontology.json"
-      ],
-      "additional_prefixes": [
-        "ftx.medical.clinical_ontology"
-      ],
-    "definitions": [
-      {
-        "entry_name": "ftx.medical.clinical_ontology.ClinicalEntityMention",
-        "parent_entry": "ft.onto.base_ontology.EntityMention",
-        "description": "A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.Description",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "A span based annotation `Description`, used to represent the description in a piece of clinical note."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.Body",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "A span based annotation `Body`, used to represent the actual content in a piece of clinical note."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.FrequencyAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The frequency determination for the Drug NER profile."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.DurationAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The duration determination for the Drug NER profile."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.RouteAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The route determination for the Drug NER profile.",
-        "attributes": [
-            {
-                "name": "in_take_method",
-                "type": "str"
-            }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.SuffixStrengthAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The suffix portion of dosage strength determination for the Drug NER profile."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.FractionStrengthAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The fraction portion of dosages strength determination for the Drug NER profile."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.RangeStrengthAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The range portion of dosages stength determination for the Drug NER profile."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.DecimalStrengthAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The decimal portion of dosages stength determination for the Drug NER profile"
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.DrugChangeStatusAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The change status of dosages determination for the Drug NER profile.",
-        "attributes": [
-            {
-                "name": "change_status",
-                "type": "str",
-                "description": "Indicates the drug change status of 'stop', 'start', 'increase', 'decrease', or 'noChange'."
-            }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.DosagesAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The dosage determination for the Drug NER profile."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.StrengthAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "Holds the value representing the unit of the drug dosage."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.StrengthUnitAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": ""
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.FrequencyUnitAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The value represents the unit portion of the drug frequency.",
-        "attributes": [
-            {
-                "name": "period",
-                "type": "float",
-                "description": "The periodic unit used, e.g day, month, hour, etc."
-            }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.FormAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The value represents the form portion of the drug mention."
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.SubSectionAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "",
-        "attributes": [
-            {
-                "name": "sub_ssection_body_begin",
-                "type": "int",
-                "description": "Sub-section body begin offset."
-            },
-            {
-                "name": "sub_section_body_end",
-                "type": "int",
-                "description": "Sub-section body end offset."
-            },
-            {
-                "name": "status",
-                "type": "int",
-                "description": "Status of 'possible', 'history of', or 'family history of'."
-            },
-            {
-                "name": "sub_section_header_begin",
-                "type": "int",
-                "description": "Begin offset of subSection header"
-            },
-            {
-                "name": "sub_section_header_end",
-                "type": "int",
-                "description": "Ending offset of subsection header"
-            },
-            {
-                "name": "parent_section_id",
-                "type": "str",
-                "description": "The section in which the subsection was found."
-            }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.DrugMentionAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "",
-        "attributes": [
-            {
-                "name": "status",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "confidence",
-                "type": "float",
-                "description": "The confidence of the annotation."
-            },
-            {
-                "name": "frequency",
-                "type": "str",
-                "description": "Frequency refers to how often the patient needs to take the drug. Frequency is divided into frequency number and frequency unit. E.g. twice daily"
-            },
-            {
-                "name": "frequency_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "frequency_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "duration",
-                "type": "str",
-                "description": "Duration refers to for how long the patient is expected to take the drug. E.g. 'for 2 weeks' Strongly encouraged to use bold text"
-            },
-            {
-                "name": "duration_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "duration_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "route",
-                "type": "str",
-                "description": "Medication route refers to the way that a drug is introduced into the body. E.g oral Strongly encouraged to use bold text"
-            },
-            {
-                "name": "route_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "route_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "drug_change_status",
-                "type": "str",
-                "description": "Status refers to the whether the medication is currently being taken or not."
-            },
-            {
-                "name": "dosage",
-                "type": "str",
-                "description": "Dosage refers to how many of each drug the patient is taking. E.g. 5 mg"
-            },
-            {
-                "name": "dosage_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "dosage_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "strength",
-                "type": "str",
-                "description": ""
-            },
-            {
-                "name": "strength_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "strength_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "strength_unit",
-                "type": "str",
-                "description": ""
-            },
-            {
-                "name": "su_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "su_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "form",
-                "type": "str",
-                "description": "Form refers to the physical appearance of the drug. E.g. cream"
-            },
-            {
-                "name": "form_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "form_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "frequency_unit",
-                "type": "str",
-                "description": ""
-            },
-            {
-                "name": "fu_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "fu_end",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "start_date",
-                "type": "str",
-                "description": ""
-            },
-            {
-                "name": "reason",
-                "type": "Dict",
-                "key_type": "str",
-                "value_type": "int"
-            },
-            {
-                "name": "change_status_begin",
-                "type": "int",
-                "description": ""
-            },
-            {
-                "name": "change_status_end",
-                "type": "int",
-                "description": ""
-            }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.ChunkAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "The value represents the unit portion of the drug frequency.",
-        "attributes": [
-            {
-                "name": "sentence_id",
-                "type": "str",
-                "description": ""
-            }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.DrugLookupWindowAnnotation",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "Similar to LookupWindowAnnotation however, these annotations are restricted to the segments/sections specified in the parameter - sectionOverrideSet - in DrugCNP2LookupWindow"
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.NegationContext",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
-        "attributes": [
-          {
-            "name": "polarity",
-            "type": "bool"
-          }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.UMLSConceptLink",
-        "parent_entry": "forte.data.ontology.top.Generics",
-        "description": "A umls concept entity, used to represent basic information of a umls concept",
-        "attributes": [
-          {
-              "name": "cui",
-              "type": "str"
-          },
-          {
-              "name": "name",
-              "type": "str"
-          },
-          {
-              "name": "definition",
-              "type": "str"
-          },
-          {
-              "name": "tuis",
-              "type": "List",
-              "item_type": "str"
-          },
-          {
-              "name": "aliases",
-              "type": "List",
-              "item_type": "str"
-          },
-          {
-              "name": "score",
-              "type": "str"
-          }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
-        "parent_entry": "ft.onto.base_ontology.EntityMention",
-        "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
-        "attributes": [
-          {
-              "name": "umls_link",
-              "type": "str"
-          },
-          {
-              "name": "umls_entities",
-              "type": "List",
-              "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
-          }
-        ]
-      },
-      {
-        "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
-        "parent_entry": "forte.data.ontology.top.Annotation",
-        "description": "An annotation which represents the whole medical text chunk/document",
-        "attributes": [
-          {
-              "name": "icd_version",
-              "type": "int",
-              "description": "The version of ICD-Coding being used."
-            },
-          {
-              "name": "icd_code",
-              "type": "str",
-              "description": "The ICD code assigned to current medical article."
-            }
-        ]
-      },
-      {
+{
+  "name": "clinical_ontology",
+  "imports": [
+    "base_ontology.json"
+  ],
+  "additional_prefixes": [
+    "ftx.medical.clinical_ontology"
+  ],
+  "definitions": [
+    {
+      "entry_name": "ftx.medical.clinical_ontology.ClinicalEntityMention",
+      "parent_entry": "ft.onto.base_ontology.EntityMention",
+      "description": "A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.Description",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "A span based annotation `Description`, used to represent the description in a piece of clinical note."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.Body",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "A span based annotation `Body`, used to represent the actual content in a piece of clinical note."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.FrequencyAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The frequency determination for the Drug NER profile."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.DurationAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The duration determination for the Drug NER profile."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.RouteAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The route determination for the Drug NER profile.",
+      "attributes": [
+        {
+          "name": "in_take_method",
+          "type": "str"
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.SuffixStrengthAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The suffix portion of dosage strength determination for the Drug NER profile."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.FractionStrengthAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The fraction portion of dosages strength determination for the Drug NER profile."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.RangeStrengthAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The range portion of dosages strength determination for the Drug NER profile."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.DecimalStrengthAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The decimal portion of dosages strength determination for the Drug NER profile"
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.DrugChangeStatusAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The change status of dosages determination for the Drug NER profile.",
+      "attributes": [
+        {
+          "name": "change_status",
+          "type": "str",
+          "description": "Indicates the drug change status of 'stop', 'start', 'increase', 'decrease', or 'noChange'."
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.DosagesAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The dosage determination for the Drug NER profile."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.StrengthAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "Holds the value representing the unit of the drug dosage."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.StrengthUnitAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": ""
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.FrequencyUnitAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The value represents the unit portion of the drug frequency.",
+      "attributes": [
+        {
+          "name": "period",
+          "type": "float",
+          "description": "The periodic unit used, e.g day, month, hour, etc."
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.FormAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The value represents the form portion of the drug mention."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.SubSectionAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "",
+      "attributes": [
+        {
+          "name": "sub_ssection_body_begin",
+          "type": "int",
+          "description": "Sub-section body begin offset."
+        },
+        {
+          "name": "sub_section_body_end",
+          "type": "int",
+          "description": "Sub-section body end offset."
+        },
+        {
+          "name": "status",
+          "type": "int",
+          "description": "Status of 'possible', 'history of', or 'family history of'."
+        },
+        {
+          "name": "sub_section_header_begin",
+          "type": "int",
+          "description": "Begin offset of subSection header"
+        },
+        {
+          "name": "sub_section_header_end",
+          "type": "int",
+          "description": "Ending offset of subsection header"
+        },
+        {
+          "name": "parent_section_id",
+          "type": "str",
+          "description": "The section in which the subsection was found."
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.DrugMentionAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "",
+      "attributes": [
+        {
+          "name": "status",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "confidence",
+          "type": "float",
+          "description": "The confidence of the annotation."
+        },
+        {
+          "name": "frequency",
+          "type": "str",
+          "description": "Frequency refers to how often the patient needs to take the drug. Frequency is divided into frequency number and frequency unit. E.g. twice daily"
+        },
+        {
+          "name": "frequency_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "frequency_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "duration",
+          "type": "str",
+          "description": "Duration refers to for how long the patient is expected to take the drug. E.g. 'for 2 weeks' Strongly encouraged to use bold text"
+        },
+        {
+          "name": "duration_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "duration_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "route",
+          "type": "str",
+          "description": "Medication route refers to the way that a drug is introduced into the body. E.g oral Strongly encouraged to use bold text"
+        },
+        {
+          "name": "route_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "route_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "drug_change_status",
+          "type": "str",
+          "description": "Status refers to whether the medication is currently being taken or not."
+        },
+        {
+          "name": "dosage",
+          "type": "str",
+          "description": "Dosage refers to how many of each drug the patient is taking. E.g. 5 mg"
+        },
+        {
+          "name": "dosage_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "dosage_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "strength",
+          "type": "str",
+          "description": ""
+        },
+        {
+          "name": "strength_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "strength_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "strength_unit",
+          "type": "str",
+          "description": ""
+        },
+        {
+          "name": "su_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "su_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "form",
+          "type": "str",
+          "description": "Form refers to the physical appearance of the drug. E.g. cream"
+        },
+        {
+          "name": "form_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "form_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "frequency_unit",
+          "type": "str",
+          "description": ""
+        },
+        {
+          "name": "fu_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "fu_end",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "start_date",
+          "type": "str",
+          "description": ""
+        },
+        {
+          "name": "reason",
+          "type": "Dict",
+          "key_type": "str",
+          "value_type": "int"
+        },
+        {
+          "name": "change_status_begin",
+          "type": "int",
+          "description": ""
+        },
+        {
+          "name": "change_status_end",
+          "type": "int",
+          "description": ""
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.ChunkAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "The value represents the unit portion of the drug frequency.",
+      "attributes": [
+        {
+          "name": "sentence_id",
+          "type": "str",
+          "description": ""
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.DrugLookupWindowAnnotation",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "Similar to LookupWindowAnnotation however, these annotations are restricted to the segments/sections specified in the parameter - sectionOverrideSet - in DrugCNP2LookupWindow"
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+      "attributes": [
+        {
+          "name": "polarity",
+          "type": "bool"
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.UMLSConceptLink",
+      "parent_entry": "forte.data.ontology.top.Generics",
+      "description": "A umls concept entity, used to represent basic information of a umls concept",
+      "attributes": [
+        {
+          "name": "cui",
+          "type": "str"
+        },
+        {
+          "name": "name",
+          "type": "str"
+        },
+        {
+          "name": "definition",
+          "type": "str"
+        },
+        {
+          "name": "tuis",
+          "type": "List",
+          "item_type": "str"
+        },
+        {
+          "name": "aliases",
+          "type": "List",
+          "item_type": "str"
+        },
+        {
+          "name": "score",
+          "type": "str"
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
+      "parent_entry": "ft.onto.base_ontology.EntityMention",
+      "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
+      "attributes": [
+        {
+          "name": "umls_link",
+          "type": "str"
+        },
+        {
+          "name": "umls_entities",
+          "type": "List",
+          "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
+        }
+      ]
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "An annotation which represents the whole medical text chunk/document",
+      "attributes": [
+        {
+          "name": "icd_version",
+          "type": "int",
+          "description": "The version of ICD-Coding being used."
+        },
+        {
+          "name": "icd_code",
+          "type": "str",
+          "description": "The ICD code assigned to current medical article."
+        }
+      ]
+    },
+    {
       "entry_name": "ftx.medical.clinical_ontology.Abbreviation",
       "parent_entry": "forte.data.ontology.top.Annotation",
       "description": "A span based annotation `Abbreviation`, used to represent an abbreviated token..",
@@ -411,8 +411,8 @@
           "type": "str"
         }
       ]
-     },
-     {
+    },
+    {
       "entry_name": "ftx.medical.clinical_ontology.Hyponym",
       "parent_entry": "forte.data.ontology.top.Link",
       "description": "A `Link` type entry which represent a hyponym pair.",
@@ -425,6 +425,16 @@
       ],
       "parent_type": "ft.onto.base_ontology.Phrase",
       "child_type": "ft.onto.base_ontology.Phrase"
-     }
-    ]
-  }
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.Disease",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "A span based annotation `Disease`, used to represent the diseases in a piece of clinical text."
+    },
+    {
+      "entry_name": "ftx.medical.clinical_ontology.Chemical",
+      "parent_entry": "forte.data.ontology.top.Annotation",
+      "description": "A span based annotation `Chemical`, used to represent the chemicals in a piece of clinical text."
+    }
+  ]
+}
\ No newline at end of file

From 266d64846aa9c348aaaa6accfd46d53f2bf1b109 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 00:52:58 +0800
Subject: [PATCH 03/38] Add NER Label Processor

---
 .../health/processors/ner_label_processor.py  | 109 ++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 fortex/health/processors/ner_label_processor.py

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
new file mode 100644
index 00000000..92dbfcd5
--- /dev/null
+++ b/fortex/health/processors/ner_label_processor.py
@@ -0,0 +1,109 @@
+# Copyright 2022 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NER Labeling Processor
+"""
+
+import spacy
+from typing import Dict, Set
+from forte.data.data_pack import DataPack
+from forte.processors.base import PackProcessor
+from forte.common.configuration import Config
+from forte.common.resources import Resources
+from forte.common import ProcessExecutionException
+
+
+from ftx.medical.clinical_ontology import Disease, Chemical
+
+
+__all__ = [
+    "NERLabelProcessor",
+]
+
+
+class NERLabelProcessor(PackProcessor):
+    r"""
+    Implementation of this NERLabelProcessor has been based on spaCy
+    pretained model. A rendition of it that exists on github has
+    been referred to as well.
+
+    Referred repository link:
+    https://github.com/explosion/spaCy
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.nlp = None
+
+    def initialize(self, resources: Resources, configs: Config):
+        super().initialize(resources, configs)
+        self.nlp = spacy.load("en_ner_bc5cdr_md")
+
+    def _process(self, input_pack: DataPack):
+        r"""
+        NER Label processing is based on spaCy.
+        """
+        labels = self.configs.labels
+
+        doc = input_pack.text
+
+        # Do all process.
+        if self.nlp is None:
+            raise ProcessExecutionException(
+                "The SpaCy pipeline is not initialized, maybe you "
+                "haven't called the initialization function."
+            )
+        result = self.nlp(doc)
+
+        if "disease" in labels:
+            for ent in result.ents:
+                if ent.label_ == "DISEASE":
+                    Disease(
+                        pack=input_pack,
+                        begin=ent.start_char,
+                        end=ent.end_char
+                    )
+
+        if "chemical" in labels:
+            for ent in result.ents:
+                if ent.label_ == "CHEMICAL":
+                    Chemical(
+                        pack=input_pack,
+                        begin=ent.start_char,
+                        end=ent.end_char
+                    )
+
+    @classmethod
+    def default_configs(cls):
+        r"""
+        This defines a basic config structure for `ICDCodingProcessor`.
+
+        Following are the keys for this dictionary:
+         - `labels`: ner labels
+
+        Returns: A dictionary with the default config for this processor.
+        """
+        return {
+            "labels":["disease","chemical"]
+        }
+
+    def record(self, record_meta: Dict[str, Set[str]]):
+        r"""
+
+        Args:
+            record_meta: the field in the datapack for type record that need to
+                fill in for consistency checking.
+        """
+        record_meta["ft.onto.base_ontology.Disease"] = set()
+        record_meta["ft.onto.base_ontology.Chemical"] = set()

From 3c9865e74201f2b60cc7fb0544f95eb5e0e93556 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:01:37 +0800
Subject: [PATCH 04/38] Add mimic iii reader

---
 examples/label_example/mimic3_note_reader.py | 80 ++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 examples/label_example/mimic3_note_reader.py

diff --git a/examples/label_example/mimic3_note_reader.py b/examples/label_example/mimic3_note_reader.py
new file mode 100644
index 00000000..b3f02de6
--- /dev/null
+++ b/examples/label_example/mimic3_note_reader.py
@@ -0,0 +1,80 @@
+# Copyright 2021 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import logging
+from pathlib import Path
+from typing import Any, Iterator, Union, List
+
+from smart_open import open
+
+from demo.clinical import Description, Body
+from forte.data.data_pack import DataPack
+from forte.data.base_reader import PackReader
+from ft.onto.base_ontology import Document
+
+
+class Mimic3DischargeNoteReader(PackReader):
+    """This class is designed to read the discharge notes from MIMIC3 dataset
+    as plain text packs.
+
+    For more information for the dataset, visit:
+      https://mimic.physionet.org/
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.headers: List[str] = []
+        self.text_col = -1  # Default to be last column.
+        self.description_col = 0  # Default to be first column.
+        self.__note_count = 0  # Count number of notes processed.
+
+    def _collect(  # type: ignore
+        self, mimic3_path: Union[Path, str]
+    ) -> Iterator[Any]:
+        with open(mimic3_path) as f:
+            for r in csv.reader(f):
+                if 0 < self.configs.max_num_notes <= self.__note_count:
+                    break
+                yield r
+
+    def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
+        if len(self.headers) == 0:
+            self.headers.extend(row)
+            for i, h in enumerate(self.headers):
+                if h == "TEXT":
+                    self.text_col = i
+                    logging.info("Text Column is %d", i)
+                if h == "DESCRIPTION":
+                    self.description_col = i
+                    logging.info("Description Column is %d", i)
+        else:
+            pack: DataPack = DataPack()
+            description: str = row[self.description_col]
+            text: str = row[self.text_col]
+            delimiter = "\n-----------------\n"
+            full_text = description + delimiter + text
+            pack.set_text(full_text)
+
+            Description(pack, 0, len(description))
+            Body(pack, len(description) + len(delimiter), len(full_text))
+            Document(pack, 0, len(pack.text))
+            self.__note_count += 1
+            yield pack
+
+    @classmethod
+    def default_configs(cls):
+        # If this is set (>0), the reader will only read up to
+        # the number specified.
+        return {'max_num_notes':-1}

From 745d3544a93b6a50c899428bb507b4a13e016a8c Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:02:07 +0800
Subject: [PATCH 05/38] Add demo

---
 examples/label_example/demo/__init__.py |  1 +
 examples/label_example/demo/clinical.py | 49 +++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 examples/label_example/demo/__init__.py
 create mode 100644 examples/label_example/demo/clinical.py

diff --git a/examples/label_example/demo/__init__.py b/examples/label_example/demo/__init__.py
new file mode 100644
index 00000000..49ecbbf8
--- /dev/null
+++ b/examples/label_example/demo/__init__.py
@@ -0,0 +1 @@
+# ***automatically_generated***
diff --git a/examples/label_example/demo/clinical.py b/examples/label_example/demo/clinical.py
new file mode 100644
index 00000000..68541b46
--- /dev/null
+++ b/examples/label_example/demo/clinical.py
@@ -0,0 +1,49 @@
+# ***automatically_generated***
+# ***source json:examples/clinical_pipeline/clinical_onto.json***
+# flake8: noqa
+# mypy: ignore-errors
+# pylint: skip-file
+"""
+Automatically generated ontology clinical. Do not change manually.
+"""
+
+from dataclasses import dataclass
+from forte.data.data_pack import DataPack
+from forte.data.ontology.top import Annotation
+from ft.onto.base_ontology import EntityMention
+
+__all__ = [
+    "ClinicalEntityMention",
+    "Description",
+    "Body",
+]
+
+
+@dataclass
+class ClinicalEntityMention(EntityMention):
+    """
+    A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text.
+    """
+
+    def __init__(self, pack: DataPack, begin: int, end: int):
+        super().__init__(pack, begin, end)
+
+
+@dataclass
+class Description(Annotation):
+    """
+    A span based annotation `Description`, used to represent the description in a piece of clinical note.
+    """
+
+    def __init__(self, pack: DataPack, begin: int, end: int):
+        super().__init__(pack, begin, end)
+
+
+@dataclass
+class Body(Annotation):
+    """
+    A span based annotation `Body`, used to represent the actual content in a piece of clinical note.
+    """
+
+    def __init__(self, pack: DataPack, begin: int, end: int):
+        super().__init__(pack, begin, end)

From c18184ee74999595c3f7e18dd27cfc30b8cfa66b Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:23:09 +0800
Subject: [PATCH 06/38] design clinical pipeline

---
 examples/label_example/clinical_pipeline.py | 63 +++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 examples/label_example/clinical_pipeline.py

diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
new file mode 100644
index 00000000..e4644c45
--- /dev/null
+++ b/examples/label_example/clinical_pipeline.py
@@ -0,0 +1,63 @@
+import sys
+import time
+
+sys.path.insert(0,"E:\\NLP\\Forte\\ForteHealthBranches\\53\\ForteHealth")
+print(sys.path)
+
+from forte.data.data_pack import DataPack
+from forte.data.readers import PlainTextReader
+from forte.pipeline import Pipeline
+from forte.processors.writers import PackIdJsonPackWriter
+from fortex.elastic import ElasticSearchPackIndexProcessor
+from fortex.health.processors.ner_label_processor import NERLabelProcessor
+# from ner_label_processor import NERLabelProcessor
+
+from mimic3_note_reader import Mimic3DischargeNoteReader
+
+# from stave_backend.lib.stave_session import StaveSession
+
+
+def main(
+    input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
+    ):
+
+    pl = Pipeline[DataPack]()
+    if use_mimiciii_reader == 1:
+        pl.set_reader(
+            Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
+            )
+    else:
+        pl.set_reader(PlainTextReader())
+
+    config_for_ner = {
+        "labels": ["disease", "chemical"]
+    }
+    pl.add(NERLabelProcessor(), config=config_for_ner)
+
+    pl.add(
+        ElasticSearchPackIndexProcessor(),
+        {
+            "indexer": {
+                "other_kwargs": {"refresh": True},
+            }
+        },
+    )
+    pl.add(
+        PackIdJsonPackWriter(),
+        {
+            "output_dir": output_path,
+            "indent": 2,
+            "overwrite": True,
+            "drop_record": True,
+            "zip_pack": False,
+        },
+    )
+
+    pl.initialize()
+
+    for idx, pack in enumerate(pl.process_dataset(input_path)):
+        if (idx + 1) % 50 == 0:
+            print(f"{time.strftime('%m-%d %H:%M')}: Processed {idx + 1} packs")
+
+
+main(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))

From 451c4882342576d54e4a696901ccca6cc85a1f58 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:26:23 +0800
Subject: [PATCH 07/38] Search engine and related utils

---
 examples/label_example/search_engine.py | 93 +++++++++++++++++++++++++
 examples/label_example/search_utils.py  | 57 +++++++++++++++
 examples/label_example/sqlite_utils.py  | 79 +++++++++++++++++++++
 examples/label_example/templates.py     | 22 ++++++
 4 files changed, 251 insertions(+)
 create mode 100644 examples/label_example/search_engine.py
 create mode 100644 examples/label_example/search_utils.py
 create mode 100644 examples/label_example/sqlite_utils.py
 create mode 100644 examples/label_example/templates.py

diff --git a/examples/label_example/search_engine.py b/examples/label_example/search_engine.py
new file mode 100644
index 00000000..03d58640
--- /dev/null
+++ b/examples/label_example/search_engine.py
@@ -0,0 +1,93 @@
+import sqlite3
+from typing import List
+import streamlit as st
+from forte.common.configuration import Config
+import yaml
+from elasticsearch import Elasticsearch
+from search_utils import all_search, index_search
+from sqlite_utils import create_links, sqlite_insert, get_json, update_stave_db
+import templates
+
+
+st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
+
+es = Elasticsearch(hosts=["http://localhost:9200/"])
+INDEX = "elastic_indexer"
+
+config = yaml.safe_load(open("stave_config.yml", "r"))
+config = Config(config, default_hparams=None)
+
+default_project_json = get_json("default_onto_project.json")
+
+base_project_id = update_stave_db(
+        default_project_json, config
+    )
+
+st.title("Search the MIMIC III Data...")
+search = st.text_input("Enter search words:")
+
+if not search:
+    records = {}
+    results = all_search(es, INDEX)
+    hits = results["hits"]["hits"]
+
+    conn = sqlite3.connect(config.Stave.stave_db_path)
+    answers = []
+    for idx, hit in enumerate(hits):
+        source = hit["_source"]
+        # The raw pack string and pack id (not database id)
+        raw_pack_str: str = source["pack_info"]
+        pack_id: str = source["doc_id"]
+
+        # Now you can write the pack into the database and generate url.
+        item = {
+                    "name": f"clinical_results_{idx}",
+                    "textPack": raw_pack_str,
+                    "project_id": base_project_id,
+                }
+
+        db_id = sqlite_insert(conn, "stave_backend_document", item)
+        answers += [db_id]
+        print(pack_id, db_id)
+
+    links: List[str] = create_links(config.Stave.url, answers)
+
+    for link in links:
+        st.write(link, unsafe_allow_html=True)
+
+if search:
+    results = index_search(es, INDEX, search)
+    hits = results["hits"]["hits"]
+
+    conn = sqlite3.connect(config.Stave.stave_db_path)
+    answers = []
+    docs = []
+    for idx, hit in enumerate(hits):
+        source = hit["_source"]
+        # The raw pack string and pack id (not database id)
+        raw_pack_str: str = source["pack_info"]
+        pack_id: str = source["doc_id"]
+        highlight = "...".join(hit["highlight"]["content"])
+        # Now you can write the pack into the database and generate url.
+        item = {
+                    "name": f"clinical_results_{idx}",
+                    "textPack": raw_pack_str,
+                    "project_id": base_project_id,
+                }
+
+        db_id = sqlite_insert(conn, "stave_backend_document", item)
+        answers += [db_id]
+
+        docs.append(highlight)
+
+    links: List[str] = create_links(config.Stave.url, answers)
+
+    for i in range(len(links)):
+        st.write(links[i], unsafe_allow_html=True)
+        st.write(
+        templates.search_result(
+            docs[i]
+            .replace("\n", " ")
+        ),
+        unsafe_allow_html=True,
+    )
diff --git a/examples/label_example/search_utils.py b/examples/label_example/search_utils.py
new file mode 100644
index 00000000..eb85da10
--- /dev/null
+++ b/examples/label_example/search_utils.py
@@ -0,0 +1,57 @@
+'''
+this file defines search functions for searching data in elasticsearch.
+'''
+
+
+def all_search(es, index: str) -> dict:
+    """
+    Args:
+        es: Elasticsearch client instance.
+        index: Name of the index we are going to use.
+        size: Number of results returned in each search.
+    """
+    # search query
+    body = {"query": {"match_all": {}}}
+
+    res = es.search(index=index, body=body)
+
+    return res
+
+
+def index_search(es, index: str, keywords: str) -> dict:
+    """
+    Args:
+        es: Elasticsearch client instance.
+        index: Name of the index we are going to use.
+        keywords: Search keywords.
+        from_i: Start index of the results for pagination.
+        size: Number of results returned in each search.
+    """
+    # search query
+    body = {
+        "query": {
+            "bool": {
+                "must": [
+                    {
+                        "query_string": {
+                            "query": keywords,
+                            "fields": ["content"],
+                            "default_operator": "AND",
+                        }
+                    }
+                ],
+            }
+        },
+        "highlight": {
+            "pre_tags": [' <font color = "#dc3023">'],
+            "post_tags": ["</font>"],
+            "fields": {"content": {}},
+        },
+        # "from": from_i,
+        # "size": size,
+        "aggs": {"match_count": {"value_count": {"field": "_id"}}},
+    }
+
+    res = es.search(index=index, body=body)
+
+    return res
diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
new file mode 100644
index 00000000..6cc7c036
--- /dev/null
+++ b/examples/label_example/sqlite_utils.py
@@ -0,0 +1,79 @@
+"""
+this file defines sqlite3 related utils for inserting data to the database of stave.
+"""
+import json
+from typing import List
+from stave_backend.lib.stave_session import StaveSession
+import sqlite3
+
+
+def sqlite_insert(conn, table, row):
+    """
+    Args:
+        conn: connection
+        table: table name
+        row: inserted item
+    """
+    cols: str = ", ".join('"{}"'.format(col) for col in row.keys())
+    vals: str = ", ".join(":{}".format(col) for col in row.keys())
+    sql: str = 'INSERT INTO "{0}" ({1}) VALUES ({2})'.format(table, cols, vals)
+    cursor = conn.cursor()
+    cursor.execute(sql, row)
+    conn.commit()
+    return cursor.lastrowid
+
+
+def create_links(url_stub: str, ids: List[int]) -> List[str]:
+    """
+    Args:
+        url_stub: url of stave
+        ids: the doc ids of the reports
+    """
+    links: List[str] = []
+
+    url_stub: str = url_stub.strip("/")
+    for temp_idm in ids:
+        links.append(
+            f"<a href={url_stub}/documents/{temp_idm}><font size='6'>Report #{temp_idm}</font></a>"
+        )
+    return links
+
+
+def get_json(path: str):
+    """
+    Args:
+        path: the file path of the json file
+    """
+    file_obj = open(path)
+    data = json.load(file_obj)
+    file_obj.close()
+    return data
+
+
+def update_stave_db(default_project_json, config):
+    """
+    Args:
+        default_project_json: the ontology configuration file
+        config: the configuration of Stave, including url, name, password, etc.
+    """
+    project_id_base = 0
+    with StaveSession(url=config.Stave.url) as session:
+        session.login(username=config.Stave.username, password=config.Stave.pw)
+
+        projects = session.get_project_list().json()
+        project_names = [project["name"] for project in projects]
+
+        if (
+            default_project_json["name"] in project_names
+            ):
+
+            base_project = [
+                proj
+                for proj in projects
+                if proj["name"] == default_project_json["name"]
+            ][0]
+            return base_project["id"]
+
+        resp1 = session.create_project(default_project_json)
+        project_id_base = json.loads(resp1.text)["id"]
+    return project_id_base
diff --git a/examples/label_example/templates.py b/examples/label_example/templates.py
new file mode 100644
index 00000000..63bf9aa2
--- /dev/null
+++ b/examples/label_example/templates.py
@@ -0,0 +1,22 @@
+"""
+This file defines some HTML templates
+"""
+
+
+def number_of_results(total_hits: int, duration: float) -> str:
+    """Return an HTML snippet with the hit count and the duration in seconds."""
+    return f"""
+        <div style="color:grey;font-size:95%;">
+            {total_hits} results ({duration:.2f} seconds)
+        </div><br>
+    """
+
+
+def search_result(highlights: str) -> str:
+    """Return an HTML snippet that wraps `highlights` in a `<div>`."""
+    return f"""
+
+        <div style="font-size:100%; white-space: pre-line;">
+        {highlights}
+        </div>
+    """

From f201997991bf3a52b54d5048f9c88e965bff7790 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:26:45 +0800
Subject: [PATCH 08/38] add stave config

---
 examples/label_example/stave_config.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 examples/label_example/stave_config.yml

diff --git a/examples/label_example/stave_config.yml b/examples/label_example/stave_config.yml
new file mode 100644
index 00000000..f9ff6f02
--- /dev/null
+++ b/examples/label_example/stave_config.yml
@@ -0,0 +1,5 @@
+Stave:
+  stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+  url: "http://localhost:8899"
+  username: admin
+  pw: admin

From 4642690f12abcd78310cff20aca97eb3657bbea8 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:29:02 +0800
Subject: [PATCH 09/38] add stave project ontology file

---
 .../label_example/default_onto_project.json   | 751 ++++++++++++++++++
 1 file changed, 751 insertions(+)
 create mode 100644 examples/label_example/default_onto_project.json

diff --git a/examples/label_example/default_onto_project.json b/examples/label_example/default_onto_project.json
new file mode 100644
index 00000000..901ce4f1
--- /dev/null
+++ b/examples/label_example/default_onto_project.json
@@ -0,0 +1,751 @@
+{
+  "name": "clinical_pipeline_base",
+  "ontology": {
+    "name": "base_ontology",
+    "definitions": [
+      {
+        "entry_name": "ft.onto.base_ontology.Token",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation :class:`Token`, used to represent a token or a word.",
+        "attributes": [
+          {
+            "name": "pos",
+            "type": "str"
+          },
+          {
+            "name": "ud_xpos",
+            "type": "str",
+            "description": "Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html"
+          },
+          {
+            "name": "lemma",
+            "type": "str",
+            "description": "Lemma or stem of word form."
+          },
+          {
+            "name": "chunk",
+            "type": "str"
+          },
+          {
+            "name": "ner",
+            "type": "str"
+          },
+          {
+            "name": "sense",
+            "type": "str"
+          },
+          {
+            "name": "is_root",
+            "type": "bool"
+          },
+          {
+            "name": "ud_features",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "str"
+          },
+          {
+            "name": "ud_misc",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "str"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Subword",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "Used to represent subword tokenization results.",
+        "attributes": [
+          {
+            "name": "is_first_segment",
+            "type": "bool"
+          },
+          {
+            "name": "is_unk",
+            "type": "bool"
+          },
+          {
+            "name": "vocab_id",
+            "type": "int"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Classification",
+        "parent_entry": "forte.data.ontology.top.Generics",
+        "description": "Used to store values for classification prediction",
+        "attributes": [
+          {
+            "name": "classification_result",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "float"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Document",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Document`, normally used to represent a document.",
+        "attributes": [
+          {
+            "name": "document_class",
+            "type": "List",
+            "item_type": "str",
+            "description": "A list of class names that the document belongs to."
+          },
+          {
+            "name": "sentiment",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "float"
+          },
+          {
+            "name": "classifications",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "ft.onto.base_ontology.Classification",
+            "description": "Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results."
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Sentence",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Sentence`, normally used to represent a sentence.",
+        "attributes": [
+          {
+            "name": "speaker",
+            "type": "str"
+          },
+          {
+            "name": "part_id",
+            "type": "int"
+          },
+          {
+            "name": "sentiment",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "float"
+          },
+          {
+            "name": "classification",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "float"
+          },
+          {
+            "name": "classifications",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "ft.onto.base_ontology.Classification",
+            "description": "Stores the classification results for this sentence. The key is the name/task of the classification, the value is an classification object storing the results."
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Phrase",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Phrase`.",
+        "attributes": [
+          {
+            "name": "phrase_type",
+            "type": "str"
+          },
+          {
+            "name": "headword",
+            "type": "ft.onto.base_ontology.Token"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.UtteranceContext",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "`UtteranceContext` represents the context part in dialogue."
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Utterance",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Utterance`, normally used to represent an utterance in dialogue.",
+        "attributes": [
+          {
+            "name": "speaker",
+            "type": "str"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.PredicateArgument",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.",
+        "attributes": [
+          {
+            "name": "ner_type",
+            "type": "str"
+          },
+          {
+            "name": "predicate_lemma",
+            "type": "str"
+          },
+          {
+            "name": "is_verb",
+            "type": "bool"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.EntityMention",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.",
+        "attributes": [
+          {
+            "name": "ner_type",
+            "type": "str"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.EventMention",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `EventMention`, used to refer to a mention of an event.",
+        "attributes": [
+          {
+            "name": "event_type",
+            "type": "str"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.PredicateMention",
+        "parent_entry": "ft.onto.base_ontology.Phrase",
+        "description": "A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.",
+        "attributes": [
+          {
+            "name": "predicate_lemma",
+            "type": "str"
+          },
+          {
+            "name": "framenet_id",
+            "type": "str"
+          },
+          {
+            "name": "is_verb",
+            "type": "bool"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.PredicateLink",
+        "parent_entry": "forte.data.ontology.top.Link",
+        "description": "A `Link` type entry which represent a semantic role link between a predicate and its argument.",
+        "attributes": [
+          {
+            "name": "arg_type",
+            "type": "str",
+            "description": "The predicate link type."
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.PredicateMention",
+        "child_type": "ft.onto.base_ontology.PredicateArgument"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Dependency",
+        "parent_entry": "forte.data.ontology.top.Link",
+        "description": "A `Link` type entry which represent a syntactic dependency.",
+        "attributes": [
+          {
+            "name": "dep_label",
+            "type": "str",
+            "description": "The dependency label."
+          },
+          {
+            "name": "rel_type",
+            "type": "str"
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.Token",
+        "child_type": "ft.onto.base_ontology.Token"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.EnhancedDependency",
+        "parent_entry": "forte.data.ontology.top.Link",
+        "description": "A `Link` type entry which represent a enhanced dependency: \n https://universaldependencies.org/u/overview/enhanced-syntax.html",
+        "attributes": [
+          {
+            "name": "dep_label",
+            "type": "str",
+            "description": "The enhanced dependency label in Universal Dependency."
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.Token",
+        "child_type": "ft.onto.base_ontology.Token"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.RelationLink",
+        "parent_entry": "forte.data.ontology.top.Link",
+        "description": "A `Link` type entry which represent a relation between two entity mentions",
+        "attributes": [
+          {
+            "name": "rel_type",
+            "type": "str",
+            "description": "The type of the relation."
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.EntityMention",
+        "child_type": "ft.onto.base_ontology.EntityMention"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.CrossDocEntityRelation",
+        "parent_entry": "forte.data.ontology.top.MultiPackLink",
+        "description": "A `Link` type entry which represent a relation between two entity mentions across the packs.",
+        "attributes": [
+          {
+            "name": "rel_type",
+            "type": "str",
+            "description": "The type of the relation."
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.EntityMention",
+        "child_type": "ft.onto.base_ontology.EntityMention"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.CoreferenceGroup",
+        "parent_entry": "forte.data.ontology.top.Group",
+        "description": "A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.",
+        "member_type": "ft.onto.base_ontology.EntityMention"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.EventRelation",
+        "parent_entry": "forte.data.ontology.top.Link",
+        "description": "A `Link` type entry which represent a relation between two event mentions.",
+        "attributes": [
+          {
+            "name": "rel_type",
+            "type": "str",
+            "description": "The type of the relation."
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.EventMention",
+        "child_type": "ft.onto.base_ontology.EventMention"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.CrossDocEventRelation",
+        "parent_entry": "forte.data.ontology.top.MultiPackLink",
+        "description": "A `Link` type entry which represent a relation between two event mentions across the packs.",
+        "attributes": [
+          {
+            "name": "rel_type",
+            "type": "str",
+            "description": "The type of the relation."
+          }
+        ],
+        "parent_type": "ft.onto.base_ontology.EventMention",
+        "child_type": "ft.onto.base_ontology.EventMention"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.ConstituentNode",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also hold sentiment values annotated on the nodes.",
+        "attributes": [
+          {
+            "name": "label",
+            "type": "str"
+          },
+          {
+            "name": "sentiment",
+            "type": "Dict",
+            "key_type": "str",
+            "value_type": "float"
+          },
+          {
+            "name": "is_root",
+            "type": "bool"
+          },
+          {
+            "name": "is_leaf",
+            "type": "bool"
+          },
+          {
+            "name": "parent_node",
+            "type": "ft.onto.base_ontology.ConstituentNode"
+          },
+          {
+            "name": "children_nodes",
+            "type": "List",
+            "item_type": "ft.onto.base_ontology.ConstituentNode"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Title",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Title`, normally used to represent a title."
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Body",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Body`, normally used to represent a document body."
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.MCOption",
+        "parent_entry": "forte.data.ontology.top.Annotation"
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.MCQuestion",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "attributes": [
+          {
+            "name": "options",
+            "type": "List",
+            "item_type": "ft.onto.base_ontology.MCOption"
+          },
+          {
+            "name": "answers",
+            "type": "List",
+            "item_type": "int"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.MRCQuestion",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "An `Annotation` type which represents an MRC question.",
+        "attributes": [
+          {
+            "name": "qid",
+            "type": "int"
+          },
+          {
+            "name": "answers",
+            "type": "List",
+            "item_type": "ft.onto.base_ontology.Phrase"
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.Recording",
+        "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+        "description": "A span based annotation `Recording`, normally used to represent a recording.",
+        "attributes": [
+          {
+            "name": "recording_class",
+            "type": "List",
+            "item_type": "str",
+            "description": "A list of class names that the recording belongs to."
+          }
+        ]
+      },
+      {
+        "entry_name": "ft.onto.base_ontology.AudioUtterance",
+        "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+        "description": "A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.",
+        "attributes": [
+          {
+            "name": "speaker",
+            "type": "str"
+          }
+        ]
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+        "attributes": [
+          {
+            "name": "polarity",
+            "type": "bool"
+          }
+        ]
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
+        "parent_entry": "ft.onto.base_ontology.EntityMention",
+        "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
+        "attributes": [
+          {
+            "name": "umls_link",
+            "type": "str"
+          },
+          {
+            "name": "umls_entities",
+            "type": "List",
+            "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
+          }
+        ]
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "An annotation based representation for the whole medical text chunk/document",
+        "attributes": [
+          {
+            "name": "icd_version",
+            "type": "int",
+            "description": "The version of ICD-Coding being used."
+          },
+          {
+            "name": "icd_code",
+            "type": "str",
+            "description": "The ICD code assigned to current medical article."
+          }
+        ]
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.Disease",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Disease`, used to represent the diseases in a piece of clinical text."
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.Chemical",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `Chemical`, used to represent the chemical in a piece of clinical text."
+      }
+    ]
+  },
+  "config": {
+    "legendConfigs": {
+      "ft.onto.base_ontology.Token": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "pos": false,
+          "ud_xpos": false,
+          "lemma": false,
+          "chunk": false,
+          "ner": false,
+          "sense": false
+        }
+      },
+      "ft.onto.base_ontology.Subword": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {}
+      },
+      "ft.onto.base_ontology.Classification": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {}
+      },
+      "ft.onto.base_ontology.Document": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {}
+      },
+      "ft.onto.base_ontology.Sentence": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "speaker": false
+        }
+      },
+      "ft.onto.base_ontology.Phrase": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "phrase_type": false
+        }
+      },
+      "ft.onto.base_ontology.UtteranceContext": {
+        "is_selected": false,
+        "is_shown": false
+      },
+      "ft.onto.base_ontology.Utterance": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "speaker": false
+        }
+      },
+      "ft.onto.base_ontology.PredicateArgument": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "ner_type": false,
+          "predicate_lemma": false
+        }
+      },
+      "ft.onto.base_ontology.EntityMention": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "ner_type": false
+        }
+      },
+      "ft.onto.base_ontology.EventMention": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "event_type": false
+        }
+      },
+      "ft.onto.base_ontology.PredicateMention": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "predicate_lemma": false,
+          "framenet_id": false
+        }
+      },
+      "ft.onto.base_ontology.PredicateLink": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "arg_type": false
+        }
+      },
+      "ft.onto.base_ontology.Dependency": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "dep_label": false,
+          "rel_type": false
+        }
+      },
+      "ft.onto.base_ontology.EnhancedDependency": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "dep_label": false
+        }
+      },
+      "ft.onto.base_ontology.RelationLink": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "rel_type": false
+        }
+      },
+      "ft.onto.base_ontology.CrossDocEntityRelation": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "rel_type": false
+        }
+      },
+      "ft.onto.base_ontology.CoreferenceGroup": {
+        "is_selected": false,
+        "is_shown": false
+      },
+      "ft.onto.base_ontology.EventRelation": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "rel_type": false
+        }
+      },
+      "ft.onto.base_ontology.CrossDocEventRelation": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "rel_type": false
+        }
+      },
+      "ft.onto.base_ontology.ConstituentNode": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "label": false
+        }
+      },
+      "ft.onto.base_ontology.Title": {
+        "is_selected": false,
+        "is_shown": false
+      },
+      "ft.onto.base_ontology.Body": {
+        "is_selected": false,
+        "is_shown": false
+      },
+      "ft.onto.base_ontology.MCOption": {
+        "is_selected": false,
+        "is_shown": false
+      },
+      "ft.onto.base_ontology.MCQuestion": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {}
+      },
+      "ft.onto.base_ontology.MRCQuestion": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {}
+      },
+      "ft.onto.base_ontology.Recording": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {}
+      },
+      "ft.onto.base_ontology.AudioUtterance": {
+        "is_selected": false,
+        "is_shown": false,
+        "attributes": {
+          "speaker": false
+        }
+      },
+      "ftx.medical.clinical_ontology.NegationContext": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {}
+      },
+      "ftx.medical.clinical_ontology.MedicalEntityMention": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "umls_link": false
+        }
+      },
+      "ftx.medical.clinical_ontology.MedicalArticle": {
+        "is_selected": false,
+        "is_shown": true,
+        "attributes": {
+          "icd_code": false
+        }
+      },
+      "ftx.medical.clinical_ontology.Disease": {
+        "is_selected": false,
+        "is_shown": true
+      },
+      "ftx.medical.clinical_ontology.Chemical": {
+        "is_selected": false,
+        "is_shown": true
+      }
+    },
+    "scopeConfigs": {
+      "ft.onto.base_ontology.Token": false,
+      "ft.onto.base_ontology.Subword": false,
+      "ft.onto.base_ontology.Document": false,
+      "ft.onto.base_ontology.Sentence": false,
+      "ft.onto.base_ontology.Phrase": false,
+      "ft.onto.base_ontology.UtteranceContext": false,
+      "ft.onto.base_ontology.Utterance": false,
+      "ft.onto.base_ontology.PredicateArgument": false,
+      "ft.onto.base_ontology.EntityMention": false,
+      "ft.onto.base_ontology.EventMention": false,
+      "ft.onto.base_ontology.PredicateMention": false,
+      "ft.onto.base_ontology.ConstituentNode": false,
+      "ft.onto.base_ontology.Title": false,
+      "ft.onto.base_ontology.Body": false,
+      "ft.onto.base_ontology.MCOption": false,
+      "ft.onto.base_ontology.MCQuestion": false,
+      "ft.onto.base_ontology.MRCQuestion": false,
+      "ftx.medical.clinical_ontology.NegationContext": false,
+      "ftx.medical.clinical_ontology.MedicalEntityMention": false,
+      "ftx.medical.clinical_ontology.MedicalArticle": false,
+      "ftx.medical.clinical_ontology.Disease": false,
+      "ftx.medical.clinical_ontology.Chemical": false
+    },
+    "layoutConfigs": {
+      "center-middle": "default-nlp",
+      "left": "default-meta",
+      "right": "default-attribute",
+      "center-bottom": "disable"
+    },
+    "remoteConfigs": {
+      "pipelineUrl": "",
+      "doValidation": false,
+      "expectedName": "",
+      "inputFormat": "string",
+      "expectedRecords": {}
+    }
+  }
+}
\ No newline at end of file

From f6dea81cd070a4652727b70d6c037bc20bf7d20d Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 01:48:34 +0800
Subject: [PATCH 10/38] add some CRUD to temporarily fix a bug

---
 examples/label_example/sqlite_utils.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
index 6cc7c036..3d223078 100644
--- a/examples/label_example/sqlite_utils.py
+++ b/examples/label_example/sqlite_utils.py
@@ -2,8 +2,10 @@
 this file defines sqlite3 related utils for inserting data to the database of stave.
 """
 import json
+import yaml
 from typing import List
 from stave_backend.lib.stave_session import StaveSession
+from forte.common import Config
 import sqlite3
 
 
@@ -76,4 +78,23 @@ def update_stave_db(default_project_json, config):
 
         resp1 = session.create_project(default_project_json)
         project_id_base = json.loads(resp1.text)["id"]
+
+        config = yaml.safe_load(open("stave_config.yml", "r"))
+        config = Config(config, default_hparams=None)
+        con = sqlite3.connect(config.Stave.stave_db_path)
+
+        cursorObj = con.cursor()
+        cursorObj.execute('SELECT ontology, config FROM stave_backend_project WHERE id = {0}'.format(project_id_base))
+        results = cursorObj.fetchall()
+        onto = results[0][0]
+        conf = results[0][1]
+
+        onto_new = onto.replace("\'","\"")
+        conf_new = conf.replace("\'", "\"").replace("True", "true").replace("False", "false")
+
+        cursorObj.execute("UPDATE stave_backend_project SET ontology ='" + onto_new + "' WHERE id = {0}".format(project_id_base))
+        cursorObj.execute("UPDATE stave_backend_project SET config ='" + conf_new + "' WHERE id = {0}".format(project_id_base))
+
+        con.commit()
+
     return project_id_base

From 62328123008fdcb31797961734992aa1c423b4fc Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:20:16 +0800
Subject: [PATCH 11/38] black and pylint

---
 examples/label_example/clinical_pipeline.py | 21 +++----------
 examples/label_example/search_engine.py     | 31 ++++++++----------
 examples/label_example/sqlite_utils.py      | 35 ++++++++++++++-------
 3 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
index e4644c45..43047d7f 100644
--- a/examples/label_example/clinical_pipeline.py
+++ b/examples/label_example/clinical_pipeline.py
@@ -1,37 +1,24 @@
 import sys
 import time
 
-sys.path.insert(0,"E:\\NLP\\Forte\\ForteHealthBranches\\53\\ForteHealth")
-print(sys.path)
-
 from forte.data.data_pack import DataPack
 from forte.data.readers import PlainTextReader
 from forte.pipeline import Pipeline
 from forte.processors.writers import PackIdJsonPackWriter
+from mimic3_note_reader import Mimic3DischargeNoteReader
 from fortex.elastic import ElasticSearchPackIndexProcessor
 from fortex.health.processors.ner_label_processor import NERLabelProcessor
-# from ner_label_processor import NERLabelProcessor
-
-from mimic3_note_reader import Mimic3DischargeNoteReader
-
-# from stave_backend.lib.stave_session import StaveSession
 
 
-def main(
-    input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
-    ):
+def main(input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1):
 
     pl = Pipeline[DataPack]()
     if use_mimiciii_reader == 1:
-        pl.set_reader(
-            Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
-            )
+        pl.set_reader(Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs})
     else:
         pl.set_reader(PlainTextReader())
 
-    config_for_ner = {
-        "labels": ["disease", "chemical"]
-    }
+    config_for_ner = {"labels": ["disease", "chemical"]}
     pl.add(NERLabelProcessor(), config=config_for_ner)
 
     pl.add(
diff --git a/examples/label_example/search_engine.py b/examples/label_example/search_engine.py
index 03d58640..b1be57c2 100644
--- a/examples/label_example/search_engine.py
+++ b/examples/label_example/search_engine.py
@@ -19,9 +19,7 @@
 
 default_project_json = get_json("default_onto_project.json")
 
-base_project_id = update_stave_db(
-        default_project_json, config
-    )
+base_project_id = update_stave_db(default_project_json, config)
 
 st.title("Search the MIMIC III Data...")
 search = st.text_input("Enter search words:")
@@ -41,10 +39,10 @@
 
         # Now you can write the pack into the database and generate url.
         item = {
-                    "name": f"clinical_results_{idx}",
-                    "textPack": raw_pack_str,
-                    "project_id": base_project_id,
-                }
+            "name": f"clinical_results_{idx}",
+            "textPack": raw_pack_str,
+            "project_id": base_project_id,
+        }
 
         db_id = sqlite_insert(conn, "stave_backend_document", item)
         answers += [db_id]
@@ -70,10 +68,10 @@
         highlight = "...".join(hit["highlight"]["content"])
         # Now you can write the pack into the database and generate url.
         item = {
-                    "name": f"clinical_results_{idx}",
-                    "textPack": raw_pack_str,
-                    "project_id": base_project_id,
-                }
+            "name": f"clinical_results_{idx}",
+            "textPack": raw_pack_str,
+            "project_id": base_project_id,
+        }
 
         db_id = sqlite_insert(conn, "stave_backend_document", item)
         answers += [db_id]
@@ -82,12 +80,9 @@
 
     links: List[str] = create_links(config.Stave.url, answers)
 
-    for i in range(len(links)):
+    for i, _ in enumerate(links):
         st.write(links[i], unsafe_allow_html=True)
         st.write(
-        templates.search_result(
-            docs[i]
-            .replace("\n", " ")
-        ),
-        unsafe_allow_html=True,
-    )
+            templates.search_result(docs[i].replace("\n", " ")),
+            unsafe_allow_html=True,
+        )
diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
index 3d223078..da82e0ed 100644
--- a/examples/label_example/sqlite_utils.py
+++ b/examples/label_example/sqlite_utils.py
@@ -1,12 +1,13 @@
 """
-this file defines sqlite3 related utils for inserting data to the database of stave.
+this file defines sqlite3 related utils for inserting data to
+the database of stave.
 """
 import json
-import yaml
 from typing import List
+import sqlite3
+import yaml
 from stave_backend.lib.stave_session import StaveSession
 from forte.common import Config
-import sqlite3
 
 
 def sqlite_insert(conn, table, row):
@@ -18,7 +19,7 @@ def sqlite_insert(conn, table, row):
     """
     cols: str = ", ".join('"{}"'.format(col) for col in row.keys())
     vals: str = ", ".join(":{}".format(col) for col in row.keys())
-    sql: str = 'INSERT INTO "{0}" ({1}) VALUES ({2})'.format(table, cols, vals)
+    sql: str = f'INSERT INTO "{table}" ({cols}) VALUES ({vals})'
     cursor = conn.cursor()
     cursor.execute(sql, row)
     conn.commit()
@@ -65,9 +66,7 @@ def update_stave_db(default_project_json, config):
         projects = session.get_project_list().json()
         project_names = [project["name"] for project in projects]
 
-        if (
-            default_project_json["name"] in project_names
-            ):
+        if default_project_json["name"] in project_names:
 
             base_project = [
                 proj
@@ -84,16 +83,28 @@ def update_stave_db(default_project_json, config):
         con = sqlite3.connect(config.Stave.stave_db_path)
 
         cursorObj = con.cursor()
-        cursorObj.execute('SELECT ontology, config FROM stave_backend_project WHERE id = {0}'.format(project_id_base))
+        cursorObj.execute(
+            f"SELECT ontology, config FROM stave_backend_project WHERE id = {project_id_base}"
+        )
         results = cursorObj.fetchall()
         onto = results[0][0]
         conf = results[0][1]
 
-        onto_new = onto.replace("\'","\"")
-        conf_new = conf.replace("\'", "\"").replace("True", "true").replace("False", "false")
+        onto_new = onto.replace("'", '"')
+        conf_new = (
+            conf.replace("'", '"').replace("True", "true").replace("False", "false")
+        )
 
-        cursorObj.execute("UPDATE stave_backend_project SET ontology ='" + onto_new + "' WHERE id = {0}".format(project_id_base))
-        cursorObj.execute("UPDATE stave_backend_project SET config ='" + conf_new + "' WHERE id = {0}".format(project_id_base))
+        cursorObj.execute(
+            "UPDATE stave_backend_project SET ontology ='"
+            + onto_new
+            + f"' WHERE id = {project_id_base}"
+        )
+        cursorObj.execute(
+            "UPDATE stave_backend_project SET config ='"
+            + conf_new
+            + f"' WHERE id = {project_id_base}"
+        )
 
         con.commit()
 

From a8e880368cd0269e22c8c1dbb3286590f2dc701a Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:20:59 +0800
Subject: [PATCH 12/38] solve pylint issues

---
 fortex/health/processors/ner_label_processor.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 92dbfcd5..3e52f357 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -14,9 +14,8 @@
 """
 NER Labeling Processor
 """
-
-import spacy
 from typing import Dict, Set
+import spacy
 from forte.data.data_pack import DataPack
 from forte.processors.base import PackProcessor
 from forte.common.configuration import Config
@@ -94,9 +93,7 @@ def default_configs(cls):
 
         Returns: A dictionary with the default config for this processor.
         """
-        return {
-            "labels":["disease","chemical"]
-        }
+        return {"labels": ["disease", "chemical"]}
 
     def record(self, record_meta: Dict[str, Set[str]]):
         r"""

From e5418512e0080087431c3768fae88e20d1924e45 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:21:15 +0800
Subject: [PATCH 13/38] normalize the config

---
 examples/label_example/stave_config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/label_example/stave_config.yml b/examples/label_example/stave_config.yml
index f9ff6f02..1b5e0c7f 100644
--- a/examples/label_example/stave_config.yml
+++ b/examples/label_example/stave_config.yml
@@ -1,5 +1,5 @@
 Stave:
-  stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+  stave_db_path: "$HOME//.stave//db.sqlite3"
   url: "http://localhost:8899"
   username: admin
   pw: admin

From 9c7d6e2d8b9c474d32a67b9a91ebb32fedc04128 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:29:34 +0800
Subject: [PATCH 14/38] solve black issue

---
 fortex/health/processors/ner_label_processor.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 3e52f357..939cae69 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -69,18 +69,14 @@ def _process(self, input_pack: DataPack):
             for ent in result.ents:
                 if ent.label_ == "DISEASE":
                     Disease(
-                        pack=input_pack,
-                        begin=ent.start_char,
-                        end=ent.end_char
+                        pack=input_pack, begin=ent.start_char, end=ent.end_char
                     )
 
         if "chemical" in labels:
             for ent in result.ents:
                 if ent.label_ == "CHEMICAL":
                     Chemical(
-                        pack=input_pack,
-                        begin=ent.start_char,
-                        end=ent.end_char
+                        pack=input_pack, begin=ent.start_char, end=ent.end_char
                     )
 
     @classmethod

From c7ea7e16c298057e3da63fac7a6f9272ee8ad6f9 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:36:10 +0800
Subject: [PATCH 15/38] solve pylint issue

---
 examples/label_example/clinical_pipeline.py  | 14 +++-
 examples/label_example/mimic3_note_reader.py | 80 --------------------
 2 files changed, 11 insertions(+), 83 deletions(-)
 delete mode 100644 examples/label_example/mimic3_note_reader.py

diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
index 43047d7f..3d4939b3 100644
--- a/examples/label_example/clinical_pipeline.py
+++ b/examples/label_example/clinical_pipeline.py
@@ -5,16 +5,24 @@
 from forte.data.readers import PlainTextReader
 from forte.pipeline import Pipeline
 from forte.processors.writers import PackIdJsonPackWriter
-from mimic3_note_reader import Mimic3DischargeNoteReader
+from fortex.health.readers import Mimic3DischargeNoteReader
 from fortex.elastic import ElasticSearchPackIndexProcessor
 from fortex.health.processors.ner_label_processor import NERLabelProcessor
 
 
-def main(input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1):
+def main(
+    input_path: str,
+    output_path: str,
+    max_packs: int = -1,
+    use_mimiciii_reader=1
+    ):
 
     pl = Pipeline[DataPack]()
     if use_mimiciii_reader == 1:
-        pl.set_reader(Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs})
+        pl.set_reader(
+            Mimic3DischargeNoteReader(),
+            config={"max_num_notes": max_packs}
+        )
     else:
         pl.set_reader(PlainTextReader())
 
diff --git a/examples/label_example/mimic3_note_reader.py b/examples/label_example/mimic3_note_reader.py
deleted file mode 100644
index b3f02de6..00000000
--- a/examples/label_example/mimic3_note_reader.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright 2021 The Forte Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import csv
-import logging
-from pathlib import Path
-from typing import Any, Iterator, Union, List
-
-from smart_open import open
-
-from demo.clinical import Description, Body
-from forte.data.data_pack import DataPack
-from forte.data.base_reader import PackReader
-from ft.onto.base_ontology import Document
-
-
-class Mimic3DischargeNoteReader(PackReader):
-    """This class is designed to read the discharge notes from MIMIC3 dataset
-    as plain text packs.
-
-    For more information for the dataset, visit:
-      https://mimic.physionet.org/
-    """
-
-    def __init__(self):
-        super().__init__()
-        self.headers: List[str] = []
-        self.text_col = -1  # Default to be last column.
-        self.description_col = 0  # Default to be first column.
-        self.__note_count = 0  # Count number of notes processed.
-
-    def _collect(  # type: ignore
-        self, mimic3_path: Union[Path, str]
-    ) -> Iterator[Any]:
-        with open(mimic3_path) as f:
-            for r in csv.reader(f):
-                if 0 < self.configs.max_num_notes <= self.__note_count:
-                    break
-                yield r
-
-    def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
-        if len(self.headers) == 0:
-            self.headers.extend(row)
-            for i, h in enumerate(self.headers):
-                if h == "TEXT":
-                    self.text_col = i
-                    logging.info("Text Column is %d", i)
-                if h == "DESCRIPTION":
-                    self.description_col = i
-                    logging.info("Description Column is %d", i)
-        else:
-            pack: DataPack = DataPack()
-            description: str = row[self.description_col]
-            text: str = row[self.text_col]
-            delimiter = "\n-----------------\n"
-            full_text = description + delimiter + text
-            pack.set_text(full_text)
-
-            Description(pack, 0, len(description))
-            Body(pack, len(description) + len(delimiter), len(full_text))
-            Document(pack, 0, len(pack.text))
-            self.__note_count += 1
-            yield pack
-
-    @classmethod
-    def default_configs(cls):
-        # If this is set (>0), the reader will only read up to
-        # the number specified.
-        return {'max_num_notes':-1}

From 557b8276bc1fbf4806ccf3e9afb6520715a9bc8a Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 02:40:24 +0800
Subject: [PATCH 16/38] solve pylint issue: import itself

---
 fortex/health/readers/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fortex/health/readers/__init__.py b/fortex/health/readers/__init__.py
index 076a48e7..d3745f4b 100644
--- a/fortex/health/readers/__init__.py
+++ b/fortex/health/readers/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from fortex.health.readers.mimic3_note_reader import *

From 2e2dcd50676d84d04c0c80f0a130fb12ba9d9fa4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 03:20:08 +0800
Subject: [PATCH 17/38] add a README file

---
 examples/label_example/README.md | 89 ++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 examples/label_example/README.md

diff --git a/examples/label_example/README.md b/examples/label_example/README.md
new file mode 100644
index 00000000..dbcb9bee
--- /dev/null
+++ b/examples/label_example/README.md
@@ -0,0 +1,89 @@
+## NER Label Example
+
+This example shows how we start a search engine in streamlit and link the search results to stave.
+
+## Install extra dependencies
+
+To install from PyPI,
+```bash
+pip install forte.elastic
+pip install forte.health
+pip install stave
+pip install streamlit
+```
+
+## Download spaCy model
+
+run the following command to download the model
+```bash
+pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz
+```
+
+## Set up the configuration
+Before running Elasticsearch and Stave, make sure the configuration matches your local environment.
+
+Please check and change the following configurations in `stave_config.yml`:
+
+1. Ensure `Stave.stave_db_path` is the correct path -> `$HOME/.stave`, e.g., `"/home/name/.stave"`.
+2. Ensure `Stave.username` and `Stave.pw` are both set to `"admin"`.
+
+## Prepare elastic searcher
+Download the corresponding Elasticsearch archive from https://www.elastic.co/downloads/past-releases/elasticsearch-7-17-2, unzip it, and run `elasticsearch-7.17.2/bin/elasticsearch` to start the service.
+
+Run the following to check if elasticsearch is running properly:
+```bash
+curl -XGET localhost:9200/_cluster/health?pretty
+```
+
+Make sure the index `elastic_indexer` exists in the cluster before working with this example. You can create it with the following command:
+```bash
+curl -X PUT localhost:9200/elastic_indexer
+```
+
+You can also follow the online blog for more information:
+
+https://www.elastic.co/guide/en/elasticsearch/reference/current/starting-elasticsearch.html
+
+## Run pipeline
+First, you should start an Elastic Indexer backend.
+
+Now, open a terminal. You can run the following command to parse some files and index them.
+```bash
+python clinical_pipeline.py path_to_mimiciii/1.4/NOTEEVENTS.csv.gz path_to_mimiciii_output 10 1
+```
+
+Here, we write out the raw data packs to `path_to_mimiciii_output` and only index the first 10 notes. You can change the number in the above command to whatever you want.
+
+The data is also written into Elasticsearch. You can run the following command to check whether the 10 notes were written into your database:
+
+```bash
+curl -X GET localhost:9200/elastic_indexer/_search
+```
+
+## Run indexer and Stave
+Again, you should start an Elastic Indexer backend.
+
+Then, to start the Stave server that our pipeline will connect to for visualization purposes, run
+```bash
+stave -s start -o -l -n 8899
+```
+Then, log in with username (admin) and password (admin).
+
+Here, you need to make sure `Stave.url` in `stave_config.yml` is `"http://localhost:8899"`. Or you can change the port 8899 to any port you like.
+
+## Run streamlit
+
+To run streamlit, the python version should be >= 3.7.2. 
+
+Now, open the terminal. Run the following command to start the streamlit.
+```bash
+streamlit run search_engine.py
+```
+
+Now open `http://localhost:8501` in your browser to access the streamlit interface.
+
+Next, you will see the reports shown on the interface. You can also search with the search engine.
+
+Click a report link to open that report in Stave, the visualization and annotation page.
+
+Select the radio buttons (Disease and Chemical) on the sidebar to see the annotations in the UI.

From 8ba0d6f527a63d264922f5c6dd7bef7d27f0a874 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 03:56:10 +0800
Subject: [PATCH 18/38] add ner label test

---
 .../processors/ner_label_processor_test.py    | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 tests/fortex/health/processors/ner_label_processor_test.py

diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
new file mode 100644
index 00000000..a91f1356
--- /dev/null
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -0,0 +1,62 @@
+# Copyright 2022 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Unit tests for NERLabelProcessor
+"""
+
+import unittest
+
+from ddt import data, ddt
+from forte.data.data_pack import DataPack
+from forte.data.readers import StringReader
+from forte.pipeline import Pipeline
+from fortex.health.processors.ner_label_processor import NERLabelProcessor
+from ftx.medical.clinical_ontology import Chemical, Disease
+
+
+@ddt
+class TestNERLabelProcessor(unittest.TestCase):
+
+    @data(
+        "He got cancer, and he needs oxygen."
+    )
+    def test_ner_label_processor(self, input_data):
+        self.nlp = Pipeline[DataPack]()
+        self.nlp.set_reader(StringReader())
+        config = {
+            "labels": ["disease", "chemical"]
+        }
+
+        self.nlp.add(NERLabelProcessor(), config=config)
+        self.nlp.initialize()
+        pack = self.nlp.process(input_data)
+
+        exp_disease = ["cancer"]
+        disease = []
+
+        for idx, d in enumerate(pack.get(Disease)):
+            disease.append(d.text)
+
+        assert exp_disease == disease
+
+        exp_chemical = ["oxygen"]
+        chemical = []
+        for idx, c in enumerate(pack.get(Chemical)):
+            chemical.append(c.text)
+
+        assert exp_chemical == chemical
+
+
+if __name__ == "__main__":
+    unittest.main()

From 555610bcb0e52b9f8e51fafdebee0be00d173fc2 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:01:52 +0800
Subject: [PATCH 19/38] solve black issue

---
 tests/fortex/health/processors/ner_label_processor_test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index a91f1356..dc24cc97 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -21,7 +21,9 @@
 from forte.data.data_pack import DataPack
 from forte.data.readers import StringReader
 from forte.pipeline import Pipeline
-from fortex.health.processors.ner_label_processor import NERLabelProcessor
+from fortex.health.processors.ner_label_processor import (
+    NERLabelProcessor
+)
 from ftx.medical.clinical_ontology import Chemical, Disease
 
 

From 94ae23049d56ed7ae37d4192fa5766f4a197741f Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:06:05 +0800
Subject: [PATCH 20/38] black check

---
 .../health/processors/ner_label_processor_test.py   | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index dc24cc97..b89b5806 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -21,24 +21,17 @@
 from forte.data.data_pack import DataPack
 from forte.data.readers import StringReader
 from forte.pipeline import Pipeline
-from fortex.health.processors.ner_label_processor import (
-    NERLabelProcessor
-)
+from fortex.health.processors.ner_label_processor import NERLabelProcessor
 from ftx.medical.clinical_ontology import Chemical, Disease
 
 
 @ddt
 class TestNERLabelProcessor(unittest.TestCase):
-
-    @data(
-        "He got cancer, and he needs oxygen."
-    )
+    @data("He got cancer, and he needs oxygen.")
     def test_ner_label_processor(self, input_data):
         self.nlp = Pipeline[DataPack]()
         self.nlp.set_reader(StringReader())
-        config = {
-            "labels": ["disease", "chemical"]
-        }
+        config = {"labels": ["disease", "chemical"]}
 
         self.nlp.add(NERLabelProcessor(), config=config)
         self.nlp.initialize()

From e21b7a2b93ed95d1fc4ca590983ccd2e03f59480 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:11:44 +0800
Subject: [PATCH 21/38] remove main

---
 tests/fortex/health/processors/ner_label_processor_test.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index b89b5806..23a7bbe0 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -51,7 +51,3 @@ def test_ner_label_processor(self, input_data):
             chemical.append(c.text)
 
         assert exp_chemical == chemical
-
-
-if __name__ == "__main__":
-    unittest.main()

From 42b2b063a0f8f4092a6eb2cb5708ec7f980e3a18 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:28:52 +0800
Subject: [PATCH 22/38] add set_up

---
 fortex/health/processors/ner_label_processor.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 939cae69..0812bcf1 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -15,6 +15,9 @@
 NER Labeling Processor
 """
 from typing import Dict, Set
+import subprocess
+import sys
+import os
 import spacy
 from forte.data.data_pack import DataPack
 from forte.processors.base import PackProcessor
@@ -45,6 +48,13 @@ def __init__(self):
         super().__init__()
         self.nlp = None
 
+    def set_up(self):
+        download_url = "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"
+        command = [sys.executable, "-m", "pip", "install"] + [download_url]
+        subprocess.run(
+            command, env=os.environ.copy(), encoding="utf8", check=False
+        )
+
     def initialize(self, resources: Resources, configs: Config):
         super().initialize(resources, configs)
         self.nlp = spacy.load("en_ner_bc5cdr_md")

From 7015b8412cb84142048dcfa2209c7ff20a8a78b3 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:30:40 +0800
Subject: [PATCH 23/38] shorten the string

---
 fortex/health/processors/ner_label_processor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 0812bcf1..97b6ebd6 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -49,7 +49,8 @@ def __init__(self):
         self.nlp = None
 
     def set_up(self):
-        download_url = "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"
+        download_url = """https://s3-us-west-2.amazonaws.com/
+        ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"""
         command = [sys.executable, "-m", "pip", "install"] + [download_url]
         subprocess.run(
             command, env=os.environ.copy(), encoding="utf8", check=False

From 560631f53a40be5eee10dc8e99b51ff9e52328e2 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:42:20 +0800
Subject: [PATCH 24/38] fix test bug

---
 fortex/health/processors/ner_label_processor.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 97b6ebd6..a25cb253 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -14,11 +14,13 @@
 """
 NER Labeling Processor
 """
+import importlib
 from typing import Dict, Set
 import subprocess
 import sys
 import os
 import spacy
+from spacy.cli.download import download
 from forte.data.data_pack import DataPack
 from forte.processors.base import PackProcessor
 from forte.common.configuration import Config
@@ -49,12 +51,7 @@ def __init__(self):
         self.nlp = None
 
     def set_up(self):
-        download_url = """https://s3-us-west-2.amazonaws.com/
-        ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz"""
-        command = [sys.executable, "-m", "pip", "install"] + [download_url]
-        subprocess.run(
-            command, env=os.environ.copy(), encoding="utf8", check=False
-        )
+        download("en_ner_bc5cdr_md")
 
     def initialize(self, resources: Resources, configs: Config):
         super().initialize(resources, configs)

From b83e4aefa80a96f74faefe8b8261cfbf92a8fda0 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:46:25 +0800
Subject: [PATCH 25/38] remove unused import

---
 fortex/health/processors/ner_label_processor.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index a25cb253..1bd37df2 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -14,11 +14,8 @@
 """
 NER Labeling Processor
 """
-import importlib
+
 from typing import Dict, Set
-import subprocess
-import sys
-import os
 import spacy
 from spacy.cli.download import download
 from forte.data.data_pack import DataPack

From 3634a07a36ad61de8fc36e172e1321be486802d4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 04:53:01 +0800
Subject: [PATCH 26/38] fix pytest issue

---
 fortex/health/processors/ner_label_processor.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 1bd37df2..fffe00da 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -47,11 +47,9 @@ def __init__(self):
         super().__init__()
         self.nlp = None
 
-    def set_up(self):
-        download("en_ner_bc5cdr_md")
-
     def initialize(self, resources: Resources, configs: Config):
         super().initialize(resources, configs)
+        download("en_ner_bc5cdr_md")
         self.nlp = spacy.load("en_ner_bc5cdr_md")
 
     def _process(self, input_pack: DataPack):

From 0ccdfa44b5591cb5fe9c658b12cb93b147be591e Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 05:03:40 +0800
Subject: [PATCH 27/38] fix pytest bug

---
 .../health/processors/ner_label_processor.py  | 46 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index fffe00da..970a0b1b 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -32,6 +32,49 @@
     "NERLabelProcessor",
 ]
 
+CUSTOM_SPACYMODEL_URL = {
+    "en_core_sci_sm": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+    "/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz",
+    "en_core_sci_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+    "/releases/v0.3.0/en_core_sci_md-0.3.0.tar.gz",
+    "en_core_sci_lg": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+    "/releases/v0.3.0/en_core_sci_lg-0.3.0.tar.gz",
+    "en_ner_craft_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+    "/releases/v0.3.0/en_ner_craft_md-0.3.0.tar.gz",
+    "en_ner_jnlpba_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+    "/releases/v0.3.0/en_ner_jnlpba_md-0.3.0.tar.gz",
+    "en_ner_bc5cdr_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy"
+    "/releases/v0.3.0/en_ner_bc5cdr_md-0.3.0.tar.gz",
+    "en_ner_bionlp13cg_md": "https://s3-us-west-2.amazonaws.com/ai2-s2"
+    "-scispacy/releases/v0.3.0/en_ner_bionlp13cg_md-0"
+    ".3.0.tar.gz",
+}
+
+def load_lang_model(lang_model):
+    # pylint: disable=import-outside-toplevel
+    if lang_model in CUSTOM_SPACYMODEL_URL:
+        # download ScispaCy model using URL
+        import subprocess
+        import sys
+        import os
+        import importlib
+
+        download_url = CUSTOM_SPACYMODEL_URL[lang_model]
+        command = [sys.executable, "-m", "pip", "install"] + [download_url]
+        subprocess.run(
+            command, env=os.environ.copy(), encoding="utf8", check=False
+        )
+        cls = importlib.import_module(lang_model)
+        return cls.load()  # type: ignore
+    else:
+        # Use spaCy download
+        try:
+            nlp = spacy.load(lang_model)  # type: ignore
+        except OSError:
+            download(lang_model)
+            nlp = spacy.load(lang_model)  # type: ignore
+    return nlp
+
 
 class NERLabelProcessor(PackProcessor):
     r"""
@@ -49,8 +92,7 @@ def __init__(self):
 
     def initialize(self, resources: Resources, configs: Config):
         super().initialize(resources, configs)
-        download("en_ner_bc5cdr_md")
-        self.nlp = spacy.load("en_ner_bc5cdr_md")
+        self.nlp = load_lang_model("en_ner_bc5cdr_md")
 
     def _process(self, input_pack: DataPack):
         r"""

From b9d08cf6f455873c1a8f2f9e75e3633607d44138 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 11:52:57 +0800
Subject: [PATCH 28/38] black reformat

---
 fortex/health/processors/ner_label_processor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 970a0b1b..d9d48694 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -50,6 +50,7 @@
     ".3.0.tar.gz",
 }
 
+
 def load_lang_model(lang_model):
     # pylint: disable=import-outside-toplevel
     if lang_model in CUSTOM_SPACYMODEL_URL:

From deefc849b37dc10f7bb7d3093626987b954f43f1 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 22 Jul 2022 12:00:12 +0800
Subject: [PATCH 29/38] remove unused comment

---
 fortex/health/processors/ner_label_processor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index d9d48694..54ebe68e 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -66,14 +66,14 @@ def load_lang_model(lang_model):
             command, env=os.environ.copy(), encoding="utf8", check=False
         )
         cls = importlib.import_module(lang_model)
-        return cls.load()  # type: ignore
+        return cls.load()
     else:
         # Use spaCy download
         try:
-            nlp = spacy.load(lang_model)  # type: ignore
+            nlp = spacy.load(lang_model)
         except OSError:
             download(lang_model)
-            nlp = spacy.load(lang_model)  # type: ignore
+            nlp = spacy.load(lang_model)
     return nlp
 
 

From 7625dd95f5deb5c38879945de1bafd4a128469ec Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Mon, 25 Jul 2022 17:51:24 +0800
Subject: [PATCH 30/38] add json dumps and remove SQL statements

---
 examples/label_example/sqlite_utils.py | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/examples/label_example/sqlite_utils.py b/examples/label_example/sqlite_utils.py
index da82e0ed..38d61dce 100644
--- a/examples/label_example/sqlite_utils.py
+++ b/examples/label_example/sqlite_utils.py
@@ -75,37 +75,13 @@ def update_stave_db(default_project_json, config):
             ][0]
             return base_project["id"]
 
-        resp1 = session.create_project(default_project_json)
+        resp1 = session.create_project(json.dumps(default_project_json))
         project_id_base = json.loads(resp1.text)["id"]
 
         config = yaml.safe_load(open("stave_config.yml", "r"))
         config = Config(config, default_hparams=None)
         con = sqlite3.connect(config.Stave.stave_db_path)
 
-        cursorObj = con.cursor()
-        cursorObj.execute(
-            f"SELECT ontology, config FROM stave_backend_project WHERE id = {project_id_base}"
-        )
-        results = cursorObj.fetchall()
-        onto = results[0][0]
-        conf = results[0][1]
-
-        onto_new = onto.replace("'", '"')
-        conf_new = (
-            conf.replace("'", '"').replace("True", "true").replace("False", "false")
-        )
-
-        cursorObj.execute(
-            "UPDATE stave_backend_project SET ontology ='"
-            + onto_new
-            + f"' WHERE id = {project_id_base}"
-        )
-        cursorObj.execute(
-            "UPDATE stave_backend_project SET config ='"
-            + conf_new
-            + f"' WHERE id = {project_id_base}"
-        )
-
         con.commit()
 
     return project_id_base

From b9af6dfa9b3d6eca7af55a28ac16cec492a762f4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:03:46 +0800
Subject: [PATCH 31/38] wrap search engine code in method

---
 examples/label_example/search_engine.py | 159 ++++++++++++------------
 1 file changed, 82 insertions(+), 77 deletions(-)

diff --git a/examples/label_example/search_engine.py b/examples/label_example/search_engine.py
index b1be57c2..4c9bc98f 100644
--- a/examples/label_example/search_engine.py
+++ b/examples/label_example/search_engine.py
@@ -9,80 +9,85 @@
 import templates
 
 
-st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
-
-es = Elasticsearch(hosts=["http://localhost:9200/"])
-INDEX = "elastic_indexer"
-
-config = yaml.safe_load(open("stave_config.yml", "r"))
-config = Config(config, default_hparams=None)
-
-default_project_json = get_json("default_onto_project.json")
-
-base_project_id = update_stave_db(default_project_json, config)
-
-st.title("Search the MIMIC III Data...")
-search = st.text_input("Enter search words:")
-
-if not search:
-    records = {}
-    results = all_search(es, INDEX)
-    hits = results["hits"]["hits"]
-
-    conn = sqlite3.connect(config.Stave.stave_db_path)
-    answers = []
-    for idx, hit in enumerate(hits):
-        source = hit["_source"]
-        # The raw pack string and pack id (not database id)
-        raw_pack_str: str = source["pack_info"]
-        pack_id: str = source["doc_id"]
-
-        # Now you can write the pack into the database and generate url.
-        item = {
-            "name": f"clinical_results_{idx}",
-            "textPack": raw_pack_str,
-            "project_id": base_project_id,
-        }
-
-        db_id = sqlite_insert(conn, "stave_backend_document", item)
-        answers += [db_id]
-        print(pack_id, db_id)
-
-    links: List[str] = create_links(config.Stave.url, answers)
-
-    for link in links:
-        st.write(link, unsafe_allow_html=True)
-
-if search:
-    results = index_search(es, INDEX, search)
-    hits = results["hits"]["hits"]
-
-    conn = sqlite3.connect(config.Stave.stave_db_path)
-    answers = []
-    docs = []
-    for idx, hit in enumerate(hits):
-        source = hit["_source"]
-        # The raw pack string and pack id (not database id)
-        raw_pack_str: str = source["pack_info"]
-        pack_id: str = source["doc_id"]
-        highlight = "...".join(hit["highlight"]["content"])
-        # Now you can write the pack into the database and generate url.
-        item = {
-            "name": f"clinical_results_{idx}",
-            "textPack": raw_pack_str,
-            "project_id": base_project_id,
-        }
-
-        db_id = sqlite_insert(conn, "stave_backend_document", item)
-        answers += [db_id]
-
-        docs.append(highlight)
-
-    links: List[str] = create_links(config.Stave.url, answers)
-
-    for i, _ in enumerate(links):
-        st.write(links[i], unsafe_allow_html=True)
-        st.write(
-            templates.search_result(docs[i].replace("\n", " ")),
-            unsafe_allow_html=True,
-        )
+def main():
+    st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
+
+    es = Elasticsearch(hosts=["http://localhost:9200/"])
+    INDEX = "elastic_indexer"
+
+    config = yaml.safe_load(open("stave_config.yml", "r"))
+    config = Config(config, default_hparams=None)
+
+    default_project_json = get_json("default_onto_project.json")
+
+    base_project_id = update_stave_db(default_project_json, config)
+
+    st.title("Search the MIMIC III Data...")
+    search = st.text_input("Enter search words:")
+
+    if not search:
+        records = {}
+        results = all_search(es, INDEX)
+        hits = results["hits"]["hits"]
+
+        conn = sqlite3.connect(config.Stave.stave_db_path)
+        answers = []
+        for idx, hit in enumerate(hits):
+            source = hit["_source"]
+            # The raw pack string and pack id (not database id)
+            raw_pack_str: str = source["pack_info"]
+            pack_id: str = source["doc_id"]
+
+            # Now you can write the pack into the database and generate url.
+            item = {
+                "name": f"clinical_results_{idx}",
+                "textPack": raw_pack_str,
+                "project_id": base_project_id,
+            }
+
+            db_id = sqlite_insert(conn, "stave_backend_document", item)
+            answers += [db_id]
+            print(pack_id, db_id)
+
+        links: List[str] = create_links(config.Stave.url, answers)
+
+        for link in links:
+            st.write(link, unsafe_allow_html=True)
+
+    if search:
+        results = index_search(es, INDEX, search)
+        hits = results["hits"]["hits"]
+
+        conn = sqlite3.connect(config.Stave.stave_db_path)
+        answers = []
+        docs = []
+        for idx, hit in enumerate(hits):
+            source = hit["_source"]
+            # The raw pack string and pack id (not database id)
+            raw_pack_str: str = source["pack_info"]
+            pack_id: str = source["doc_id"]
+            highlight = "...".join(hit["highlight"]["content"])
+            # Now you can write the pack into the database and generate url.
+            item = {
+                "name": f"clinical_results_{idx}",
+                "textPack": raw_pack_str,
+                "project_id": base_project_id,
+            }
+
+            db_id = sqlite_insert(conn, "stave_backend_document", item)
+            answers += [db_id]
+
+            docs.append(highlight)
+
+        links: List[str] = create_links(config.Stave.url, answers)
+
+        for i, _ in enumerate(links):
+            st.write(links[i], unsafe_allow_html=True)
+            st.write(
+                templates.search_result(docs[i].replace("\n", " ")),
+                unsafe_allow_html=True,
+            )
+
+
+if __name__ == '__main__':
+    main()

From 8ffa0fac22e5434bd5eba2e6db918f48fcb09d51 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:04:49 +0800
Subject: [PATCH 32/38] remove unnecessary comments

---
 examples/label_example/search_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/label_example/search_utils.py b/examples/label_example/search_utils.py
index eb85da10..35f1e6c4 100644
--- a/examples/label_example/search_utils.py
+++ b/examples/label_example/search_utils.py
@@ -47,8 +47,6 @@ def index_search(es, index: str, keywords: str) -> dict:
             "post_tags": ["</font>"],
             "fields": {"content": {}},
         },
-        # "from": from_i,
-        # "size": size,
         "aggs": {"match_count": {"value_count": {"field": "_id"}}},
     }
 

From 9532d136408752cb991c3fbb1c873a606d887ce4 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:20:08 +0800
Subject: [PATCH 33/38] remove extra empty lines

---
 examples/label_example/templates.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/label_example/templates.py b/examples/label_example/templates.py
index 63bf9aa2..5278a33a 100644
--- a/examples/label_example/templates.py
+++ b/examples/label_example/templates.py
@@ -15,7 +15,6 @@ def number_of_results(total_hits: int, duration: float) -> str:
 def search_result(highlights: str) -> str:
     """HTML scripts to display search results."""
     return f"""
-
         <div style="font-size:100%; white-space: pre-line;">
         {highlights}
         </div>

From 24dca3e8ab6b9ce7e57ea9b835dcaf38f082db77 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:40:09 +0800
Subject: [PATCH 34/38] add lang model as config

---
 fortex/health/processors/ner_label_processor.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 54ebe68e..cbb938c0 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -93,7 +93,7 @@ def __init__(self):
 
     def initialize(self, resources: Resources, configs: Config):
         super().initialize(resources, configs)
-        self.nlp = load_lang_model("en_ner_bc5cdr_md")
+        self.nlp = load_lang_model(configs.lang)
 
     def _process(self, input_pack: DataPack):
         r"""
@@ -111,15 +111,13 @@ def _process(self, input_pack: DataPack):
             )
         result = self.nlp(doc)
 
-        if "disease" in labels:
-            for ent in result.ents:
+        for ent in result.ents:
+            if "disease" in labels:
                 if ent.label_ == "DISEASE":
                     Disease(
                         pack=input_pack, begin=ent.start_char, end=ent.end_char
                     )
-
-        if "chemical" in labels:
-            for ent in result.ents:
+            if "chemical" in labels:
                 if ent.label_ == "CHEMICAL":
                     Chemical(
                         pack=input_pack, begin=ent.start_char, end=ent.end_char
@@ -135,7 +133,10 @@ def default_configs(cls):
 
         Returns: A dictionary with the default config for this processor.
         """
-        return {"labels": ["disease", "chemical"]}
+        return {
+            "labels": ["disease", "chemical"],
+            "lang": "en_ner_bc5cdr_md"
+            }
 
     def record(self, record_meta: Dict[str, Set[str]]):
         r"""

From 9d98979e980925867f96ce05db7bc670d8fa92dc Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:41:08 +0800
Subject: [PATCH 35/38] fix dependency

---
 examples/label_example/clinical_pipeline.py                | 5 ++++-
 tests/fortex/health/processors/ner_label_processor_test.py | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/examples/label_example/clinical_pipeline.py b/examples/label_example/clinical_pipeline.py
index 3d4939b3..24df20de 100644
--- a/examples/label_example/clinical_pipeline.py
+++ b/examples/label_example/clinical_pipeline.py
@@ -26,7 +26,10 @@ def main(
     else:
         pl.set_reader(PlainTextReader())
 
-    config_for_ner = {"labels": ["disease", "chemical"]}
+    config_for_ner = {
+        "labels": ["disease", "chemical"],
+        "lang": "en_ner_bc5cdr_md"
+        }
     pl.add(NERLabelProcessor(), config=config_for_ner)
 
     pl.add(
diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index 23a7bbe0..4271f31f 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -31,7 +31,10 @@ class TestNERLabelProcessor(unittest.TestCase):
     def test_ner_label_processor(self, input_data):
         self.nlp = Pipeline[DataPack]()
         self.nlp.set_reader(StringReader())
-        config = {"labels": ["disease", "chemical"]}
+        config = {
+            "labels": ["disease", "chemical"],
+            "lang": "en_ner_bc5cdr_md"
+            }
 
         self.nlp.add(NERLabelProcessor(), config=config)
         self.nlp.initialize()

From 1b2263ed627225b895214c4292065631cdc09356 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:46:43 +0800
Subject: [PATCH 36/38] black reformat

---
 fortex/health/processors/ner_label_processor.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index cbb938c0..30c8e1bd 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -62,9 +62,7 @@ def load_lang_model(lang_model):
 
         download_url = CUSTOM_SPACYMODEL_URL[lang_model]
         command = [sys.executable, "-m", "pip", "install"] + [download_url]
-        subprocess.run(
-            command, env=os.environ.copy(), encoding="utf8", check=False
-        )
+        subprocess.run(command, env=os.environ.copy(), encoding="utf8", check=False)
         cls = importlib.import_module(lang_model)
         return cls.load()
     else:
@@ -114,14 +112,10 @@ def _process(self, input_pack: DataPack):
         for ent in result.ents:
             if "disease" in labels:
                 if ent.label_ == "DISEASE":
-                    Disease(
-                        pack=input_pack, begin=ent.start_char, end=ent.end_char
-                    )
+                    Disease(pack=input_pack, begin=ent.start_char, end=ent.end_char)
             if "chemical" in labels:
                 if ent.label_ == "CHEMICAL":
-                    Chemical(
-                        pack=input_pack, begin=ent.start_char, end=ent.end_char
-                    )
+                    Chemical(pack=input_pack, begin=ent.start_char, end=ent.end_char)
 
     @classmethod
     def default_configs(cls):
@@ -133,10 +127,7 @@ def default_configs(cls):
 
         Returns: A dictionary with the default config for this processor.
         """
-        return {
-            "labels": ["disease", "chemical"],
-            "lang": "en_ner_bc5cdr_md"
-            }
+        return {"labels": ["disease", "chemical"], "lang": "en_ner_bc5cdr_md"}
 
     def record(self, record_meta: Dict[str, Set[str]]):
         r"""

From 2694aec7462d7c0686dff0c602cf69a3b636e55c Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:47:27 +0800
Subject: [PATCH 37/38] black reformat

---
 tests/fortex/health/processors/ner_label_processor_test.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/fortex/health/processors/ner_label_processor_test.py b/tests/fortex/health/processors/ner_label_processor_test.py
index 4271f31f..aba91a09 100644
--- a/tests/fortex/health/processors/ner_label_processor_test.py
+++ b/tests/fortex/health/processors/ner_label_processor_test.py
@@ -31,10 +31,7 @@ class TestNERLabelProcessor(unittest.TestCase):
     def test_ner_label_processor(self, input_data):
         self.nlp = Pipeline[DataPack]()
         self.nlp.set_reader(StringReader())
-        config = {
-            "labels": ["disease", "chemical"],
-            "lang": "en_ner_bc5cdr_md"
-            }
+        config = {"labels": ["disease", "chemical"], "lang": "en_ner_bc5cdr_md"}
 
         self.nlp.add(NERLabelProcessor(), config=config)
         self.nlp.initialize()

From a8494cf826ea4ef72089d94ebbf2db52c5b8a5e3 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 14 Aug 2022 02:52:04 +0800
Subject: [PATCH 38/38] black line len 80 format

---
 fortex/health/processors/ner_label_processor.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fortex/health/processors/ner_label_processor.py b/fortex/health/processors/ner_label_processor.py
index 30c8e1bd..55932867 100644
--- a/fortex/health/processors/ner_label_processor.py
+++ b/fortex/health/processors/ner_label_processor.py
@@ -62,7 +62,9 @@ def load_lang_model(lang_model):
 
         download_url = CUSTOM_SPACYMODEL_URL[lang_model]
         command = [sys.executable, "-m", "pip", "install"] + [download_url]
-        subprocess.run(command, env=os.environ.copy(), encoding="utf8", check=False)
+        subprocess.run(
+            command, env=os.environ.copy(), encoding="utf8", check=False
+        )
         cls = importlib.import_module(lang_model)
         return cls.load()
     else:
@@ -112,10 +114,14 @@ def _process(self, input_pack: DataPack):
         for ent in result.ents:
             if "disease" in labels:
                 if ent.label_ == "DISEASE":
-                    Disease(pack=input_pack, begin=ent.start_char, end=ent.end_char)
+                    Disease(
+                        pack=input_pack, begin=ent.start_char, end=ent.end_char
+                    )
             if "chemical" in labels:
                 if ent.label_ == "CHEMICAL":
-                    Chemical(pack=input_pack, begin=ent.start_char, end=ent.end_char)
+                    Chemical(
+                        pack=input_pack, begin=ent.start_char, end=ent.end_char
+                    )
 
     @classmethod
     def default_configs(cls):