Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 87 additions & 70 deletions autocorpus/bioc_passages.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,87 @@
"""BioC Passage builder script."""


class BioCPassage:
    """BioC Passage builder class.

    Wraps a passage dictionary laid out in the BioC style (``offset``,
    ``infons``, ``text``, ``sentences``, ``annotations``, ``relations``) that
    is built from an article passage dictionary.
    """

    # Keys of the input passage that are handled explicitly; every other key
    # is copied verbatim into the ``infons`` of the built passage.
    _DEFAULT_KEYS = frozenset(
        ("section_heading", "subsection_heading", "body", "section_type")
    )

    @classmethod
    def from_title(cls, title, offset):
        """Creates a BioCPassage object from a title.

        Args:
            title (str): Passage title
            offset (int): Passage offset

        Returns:
            (BioCPassage): Passage whose text is the title and whose only
                section type is the "document title" IAO term
        """
        title_passage = {
            "section_heading": "",
            "subsection_heading": "",
            "body": title,
            "section_type": [{"iao_name": "document title", "iao_id": "IAO:0000305"}],
        }
        return cls(title_passage, offset)

    def __build_passage(self, passage, offset):
        """Build the BioC-style passage dictionary.

        Args:
            passage (dict): Article passage dictionary. ``body`` is required;
                ``section_heading``, ``subsection_heading`` and
                ``section_type`` are optional and treated as empty when absent.
            offset (int): Passage offset stored in the result

        Returns:
            (dict): Passage dictionary with the keys ``offset``, ``infons``,
                ``text``, ``sentences``, ``annotations`` and ``relations``

        Raises:
            KeyError: If ``passage`` has no ``body`` key, or a section type
                entry lacks ``iao_name`` or ``iao_id``
        """
        # Any key that is not handled explicitly is passed through as an infon.
        infons = {
            key: value
            for key, value in passage.items()
            if key not in self._DEFAULT_KEYS
        }

        # Headings are optional: a missing heading is treated like an empty
        # one and produces no infon.
        # TODO: doesn't account for subsubsection headings which might exist
        section_heading = passage.get("section_heading", "")
        if section_heading != "":
            infons["section_title_1"] = section_heading
        subsection_heading = passage.get("subsection_heading", "")
        if subsection_heading != "":
            infons["section_title_2"] = subsection_heading

        # Section types are numbered from 1 in the order they are given.
        for index, section_type in enumerate(passage.get("section_type", ()), start=1):
            infons[f"iao_name_{index}"] = section_type["iao_name"]
            infons[f"iao_id_{index}"] = section_type["iao_id"]

        return {
            "offset": offset,
            "infons": infons,
            "text": passage["body"],
            "sentences": [],
            "annotations": [],
            "relations": [],
        }

    def __init__(self, passage, offset):
        """Construct a passage object from the provided passage dict and offset.

        Args:
            passage (dict): Article passage dictionary
            offset (int): Passage offset to use
        """
        # Keep the attribute in sync with the offset stored in the passage
        # dictionary (it was previously hard-coded to 0).
        self.offset = offset
        self.passage = self.__build_passage(passage, offset)

    def as_dict(self):
        """Returns a dictionary representation of the passage.

        Returns:
            (dict): Dictionary representation of the passage
        """
        return self.passage
"""BioC Passage builder script."""


class BioCPassage:
    """BioC Passage builder class.

    Wraps a passage dictionary laid out in the BioC style (``offset``,
    ``infons``, ``text``, ``sentences``, ``annotations``, ``relations``) that
    is built from an article passage dictionary.
    """

    # Keys of the input passage that are handled explicitly; every other key
    # is copied verbatim into the ``infons`` of the built passage.
    _DEFAULT_KEYS = frozenset(
        ("section_heading", "subsection_heading", "body", "section_type")
    )

    @classmethod
    def from_title(cls, title, offset):
        """Creates a BioCPassage object from a title.

        Args:
            title (str): Passage title
            offset (int): Passage offset

        Returns:
            (BioCPassage): Passage whose text is the title and whose only
                section type is the "document title" IAO term
        """
        title_passage = {
            "section_heading": "",
            "subsection_heading": "",
            "body": title,
            "section_type": [{"iao_name": "document title", "iao_id": "IAO:0000305"}],
        }
        return cls(title_passage, offset)

    def __build_passage(self, passage, offset):
        """Build the BioC-style passage dictionary.

        Args:
            passage (dict): Article passage dictionary. ``body`` is required;
                ``section_heading``, ``subsection_heading`` and
                ``section_type`` are optional and treated as empty when absent.
            offset (int): Passage offset stored in the result

        Returns:
            (dict): Passage dictionary with the keys ``offset``, ``infons``,
                ``text``, ``sentences``, ``annotations`` and ``relations``.
                ``infons["type"]`` is ``"title_1"``, ``"ref"`` or
                ``"paragraph"`` depending on the first section type.

        Raises:
            KeyError: If ``passage`` has no ``body`` key, or a section type
                entry lacks ``iao_name`` or ``iao_id``
        """
        # Any key that is not handled explicitly is passed through as an infon.
        infons = {
            key: value
            for key, value in passage.items()
            if key not in self._DEFAULT_KEYS
        }

        # Headings are optional: a missing heading is treated like an empty
        # one and produces no infon.
        # TODO: doesn't account for subsubsection headings which might exist
        section_heading = passage.get("section_heading", "")
        if section_heading != "":
            infons["section_title_1"] = section_heading
        subsection_heading = passage.get("subsection_heading", "")
        if subsection_heading != "":
            infons["section_title_2"] = subsection_heading

        # Section types are numbered from 1 in the order they are given.
        for index, section_type in enumerate(passage.get("section_type", ()), start=1):
            infons[f"iao_name_{index}"] = section_type["iao_name"]
            infons[f"iao_id_{index}"] = section_type["iao_id"]

        # The first section type decides the passage type. Use .get() so a
        # passage without any section type falls back to "paragraph" instead
        # of raising KeyError.
        first_iao_id = infons.get("iao_id_1")
        if first_iao_id == "IAO:0000305":
            # Suggest to make this (title) title_1 instead of front
            infons["type"] = "title_1"
        elif first_iao_id == "IAO:0000320":
            # does not look as nice, consider keeping as paragraph
            infons["type"] = "ref"
        else:
            infons["type"] = "paragraph"
        # TODO: make optional input to AutoCORPus to have section (sub)headers
        # a separate passage in the document. Suggested types if that is done:
        #   section headers       -> "title_1" (suggest to make this title_2)
        #   subsection headers    -> "title_2" (suggest to make this title_3)
        #   subsubsection headers -> "title_3" (suggest not to use this, as
        #                            only 2 heading levels are handled above)

        return {
            "offset": offset,
            "infons": infons,
            "text": passage["body"],
            "sentences": [],
            "annotations": [],
            "relations": [],
        }

    def __init__(self, passage, offset):
        """Construct a passage object from the provided passage dict and offset.

        Args:
            passage (dict): Article passage dictionary
            offset (int): Passage offset to use
        """
        # Keep the attribute in sync with the offset stored in the passage
        # dictionary (it was previously hard-coded to 0).
        self.offset = offset
        self.passage = self.__build_passage(passage, offset)

    def as_dict(self):
        """Returns a dictionary representation of the passage.

        Returns:
            (dict): Dictionary representation of the passage
        """
        return self.passage
Loading