Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
8 changes: 4 additions & 4 deletions VariantFormatter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import pkg_resources
import importlib.metadata
import re
import warnings

# Pull in use_scm_version=True enabled version number
# __version__ is the installed distribution's version string, or None when
# the package is not installed; _is_released_version is True only for a
# plain "X.Y.Z" version string.
_is_released_version = False
try:
    # importlib.metadata is the standard-library replacement for the
    # pkg_resources.get_distribution(...).version lookup.
    __version__ = importlib.metadata.version("VariantFormatter")
except importlib.metadata.PackageNotFoundError:
    warnings.warn("can't get __version__ because %s package isn't installed" % __package__, Warning)
    __version__ = None
else:
    # Anything other than exactly three dot-separated integers (for example
    # a development or local version suffix) is not treated as a release.
    if re.match(r"^\d+\.\d+\.\d+$", __version__) is not None:
        _is_released_version = True


# <LICENSE>
# Copyright (C) 2016-2025 VariantValidator Contributors
# Copyright (C) 2016-2026 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
Expand Down
2 changes: 1 addition & 1 deletion VariantFormatter/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def gap_checker(hgvs_transcript, hgvs_genomic, genome_build, vfo, transcript_mod


# <LICENSE>
# Copyright (C) 2016-2025 VariantValidator Contributors
# Copyright (C) 2016-2026 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
Expand Down
2 changes: 1 addition & 1 deletion VariantFormatter/gapGenes.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, vm, vfo):


# <LICENSE>
# Copyright (C) 2016-2025 VariantValidator Contributors
# Copyright (C) 2016-2026 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
Expand Down
2 changes: 1 addition & 1 deletion VariantFormatter/simpleVariantFormatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def format(batch_input, genome_build, transcript_model=None, specify_transcripts


# <LICENSE>
# Copyright (C) 2016-2025 VariantValidator Contributors
# Copyright (C) 2016-2026 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
Expand Down
64 changes: 52 additions & 12 deletions VariantFormatter/variantformatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,23 +388,27 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
else:
transcript_list = formatter.fetch_aligned_transcripts(g_hgvs, self.transcript_model, self.vfo, genome_build)

# Remove malform IDs
cp_transcript_list = copy.copy(transcript_list)
transcript_list = []
for tx in cp_transcript_list:
# De-duplicate and remove malformed IDs (e.g. ENS sequences with different GRCh37 and GRCh38 versions but the same ID and version number).
# Also sort by the number of main-chromosome-like mappings; this avoids pyliftover where possible, since we re-use the
# first non-failing hit for all liftovers. (Where mapping details exist, the order is: ref, alt, strand, ...)
transcript_dict = {}
for tx in transcript_list:
# Known UTA ID malforms
if re.search('\/', tx[0]):
continue
else:
transcript_list.append(tx)

# don't exclude any tx, even if we have no chr like mapping
if tx[0] not in transcript_dict:
transcript_dict[tx[0]] = +1
# but add to the sort number if we do have a main chr like mapping
if len(tx) > 2 and (tx[1].startswith('NC_00') or tx[1] in ['NC_012920.1', 'NC_001807.4']):
transcript_dict[tx[0]] = transcript_dict[tx[0]] +1

transcript_list = sorted(transcript_dict.keys(),key=lambda k:transcript_dict[k],reverse=True)
# Create a variable to trap direct g_g liftover
g_to_g_lift = {}

# Create transcript level descriptions
for tx_alignment_data in transcript_list:
tx_id = tx_alignment_data[0]

for tx_id in transcript_list:
# Get transcript annotations
try:
annotation = vfo.db.get_transcript_annotation(tx_id)
Expand Down Expand Up @@ -461,7 +465,6 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
if tx_id not in str(overlapping_tx):
continue


hgvs_transcript_dict = formatter.hgvs_genomic2hgvs_transcript(g_hgvs, tx_id, self.vfo)

# Gap checking
Expand Down Expand Up @@ -496,6 +499,7 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
hgvs_protein_tlc = formatter.hgvs_transcript2hgvs_protein(am_i_gapped['hgvs_transcript'],
self.genome_build,
self.vfo)
hgvs_protein_tlc = formatter.remove_reference(hgvs_protein_tlc)
# Handle edits that have been stringified
try:
hgvs_protein_tlc.posedit.edit.ref
Expand Down Expand Up @@ -577,6 +581,29 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
specified_tx_variant=specified_tx_variant
)

# Use PyLiftover if needed to add missing lifts to primary assembly
# (the transcript-based lift above left the target build's entry empty)
if current_lift[build_to.lower()] == {}:
    direct_lift = lo.liftover(self.genomic_descriptions.g_hgvs,
                              self.genomic_descriptions.selected_build,
                              build_to,
                              vfo.splign_normalizer,
                              vfo.reverse_splign_normalizer,
                              None,
                              vfo,
                              specify_tx=tx_id,
                              liftover_level=self.liftover,
                              gap_map=formatter.gap_checker,
                              vfo=self.vfo,
                              specified_tx_variant=specified_tx_variant,
                              force_pyliftover=True
                              )

    current_lift[build_to.lower()] = direct_lift[build_to.lower()]
    # Mirror the lifted record under the UCSC-style alias of the target
    # build: GRCh37 <-> hg19, GRCh38 <-> hg38.
    if build_to == "GRCh37":
        current_lift["hg19"] = direct_lift[build_to.lower()]
    if build_to == "GRCh38":
        # Key was misspelled "hg39", so the hg38 entry was never filled in.
        current_lift["hg38"] = direct_lift[build_to.lower()]

if "am_i_gapped" in current_lift.keys():
if order_my_tp['gapped_alignment_warning'] == "":
order_my_tp['gapped_alignment_warning'] = current_lift['am_i_gapped'][
Expand All @@ -591,6 +618,12 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
elif order_my_tp['transcript_variant_error'] is not None and g_to_g_lift != {}:
current_lift = g_to_g_lift

# first edit liftover to text, as required for output
for key, val in current_lift.items():
for chr_type in val.keys():
current_lift[key][chr_type]['hgvs_genomic_description'] = \
current_lift[key][chr_type]['hgvs_genomic_description'].format(
{'max_ref_length': 0})
# Copy the liftover and split into primary and alt
cp_current_lift = copy.deepcopy(current_lift)
scaff_lift = copy.deepcopy(current_lift)
Expand Down Expand Up @@ -662,6 +695,13 @@ def stucture_data(self):
liftover_level=self.liftover
)

# First edit liftover to text, as required for output
for key, val in current_lift.items():
for chr_type in val.keys():
current_lift[key][chr_type]['hgvs_genomic_description'] = \
current_lift[key][chr_type]['hgvs_genomic_description'].format(
{'max_ref_length': 0})

# Copy the liftover and split into primary and alt
cp_current_lift = copy.deepcopy(current_lift)
scaff_lift = copy.deepcopy(current_lift)
Expand Down Expand Up @@ -701,7 +741,7 @@ def collect_metadata(self):
return meta

# <LICENSE>
# Copyright (C) 2016-2025 VariantValidator Contributors
# Copyright (C) 2016-2026 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
Expand Down