Skip to content

Commit

Permalink
feat: Added ability to set vcftype for reading str files (#214)
Browse files Browse the repository at this point in the history
  • Loading branch information
mlamkin7 authored May 19, 2023
1 parent afeab85 commit 0d734cd
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
1 change: 0 additions & 1 deletion docs/api/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ All of the other methods in the :class:`Genotypes` class are inherited, but the
genotypes = data.GenotypesTR.load('tests/data/simple_tr.vcf')
# make the first sample have 4 and 7 repeats for the alleles of the fourth variant
genotypes.data[0, 3] = (4, 7)
genotypes.write()
.. _api-data-genotypestr:

Expand Down
4 changes: 3 additions & 1 deletion haptools/clump.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import math
import sys

from haptools.data.genotypes import Genotypes, GenotypesVCF, GenotypesTR
from haptools.data.genotypes import Genotypes, GenotypesPLINK, GenotypesVCF, GenotypesTR


class Variant:
Expand Down Expand Up @@ -587,6 +587,8 @@ def clumpstr(
strgts.samples = tuple(np.array(strgts.samples)[str_samples])

# Merge STR and SNP GTs
# NOTE: merge with Genotypes rather than GenotypesVCF here. GenotypesVCF requires
# alleles to be present, whereas Genotypes does not, so GenotypesVCF would raise an error.
gts = Genotypes.merge_variants((snpgts, strgts), fname=None)
elif gts_snps:
gts = snpgts
Expand Down
14 changes: 12 additions & 2 deletions haptools/data/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,8 +829,15 @@ class GenotypesTR(Genotypes):
3. POS
log: Logger
See documentation for :py:attr:`~.Genotypes.log`
vcftype: str
The type of TR VCF that is being read. One of
{'auto', 'gangstr', 'advntr', 'hipstr', 'eh', 'popstr'}
"""

def __init__(
    self,
    fname: Path | str,
    log: Logger = None,
    vcftype: str = "auto",
):
    """
    Initialize the object and remember which TR VCF type to read

    Parameters
    ----------
    fname: Path | str
        Passed through unchanged to the parent class initializer
    log: Logger, optional
        Passed through unchanged to the parent class initializer
    vcftype: str, optional
        The TR VCF type, stored on the instance as ``self.vcftype``

        Documented options are 'auto', 'gangstr', 'advntr', 'hipstr', 'eh', and
        'popstr'. The value is not validated here.
    """
    # the parent initializer runs first, exactly as before; only then is the
    # TR-specific attribute attached to the instance
    super().__init__(fname, log)
    self.vcftype = vcftype

def _variant_arr(self, record: Variant):
"""
See documentation for :py:meth:`~.Genotypes._variant_arr`
Expand All @@ -851,6 +858,7 @@ def load(
region: str = None,
samples: list[str] = None,
variants: set[str] = None,
vcftype: str = "auto",
) -> Genotypes:
"""
Load STR genotypes from a VCF file
Expand All @@ -873,7 +881,7 @@ def load(
Genotypes
A Genotypes object with the data loaded into its properties
"""
genotypes = cls(fname)
genotypes = cls(fname, vcftype=vcftype)
genotypes.read(region, samples, variants)
genotypes.check_phase()
return genotypes
Expand All @@ -895,7 +903,9 @@ def _return_vcf_iter(self, vcf: cyvcf2.VCF, region: str):
TRRecord objects yielded from TRRecordHarmonizer.
"""
vcfiter = vcf(region)
tr_records = trh.TRRecordHarmonizer(vcffile=vcf, vcfiter=vcfiter, region=region)
tr_records = trh.TRRecordHarmonizer(
vcffile=vcf, vcfiter=vcfiter, region=region, vcftype=self.vcftype
)
return tr_records

def _return_data(self, variant: trh.TRRecord):
Expand Down

0 comments on commit 0d734cd

Please sign in to comment.