Skip to content

Commit

Permalink
reduce memory in Genotypes.read (see #19)
Browse files (browse the repository at this point in the history)
  • Loading branch information
aryarm committed Apr 13, 2022
1 parent e290f2e commit 7827e3d
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions haptools/data/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,29 +92,29 @@ def read(self, region: str = None, samples: list[str] = None):
# load all info into memory
vcf = VCF(str(self.fname), samples=samples)
self.samples = tuple(vcf.samples)
variants = list(vcf(region))
self.variants = []
self.data = []
for variant in vcf(region):
# save meta information about each variant
self.variants.append((variant.ID, variant.CHROM, variant.POS, variant.aaf))
# extract the genotypes to a matrix of size n x p x 3
# the last dimension has three items:
# 1) presence of REF in strand one
# 2) presence of REF in strand two
# 3) whether the genotype is phased
self.data.append(variant.genotypes)
vcf.close()
# save meta information about each variant
# convert to np array for speedy operations later on
self.variants = np.array(
[
(variant.ID, variant.CHROM, variant.POS, variant.aaf)
for variant in variants
],
self.variants,
dtype=[
("id", "U50"),
("chrom", "U10"),
("pos", np.uint),
("aaf", np.float64),
],
)
# extract the genotypes to a np matrix of size n x p x 3
# the last dimension has three items:
# 1) presence of REF in strand one
# 2) presence of REF in strand two
# 3) whether the genotype is phased
self.data = np.array(
[variant.genotypes for variant in variants], dtype=np.uint8
)
self.data = np.array(self.data, dtype=np.uint8)
if self.data.shape == (0, 0, 0):
raise ValueError(
"Failed to load genotypes. If you specified a region, check that the"
Expand Down

0 comments on commit 7827e3d

Please sign in to comment.