Skip to content

Commit

Permalink
add option to specify chromosomes to run imputation for
Browse files: browse the repository at this point in the history
  • Loading branch information
LindoNkambule committed Feb 5, 2025
1 parent 0ff3e08 commit a2e3364
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 7 deletions.
6 changes: 5 additions & 1 deletion gwaspy/imputation/glimpse2_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def glimpse_phase_impute(
batch: hb.Batch = None,
bam_files: str = None,
reference_path: str = None,
chromosomes: str = "all",
output_filename: str = None,
output_path: str = None):

Expand Down Expand Up @@ -286,7 +287,10 @@ def ligate_chunks(
bam_list = [(sample_id, bam_path) for sample_id, bam_path in zip(bams['sample'], bams['path'])]
bam_sizes = [size(bam_list[s][1]) for s in range(len(bam_list))]

for i in range(1, 23):
chroms = chromosomes.replace(" ", "") # remove spaces if there are any
chroms = [i for i in range(1, 23)] if chroms == "all" else chroms.split(",")

for i in chroms:
ref_chrom_path = reference_path.replace('CNUMBER', str(i))
ref_idx = f'{ref_chrom_path}.tbi' if hfs.exists(f'{ref_chrom_path}.tbi') else f'{ref_chrom_path}.csi'
ref_vcf = batch.read_input_group(**{'vcf': ref_chrom_path,
Expand Down
15 changes: 10 additions & 5 deletions gwaspy/imputation/impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@


def run_impute(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
input_vcf: str = None,
input_file: str = None,
vcf_ref: str = None,
chromosomes: str = "all",
software: str = 'impute5',
output_filename: str = None,
n_samples: int = None,
Expand All @@ -34,8 +35,9 @@ def run_impute(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
print(f'\nIMPUTING GENOTYPES USING IMPUTE5\n')
impute5_imputation(
batch=b,
input_path=input_vcf,
input_path=input_file,
reference_path=ref_path,
chromosomes=chromosomes,
output_filename=output_filename,
n_samples=n_samples,
n_panel_samples=n_panel_samples,
Expand All @@ -44,8 +46,9 @@ def run_impute(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
elif software == 'glimpse2':
glimpse_phase_impute(
batch=b,
bam_files=input_vcf,
bam_files=input_file,
reference_path=ref_path,
chromosomes = chromosomes,
output_filename=output_filename,
output_path=out_dir
)
Expand All @@ -54,8 +57,9 @@ def run_impute(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,

def main():
parser = argparse.ArgumentParser()
parser.add_argument('--input-vcf', type=str, required=True)
parser.add_argument('--input-file', type=str, required=True)
parser.add_argument('--vcf-ref', type=str, default='hgdp1kgp')
parser.add_argument('--chromosomes', type=str, default='all')
parser.add_argument('--local', action='store_true')
parser.add_argument('--billing-project', required=True)
parser.add_argument('--n-samples', type=int, required=True)
Expand All @@ -73,8 +77,9 @@ def main():
remote_tmpdir=f'{args.out_dir}/tmp/')

run_impute(backend=backend,
input_vcf=args.input_vcf,
input_file=args.input_file,
vcf_ref=args.vcf_ref,
chromosomes=args.chromosomes,
software=args.software,
output_filename=args.output_filename,
n_samples=args.n_samples,
Expand Down
6 changes: 5 additions & 1 deletion gwaspy/imputation/impute5_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def impute5_imputation(
batch: hb.Batch = None,
input_path: str = None,
reference_path: str = None,
chromosomes: str = "all",
output_filename: str = None,
n_samples: int = None,
n_panel_samples: int = 4091,
Expand Down Expand Up @@ -117,7 +118,10 @@ def concatenate_imputed_chunks(

return j

for i in range(1, 23):
chroms = chromosomes.replace(" ", "") # remove spaces if there are any
chroms = [i for i in range(1, 23)] if chroms == "all" else chroms.split(",")

for i in chroms:
# read chrom input files
if "CNUMBER" in input_path: # input VCF is already split by chromosome
vcf_path = input_path.replace('CNUMBER', str(i))
Expand Down

0 comments on commit a2e3364

Please sign in to comment.