|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | + |
| 4 | +""" |
| 5 | +
|
| 6 | +Takes as input a relatedness file, a fam file, and a list of individuals, and extracts the sub-matrix from the relatedness file |
| 7 | +for the given individuals. |
| 8 | +
|
| 9 | +
|
| 10 | +Jean-Tristan Brandenburg |
| 11 | +""" |
| 12 | + |
| 13 | + |
| 14 | +import sys |
| 15 | +import pandas as pd |
| 16 | +import numpy as np |
| 17 | +import argparse |
| 18 | + |
| 19 | +EOL=chr(10) |
| 20 | + |
def errorMessage10(phe, filename=None):
    """Print a diagnostic about non-numeric data in one column of a file.

    Args:
        phe: Name of the column that contains the invalid data.
        filename: Name of the file to report.  When omitted, the first
            command-line argument is used (the historical behaviour); if
            there is no such argument the file is reported as "unknown"
            instead of failing with an IndexError while reporting an error.
    """
    if filename is None:
        # sys.argv[1] is only a file name for positional-style invocations;
        # callers using named options should pass `filename` explicitly.
        filename = sys.argv[1] if len(sys.argv) > 1 else "unknown"
    print("""

    A problem has been detected in file <%s> column <%s>.

    There is some invalid data. I regret I can't tell you which row.


    Please check -- the data should be numeric only.


    If there is missing data, please use NA



    """%(filename,phe))
| 37 | + |
def parseArguments(argv=None):
    """Parse the command-line options of the sub-matrix extraction script.

    Args:
        argv: Optional list of argument strings.  When ``None`` (the
            default) argparse reads the options from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``rel``, ``phenofile``,
        ``covfile``, ``pospheno``, ``relout``, ``phenofileout`` and
        ``covfileout``.

    Raises:
        SystemExit: raised by argparse when a required option is missing
            or ``--pospheno`` is not an integer.
    """
    parser = argparse.ArgumentParser(
        description='extract the relatedness sub-matrix, phenotype and '
                    'covariates of the individuals with a non-missing phenotype')
    parser.add_argument('--rel', type=str, required=True,
                        help="File of relatedness matrix as gemma output")
    parser.add_argument('--phenofile', type=str, required=True,
                        help="Phenotype file (header line, then FID, IID and phenotype columns)")
    parser.add_argument('--covfile', type=str, required=True,
                        help="Covariate file (header line, then FID, IID and covariate columns)")
    parser.add_argument('--pospheno', type=int, required=True,
                        help="Position of the phenotype column, counted from 1 after the FID and IID columns")
    parser.add_argument('--relout', type=str, required=True,
                        help="File with output relatedness sub-matrix")
    parser.add_argument('--phenofileout', type=str, required=True,
                        help="File with output pheno")
    parser.add_argument('--covfileout', type=str, required=True,
                        help="File with output covariates")
    return parser.parse_args(argv)
| 49 | + |
def _read_phenotypes(path, pospheno):
    """Read one phenotype column, keeping individuals with a usable value.

    Args:
        path: Whitespace-separated phenotype file whose first line is a
            header and whose first two columns are FID and IID.
        pospheno: Position of the phenotype column, counted from 1 after
            the FID and IID columns.

    Returns:
        (header, lines) where ``header`` is the output header line and
        ``lines`` maps "FID IID" to the output line "FID IID<TAB>value" for
        every individual whose value is neither "-9" nor "NA".
    """
    column = pospheno + 1  # FID and IID occupy the first two columns
    lines = {}
    with open(path) as handle:
        header = "FID IID\t" + handle.readline().split()[column]
        for line in handle:
            fields = line.split()
            if not fields:
                # tolerate blank lines (typically a trailing one)
                continue
            value = fields[column]
            if value != '-9' and value.upper() != "NA":
                key = fields[0] + " " + fields[1]
                lines[key] = key + "\t" + value
    return header, lines


def _read_matrix_ids(path):
    """Return the tab-separated cells of the first line of the matrix file."""
    with open(path) as handle:
        # Strip the newline first: otherwise the last identifier carries a
        # trailing "\n", never matches, and the last individual is dropped.
        return handle.readline().rstrip('\n').split('\t')


def _read_covariates(path):
    """Read the covariate file.

    Returns:
        (header, rows) where ``header`` is the first line without its
        newline and ``rows`` maps "FID IID" to the full original line.
    """
    rows = {}
    with open(path) as handle:
        header = handle.readline().replace('\n', '')
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            rows[fields[0] + " " + fields[1]] = line.replace('\n', '')
    return header, rows


def _write_submatrix(source, destination, positions):
    """Copy the rows and columns whose indices are in ``positions``.

    Row indices and column indices share the same numbering (index 0 is the
    header row / first column), so the same list selects both.
    """
    rows_kept = set(positions)  # O(1) row test; column order stays in the list
    with open(source) as reader, open(destination, 'w') as writer:
        for row_index, line in enumerate(reader):
            if row_index in rows_kept:
                cells = line.rstrip('\n').split('\t')
                writer.write("\t".join([cells[pos] for pos in positions]) + "\n")


def main(args):
    """Extract matrix, phenotype and covariates for the phenotyped individuals.

    Individuals are kept when they appear in the header of the relatedness
    matrix and have a phenotype different from "-9"/"NA" in the selected
    column.  Outputs follow the order of the matrix header.

    Args:
        args: Namespace with the attributes ``rel``, ``phenofile``,
            ``covfile``, ``pospheno``, ``relout``, ``phenofileout`` and
            ``covfileout``.

    Raises:
        SystemExit: when a kept individual has no line in the covariate
            file; raised before any output file is written so that no
            partial, inconsistent output set is left behind.
    """
    pheno_header, pheno_lines = _read_phenotypes(args.phenofile, args.pospheno)

    matrix_ids = _read_matrix_ids(args.rel)
    print(matrix_ids[1:5])

    # Position 0 (header row / first column) is always kept.
    positions = [0]
    final_ids = []
    for index, ident in enumerate(matrix_ids):
        if ident in pheno_lines:
            positions.append(index)
            final_ids.append(ident)

    print('begin : red cov from file '+args.covfile)
    cov_header, cov_lines = _read_covariates(args.covfile)
    print('emd : red cov from file '+args.covfile)

    missing = [ident for ident in final_ids if ident not in cov_lines]
    if missing:
        sys.exit("individuals with a phenotype in %s and present in %s but absent from %s: %s"
                 % (args.phenofile, args.rel, args.covfile, ", ".join(missing)))

    print('begin : open and write maatrix pheno in file '+args.relout)
    _write_submatrix(args.rel, args.relout, positions)
    print('end : open and write maatrix pheno in file '+args.relout)

    print('begin : write pheno in file '+args.phenofileout)
    with open(args.phenofileout, 'w') as writer:
        writer.write(pheno_header+'\n')
        for ident in final_ids:
            writer.write(pheno_lines[ident]+'\n')
    print('end : write pheno in file '+args.phenofileout)

    print('begin : write cov '+args.covfileout)
    with open(args.covfileout, 'w') as writer:
        writer.write(cov_header+'\n')
        for ident in final_ids:
            writer.write(cov_lines[ident]+'\n')
    print('end : write cov')


if __name__ == "__main__":
    main(parseArguments())
| 121 | + |
| 122 | + |
0 commit comments