From 762d30c737c4793678d7f3470f6e1c1c0667d884 Mon Sep 17 00:00:00 2001 From: Matthew Mah Date: Fri, 10 Jan 2020 13:06:38 -0500 Subject: [PATCH] Nick's updates to v6.0 --- README | 17 +- README.Dstatistics | 3 +- README.INSTALL | 2 +- README.QPGRAPH | 8 +- README.QpWave | 9 +- README.ROLLOFF | 6 + README.qp4diff | 56 + examples/README | 2 + src/Makefile | 6 +- src/admutils.c | 1970 +++++++++++++++-------------- src/admutils.h | 25 +- src/convertf.c | 151 ++- src/dowtjack.c | 152 ++- src/f4rank.c | 51 +- src/kimf.c | 2692 ---------------------------------------- src/mcio.c | 396 +++++- src/mcio.h | 11 +- src/mergeit.c | 105 +- src/nicksrc/gds.c | 45 +- src/nicksrc/getpars.c | 10 +- src/nicksrc/linsubs.c | 29 + src/nicksrc/linsubs.h | 1 + src/nicksrc/ranmath.h | 1 + src/nicksrc/sortit.c | 2 +- src/nicksrc/sortit.h | 1 + src/nicksrc/statsubs.c | 218 +++- src/nicksrc/statsubs.h | 12 +- src/nicksrc/strsubs.c | 108 +- src/nicksrc/strsubs.h | 4 + src/nicksrc/vsubs.c | 133 ++ src/nicksrc/vsubs.h | 10 + src/qp3Pop.c | 40 +- src/qp4diff.c | 98 +- src/qpAdm.c | 277 +++-- src/qpBound.c | 28 +- src/qpDpart.c | 45 +- src/qpDstat.c | 52 +- src/qpF4ratio.c | 87 +- src/qpGraph.c | 107 +- src/qpWave.c | 134 +- src/qpff3base.c | 40 +- src/qpgbug.c | 2335 ---------------------------------- src/qpgsubs.c | 165 ++- src/qpreroot.c | 50 +- src/qpsubs.c | 198 ++- src/qpsubs.h | 8 + 46 files changed, 3391 insertions(+), 6509 deletions(-) create mode 100644 README.qp4diff create mode 100644 examples/README delete mode 100644 src/kimf.c delete mode 100644 src/qpgbug.c diff --git a/README b/README index 88e0371..4d577e4 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -ADMIXTOOLS version 5.1 9/18/18 (for Linux and Mac) +ADMIXTOOLS version 6.0 1/10/20 (for Linux and Mac) See README.INSTALL for installation info. @@ -44,6 +44,21 @@ beyond our scope. Nick Patterson +*** NEW *** +Lightweight onlline documentation added, thanks to Eric Deveaud + or -h to get brief documentation or + +*** NEW *** +Alter number of chromosomes: +numchrom: +zzz should be the autosome number (default 22 -- the number for humans) + +Custom block numbers (for jackknife) +blockname: +bbb should contain a list of snps 1 / line followed by a block number (integer) +block number should be at least 1. -1 -> snp ignored. If this option is used +snps not in list will be ignored. + ---------------------------- SOFTWARE COPYRIGHT NOTICE AGREEMENT This software and its documentation are copyright (2010) by Harvard University diff --git a/README.Dstatistics b/README.Dstatistics index 17325b3..2d77724 100644 --- a/README.Dstatistics +++ b/README.Dstatistics @@ -31,9 +31,10 @@ parfile: Name of parameter file NEW FEATURE In some cases using popfilename (see below) the program uses an excessive amount of memory. -A solution, designed to be convenient is to make multiple runs with -l lo -h hi set when only lines +A solution, designed to be convenient, is to make multiple runs with -L lo -H hi set when only lines lo through hi (starting at 1) of popfilename will be used. This makes it easy to do multiple runs and then extract all results by grep result: ... +*** note new parameter flags. Used to be -l, -h DESCRIPTION OF EACH PARAMETER in parfile: diff --git a/README.INSTALL b/README.INSTALL index 2e1ebd4..5001544 100644 --- a/README.INSTALL +++ b/README.INSTALL @@ -1,4 +1,4 @@ -ADMIXTOOLS version 5.1 9/18/18 (for Linux and Mac) +ADMIXTOOLS version 5.0 6/28/18 (for Linux and Mac) Best is to recompile on your system. diff --git a/README.QPGRAPH b/README.QPGRAPH index f2bf677..25d6832 100644 --- a/README.QPGRAPH +++ b/README.QPGRAPH @@ -55,12 +55,12 @@ admix T S1 S2 w1 w2 These lines are in the same format as the output graph [-o option below] and such a graph can be used (only admix lines are recognized) admixout: -This file is of course easy to habnd edit. +This file is of course easy to hand edit. If admixin is present then initial try estimation is omitted which will make qpGraph run very much faster. - - - +inbreed: YES +Genotypes are expected to be pseudo-haploid -- 2 samples at least per population or drift lengths on +leaves are not meaningful. -g graph diff --git a/README.QpWave b/README.QpWave index 6267d13..5d78239 100644 --- a/README.QpWave +++ b/README.QpWave @@ -22,7 +22,7 @@ logfile: Name of the logfile. The logfile contains the output of the run. parfile: Name of parameter file qpWave gives evidence of the number of admixture flows between the left and right populations, - and should be run as a precursor to qpAdm (see below). + and should usually be run as a precursor to qpAdm (see below). DESCRIPTION OF EACH PARAMETER in parfile: @@ -33,6 +33,13 @@ popleft: left population list (1 per line) popright: right population list (1 per line) details: YES +allsnps: YES +## default NO +When set to YES, the maximum number of snps available for each f4 statistic are used. By default, +only the intersect of snps across all Left and Right populations are used. + +chrom: Only use snps in the specified chromosome. + DESCRIPTION OF OUTPUT FILE: The program will write all the output to stdout. The output file prints the parfile entered by the user, number of snps and individuals, jackknife block size, number of blocks for jackknife and the results. diff --git a/README.ROLLOFF b/README.ROLLOFF index c0583d9..59d7db5 100644 --- a/README.ROLLOFF +++ b/README.ROLLOFF @@ -66,4 +66,10 @@ Nick Patterson ------------------------------------------------------------------------------ Last revision: 2016-01-11 +------------------------------------------------------------------------------------- +*** A new program DATES developed by the Moorjani lab seems superior to rolloff. See +https://github.com/priyamoorjani/DATES + +12/15/2019 + diff --git a/README.qp4diff b/README.qp4diff new file mode 100644 index 0000000..69e58e1 --- /dev/null +++ b/README.qp4diff @@ -0,0 +1,56 @@ +DOCUMENTATION OF qp4diff: + +It is often useful to directly test the difference of 2 f4 statistics. +If we merely run qpDstat and difference 2 results, we cannot correctly compute standard errors. + +qp4diff requires that the input data is available in one of the 5 supported formats. + + +Executable and source code: +------------------------------------------------------------------------------ + +For information about installing the program, see README.ADMIXTOOLS. After installing the programs, the executable qp4diff) should be located in the bin directory. + +To run qp4diff, type the following on a linux machine. +$DIR/bin/qp4diff -p parfile >logfile + +$DIR: Path to the bin directory. +logfile: Name of the logfile. The logfile contains the output of the run. +parfile: Name of parameter file + +DESCRIPTION OF EACH PARAMETER in parfile: + +genotypename: input genotype file (in eigenstrat format) +snpname: input snp file (in eigenstrat format) +indivname: input indiv file (in eigenstrat format) +popfilename: list (contains list of 8 populations- A B : C D :: E F : G H +Colon separators are not required and are ignored by the program. They have been included to make the input/output more readable). +Populations can repeat beteeen the left and right quadruplets +blgsize: jackknife block size (in centimorgan) + +*** 2 optional parameters *** +allsnps: YES +## default. By default only SNPs with valid f4 for both quads are used. +firstf4mult: val +The first f4 is multiplied by val. Can be used to test if alpha a_1 = a_2 for a fixed alpha. + +Note: +If you would like to use SNP number to define block size, you can: +make a fake .snp file with (false) physical positions, and 0 genetic distances. +The software defaults to 1M bases = 1cm, so if (for example) you set the +distance between each snp as 10000 bases and blgsize: .01 then each block +will be 100 snps. + +DESCRIPTION OF OUTPUT FILE: +The program will write all the output to stdout. The output file prints the parfile entered by the user, number of snps and individuals, jackknife block size, number of blocks for jackknif +e and the results. + +The results have the following format - +result: A B C D : E F G H f4diff std. error Z + +Nick Patterson + +------------------------------------------------------------------------------ + + + diff --git a/examples/README b/examples/README new file mode 100644 index 0000000..d078d2f --- /dev/null +++ b/examples/README @@ -0,0 +1,2 @@ +Download data directory from + Reich Lab web page. datasets :: Affymmetrix Human Origins (2012) diff --git a/src/Makefile b/src/Makefile index 96b0f39..fd792ac 100644 --- a/src/Makefile +++ b/src/Makefile @@ -21,16 +21,19 @@ endif ND = nicksrc NLIB = $(ND)/libnick.a -PROGS= qp3Pop qpDstat qpF4ratio qpAdm qpWave qp4diff dowtjack expfit.sh qpBound qpGraph qpreroot qpff3base qpDpart convertf mergeit ## gcount kimf +PROGS= qp3Pop qpDstat qpF4ratio qpAdm qpWave qp4diff dowtjack expfit.sh qpBound qpGraph qpreroot qpff3base qpDpart ## gcount kimf ## rolloff* convertf mergeit from .../o2src PROGS2 = rexpfit.r wtjack.pl all: $(NLIB) $(PROGS) + $(NLIB): $(MAKE) -C $(ND) +## an ubuntu user found he needed: make all LDLIBS="-llapack" + statsubs.o: tables nicksrc/statsubs.c $(CC) $(CFLAGS) -o statsubs.o nicksrc/statsubs.c @@ -106,7 +109,6 @@ rmdirs: qpGraph: $(NLIB) qpGraph.o gslqp.o qpgsubs.o qpsubs.o mcio.o ldsubs.o admutils.o egsubs.o regsubs.o -qpgbug: $(NLIB) qpgbug.o gslqp.o qpgsubs.o qpsubs.o mcio.o ldsubs.o admutils.o egsubs.o regsubs.o qpreroot: qpreroot.o qpgsubs.o qpsubs.o mcio.o ldsubs.o admutils.o egsubs.o regsubs.o diff --git a/src/admutils.c b/src/admutils.c index aab5b71..188c099 100644 --- a/src/admutils.c +++ b/src/admutils.c @@ -2,262 +2,232 @@ #include #include // #include -#include "admutils.h" +#include "admutils.h" -#include "packit.h" -static int fastdupnum = 10; -static double fastdupthresh = 0.75; -static double fastdupkill = 0.75; -static int snptab = NO; +#include "packit.h" +static int fastdupnum = 10 ; +static double fastdupthresh = 0.75 ; +static double fastdupkill = 0.75 ; +static int snptab = NO ; -#define MAXSTR 256 +#define MAXSTR 1024 -int hashit (char *str); +int hashit (char *str) ; -int -countcol (char *fname) -{ - FILE *fp; - int t; +int countcol(char *fname) { + FILE *fp ; + int t ; - openit (fname, &fp, "r"); - t = countcolumns (fp); - fclose (fp); - return t; + openit(fname, &fp, "r") ; + t = countcolumns(fp) ; + fclose(fp) ; + return t ; } -int -countcolumns (FILE * fp) -{ /* count number of text columns separated by whitespace */ - int i = 0, c; +int countcolumns(FILE *fp) +{ /* count number of text columns separated by whitespace */ + int i=0,c; fpos_t ptr; - if (fgetpos (fp, &ptr)) { - printf ("error counting columns\n"); + if (fgetpos(fp,&ptr)) { + printf("error counting columns\n"); return 0; } - while ((c = getc (fp)) != '\n') { - if (isgraph (c)) { + while ( (c = getc(fp)) != '\n' ) { + if (isgraph(c)) { i++; - while (isgraph (c = getc (fp))) { - } - ungetc (c, fp); + while (isgraph(c = getc(fp))) {} + ungetc(c,fp); } } - fsetpos (fp, &ptr); + fsetpos(fp,&ptr); return i; } -void -sett1 (double *tt, double theta, int numstates) +void sett1(double *tt, double theta, int numstates) { - if (numstates == 2) { - tt[0] = 1.0 - theta; - tt[1] = theta; - tt[2] = 0.0; - } - if (numstates == 3) { - tt[0] = (1.0 - theta) * (1.0 - theta); - tt[1] = 2.0 * theta * (1.0 - theta); - tt[2] = theta * theta; - } + if (numstates==2) { + tt[0] = 1.0-theta ; + tt[1] = theta ; + tt[2] = 0.0 ; + } + if (numstates==3) { + tt[0] = (1.0-theta)*(1.0-theta) ; + tt[1] = 2.0*theta*(1.0-theta) ; + tt[2] = theta*theta ; + } } -void -sett1r (double *tt, double theta, int numstates, double risk) +void sett1r(double *tt, double theta, int numstates, double risk) { - double y; - sett1 (tt, theta, numstates); - tt[1] *= risk; - tt[2] *= risk * risk; - y = asum (tt, numstates); - vst (tt, tt, 1.0 / y, numstates); + double y ; + sett1(tt, theta, numstates) ; + tt[1] *= risk ; + tt[2] *= risk*risk ; + y = asum(tt, numstates) ; + vst(tt, tt, 1.0/y, numstates) ; } -void -gettln (SNP * cupt, Indiv * indx, - double *ptheta, double *plambda, int *pnumstates, int *pignore) +void gettln(SNP *cupt, Indiv *indx, + double *ptheta, double *plambda, int *pnumstates, int *pignore) /* set theta, lambda numstates */ { - double theta, lambda; - int numstates, chrom, ignore; - - theta = indx->theta_mode; - lambda = indx->lambda_mode; - ignore = indx->ignore; - numstates = 3; - - chrom = cupt->chrom; - - if (chrom == 23) { - theta = indx->Xtheta_mode; - lambda = indx->Xlambda_mode; - if (indx->gender == 'M') - numstates = 2; - if (indx->gender == 'U') - ignore = YES; - } - if (ptheta != NULL) - *ptheta = theta; - if (plambda != NULL) - *plambda = lambda; - if (pnumstates != NULL) - *pnumstates = numstates; - if (pignore != NULL) - *pignore = ignore; + double theta, lambda ; + int numstates, chrom, ignore ; + + theta = indx->theta_mode ; + lambda = indx->lambda_mode ; + ignore = indx->ignore ; + numstates = 3 ; + + chrom = cupt -> chrom ; + + if (chrom == 23) { + theta = indx->Xtheta_mode ; + lambda = indx->Xlambda_mode ; + if (indx -> gender == 'M') numstates = 2; + if (indx -> gender == 'U') ignore = YES ; + } + if (ptheta !=NULL) *ptheta = theta ; + if (plambda != NULL) *plambda = lambda ; + if (pnumstates != NULL) *pnumstates = numstates ; + if (pignore != NULL) *pignore = ignore ; } -void -puttln (SNP * cupt, Indiv * indx, double ptheta, double plambda) +void puttln(SNP *cupt, Indiv *indx, + double ptheta, double plambda) /* put theta, lambda */ { - int chrom; + int chrom ; - chrom = cupt->chrom; + chrom = cupt -> chrom ; - if (chrom == 23) { + if (chrom == 23) { indx->Xtheta_mode = ptheta; - indx->Xlambda_mode = plambda; - return; - } - indx->theta_mode = ptheta; - indx->lambda_mode = plambda; - return; + indx->Xlambda_mode = plambda ; + return ; + } + indx->theta_mode = ptheta; + indx->lambda_mode = plambda ; + return ; } -double -hwcheck (SNP * cupt, double *cc) +double hwcheck (SNP *cupt, double *cc) { - int i, n, g; + int i, n, g ; - vzero (cc, 3); + vzero(cc, 3) ; - n = cupt->ngtypes; - for (i = 0; i < n; i++) { - g = getgtypes (cupt, i); - if (g < 0) - continue; - ++cc[g]; - } - return (hwstat (cc)); + n = cupt -> ngtypes ; + for (i=0; ingtypes; - for (i = 0; i < n; i++) { - indx = indm[i]; - gettln (cupt, indx, &t, &l, &numstates, &ignore); - if (ignore) - continue; - if (numstates != 3) - continue; - g = getgtypes (cupt, i); - if (g < 0) - continue; - ++cc[g]; - } - return (hwstat (cc)); + int i, n, g, ignore, numstates ; + Indiv *indx ; + double t, l ; -} -void -hap2dip (SNP * cupt) + vzero(cc, 3) ; + + n = cupt -> ngtypes ; + for (i=0; ingtypes; - for (i = 0; i < n; i++) { - g = getgtypes (cupt, i); - if (g < 0) - continue; - if (g == 2) - g = -1; - if (g == 1) - g = 2; - putgtypes (cupt, i, g); - } + int i, n, g ; + + n = cupt -> ngtypes ; + for (i=0; ingtypes; - for (i = 0; i < n; i++) { - g = getgtypes (cupt, i); - if (g < 0) - continue; - putgtypes (cupt, i, 2 - g); - } + int i, n, g ; + n = cupt -> ngtypes ; + for (i=0; i diplike == NULL) return ; + for (i=0; i diplike[i], cupt -> diplike[i], 3) ; + } } -void -flipalleles_phased (SNP * cupt) +void flipalleles_phased(SNP *cupt) // flip reference, variant counts { - int i, n, g; - n = cupt->ngtypes; - for (i = 0; i < n; i++) { - g = getgtypes (cupt, i); - if (g < 0) - continue; - if (g > 1) - fatalx ("genotype > 1 in phased file\n"); - putgtypes (cupt, i, 1 - g); - } + int i, n, g ; + n = cupt -> ngtypes ; + for (i=0; i1) fatalx("genotype > 1 in phased file\n") ; + putgtypes(cupt, i, 1-g) ; + } } -void -cntit (double *xc, SNP * c1, SNP * c2) +void cntit(double *xc, SNP *c1, SNP *c2) { - int n, i, e, f; - - n = MIN (c1->ngtypes, c2->ngtypes); - vzero (xc, 9); - for (i = 0; i < n; i++) { - e = getgtypes (c1, i); - f = getgtypes (c2, i); - if (e < 0) - continue; - if (f < 0) - continue; - ++xc[3 * e + f]; - } + int n, i, e, f ; + + n = MIN(c1->ngtypes, c2->ngtypes) ; + vzero(xc, 9) ; + for (i=0; i *db) - (*da < *db); } -void -pcheck (char *name, char x) +void pcheck (char *name, char x) { - - if (name != NULL) - return; - if (x != CNULL) - fatalx ("parameter %c compulsory\n", x); - else - fatalx ("missing argument\n"); + + if (name != NULL) return ; + if (x != CNULL) + fatalx ("parameter %c compulsory\n",x) ; + else fatalx("missing argument\n") ; } -void -printm (double **M, int numstates) +void printm(double **M, int numstates) { - int i, j; - printf ("M:\n"); - for (i = 0; i < numstates; i++) { - for (j = 0; j < numstates; j++) { - printf ("%9.3f ", M[j][i]); - } - printf ("\n"); - } + int i,j ; + printf("M:\n") ; + for (i=0; iisfake) - return 0; - n = cupt->ngtypes; - nvalid = 0; - for (i = 0; i < n; i++) { - k = getgtypes (cupt, i); - if (k >= 0) - ++nvalid; + int nvalid, n, i, k ; + if (cupt -> isfake) return 0 ; + n = cupt -> ngtypes ; + nvalid = 0 ; + for (i=0; i= 0) ++nvalid ; } - return nvalid; -} - -int -numvalids (Indiv * indx, SNP ** snpmarkers, int fc, int lc) -{ - SNP *cupt; - int idnum, numstates, ignore; - int k, nval = 0; - - if (fc > lc) - return 0; - if (lc < 0) - return 0; - idnum = indx->idnum; - for (k = fc; k <= lc; ++k) { - cupt = snpmarkers[k]; - if (cupt->isfake) - continue; - if (cupt->ignore) - continue; -/** + return nvalid ; +} + +int numvalids(Indiv *indx, SNP **snpmarkers, int fc, int lc) +{ + SNP *cupt ; + int idnum, numstates, ignore ; + int k, nval= 0 ; + + if (fc>lc) return 0 ; + if (lc<0) return 0 ; + idnum = indx -> idnum ; + for (k=fc; k<=lc; ++k) { + cupt = snpmarkers[k] ; + if (cupt -> isfake) continue ; gettln(cupt, indx, NULL, NULL, &numstates, &ignore) ; if (ignore) continue ; -*/ - if (cupt->ngtypes == 0) - continue; - if (getgtypes (cupt, idnum) >= 0) - ++nval; - } - return nval; -} + if (cupt -> ngtypes == 0) continue ; + if (getgtypes(cupt, idnum) >= 0) ++nval ; + } + return nval ; +} -double -malefreq (Indiv ** indmarkers, int numindivs) +double malefreq(Indiv **indmarkers, int numindivs) /* pop freq of males in sample */ { - int i; - Indiv *indx; - double cmale, cfemale; - - cmale = 0; - for (i = 0; i < numindivs; ++i) { - indx = indmarkers[i]; - if (indx->gender == 'M') - ++cmale; - } - - cmale /= (double) numindivs; - - return cmale; -} - -int -isimatch (int a, int b) -{ - if (a < 0) - return YES; - if (b < 0) - return YES; - if (a == b) - return YES; - return NO; -} + int i ; + Indiv *indx ; + double cmale, cfemale ; -void -gethpos (int *fc, int *lc, SNP ** snpm, int numsnps, - int xchrom, int lo, int hi) -{ - int k, xfc, xlc, pos; - SNP *cupt; - - xfc = 9999999; - xlc = -9999999; - for (k = 0; k < numsnps; k++) { - cupt = snpm[k]; - if (cupt->chrom != xchrom) - continue; - pos = cupt->physpos; - if (pos < lo) - continue; - if (pos > hi) - continue; - xfc = MIN (xfc, k); - xlc = MAX (xlc, k); - } - *fc = xfc; - *lc = xlc; -} + cmale = 0 ; + for (i=0; i gender == 'M') ++cmale ; + } -void -makedir (char *dirname) + cmale /= (double) numindivs ; + + return cmale ; +} + +int isimatch(int a, int b) +{ + if (a < 0) return YES ; + if (b < 0) return YES ; + if (a==b) return YES ; + return NO; +} + +void gethpos(int *fc, int *lc, SNP **snpm, int numsnps, + int xchrom, int lo, int hi) +{ + int k, xfc, xlc, pos ; + SNP *cupt ; + + xfc = 9999999 ; + xlc = -9999999 ; + for (k=0; k chrom != xchrom) continue ; + pos = cupt -> physpos ; + if (pos < lo) continue ; + if (pos > hi) continue ; + xfc = MIN(xfc, k) ; + xlc = MAX(xlc, k) ; + } + *fc = xfc ; + *lc = xlc ; +} + +void makedir (char * dirname) // AT wrote original // sets up directory. Will fail hard if directory does not // exist and can't be made { - int fdir; - fdir = open (dirname, O_RDONLY, 0); - if (fdir >= 0) { - close (fdir); - return; - } - fdir = mkdir (dirname, 0775); - if (fdir < 0) { - perror ("makedir"); - fatalx ("(makedir) directory %s not created\n"); - } - printf ("Created a new directory %s\n", dirname); + int fdir ; + fdir = open(dirname,O_RDONLY,0); + if (fdir >= 0) { + close (fdir) ; + return ; + } + fdir = mkdir(dirname,0775); + if (fdir < 0) { + perror("makedir") ; + fatalx("(makedir) directory %s not created\n") ; + } + printf("Created a new directory %s\n",dirname); } int -indxindex (char **namelist, int len, char *strid) +indxindex(char **namelist, int len, char *strid) // look for string in list { - int k; - for (k = 0; k < len; k++) { - if (strcmp (namelist[k], strid) == 0) - return k; - } - return -1; + int k ; + for (k=0; k< len; k++) { + if (strcmp(namelist[k], strid) == 0) return k ; + } + return -1 ; } -int -indindex (Indiv ** indivmarkers, int numindivs, char *indid) +int indindex(Indiv **indivmarkers, int numindivs, char *indid) /* hash table would be good here */ { - int k; - for (k = 0; k < numindivs; k++) { - if (strcmp (indivmarkers[k]->ID, indid) == 0) - return k; - } - return -1; + int k ; + for (k=0; k< numindivs; k++) { + if (strcmp(indivmarkers[k] -> ID, indid) == 0) return k ; + } + return -1 ; } -void -inddupcheck (Indiv ** indivmarkers, int numindivs) +void inddupcheck (Indiv ** indivmarkers, int numindivs) { // fail hard if duplicate int t, k, n; @@ -460,822 +398,932 @@ inddupcheck (Indiv ** indivmarkers, int numindivs) free (ss); } -int -snpindex (SNP ** snpmarkers, int numsnps, char *snpid) + +int snpindex(SNP **snpmarkers, int numsnps, char *snpid) { - int k; - char **ss; + int k ; + char **ss ; - if (snptab == NO) { + if (snptab==NO) { // set up hash table - snptab = YES; - ZALLOC (ss, numsnps, char *); - for (k = 0; k < numsnps; k++) { - ss[k] = strdup (snpmarkers[k]->ID); - } - xloadsearch (ss, numsnps); - freeup (ss, numsnps); - free (ss); - } - - k = xfindit (snpid); - return k; + snptab = YES ; + ZALLOC(ss, numsnps, char *) ; + for (k=0; k< numsnps; k++) { + ss[k] = strdup(snpmarkers[k] -> ID) ; + } + xloadsearch(ss, numsnps) ; + freeup(ss, numsnps) ; + free(ss) ; + } + + k = xfindit(snpid) ; + return k ; } - -void -freesnpindex () +void freesnpindex() { - if (snptab == NO) - return; - snptab = NO; - xdestroy (); + if (snptab == NO) return ; + snptab = NO ; + xdestroy() ; } -int -ignoresnp (SNP * cupt) +int ignoresnp(SNP *cupt) { - if (cupt->ignore) - return YES; - if (cupt->isfake) - return YES; - if (cupt->ngtypes == 0) - return YES; - if (cupt->isrfake) - return NO; - return NO; -} - -double -entrop (double *a, int n) + if (cupt -> ignore) return YES ; + if (cupt -> isfake) return YES ; + if (cupt -> ngtypes == 0) return YES ; + if (cupt -> isrfake) return NO ; + return NO ; +} + +double entrop(double *a, int n) { - int i; - double ysum, t, ans; - - ans = 0.0; - ysum = asum (a, n); - for (i = 0; i < n; i++) { - t = a[i] / ysum; - ans += xxlog2 (t); + int i ; + double ysum, t, ans ; + + ans = 0.0 ; + ysum = asum(a,n) ; + for (i=0; igtypes == NULL) { - ivclear (x, -1, n); - return; + if (cupt -> gtypes == NULL) { + ivclear(x, -1, n) ; + return ; } - if (!packmode) { - copyiarr (cupt->gtypes, x, n); - return; + if (!packmode) { + copyiarr(cupt-> gtypes, x, n) ; + return ; } - k = 0; - for (a = 0; 4 * a < n; ++a) { - w = cupt->pbuff[a]; - for (t = 0; t < 4; t++) { - b = w >> 2 * (3 - t); - x[k] = b & 3; - ++k; - if (k >= n) - break; - } + k = 0 ; + for (a=0; 4*a pbuff[a] ; + for (t = 0; t < 4 ; t++) { + b = w >> 2*(3-t) ; + x[k] = b & 3 ; + ++k ; + if (k>=n) break ; + } } } -int -getgtypes (SNP * cupt, int k) +int getgtypes(SNP *cupt, int k) { - char *buff; - int g; - - if (cupt->gtypes == NULL) - return -1; - - if (packmode) { - buff = cupt->pbuff; - g = rbuff ((unsigned char *) buff, k); - if (g == 3) - g = -1; - return g; + char *buff ; + int g ; + + if (cupt -> gtypes == NULL) return -1 ; + + if (packmode) { + buff = cupt -> pbuff ; + g = rbuff((unsigned char *)buff, k) ; + if (g==3) g = -1 ; + return g ; } - return cupt->gtypes[k]; + return cupt -> gtypes[k] ; } -void -putgtypes (SNP * cupt, int k, int val) +void putgtypes(SNP *cupt, int k, int val) { - char *buff; - int g; - - if (k >= cupt->ngtypes) - fatalx ("(putgtypes)\n"); - if (packmode) { - buff = cupt->pbuff; - g = val; - if (g < 0) - g = 3; - wbuff ((unsigned char *) buff, k, g); - return; + char *buff ; + int g ; + + if (k>= cupt -> ngtypes) fatalx("(putgtypes)\n") ; + if (packmode) { + buff = cupt -> pbuff ; + g = val ; + if (g <0) g=3 ; + wbuff((unsigned char *)buff, k, g) ; + return ; } - cupt->gtypes[k] = val; + cupt ->gtypes[k] = val ; } -int -getep (SNP * cupt, int k) +int getep(SNP *cupt, int k) { - char *buff; - int g; + char *buff ; + int g ; - if (cupt->gtypes == NULL) - return -1; - if (k >= cupt->ngtypes) - return -1; - buff = cupt->ebuff; - g = rbuff ((unsigned char *) buff, k); - if (g == 3) - g = -1; - return g; + if (cupt -> gtypes == NULL) return -1 ; + if (k>= cupt -> ngtypes) return -1 ; + buff = cupt -> ebuff ; + g = rbuff((unsigned char *)buff, k) ; + if (g==3) g = -1 ; + return g ; } -void -putep (SNP * cupt, int k, int val) +void putep(SNP *cupt, int k, int val) { - char *buff; - int g; + char *buff ; + int g ; - buff = cupt->ebuff; - if (buff == NULL) - fatalx ("(putep) no buffer\n"); - if (k >= cupt->ngtypes) - fatalx ("(putep)\n"); - g = val; - if (g < 0) - g = 3; - wbuff ((unsigned char *) buff, k, g); - return; + buff = cupt -> ebuff ; + if (buff == NULL) fatalx("(putep) no buffer\n") ; + if (k>= cupt -> ngtypes) fatalx("(putep)\n") ; + g = val ; + if (g <0) g=3 ; + wbuff((unsigned char *)buff, k, g) ; + return ; } -int -hasharr (char **xarr, int nxarr) +int hasharr(char **xarr, int nxarr) // in application ordering matters so we hash order dependent { + + int hash, thash, i, n ; - int hash, thash, i, n; - - hash = 0; + hash = 0 ; - for (i = 0; i < nxarr; i++) { - thash = hashit (xarr[i]); - hash *= 17; - hash ^= thash; - } - return hash; + for (i=0; i< nxarr; i++) { + thash = hashit(xarr[i]) ; + hash *= 17 ; + hash ^= thash ; + } + return hash ; } - -int -hashit (char *str) +int hashit (char *str) { /* simple and unimpressive hash function NJP */ - int j, len, hash; + int j, len, hash ; - hash = 0; - len = strlen (str); + hash = 0 ; + len = strlen(str) ; - for (j = 0; j < len; j++) { - hash *= 23; - hash += (int) str[j]; - } - return hash; + for (j=0; j3)) fatalx("(wbuff) invalid g value\n", g) ; - if ((g < 0) || (g > 3)) - fatalx ("(wbuff) invalid g value\n", g); + ++ncall ; - ++ncall; + msk = 3 << 6 ; + mm = g << 6 ; + ones = 0XFF ; - msk = 3 << 6; - mm = g << 6; - ones = 0XFF; + wnum = num/4 ; + wplace = num%4 ; - wnum = num / 4; - wplace = num % 4; - - mm = mm >> (wplace * 2); - msk = (msk >> (wplace * 2)) ^ ones; - buff[wnum] &= msk; - buff[wnum] |= mm; + mm = mm >> (wplace * 2) ; + msk = (msk >> (wplace *2)) ^ ones ; + buff[wnum] &= msk ; + buff[wnum] |= mm ; /** printf("zz %d %d %d %d %02x\n", num, wnum, wplace, g, buff[wnum]) ; printf("yyy %d %d\n", g, rbuff(buff,num)) ; */ } - int -rbuff (unsigned char *buff, int num) +rbuff(unsigned char *buff, int num) { - int wnum, wplace, rshft; - unsigned char b; - static int ncall = 0; - + int wnum, wplace, rshft ; + unsigned char b ; + static int ncall = 0 ; + // ++ncall ; - wnum = num >> 2; - wplace = num & 3; + wnum = num >> 2 ; + wplace = num & 3 ; - rshft = (3 - wplace) << 1; - b = buff[wnum] >> rshft; + rshft = (3-wplace) << 1 ; + b = buff[wnum] >> rshft ; - b = b & 3; - return b; + b = b & 3 ; + return b ; } - // fast dup code -void -setfastdupnum (int num) +void setfastdupnum(int num) { - fastdupnum = num; + fastdupnum = num ; } - -void -setfastdupthresh (double thresh, double kill) +void setfastdupthresh(double thresh, double kill) { - fastdupthresh = thresh; - fastdupkill = kill; + fastdupthresh = thresh ; + fastdupkill = kill ; } void -killxhets (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps, - int numindivs) -{ - SNP *cupt; - Indiv *indx; - int i, k, g; - - for (i = 0; i < numsnps; i++) { - cupt = snpmarkers[i]; - if (cupt->ignore) - continue; - if (cupt->isfake) - continue; - if (cupt->chrom != 23) - continue; - for (k = 0; k < numindivs; k++) { - indx = indivmarkers[k]; - if (indx->gender != 'M') - continue; - g = getgtypes (cupt, k); - if (g != 1) - continue; - putgtypes (cupt, k, -1); - } +killxhets(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs) +{ + SNP *cupt ; + Indiv *indx ; + int i, k, g ; + + for (i=0; i ignore) continue ; + if (cupt -> isfake) continue ; + if (cupt -> chrom != 23) continue ; + for (k=0; k gender != 'M') continue ; + g = getgtypes(cupt, k) ; + if (g != 1) continue ; + putgtypes(cupt, k, -1) ; } + } } + void -printdup (SNP ** snpm, int numsnp, Indiv * inda, Indiv * indb, int nmatch, - int nnomatch, int iter) -{ - int t1, t2; - double y; +fastdupcheck(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs) +{ + SNP *cupt ; + Indiv *indx ; + int *gtypes ; + int i, j, k, n ; + int *snphets, *indsnp, tab[15], ww[15], **codeit, *cc, g, *cbuff ; + int *buff, val, vv, lbuff, itry, ilo, ihi ; + + ZALLOC(gtypes, numindivs, int) ; + ZALLOC(cbuff, 2*numindivs, int) ; + ZALLOC(codeit, numindivs, int *) ; + ZALLOC(snphets, numsnps, int) ; + ZALLOC(indsnp, numsnps, int) ; + for (i=0; i ignore) continue ; + if (cupt -> isfake) continue ; + if (cupt -> chrom > 22) continue ; + grabgtypes(gtypes, cupt, numindivs) ; + for (k=0; k=numsnps) break ; + for (i=0; i<15; i++) { + j = indsnp[i+ilo] ; + tab[i] = j ; + } + n = 0 ; + for (k=0; k ignore) continue ; + for (i=0; i<15; i++) { + j = tab[i] ; + g = getgtypes(snpmarkers[j], k) ; + if (g<0) break ; + ww[i] = g ; + } + if (g < 0 ) continue ; + cc = codeit[n] = cbuff+2*n ; + cc[0] = kcode(ww, 15, 4) ; + cc[1] = k ; + ++n ; + } - if (nmatch <= 0) - return; - if (inda->ignore) - return; - if (indb->ignore) - return; - t1 = numvalids (inda, snpm, 0, numsnp - 1); - t2 = numvalids (indb, snpm, 0, numsnp - 1); - printf ("dup? %s %s match: %d mismatch: %d %d %d ", - inda->ID, indb->ID, nmatch, nnomatch, t1, t2); - printf ("%20s ", inda->egroup); - printf ("%20s", indb->egroup); - y = nnomatch / (double) (nnomatch + nmatch); - printf (" %9.3f", y); - printf (" %d", iter); - printnl (); -} + if (n==0) continue ; + ipsortit(codeit, NULL, n, 2) ; + buff = gtypes ; lbuff = 0; val = -1 ; -void -cdup (SNP ** snpm, Indiv ** indm, int nsnp, int *buff, int lbuff, int iter) -{ - static int ncall = 0; - SNP *cupt; - Indiv *inda, *indb; - double ytot, yhit; - int g1, g2, k1, k2, match, nomatch; - int i1, i2, j; -#define MINMARK 200 + for (i=0; i ignore) continue ; + if (cupt -> isfake) continue ; + g1 = getgtypes(cupt, k1) ; + g2 = getgtypes(cupt, k2) ; + if ( (g1<0) || (g2<0) ) continue ; + if (g1==g2) ++match ; + if (g1!=g2) ++nomatch ; + } - if (lbuff <= 1) - return; -// printf("zzcdup1 %d %d\n", ncall, lbuff) ; fflush(stdout) ; - if (lbuff > 20) - printf ("skipping...\n"); - for (i1 = 0; i1 < lbuff; ++i1) { - for (i2 = i1 + 1; i2 < lbuff; ++i2) { - k1 = buff[i1]; - k2 = buff[i2]; - match = nomatch = 0; - for (j = 0; j < nsnp; ++j) { - cupt = snpm[j]; - if (cupt->ignore) - continue; - if (cupt->isfake) - continue; - g1 = getgtypes (cupt, k1); - g2 = getgtypes (cupt, k2); - if ((g1 < 0) || (g2 < 0)) - continue; - if (g1 == g2) - ++match; - if (g1 != g2) - ++nomatch; - } + inda = indm[k1] ; + indb = indm[k2] ; + ytot = (double) (match + nomatch) ; + if (ytot< MINMARK) continue ; + yhit = ((double) match) / ytot ; - inda = indm[k1]; - indb = indm[k2]; - ytot = (double) (match + nomatch); - if (ytot < MINMARK) - continue; - yhit = ((double) match) / ytot; - - if (yhit > fastdupthresh) { - printdup (snpm, nsnp, inda, indb, match, nomatch, iter); - if (yhit > fastdupkill) - killdup (inda, indb, snpm, nsnp); - } - } + if (yhit>fastdupthresh) { + printdup(snpm, nsnp, inda, indb, match, nomatch) ; + if (yhit>fastdupkill) killdup(inda, indb, snpm, nsnp) ; + } } + } } -void -fastdupcheck (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps, - int numindivs) +void killdup(Indiv *inda, Indiv *indb, SNP **snpm, int nsnp) { - SNP *cupt; - Indiv *indx; - int *gtypes; - int g, i, j, k, n, t; - int *snphets, *indsnp, tab[15], ww[15], **codeit, *cc, *cbuff; - int *buff, val, vv, lbuff, itry, ilo, ihi; - int dd[2]; - - ZALLOC (gtypes, numindivs, int); - ZALLOC (cbuff, 2 * numindivs, int); - ZALLOC (codeit, numindivs, int *); - ZALLOC (snphets, numsnps, int); - ZALLOC (indsnp, numsnps, int); - - t = 0; - for (i = 0; i < numsnps; i++) { - cupt = snpmarkers[i]; - if (cupt->ignore) - continue; - if (cupt->isfake) - continue; - if (cupt->chrom > 22) - continue; - ivzero (dd, 2); - for (k = 0; k < numindivs; k++) { - g = getgtypes (cupt, k); - if (g < 0) - continue; - dd[1] += g; - dd[0] += (2 - g); - } - snphets[i] = MIN (dd[0], dd[1]); - if (snphets[i] > 0) - ++t; - } - printf ("number of polymorphic snps: %d\n", t); - fflush (stdout); - ivst (snphets, snphets, -1, numsnps); - isortit (snphets, indsnp, numsnps); -// make fastdupnum shots at exact match on 15 snps */ - for (itry = 1; itry < fastdupnum; itry++) { - printf ("fdup iteration: %d\n", itry); - fflush (stdout); - ilo = 15 * (itry - 1); - if ((ilo + 15) >= numsnps) - break; - for (i = 0; i < 15; i++) { - j = indsnp[i + ilo]; - tab[i] = j; - } - n = 0; - for (k = 0; k < numindivs; ++k) { - indx = indivmarkers[k]; - if (indx->ignore) - continue; - for (i = 0; i < 15; i++) { - j = tab[i]; - g = getgtypes (snpmarkers[j], k); - if (g < 0) - break; - ww[i] = g; - } - if (g < 0) - continue; - cc = codeit[n] = cbuff + 2 * n; - cc[0] = kcode (ww, 15, 4); - cc[1] = k; - ++n; - } + int t1, t2 ; + Indiv *indx ; - - if (n == 0) - continue; - ipsortit (codeit, NULL, n, 2); - - buff = gtypes; - lbuff = 0; - val = -1; - - for (i = 0; i < n; i++) { - cc = codeit[i]; - vv = cc[0]; - if (vv != val) { - cdup (snpmarkers, indivmarkers, numsnps, buff, lbuff, itry); - lbuff = 0; - val = vv; - } - buff[lbuff] = cc[1]; - ++lbuff; - } - cdup (snpmarkers, indivmarkers, numsnps, buff, lbuff, itry); - } // itry - - free (snphets); - free (indsnp); - free (gtypes); - free (codeit); - free (cbuff); + t1 = numvalids(inda, snpm, 0, nsnp-1) ; + t2 = numvalids(indb, snpm, 0, nsnp-1) ; + indx = inda ; + if (t1>t2) indx = indb ; + indx -> ignore = YES ; + printf("dup. %s ignored\n", indx -> ID) ; } -void -killdup (Indiv * inda, Indiv * indb, SNP ** snpm, int nsnp) +void printdup(SNP **snpm, int numsnp, Indiv *inda, Indiv *indb, int nmatch, int nnomatch) { - int t1, t2; - Indiv *indx; + int t1, t2 ; + double y ; - t1 = numvalids (inda, snpm, 0, nsnp - 1); - t2 = numvalids (indb, snpm, 0, nsnp - 1); - indx = inda; - if (t1 > t2) - indx = indb; - indx->ignore = YES; - printf ("dup. %s ignored\n", indx->ID); + if (nmatch<=0) return ; + if (inda -> ignore) return ; + if (indb -> ignore) return ; + + t1 = numvalids(inda, snpm, 0, numsnp-1) ; + t2 = numvalids(indb, snpm, 0, numsnp-1) ; + printf("dup? %s %s match: %d mismatch: %d %d %d ", + inda->ID, indb -> ID, nmatch, nnomatch, t1, t2) ; + printf("%20s ", inda->egroup) ; + printf("%20s", indb->egroup) ; + y = nnomatch / (double) (nnomatch+nmatch) ; + printf(" %9.3f", y) ; + printnl() ; } -int -kcode (int *w, int len, int base) +int kcode(int *w, int len, int base) { - int i, t; - t = 0; - for (i = 0; i < len; i++) { - t *= base; - t += w[i]; - } - return t; + int i, t ; + t = 0; + for (i=0; iignore) - continue; - ++nvalids; + for (i=0; i< numind; i++) { + indx = indivmarkers[i] ; + if (indx -> ignore) continue ; + ++nvalids ; } - return nvalids; -} + return nvalids ; +} -void -numvalidgtallind (int *x, SNP ** snpm, int numsnps, int numind) -{ +void numvalidgtallind(int *x, SNP **snpm, int numsnps, int numind) { // count valids for all - int n = 0; - int k, t, j; - SNP *cupt; - int *z; - - ZALLOC (z, numind, int); - ivzero (x, numind); - for (k = 0; k < numsnps; ++k) { - cupt = snpm[k]; - if (cupt->ignore) - continue; - getgall (cupt, z, numind); - for (j = 0; j < numind; ++j) { - if (z[j] >= 0) - ++x[j]; - } + int n = 0 ; + int k, t, j ; + SNP *cupt ; + int *z ; + + ZALLOC(z, numind, int) ; + ivzero(x, numind) ; + for (k=0; k< numsnps; ++k) { + cupt = snpm[k] ; + if (cupt -> ignore) continue ; + getgall(cupt, z, numind) ; + for (j=0; j=0) ++x[j] ; + } } - free (z); + free(z) ; } -int -numvalidgtind (SNP ** snpm, int numsnps, int ind) -{ - int n = 0; - int k, t; - SNP *cupt; - - for (k = 0; k < numsnps; ++k) { - cupt = snpm[k]; - if (cupt->ignore) - continue; - t = getgtypes (cupt, ind); - if (t >= 0) - ++n; +int numvalidgtind(SNP **snpm, int numsnps, int ind) { + int n = 0 ; + int k, t ; + SNP *cupt ; + + for (k=0; k< numsnps; ++k) { + cupt = snpm[k] ; + if (cupt -> ignore) continue ; + t = getgtypes(cupt, ind) ; + if (t >= 0) ++n ; } - return n; + return n ; } -int -numvalidgt (Indiv ** indivmarkers, SNP * cupt) +int numvalidgt(Indiv **indivmarkers, SNP *cupt) // like numvalidgtypes but tests ignore -{ - int n, k, nvalids; - if (cupt->gtypes == NULL) - return 0; - nvalids = 0; - n = cupt->ngtypes; - for (k = 0; k < n; k++) { - if (indivmarkers[k]->ignore) - continue; - if (getgtypes (cupt, k) >= 0) - ++nvalids; - } - return nvalids; -} - -int -numvalidgtx (Indiv ** indivmarkers, SNP * cupt, int affst) +{ + int n, k, nvalids ; + if (cupt -> gtypes == NULL) return 0 ; + nvalids = 0 ; + n = cupt -> ngtypes ; + for (k=0; k ignore) continue ; + if (getgtypes(cupt, k) >=0) ++nvalids ; + } + return nvalids ; +} +int numvalidgtx(Indiv **indivmarkers, SNP *cupt, int affst) // like numvalidgtypes but tests ignore counts only when status=affst -{ - int n, k, nvalids; - if (cupt->gtypes == NULL) - return 0; - nvalids = 0; - n = cupt->ngtypes; - for (k = 0; k < n; k++) { - if (indivmarkers[k]->ignore) - continue; - if (indivmarkers[k]->affstatus != affst) - continue; - if (getgtypes (cupt, k) >= 0) - ++nvalids; - } - return nvalids; -} - -int -isxmale (SNP * cupt, Indiv * indx) -{ - if (cupt->chrom != 23) - return NO; - if (indx->gender != 'M') - return NO; - return YES; +{ + int n, k, nvalids ; + if (cupt -> gtypes == NULL) return 0 ; + nvalids = 0 ; + n = cupt -> ngtypes ; + for (k=0; k ignore) continue ; + if (indivmarkers[k] -> affstatus != affst) continue ; + if (getgtypes(cupt, k) >=0) ++nvalids ; + } + return nvalids ; +} + +int isxmale(SNP *cupt, Indiv *indx) +{ + if (cupt -> chrom != 23) return NO ; + if (indx -> gender != 'M') return NO ; + return YES ; } void -printmatz (double *ww, char **eglist, int n) +printmatz(double *ww, char **eglist, int n) { - int i, j, x; - printf (" %4s", " "); - for (i = 0; i < n; i++) { - printf (" %4s", get3 (eglist[i])); + int i, j , x ; + printf(" %4s", " ") ; + for (i=0; i egroup) ; + indx -> egroup = strdup(indx -> ID) ; + } + freeup(spt, nsplit) ; + return nsplit ; +} + + + +int gvalm(char cc, char cm, char c1, char c2, int minfilterval) +{ + int x=0, t ; + char cb[2] ; + + if (fvalid(cm, minfilterval) == NO) return 9 ; + if (isiub2(cc) == NO) return 9 ; + t = iubcbases(cb, cc) ; + + if (cb[0] == c1) ++x ; + if (cb[1] == c1) ++x ; + + + return x ; + +} + +int fvalid(char cm, int minfilterval) +// is cm indicating valid? +{ + int t ; + + if (minfilterval < 0) return YES ; + t = (int) cm - (int) '0' ; + + if (t<0) return NO; + if (t>=10) return NO ; + if (t 0) { + + if (ww[0] == '#') + continue; + plen = strlen (ww); + if (ww[plen - 1] != ':') continue ; + ww[plen-1] = CNULL ; + if (upstring(ww) == NO) continue ; + + tkword[nmap] = strdup(ww) ; + tval[nmap] = strdup(rest) ; + ++nmap ; + } - indx = indmarkers[t]; - freestring (&indx->egroup); - indx->egroup = strdup (indx->ID); + } - freeup (spt, nsplit); - return nsplit; + + fclose(fff) ; + + if (nmap == 0) { + freeup(tkword, n) ; + freeup(val, n) ; + return 0 ; + } + +// sort in descending order of length + ZALLOC(slenm, nmap, int) ; + ZALLOC(indx, nmap, int) ; + for (k=0; k #include #include +#include #include #include @@ -16,7 +17,7 @@ #include "egsubs.h" #include "exclude.h" -#define WVERSION "5000" +#define WVERSION "5250" /** reformats files. pedfile junk (6, 7 cols, ACGT added) @@ -89,6 +90,11 @@ minvalpop :: every pop must have at least this number of valids (default not set) fillmissing: added better handling of seed + + hiresgendis added + + O2 version + .prob aware */ @@ -97,15 +103,16 @@ char *trashdir = "/var/tmp"; int qtmode = NO; -Indiv **indivmarkers, **indm2; +Indiv **indivmarkers, **probindivs, **indm2 ; +int numsnps, numindivs, numprobindivs, numind2 ; SNP **snpmarkers; SNP **snpm2; int zerodistance = NO; // YES => force gdis 0 int downsample = NO; // make pseudo homozygotes int pordercheck = YES; int familypopnames = NO; +int hiresgendis = NO ; -int numsnps, numindivs, numind2; int nums2; char *genotypename = NULL; @@ -117,6 +124,12 @@ char *snpoutfilename = NULL; char *genooutfilename = NULL; char *indivname = NULL; char *newindivname = NULL; + +char *probfilename = NULL; +char *proboutfilename = NULL; +char *probindivname = NULL; +int probmode = NO ; + char *badsnpname = NULL; char *xregionname = NULL; char *deletesnpoutname = NULL; @@ -202,7 +215,10 @@ void downsamp (SNP * cupt); int setsamp (Indiv ** indivmarkers, int numindivs, char *usesamples); int testmisspop(SNP **snpmarkers, int numsnps, Indiv **indivmarkers, int numindivs, int minvalpops) ; int fillmiss(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, char **fpops, int nfpops) ; - +int fixsnpdistance(SNP **snpm, int numsnps) ; +int loaddiplike (double *dip, unsigned char *sp) ; +long loadprobpack(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, char *bigbuff) ; +int usage (char *prog, int exval); int @@ -230,19 +246,25 @@ main (int argc, char **argv) int nindiv = 0, e, f, lag = 1; double xc[9], xd[4], xc2[9]; double ychi, zscore, zthresh = 20.0; - double y1, y2; + double y1, y2, ymem; int nignore, numrisks = 1; char **genolist; int numgenolist; char c1, c2; - int t1, t2; + int t1, t2, x; + + unsigned char *packp, *packp2 ; + long plen, plen2, numx ; + int rl2 = 4 ; malexhet = YES; // convertf default is don't change the data tersem = YES; // no snp counts readcommands (argc, argv); + cputime(0) ; + calcmem(0) ; printf("## %s version: %s\n", argv[0], WVERSION) ; @@ -267,10 +289,17 @@ main (int argc, char **argv) setfamilypopnames (YES); } + if (proboutfilename != NULL) { + probmode = YES ; + if (probfilename == NULL) fatalx("no probfilename but proboutfiename!\n") ; + } + setomode (&outputmode, omode); packmode = YES; settersemode (tersem); + if (hiresgendis) sethiressnp() ; + if (r2thresh > 0.0) killr2 = YES; if (badpedignore) @@ -361,6 +390,51 @@ main (int argc, char **argv) } + numprobindivs = 0 ; + plen = 0 ; + packp = NULL ; + + if (probmode) { + if (probindivname == NULL) probindivname = indivname ; + + numprobindivs = getindivs (probindivname, &probindivs); + for (j=0; j ID) ; + indx -> idnum = k ; + if (k<0) { + indx -> ignore = YES ; + printf("*** warning *** ID %s missing in indivs\n", indx -> ID) ; + } + } + plen = numsnps*numprobindivs*rl2 ; + ZALLOC(packp, plen, unsigned char) ; + printf("calling inprobx: hashcheck: %d\n", hashcheck) ; + inprobx (probfilename, snpmarkers, probindivs, numsnps, numprobindivs, packp) ; + } + + + + if (probmode) { + for (i=0; i probbuff = packp + i*numprobindivs*rl2 ; + ZALLOC(cupt -> diplike, numindivs, double *) ; + cupt -> diplike = initarray_2Ddouble(numindivs, 3, -1) ; + } + for (i=0; i ignore) continue ; + k = indx -> idnum ; + x = loaddiplike(cupt -> diplike[k], cupt -> probbuff + j*rl2) ; + } + } + + } + if (deletedup) dedupit (snpmarkers, numsnps); // only one marker per position @@ -377,8 +451,8 @@ main (int argc, char **argv) if ((t1 == 0) && (t2 > 0)) flip1 (cupt, phasedmode, YES); } - - + t = fixsnpdistance(snpmarkers, numsnps) ; + if (t>0) printf("%12d SNP positions adjusted\n", t) ; if (deletedup) dedupit (snpmarkers, numsnps); // only one marker per position @@ -456,7 +530,6 @@ main (int argc, char **argv) else setstatus (indivmarkers, numindivs, "Case"); - numsnps = rmsnps (snpmarkers, numsnps, deletesnpoutname); numindivs = rmindivs (snpmarkers, numsnps, indivmarkers, numindivs); // printf("got here! 2\n") ; fflush(stdout) ; @@ -510,17 +583,9 @@ main (int argc, char **argv) if (maxmiss < t) { cupt->ignore = YES; } -/** - if (numvalidind == t) { - printf("no data for snp: %s\n", cupt -> ID) ; - cupt -> ignore = YES ; - } -*/ } -// printf("got here! 3\n") ; fflush(stdout) ; - if (fastdup) { printf ("fastdup set %d\n", fastdupnum); @@ -538,10 +603,23 @@ main (int argc, char **argv) snpdecimate (snpmarkers, numsnps, decim, dmindis, dmaxdis); } + if (probmode) { + plen2 = numsnps*numindivs*rl2 ; + ZALLOC(packp2, plen2, unsigned char) ; + + numx = loadprobpack(snpmarkers, indivmarkers, numsnps, numindivs, packp2) ; + outprobx(proboutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packp2) ; + + printf("PROB file %s written: %ld records\n", proboutfilename, numx) ; + } + + + outfiles (snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode); - printf ("##end of convertf run\n"); + ymem = calcmem(1)/1.0e6 ; + printf("##end of convertf: %12.3f seconds cpu %12.3f Mbytes in use\n", cputime(1), ymem) ; return 0; } @@ -555,6 +633,7 @@ readcommands (int argc, char **argv) char *tempname; int n; + if (argc == 1) { usage(basename(argv[0]), 1); } while ((i = getopt (argc, argv, "p:vV")) != -1) { switch (i) { @@ -667,6 +746,10 @@ output: eurout getstring (ph, "newindivname:", &newindivname); getint (ph, "deletedup:", &deletedup); getint (ph, "mkdiploid:", &mkdiploid); + getint (ph, "hiresgendis:", &hiresgendis); + getstring (ph, "probfilename:", &probfilename); + getstring (ph, "proboutfilename:", &proboutfilename); + getstring (ph, "probindivname:", &probindivname); writepars (ph); closepars (ph); @@ -1230,5 +1313,37 @@ int fillmiss(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, free(nmiss) ; return nfill ; +} +int fixsnpdistance(SNP **snpm, int numsnps) +{ + int k, n=0 ; + SNP *cupt1, *cupt2 ; + double dis ; + if (hiresgendis == NO) return 0 ; + + for (k=1; k chrom != cupt1 -> chrom) continue ; + dis = cupt2 -> genpos - cupt1 -> genpos ; + if (fabs(dis) < 1.0e-8) { + cupt2 -> genpos = cupt1 -> genpos + 1.0e-9 ; + ++n ; + } + } + return n ; } + +int +usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + + exit(exval); +}; diff --git a/src/dowtjack.c b/src/dowtjack.c index fbd281d..c613978 100644 --- a/src/dowtjack.c +++ b/src/dowtjack.c @@ -1,107 +1,121 @@ #include #include -#include +#include #include #define MAXSTR 512 #define MAXFF 20 -char *iname = NULL; -char *oname = NULL; +char *iname = NULL ; +char *oname = NULL ; double mean; -void readcommands (int argc, char **argv); -void callwtjack (char *iname, char *oname); +int terse = NO ; -int -main (int argc, char **argv) +void readcommands(int argc, char **argv) ; +void callwtjack(char *iname, char *oname); + +int main(int argc, char **argv) { - readcommands (argc, argv); - callwtjack (iname, oname); + readcommands(argc, argv) ; + callwtjack(iname, oname); } -void -readcommands (int argc, char **argv) +void readcommands(int argc, char **argv) { int i; - while ((i = getopt (argc, argv, "i:o:m:")) != -1) { - switch (i) { + while ((i = getopt (argc, argv, "i:o:m:t:")) != -1) + { + switch (i) + { + + case 'i': + iname = strdup(optarg) ; + break; - case 'i': - iname = strdup (optarg); - break; + case 'o': + oname = strdup(optarg) ; + break; - case 'o': - oname = strdup (optarg); - break; + case 'm': + mean = atof(optarg) ; + break; - case 'm': - mean = atof (optarg); - break; + case 't': + terse = NO ; + break; - case '?': - printf ("Usage: bad params.... \n"); - fatalx ("bad params\n"); - } + case '?': + printf ("Usage: bad params.... \n") ; + fatalx("bad params\n") ; + } } } void -callwtjack (char *iname, char *oname) +callwtjack(char *iname, char *oname) { - FILE *ifile, *ofile; + FILE *ifile, *ofile ; char line[MAXSTR]; char *sx; char *spt[MAXFF]; int nsplit, len, i, k; char c; - - openit (iname, &ifile, "r"); - openit (oname, &ofile, "w"); + + openit(iname, &ifile, "r") ; + openit(oname, &ofile, "w") ; double *jwt, *jmean; - + /* output variable */ - double est, sig; // NB - est = 0; - sig = 0; + double est, sig; // NB + est =0; sig=0; /*input variables */ - len = numlines (iname); - ZALLOC (jwt, len, double); - ZALLOC (jmean, len, double); + len = numlines(iname); + ZALLOC(jwt, len, double); + ZALLOC(jmean, len, double); k = 0; - /* read input file and store data */ - while (fgets (line, MAXSTR, ifile) != NULL) { - nsplit = splitup (line, spt, MAXFF); - sx = spt[0]; - c = sx[0]; - if (c == '#') { - freeup (spt, nsplit); - continue; - } - - jwt[k] = atof (spt[1]); - jmean[k] = atof (spt[2]); - //printf("mean: %9.3f len: %9.3f\n", jmean[k], jwt[k]) ; - k++; - freeup (spt, nsplit); - } - len = k; // better style who knows how numlines handles commas - fclose (ifile); - // printf("mean: %9.3f len: %d\n", mean, len) ; - - - /*call weightjack */ - weightjack (&est, &sig, mean, jmean, jwt, len); - fprintf (ofile, "%9.3f", est); // d format ?? - fprintf (ofile, "%9.3f", sig); - fprintf (ofile, "\n"); - free (jmean); - free (jwt); - fclose (ofile); - -} + /* read input file and store data */ + while (fgets(line,MAXSTR,ifile) != NULL) + { + nsplit = splitup(line, spt, MAXFF); + sx = spt[0]; + c = sx[0]; + if (c == '#') { + freeup(spt, nsplit); + continue; + } + + jwt[k] = atof(spt[1]); + jmean[k] = atof(spt[2]); + //printf("mean: %9.3f len: %9.3f\n", jmean[k], jwt[k]) ; + k++; + freeup(spt, nsplit); + } + len = k ; // better style who knows how numlines handles commas + fclose(ifile) ; + // printf("mean: %9.3f len: %d\n", mean, len) ; + + + /*call weightjack */ + weightjack(&est, &sig, mean, jmean, jwt, len); + if (terse) { + fprintf(ofile,"%9.3f ", est); // d format ?? + fprintf(ofile,"%9.3f", sig); + fprintf(ofile,"\n"); + } + else { + fprintf(ofile,"mean: %9.3f ", est); // d format ?? + fprintf(ofile,"std error: %9.3f ", sig); + fprintf(ofile,"Z: %9.3f", est/sig); + fprintf(ofile,"\n"); + } + free(jmean); + free(jwt); + fclose(ofile) ; + + } diff --git a/src/f4rank.c b/src/f4rank.c index 4e50ef0..c15a46c 100644 --- a/src/f4rank.c +++ b/src/f4rank.c @@ -243,7 +243,7 @@ ranktestfix (double *mean, double *var, int m, int n, int rank, double *pA, // vfix is m long { int d = m * n, dd; - int f, i, j, k, l, a, b, s, t, r1, r2, k1, k2, u1, u2; + int nfix, f, i, j, k, l, a, b, s, t, r1, r2, k1, k2, u1, u2; int iter, numiter = 20, ret; double *ww, *varinv; double T2, tail; @@ -257,7 +257,7 @@ ranktestfix (double *mean, double *var, int m, int n, int rank, double *pA, int nf; - t = intsum (vfix, m); + nfix = t = intsum (vfix, m); if (t > rank) fatalx ("(ranktestfix) too many fixed variables\n"); ZALLOC (vfl, m * rank, double); @@ -273,7 +273,7 @@ ranktestfix (double *mean, double *var, int m, int n, int rank, double *pA, l = vl[nf] = j * rank + k; // column k // variables to force to zero if (verbose) - printf ("zzvl %d %9.3f\n", nf, vl[nf], vfl[nf]); + printf ("zzvl %d %d %9.3f\n", nf, vl[nf], vfl[nf]); ++nf; } ++k; @@ -421,6 +421,12 @@ ranktestfix (double *mean, double *var, int m, int n, int rank, double *pA, printf ("iter A: %d %9.3f\n", iter, y1); } + if (nfix == -1) { + printf("zzf ") ; printimat(vfix, 1, m) ; + printimat(vl, 1, nf) ; + printmat(coeffs, adim, adim) ; + printmatw(ans, 1, adim, adim) ; + } if (pA != NULL) copyarr (A, pA, adim); if (pB != NULL) @@ -450,15 +456,23 @@ ranktest (double *mean, double *var, int m, int n, int rank, double *pA, { int d = m * n, dd; int i, j, a, b, s, t, r1, r2, k1, k2, u1, u2; - int iter, numiter = 20; + int iter, numiter = 20, retkode; double *ww, *varinv; double T2, tail; double *A, *B, *wleft, *wright, *mt, *evecs, *xmean; - double y, y0, y1; + double z, y, y0, y1; int adim, bdim, tdim; double *coeffs, *rhs, *ans; + if (rank==m) { +// full rank nothing serious to do) + y1 = 0 ; + if (pA != NULL) setidmat(pA, m) ; + if (pB != NULL) copyarr(mean, pB, m*n) ; + return y1 ; + } + dd = d * d; ZALLOC (varinv, dd, double); @@ -556,7 +570,8 @@ ranktest (double *mean, double *var, int m, int n, int rank, double *pA, } addscaldiag(coeffs, yscale, bdim) ; - solvit (coeffs, rhs, bdim, ans); + retkode = solvit (coeffs, rhs, bdim, ans); + if (retkode < 0) fatalx("bad ranktest B\n") ; copyarr (ans, B, bdim); normab (A, B, m, n, rank); @@ -581,7 +596,8 @@ ranktest (double *mean, double *var, int m, int n, int rank, double *pA, for (r2 = 0; r2 < rank; ++r2) { k2 = j * rank + r2; u2 = r2 * n + t; - coeffs[k1 * adim + k2] += y * B[u1] * B[u2]; + z = coeffs[k1 * adim + k2] += y * B[u1] * B[u2]; + if (isnan(z)) fatalx("bad coeffs %d %d %9.3f %9.3f %9.3f\n", u1, u2, B[u1], B[u2], y) ; } } } @@ -596,7 +612,8 @@ ranktest (double *mean, double *var, int m, int n, int rank, double *pA, vzero (ans, adim); addscaldiag(coeffs, yscale, adim) ; - solvit(coeffs, rhs, adim, ans); + retkode = solvit(coeffs, rhs, adim, ans); + if (retkode < 0) fatalx("bad ranktest A\n") ; copyarr (ans, A, adim); normab (A, B, m, n, rank); @@ -640,8 +657,13 @@ normab (double *A, double *B, int m, int n, int rank) // not needed but seems like good practice. Makes answer canonical { int i, r; - double y, *bpt; + double z, y, *bpt; + double *wa, *wb ; + ZALLOC(wa, rank*n, double) ; + ZALLOC(wb, rank*n, double) ; + copyarr(A, wa, rank*n) ; + copyarr(B, wb, rank*n) ; for (r = 0; r < rank; ++r) { bpt = B + r * n; y = asum2 (bpt, n); @@ -651,9 +673,18 @@ normab (double *A, double *B, int m, int n, int rank) y = -y; vst (bpt, bpt, 1.0 / y, n); for (i = 0; i < m; ++i) { - A[i * rank + r] *= y; + z = A[i * rank + r] *= y; + if (isnan(z)) { + printf("bad normab :: %d %d %d %15.9f\n", r, i, n, y) ; + printmat(wa, rank, n) ; + printnl() ; + printmat(wb, rank, n) ; + fatalx("bad normab\n") ; + } } } + free(wa) ; + free(wb) ; } void diff --git a/src/kimf.c b/src/kimf.c deleted file mode 100644 index aca3417..0000000 --- a/src/kimf.c +++ /dev/null @@ -1,2692 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "admutils.h" -#include "mcio.h" -#include "mcmcpars.h" -#include "regsubs.h" -#include "egsubs.h" -#include "qpsubs.h" - -int gsldetails = NO; -double gslprecision = .001; - -int gslsetup (int npars, double *vpars, double initscale) ; -void gslfree() ; -double gslopt(double *pars) ; -void printslurmenv() ; - - -double maxtau = 10.0 ; - - - -/** - includes calendar time calculation -*/ - -int hashsize = 1000 * 1000 ; -ENTRY *hashlist ; -int hashmode = 2 ; // FNV -int quitearly = NO ; - -int npars = 0 ; -int paramindex[1000] ; -double *vpars ; -int newformat = YES ; - -enum actiontype {TASC, TSAMP, TSPEC, TLIFT, TROOT, TADMIX, TNEWFORMAT} ; - -typedef struct { - int *cl ; - int lev ; - double bprob ; - int largeliter ; -} BSTORE ; - -int **clstore ; -char **clcstore ; -int numclstore ; -int maxclstore ; -int xcload = 0 ; -int xchit = 0 ; -double lastkimscore = -100 ; -double minsig = 0 ; // after normalization - -BSTORE **bstlist ; -int maxbstnum ; - -typedef struct { - int count[200] ; - int *acount ; - int *bcount ; - int *ac ; - int kode ; - int level ; - int number ; - double aprob ; - double bprob ; - double dprob ; // external probability (data) - double dsig ; // std. error - double bmult ; - int isroot ; -} CONFIG ; - -typedef struct { - enum actiontype type ; // 1-> 1 parent, 2-> 2 parents, 10-> 0 parents (root) - int pop1 ; // Population index of this vertex (specfied in variables) - int pop2 ; // admixture vertex : index of parent; for lift index of kid - int pop3 ; // Index of parent / kid - int leaf ; - int n1 ; - int n2 ; - double tau1 ; - double tau2 ; - double tmix ; - double thetaval1 ; - double thetaval2 ; - int padmix ; - int ptau ; - int ptheta ; - int pnumber ; - int mutate ; - int isfixed ; - int isfixedl ; - int isfixedr ; -} LOGENTRY ; - - - -#define WVERSION "930" - -// timescore, timegmul added but not tested -// minsig addes -// iterate. pass scale to gslsetup -// seed, ranstart -// tau capped off (setptrans) -// big bug fixed in mutlike -// default theta = 1 -// script created if needed -// print worst Z (finalconfig) -// support for lock - -#define MAXFL 50 -#define MAXSTR 512 -#define MAXPOPS 100 - -char *parname = NULL; -char *trashdir = "/var/tmp"; -int details = NO; -int qtmode = NO; -int fstdmode = NO; -int hires = NO; -int inbreed = NO; - -int isinit = NO; - -int seed = 0; -int ranstart = 100 ; -double ransd = 0.01 ; - -char *graphname = NULL; -char *graphoutname = NULL; -char *graphdotname = NULL; -char *poplistname = NULL; - -char *dumpname = NULL; -char *loadname = NULL; -char *rootname; - -char *outpop = NULL; -// list of outliers -char *basepop = NULL; -int basenum = -1; -int calchash = NO; -int gsimp = NO; - -int bigiter = 1 ; - -// outnum used for weights -// basenum for f3 status - -char **poplist ; -int *popvertex ; - -int *lmix, nmix; -int nh2, numeg; -int *ezero = NULL; -double wtmin = .0001; -int xnumeg; -char *newnodename = NULL; -char *edgename = NULL; - -double *amut = NULL, bmut, **cmut, *dmut ; // see note. These are coefficients for mutation along edge - - -char *outputname = NULL; -char *weightname = NULL; -char *countname = NULL ; -char *scriptname = NULL ; - -FILE *ofile; -char **eglist; -char **egshort; -char **enames; - -void readcommands (int argc, char **argv); -char *mytemp(char *string) ; - -void getvpars(double *vp) ; // vp -> log -void setvpars(double *vp) ; // log -> vp -void putgpars(double *vpars) ; // log -> graph -void printpars() ; - -void initlog(LOGENTRY *logpt) ; -void printlog(LOGENTRY *logpt) ; -void genrcoeffs(double ****rr, int n) ; -void print4(char *title, double ****rr, int n) ; - -void genwcoeffs(double **ww, int n) ; -void printwcoeffs(double **ww, int n) ; -void setatrans(double **atrans, int n, double tmix) ; -void setptrans(double **ptrans, int n, double tau) ; -void setltrans(double **ltrans, double **ptrans, int u, int v) ; -void free4(double *****pvv, int a, int b, int c, int d) ; -void alloc4(double *****pvv, int a, int b, int c, int d) ; -void printv(int *v) ; -double bprob1(int *cl, int lev) ; -void setmut(int n) ; -double mutlike(double theta, double tau, int u, int v, double **xtrans) ; -double mutlike0(double tau, int u) ; -int loadspec(char *countname, int **lconf, char **vnames, int numvertex) ; - -void setcaltime() ; -int calctime(int s) ; -double calctscore() ; - -int numvertex, numalleles = -1 ; -char **vnames ; -// int *acount, *bcount, *tcount ; -int *sampsize, *sampx1, *tt, specsize ; -char **sampstr ; -int *popindex, npop=0 ; -double *caltime, **calss ; -int timecheck = -1 ; -double timesig = 30, timescore , timegscore, timegmul= 0.01 ; -double ****ttrans, **wtrans ; - - - -void free_cl() ; -void alloc_cl() ; -void set_cl() ; -void printbcl() ; - -double configprob(int *cfig) ; -double kimscore() ; -double grscore(double *vp) ; -double printconfig(CONFIG *cpt, int mode, char *title) ; -int printtime() ; - - CONFIG **configlist ; - CONFIG *cpt ; - int numconfig =0 , maxconfig = 1000 ; - - LOGENTRY **loglist ; - LOGENTRY *logpt ; - LOGENTRY **loglev ; - int numlogs= 0 , maxlog = 200 ; - double ****rcoeffs = NULL ; - double *****ttable1 ; // level 1 u v a b - double *****ttable2 ; // level 2 u v a b - double *** ttabmix ; // level u a or level v b - - int **activelist, **maxlevel, *vact, *vmax, *maxtarget ; - int *isslice, maxlevels ; - int *amax, *bmax, *tmax ; - -// configurations by level - int ***clist, *nclist = NULL ; - double **wlist ; - - int maxbst, maxcl ; - int largeliter = 0 ; - double zbreak = -1 ; - double scbreak = .001 ; - int numconfighash = 0 ; - -int -main (int argc, char **argv) -{ - - int nedge, t, s, i, j, k, l, z ; - int a, b, c, d, u, v, x ; - - char *psname, *pdfname; - char sss[MAXSTR]; - char *stmp ; - - int **lconf; - double *lprob ; - double *lsig ; - double tau ; - double zmax, zz ; - - int nlconf = 0 ; - int level, numlevels ; - int *tfig, *xpt ; - - double ylbase, y1, y2, yscore, y, yscale ; - double *w1, *dprob ; - int xamax, xbmax ; - double *sciter ; - int iter ; - double *tpars, *wpars ; - double initscore, tscore ; - double ymem ; - double yyy ; - - readcommands (argc, argv); - printf ("## kimf version: %s\n", WVERSION); - printf("graph: %s counts: %s\n", graphname, countname) ; - xhcreate(hashsize*2) ; - ZALLOC(hashlist, hashsize, ENTRY) ; - - cputime(0) ; - calcmem(0) ; - - if (seed == 0) seed = seednum() ; - printf("seed: %d\n", seed) ; - SRAND(seed) ; - printf("maxtau: %9.3f\n", maxtau) ; - - setfancyhash(hashmode) ; - - if (graphdotname == NULL) graphdotname = strdup("ttt.dot") ; - - numeg = loadgraph (graphname, &eglist); - - numvertex = getnumvertex() ; - printf("number of vertices: %d\n", numvertex) ; - ZALLOC(configlist, maxconfig, CONFIG *) ; - for (i=0; i acount = cpt -> count ; - cpt -> bcount = cpt -> count + numvertex ; - cpt -> ac = cpt -> count + 2*numvertex ; - cpt -> number = i ; - } - ZALLOC(vnames, numvertex, char *) ; - t = numlines(countname); - lconf = initarray_2Dint(t, numvertex, 0) ; - - ZALLOC(loglist, maxlog, LOGENTRY *) ; - ZALLOC(loglev, maxlog, LOGENTRY *) ; - for (i=0; i /dev/null", graphname, scriptname) ; - system(sss) ; - } - - printnl() ; - printf("scriptname: %s\n", scriptname) ; - sprintf(sss, "cat %s", scriptname) ; - system(sss) ; - fflush(stdout) ; - - nlconf = loadspec(scriptname, lconf, vnames, numvertex) ; // load menu - printnl() ; - printnl() ; - printf("numalleles: %d", numalleles) ; - printf( " npars: %d ", npars) ; - printf( " nlconf: %d ", nlconf) ; - printf( " numlogs: %d ", numlogs) ; - printnl() ; - fflush(stdout) ; - - for (k=0; k0) vact[k] = 1 ; - if (vact[k] == 1) vmax[k] = level ; - } - - xamax = xbmax = 0 ; - y = 0.0 ; - for (k=0; k dprob ; - } - printf("config scale: %9.3f\n", y ) ; - yscale = 1.0/y ; - for (k=0; k dprob *= yscale ; - yyy = cpt -> dsig ; - cpt -> dsig *= yscale ; - cpt -> dsig = MAX(cpt -> dsig, minsig) ; -// printf(%12.6f %12.6f %12.6f\n", yscale, yyy, cpt -> dsig) ; - } - - for (k=0; k acount, numvertex) ; - xamax = MAX(xamax, t) ; - t = intsum(cpt-> bcount, numvertex) ; - xbmax = MAX(xbmax, t) ; - - for (j=0; j acount[j] ; - b = cpt -> bcount[j] ; - - a = MIN(a, numalleles) ; - b = MIN(b, numalleles) ; - t = a + b ; - t = MIN(t, numalleles) ; - - amax[j] = MAX(amax[j], a) ; - bmax[j] = MAX(bmax[j], b) ; - tmax[j] = MAX(tmax[j], t) ; - - } - } - for (j=0; j type == TROOT) { - ++level ; - loglev[level] = logpt ; - - copyiarr(activelist[level-1], activelist[level], numvertex) ; - vact = activelist[level] ; - s = logpt -> pop1 ; - if (vact[s] <= 0) fatalx("rootbug\n") ; - - for (k=0; k type == TADMIX) { - ++level ; - loglev[level] = logpt ; - tact = YES ; - t = logpt -> pop1 ; if (t<0) fatalx(" TADMIX badbug\n") ; - s = logpt -> pop2 ; - maxtarget[s] = level ; - - copyiarr(activelist[level-1], activelist[level], numvertex) ; - vact = activelist[level] ; - vmax = maxlevel[level] ; - a1 = b1 = t1 = 0 ; - a2 = b2 = t2 = 0 ; - - vact[t] = 0 ; - vact[s] = 1 ; - a1= amax[t] ; b1=bmax[t] ; t1=tmax[t] ; - - amax[s] = MAX(amax[s], a1) ; - bmax[s] = MAX(bmax[s], b1) ; - tmax[s] = MAX(tmax[s], t1) ; - - s = logpt -> pop3 ; - maxtarget[s] = level ; - - vact[t] = 0 ; - vact[s] = 1 ; - a2= amax[t] ; b2=bmax[t] ; t2=tmax[t] ; - - amax[s] = MAX(amax[s], a2) ; - bmax[s] = MAX(bmax[s], b2) ; - tmax[s] = MAX(tmax[s], t2) ; - - } - - if (logpt -> type == TLIFT) { - ++level ; - loglev[level] = logpt ; - tact = YES ; - t = logpt -> pop1 ; if (t<0) fatalx("badbug\n") ; - s = logpt -> pop2 ; - maxtarget[t] = level ; - copyiarr(activelist[level-1], activelist[level], numvertex) ; - vact = activelist[level] ; - vmax = maxlevel[level] ; - a1 = b1 = t1 = 0 ; - a2 = b2 = t2 = 0 ; - if (s>=0) { - vact[s] = 0 ; - vact[t] = 1 ; - a1= amax[s] ; b1=bmax[s] ; t1=tmax[s] ; - } - s = logpt -> pop3 ; - if (s>=0) { - vact[s] = 0 ; - vact[t] = 1 ; - a2= amax[s] ; b2=bmax[s] ; t2=tmax[s] ; - } - - amax[t] = MAX(amax[t], a1+a2) ; - bmax[t] = MAX(bmax[t], b1+b2) ; - tmax[t] = MAX(tmax[t], t1+t2) ; - - } - - for (j=0; j level) t = NO ; - } - isslice[level] = t ; - if (t==YES) { - printf("slice on level: %3d\n", level) ; - printv(activelist[level]) ; - } - } - - printf("amax:\n") ; printv(amax) ; - printf("bmax:\n") ; printv(bmax) ; - printf("tmax:\n") ; printv(tmax) ; - t = numalleles + 1 ; - - alloc4(&rcoeffs, t, t, t, t) ; - genrcoeffs(rcoeffs, numalleles) ; - wtrans = initarray_2Ddouble(t, t, 0) ; - - ZALLOC(ttable1, maxlevels, double ****) ; - ZALLOC(ttable2, maxlevels, double ****) ; - ZALLOC(ttabmix, maxlevels, double **) ; - - for (k=0; k<= numlogs; ++k) { - alloc4(&ttable1[k], t, t, t, t) ; - alloc4(&ttable2[k], t, t, t, t) ; - ttabmix[k] = initarray_2Ddouble(t, t, 0) ; - } - - - ZALLOC(bstlist, maxbst, BSTORE *) ; - for (i=0; i largeliter = 1 ; - bstlist[i] -> bprob = -1 ; - } - - clstore = initarray_2Dint(hashsize, numvertex*2, 0) ; - ZALLOC(clcstore, hashsize, char *) ; - maxclstore = hashsize ; - numclstore = 0 ; - - alloc_cl() ; - - ++largeliter ; - - - setvpars(vpars) ; - yscore = - grscore(vpars) ; - - printf("init: %9.3f\n", yscore) ; - printf("number of distinct configurations (internal): %d\n", numconfighash) ; - printbcl() ; - - for (k=0; k yscore) { - printf("raninit: %3d %9.3f", iter, tscore) ; - printf(" *** ") ; - yscore = tscore ; - copyarr(wpars, vpars, npars) ; - printnl() ; - } - } - - iter = 0 ; - printf("iter: %3d %12.6f\n", iter, yscore) ; -// printf("bigiter:: %d\n", bigiter) ; fflush(stdout) ; - - getvpars(vpars) ; - putgpars(vpars) ; - dumpgraph (graphoutname); - dumpdotgraph (graphdotname); - - zmax = 0 ; - for (k=0; k0) { - - ZALLOC(sciter, bigiter+1, double) ; - sciter[0] = yscore ; - for (iter = 1; iter <= bigiter; ++iter) { - gslsetup(npars, vpars, 0.1) ; - gslopt(vpars) ; - getvpars(vpars) ; - yscore = -grscore(vpars) ; - putgpars(vpars) ; - zmax = 0 ; - for (k=0; k tau1 ; - break ; - - case 1: - t -= 1000 ; - logpt = loglist[t] ; - vp[k] = logpt -> tau2 ; - break ; - - case 2: - t -= 2000 ; - logpt = loglist[t] ; - vp[k] = logpt -> tmix ; - break ; - - case 3: - t -= 3000 ; - logpt = loglist[t] ; - vp[k] = logpt -> thetaval1; - break ; - - case 4: - t -= 4000 ; - logpt = loglist[t] ; - vp[k] = logpt -> thetaval2; - break ; - - default: fatalx("(setvpars)\n") ; - } - } - -} - -void printbcl() -// dummy -{ - -} -void getvpars(double *vp) -{ - int k, t, tt, z, f ; - LOGENTRY *logpt ; - - for (k=0; k pop1 ; - f = logpt -> isfixedl ; - if (f == NO) logpt -> tau1 = vp[k] ; - } - - if (tt==1) { - t -= 1000 ; - logpt = loglist[t] ; - z = logpt -> pop1 ; - f = logpt -> isfixedr ; - if (f == NO) logpt -> tau2 = vp[k] ; - } - - if (tt==2) { - t -= 2000 ; - logpt = loglist[t] ; - if (logpt->isfixed == NO) logpt -> tmix = vp[k] ; - } - - if (tt==3) { - t -= 3000 ; - logpt = loglist[t] ; - z = logpt -> pop1 ; - f = logpt -> isfixedl ; - if (f == NO) logpt -> thetaval1 = vp[k] ; - } - if (tt==4) { - t -= 4000 ; - logpt = loglist[t] ; - z = logpt -> pop1 ; - f = logpt -> isfixedr ; - if (f == NO) logpt -> thetaval2 = vp[k] ; - } - } -} - - -int getstype(char *sx) { - - int t ; - - t = strcmp(sx, "ascertain:") ; - if (t==0) return TASC ; - - t = strcmp(sx, "sampsizes:") ; - if (t==0) return TSAMP ; - - t = strcmp(sx, "spec:") ; - if (t==0) return TSPEC ; - - t = strcmp(sx, "lift:") ; - if (t==0) return TLIFT ; - - t = strcmp(sx, "root:") ; - if (t==0) return TROOT ; - - t = strcmp(sx, "admix:") ; - if (t==0) return TADMIX ; - - t = strcmp(sx, "newformat:") ; - if (t==0) { - return TNEWFORMAT ; - } - - return -1 ; -} - -int vnum(char *sx) -{ - int t ; - - t = indxstring(vnames, numvertex, sx) ; - if (t<0) fatalx("vertex %s not found\n", sx) ; - - return t ; -} - - -void printpars() -{ - int k, t, tt, a, b, c, q, z, f ; - LOGENTRY *logpt ; - double y, theta ; - - for (k=0; k tau1 ; - z = logpt -> pop1 ; - f = isfixed(z, YES) ; - printf("printp: %3d %6d %12.6f\n", k, q, y) ; - break ; - - case 1: - t -= 1000 ; - logpt = loglist[t] ; - z = logpt -> pop1 ; - f = isfixed(z, NO) ; - y = logpt -> tau2 ; - printf("printp: %3d %6d %12.6f\n", k, q, y) ; - break ; - - case 2: - t -= 2000 ; - logpt = loglist[t] ; - y = logpt -> tmix ; - printf("printp: %3d %6d %12.6f\n", k, q, y) ; - break ; - - case 3: - t -= 3000 ; - logpt = loglist[t] ; - theta = logpt -> thetaval1 ; - printf("printp: %3d %6d %12.6f\n", k, q, theta) ; - break ; - - case 4: - t -= 4000 ; - logpt = loglist[t] ; - theta = logpt -> thetaval2 ; - printf("printp: %3d %6d %12.6f\n", k, q, theta) ; - break ; - - default: - if (t>4000) fatalx("(printpars)\n") ; - } - } -} - -void putgpars(double *vpars) -{ - int k, t, tt, a, b, c, q ; - LOGENTRY *logpt ; - double y, theta ; - - for (k=0; k tau1 ; - theta = logpt -> thetaval1 ; - a = logpt -> pop1 ; - b = logpt -> pop2 ; - t = logpt -> isfixedl ; - if (t==NO) setepar(a, b, y, theta) ; - break ; - - case 1: - t -= 1000 ; - logpt = loglist[t] ; - y = logpt -> tau2 ; - theta = logpt -> thetaval2 ; - a = logpt -> pop1 ; - b = logpt -> pop3 ; - t = logpt -> isfixedr ; - if (t==NO) setepar(a, b, y, theta) ; - break ; - - case 2: - t -= 2000 ; - logpt = loglist[t] ; - y = logpt -> tmix ; - a = logpt -> pop1 ; - b = logpt -> pop2 ; - c = logpt -> pop3 ; - if (logpt -> isfixed == NO) setapar(a, b, c, y) ; - break ; - - default: - if (tt>4) fatalx("(putgpars)\n, k, q") ; - } - } -} - -void loadroot(char *buff) -{ - char *spt[MAXFF], *sx, *s2[20] ; - int nsplit, nasc, k, z, xd, xt, n ; - LOGENTRY *logpt ; - int p1, p2 ; - double tau1, tau2 ; - - nsplit = splitup(buff, spt, MAXFF) ; - - sx = spt[0] ; - mkupper(sx) ; - - z = vnum(sx) ; - - logpt = loglist[numlogs] ; - logpt -> type = TROOT ; - logpt -> pop1 = z ; - logpt -> pop2 = logpt -> pop3 = -1 ; - - ++numlogs ; -} - -void loadadmix(char *buff) -{ - char *spt[MAXFF], *sx, *s2[20] ; - int nsplit, nasc, k, z, xd, xt, n, t ; - LOGENTRY *logpt ; - int p1, p2 ; - double tmix ; - - nsplit = splitup(buff, spt, MAXFF) ; - - sx = spt[0] ; - mkupper(sx) ; - - z = vnum(sx) ; - t = getmixinfo(z, &p1, &p2, &tmix) ; - - logpt = loglist[numlogs] ; - logpt -> type = TADMIX ; - logpt -> pop1 = z ; - logpt -> pop2 = p1 ; - logpt -> pop3 = p2 ; - logpt -> tmix = tmix ; - -// if (t<0) return ; fixed but we want that - if (t<0) logpt -> isfixed = YES ; - paramindex[npars] = numlogs + 2000 ; - ++npars ; - ++numlogs ; -} -void loadlift(char *buff) -{ - char *spt[MAXFF], *sx, *s2[20] ; - int nsplit, nasc, k, z, xd, xt, n, t1, t2 ; - LOGENTRY *logpt ; - int p1, p2 ; - double tau1, tau2 ; - double th1, th2 ; - - nsplit = splitup(buff, spt, MAXFF) ; - - sx = spt[0] ; - mkupper(sx) ; - - z = vnum(sx) ; - getkidinfo(z, &p1, &p2, &tau1, &tau2, &th1, &th2) ; - - logpt = loglist[numlogs] ; - logpt -> type = TLIFT ; - logpt -> pop1 = z ; - logpt -> pop2 = p1 ; - logpt -> pop3 = p2 ; - logpt -> tau1 = tau1 ; - logpt -> tau2 = tau2 ; - if (th1==0.0) th1 = -1 ; - if (th2==0.0) th2 = -1 ; - logpt -> thetaval1 = th1 ; - logpt -> thetaval2 = th2 ; - logpt -> isfixedl = isfixed(z, YES) ; - logpt -> isfixedr = isfixed(z, NO) ; - - if (p1>=0) { - paramindex[npars] = numlogs ; - ++npars ; - } - if (p2>=0) { - paramindex[npars] = numlogs + 1000 ; - ++npars ; - } - - if ((th1>=0) && (p1>=0)) { - paramindex[npars] = numlogs + 3000 ; - ++npars ; - } - - if ((th2>=0) && (p2>=0)) { - paramindex[npars] = numlogs + 4000 ; - ++npars ; - } - - ++numlogs ; -} - - -/** -void loadasc(char *buff) -{ - char *spt[MAXFF], *sx, *s2[20] ; - int nsplit, nasc, k, z, xd, xt, n ; - - nasc = nsplit = splitupx(buff, spt, MAXFF, ';') ; - ivzero(acount, numvertex) ; - ivzero(tcount, numvertex) ; - - for (k=0; k int arrays -{ - int *aa, *bb, k, na, nb ; - double y, *yy ; - - ZALLOC(aa, numvertex, int) ; - ZALLOC(bb, numvertex, int) ; - ZALLOC(yy, numvertex, double) ; - - na = jcrackit(sa, aa) ; - nb = jcrackit(sb, bb) ; - - if (na != nb) fatalx("specmissmatch: %s %s\n", sa, sb) ; - - - for (k=0; k ac[k] = tt[z] = nsize(sx) ; - sy = sampstr[z] ; - ybprob *= bcoeff(sy, sx) ; - } - - copyiarr(tt, cpt -> acount, numvertex) ; - ivvm(tt, sampsize, tt, numvertex) ; - copyiarr(tt, cpt -> bcount, numvertex) ; -// store configuration - - cpt -> aprob = 1 ; - cpt -> bprob = -1 ; - cpt -> dprob = y ; - cpt -> dsig = ysig ; - cpt -> level = 0 ; -//set bmult ; - cpt -> bmult = ybprob ; - ++numconfig ; - - freeup(spt, nsplit) ; - free(tt) ; - - -} - -void free4(double *****pvv, int a, int b, int c, int d) -{ - double ****vv ; - int i, j ; - - if (*pvv==NULL) return ; - vv = *pvv ; - - for (i=0; i count ; - copyiarr(acount, cc, numvertex) ; - cc += numvertex ; - copyiarr(bcount, cc, numvertex) ; - pt -> level = level ; - - return ; -} - - - -int loadspec(char *countname, int **lconf, char **vnames, int numvertex) -{ - - FILE *fff ; - char line[MAXSTR+1], c ; - char buff[MAXSTR] ; - char *spt[MAXFF], *sx ; - int nsplit, num=0 ; - int skipit ; - int t, k, z, k1, k2, k3, oldmaxterms ; - int *w ; - static int numl = 0 ; - - openit(countname, &fff, "r") ; - while (fgets(line, MAXSTR, fff) != NULL) { - nsplit = splitup(line, spt, MAXFF) ; - if (nsplit == 0) continue ; // blank line - sx = spt[0] ; - if (sx[0] == '#') { - freeup(spt, nsplit) ; - continue ; - } - catxx(buff, spt+1, nsplit-1) ; // params - t = strlen(line) ; - - line[t-1] = CNULL ; - t = getstype(spt[0]) ; - printf("%s type: %d\n", line, t) ; - if (t<0) fatalx("bad parse:\n%s\n", line) ; - - switch (t) { - - case TASC: - fatalx("ascertain: no longer supported\n") ; - - case TSAMP: - loadssize(buff) ; - printf("specsize: %d\n", specsize) ; - break ; - - case TSPEC: - loaddata(buff) ; - ++numl ; - break ; - - case TLIFT: - loadlift(buff) ; - break ; - - case TROOT: - loadroot(buff) ; - break ; - - case TADMIX: - loadadmix(buff) ; - break ; - - case TNEWFORMAT: - newformat = YES ; - printf("newformat set!\n") ; - break ; - - default: - printf("???\n") ; - } - } - fclose(fff) ; - return numl ; -} - -void initlog(LOGENTRY *logpt) -{ - logpt -> type = -1 ; - logpt -> pop1 = -1 ; - logpt -> pop2 = -1 ; - logpt -> pop3 = -1 ; - logpt -> n1 = 0 ; - logpt -> leaf = NO ; - logpt -> tau1 = logpt -> tau2 = 0 ; - logpt -> tmix = -1 ; - logpt -> ptheta = -1 ; - logpt -> thetaval1 = -1 ; - logpt -> thetaval2 = -1 ; - logpt -> pnumber = -1 ; - logpt -> isfixed = NO ; - logpt -> isfixedl = NO ; - logpt -> isfixedr = NO ; -} - -char *vn(int k) -{ - if (k<0) return "?" ; - return vnames[k] ; -} - -void printlog(LOGENTRY *logpt) -{ - - printf("scripttype: %d %s %s %s", logpt-> type, vn(logpt -> pop1), vn(logpt -> pop2), vn(logpt -> pop3)) ; - if (logpt -> type == 1) printf("%9.3f %9.3f", logpt -> tau1, logpt -> tau2) ; - printnl() ; - fflush(stdout) ; - -} - -void printwcoeffs(double **ww, int n) -{ - int u, v, a, b, t ; - - printf("wcoeffs:\n") ; - for (t=2; t<=n; ++t) { - for (u=1; u=1; --j) { - if (tau<=0.0) break ; - yt = dd[i]*ptrans[i-1][j] - dd[j+1]*ptrans[i][j+1] ; - yb = dd[i] - dd[j] ; - ptrans[i][j] = yt/yb ; - } - if (verbose) printf("ptcheck: %3d %9.3f %3d %12.6f\n", n, tau, i, asum(ptrans[i], n+1) ) ; - } - free(dd) ; -} - -void genwcoeffs(double **ww, int n) -// recursion from "considering the first coalescent" . At root and polymorphic -{ - int u, v, t ; - double c1, c2, y1, yt, yb ; - - ww[1][1] = 1 ; - - for (t=3; t<=n; ++t) { - u = 1 ; // special case. Mutation can occur - v = t - u ; - - y1 = 2.0 / (double) (v*(v+1)) ; - c1 = (double) (v-1) / (double) (v+1) ; - ww[u][v] = y1 + c1*ww[u][v-1] ; - - for (u=2; umaxlog) continue ; - if (loglev[level] == NULL) continue ; - logpt = loglev[level] ; - if (logpt -> type == TADMIX) { - s = logpt -> pop1 ; - maxt = tmax[s] ; - setatrans(ttabmix[level], maxt, logpt -> tmix) ; - continue ; - } - t = logpt -> pop1 ; - maxa = amax[t] ; - maxb = bmax[t] ; - s = logpt -> pop2 ; - if (s>=0) { - maxu = amax[s] ; - maxv = bmax[s] ; - maxt = tmax[s] ; - setptrans(ttr, maxt, logpt -> tau1) ; - ttab = ttable1[level] ; - for (u=0; u<=maxu; u++) { - for (v=0; v<=maxv; v++) { - if ((u+v) > maxt) continue ; - setltrans(ttab[u][v], ttr, u, v) ; - } - } - } - - s = logpt -> pop3 ; - if (s>=0) { - maxu = amax[s] ; - maxv = bmax[s] ; - maxt = tmax[s] ; - setptrans(ttr, maxt, logpt -> tau2) ; - ttab = ttable2[level] ; - for (u=0; u<=maxu; u++) { - for (v=0; v<=maxv; v++) { - if ((u+v) > maxt) continue ; - setltrans(ttab[u][v], ttr, u, v) ; - } - } - } - } -} - -void set_cl() -{ - int level ; - int **cl ; - - for (level=0; level <= maxlevels; ++level) { - cl = clist[level] ; - iclear2D(&cl, maxcl, numvertex, 0) ; - vzero(wlist[level], maxcl) ; - } - ivzero(nclist, maxlevels+1) ; - -} -void liftit(int lev, int u, int v, double **ans, int mode) -{ - double **source ; - int t ; - - t = MAX(u, v) ; - - if (mode == 3) { - source = ttabmix[lev] ; - copyarr2D(source, ans, t+1, t+1) ; - return ; - } - - if (mode == 1) source = ttable1[lev][u][v] ; - if (mode == 2) source = ttable2[lev][u][v] ; - - copyarr2D(source, ans, u+1, v+1) ; -} - -void putab (int *cl, int z, int pa, int pb) -// store pa, pb in config -{ - if (z<0) return ; - cl[z] = pa ; - cl[z+numvertex] = pb ; - return ; -} - -void getab (int *cl, int z, int *pa, int *pb) -//extract a, b from config -{ - if (z<0) { - *pa = *pb = 0 ; - } - else { - *pa = cl[z] ; - *pb = cl[z+numvertex] ; - } - return ; -} -double top_prob(int a, int b) -{ - double yfac = 1.0, y, y1, ans ; - - yfac = 1.0 ; - y = bino(a+b, a) ; - y1 = 2.0/ (double) a ; - ans = yfac*y1/y ; - return ans ; -} - -double bprob1_root(int *cl, int lev) -{ - LOGENTRY *logpt ; - int p1, a, b ; - - -// yfac = pow(2, numalleles) ; - logpt = loglev[lev] ; - - p1 = logpt -> pop1 ; - getab(cl, p1, &a, &b) ; - - return top_prob(a, b) ; - -} - - -/** -typedef struct { - int *cl ; - double bprob ; -} BSTORE ; - -int **clstore ; -int numclstore ; -int maxclstore ; - -BSTORE **bstlist ; - -*/ - - -int hashiarr(char *str, int *a, int n) -{ - char *sx ; - static char **spt ; - char ss[10] ; - int k, t ; - static long ncall = 0 ; - - ++ncall ; - if (ncall==1) { - ZALLOC(spt, 100, char *) ; - } - sx = str ; - for (k=0; k key = s1 ; - ept -> data = NULL ; - - fpt = xhsearch(*ept, FIND) ; - - if (fpt == NULL) { - copyiarr(cl, clstore[numclstore], 2*numvertex) ; - ept -> key = clcstore[numclstore] = strdup(s1) ; - ept -> data = base + numclstore ; - ++numclstore ; - ++numconfighash ; - ++xcload ; - fpt = xhsearch(item1, ENTER) ; - - t = fpt -> data - base ; - bstpt = bstlist[t] ; - bstpt -> largeliter = -1; - bstpt -> bprob = -2 ; - } - - t = fpt -> data - base ; - return t ; - -} - -double bprob1_admix(int *cl, int lev) -{ - - int lp, p1, p2, p3, t, x, k, u, v, tmax ; - int olda1, oldb1, olda2, oldb2 ; - int maxa, maxb ; - int a1, a2, b1, b2 ; - int xa1, xa2, xb1, xb2 ; - double y1, y2, cprob, ytot, ans ; - double z1, z2 ; - LOGENTRY *logpt ; - double **x1trans, **ttrans ; - double *xprobs ; - int **xlist, nx ; - int *wcl ; - BSTORE *bstpt ; - - verbose = NO ; - lp = lev + 1 ; - logpt = loglev[lp] ; - -// admix - p1 = logpt -> pop1 ; // source - p2 = logpt -> pop2 ; - p3 = logpt -> pop3 ; - - - - ZALLOC(wcl, numvertex*2, int) ; - copyiarr(cl, wcl, numvertex*2) ; - - if (verbose) { - printf("zzadmix:\n") ; - printimat(cl, 2, numvertex) ; - } - - - getab(cl, p1, &xa1, &xb1) ; - getab(cl, p2, &olda1, &oldb1) ; - getab(cl, p3, &olda2, &oldb2) ; - - u = maxa = xa1 ; - v = maxb = xb1 ; - - tmax = t = MAX(maxa, maxb) + 1 ; - - x1trans = initarray_2Ddouble(t, t, 0) ; - ttrans = initarray_2Ddouble(maxa+1, maxb+1, 0) ; - - putab(wcl, p1, 0, 0) ; - - liftit(lp, cl[p1], cl[p1+numvertex], x1trans, 3) ; - - for (a1=0; a1<=u; ++a1) { - y1 = x1trans[u][a1] ; - for (b1=0; b1<=v; ++b1) { - y2 = x1trans[v][b1] ; - ttrans[a1][b1] = y1*y2 ; - if (verbose == NO) continue ; - printf("zzqtrans: %d %d %d %d ", u, v, a1, b1) ; - printf("%12.6f ", y1) ; - printf("%12.6f ", y2) ; - printf("%12.6f ", y1*y2) ; - printnl() ; - }} - - ytot = 0 ; - for (a1=0; a1<=u; ++a1) { - for (b1=0; b1<=v; ++b1) { - z1 = y1 = ttrans[a1][b1] ; - if (y1<=0) continue ; - if (verbose) printf("zza %d %3d %3d %15.9f %d %d\n", lev, a1, b1, y1, xa1, xb1) ; - putab(wcl, p2, a1+olda1, b1+oldb1) ; - a2 = u-a1 ; - b2 = v-b1 ; - putab(wcl, p3, a2+olda2, b2+oldb2) ; - y2 = bprob1(wcl, lev+1) ; - if (verbose) { - printf("zz3 %15.9f %12.6f %12.6f %12.6f %12.6f\n", y2, y1, y1*y2) ; - printimat(wcl, 2, numvertex) ; - } - ytot += y1*y2 ; - }} - - - free2D(&x1trans, tmax) ; - free2D(&ttrans, maxa+1) ; - - verbose = NO ; - - free(wcl) ; - ans = ytot ; - return ans ; - -// bprob1 calls addbst - -} - -int numnonz(int *a, int n) -{ - int k, t ; - - t = 0 ; - for (k=0; k bprob = ans ; - bstpt -> largeliter = largeliter ; - -} - -double bprob1(int *cl, int lev) -{ - - int lp, p1, p2, p3, t, x, k, olda, oldb; - int maxa, maxb ; - int a1, a2, b1, b2, u, v ; - int xa1, xa2, xb1, xb2 ; - double y1, y2, cprob, ytot, ans ; - LOGENTRY *logpt ; - double **x1trans, **x2trans, **ttrans, **xtrans ; - double *xprobs ; - int **xlist, nx ; - int *wcl ; - int bstindex ; - BSTORE *bstpt ; - double checkprob = -1 ; - double theta, tau ; - - lp = lev + 1 ; - logpt = loglev[lp] ; - - bstindex = k = seekbst(lev, cl) ; - - if (k >= 0) { - bstpt = bstlist[k] ; - if (bstpt -> largeliter == largeliter) { - checkprob = bstpt -> bprob ; - ++xchit ; - return bstpt -> bprob ; - } - } - - if (k<0) fatalx("seekbst failed\n") ; - - - - if (logpt -> type == TROOT) { - ans = bprob1_root(cl, lev) ; - addbst(bstindex, ans) ; - return ans ; - } - - if (logpt -> type == TADMIX) { - ans = bprob1_admix(cl, lev) ; - addbst(bstindex, ans) ; - return ans ; - } -// lift - p1 = logpt -> pop1 ; // target for lift - p2 = logpt -> pop2 ; - p3 = logpt -> pop3 ; - - - - ZALLOC(wcl, numvertex*2, int) ; - copyiarr(cl, wcl, numvertex*2) ; - - - getab(cl, p1, &olda, &oldb) ; - getab(cl, p2, &xa1, &xb1) ; - getab(cl, p3, &xa2, &xb2) ; - - maxa = xa1 + xa2 + olda ; - maxb = xb1 + xb2 + oldb ; - - - x1trans = initarray_2Ddouble(maxa+1, maxb+1, 0) ; - x2trans = initarray_2Ddouble(maxa+1, maxb+1, 0) ; - ttrans = initarray_2Ddouble(maxa+1, maxb+1, 0) ; - - - putab(wcl, p2, 0, 0) ; - putab(wcl, p3, 0, 0) ; - - x1trans[0][0] = x2trans[0][0] = 1 ; - if (p2>=0) liftit(lp, cl[p2], cl[p2+numvertex], x1trans, 1) ; - if (p3>=0) liftit(lp, cl[p3], cl[p3+numvertex], x2trans, 2) ; - - - for (a1=0; a1<=xa1; ++a1) { - for (b1=0; b1<=xb1; ++b1) { - y1 = x1trans[a1][b1] ; - if (verbose) printf("zzqq1 %d %3d %3d %15.9f %d %d\n", lev, a1, b1, y1, xa1, xb1) ; - if (y1<=0) continue ; - for (a2=0; a2<=xa2; ++a2) { - for (b2=0; b2<=xb2; ++b2) { - y2 = x2trans[a2][b2] ; - if (verbose) printf("zzqq2 %d %d %12.6f %d %d\n", a2, b2, y2, xa2, xb2) ; - if (y2<=0) continue ; - ttrans[a1+a2+olda][b1+b2+oldb] += y1*y2 ; - }}}} - - ytot = 0 ; - for (a1=0; a1<=maxa; ++a1) { - for (b1=0; b1<=maxb; ++b1) { - y1 = ttrans[a1][b1] ; - if (y1<=0) continue ; - putab(wcl, p1, a1, b1) ; - y2 = bprob1(wcl, lev+1) ; - ytot += y1*y2 ; - if (verbose) printf("zztran: %3d %3d %3d %12.6f %12.6f %15.9f %d %d %d\n", lev, a1, b1, y1, y2, y1*y2, p1, p2, p3) ; - }} - - ans = ytot ; - - t = numnonz(cl, numvertex) ; -// number of pops with derived count non zero - xtrans = NULL ; - if ((t==1) && (xa1>0)) { - u = xa1 ; v = xb1 ; xtrans = x1trans, tau = logpt -> tau1 ; - theta = logpt -> thetaval1 ; - if (theta<0) theta = 1.0 ; - } - if ((t==1) && (xa2>0)) { - u = xa2 ; v = xb2 ; xtrans = x2trans ; tau = logpt -> tau2 ; - theta = logpt -> thetaval2 ; - if (theta<0) theta = 1.0 ; - } - if (xtrans != NULL) ans += mutlike(theta, tau, u, v, xtrans) ; - -/** - We just add probabilities here as the model has an eps in mutation probability - So prob of mutation on edge is O(eps) and remaining prob is multiplied by 1 - eps. - Thus to order eps in total prob we just add -*/ - - - free2D(&x1trans, maxa+1) ; - free2D(&x2trans, maxa+1) ; - free2D(&ttrans, maxa+1) ; - - - free(wcl) ; - if (checkprob >= 0) { - printf("zzcheck %12.6f %12.6f", ans, checkprob) ; - if (ans != checkprob) { - printf (" ***") ; - printnl() ; - printf ("lev: index: %d %d\n", lev, bstindex) ; - printimat(cl, 2, numvertex) ; - printnl() ; - printimat(clstore[bstindex], 2, numvertex) ; - printnl() ; - } - printnl() ; - } - - addbst(bstindex, ans) ; - return ans ; - -} - - - -double configprob(int *cfig) -{ - int level ; - - set_cl() ; // check - - return bprob1(cfig, 0) ; - -} -void printv(int *v) -{ - int k ; - for (k=0; k maxtau) bigtau += (vp[k] - bigtau) ; - } - - - alloc_cl() ; // free_cl called here - - y = kimscore() ; - y -= 0.01 * bigtau ; - return -y ; - -} -double kimscore () -{ - double *w1, *dprob ; - int *tfig, *xpt ; - double y1, y2, yscore, ylbase ; - CONFIG *cpt ; - int k ; - double ytim ; - - ZALLOC(w1, numconfig, double) ; - ZALLOC(dprob, numconfig, double) ; - ZALLOC(tfig, 2*numvertex, int) ; - if (timecheck != 0) setcaltime() ; - - numconfighash = 0 ; - - for (k=0; k acount, xpt, numvertex) ; - xpt += numvertex ; - copyiarr(cpt -> bcount, xpt, numvertex) ; - - y1 = configprob(tfig) ; - if (y1 == 0.0) { - printconfig(cpt, 2, "bad configuration") ; - fatalx("bad score\n") ; - } - y2 = y1 * cpt -> bmult ; - y2 += 1.0e-10 ; - w1[k] = y2 ; - dprob[k] = cpt -> dprob ; - } - - y1 = bal1(w1, numconfig) ; - bal1(dprob, numconfig) ; - - for (k=0; k aprob = w1[k] ; - } - - vst(w1, w1, numconfig, numconfig) ; // score against random . ie all configs equiprobable - yscore = 100 * vldot(dprob, w1, numconfig) ; - - free(dprob) ; - free(w1) ; - free(tfig) ; - - ytim= calctscore() ; - y1 = yscore ; // old score - yscore -= ytim ; - lastkimscore = yscore ; - if (verbose) printf("zzsc %12.6f %12.6f %12.6f\n" , y1, ytim, yscore) ; - return yscore ; - -} -void setmut(int n) -{ - int k, j ; - double y ; - - ZALLOC(amut, n+1, double) ; - bmut = 1 ; - cmut = initarray_2Ddouble(n+1, n+1, 0) ; - ZALLOC(dmut, n+1, double) ; - for (k=2; k<=n; ++k) { - dmut[k] = (double) (k*(k-1)) / 2.0 ; - } - for (k=2; k<=n; ++k) { - amut[k] = amut[k-1] - (1.0 / dmut[k]) ; - } - for (k=2; k<=n; ++k) { - for (j=2; j=0) && (xx<=1)) continue ; - if (xx<0) yy = -yy ; - if ((s==1) & (yy>1)) { // bad mixing coeff - yy = 2.0*modf(0.5*yy, &y) ; - if (yy>1) yy = 2-yy ; - } - - y = yy - xx; - penalty += y * y; - vp[k] = xx = yy; - } - - return penalty; -} - -double printconfig(CONFIG *cpt, int mode, char *title) -{ - double y1, y2, z=0 ; - - printf("%15s %3d npop: %d", title, cpt -> number, npop) ; - if (mode == 2) { - printimatx(cpt -> ac, 1, npop) ; - } - - if (mode >= 1) { - printf(" %12.6f", cpt -> dprob) ; - printf(" %12.6f", cpt -> aprob) ; - y1 = cpt -> aprob - cpt -> dprob ; - y2 = cpt -> dsig ; - z = y1/(y2+1.0e-8) ; - printf(" %9.3f", z) ; // Z score - printf(" :: %12.6f %12.6f", y1, y2) ; - printnl() ; - return z ; - } - - printnl() ; - printimat(cpt -> acount, 1, numvertex) ; - printimat(cpt -> bcount, 1, numvertex) ; - printf("bmult: %12.0f", cpt -> bmult) ; - printnl() ; - return 0 ; - -} - -// caltime code - -void inittime() -{ - double y ; - int k, z ; - - - vclear(caltime, -999., numvertex) ; - clear2D(&calss, numvertex, 3, 0) ; - - for (k=0; k < npop; ++k) { - z = popindex[k] ; - y = caltime[z] = 0 ; // will generalise - calss[z][0] = 1 ; - calss[z][1] = y ; - calss[z][2] = y * y ; - } - -} - -double callength(double tau, double theta) -// time in generations, 10K nominal eff. pop. size -{ - double y ; - if (tau<0) return -999 ; - if (theta < 0) return -888 ; - - y = tau*theta*2*10000 ; // effect pop 10000 time in generations - - return y ; - -} - -int printtime() -{ - int k ; - - if (timecheck==0) return 0 ; - for (k=0; k type == TROOT) { - s = logpt -> pop1 ; - calctime(s) ; - continue ; - } - - if (logpt -> type == TADMIX) { - - a = logpt -> pop1 ; if (a<0) fatalx(" TADMIX badbug\n") ; - b = logpt -> pop2 ; if (b<0) fatalx(" TADMIX badbug\n") ; - s = logpt -> pop3 ; - tt = calctime(s) ; // evaluate time - if (tt<0) continue ; - - settime1(b, s, 0) ; - settime1(a, s, 0) ; - calctime(a) ; - calctime(b) ; - continue ; - } - - if (logpt -> type == TLIFT) { - t = logpt -> pop1 ; if (t<0) fatalx("badbug\n") ; - s = logpt -> pop2 ; - tt = calctime(s) ; - y = -77 ; - if (tt>0) { - y = callength(logpt -> tau1, logpt -> thetaval1) ; - t2 = settime1(t, s, y) ; - x = strcmp(vnames[t], "R") ; - } - - s = logpt -> pop3 ; - tt = calctime(s) ; - if (tt>0) { - y = callength(logpt -> tau2, logpt -> thetaval2) ; - t2 = settime1(t, s, y) ; - x = strcmp(vnames[t], "R") ; - } - calctime(t) ; - } - continue ; - } - timecheck = 0 ; - - for (s=0; s1) timecheck += (t-1) ; - } - -} - -double calctscore() -{ - -// double timesig = 30, timescore , timegscore, timegmul=1.0 ; - - - int k ; - double tsc, y0, y1, y2, ym ; - - timescore = timegscore = 0 ; -// printf("zztch %d\n", timecheck) ; - if (timecheck <= 0) return 0 ; - - tsc = 0 ; - - - for (k=0; kaftrue = cupt->af_freq = fraw; cupt->aa_aftrue = cupt->aa_af_freq = fraw; - if (sdpt->alleles != NULL) { - cupt->alleles[0] = sdpt->alleles[0]; - cupt->alleles[1] = sdpt->alleles[1]; - } - else { - cupt->alleles[0] = '1'; - cupt->alleles[1] = '2'; - } + cupt->alleles[0] = sdpt->alleles[0]; + cupt->alleles[1] = sdpt->alleles[1]; n0 = sdpt->nn[2]; n1 = sdpt->nn[3]; @@ -1645,6 +1645,7 @@ clearsnp (SNP * cupt) cupt->gpnum = 0; cupt->pcupt = NULL; cupt->tagnumber = -1; + cupt -> diplike = NULL ; charclear (cupt->cchrom, CNULL, 7); strcpy (cupt->cchrom, ""); @@ -1659,7 +1660,7 @@ rmindivs (SNP ** snpm, int numsnps, Indiv ** indivmarkers, int numindivs) // squeeze out ignore // dangerous bend. Of course indivmarkers indexing will change int n = 0, g, i, k; - int x; + int x, t; Indiv *indx; SNP *cupt; @@ -1668,6 +1669,7 @@ rmindivs (SNP ** snpm, int numsnps, Indiv ** indivmarkers, int numindivs) for (k = 0; k < numindivs; ++k) { if (indivmarkers[k]->ignore == YES) continue; // don't store + if (n == k) { // if no ignored found yet, ++n; // next unused is next element continue; // and no need to copy @@ -1678,14 +1680,18 @@ rmindivs (SNP ** snpm, int numsnps, Indiv ** indivmarkers, int numindivs) indivmarkers[n] = indivmarkers[k]; // into next unused element indx->idnum = n; for (i = 0; i < numsnps; i++) { + cupt = snpm[i]; + cupt = snpm[i]; if (cupt->gtypes == NULL) - break; + continue; if (cupt->ignore) continue; // copy only genotypes of non-ignored SNPs g = getgtypes (cupt, k); putgtypes (cupt, n, g); - } + if (cupt -> diplike != NULL) { + copyarr(cupt -> diplike[k], cupt -> diplike[n], 3) ; + }} ++n; } @@ -1793,6 +1799,7 @@ clearind (Indiv ** indm, int numind) indx->Xlambda_mode = indx->lambda_mode = lp1 / lp2; indx->thetatrue = -1.0; // silly value indx->qval = indx->rawqval = 0.0; + indx -> ishaploid = NO ; } cleartg (indm, numind); } @@ -1936,6 +1943,7 @@ printsnps (char *snpoutfilename, SNP ** snpm, int num, Indiv ** indm, double ppos; SNP *cupt; char ss[10]; + char sformat[20] ; FILE *xfile; int numvcase, numvcontrol; char c; @@ -1948,6 +1956,13 @@ printsnps (char *snpoutfilename, SNP ** snpm, int num, Indiv ** indm, else xfile = stdout; + strcpy(sformat, "%15.6f %15.0f") ; + + if (hiresgendis) + strcpy(sformat, "%15.9f %15.0f") ; + + + if (tersemode == NO) { fprintf (xfile, "\n"); fprintf (xfile, "###DETAILS ABOUT THE MARKERS\n"); @@ -1983,7 +1998,7 @@ printsnps (char *snpoutfilename, SNP ** snpm, int num, Indiv ** indm, fprintf (xfile, "%15.0f %15.0f", cupt->genpos, ppos); } else { - fprintf (xfile, "%15.6f %15.0f", cupt->genpos, ppos); + fprintf (xfile, sformat, cupt->genpos, ppos); } if (tersemode) { @@ -2294,6 +2309,57 @@ getindvals (char *fname, Indiv ** indivmarkers, int numindivs) return num; } +int +getblocks (char *fname, SNP ** snpm, int numsnps) +{ + // nmax block + char line[MAXSTR]; + char *spt[MAXFF], *sx; + int nsplit, num = 0; + int skipit, k; + int block, tmax = -1 ; + + FILE *fff; + + if (fname == NULL) return 0 ; + for (k = 0; k < numsnps; ++k) { + snpm[k]->tagnumber = -1 ; + } + openit (fname, &fff, "r"); + while (fgets (line, MAXSTR, fff) != NULL) { + nsplit = splitup (line, spt, MAXFF); + if (nsplit == 0) { + continue; + } + sx = spt[0]; + skipit = NO; + skipit = setskipit (sx); + k = snpindex (snpm, numsnps, sx); + if (k < 0) + skipit = YES; + if (skipit == NO) { + if (nsplit > 1) { + sx = spt[1]; + block = atoi(sx); + tmax = MAX(tmax, block) ; + snpm[k]->tagnumber = block; + ++num; + } + } + freeup (spt, nsplit); + continue; + } + fclose (fff); + fflush (stdout); + + for (k = 0; k < numsnps; ++k) { + block = snpm[k] -> tagnumber ; + if (block < 0) snpm[k] -> ignore = YES ; + } + + return tmax ; +} + int getweights (char *fname, SNP ** snpm, int numsnps) { @@ -2532,7 +2598,7 @@ ineigenstrat (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, FILE *fff; char *line = NULL, c; char *spt[2], *sx; - int nsplit, rownum = 0, k, num; + int nsplit, rownum = 0, k, num, t; int maxstr, maxff = 2; int nind, nsnp, len; double y; @@ -2542,6 +2608,8 @@ ineigenstrat (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, SNP *cupt; Indiv *indx; int nbad = 0; + double ytime1, ytime2, yend ; + int pubtime = NO ; packmode = YES; @@ -2567,6 +2635,8 @@ ineigenstrat (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, rownum = 0; pbuff = packgenos; + ytime1 = cputime(1) ; + while (fgets (line, maxstr, fff) != NULL) { nsplit = splitup (line, spt, maxff); if (nsplit == 0) @@ -2587,6 +2657,19 @@ ineigenstrat (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, num = snpord[rownum]; cupt = snpm[num]; ++rownum; + + t = rownum % 100000 ; + if (t==0) { + ytime2 = cputime(1) - ytime1 ; + yend = ytime2 * (double) (numsnps - rownum) / (double) rownum ; + yend *= 1.0e-6 ; + if (yend>1800) pubtime = YES ; + if (pubtime) { + fprintf(stderr, "%8d snps processed. Estimated time to completion: %6.0f seconds", rownum, yend) ; + fflush(stderr) ; + } + } + if (cupt == NULL) continue; @@ -2675,8 +2758,15 @@ calcishash (SNP ** snpm, Indiv ** indiv, int numsnps, int numind, int *pihash, arrx[num] = strdup (indx->ID); ++num; } + *pihash = hasharr (arrx, num); + if (verbose) { + printf("zz+++\n") ; + printstrings(arrx, num) ; + printf("%x\n", *pihash) ; + } + freeup (arrx, num); free (arrx); @@ -2794,7 +2884,7 @@ inpack (char *gname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind) fdes = open (gname, O_RDONLY); if (fdes < 0) { perror ("open failure"); - fatalx ("(ispack) bad open %s\n", gname); + fatalx ("(inpack) bad open %s\n", gname); } t = read (fdes, buff, rlen); if (t < 0) { @@ -3257,6 +3347,179 @@ outfiles (char *snpname, char *indname, char *gname, SNP ** snpm, } } + +long loadprobpack(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, char *bigbuff) + +{ + SNP *cupt ; + int rl2, i, j, x ; + int sval; + double yy, ww[3] ; + unsigned char *buff ; + unsigned short bb[2] ; + long numx = 0 ; + + + + buff = (unsigned char *) bigbuff ; + rl2 = 4 ; + sval = (1 <<16 ) -1 ; + + for (i=0; i scount = 1 ; + for (j=0; j diplike[j], ww, 3) ; + bb[0] = bb[1] = sval ; + if (ww[0] > -0.5) { + bal1(ww, 3) ; + yy = (double) sval * ww[0] ; x = nnint(yy) ; bb[0] = (unsigned short) x ; + yy = (double) sval * ww[2] ; x = nnint(yy) ; bb[1] = (unsigned short) x ; + } + memcpy(buff, bb, rl2) ; + buff += rl2 ; + ++numx ; + }} + + return numx ; + +} +void +outprobx (char *pname, SNP ** snpm, Indiv ** indiv, int numsnps, int numindivs, char *bigbuff) +// bigbbuff already loaded +{ + + char **arrx; + int n, num, ihash, shash, i, g, j, k, t; + int nind, nsnp, irec; + Indiv *indx; + SNP *cupt; + unsigned char *buff; + int fdes, ret; + char *packit; + int rl1, rl2, aa[3] ; + unsigned short bb[2] ; + double ww[3], yy ; + int sval, x, plen ; + +// dipscore and scount set + + if (pname == NULL) return ; + + calcishash (snpm, indiv, numsnps, numindivs, &ihash, &shash); + + rl1 = 48 ; rl2 = 4 ; + ZALLOC (buff, rl1, unsigned char); + sprintf ((char *) buff, "PROB %7d %7d %x %x", numindivs, numsnps, ihash, shash); + + ridfile (pname); + fdes = open (pname, O_CREAT | O_TRUNC | O_RDWR, 0666); + + if (fdes < 0) { + perror ("bad outprobx"); + fatalx ("open failed for %s\n", pname); + } + if (verbose) + printf ("file %s opened\n", pname); + + ret = write (fdes, buff, rl1); + if (ret < 0) { + perror ("write failure"); + fatalx ("(outprobx) bad write (header)"); + } + + plen = numindivs*numsnps*rl2 ; + + ret = write (fdes, bigbuff, plen); // print out all SNPs in packed data buffer + if (ret < 0) { + perror ("write failure"); + fatalx ("(outprobx) bad write"); + } + close (fdes); + free (buff); +} + +void +outprob (char *pname, SNP ** snpm, Indiv ** indiv, int numsnps) +// single individual ?? +{ + + char **arrx; + int n, num, ihash, shash, i, g, j, k, t; + int nind, nsnp, irec; + Indiv *indx; + SNP *cupt; + unsigned char *buff; + int fdes, ret; + char *packit; + int rl1, rl2, aa[3] ; + unsigned short bb[2] ; + double ww[3], yy ; + int sval, x, numind = 1 ; + +// dipscore and scount set + + if (pname == NULL) return ; + + n = numind; + + calcishash (snpm, indiv, numsnps, numind, &ihash, &shash); + + rl1 = 48 ; rl2 = 4 ; + ZALLOC (buff, rl1, unsigned char); + sprintf ((char *) buff, "PROB %7d %7d %x %x", numind, numsnps, ihash, shash); + + ridfile (pname); + fdes = open (pname, O_CREAT | O_TRUNC | O_RDWR, 0666); + + if (fdes < 0) { + perror ("bad outprob"); + fatalx ("open failed for %s\n", pname); + } + if (verbose) + printf ("file %s opened\n", pname); + + ret = write (fdes, buff, rl1); + if (ret < 0) { + perror ("write failure"); + fatalx ("(outprob) bad write (header)"); + } + + irec = 1; + sval = (1<<16) - 1 ; + for (i = 0; i < numsnps; i++) { + cupt = snpm[i]; + if (ignoresnp (cupt)) continue; + if (cupt->isrfake) continue; + cclear (buff, 0X00, rl2); + bb[0] = bb[1] = sval ; // pattern if no reads + vclear(ww, 1.0/3.0, 3) ; + if (cupt -> scount > 0) { // + vexp(ww, cupt -> dipscore, 3) ; + bal1(ww, 3) ; + yy = (double) sval * ww[0] ; x = nnint(yy) ; bb[0] = (unsigned short) x ; + yy = (double) sval * ww[2] ; x = nnint(yy) ; bb[1] = (unsigned short) x ; + } + memcpy(buff, bb, rl2) ; + t = i % 100000 ; + if (t==-1) { + printf("zzprob: %20s %3d ", cupt -> ID, cupt -> scount) ; + printmatx(ww, 1, 3) ; + printf(" %6d %6d", bb[0], bb[1]) ; + printnl() ; + } + + ret = write (fdes, buff, rl2); // print out all SNPs in packed data buffer + if (ret < 0) { + perror ("write failure"); + fatalx ("(outprob) bad write"); + } + } + close (fdes); + free (buff); +} + + /* ---------------------------------------------------------------------------------------------------- */ void outeigenstrat (char *snpname, char *indname, char *gname, SNP ** snpm, @@ -4692,6 +4955,13 @@ clearpackgenos () packgenos = NULL; } +void +freepackgenos () +{ + if (packgenos != NULL) free(packgenos) ; + packgenos = NULL; +} + /* ---------------------------------------------------------------------------------------------------- */ void @@ -5637,3 +5907,101 @@ ckdup (char **eglist, int n) fatalx ("dup population found!\n"); } } + +long +inprob (char *pname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind) +{ + int rl2 = 4 ; + packlen = rl2*numsnps ; + ZALLOC (packgenos, packlen, char); + clearepath (packgenos); + + return inprobx (pname, snpm, indiv, numsnps, numind, packgenos) ; + +} +long +inprobx (char *pname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind, char *packp) +{ + + char **arrx, junk[10]; + int n, num, ihash, shash, i, g, j, k; + long t; + int xihash, xshash, xnsnp, xnind; + int nind, nsnp, irec; + Indiv *indx; + SNP *cupt; + double y; + unsigned char *buff; + int fdes, ret; + char *packit, *pbuff; + int rl1, rl2, rlen ; + long plen ; + + nind = n = numind; + nsnp = calcishash (snpm, indiv, numsnps, numind, &ihash, &shash); + +// printf("zzind: %s %x\n", indiv[0] -> ID, ihash) ; + + rl1 = 48 ; + rl2 = 4*numind ; + rlen = rl1 ; + + ZALLOC (buff, rlen, unsigned char); + // open binary file and check readability + fdes = open (pname, O_RDONLY); + if (fdes < 0) { + perror ("open failure"); + fatalx ("(inprob) bad open %s\n", pname); + } + t = read (fdes, buff, rlen); + if (t < 0) { + perror ("read failure"); + fatalx ("(inprob) bad read"); + } + + if (pordercheck && (snpordered == NO)) + failorder (); + + // check for file modification + if (hashcheck) { + sscanf ((char *) buff, "PROB %d %d %x %x", &xnind, &xnsnp, &xihash, + &xshash); + if (xnind != nind) + fatalx ("OOPS number of individuals %d != %d in input files\n", nind, + xnind); + if (xnsnp != nsnp) + fatalx ("OOPS number of SNPs %d != %d in input file: %s\n", nsnp, xnsnp, + pname); + + if (xshash != shash) + fatalx ("OOPS snp file has changed since PTOB file was created %x %x\n", shash, xshash); + + if (xihash != ihash) + fatalx ("OOPS indiv file has changed since PROB file was created %x %x\n", ihash, xihash); + } + + plen = rl2 * nsnp; + cclear ((unsigned char *) packp, 0XFF, plen); + + + t = bigread (fdes, packp, plen); + if (t < 0) { + perror ("read failure"); + fatalx ("(inprob) bad data read"); + } + if (t != plen) { + perror ("read failure (length mismatch)"); + printf ("numsnps: %d nsnp (from geno file): %d\n", numsnps, nsnp); + fatalx ("(inprob) bad data read (length mismatch) %ld %ld\n", t, plen); + } + else + printf ("packed PROB file: %s read OK\n", pname); + + free (buff); + close (fdes); + + printf ("end of inprob\n"); + fflush (stdout); + return plen ; +} + diff --git a/src/mcio.h b/src/mcio.h index 2423a97..7eb5296 100644 --- a/src/mcio.h +++ b/src/mcio.h @@ -12,7 +12,7 @@ #define MAXSTR 512 #define LONGSTR 10000 -#define MAXFF 200 +#define MAXFF 50 #define MAXCH 100 #define MTCHROM 90 #define XYCHROM 91 @@ -92,6 +92,7 @@ int numvalidind(Indiv **indivmarkers, int numind) ; int numvalidgtind(SNP **snpm, int numsnps, int ind) ; int numvalidgt(Indiv **indivmarkers, SNP *cupt) ; int numvalidgtx(Indiv **indivmarkers, SNP *cupt, int affst) ; +int getblocks(char *fname, SNP **snpm, int numsnps) ; int getweights(char *fname, SNP **snpm, int numsnps) ; int getindvals (char *fname, Indiv ** indivmarkers, int numindivs) ; void outpack(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind) ; @@ -155,6 +156,7 @@ void outfiles(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv, int numsnps, int numind, int packem, int ogmode) ; + void snpdecimate(SNP **snpm, int nsnp, int decim, int mindis, int maxdis) ; void decimate(SNP **cbuff, int n, int decim, int mindis, int maxdis) ; int vvadjust(double *cc, int n, double *pmean) ; @@ -165,6 +167,7 @@ void cntpops(int *count, Indiv **indm, int numindivs, char **eglist, int numeg) void printalleles(SNP *cupt, FILE *fff) ; char *getpackgenos() ; void clearpackgenos() ; +void freepackgenos() ; void setchr(int mode) ; void setchimpmode(int mode) ; @@ -183,5 +186,11 @@ void putsnpordered(int mode) ; int getsnpordered() ; void ckdup(char **eglist, int n) ; +long inprob (char *pname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind) ; +long inprobx (char *pname, SNP ** snpm, Indiv ** indiv, int numsnps, int numind, char *packprobs) ; +long loadprobpack(SNP **snpmarkers, Indiv **indivmarkers, int numsnps, int numindivs, char *bigbuff) ; +void outprob(char *oname, SNP **snpm, Indiv **indiv, int numsnps) ; +void outprobx(char *oname, SNP **snpm, Indiv **indiv, int numsnps, int numindivs, char *packprobs) ; +void sethiressnp() ; #endif diff --git a/src/mergeit.c b/src/mergeit.c index b81482c..254da43 100644 --- a/src/mergeit.c +++ b/src/mergeit.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -14,7 +15,7 @@ #include "admutils.h" #include "mcio.h" -#define WVERSION "2450" +#define WVERSION "2600" #define MAXFL 50 #define MAXSTR 512 @@ -25,6 +26,8 @@ // and better code for mergeit routine (overflow bug in cclear) // malexhet added // polarmode added both input files assumed polarized +// O2 version +// .prob aware char *trashdir = "/var/tmp"; extern int verbose; @@ -37,15 +40,21 @@ int numi1, numi2; char *snp1 = NULL; char *ind1 = NULL; char *geno1 = NULL; +char *xprob1 = NULL; char *snp2 = NULL; char *ind2 = NULL; char *geno2 = NULL; +char *xprob2 = NULL; + +int probmode = NO ; char *indoutfilename = NULL; char *snpoutfilename = NULL; char *genooutfilename = NULL; char *badsnpname = NULL; +char *proboutfilename = NULL ; + int packout = -1; int tersem = YES; @@ -85,6 +94,7 @@ void outfiles (char *snpname, char *indname, char *gname, SNP ** snpm, int ogmode); int checkmatch (SNP * cupt1, SNP * cupt2); +int usage (char *prog, int exval); int mergeit (SNP ** snpm1, SNP ** snpm2, Indiv *** pindm1, Indiv ** indm2, int nums1, int nums2, int numi1, int numi2); @@ -117,11 +127,22 @@ main (int argc, char **argv) int numgenolist; int maxmiss; int sorder[2]; + double ymem ; + + unsigned char *packp, *packp1, *packp2 ; + long plen, plen1, plen2 ; + int numx ; + int rl2 = 4 ; + + tersem = YES; // no snp counts readcommands (argc, argv); + cputime(0) ; + calcmem(0) ; + setomode (&outputmode, omode); packmode = YES; settersemode (tersem); @@ -132,6 +153,16 @@ main (int argc, char **argv) printf ("polarmode set!\n"); } + if (proboutfilename != NULL) { + probmode = YES ; + if (xprob1 == NULL) fatalx("proboutfilename set but not prob1\n") ; + if (xprob2 == NULL) fatalx("proboutfilename set but not prob2\n") ; + if (allowdups) { + fatalx("dups + .prob processing not yet supprted\n") ; + } + printf("probmode set!\n") ; + } + nums1 = getsnps (snp1, &snpm1, 0.0, NULL, &nignore, numrisks); sorder[0] = getsnpordered (); @@ -208,8 +239,48 @@ main (int argc, char **argv) numi1 = rmindivs(snpm1, nums1, indm1, numi1) ; numi2 = rmindivs(snpm2, nums2, indm2, numi2) ; */ + if (probmode) { + plen1 = nums1*(numi1+numi2)*rl2 ; + plen2 = nums2*numi2*rl2 ; + ZALLOC(packp1, plen1, unsigned char) ; + ZALLOC(packp2, plen2, unsigned char) ; + inprobx (xprob1, snpm1, indm1, nums1, numi1, packp1) ; + inprobx (xprob2, snpm2, indm2, nums2, numi2, packp2) ; + + packp = packp1 ; + if (xprob1 != NULL) { + for (i=0; i probbuff = packp + i*numi1*rl2 ; + cupt -> diplike = initarray_2Ddouble(numi1+numi2, 3, -1) ; + + for (j=0; j ignore) continue ; + indx = indm1[j] ; + if (indx -> ignore) continue ; + loaddiplike(cupt -> diplike[j], cupt -> probbuff + j*rl2) ; + } + }} + + packp = packp2 ; + if (xprob2 != NULL) { + for (i=0; i probbuff = packp + i*numi2*rl2 ; + cupt -> diplike = initarray_2Ddouble(numi2, 3, -1) ; + for (j=0; j ignore) continue ; + loaddiplike(cupt -> diplike[j], cupt -> probbuff + j*rl2) ; + } + }} + } + + packg2 = (unsigned char *) getpackgenos (); + numindivs = mergeit (snpm1, snpm2, &indm1, indm2, nums1, nums2, numi1, numi2); @@ -217,7 +288,7 @@ main (int argc, char **argv) numsnps = nums1; indivmarkers = indm1; - free (packg1); + free (packg1); // point here is to free packed data after mergeit free (packg2); // numsnps = rmsnps(snpmarkers, numsnps, NULL) ; @@ -227,7 +298,14 @@ main (int argc, char **argv) outfiles (snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode); - printf ("##end of mergeit run\n"); + if (probmode) { + numx = loadprobpack(snpmarkers, indivmarkers, numsnps, numindivs, packp1) ; + outprobx(proboutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packp1) ; + } + + + ymem = calcmem(1)/1.0e6 ; + printf("##end of mergeit: %12.3f seconds cpu %12.3f Mbytes in use\n", cputime(1), ymem) ; return 0; } @@ -353,6 +431,8 @@ readcommands (int argc, char **argv) char *tempname; int n; + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "p:vVf")) != -1) { switch (i) { @@ -388,14 +468,17 @@ readcommands (int argc, char **argv) getstring (ph, "geno1:", &geno1); getstring (ph, "snp1:", &snp1); getstring (ph, "ind1:", &ind1); + getstring (ph, "prob1:", &xprob1); getstring (ph, "geno2:", &geno2); getstring (ph, "snp2:", &snp2); getstring (ph, "ind2:", &ind2); + getstring (ph, "prob2:", &xprob2); getstring (ph, "indoutfilename:", &indoutfilename); getstring (ph, "snpoutfilename:", &snpoutfilename); getstring (ph, "genooutfilename:", &genooutfilename); + getstring (ph, "proboutfilename:", &proboutfilename); getstring (ph, "outputformat:", &omode); getint (ph, "malexhet:", &malexhet); @@ -484,6 +567,7 @@ mergeit (SNP ** snpm1, SNP ** snpm2, Indiv *** pindm1, Indiv ** indm2, if (g < 0) g = 3; wbuff ((unsigned char *) buff, tt, g); + if (probmode) copyarr(cupt1 -> diplike[t], cupt1 -> diplike[tt], 3) ; ++tt; } for (t = 0; t < numi2; ++t) { @@ -493,6 +577,7 @@ mergeit (SNP ** snpm1, SNP ** snpm2, Indiv *** pindm1, Indiv ** indm2, if (g < 0) g = 3; wbuff ((unsigned char *) buff, tt, g); + if (probmode) copyarr(cupt2 -> diplike[t], cupt1 -> diplike[tt], 3) ; ++tt; } cupt1->ngtypes = numindivs; @@ -502,3 +587,17 @@ mergeit (SNP ** snpm1, SNP ** snpm2, Indiv *** pindm1, Indiv ** indm2, *pindm1 = indivmarkers; return numindivs; } + +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -f ... toggle phasemode ON.\n"); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + + exit(exval); +}; + diff --git a/src/nicksrc/gds.c b/src/nicksrc/gds.c index 73c883d..02ade4c 100644 --- a/src/nicksrc/gds.c +++ b/src/nicksrc/gds.c @@ -452,7 +452,7 @@ ranpoiss1 (double xm) void genmultgauss (double *rvec, int num, int n, double *covar) -// rvec contains num mvg variates +// rvec contains num mvg variates. Mean 0 { double *cf; ZALLOC (cf, n * n, double); @@ -657,8 +657,8 @@ prob1 (double p) if ((p < 0) || (p > 1)) fatalx ("(prob1) bad p %12.6f\n", p); z = DRAND2 (); - if (z < p) - return YES; + + if (z < p) return YES; return NO; } @@ -731,21 +731,17 @@ rejnorm (double lo, double hi) int iter = 0, iterlim = 1000 * 1000; double y; + if (lo==hi) return lo ; + if (lo>hi) fatalx("(rejnorm) bad bounds\n") ; + for (;;) { ++iter; if (iter == iterlim) fatalx ("(rejnorm) looping\n"); y = gauss (); - if (lo >= 0) - y = abs (y); - if (hi <= 0) - y = -abs (y); - if (y < lo) - continue; - if (y > hi) - continue; - return y; + if ((lo<=y) && (y<=hi)) return y ; } + return 0 ; } @@ -777,6 +773,29 @@ ranboundnorm (double lo, double hi) } +double rtrunc2(double thresh) +// Botev and Ecuyer. Algorithm 5 +{ + + double a, c, q, u, v, x, y ; + int iter = 0 ; + + a = thresh ; + c = a*a/2 ; + + for (;;) { + u = DRAND2() ; + v = DRAND2() ; + if (u==0.0) continue ; + x = c - log(u) ; + y = v*v*x ; + if (y > a) continue ; + return sqrt(2*x) ; + } + +} + + double rantruncnorm (double T, int upper) // random normal | > T (upper = 1) @@ -789,6 +808,8 @@ rantruncnorm (double T, int upper) y = ntail (T); if (y > 0.1) return rejnorm (T, 1.0e6); + if (T>5.0) return rtrunc2(T) ; + u = DRAND2 (); if (u == 0.0) u = 0.5; // tiny hack diff --git a/src/nicksrc/getpars.c b/src/nicksrc/getpars.c index 28065b5..4095be1 100644 --- a/src/nicksrc/getpars.c +++ b/src/nicksrc/getpars.c @@ -8,6 +8,7 @@ #include "nicklib.h" #include "getpars.h" +#include "strsubs.h" /** a very simple keyword parameter cracker @@ -72,8 +73,8 @@ openpars (char *fname) fatalx ("duplicate parameter: %s\n", ww); ppars[npars] = strdup (ww); - striptrail (rest, ' '); /* no trailing blanks */ stripcomment (rest); + striptrail (rest, ' '); /* no trailing blanks */ pdata[npars] = strdup (rest); ++npars; @@ -95,11 +96,11 @@ openpars (char *fname) for (i = 0; i < npars; i++) { + pp->ppars[i] = strdup (ppars[i]); pp->pdata[i] = strdup (pdata[i]); - - - /* printf("zz: %d %s %s\n",i,ppars[i],pp->ppars[i]) ; */ + striplead(pp -> pdata[i], ' ') ; +// printf("zz: %d %s %s\n",i,ppars[i],pp->ppars[i]) ; } @@ -446,3 +447,4 @@ dostrsub (phandle * pp) dostrsub (pp); } + diff --git a/src/nicksrc/linsubs.c b/src/nicksrc/linsubs.c index 05c0331..9084760 100644 --- a/src/nicksrc/linsubs.c +++ b/src/nicksrc/linsubs.c @@ -22,6 +22,7 @@ bal (double *a, double *b, int n) /** normalize mean 0 s.d 1 + no error checking */ { double t; @@ -271,6 +272,8 @@ solvit (double *prod, double *rhs, int n, double *ans) int i; int ret; + ret = visnan(prod, n*n) ; if (ret==YES) return -4 ; + ret = visnan(rhs, n) ; if (ret==YES) return -3 ; ZALLOC (ttt, n * n, double); ZALLOC (p, n, double); @@ -355,6 +358,32 @@ choldc (double *a, int n, double *p) return 1; +} +int isposdef (double *a, int n) +{ + double *aa, *p ; + int ret, k ; + +/** + quick check on diagonal +*/ + for (k=0; k0) return YES ; + return NO ; + } void diff --git a/src/nicksrc/linsubs.h b/src/nicksrc/linsubs.h index f1d3e98..7f77e3a 100644 --- a/src/nicksrc/linsubs.h +++ b/src/nicksrc/linsubs.h @@ -16,6 +16,7 @@ double pdinv(double *cinv, double *coeff, int n) ; double logdet(double *mat, int n) ; int choldc (double *a, int n, double p[]); void cholsl (double *a, int n, double p[], double b[], double x[]); +int isposdef (double *a, int n) ; void cholesky(double *cf, double *a, int n) ; void pmat(double *mat, int n) ; void imulmat(int *a, int *b, int *c, int a1, int a2, int a3) ; diff --git a/src/nicksrc/ranmath.h b/src/nicksrc/ranmath.h index f42bf79..6f36191 100644 --- a/src/nicksrc/ranmath.h +++ b/src/nicksrc/ranmath.h @@ -45,6 +45,7 @@ double rant(double df) ; // t distribution double samppow(double e, double a, double b) ; double rejnorm(double lo, double hi) ; // usually call ranboundnorm double ranboundnorm(double lo, double hi) ; // sample standard normal in [lo, hi] +double rtrunc2(double T) ; // sample standard normal > T Rayleigh rejection double rantruncnorm(double T, int upper) ; // sample standard normal > T (upper =1) < T (upper = 0) int ranhprob(int n, int a, int m) ; // n balls a block sample m . Fow many black double rangeom (double theta) ; // Geometric distribution, mean 1/theta diff --git a/src/nicksrc/sortit.c b/src/nicksrc/sortit.c index a671c74..19d11ed 100644 --- a/src/nicksrc/sortit.c +++ b/src/nicksrc/sortit.c @@ -7,7 +7,7 @@ #include "vsubs.h" /** - a simple sort routine + a simple set of sort routines */ static double *ttt; diff --git a/src/nicksrc/sortit.h b/src/nicksrc/sortit.h index 975c53e..b1b0319 100644 --- a/src/nicksrc/sortit.h +++ b/src/nicksrc/sortit.h @@ -10,6 +10,7 @@ void invperm(int *a, int *b, int n) ; int ipcompit (int *a1, int *a2) ; int comparr(double *a, double *b, int len) ; int compiarr(int *a, int *b, int len) ; +int complarr(long *a, long *b, int len) ; void ipsortit(int **a, int *ind, int len, int rlen) ; void ipsortitp(int **a, int *ind, int len, int rlen, int *pp) ; void setorder (int *pp, int rlen) ; diff --git a/src/nicksrc/statsubs.c b/src/nicksrc/statsubs.c index 22b79c8..6279cfd 100644 --- a/src/nicksrc/statsubs.c +++ b/src/nicksrc/statsubs.c @@ -24,7 +24,7 @@ static int numbox = QSIZE * ZLIM; -static double zzprob (double zval); +static double zzprob (double pval); static double znewt (double z, double ptail); static double ltlg1 (double a, double x); @@ -87,7 +87,7 @@ ntail (double zval) return q; } - pi = 2.0 * acos (0.0); + pi = PI; t = exp (-0.5 * zval * zval); t /= (sqrt (2.0 * pi) * zval); @@ -95,6 +95,28 @@ ntail (double zval) return t; } +void tailstats(double *x, double a, int isupper) +// P(z>a) and conditional mean and variance if isupper = YES +{ + double p0, y1, y2, m, z1, z2 ; + + if (isupper == NO) { + tailstats(x, -a, YES) ; + x[1] = -x[1] ; + return ; + } + p0 = x[0] = ntail(a) ; + y1 = exp(-a*a/2.0) / sqrt(2.0*PI) ; +// printf("zz %12.6f %12.6f\n", p0, y1) ; + y2 = a*y1 + p0 ; + z1 = m = x[1] = y1/p0 ; + z2 = x[2] = y2/p0 - m*m ; + if (!isfinite(z1) || (!isfinite(z2))) { + x[1] = a ; + x[2] = 0 ; + } + return ; +} double zzprob (double pval) @@ -365,8 +387,7 @@ z2x2 (double *a) fatalx ("(conchi) zero row or column sum\n"); } y = a[k] - ee; - if (k == 0) - dev00 = y; + if (k == 0) dev00 = y; chsq += (y * y) / ee; } } @@ -972,6 +993,7 @@ hwstat (double *x) double bprob (double p, double a, double b) +// beta log density { double q, yl; q = 1.0 - p; @@ -1975,13 +1997,6 @@ bpars (double *a, double *b, double mean, double var) xb = x * (1 - m); -/** - ym = xa/(xa+xb) ; - yv = xa*xb/((xa+xb)*(xa+xb)*(xa+xb+1)) ; - printf("%9.3f %9.3f\n", mean, ym) ; - printf("%9.3f %9.3f\n", var, yv) ; -*/ - *a = xa; *b = xb; @@ -1998,6 +2013,7 @@ bmoments (double a, double b, double *mean, double *var) *mean = a / x; *var = (a * b) / (x * x * (x + 1)); + } double @@ -2773,7 +2789,7 @@ void mlebeta(double *a, int n, double *p1, double *p2) int k, num=0 ; a1 = a2 = 0 ; - estbpars(a, n, p1, p2) ; // initial momen ts estimator + estbpars(a, n, p1, p2) ; // initial moments estimator for (k=0; k= 1.0) continue ; @@ -2932,7 +2948,7 @@ int qinterp(double *a, double *b, int n, double val, double *ans) /** a and b are matched values sort a and corresponding b Set val as linear combo of 2 a values - returnlinear combo of corresponding b values + return linear combo of corresponding b values called by quartile return approx index in sorted list. -1 if lo n+1000 if hi */ @@ -3026,3 +3042,179 @@ double quartile(double *x, int n, double q) free(a) ; } +double poissexp(int k, double mean) +// expected value of # k hits mean m +{ + double y ; + y = -mean ; + y += (double) k * log (mean) ; + y -= logfac(k) ; + + return exp(y) ; + +} + +double wynn(double *v, int n, double *acc, int *nacc) +{ + double *x0, *x1, *xn, y ; + double t, amax, amin, tlast, normlim ; + int iter = 0, j, nn, isok ; + + normlim = vnorm(v,n) * 1.0e-8 ; + vmaxmin(v, n, &amax, &amin) ; + if (amax<=amin) { + vclear(acc, amax, n/2) ; + *nacc = n/2 ; + return amax ; + } + + ZALLOC(x0, n, double) ; + ZALLOC(x1, n, double) ; + ZALLOC(xn, n, double) ; + copyarr(v, x1, n) ; + nn = n ; + tlast = acc[0] = v[n-1] ; + for (;;) { + for (j=0; (j+1) < nn ; ++j) { + isok = YES ; + y = x1[j+1] - x1[j] ; + if (fabs(y) < normlim) { + isok = NO ; break ; + } + t = x0[j+1] + 1.0/y ; + xn[j] = t ; + } + if (isok == NO) break ; + --nn ; + if (nn<2) break ; + copyarr(x1, x0, n) ; + copyarr(xn, x1, n) ; + + for (j=0; (j+1) < nn ; ++j) { + isok = YES ; + y = x1[j+1] - x1[j] ; + if (fabs(y) < normlim) { + isok = NO ; break ; + } + t = x0[j+1] + 1.0/y ; + xn[j] = t ; + } + if (isok == NO) break ; + --nn ; + if (nn<2) break ; + copyarr(x1, x0, n) ; + copyarr(xn, x1, n) ; + tlast = acc[iter] = t ; + ++iter ; + } + free(x0) ; free(x1) ; free(xn) ; + *nacc = iter ; + return tlast ; +} + +double *vwynn(double **vv, int n, int dim, double **acc, int *nacc) +{ + double **x0, **x1, **xn, *tt, *ans ; + double t, amax, amin, tlast, normlim ; + int iter = 0, j, nn, isok ; + double *ww, *ww2, y ; + + ans = tt = NULL ; + *nacc = 0 ; + if (n==0) return ans ; + ans = tt = vv[n-1] ; + normlim = 0 ; + for (j=0; j +#include #include #include #include @@ -420,6 +422,28 @@ printnl () printf ("\n"); } +void +striplead (char *sss, char c) + +/** + strip out lead characters + c will usually be ' ' +*/ +{ + char *sx ; + sx = strdup(sss) ; + + int len, i; + len = strlen (sss); + for (i = 0 ; i = maxrow) fatalx ("too much data\n"); for (i = 0; i < numcol; i++) { // NA -> 0 - strcpy(sstt, spt[i+1]) ; + strncpy(sstt, spt[i+1], MAXSTR) ; if (strcmp(sstt, "NA") == 0) strcpy(sstt, "0") ; xx[i][num] = atof (sstt) ; } @@ -1061,7 +1108,7 @@ like getxxnames but file already open char *sx; int nsplit, i, j, num = 0, maxff, numcolp; int nbad = 0; - char **names; + char **names = NULL; if (pnames != NULL) names = *pnames; @@ -2024,3 +2071,54 @@ int copyfs(char *infile, FILE *fff) return num ; } +int putdata(char *buff, int nbytes, char *fname) +{ + int fdes, t, ret ; + + if (fname == NULL) return 0 ; + + ridfile(fname) ; + + fdes = open(fname, O_CREAT | O_TRUNC | O_RDWR, 0666); + if (fdes<0) { + perror(" open failure") ; + fatalx("(putdata) bad open %s\n", fname) ; + } + + t = write(fdes, buff, nbytes ) ; + if (t<0) { + perror("write failure") ; + fatalx("(putdata) bad write") ; + } + close(fdes) ; + return t ; + +} + +int getdata(char *buff, int nbytes, char *fname) +{ + int fdes, t, ret ; + + if (fname == NULL) return 0 ; + fdes = open(fname, O_RDONLY) ; + if (fdes<0) { + perror(" open failure") ; + fatalx("(getdata) bad open %s\n", fname) ; + } + t = read(fdes, buff, nbytes ) ; + if (t<0) { + perror("read failure") ; + fatalx("(getdata) bad read") ; + } + close(fdes) ; + return t ; + +} + + + + + + + + diff --git a/src/nicksrc/strsubs.h b/src/nicksrc/strsubs.h index 0937045..0d8e2a5 100644 --- a/src/nicksrc/strsubs.h +++ b/src/nicksrc/strsubs.h @@ -17,6 +17,7 @@ void fatalx( char *fmt, ...) ; long seednum() ; void printbl(int n) ; void printnl() ; +void striplead(char *sss, char c) ; void striptrail(char *sss, char c) ; void catx(char *sout, char **spt, int n) ; void catxx(char *sout, char **spt, int n) ; @@ -27,6 +28,7 @@ int mapstrings(char **pstr, char **insub, char **outsub, int n) ; int upstring (char *ss) ; int numcols (char *name) ; int numlines(char *name) ; +int openit_trap (char *name, FILE ** fff, char *type); void openit(char *name, FILE **fff, char *type) ; int ftest(char *aname) ; void fcheckr(char *name) ; @@ -86,6 +88,8 @@ int getfline(char *ss, char *fname, int maxstr) ; int copyfs(char *infile, FILE *fff) ; int getxxq(double **xx, int maxrow, int numcol, char *fname) ; int numcolsq (char *name) ; +int getdata(char *buff, int nbytes, char *fname) ; +int putdata(char *buff, int nbytes, char *fname) ; #define ZALLOC(item,n,type) if ((item = (type *)calloc((n),sizeof(type))) == NULL) \ diff --git a/src/nicksrc/vsubs.c b/src/nicksrc/vsubs.c index 7d45466..ee64e28 100644 --- a/src/nicksrc/vsubs.c +++ b/src/nicksrc/vsubs.c @@ -523,6 +523,9 @@ void copyarr (double *a, double *b, int n) { int i; + + if (a==b) return ; + for (i = 0; i < n; i++) { b[i] = a[i]; } @@ -910,6 +913,29 @@ printmat (double *a, int m, int n) printmatw (a, m, n, 5); } + +void +printmatwxfile (double *a, int m, int n, int w, FILE *fff) + +/** + print a matrix n wide m rows w to a row + no final nl +*/ +{ + int i, j, jmod; + for (i = 0; i < m; i++) { + for (j = 0; j < n; j++) { + fprintf (fff, "%9.3f ", a[i * n + j]); + jmod = (j + 1) % w; + if ((jmod == 0) && (j < (n - 1))) { + fprintf (fff, " ...\n"); + } + } + if (i < (m - 1)) + fprintf (fff, "\n"); + } +} + void printmatwx (double *a, int m, int n, int w) @@ -973,6 +999,56 @@ printmatlx (double *a, int m, int n) printmatwlx (a, m, n, 5); } +void printmatlfile(double *a, int m, int n, FILE *fff) +{ + + printmatwlfile(a, m, n, n, fff) ; + +} + +void +printmatwlxfile (double *a, int m, int n, int w, FILE *fff) + +/** + print a matrix n wide m rows w to a row + 15.9f format No final newline +*/ +{ + int i, j, jmod; + for (i = 0; i < m; i++) { + for (j = 0; j < n; j++) { + fprintf (fff, "%15.9f ", a[i * n + j]); + jmod = (j + 1) % w; + if ((jmod == 0) && (j < (n - 1))) { + fprintf (fff, " ...\n"); + } + } + } +} + + +void +printmatwlfile (double *a, int m, int n, int w, FILE *fff) + +/** + print a matrix n wide m rows w to a row + 15.9f format +*/ +{ + int i, j, jmod; + for (i = 0; i < m; i++) { + for (j = 0; j < n; j++) { + fprintf (fff, "%15.9f ", a[i * n + j]); + jmod = (j + 1) % w; + if ((jmod == 0) && (j < (n - 1))) { + fprintf (fff, " ...\n"); + } + } + fprintf (fff, "\n"); + } +} + + void printmatwl (double *a, int m, int n, int w) @@ -2144,6 +2220,15 @@ dekodeitb (int *xx, int kode, int len, int base) } +void copycol(double *x, double **a, int n, int col) +{ + int i ; + for (i=0; i= 10) fatalx("clock out of bounds\n") ; + + if (mode==0) { + tt[clock] = clocktime() ; + return 0 ; + } + + return clocktime() - tt[clock] ; + +} + + double cputime (int mode) { static double ttt=0 ; @@ -2700,4 +2802,35 @@ double exp1minus(double x) return ans ; } +double vnorm(double *a, int n) +{ + double y ; + y = asum2(a, n)/ (double) n ; + + if (y==0.0) return y ; + return sqrt(y) ; + +} + +void vin(double *a, double *b, int n) +// invert in unit sphere +{ + double y ; + y = asum2(b, n) ; + if (y==0.0) fatalx("(inv) zero vectro\n") ; + vst(a, b, 1.0/y, n) ; +} + +int visnan(double *a, int n) +{ + int k, ret ; + + for (k=0; k #include #include +#include #include #include @@ -25,7 +26,7 @@ // (YRI, CEU, Papua, .... ) -#define WVERSION "435" +#define WVERSION "600" // popsizelimit // dzeromode. But this is a bad idea. Must include monomorphic snps if we are to get unbiasedness // snpdetailsname added @@ -81,6 +82,7 @@ char *badsnpname = NULL; char *popfilename = NULL; char *outliername = NULL; char *snpdetailsname = NULL; +char *blockname = NULL; FILE *fff; char *dumpname = NULL; @@ -133,6 +135,7 @@ void dof2score (double *f3score, double *f3scoresig, SNP ** xsnplist, void estjackq (double *pjest, double *pjsig, double *btop, double *bbot, double *wjack, int nblocks); +int usage (char *prog, int exval); int main (int argc, char **argv) { @@ -247,18 +250,16 @@ main (int argc, char **argv) setplimit (indivmarkers, numindivs, eglist, numeg, popsizelimit); } - - nblocks = numblocks (snpmarkers, numsnps, blgsize); - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); - printf ("number of blocks for block jackknife: %d\n", nblocks); - ZALLOC (xsnplist, numsnps, SNP *); ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); printf ("snps: %d\n", ncols); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + +// loads tagnumber + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; for (trun = 0; trun < nplist; ++trun) { dopop3 (plists[trun], xsnplist, ncols, nblocks); @@ -381,10 +382,15 @@ readcommands (int argc, char **argv) char *tempname; int n, t; - while ((i = getopt (argc, argv, "f:b:p:s:vV")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "f:b:p:s:vVh")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break; + case 'p': parname = strdup (optarg); break; @@ -459,6 +465,7 @@ readcommands (int argc, char **argv) getint (ph, "pubjack:", &pubjack); getstring (ph, "dumpname:", &dumpname); getstring (ph, "loadname:", &loadname); + getstring (ph, "blockname:", &blockname); getdbl (ph, "jackquart:", &jackquart); printf ("### THE INPUT PARAMETERS\n"); @@ -795,3 +802,18 @@ dof3score (double *f3score, double *f3scoresig, SNP ** xsnplist, int *xindex, } + +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -f ... use as snp details name.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -s ... use as base value.\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + + exit(exval); +}; + diff --git a/src/qp4diff.c b/src/qp4diff.c index 0524019..765c822 100644 --- a/src/qp4diff.c +++ b/src/qp4diff.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -28,10 +29,11 @@ */ -#define WVERSION "315" +#define WVERSION "400" // overlap NO added // allsnps: YES and instem: added +// firstf4mult added (Mark request) #define MAXFL 50 #define MAXSTR 512 @@ -43,6 +45,7 @@ int xverbose ; int qtmode = NO ; int colcalc = YES ; int hires = NO ; +int fancyf4 = NO; Indiv **indivmarkers; SNP **snpmarkers ; @@ -90,12 +93,14 @@ char *snpoutfilename = NULL ; char *badsnpname = NULL ; char *popfilename = NULL ; char *outliername = NULL ; +char *blockname = NULL; int inbreed = NO ; double lambdascale ; int *f2ind, *ind2f, ng2, nh2 ; int ldregress = 0 ; double ldlimit = 9999.0 ; /* default is infinity */ +double firstf4mult = 1.0 ; /* we only consider markers as in possible LD if gdis <= ldlimit */ char *outputname = NULL ; @@ -108,7 +113,7 @@ void doq4diff(double *q4rat, double *q4ratsig, int ***counts, int *bcols, int nrows, int ncols, int *xtop, int *xbot, int numeg, int nblocks) ; -int getf4(int **xx, int *indx, double *ans) ; +int usage (char *prog, int exval); int main(int argc, char **argv) { @@ -186,6 +191,7 @@ int main(int argc, char **argv) char *px ; int nplist = 0, trun, nplisth ; double ymem ; + int numchromp ; readcommands(argc, argv) ; @@ -195,19 +201,26 @@ int main(int argc, char **argv) printf("## qp4diff version: %s\n", WVERSION) ; + numchromp = numchrom+1 ; if (parname == NULL) return 0 ; xverbose = verbose ; if (xverbose) printf("verbose set\n") ; if (allsnps == YES) overlap = NO ; if (allsnps == NO) overlap = YES ; + if (fancyf4) printf("fancyf4 on\n") ; + if (fancyf4==NO) printf("fancyf4 off\n") ; if (instem != NULL) { setinfiles(&indivname, &snpname, &genotypename, instem) ; } + y = fabs(firstf4mult-1.0) ; + if (y>.0001) printf("*** firstf4mult: %9.3f ***\n", firstf4mult) ; + nostatslim = MAX(nostatslim, 3) ; setinbreed(inbreed) ; + setfancyf4(fancyf4) ; if (outputname != NULL) openit(outputname, &ofile, "w") ; @@ -226,7 +239,7 @@ int main(int argc, char **argv) chrom = cupt -> chrom ; if ((xchrom>0) && (chrom != xchrom)) cupt -> ignore = YES ; if (chrom == 0) cupt -> ignore = YES ; - if (chrom >= 23) cupt -> ignore = YES ; + if (chrom >= numchromp) cupt -> ignore = YES ; } nplist = numlines(popfilename) ; @@ -301,10 +314,6 @@ int main(int argc, char **argv) setmgpos(snpmarkers, numsnps, &maxgendis) ; if ((maxgendis < .00001) || (gfromp == YES)) setgfromp(snpmarkers, numsnps) ; - nblocks = numblocks(snpmarkers, numsnps, blgsize) ; - ZALLOC(blstart, nblocks, int) ; - ZALLOC(blsize, nblocks, int) ; -// printf("number of blocks for moving block jackknife: %d\n", nblocks) ; ZALLOC(xsnplist, numsnps, SNP *) ; ZALLOC(tagnums, numsnps, int) ; @@ -316,9 +325,11 @@ int main(int argc, char **argv) ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ; printf("snps: %d indivs: %d\n", ncols, nrows) ; - setblocks(blstart, blsize, &xnblocks, xsnplist, ncols, blgsize) ; + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + // loads tagnumber - printf("number of blocks for moving block jackknife: %d\n", xnblocks) ; + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; ZALLOC(counts, ncols, int **) ; for (k=0; k=nblocks) fatalx("logic bug\n") ; ret = getf4(counts[col], xtop1, &y1) ; + y1 *= firstf4mult ; if (ret == 2) fatalx("bad pop\n") ; if (ret>=0) { btop1[bnum] += y1 ; @@ -698,10 +717,14 @@ doq4diff(double *q4diff, double *q4diffsig, int ***counts, int *bcols, totnum = 0 ; for (col=0; col\n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); - int a, i ; - double y0, y1, ytot, ff[4] ; - double h0, h1 ; - - *ans = 0.0 ; - for (i=0; i<4; ++i) { - a = indx[i] ; - if (a<0) { - *ans = 1.0 ; - return 2 ; - } - y0 = (double) xx[a][0] ; - y1 = (double) xx[a][1] ; - ytot = y0+y1 ; - if (ytot <= 0.0) return -1 ; - ff[i] = y0/ytot ; - } - *ans = (ff[0]-ff[1]) * (ff[2]-ff[3]) ; - a = indx[0] ; - h0 = hfix(xx[a]) ; - a = indx[1] ; - h1 = hfix(xx[a]) ; - if (indx[0] == indx[2]) *ans -= h0 ; - if (indx[0] == indx[3]) *ans += h0 ; - if (indx[1] == indx[3]) *ans -= h1 ; - if (indx[1] == indx[2]) *ans += h1 ; - return 1 ; + exit(exval); +}; -} diff --git a/src/qpAdm.c b/src/qpAdm.c index bc972e7..941c9c0 100644 --- a/src/qpAdm.c +++ b/src/qpAdm.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,7 @@ #include "eigsubs.h" -#define WVERSION "810" +#define WVERSION "1000" // best analysis added // hires added // chrom: 23 added @@ -38,6 +39,12 @@ // better treatment of boundary case (nl=1) // instem added // numchrom added +// format change for large number of sources +// calcadmfix added; +// hiprec_covar added +// fancyf4 added -- remove bias on f4 calculation +// code cleeanup (Mac) 10/2/19 +// fixed up boundary case (m=n) in ranktest #define MAXFL 50 #define MAXSTR 512 @@ -68,6 +75,7 @@ int zchrom = -1; int lopos, hipos ; int allsnps = NO; +int hiprec_covar = NO ; double blgsize = 0.05; // block size in Morgans */ double *chitot; @@ -85,13 +93,16 @@ char *outliername = NULL; int inbreed = NO; double lambdascale; double *jmean; -int *wkprint; +long *wkprint; +int *zprint ; int ldregress = 0; double ldlimit = 9999.0; /* default is infinity */ +int fancyf4 = YES; char *outputname = NULL; char *weightname = NULL; +char *blockname = NULL; FILE *ofile; double **btop, **bbot, *gtop, *gbot; @@ -114,12 +125,15 @@ calcevar (double *var, double *yvar, int nl, int nr, int nblocks, int dim); int getf4 (int **xx, int *indx, double *ans); +int getf4old (int **xx, int *indx, double *ans); +void calcadmfix (double *ans, double *A, int n, int *vf) ; void calcadm (double *ans, double *A, int n); void printss (double *mean, double *wt, int nl, int nr); double scorel(double *d, double *V, double *lam, int n) ; double gendstat(double *ca, int b1, int b2, int nr, int nl, double *mean, double *var, int *ktable) ; void addscaldiag(double *mat, double scal, int n) ; int isnested(int a, int b) ; +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -142,7 +156,7 @@ main (int argc, char **argv) int **xtop; int npops = 0; int nr, nl, minnm, d, dd; - double *ymean, *yvar, *yvarinv, *ww; + double *ymean, *yvar, *yvarinv, *ww, *lambda; int nindiv = 0, e, f, lag = 1; int nignore, numrisks = 1; SNP **xsnplist; @@ -166,7 +180,7 @@ main (int argc, char **argv) double *yscbest, *ychi, *yinfeasbest; int *sbest, *infeassbest, *nfeas, isfeas; double *var; - int *jvar; + long *jvar; double *w0, *w1, *w2; double *wbest; int dim, b1, b2; @@ -174,6 +188,7 @@ main (int argc, char **argv) double lfudge ; int numchromp ; int pos ; + double ymem ; F4INFO **f4info, *f4pt, *f4pt2, **g4info, *ggpt; @@ -182,6 +197,10 @@ main (int argc, char **argv) lopos = -1 ; hipos = BIGINT - 1 ; readcommands (argc, argv); + + cputime(0) ; + calcmem(0) ; + printf ("## qpAdm version: %s\n", WVERSION); if (seed==0) seed = seednum() ; printf("seed: %d\n", seed) ; @@ -191,6 +210,7 @@ main (int argc, char **argv) numchromp = numchrom+ 1 ; setinbreed (inbreed); + setfancyf4(fancyf4) ; if (instem != NULL) { setinfiles(&indivname, &snpname, &genotypename, instem) ; @@ -319,11 +339,6 @@ main (int argc, char **argv) if ((maxgendis < .00001) || (gfromp == YES)) setgfromp (snpmarkers, numsnps); - nblocks = numblocks (snpmarkers, numsnps, blgsize); - nblocks += 10; - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); - ZALLOC (xsnplist, numsnps, SNP *); ZALLOC (tagnums, numsnps, int); @@ -334,9 +349,12 @@ main (int argc, char **argv) ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); + + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + // loads tagnumber - printf ("number of blocks for block jackknife: %d\n", xnblocks); + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; ZALLOC (counts, ncols, int **); for (k = 0; k < ncols; ++k) { @@ -378,6 +396,7 @@ main (int argc, char **argv) dd = d * d; ZALLOC (ymean, d + 10, double); ZALLOC (ww, d + 10 + nl * nl, double); + ZALLOC (lambda, nl, double); ZALLOC (yvar, dd, double); ZALLOC (xind, nl, int); @@ -424,7 +443,8 @@ main (int argc, char **argv) ZALLOC (jmean, nl, double); - ZALLOC (wkprint, nl * nl, int); + ZALLOC (wkprint, nl * nl, long); + ZALLOC (zprint, nl * nl, int); for (x = rank; x <= maxrank; ++x) { if (x == rank) @@ -448,7 +468,7 @@ main (int argc, char **argv) if (hires) printmatl (ww, 1, nl); else - printmat (ww, 1, nl); + printmatw (ww, 1, nl, nl); ZALLOC (wbest, nl, double); ZALLOC (w0, nl * nr, double); @@ -457,7 +477,7 @@ main (int argc, char **argv) copyarr (ww, wbest, nl); ZALLOC (var, nl * nl, double); - ZALLOC (jvar, nl * nl, int); + ZALLOC (jvar, nl * nl, long); // printss (ymean, ww, nl, nr); @@ -466,27 +486,58 @@ main (int argc, char **argv) vsp (ww, ww, 1.0e-20, nl); vsqrt (ww, ww, nl); printf (" std. errors: "); + if (hires) printmatl (ww, 1, nl); else printmat (ww, 1, nl); + + + printnl (); - vst (ww, var, 1.0e6, nl * nl); - fixit (jvar, ww, nl * nl); - printf ("error covariance (* 1000000)\n"); - printimatl (jvar, nl, nl); + + if (hiprec_covar == NO) { + + vst (ww, var, 1.0e6, nl * nl); + +/** + printf("zzz\n") ; + printmatl(ww, nl, nl) ; + printnl() ; + printmatl(var, nl, nl) ; + printnl() ; +*/ + + fixitl (jvar, ww, nl * nl); + printf ("error covariance (* 1,000,000)\n"); + printlmat (jvar, nl, nl); + } + + else { + vst (ww, var, 1.0e9, nl * nl); + fixitl (jvar, ww, nl * nl); + printf ("error covariance (* 1,000,000,000)\n"); + printlmat (jvar, nl, nl); + + eigvecs (var, lambda, ww, nl); + printf("eigenanalysis:\n") ; + vst(lambda, lambda, 1.0e9, nl) ; + printf("evals: (*10e9)\n") ; printmat(lambda, 1, nl) ; + printnl() ; + printf("evecs:\n") ; printmat(ww, nl, nl) ; + printnl() ; + } if (hires) { printf ("hires: "); vst (ww, jmean, 1.0e6, nl); - fixit (wkprint, ww, nl); - printimatx (wkprint, 1, nl); + fixit (zprint, ww, nl); + printimatx(zprint, 1, nl); printf (" "); t = mktriang (ww, var, nl); vst (ww, ww, 1.0e8, t); - fixit (wkprint, ww, t); - printimatx (wkprint, 1, t); - printnl (); + fixitl (wkprint, ww, t); + printlmat (wkprint, 1, t); printf ("[mean *1.0e6, var *1.0e8]\n"); } @@ -496,12 +547,11 @@ main (int argc, char **argv) printf("summ: %s ", popllist[0]) ; printf(" %3d ", nl) ; printf(" %12.6f ", pval) ; - printmatx(jmean, 1, nl) ; + printmatwx(jmean, 1, nl, nl) ; t = mktriang (ww, var, nl); vst (ww, ww, 1.0e6, t); - fixit (wkprint, ww, t); - printimatx (wkprint, 1, t); - printnl (); + fixitl (wkprint, ww, t); + printlmat (wkprint, 1, t); printnl (); if (nl == 1) { @@ -543,7 +593,8 @@ main (int argc, char **argv) else { doranktestfix (ymean, yvar, nl, nr, nl - 1, f4pt, vfix); } - calcadm (ww, f4pt->A, nl); + if (wt==0) calcadm (ww, f4pt->A, nl); + else calcadmfix(ww, f4pt -> A, nl, vfix) ; vmaxmin (ww, nl, NULL, &y); yfeas = y ; dof = nnint (f4pt->dof); @@ -587,7 +638,7 @@ main (int argc, char **argv) printf (" %12s %1d %3d %9.3f %15.6g ", binary_string (k, nl), wt, dof, f4pt->chisq, ttail); - printmatx (ww, 1, nl); + printmatwx (ww, 1, nl, nl); if (isfeas == NO) printf (" infeasible"); printnl (); @@ -628,7 +679,7 @@ main (int argc, char **argv) fflush (stdout); if (details) { printf ("coeffs: "); - printmat (wbest, 1, nl); + printmatw (wbest, 1, nl, nl); printnl() ; printf("## dscore:: f_4(Base, Fit, Rbase, right2)\n") ; printf("## genstat:: f_4(Base, Fit, right1, right2)\n") ; @@ -640,7 +691,7 @@ main (int argc, char **argv) for (a = 0; a < nl; ++a) { k = ktable[a * nr + b]; printf ("details: "); - printf ("%15s %15s ", popllist[a + 1], poprlist[b + 1]); + printf ("%20s %20s ", popllist[a + 1], poprlist[b + 1]); y1 = w0[a] = ymean[k]; w1[k] = wbest[a]; printf ("%12.6f", y1); @@ -649,10 +700,9 @@ main (int argc, char **argv) printf ("%12.6f", y1 / ysig); // Z-score printnl (); } - printf ("dscore: %15s ", poprlist[b + 1]); -// printf(" ") ; printmatl(w0, 1, nl) ; + printf ("dscore: %20s ", poprlist[b + 1]); y1 = vdot (w0, wbest, nl); -// w0 is empirical f4; wbest coeffs; ideally should eb zero. +// w0 is empirical f4; wbest coeffs; ideally should be zero. mulmat (w2, yvar, w1, dim, dim, 1); y2 = vdot (w1, w2, nl * nr); // variance ysig = y1 / sqrt (y2); @@ -672,7 +722,8 @@ main (int argc, char **argv) printnl() ; } - printf ("## end of run\n"); + ymem = calcmem(1)/1.0e6 ; + printf("##end of qpAdm: %12.3f seconds cpu %12.3f Mbytes in use\n", cputime(1), ymem) ; return 0; } @@ -687,11 +738,12 @@ void addco(double *co, double yadd, int n) } } + void -calcadm (double *ans, double *A, int n) +calcadmfix (double *ans, double *A, int n, int *vf) { - double *coeff, *rhs, y, ytrace ; - int t, baditer = 0 ; + double *coeff, *rhs, y, ytrace, *ctran, *prod, *fvals, *rr ; + int t, baditer = 0, *vfix, nfix, k ; if (A == NULL) { vzero (ans, n); @@ -703,19 +755,35 @@ calcadm (double *ans, double *A, int n) return ; } ZALLOC (coeff, n * n, double); + ZALLOC (ctran, n * n, double); + ZALLOC (prod, n * n, double); ZALLOC (rhs, n, double); + ZALLOC (rr, n, double); + ZALLOC (fvals, n, double); + ZALLOC (vfix, n, int); transpose (coeff, A, n, n - 1); ytrace = trace(coeff, n) ; vclear (coeff + n * (n - 1), 1.0, n); rhs[n - 1] = 1; vzero(ans, n) ; + transpose(ctran, coeff, n, n) ; + + mulmat(prod, ctran, coeff, n, n, n) ; + mulmat(rr, ctran, rhs, n, n, 1) ; + nfix = 0 ; + for (k=0; k=10) break ; - t = linsolv (n, coeff, rhs, ans); + t = solvitfix (prod, rr, n, ans, vfix, fvals, nfix); y = asum(ans, n) ; if (!isfinite(y)) t = 1 ; if (t==0) break ; @@ -723,21 +791,56 @@ calcadm (double *ans, double *A, int n) if (verbose) printf("singular matrix: %3d %12.6f\n", baditer, ytrace) ; addco(coeff, ytrace*.001, n) ; } -// printf("zz2: ") ; printmat(ans, 1, n) ; if (asum(ans, n) > 0) bal1(ans, n) ; + free (coeff); + free (rhs); + free(ctran) ; + free(prod) ; + free(rr) ; + free(fvals) ; + free(vfix) ; -/** - printf("zzcalcadm\n") ; - vst(coeff, coeff, 1000, n*n) ; - printmat(coeff, n, n) ; - printmat(rhs, 1, n) ; - printmat(ans, 1, n) ; -*/ +} +void +calcadm (double *ans, double *A, int n) +{ + double *coeff, *rhs, y, ytrace ; + int t, baditer = 0 ; + + if (A == NULL) { + vzero (ans, n); + return; + } + + if (n==1) { + ans[0] = 1 ; + return ; + } + ZALLOC (coeff, n * n, double); + ZALLOC (rhs, n, double); + + transpose (coeff, A, n, n - 1); + ytrace = trace(coeff, n) ; + vclear (coeff + n * (n - 1), 1.0, n); + rhs[n - 1] = 1; + vzero(ans, n) ; + + for (;;) { + if (baditer>=10) break ; + t = linsolv (n, coeff, rhs, ans); + y = asum(ans, n) ; + if (!isfinite(y)) t = 1 ; + if (t==0) break ; + ++baditer ; + if (verbose) printf("singular matrix: %3d %12.6f\n", baditer, ytrace) ; + addco(coeff, ytrace*.001, n) ; + } + if (asum(ans, n) > 0) bal1(ans, n) ; free (coeff); free (rhs); @@ -753,10 +856,15 @@ readcommands (int argc, char **argv) char *tempname; int n; - while ((i = getopt (argc, argv, "p:vVd")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "p:vVdh")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break ; + case 'p': parname = strdup (optarg); break; @@ -800,6 +908,8 @@ readcommands (int argc, char **argv) getstring (ph, "output:", &outputname); getstring (ph, "outputname:", &outputname); getstring (ph, "badsnpname:", &badsnpname); + getstring (ph, "blockname:", &blockname); + getdbl (ph, "blgsize:", &blgsize); getint (ph, "popsizelimit:", &popsizelimit); @@ -812,9 +922,11 @@ readcommands (int argc, char **argv) getint (ph, "maxrank:", &maxrank); getint (ph, "hires:", &hires); getint (ph, "allsnps:", &allsnps); + getint (ph, "fancyf4:", &fancyf4); getint (ph, "useallsnps:", &allsnps); getint (ph, "details:", &details); getint (ph, "seed:", &seed) ; + getint (ph, "hiprec_covar:", &hiprec_covar) ; getdbl (ph, "diagplus:", &yscale); @@ -835,16 +947,17 @@ doq4vecb (double *ymean, double *yvar, int ***counts, int *bcols, int a, b, c, d; int k, g, i, col, j; -double ya, yb, y; +double ya, yb, y ; double *top, *bot, *wjack, *wbot, *wtop, **bjtop; double *bwt; double ytop, ybot; double y1, y2, yscal; double *w1, *w2, *ww, m1, m2; double *mean; +static int nbad = 0 ; int bnum, totnum; -int ret; +int ret, ret2; double *f4, *f4bot; int dim = nl * nr; int isok; @@ -898,23 +1011,20 @@ vzero (f4bot, dim); isok = YES; for (a = 0; a < dim; ++a) { ret = getf4 (counts[col], xtop[a], &y); -if (ret==2) { - printf("bad quad: "); printimat(xtop[a], 1, 4) ; - fatalx("bad quad\n") ; -} /** -if (verbose && (a==2)) { - printf("zz %3d %d %3d %12.3f ", col, ret, bnum, y) ; printimat(xtop[a], 1, 4) ; - printf("zzc ") ; - for (j=0; j<4; ++j) { - b = xtop[a][j] ; - printf("%3d %3d ", counts[col][b][0], counts[col][b][1]) ; - } - printnl() ; +ret2 = getf4old (counts[col], xtop[a], &y2); +if ((ret != ret2) || (fabs(y-y2) > .001)) { + ++nbad ; + if (nbad < 100) printf("zzbad %d %d :: $%d %d :: %9.3f %9.3f\n", col, a, ret, ret2, y, y2) ; } */ +if (ret==2) { + printf("bad quad: "); printimat(xtop[a], 1, 4) ; + fatalx("bad quad\n") ; +} + f4[a] = y ; f4bot[a] = 1 ; if (ret<0) { @@ -985,22 +1095,8 @@ return (y1 * y1) / y2; // a natural estimate for degrees of freedom in F-test. } -double -hfix (int *x) -{ -// correction factor counts in x -double ya, yb, yt, h; -ya = (double) x[0]; -yb = (double) x[1]; -yt = ya + yb; -if (yt <= 1.5) -fatalx ("(hfix)\n"); -h = ya * yb / (yt * (yt - 1.0)); -return h / yt; -} - int -getf4 (int **xx, int *indx, double *ans) +getf4old (int **xx, int *indx, double *ans) { int a, i; @@ -1031,7 +1127,7 @@ for (i = 0; i < 4; ++i) { } - *ans - 0 ; + // *ans - 0 ; y0 = (double) xx[a][0]; y1 = (double) xx[a][1]; ytot = y0 + y1; @@ -1101,7 +1197,6 @@ vvd (mean, gtop, gbot, dim); doranktest (mean, yvar, nl, nr, nl - 1, f4wk); calcadm (totmean, f4wk->A, nl); -// printf("zztotmean: ") ; printmat(totmean, 1, nl) ; for (k = 0; k < nblocks; ++k) { vvm (wtop, gtop, btop[k], dim); @@ -1126,7 +1221,7 @@ wjack[k] = asum (bbot[k], dim); wjackvest (jmean, var, nl, totmean, tmean, wjack, nblocks); printf ("Jackknife mean: "); if (nl==1) jmean[0] = 1 ; -printmatl (jmean, 1, nl); +printmatwl (jmean, 1, nl, nl); free (wjack); @@ -1183,6 +1278,8 @@ ff[0] = -log(theta) - u/theta ; ff[1] = -(1/theta) + (u/t2) ; ff[2] = (1/t2) - (2*u)/t3 ; +return asum(ff, 3) ; + } @@ -1198,6 +1295,10 @@ for (k=0; k\n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + (void)fprintf(stderr, " -V ... toggle details mode ON.\n"); + + exit(exval); +}; diff --git a/src/qpBound.c b/src/qpBound.c index a910b83..9b89617 100644 --- a/src/qpBound.c +++ b/src/qpBound.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,7 @@ void dof2score (double *f3score, double *f3scoresig, SNP ** xsnplist, int nblocks); void estjackq (double *pjest, double *pjsig, double *btop, double *bbot, double *wjack, int nblocks); +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -446,10 +448,16 @@ readcommands (int argc, char **argv) char *tempname; int n, t; - while ((i = getopt (argc, argv, "f:b:p:g:s:o:vVx")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "f:b:p:g:s:o:vVxh")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break; + + case 'p': parname = strdup (optarg); break; @@ -873,3 +881,21 @@ dof3score (double *f3score, double *f3scoresig, SNP ** xsnplist, int *xindex, fclose (fff); } +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -f ... use as snp details name.\n"); + (void)fprintf(stderr, " -b ... use as base value.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -g <> ... Print this message and exit.\n"); + (void)fprintf(stderr, " -s ... use as seed value.\n"); + (void)fprintf(stderr, " -o ... give to produced out graph.\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + (void)fprintf(stderr, " -x ... toggle analysis mode ON.\n"); + + exit(exval); +}; + diff --git a/src/qpDpart.c b/src/qpDpart.c index 99f8f6d..b9e615d 100644 --- a/src/qpDpart.c +++ b/src/qpDpart.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -19,7 +20,7 @@ #include "qpsubs.h" -#define WVERSION "110" +#define WVERSION "200" // outpop NONE forced // print number of samples / pop // popfilename added @@ -31,7 +32,7 @@ // nzdata; require 10 blocks with abba/baba counts // xmode ; call countpopsx which deals with gender on X // syntactic sugar (strip :) in popfilename -// numchrm added +// numchrom added // instem: #define MAXFL 50 @@ -100,6 +101,7 @@ char *badsnpname = NULL; char *poplistname = NULL; char *popfilename = NULL; char *outliername = NULL; +char *blockname = NULL; char *outpop = NULL; // list of outliers int fullmode = NO; @@ -122,6 +124,7 @@ void setcolprobs(double **colprobs, int ***counts, int ncols, int numeg) ; void patternscore (double *pysc, double *pysig, char **pattern, double **colprobs, int *egindex, int ncols, int numeg, int *bcols, int nblocks) ; +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -330,10 +333,6 @@ main (int argc, char **argv) if ((maxgendis < .00001) || (gfromp == YES)) setgfromp (snpmarkers, numsnps); - nblocks = numblocks (snpmarkers, numsnps, blgsize); - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); - ZALLOC (xsnplist, numsnps, SNP *); ZALLOC (tagnums, numsnps, int); @@ -344,10 +343,12 @@ main (int argc, char **argv) ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; // loads tagnumber - printf ("number of blocks for jackknife: %d\n", xnblocks); - nblocks = xnblocks; + + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; + printf ("nrows, ncols: %d %d\n", nrows, ncols); ZALLOC (counts, ncols, int **); @@ -426,15 +427,21 @@ readcommands (int argc, char **argv) char *tempname; int n; - while ((i = getopt (argc, argv, "l:h:p:vV")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + + while ((i = getopt (argc, argv, "L:H:p:hvV")) != -1) { switch (i) { - case 'l': + case 'h': + usage(basename(argv[0]), 0); + break ; + + case 'L': locount = atoi (optarg); break; - case 'h': + case 'H': hicount = atoi (optarg); break; @@ -509,6 +516,7 @@ readcommands (int argc, char **argv) getint (ph, "fmode:", &fmode); getlongstring (ph, "pattern:", &rawpattern); getdbl (ph, "fscale:", &fscale); + getstring (ph, "blockname:", &blockname); printf ("### THE INPUT PARAMETERS\n"); @@ -694,3 +702,16 @@ void patternscore (double *pysc, double *pysig, char **pattern, double **colpr } +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -L ... use as low count value.\n"); + (void)fprintf(stderr, " -H ... use sa high count value.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + + exit(exval); +} diff --git a/src/qpDstat.c b/src/qpDstat.c index 27b2403..3391c59 100644 --- a/src/qpDstat.c +++ b/src/qpDstat.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -25,7 +26,7 @@ */ -#define WVERSION "755" +#define WVERSION "900" // clade hits and misses (migrations?) // forcclade added // outpop NONE forced @@ -111,6 +112,7 @@ char *badsnpname = NULL; char *poplistname = NULL; char *popfilename = NULL; char *outliername = NULL; +char *blockname = NULL; char *outpop = NULL; // list of outliers int fullmode = NO; @@ -146,6 +148,8 @@ void regesttree (double *ans, double *xn, double xd); void ckdups (char **list, int n); void printbadql (char ***qlist, int tt, char **eglist, int numeg); +int usage (char *prog, int exval); + int main (int argc, char **argv) @@ -397,10 +401,6 @@ main (int argc, char **argv) if ((maxgendis < .00001) || (gfromp == YES)) setgfromp (snpmarkers, numsnps); - nblocks = numblocks (snpmarkers, numsnps, blgsize); - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); - ZALLOC (xsnplist, numsnps, SNP *); ZALLOC (tagnums, numsnps, int); @@ -411,10 +411,16 @@ main (int argc, char **argv) ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + // loads tagnumber - printf ("number of blocks for jackknife: %d\n", xnblocks); - nblocks = xnblocks; + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; + +/** + tt = xsnplist[1000] -> tagnumber ; + if (tt>nblocks) fatalx("yuck!\n") ; +*/ printf ("nrows, ncols: %d %d\n", nrows, ncols); ZALLOC (counts, ncols, int **); @@ -466,7 +472,8 @@ main (int argc, char **argv) ivclear (bcols, -1, ncols); for (k = 0; k < ncols; k++) { cupt = xsnplist[k]; - bcols[k] = cupt->tagnumber; + tt = bcols[k] = cupt->tagnumber; + if (tt>nblocks) fatalx("bad tagnumber!\n") ; } setabx (abx, bax, counts, ncols, numeg); @@ -701,15 +708,20 @@ readcommands (int argc, char **argv) char *tempname; int n; - while ((i = getopt (argc, argv, "l:h:p:vV")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "L:H:p:hvV")) != -1) { switch (i) { - case 'l': + case 'h': + usage(basename(argv[0]), 0); + break ; + + case 'L': locount = atoi (optarg); break; - case 'h': + case 'H': hicount = atoi (optarg); break; @@ -783,6 +795,7 @@ readcommands (int argc, char **argv) getint (ph, "chrom:", &xchrom); getint (ph, "xmode:", &xmode); getint (ph, "f4mode:", &f4mode); + getstring (ph, "blockname:", &blockname); printf ("### THE INPUT PARAMETERS\n"); @@ -1381,3 +1394,18 @@ ckdups (char **list, int n) } } } +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -L ... use as low value.\n"); + (void)fprintf(stderr, " -H ... use as high value.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + + exit(exval); +}; + + diff --git a/src/qpF4ratio.c b/src/qpF4ratio.c index 84b4c5a..1f12fd0 100644 --- a/src/qpF4ratio.c +++ b/src/qpF4ratio.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -28,9 +29,10 @@ */ -#define WVERSION "320" +#define WVERSION "400" // diffmode added // xdata supported +// code cleeanup 10/2/19 #define MAXFL 50 #define MAXSTR 512 @@ -41,6 +43,7 @@ char *trashdir = "/var/tmp"; int qtmode = NO; int colcalc = YES; int hires = NO; +int fancyf4 = YES; Indiv **indivmarkers; SNP **snpmarkers; @@ -86,6 +89,7 @@ char *indivname = NULL; char *badsnpname = NULL; char *popfilename = NULL; char *outliername = NULL; +char *blockname = NULL; int inbreed = NO; double lambdascale; int *f2ind, *ind2f, ng2, nh2; @@ -110,6 +114,7 @@ void doq4ratdiff (double *q4rat, double *q4ratsig, int ***counts, int *bcols, int *xbot2, int numeg, int nblocks); int getf4 (int **xx, int *indx, double *ans); +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -202,6 +207,7 @@ main (int argc, char **argv) nostatslim = MAX (nostatslim, 3); setinbreed (inbreed); + setfancyf4(fancyf4) ; if (outputname != NULL) openit (outputname, &ofile, "w"); @@ -307,10 +313,6 @@ main (int argc, char **argv) if ((maxgendis < .00001) || (gfromp == YES)) setgfromp (snpmarkers, numsnps); - nblocks = numblocks (snpmarkers, numsnps, blgsize); - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); -// printf("number of blocks for moving block jackknife: %d\n", nblocks) ; ZALLOC (xsnplist, numsnps, SNP *); ZALLOC (tagnums, numsnps, int); @@ -322,9 +324,12 @@ main (int argc, char **argv) ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + // loads tagnumber - printf ("number of blocks for block jackknife: %d\n", xnblocks); + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; + ZALLOC (counts, ncols, int **); for (k = 0; k < ncols; ++k) { @@ -369,7 +374,7 @@ main (int argc, char **argv) printf (" : "); } printf ("%12.6f %12.6f %9.3f", y, ysig, y / ysig); - printf(" 9d", nsnps) ; + printf(" %9d", nsnps) ; printnl (); } } @@ -428,10 +433,14 @@ readcommands (int argc, char **argv) char *tempname; int n; - while ((i = getopt (argc, argv, "p:vV")) != -1) { + while ((i = getopt (argc, argv, "p:hvV")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break ; + case 'p': parname = strdup (optarg); break; @@ -477,6 +486,9 @@ readcommands (int argc, char **argv) getint (ph, "chrom:", &xchrom); getint (ph, "noxdata:", &noxdata); getint (ph, "xdata:", &xdata); + getint (ph, "fancyf4:", &fancyf4); + getint (ph, "numchrom:", &numchrom); + getstring (ph, "blockname:", &blockname); printf ("### THE INPUT PARAMETERS\n"); printf ("##PARAMETER NAME: VALUE\n"); @@ -848,56 +860,15 @@ doq4ratdiff (double *q4rat, double *q4ratsig, int ***counts, int *bcols, } - -double -hfix (int *x) -{ -// correction factor counts in x - double ya, yb, yt, h; - ya = (double) x[0]; - yb = (double) x[1]; - yt = ya + yb; - if (yt <= 1.5) - fatalx ("(hfix)\n"); - h = ya * yb / (yt * (yt - 1.0)); - return h / yt; -} - -int -getf4 (int **xx, int *indx, double *ans) +int usage (char *prog, int exval) { - int a, i; - double y0, y1, ytot, ff[4]; - double h0, h1; + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); - *ans = 0.0; - for (i = 0; i < 4; ++i) { - a = indx[i]; - if (a < 0) { - *ans = 1.0; - return 2; - } - y0 = (double) xx[a][0]; - y1 = (double) xx[a][1]; - ytot = y0 + y1; - if (ytot <= 0.0) - return -1; - ff[i] = y0 / ytot; - } - *ans = (ff[0] - ff[1]) * (ff[2] - ff[3]); - a = indx[0]; - h0 = hfix (xx[a]); - a = indx[1]; - h1 = hfix (xx[a]); - if (indx[0] == indx[2]) - *ans -= h0; - if (indx[0] == indx[3]) - *ans += h0; - if (indx[1] == indx[3]) - *ans -= h1; - if (indx[1] == indx[2]) - *ans += h1; - return 1; + exit(exval); +}; -} diff --git a/src/qpGraph.c b/src/qpGraph.c index ff36fc3..5825256 100644 --- a/src/qpGraph.c +++ b/src/qpGraph.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -24,7 +25,7 @@ // (YRI, CEU, Papua, .... ) -#define WVERSION "6450" +#define WVERSION "6600" // lsqmode // ff3fit added // reroot added @@ -58,6 +59,7 @@ // instem added // new version of qpgsubs (kimfit) // admixout, admixin +// dumpfstats added (for MIT, Dimitris, Julia) #define MAXFL 50 @@ -75,6 +77,7 @@ int gsldetails = NO; double gslprecision = .00001; int tersemode = NO; char *f3name = NULL; +int optit = YES ; Indiv **indivmarkers; SNP **snpmarkers; @@ -92,6 +95,7 @@ int allsnpsmode = NO; int inbreed = NO; int initmixnum = -1; int initverbose = NO ; +int numscorit = 0 ; double blgsize = 0.05; // block size in Morgans */ double *chitot ; double diag = 0.0; @@ -114,9 +118,12 @@ char *graphname = NULL; char *graphoutname = NULL; char *graphdotname = NULL; char *poplistname = NULL; -char *outliername = NULL; char *phylipname = NULL; +char *blockname = NULL; + char *dottitle = NULL ; +char *outliername = NULL ; +char *fstatsname = NULL ; char *admixout = NULL; char *admixin = NULL; @@ -206,6 +213,8 @@ int nbad2 (int a, int b, int c, int d); void wtov (double *vv, double **ww, int n); void vtow (double *vv, double **ww, int n); void loadppwts (double *ppwts, double *pwts, int n, double *awts, int nanc) ; +void dumpfstats(char *fstatsname, double *ff3, double *ff3var, char **eglist, int numeg, int *indx, int basenum) ; +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -298,6 +307,7 @@ main (int argc, char **argv) int bad2 = NO; int callinitv = YES ; + double ymem ; readcommands (argc, argv); @@ -546,9 +556,12 @@ outpop: (not present) printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + +// loads tagnumber + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; - nblocks = xnblocks; zz = initarray_2Ddouble (numeg * numeg, ncols, 0.0); qplist = initarray_2Dint (numeg * numeg * numeg * numeg, 4, 0); @@ -657,6 +670,7 @@ outpop: (not present) doff3 (ff3, ff3var, xsnplist, xindex, xtypes, nrows, ncols, numeg, nblocks, lambdascale); // side effect vvar made + dumpfstats(fstatsname, ff3, ff3var, eglist, numeg, ind2f, basenum) ; if (badpop2name != NULL) { @@ -821,16 +835,18 @@ outpop: (not present) fatalx ("qpGraph: only binary admixture supported\n"); } - if (initmixnum < 0) + if (initmixnum < 0) { t = (int) pow (2, nmix); - initmixnum = MAX (100 * nmix, 20 * t); + } + + if (initmixnum>0) initmixnum = MAX (100 * nmix, 20 * t); if (loadname != NULL) callinitv = NO ; if (admixin != NULL) callinitv = NO ; if (callinitv) { y = initvmix (wwtemp, nwts, initmixnum); // side effect sets vmix -// printf ("initial admix wts: score: %9.3f\n", y); + printf ("callinitv set. initial admix wts: score: %9.3f\n", y); getwww (vmix, wwtemp, nwts); putgmix (vmix); } @@ -893,7 +909,7 @@ outpop: (not present) y = scorit (wwtemp, nwts, &y1, ww2); printf ("initial score: %9.3f\n", y); - if (nmix > 0) { + if ((nmix > 0) && (optit==YES)) { wtov (vgsl, vmix, nmix); printf ("init vg:\n"); @@ -917,8 +933,10 @@ outpop: (not present) dof -= (nwts - nmix); ytail = rtlchsq (dof, y); + printf (" final score: %12.3f dof: %d nominal p-val %12.6f\n", y, dof, ytail); + printf("number of graph evaluations: %d\n", numscorit) ; calcfit (ff3fit, ww2, numeg); @@ -943,8 +961,8 @@ outpop: (not present) fflush(stdout) ; writeadmix(admixout) ; - y = 1.0e-6 * (double) calcmem(1) ; - printf ("## end of qpGraph time: %9.3f seconds memory: %9.3f Mbytes\n", cputime(1), y) ; + ymem = 1.0e-6 * (double) calcmem(1) ; + printf ("## end of qpGraph time: %9.3f seconds memory: %9.3f Mbytes\n", cputime(1), ymem) ; return 0; } @@ -1195,8 +1213,6 @@ printfit (double *ww) t = numqq (a, b, c, d); fprintf (outff, "%d ", t); fprintf (outff, "\n"); - - printnl (); } } } @@ -1441,6 +1457,7 @@ scorit (double *www, int n, double *pfix, double *ans) double tt[MAXG] ; ++ncall; + ++numscorit ; if (n != intsum (lmix, nmix)) fatalx ("dimension bug\n"); @@ -1777,6 +1794,8 @@ readcommands (int argc, char **argv) char *tempname; int n, t; + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "z:p:g:s:o:d:x:l:vV")) != -1) { switch (i) { @@ -1846,6 +1865,7 @@ readcommands (int argc, char **argv) getstring (ph, "graphoutname:", &graphoutname); getstring (ph, "graphdotname:", &graphdotname); getstring (ph, "phylipname:", &phylipname); + getstring (ph, "fstatsname:", &fstatsname); getstring (ph, "outpop:", &outpop); getstring (ph, "output:", &outputname); getstring (ph, "badsnpname:", &badsnpname); @@ -1867,6 +1887,7 @@ readcommands (int argc, char **argv) getint (ph, "fulloutlier:", &fulloutlier); getint (ph, "gsldetails:", &gsldetails); getint (ph, "terse:", &tersemode); + getint (ph, "optit:", &optit); getdbl (ph, "precision:", &gslprecision); getstring (ph, "badpop2name:", &badpop2name); getstring (ph, "weightname:", &weightname); @@ -1892,6 +1913,8 @@ readcommands (int argc, char **argv) getint (ph, "fstdmode:", &fstdmode); getstring (ph, "dumpname:", &dumpname); getstring (ph, "loadname:", &loadname); + getstring (ph, "outliername:", &outliername); + getstring (ph, "blockname:", &blockname); getint (ph, "hires:", &hires); printf ("### THE INPUT PARAMETERS\n"); @@ -2158,6 +2181,8 @@ initvmix (double *wwinit, int nwts, int numiter) //verbose = YES ; ybest = scorit (ww, nwts, NULL, ww2); + printf("initvmix called. big iterations: %d\n", numiter) ; + printf("start score: %9.3f\n", ybest) ; if (verbose) printf ("initv: %4d %9.3f\n", -1, ybest); calcfit (ff3fit, ww2, numeg); @@ -2362,3 +2387,61 @@ vtow (double *vv, double **ww, int n) ww[k][1] = 1.0 - y; } } +void dumpfstats(char *fstatsname, double *ff3, double *ff3var, char **eglist, int numeg, int *indx, int basenum) +{ + FILE *fff ; + int a, b, nh2, k, x, u, v, c, d ; + double y1, y2 ; + + if (fstatsname == NULL) return ; + + openit(fstatsname, &fff, "w") ; + fprintf(fff, "##fbasis. basepop: %s :: f3*1000 covar*1000000\n", eglist[basenum]) ; + + nh2 = numeg * (numeg - 1); + nh2 /= 2; + for (u=0; u\n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -z ... use as Z threshold.\n"); + (void)fprintf(stderr, " -s ... use seed.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -g ... use as graph name.\n"); + (void)fprintf(stderr, " -o ... use au out graph.\n"); + (void)fprintf(stderr, " -d ... use for graph dot name.\n"); + (void)fprintf(stderr, " -x ... use as oulier name.\n"); + (void)fprintf(stderr, " -l ... use as lambda scale value.\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + + exit(exval); +}; + diff --git a/src/qpWave.c b/src/qpWave.c index 5367e51..046e25d 100644 --- a/src/qpWave.c +++ b/src/qpWave.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -25,16 +26,17 @@ // chrom: 23 now supported // bugfix in doq4vecb (same as qpAdm) // instem: + // cleaned up boundary case (m=n in ranktest) */ -#define WVERSION "410" +#define WVERSION "600" #define MAXFL 50 #define MAXSTR 512 #define MAXPOPS 100 -double yscale = 0.0 ; +double yscale = 0.0001 ; char *parname = NULL; char *trashdir = "/var/tmp"; @@ -57,6 +59,7 @@ int xchrom = -1; // if bankermode bankers MUST be in quartet at most one type 1 in quartet int allsnps = NO; +int fancyf4 = YES; double blgsize = 0.05; // block size in Morgans */ double *chitot; @@ -71,6 +74,7 @@ char *snpoutfilename = NULL; char *badsnpname = NULL; char *popfilename = NULL; char *outliername = NULL; +char *blockname = NULL; int inbreed = NO; double lambdascale; @@ -93,6 +97,7 @@ doq4vecb (double *ymean, double *yvar, int ***counts, int *bcols, int *rlist, int nr, int nblocks); int getf4 (int **xx, int *indx, double *ans); +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -130,6 +135,7 @@ main (int argc, char **argv) int a, b, x, t, col; double T2, dof; int *xind; + int numchromp ; int ***counts, **ccc; @@ -145,6 +151,8 @@ main (int argc, char **argv) if (parname == NULL) return 0; + numchromp = numchrom + 1 ; + setinbreed (inbreed); if (outputname != NULL) @@ -154,11 +162,13 @@ main (int argc, char **argv) setinfiles(&indivname, &snpname, &genotypename, instem) ; } + numsnps = getsnps (snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks); numindivs = getindivs (indivname, &indivmarkers); setindm (indivmarkers); + setfancyf4(fancyf4) ; k = getgenos (genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore); @@ -170,9 +180,9 @@ main (int argc, char **argv) cupt->ignore = YES; if (chrom == 0) cupt->ignore = YES; - if (chrom > 23) + if (chrom > numchromp) cupt->ignore = YES; - if ((chrom == 23) && (xchrom != 23)) + if ((chrom == numchromp) && (xchrom != numchromp)) cupt->ignore = YES; } @@ -262,11 +272,6 @@ main (int argc, char **argv) if ((maxgendis < .00001) || (gfromp == YES)) setgfromp (snpmarkers, numsnps); - nblocks = numblocks (snpmarkers, numsnps, blgsize); - nblocks += 10; - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); - ZALLOC (xsnplist, numsnps, SNP *); ZALLOC (tagnums, numsnps, int); @@ -277,9 +282,10 @@ main (int argc, char **argv) ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); -// loads tagnumber - printf ("number of blocks for block jackknife: %d\n", xnblocks); + nblocks = setblocksz (&blstart, &blsize, xsnplist, ncols, blgsize, blockname) ; + + printf ("number of blocks for block jackknife: %d\n", nblocks); + xnblocks = nblocks += 10 ; ZALLOC (counts, ncols, int **); for (k = 0; k < ncols; ++k) { @@ -370,10 +376,16 @@ readcommands (int argc, char **argv) char *tempname; int n; - while ((i = getopt (argc, argv, "p:vV")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + + while ((i = getopt (argc, argv, "p:vVh")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break ; + case 'p': parname = strdup (optarg); break; @@ -414,12 +426,16 @@ readcommands (int argc, char **argv) getstring (ph, "outputname:", &outputname); getstring (ph, "badsnpname:", &badsnpname); getdbl (ph, "blgsize:", &blgsize); + getint (ph, "numchrom:", &numchrom); getint (ph, "popsizelimit:", &popsizelimit); getint (ph, "gfromp:", &gfromp); // gen dis from phys getint (ph, "chrom:", &xchrom); getint (ph, "maxrank:", &maxrank); getint (ph, "allsnps:", &allsnps); + getint (ph, "fancyf4:", &fancyf4); + getdbl (ph, "diagplus:", &yscale); + getstring (ph, "blockname:", &blockname); printf ("### THE INPUT PARAMETERS\n"); @@ -560,91 +576,15 @@ nsnpused = totnum; return (y1 * y1) / y2; // a natural estimate for degrees of freedom in F-test. } - - -double -hfix (int *x) -{ -// correction factor counts in x - double ya, yb, yt, h; - ya = (double) x[0]; - yb = (double) x[1]; - yt = ya + yb; - if (yt <= 1.5) - fatalx ("(hfix)\n"); - h = ya * yb / (yt * (yt - 1.0)); - return h / yt; -} - - -int -getf4 (int **xx, int *indx, double *ans) +int usage (char *prog, int exval) { -int a, i; -double ya, yb, y0, y1, ytot, ff[4]; -double h0, h1; -int isok, f4mode ; -// f4mode == NO => f2, or f3 + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); -*ans = 0.0; -if (indx == NULL) { -*ans = 1.0; -return 2; -} - -isok = f4mode = YES ; - - if (indx[0] == indx[2]) f4mode = NO ; - if (indx[0] == indx[3]) f4mode = NO ; - if (indx[1] == indx[2]) f4mode = NO ; - if (indx[1] == indx[3]) f4mode = NO ; - -for (i = 0; i < 4; ++i) { - ff[i] = -10*(i+1) ; // silly value ; - a = indx[i]; - if (a < 0) { - *ans = 1.0; - return 2; - } - - - *ans - 0 ; - y0 = (double) xx[a][0]; - y1 = (double) xx[a][1]; - ytot = y0 + y1; - if (ytot <= 0.0) { - isok = NO ; - continue ; - } - ff[i] = y0 / ytot; -} -if ((isok == NO) && (f4mode == NO)) return -1 ; - -ya = fabs(ff[0]-ff[1]) ; -yb = fabs(ff[2]-ff[3]) ; - -if (f4mode && (MIN(ya, yb) < .00001)) { - return 1 ; -} -if (isok == NO) return -1 ; - -*ans = (ff[0] - ff[1]) * (ff[2] - ff[3]); -if (f4mode == YES) return 1 ; - -a = indx[0]; -h0 = hfix (xx[a]); -a = indx[1]; -h1 = hfix (xx[a]); -if (indx[0] == indx[2]) -*ans -= h0; -if (indx[0] == indx[3]) -*ans += h0; -if (indx[1] == indx[3]) -*ans -= h1; -if (indx[1] == indx[2]) -*ans += h1; -return 1; - -} + exit(exval); +}; diff --git a/src/qpff3base.c b/src/qpff3base.c index ebe119b..3daa823 100644 --- a/src/qpff3base.c +++ b/src/qpff3base.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -25,7 +26,7 @@ // (YRI, CEU, Papua, .... ) -#define WVERSION "330" +#define WVERSION "331" // phylipname added (f2 stats) // dumpname added (binary file) @@ -162,6 +163,7 @@ void dumpit (char *dumpname, double *ff3, double *ff3var, char **eglist, int numeg); void checkpd (double *a, int n); void dumpf3 (char *dumpf3name, double **btop, double **bbot, int nblock); +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -459,6 +461,7 @@ main (int argc, char **argv) printf ("lambdascale: %9.3f\n", lambdascale); + printf("statistics multiplied by 1000\n") ; printf ("fst:"); printnl (); @@ -835,10 +838,14 @@ readcommands (int argc, char **argv) char *tempname; int n, t; - while ((i = getopt (argc, argv, "f:b:p:g:s:o:l:vVx")) != -1) { + while ((i = getopt (argc, argv, "f:b:p:g:s:o:l:vVxh")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break ; + case 'p': parname = strdup (optarg); break; @@ -1222,7 +1229,7 @@ dumpf3 (char *dumpf3name, double **btop, double **bbot, int nblock) ridfile (dumpf3name); fdes = open (dumpf3name, O_CREAT | O_TRUNC | O_RDWR, 006); - cclear (sss, CNULL, SQ); + cclear ((unsigned char *) sss, CNULL, SQ); sprintf (sss, "numeg: %d nblock: %d nh2: %d", numeg, nblock, nh2); ret = write (fdes, sss, SQ * sizeof (char)); if (ret < 0) { @@ -1230,7 +1237,7 @@ dumpf3 (char *dumpf3name, double **btop, double **bbot, int nblock) fatalx ("bad write: %s", sss); } for (k = 0; k < numeg; ++k) { - cclear (sss, CNULL, SQ); + cclear ((unsigned char *) sss, CNULL, SQ); strncpy (sss, eglist[k], SQ); ret = write (fdes, sss, SQ * sizeof (char)); if (ret < 0) { @@ -1266,7 +1273,7 @@ dumpit (char *dumpname, double *ff3, double *ff3var, char **eglist, int numeg) ridfile (dumpname); fdes = open (dumpname, O_CREAT | O_TRUNC | O_RDWR, 006); - cclear (sss, CNULL, SQ); + cclear ((unsigned char *) sss, CNULL, SQ); sprintf (sss, "numeg: %d", numeg); ret = write (fdes, sss, SQ * sizeof (char)); if (ret < 0) { @@ -1274,7 +1281,7 @@ dumpit (char *dumpname, double *ff3, double *ff3var, char **eglist, int numeg) fatalx ("bad write: %s", sss); } for (k = 0; k < numeg; ++k) { - cclear (sss, CNULL, SQ); + cclear ((unsigned char *) sss, CNULL, SQ); strncpy (sss, eglist[k], SQ); ret = write (fdes, sss, SQ * sizeof (char)); if (ret < 0) { @@ -1312,3 +1319,24 @@ checkpd (double *a, int n) free (b); free (d); } + +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -f ... use sa fixname.\n"); + (void)fprintf(stderr, " -b ... use as base value.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -g <> ... .\n"); + (void)fprintf(stderr, " -s ... use as seed.\n"); + (void)fprintf(stderr, " -o <> ... .\n"); + (void)fprintf(stderr, " -l ... use as lambda scale.\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + (void)fprintf(stderr, " -x ... toggle doAnalysis ON.\n"); + + exit(exval); +}; + + diff --git a/src/qpgbug.c b/src/qpgbug.c deleted file mode 100644 index 1e86446..0000000 --- a/src/qpgbug.c +++ /dev/null @@ -1,2335 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "badpairs.h" -#include "admutils.h" -#include "mcio.h" -#include "mcmcpars.h" -#include "regsubs.h" -#include "egsubs.h" -#include "qpsubs.h" - -#define Y 0 -#define E 1 -#define A 2 - -// (YRI, CEU, Papua, .... ) - - -#define WVERSION "6151" -// lsqmode -// ff3fit added -// reroot added -// deleted wtmin check -// fstdmode added (denominator a la fst) -// lambdascale can be a parameter -// format change (10.4f on graph -// inbreed -// phylipname added -// graphdotname added -// map4x bug fixed -// zthresh param -// isfixed supported -// outliername added -// useallsnps added -// inbreed added. numchrom added -// lock on edge value -// better coding for outpop -// NONE => het over all samples. NULL => wt = 1 -// : and - in .dot output fixed -// initmix added -// fulloutlier added -// bad2popname added -// worst Z added for outliers -// weigntname added -// dottitle added -// big new feature. Multiple ancestors -// initverbose added -// calcfit not called after loadname -// basenum bug fixed -// instem added - - -#define MAXFL 50 -#define MAXSTR 512 -#define MAXPOPS 100 - -char *parname = NULL; -char *rootname = NULL; -char *trashdir = "/var/tmp"; -//int fstdetails = NO ; -int qtmode = NO; -int fstdmode = NO; -int details = NO; -int gsldetails = NO; -double gslprecision = .00001; -int tersemode = NO; -char *f3name = NULL; - -Indiv **indivmarkers; -SNP **snpmarkers; -int numsnps, numindivs; -int seed = 0; -int missingmode = NO; -int noxdata = YES; /* default as pop structure dubious if Males and females */ -int doanalysis = YES; /* if no just print stats */ -int nostatslim = 10; -int znval = -1; -int popsizelimit = -1; -int gfromp = NO; // genetic distance from physical -int hires = NO; -int allsnpsmode = NO; -int inbreed = NO; -int initmixnum = -1; -int initverbose = NO ; - -double blgsize = 0.05; // block size in Morgans */ double *chitot ; -double diag = 0.0; -int xchrom = -1; -int xnochrom = -1; -int *xpopsize; - -int fulloutlier = NO; -int isinit = NO; -int lsqmode = NO; -double f2weight = 1.0; // lsqmode only - -char *instem = NULL ; -char *indivname = NULL; -char *genotypename = NULL; -char *snpname = NULL; -char *snpoutfilename = NULL; -char *badsnpname = NULL; -char *graphname = NULL; -char *graphoutname = NULL; -char *graphdotname = NULL; -char *poplistname = NULL; -char *outliername = NULL; -char *phylipname = NULL; -char *dottitle = NULL ; - -char *dumpname = NULL; -char *loadname = NULL; - -char *badpop2name = NULL; -double bad2fudge = 10000; -char *outpop = NULL; -// list of outliers -char *basepop = NULL; -int basenum = -1; -int *bad2arr = NULL; - -// outnum used for weights -// basenum for f3 status -int *edgelock, nedgelock = 0; -double *lockvals; - - -double lambdascale = -1.0; -int *f2ind, *ind2f; -double *vest, *vvar, *vvinv, *xvvar, *xvvinv, *vvdiag; -double **vmix; -int *lmix, nmix; -int nh2, numeg; -int *ezero = NULL; -double wtmin = .0001; -double minvar = 0.0; // minvalue for variance term -int quartet = NO; -int xnumeg; - - -char *outputname = NULL; -char *weightname = NULL; -FILE *ofile; -char **eglist; -char **egshort; -char **enames; -double zthresh = 3.0; -double f2diag = 0.0; - -int gslsetup (int nmix, double *vmix) ; -double gslopt (double *wpars) ; -void readcommands (int argc, char **argv); -void indiaestit (double *f2, double *f3, double *f4, int n); -void sol2 (double *co, double *rhs, double *ans); -void mkww (double *f3, double *f2, int k, double *ww, int n); -char *getshort (char *ss, int n); -void doff3 (double *ff3, double *ff3var, SNP ** xsnplist, int *xindex, - int *xtypes, int nrows, int ncols, int numeg, int nblocks, - double scale); -void map4x (double *aa, double *bb, int n2, int *indx); -void map4y (double *aa, double *bb, int n2, int *indx); -void getmv (int a, int b, int c, int d, double *mean, double *var, - double *xest, double *xvar); -void bumpm (double *y, int a, int c, double val, double *xest); -void bumpw (double *w, int a, int c, double val); -void lncoadppwts (double *ppwts, double *pwts, int n, double *awts, int nanc); -double calcxx (double *xxans, double *qmat, double *ppwts, double *rhs, - int nrow, int nedge, int nanc); -void setwww (double **tmix, double *www, int n); -void getwww (double **tmix, double *www, int n); -double scorit (double *www, int n, double *pfix, double *ans); -void nagopt (double *params, int n); -void printvals (double **tmix, double *edgelen, int nedge); -void printfit (double *ww); -double initvmix (double *wwinit, int nwts, int numiter); -int iscanon (int a, int b, int c, int d); - -void setrand (double *ww, int n); -void setsimp (double *ww, int n); -void calcfit (double *ff3fit, double *ww, int numeg); -void dumppars (char *dumpname, double *www, int nwts, double *xxans, - int nedge); -void dump1 (FILE * dumpfile, double *ww, int n); -void loadpars (char *loadname, double *www, int nwts, double *xxans, - int nedge); -void read1 (FILE * loadfile, double *ww, int n); -double ff4val (double *ff3, int a, int b, int c, int d, int numeg); -void print4 (double *ff3, int a, int b, int c, int d, int numeg); -void printf3 (char *sss, FILE * fff, double *ww, char **eglist, int n); -int listsubset (int **x, int n, int k); -int numqq (int a, int b, int c, int d); -int loadbad2 (int *bad2arr, char **eglist, int numeg, char *fname); -int nbad2 (int a, int b, int c, int d); -void wtov (double *vv, double **ww, int n); -void vtow (double *vv, double **ww, int n); -void loadppwts (double *ppwts, double *pwts, int n, double *awts, int nanc) ; - -int -main (int argc, char **argv) -{ - - char sss[MAXSTR]; - int **snppos; - int *snpindx; - char **snpnamelist, **indnamelist; - int lsnplist, lindlist; - int i, j, k, k1, k2, k3, k4, kk; - SNP *cupt, *cupt1, *cupt2, *cupt3; - Indiv *indx; - double y1, y2, y, sig, tail, yy1, yy2; - char ss[11]; - int *blstart, *blsize, nblocks; - int xnblocks; /* for xsnplist */ - int *bcols; - int **subsets; - double maxgendis; - - int ch1, ch2; - int fmnum, lmnum; - int num, n1, n2; - - int nindiv = 0, e, f, lag = 1; - double xc[9], xd[4], xc2[9]; - int nignore, numrisks = 1; - double *xrow, *xpt; - SNP **xsnplist; - int *tagnums; - Indiv **xindlist; - int *xindex, *xtypes; - int nrows, ncols, nedge, nanc, m, nc, nvar; - double zn, zvar; - int weightmode = NO; - double chisq, ynrows; - int *numhits, t; - double *xmean, *xfancy; - double *divans, *divsd; - double *hettop, *hetbot; - int chrom, numclear; - double gdis; - int outliter, *badlist, nbad; - double **zdata, *z1, *z2; - int maxtag = -1; - double **zz; - double *pmean, *pnum, rscore[3], tscore[3], hscore[3], rrr[3], ww[3]; - int tpat[3][4], rpat[3][4]; - int *rawcol;; - int a, b, c, d, col, u, v, x; - double *qpscores; - double *hest, *hsig; - double mingenpos, maxgenpos; - int *qhit; /* number of times pair is clade in quartet */ - int *qmiss; /* number of times pair migration event implied */ - int **qplist, numqp = 0, maxqp = 10000; - int *popsizes; - double *qpscore; - double scale; - - - double **zzn, **zzd; - int popx[4]; - double tn[4 * 5], td[4 * 4]; - double zzsig[5], zzest[5], zsc[5]; - double ymin; - double *f2, *f2sig, *fst; - double *ff3, *ff3var, *ff3fit, *ff3sig; - double *vgsl; - - double *f3, *f4, *f3sig, *f4sig, *www, *ww2; - int ng2, ng3, ng4; - double *pwts; - double *ppwts, *awts; - int na2 ; - - char **cnames; - char **vnames; - double yscore; - double *xxans; - double *wwtemp; - int nwts, nforce; - int iter, dof; - double ytail; - int *xpopsize; - FILE *f3file = NULL; - FILE *phylipfile = NULL; - int dmode = NO; - int bad2 = NO; - - - readcommands (argc, argv); - printf ("## qpGraph version: %s\n", WVERSION); - if (parname == NULL) - return 0; - if (outpop != NULL) - printf ("outpop: %s\n", outpop); - - if (instem != NULL) { - setinfiles(&indivname, &snpname, &genotypename, instem) ; - } - - setallsnpsmode (allsnpsmode); - - if (seed == 0) - seed = seednum (); - SRAND (seed); - printf ("seed: %d\n", seed); - - setinbreed (inbreed); - - if (xchrom == (numchrom + 1)) - noxdata = NO; - if (lsqmode) - printf ("simple lsqmode\n"); - if (outputname != NULL) - openit (outputname, &ofile, "w"); - if (f3name != NULL) - openit (f3name, &f3file, "w"); - numeg = loadgraph (graphname, &eglist); - printstrings (eglist, numeg); - - numsnps = - getsnps (snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks); - - numindivs = getindivs (indivname, &indivmarkers); - setindm (indivmarkers); - k = getgenos (genotypename, snpmarkers, indivmarkers, - numsnps, numindivs, nignore); - - for (i = 0; i < numsnps; i++) { - cupt = snpmarkers[i]; - chrom = cupt->chrom; - if ((noxdata == YES) && (cupt->chrom == (numchrom + 1))) - cupt->ignore = YES; - if ((noxdata == NO) && (cupt->chrom != (numchrom + 1))) - cupt->ignore = YES; - if ((xchrom > 0) && (xchrom != chrom)) - cupt->ignore = YES; - if ((xnochrom > 0) && (xnochrom == chrom)) - cupt->ignore = YES; - } - - - printnl (); - - - if ((doanalysis == NO) && (poplistname != NULL)) { - ZALLOC (eglist, MAXPOPS, char *); - numeg = loadlist (eglist, poplistname); - } - else { - if (graphname == NULL) - fatalx ("no graph given\n"); - printf ("graph: %s\n", graphname); - -/** - sprintf(sss, "cat %s", graphname) ; - system(sss) ; - fflush(stdout) ; -*/ - printnl (); - printnl (); - numeg = loadgraph (graphname, &eglist); - if (rootname != NULL) - reroot (rootname); - nedge = getnumedge (); - nanc = getnumanc (); - printf("numedge: %d numancestor: %d\n", nedge, nanc) ; - ZALLOC (enames, nedge, char *); - getenames (enames); - ZALLOC (edgelock, nedge, int); - ZALLOC (lockvals, nedge, double); - nedgelock = getedgelock (edgelock, lockvals); - for (k = 0; k < nedgelock; ++k) { - printf ("locked: %s %12.6f\n", enames[k], lockvals[k]); - } - } - xnumeg = numeg; - for (i = 0; i < numeg; i++) { - setstatus (indivmarkers, numindivs, eglist[i]); - } - - ZALLOC (egshort, numeg, char *); - for (i = 0; i < numeg; i++) { - egshort[i] = strdup (getshort (eglist[i], 5)); - printf ("%3d %s\n", i, eglist[i]); - } - - if (!doanalysis) - printf ("jackknife block size: %9.3f\n", blgsize); - - outnum = -1; - basenum = 0; - - t = getrootlabel (sss); - printf ("root label: %s\n", sss); - - if (outpop == NULL) { - outpop = strdup ("NULL"); - } - -/** - -outpop: NULL (snps are flat weighted) -outpop: NONE snps are weighted 1/ (p * (1--p)) where p is the allele frequency in the entire sample -outpop: Mbuti (say) snps are weighted 1/ (p*(1-p)) where p is allele frequency in Mbuti - -outpop: (not present) - a) if graph root is labeled population (say Mbuti) use this population - b) if graph root is not a labeled population, same as outpop: NULL - - -*/ - - t = strcmp (outpop, "NULL"); - if (t == 0) - outnum = -99; - - t = strcmp (outpop, "NONE"); - if (t == 0) - outnum = -2; - - for (k = 0; k < numeg; ++k) { - t = strcmp (eglist[k], outpop); - if (t == 0) - outnum = k; - } - - if (outnum == -1) { - eglist[numeg] = strdup (outpop); - outnum = numeg; - ++numeg; - } - -// printf("zzbasenum: %d %d\n", outnum, basenum) ; - - basenum = 0 ; // hope this is OK - - - x = (1 << numeg); - subsets = initarray_2Dint (x, numeg, 0); - - if (outnum >= 0) - outpop = eglist[outnum]; - basepop = eglist[basenum]; - - for (i = 0; i < numsnps; i++) { - cupt = snpmarkers[i]; - if (cupt->ignore) - continue; - if (numvalidgtx (indivmarkers, cupt, YES) <= 1) { - if (tersemode == NO) - printf ("nodata: %20s\n", cupt->ID); - cupt->ignore = YES; - } - } - - printf ("outpop: %s\n", outpop); - - ZALLOC (xindex, numindivs, int); - ZALLOC (xindlist, numindivs, Indiv *); - ZALLOC (rawcol, numindivs, int); - nrows = loadindx (xindlist, xindex, indivmarkers, numindivs); - ZALLOC (xtypes, nrows, int); - - for (i = 0; i < nrows; i++) { - indx = xindlist[i]; - k = indxindex (eglist, numeg, indx->egroup); - xtypes[i] = k; - t = strcmp (indx->egroup, outpop); - if (t == 0) - xtypes[i] = outnum; - } - - ZALLOC (xpopsize, numeg, int); - for (i = 0; i < nrows; i++) { - k = xtypes[i]; - ++xpopsize[k]; - } - - for (i = 0; i < numeg; i++) { - printf ("population: %3d %20s %4d\n", i, eglist[i], xpopsize[i]); - } - for (i = 0; i < numeg; i++) { - if (xpopsize[i] == 0) - fatalx ("zero popsize: %s\n", eglist[i]); - } - - - printf ("before setwt numsnps: %d outpop: %s\n", numsnps, outpop); - if (weightname != NULL) { - getweights(weightname, snpmarkers, numsnps) ; - } - else { - setwt (snpmarkers, numsnps, indivmarkers, nrows, xindex, xtypes, outpop, - eglist, numeg); - } - - for (i = 0; i < numsnps; ++i) { - cupt = snpmarkers[i]; - if (cupt->weight <= 0.0) - cupt->ignore = YES; - } - - numsnps = rmsnps (snpmarkers, numsnps, NULL); // rid ignorable snps - printf ("setwt numsnps: %d\n", numsnps); - if (numsnps == 0) - fatalx ("no valid snps\n"); - - setmgpos (snpmarkers, numsnps, &maxgendis); - if ((maxgendis < .00001) || (gfromp == YES)) - setgfromp (snpmarkers, numsnps); - - nblocks = numblocks (snpmarkers, numsnps, blgsize); - ZALLOC (blstart, nblocks, int); - ZALLOC (blsize, nblocks, int); - printf ("number of blocks for moving block jackknife: %d\n", nblocks); - - ZALLOC (xsnplist, numsnps, SNP *); - ZALLOC (tagnums, numsnps, int); - - if (popsizelimit > 0) { - setplimit (indivmarkers, numindivs, eglist, numeg, popsizelimit); - } - - ZALLOC (popsizes, numeg, int); - cntpops (popsizes, indivmarkers, numindivs, eglist, numeg); - setpopsizes (popsizes, eglist, numeg); - - ncols = loadsnpx (xsnplist, snpmarkers, numsnps, indivmarkers); - - - printf ("snps: %d indivs: %d\n", ncols, nrows); - setblocks (blstart, blsize, &xnblocks, xsnplist, ncols, blgsize); - - nblocks = xnblocks; - - zz = initarray_2Ddouble (numeg * numeg, ncols, 0.0); - qplist = initarray_2Dint (numeg * numeg * numeg * numeg, 4, 0); - ZALLOC (qpscore, numeg * numeg * numeg * numeg, double); - - ZALLOC (pmean, numeg, double); - ZALLOC (pnum, numeg, double); - ZALLOC (rawcol, nrows, int); - - vmix = initarray_2Ddouble (MAXG, MAXW, 0.0); - ZALLOC (lmix, MAXG, int); - nmix = 0; - - ng2 = numeg * numeg; - ng3 = numeg * ng2; - ng4 = numeg * ng3; - - ZALLOC (hest, ng2, double); - ZALLOC (hsig, ng2, double); - - ZALLOC (f2, ng2, double); - ZALLOC (f2sig, ng2, double); - ZALLOC (fst, ng2, double); - ZALLOC (www, ng2, double); - ZALLOC (ww2, MAXG, double); - - ZALLOC (ff3, ng2, double); - ZALLOC (ff3fit, ng2, double); - ZALLOC (ff3sig, ng2, double); - ZALLOC (ff3var, ng4, double); -// differences from basepoint - dmode = NO; - if (fstdmode) - dmode = 2; - - scale = - dofstnumx (fst, f2, f2sig, xsnplist, xindex, xtypes, nrows, ncols, numeg, - xnblocks, indivmarkers, dmode); - if (lambdascale <= 0.0) - lambdascale = scale; - else { - vst (f2, f2, lambdascale / scale, ng2); - vst (f2sig, f2sig, lambdascale / scale, ng2); - } - printf ("lambdascale: %9.3f\n", lambdascale); - - - printf ("fst:"); - printnl (); - printmatz (fst, egshort, numeg); - printnl (); - printf ("f2:"); - printnl (); - printmatz (f2, egshort, numeg); - printnl (); - printf3 ("f2:", f3file, f2, egshort, numeg); - - if (phylipname != NULL) { - openit (phylipname, &phylipfile, "w"); - fprintf (phylipfile, "%6d\n", numeg); - sss[10] = CNULL; - for (k1 = 0; k1 < numeg; ++k1) { - strncpy (sss, eglist[k1], 10); - fprintf (phylipfile, "%10s", sss); - for (k2 = 0; k2 < numeg; ++k2) { - y1 = f2[k1 * numeg + k2]; - y2 = f2[k2 * numeg + k1]; - fprintf (phylipfile, "%6.3f", (0.5 * (y1 + y2))); - } - fprintf (phylipfile, "\n"); - } - fclose (phylipfile); - } - - - ZALLOC (f3, ng3, double); - ZALLOC (f4, ng4, double); - ZALLOC (f3sig, ng3, double); - ZALLOC (f4sig, ng4, double); - - nh2 = numeg * (numeg - 1); - nh2 /= 2; - ZALLOC (ind2f, nh2, int); - ZALLOC (f2ind, ng2, int); - - ZALLOC (vest, nh2, double); - ZALLOC (vvar, nh2 * nh2, double); - ZALLOC (xvvar, nh2 * nh2, double); // adjusted - ZALLOC (vvinv, nh2 * nh2, double); - ZALLOC (xvvinv, nh2 * nh2, double); - - k = 0; - for (a = 0; a < numeg; ++a) { - if (a == basenum) - continue; - for (b = a; b < numeg; ++b) { - if (b == basenum) - continue; - ind2f[k] = a * numeg + b; - f2ind[a * numeg + b] = k; - f2ind[b * numeg + a] = k; - ++k; - } - } - - doff3 (ff3, ff3var, xsnplist, xindex, xtypes, nrows, ncols, numeg, nblocks, - lambdascale); -// side effect vvar made - - if (badpop2name != NULL) { - - ZALLOC (bad2arr, numeg * numeg, int); - t = loadbad2 (bad2arr, eglist, numeg, badpop2name); - ZALLOC (vvdiag, nh2, double); - for (u = 0; u < nh2; u++) { - vvdiag[u] = vvar[u * nh2 + u]; - } - - x = intsum (bad2arr + basenum * numeg, numeg); - if (x > 0) { - fatalx ("basepop must not be in bad2 list!\n"); - } - - for (u = 0; u < nh2; u++) { - if (t == 0) - break; - x = ind2f[u]; - b = x / numeg; - c = x % numeg; - if (bad2arr[x] > 0) { - vvar[u * nh2 + u] += bad2fudge; - printf ("forcing large variance for f3(%s ; %s, %s)\n", basepop, - eglist[b], eglist[c]); - } - } - - if (t > 0) - bad2 = YES; - - } - - for (k = 0; k < MIN (4, numeg - 3); ++k) { - x = listsubset (subsets, numeg - 1, k); - } - - for (u = 0; u < nh2; ++u) { - y = vvar[u * nh2 + u]; - vvar[u * nh2 + u] = MAX (y, minvar); - } - - copyarr (vvar, xvvar, nh2 * nh2); - - for (u = 0; u < nh2; ++u) { - y = vvar[u * nh2 + u]; - xvvar[u * nh2 + u] += y * f2diag; - } -// wipe out off diagonal - for (u = 0; u < nh2; ++u) { - for (v = 0; v < nh2; ++v) { - if (u == v) - continue; - xvvinv[u * nh2 + v] = 0.0; - } - } - - -// print some values - printf ("ff3:"); - printnl (); - printmatz (ff3, egshort, numeg); - printnl (); - printf3 ("ff3:", f3file, ff3, egshort, numeg); - - for (a = 0; a < numeg; ++a) { - for (b = 0; b < numeg; ++b) { - y = dump4 (ff3var, a, b, a, b, numeg); - y += 1.0e-12; - ff3sig[a * numeg + b] = 10 * sqrt (y); - } - } - - printf ("ff3sig*10:"); - printnl (); - printmatz (ff3sig, egshort, numeg); - printnl (); - printf3 ("ff3sig:", f3file, ff3sig, egshort, numeg); - -/** - for (a=0; a 0) { - - wtov (vgsl, vmix, nmix); - printf ("init vg:\n"); - printmat (vgsl, 1, nmix); - gslsetup (nmix, vgsl); - gslopt (vgsl); - printf ("final vg:\n"); - printmat (vgsl, 1, nmix); - vtow (vgsl, vmix, nmix); - - printnl (); - putgmix (vmix); - setwww (vmix, wwtemp, 2 * nmix); - } - - if (details) - verbose = YES; - y = scorit (wwtemp, nwts, &y1, ww2); - - dof = nh2 -nvar + intsum (ezero, nedge); - dof -= (nwts - nmix); - - ytail = rtlchsq (dof, y); - printf (" final score: %12.3f dof: %d nominal p-val %12.6f\n", - y, dof, ytail); - - - calcfit (ff3fit, ww2, numeg); - -// now print answers - printf ("ff3fit:\n"); - printmatz (ff3fit, egshort, numeg); - printnl (); - printf3 ("ff3fit:", f3file, ff3fit, egshort, numeg); - printf ("ff3diff:\n"); - vvm (ff3fit, ff3fit, ff3, ng2); - printmatz (ff3fit, egshort, numeg); - printnl (); - printf3 ("ff3diff:", f3file, ff3fit, egshort, numeg); - printvals (vmix, ww2, nedge); - printfit (ww2); - putewts (ww2); // load graph - - dumppars (dumpname, wwtemp, nwts, ww2, nedge); - dumpgraph (graphoutname); - dumpdotgraph_title (graphdotname, dottitle); - - printf ("## end of run\n"); - return 0; -} - -void -calcfit (double *ff3fit, double *ww, int numeg) -{ - double *pwts, *awts, *ppwts, *bestf; - double *xwts; - int u, x; - int a, b, c, d; - int nrows, nedge, nanc, nvar, ng2; - double y1, y2, x1, x2, diff, sig, z; - double *zz; - - ZALLOC (pwts, numeg * MAXG, double); - ZALLOC (awts, numeg * MAXG, double); - getpwts (pwts, awts, &nrows, &nedge, &nanc); - nvar = nedge + nanc*(nanc-1)/2 ; - - ZALLOC (ppwts, nh2 * nvar, double); // coeffs of x_{ab} = (p_0 - p_a) (p_0-p_b) - loadppwts (ppwts, pwts, nedge, awts, nanc); - ZALLOC (bestf, nh2, double); - - xwts = ppwts; - ng2 = numeg + numeg; - vzero (ff3fit, ng2); - for (u = 0; u < nh2; ++u) { - y1 = bestf[u] = vdot (xwts, ww, nvar); - x = ind2f[u]; - b = x / numeg; - c = x % numeg; - ff3fit[b * numeg + c] = y1; - ff3fit[c * numeg + b] = y1; - xwts += nvar; - } - - if (verbose) { - printf("calcfit: %d %d\n", nedge, nanc) ; - printf("pwts:\n") ; - printmat(pwts, nrows, nedge) ; - printnl() ; - printf("awts:\n") ; - printmat(awts, nrows, nanc) ; - printnl() ; - printmat(ppwts, nh2, nvar) ; - printnl() ; - printmat(ww, 1, nvar) ; - printnl() ; - } - - free (pwts); - free (awts); - free (ppwts); - free (bestf); - -} - -void -printfit (double *ww) -{ - double *pwts, *awts, *ppwts, *bestf; - double *xwts; - int u; - int x, a, b, c, d, t, f; - int xa, xb, xc, xd; - int nrows, nedge, nanc, nvar ; - double y, worstz, y1, y2, x1, x2, diff, sig, z; - double *ffaa ; - FILE *outff; - char ss[MAXSTR], ssworst[MAXSTR], *ssx ; - int isworst ; - char **ancnames ; - - ZALLOC (pwts, numeg * MAXG, double); - ZALLOC (awts, numeg * MAXG, double); - getpwts (pwts, awts, &nrows, &nedge, &nanc); - nvar = nedge + nanc*(nanc-1)/2 ; - ZALLOC (ppwts, nh2 * nvar, double); // coeffs of x_{ab} = (p_0 - p_a) (p_0-p_b) - loadppwts (ppwts, pwts, nedge, awts, nanc); - ZALLOC (bestf, nh2, double); - - xwts = ppwts; - for (u = 0; u < nh2; ++u) { - bestf[u] = vdot (xwts, ww, nvar); - xwts += nvar; - } - if (outliername != NULL) - openit (outliername, &outff, "w"); - - printnl (); - printnl (); - - for (u = 0; u < nh2; u++) { - if (badpop2name == NULL) - break; - vvar[u * nh2 + u] = vvdiag[u]; - } - - if (nanc>1) { - printf("ancestor f-stats (estimated):\n") ; - ZALLOC(ancnames, nanc, char *) ; - getancnames(ancnames) ; - c = 0; - ffaa = ww + nedge ; - f = 0 ; - for (a=1; a 0) - printf (" ---"); - printnl (); - } - } - printnl (); - printnl (); - for (a = 1; a < numeg; a++) { - for (b = 1; b < numeg; b++) { - if (details == NO) - break; - u = f2ind[a * numeg + b]; - printf ("%10s %10s ff3fit: ", egshort[a], egshort[b]); - printf ("%12.6f ", bestf[u]); - printf ("%12.6f ", vest[u]); - printnl (); - } - } - - printf ("outliers:"); - printnl (); - - xa = xb = xc = xd = -1; - worstz = -1; - ssworst[0] = CNULL ; - - for (a = 0; a < numeg; a++) { - for (b = 0; b < numeg; b++) { - for (c = 0; c < numeg; c++) { - for (d = 0; d < numeg; d++) { - if (iscanon (a, b, c, d) == NO) - continue; - getmv (a, b, c, d, &y1, &y2, vest, vvar); - getmv (a, b, c, d, &x1, &x2, bestf, vvar); - diff = y1 - x1; - sig = sqrt (y2); - z = diff / sig; - y = fabs (z); - isworst = NO ; - if (y > worstz) { - isworst = YES ; - worstz = y ; - xa = a; - xb = b; - xc = c; - xd = d; - } - ssx = ss ; - if (fulloutlier) { - ssx += sprintf (ssx, "%15s %15s ", eglist[a], eglist[b]); - ssx += sprintf (ssx, "%15s %15s ", eglist[c], eglist[d]); - } - else { - ssx += sprintf (ssx, "%10s %10s ", egshort[a], egshort[b]); - ssx += sprintf (ssx, "%10s %10s ", egshort[c], egshort[d]); - } - ssx += sprintf (ssx, " %12.6f %12.6f ", x1, y1); - ssx += sprintf (ssx, "%12.6f ", diff); - ssx += sprintf (ssx, "%12.6f ", sig); - ssx += sprintf (ssx, "%9.3f ", z); - x = nbad2 (a, b, c, d); - if (x > 0) - ssx += sprintf (ssx, " ---"); - if (isworst) strcpy(ssworst, ss) ; - if (fabs (z) < zthresh) continue; - printf("%s", ss) ; - printnl (); - if (outliername != NULL) { - if (fulloutlier) { - fprintf (outff, "%15s %15s ", eglist[a], eglist[b]); - fprintf (outff, "%15s %15s ", eglist[c], eglist[d]); - } - else { - fprintf (outff, "%10s %10s ", egshort[a], egshort[b]); - fprintf (outff, "%10s %10s ", egshort[c], egshort[d]); - } - fprintf (outff, " %12.6f %12.6f ", x1, y1); - fprintf (outff, "%9.3f ", z); - t = numqq (a, b, c, d); - fprintf (outff, "%d ", t); - fprintf (outff, "\n"); - - printnl (); - } - } - } - } - } - - printnl (); - free (pwts); - free (awts); - free (ppwts); - free (bestf); - if (outliername != NULL) - fclose (outff); - if (xa < 0) - return; - - printnl() ; - printf ("worst f-stat: "); - printf("%s", ssworst) ; - printnl() ; - printnl() ; - - sprintf(ss, "%s ::%s\n", graphname, ssworst) ; - dottitle = strdup(ss) ; - - return; -} - -int -numqq (int a, int b, int c, int d) -{ - int xx[100]; - ivzero (xx, 100); - xx[a] = xx[b] = xx[c] = xx[d] = 1; - return intsum (xx, 100); -} - -int -iscanon (int a, int b, int c, int d) -// test quartet for is it canonical? -{ - if (a >= b) - return NO; - if (c >= d) - return NO; - if (a > c) - return NO; - if ((a == c) && (b > d)) - return NO; - - return YES; - -} - -void -print4 (double *ff3, int a, int b, int c, int d, int numeg) -{ - double y; - - y = ff4val (ff3, a, b, c, d, numeg); - printf ("%4s ", get3 (egshort[a])); - printf ("%4s ", get3 (egshort[b])); - printf (" "); - printf ("%4s ", get3 (egshort[c])); - printf ("%4s ", get3 (egshort[d])); - printf ("%6d", nnint (1000.0 * y)); - printnl (); -} - -double -ff4val (double *ff3, int a, int b, int c, int d, int numeg) -{ - double y1, y2, y3, y4; - - y1 = dump2 (ff3, a, c, numeg); - y2 = dump2 (ff3, a, d, numeg); - y3 = dump2 (ff3, b, c, numeg); - y4 = dump2 (ff3, b, d, numeg); - - return y1 + y4 - (y2 + y3); - -} - -void -printmix () -{ - - char sss[MAXSTR]; - int k; - - getgmix (vmix, lmix, &nmix); - for (k = 0; k < nmix; ++k) { - getmixstr (k, sss); - printf ("mix: %40s ", sss); - printmat (vmix[k], 1, lmix[k]); - } - printnl (); -} - -void -printvals (double **tmix, double *edgelen, int nedge) -{ - char sss[MAXSTR]; - int k, n; - - for (k = 0; k < nmix; ++k) { - getmixstr (k, sss); - printf ("mix: %40s ", sss); - printmat (tmix[k], 1, lmix[k]); - } - printnl (); - - for (k = 0; k < nedge; ++k) { - printf ("%9s ", enames[k]); - } - printnl (); - printmatwf (edgelen, 1, nedge, nedge, "%9.4f "); - -} - -void -setwww (double **tmix, double *www, int n) -// copy tmix to vector -{ - int k, l; - double *ww; - - if (n != intsum (lmix, nmix)) - fatalx ("dimension bug\n"); - - ww = www; - for (k = 0; k < nmix; ++k) { - l = lmix[k]; - copyarr (tmix[k], ww, l); - bal1 (ww, l); - ww += l; - } - -} - -void -getwww (double **tmix, double *www, int n) -// copy vector to tmix -{ - int k, l; - double *ww; - - if (n != intsum (lmix, nmix)) - fatalx ("dimension bug\n"); - - ww = www; - for (k = 0; k < nmix; ++k) { - l = lmix[k]; - copyarr (ww, tmix[k], l); - ww += l; - } -} - -void -dumppars (char *dumpname, double *www, int nwts, double *xxans, int nedge) -{ - FILE *dumpfile; - - if (dumpname == NULL) - return; - openit (dumpname, &dumpfile, "w"); - dump1 (dumpfile, www, nwts); - dump1 (dumpfile, xxans, nedge); - - fclose (dumpfile); -} - -void -dump1 (FILE * dumpfile, double *ww, int n) -{ - int k; - for (k = 0; k < n; ++k) - fprintf (dumpfile, "%12.6f\n", ww[k]); - -} - -void -loadpars (char *loadname, double *www, int nwts, double *xxans, int nedge) -{ - FILE *loadfile; - - if (loadname == NULL) - return; - printf ("zzloadpars %d %d\n", nwts, nedge); - openit (loadname, &loadfile, "r"); - read1 (loadfile, www, nwts); - read1 (loadfile, xxans, nedge); - printmat (www, 1, nwts); - - printnl (); - printnl (); - printmat (xxans, 1, nedge); - - fclose (loadfile); - -} - -void -read1 (FILE * loadfile, double *ww, int n) -{ - - int k; - vclear (ww, -1.0, n); - for (k = 0; k < n; ++k) { - fscanf (loadfile, "%lf\n", &ww[k]); -// printf ("zzfs %d %9.3f\n", k, ww[k]); - } - -} - -void -normvec (double *www, int n) -{ - double **tmix; - - tmix = initarray_2Ddouble (nmix, MAXW, 0.0); - - getwww (tmix, www, n); - setwww (tmix, www, n); - free2D (&tmix, nmix); -} - - -double -scorit (double *www, int n, double *pfix, double *ans) -// pfix and ans can be NULL -{ - - double **tmix; - double *awts, *pwts, *aawts, *ppwts, *xxans; - double yfix, yscore, y, yy ; - - int k, l, j; - double *ppinv; - int nrows, nedge, nanc, na2, nvar; - static int ncall = 0; - double tt[MAXG] ; - - ++ncall; - if (n != intsum (lmix, nmix)) - fatalx ("dimension bug\n"); - - tmix = initarray_2Ddouble (MAXG, MAXW, 0.0); - getwww (tmix, www, n); - - yfix = 0.0; - for (k = 0; k < nmix; ++k) { - l = lmix[k]; - y = asum (tmix[k], l); - for (j = 0; j < l; ++j) { - yy = tmix[k][j]; - if (yy < 0.0) { - tmix[k][j] = fabs (yy); - yfix += 10000 * fabs (yy); - } - } - vabs (tmix[k], tmix[k], l); - - bal1 (tmix[k], l); - yfix += (y - 1.0) * (y - 1.0); - tt[k] = tmix[k][0] ; - } - - putgmix (tmix); - ZALLOC (pwts, numeg * MAXG, double); - ZALLOC (awts, numeg * MAXG, double); - - getpwts (pwts, awts, &nrows, &nedge, &nanc); - na2 = nanc*(nanc-1)/2 ; - nvar = nedge + na2 ; - ZALLOC (xxans, nvar, double); - ZALLOC (ppwts, nh2 * nvar, double); // coeffs of x_{ab} = (p_0 - p_a) (p_0-p_b) - loadppwts (ppwts, pwts, nedge, awts, nanc); - - - ppinv = vvinv; - if (lsqmode) - ppinv = xvvinv; - - yscore = calcxx (xxans, ppinv, ppwts, vest, nh2, nedge, nanc); - - if (ncall == -1) { - printf("init score: %9.3f\n", yscore) ; - printf("zzinit: %d %d ", nedge, nvar) ; - if (nmix!=0) printmat(tt, 1, nmix) ; - else printnl() ; - printf("pwts:\n") ; - printmat(pwts, nrows, nedge) ; - printnl() ; - printf("awts:\n") ; - printmat(awts, nrows, nanc) ; - printnl() ; - printmat(ppwts, nh2, nvar) ; - printnl() ; - printmat(xxans, 1, nvar) ; - printnl() ; - printfit(xxans) ; - } - - if (pfix != NULL) - *pfix = yfix; - if (ans != NULL) - copyarr (xxans, ans, nvar); - free2D (&tmix, MAXG); - free (pwts); - free (awts); - free (ppwts); - - return yscore; - -} - -double -calcxx (double *xxans, double *qmat, double *ppwts, double *rhs, - int nrow, int nedge,int nanc) - -/** - Set L(a) = P(a, *) * X(*) - minimize Q(a,b) (L(a) - R(a))(L(b)-R(b)) - returns min value -*/ -{ - - double *cc, *rr, *ll, *w1, *w2; - double *pa, *pb; - int a, b, i, j, kret, u, x, c, f=0; - int ncol, na2 ; - double y; - double ttans[100]; - double *q1, *q2; - int *constraint, *cpt ; - - na2 = nanc*(nanc-1)/2 ; - ncol = nedge + na2 ; - - ZALLOC(constraint, ncol, int) ; - ivclear(constraint, 1, ncol) ; - - cpt = constraint + nedge ; - - f = 0 ; - for (a=1; a< nanc; ++a) { - for (b=a; b 0) { - for (i = 0; i < ncol; i++) { - cc[i * ncol + i] += diag / y; - } - } - - vst (q1, rr, -2, ncol); - qminposfixc (q2, cc, q1, ncol, edgelock, lockvals, nedgelock, constraint); - - copyarr (q2, xxans, ncol); - - free (q1); - free (q2); - - - mulmat (ll, ppwts, xxans, nrow, ncol, 1); - vvm (w2, ll, rhs, nrow); - mulmat (w1, qmat, w2, nrow, nrow, 1); - - y = vdot (w1, w2, nrow); - - free (cc); - free (rr); - free (ll); - free (w1); - free (w2); - free(constraint) ; - - return y; -} - -void loadppwts (double *ppwts, double *pwts, int n, double *awts, int nanc) -{ - int u, x, b, c, d, e, f; - int na2, nam, nvar ; - double *wa, *wb, *ww, *zwts ; - - na2 = nanc*(nanc-1)/2 ; - if (awts == NULL) na2 = 0 ; - nam = nanc-1 ; - nvar = n + na2 ; - - zwts = ppwts ; - for (u = 0; u < nh2; u++) { - x = ind2f[u]; - b = x / numeg; - c = x % numeg; - --b; - --c; - vvt (zwts, pwts + b * n, pwts + c * n, n); - zwts += n ; - - if (na2==0) continue ; - - - wa = awts + b*nanc ; - wb = awts + c*nanc ; - - f = 0 ; - for (d=1; d<=nanc-1; ++d) { - for (e=d; e<=nanc-1; ++e) { - zwts[f] = wa[d] * wb[e] ; - if (d= 0) - noxdata = 1 - t; - getint (ph, "chrom:", &xchrom); - getint (ph, "nochrom:", &xnochrom); - getint (ph, "doanalysis:", &doanalysis); - getint (ph, "quartet:", &quartet); - - getint (ph, "nostatslim:", &nostatslim); - getint (ph, "popsizelimit:", &popsizelimit); - getint (ph, "gfromp:", &gfromp); // gen dis from phys - getint (ph, "seed:", &seed); - getint (ph, "details:", &details); - getint (ph, "lsqmode:", &lsqmode); - getint (ph, "fstdmode:", &fstdmode); - getstring (ph, "dumpname:", &dumpname); - getstring (ph, "loadname:", &loadname); - getint (ph, "hires:", &hires); - - printf ("### THE INPUT PARAMETERS\n"); - printf ("##PARAMETER NAME: VALUE\n"); - writepars (ph); - -} - -void -sol2 (double *co, double *rhs, double *ans) -// solve 2x2 system -{ - - double c00, c01, c10, c11, r0, r1, ra, xa; - - c00 = co[0]; - c01 = co[1]; - c10 = co[2]; - c11 = co[3]; - r0 = rhs[0]; - r1 = rhs[1]; - - ra = r0 * c11 - r1 * c01; - xa = c00 * c11 - c10 * c01; - ans[0] = ra / xa; - if (c11 != 0.0) { - ra = r1 - c10 * ans[0]; - xa = c11; - } - else { - ra = r1 - c00 * ans[0]; - xa = c01; - } - ans[1] = ra / xa; - -} -void printbug(int a,int b,int c, int col, SNP *cupt,double ytop) -{ - if (col>100000) return ; - - if (a>3) return ; - if (b>3) return ; - if (c>3) return ; - - printf("zzbug: %20s ", cupt -> ID) ; - printf("%1d %1d %1d ", a,b,c) ; - printf("%9.3f\n", ytop) ; - -} - -void -doff3 (double *ff3, double *ff3var, SNP ** xsnplist, int *xindex, int *xtypes, - int nrows, int ncols, int numeg, int nblocks, double scale) -{ - - int t1, t2; - int a, b, c; - int ng2, ng3; - int c1[2], c2[2], *cc; - int *rawcol, *popall, *pop0, *pop1; - int k, g, i, col, j; - double ya, yb, y, jest, jsig, mean; - SNP *cupt; - double *top, *bot, *djack, *wjack, *gtop, *gbot, *wbot, *wtop; - double **btop, **bbot, wt; - double *w1, *w2, *w3; - double ytop, ybot; - double y1, y2, yscal; - int bnum; - int numegm = numeg - 1; - int u, v, x, kret; - double *estmat; - - ng2 = numeg * numeg; - ng3 = numeg * numeg * numeg; - - ZALLOC (w1, ng3, double); - ZALLOC (w2, ng3, double); - ZALLOC (w3, ng3 * numeg, double); - ZALLOC (gtop, ng3, double); - ZALLOC (gbot, ng3, double); - ZALLOC (wtop, ng3, double); - ZALLOC (wbot, ng3, double); - ZALLOC (estmat, ng3, double); - ZALLOC (djack, nblocks, double); - ZALLOC (wjack, nblocks, double); - btop = initarray_2Ddouble (nblocks, ng3, 0.0); - bbot = initarray_2Ddouble (nblocks, ng3, 0.0); - - ZALLOC (vest, nh2, double); - ZALLOC (vvar, nh2 * nh2, double); - -// printf("zz ") ; printimat(ind2f, 1, nh2) ; - - for (col = 0; col < ncols; ++col) { - cupt = xsnplist[col]; - if (cupt->ignore) - continue; - wt = cupt->weight; - if (wt <= 0.0) - continue; - bnum = cupt->tagnumber; - if (bnum < 0) - continue; - top = btop[bnum]; - bot = bbot[bnum]; - - kret = f3yyx (estmat, cupt, xindex, xtypes, nrows, numeg, indivmarkers); - if (kret < 0) - continue; - - ++wjack[bnum]; - - a = basenum; - for (u = 0; u < nh2; u++) { - x = ind2f[u]; - b = x / numeg; - c = x % numeg; - ytop = dump3 (estmat, a, b, c, numeg); - printbug(a,b,c,col,cupt,ytop) ; - if (ytop < -100) continue; - if (fstdmode == NO) { - top[u] += wt * ytop; - bot[u] += 1.0; - } - else { - top[u] += ytop; - bot[u] += 1.0 / wt; - } - } - } - - for (k = 0; k < nblocks; k++) { - top = btop[k]; - bot = bbot[k]; - vvp (gtop, gtop, top, nh2); - vvp (gbot, gbot, bot, nh2); - } - - vsp (w2, gbot, 1.0e-10, nh2); - vvd (w3, gtop, w2, nh2); - vzero (ff3, numeg * numeg); - for (u = 0; u < nh2; u++) { - x = ind2f[u]; - b = x / numeg; - c = x % numeg; - y1 = w3[u]; - bump2 (ff3, b, c, numeg, y1); - if (b < c) - bump2 (ff3, c, b, numeg, y1); - } - - - for (k = 0; k < nblocks; k++) { - top = btop[k]; - bot = bbot[k]; - vvm (wtop, gtop, top, nh2); - vvm (wbot, gbot, bot, nh2); - vsp (wbot, wbot, 1.0e-10, nh2); - vvd (top, wtop, wbot, nh2); // delete-block estimate - } - vsp (gbot, gbot, 1.0e-10, nh2); - vvd (gtop, gtop, gbot, nh2); - - - wjackvest (vest, vvar, nh2, gtop, btop, wjack, nblocks); - vst (vest, vest, scale, nh2); - vst (vvar, vvar, scale * scale, nh2 * nh2); - - vst (ff3, ff3, scale, numeg * numeg); - map4x (vvar, ff3var, numeg, ind2f); -// vst(ff3var, ff3var, scale*scale, numeg*numeg*numeg*numeg) ; - - - free (w1); - free (w2); - free (w3); - - free (gbot); - free (wtop); - free (wbot); - free (estmat); - free (djack); - free (wjack); - - free2D (&btop, nblocks); - free2D (&bbot, nblocks); - -} - -void -map4y (double *aa, double *bb, int n2, int *indx) -// map 4d array (n1 x n1 x n1 x n1 -> b n2 x n2 x n2 x n2 -{ - int u, v, a, b, c, d; - int x; - double y1; - int nh2; - - nh2 = n2 * (n2 - 1); - nh2 /= 2; - vzero (aa, nh2 * nh2); - for (u = 0; u < nh2; ++u) { - for (v = u; v < nh2; ++v) { - - x = indx[u]; - a = x / n2; - b = x % n2; - x = indx[v]; - c = x / n2; - d = x % n2; - - y1 = dump4 (bb, a, b, c, d, n2); - aa[u * nh2 + v] = y1; - aa[v * nh2 + u] = y1; - } - } -} - -void -bumpm (double *y, int a, int c, double val, double *xest) -{ - - int k; - - if (a == basenum) - return; - if (c == basenum) - return; - k = f2ind[a * numeg + c]; - *y += val * xest[k]; - -} - -void -bumpw (double *w, int a, int c, double val) -{ - - int k; - - if (a == basenum) - return; - if (c == basenum) - return; - k = f2ind[a * numeg + c]; - w[k] += val; - -} - -double -initvmix (double *wwinit, int nwts, int numiter) -{ - double *ww, yold, ybest; - double *ww2; - int iter, k, a, b; - int xniter; - double y; - int nedge, ng2, nanc, nvar; - double *ff3fit; - int ntrials = 0 ; - - if (nwts == 0) - return 0 ; - nedge = getnumedge (); - nanc = getnumanc() ; - nvar = nedge + nwts + nanc*(nanc-1)/2 ; - ng2 = numeg * numeg; - - ZALLOC (ff3fit, ng2, double); - - ZALLOC (ww, nwts, double); - ZALLOC (ww2, nvar, double); - - getgmix (vmix, lmix, &nmix); - setwww (vmix, wwinit, nwts); - copyarr (wwinit, ww, nwts); - -//verbose = YES ; - ybest = scorit (ww, nwts, NULL, ww2); - if (verbose) - printf ("initv: %4d %9.3f\n", -1, ybest); - calcfit (ff3fit, ww2, numeg); -// now print answers - - xniter = numiter/nmix ; - xniter /= 2 ; - for (iter = 1; iter <= xniter; ++iter) { - setrand (ww, nwts); - y = scorit (ww, nwts, NULL, NULL); - ++ntrials ; - if (y < ybest) { - ybest = y; - copyarr (ww, wwinit, nwts); - if (initverbose) { - printf("initmixiter: %4d %9.3f\n", iter, ybest) ; - fflush(stdout) ; - } - } - } - getwww (vmix, wwinit, nwts); - putgmix (vmix); - getgmix (vmix, lmix, &nmix); - setwww (vmix, wwinit, nwts); - - xniter = numiter / nmix ; - xniter /= 2 ; - for (iter = 1; iter <= xniter; ++iter) { - for (k = 0; k < nmix; ++k) { - getwww (vmix, wwinit, nwts); - setsimp (vmix[k], lmix[k]); - setwww (vmix, ww, nwts); - y = scorit (ww, nwts, NULL, NULL); - ++ntrials ; - if (y < ybest) { - ybest = y; - copyarr (ww, wwinit, nwts); - if (initverbose) { - printf("initmixiter (setsimp): %4d : %d %9.3f\n", iter, k, ybest) ; - fflush(stdout) ; - } - } - } - if (nmix<=1) continue ; - for (k=0; k> 1; - } - if (intsum (w, n) != k) - continue; - copyiarr (w, x[t], n); - ++t; - } - - free (w); - return t; - -} - -int -loadbad2 (int *bad2arr, char **eglist, int numeg, char *fname) -{ - FILE *fff; - char line[MAXSTR + 1], c; - char *spt[MAXFF], *sx, *s1, *s2; - int nsplit, n = 0; - int kret, t, i, j, w, k, u, v; - int nt; - double val; - int num = 0; - int xind[MAXFF]; - - t = numeg * numeg; - ivzero (bad2arr, t); - if (fname == NULL) - return 0; - - openit (fname, &fff, "r"); - line[MAXSTR] = '\0'; - while (fgets (line, MAXSTR, fff) != NULL) { - nsplit = splitup (line, spt, MAXFF); - if (nsplit == 0) - continue; - sx = spt[0]; - if (sx[0] == '#') { - freeup (spt, nsplit); - continue; - } - ++num; - for (k = 0; k < nsplit; ++k) { - sx = spt[k]; - w = indxstring (eglist, numeg, sx); - if (w < 0) - fatalx ("loadbad2: bad pop %s\n", sx); - xind[k] = w; - } - for (i = 0; i < nsplit; ++i) { - for (j = i + 1; j < nsplit; ++j) { - u = xind[i]; - v = xind[j]; - bad2arr[u * numeg + v] = bad2arr[v * numeg + u] = 1; - } - } - n += nsplit; - freeup (spt, nsplit); - - } - fclose (fff); - return n; -} - -int -nbad2 (int a, int b, int c, int d) -{ - - int tt[4], i, j, u, v, n = 0; - - if (badpop2name == NULL) - return 0; - if (bad2arr == NULL) - return 0; - tt[0] = a, tt[1] = b, tt[2] = c, tt[3] = d; - for (i = 0; i < 4; i++) { - for (j = i; j < 4; j++) { - u = tt[i]; - v = tt[j]; - n += bad2arr[u * numeg + v]; - } - } - - return n; - -} - -void -wtov (double *vv, double **ww, int n) -{ - int k; - - for (k = 0; k < n; ++k) { - vv[k] = ww[k][0]; - } -} - -void -vtow (double *vv, double **ww, int n) -{ - int k; - double y; - - for (k = 0; k < n; ++k) { - y = vv[k]; - ww[k][0] = y; - ww[k][1] = 1.0 - y; - } -} diff --git a/src/qpgsubs.c b/src/qpgsubs.c index a8168a0..e56ad91 100644 --- a/src/qpgsubs.c +++ b/src/qpgsubs.c @@ -21,6 +21,7 @@ extern int hires; static int totpop = 0; static int *ispath = NULL; static int ispathlen = 0; +static int outformat = 1 ; // ispath [x*numvertex=y] = 1 iff path from x -> y char *bugstring ; @@ -587,6 +588,11 @@ destroyg () } +void setoutformat(int xxx) +{ + outformat = xxx ; +} + void setispath () { @@ -1176,6 +1182,7 @@ loadgraph (char *rname, char ***peglist) node = root (); node->isroot = YES; +// printf("zzroot: %s %d\n", node -> name, node -> gnode) ; node->ancestornumber = 0 ; node -> numadaughter = 0 ; // printf("numvertex: %d root: %s\n", numvertex, node -> name) ; @@ -1183,10 +1190,24 @@ loadgraph (char *rname, char ***peglist) ZALLOC (eglist, MAXG, char *); for (k = 0; k < numvertex; ++k) { node = &vlist[k]; +// printf("zznode: %s\n", node -> name) ; + tnode = (NODE *) node -> left ; +/** + if (tnode != NULL) { + tnode -> parent = (struct NODE *) node ; + printf("left:: %s %d\n", tnode-> name, tnode -> gnode) ; + } + tnode = (NODE *) node -> right ; + if (tnode != NULL) { + tnode -> parent = (struct NODE *) node ; + printf("right:: %s %d\n", tnode-> name, tnode -> gnode) ; + } +*/ if ((node -> isadmix == NO) && (node -> parent == NULL)) { if (node -> isroot == NO) { ++numanc ; node -> ancestornumber = numanc ; +// printf("zzanc: %s %d\n", node -> name, node -> gnode) ; } } @@ -1325,8 +1346,11 @@ readit (char *cname) okline = NO; kret = strcmp (sx, "vertex"); if (kret == 0) { + sx = spt[1] ; + t = vindex (sx, vlist, n); + if (t>=0) fatalx("duplicate vertex: %s\n", sx) ; node = &vlist[n]; - node->name = strdup (spt[1]); + node->name = strdup (sx); ++n; if (nsplit>2) node -> time = atof(spt[2]) ; okline = YES; @@ -1337,7 +1361,7 @@ readit (char *cname) t = vindex (sx, vlist, n); if (t < 0) fatalx ("bad label: %s\n", sx); - sx = spt[2]; + if (nsplit >=3) sx = spt[2]; tnode = &vlist[t]; tnode->label = strdup (sx); if (nsplit >= 4) { @@ -1766,6 +1790,134 @@ pedge (FILE * fff, NODE * anode, NODE * bnode, double val, double theta, int mod freestring (&ss3); } + +void +dumpgraphnew (char *graphname) +{ + + FILE *fff; + NODE *node, *xnode, *xroot; + EDGE *edge; + int k, j, t, vind, kk; + int *dd, *ind; + char sform[10], sformx[10] ; + char sss[100] ; + double y ; + + if (hires) { + strcpy (sform, " %12.6f"); + strcpy (sformx, ":%.6f") ; + } + else { + strcpy (sform, " %9.3f"); + strcpy (sformx, ":%.3f") ; + } + if (graphname == NULL) + fff = stdout; + else + openit (graphname, &fff, "w"); + +/** + we process vertices from the root +*/ + node = root() ; + fprintf(fff, "root %s\n", node -> name) ; + + setdistances (); + ZALLOC (dd, numvertex, int); + ZALLOC (ind, numvertex, int); + for (k = 0; k < numvertex; ++k) { + node = &vlist[k]; + dd[k] = node->distance; + } + isortit (dd, ind, numvertex); // ind stores indices in distance order + + //1 dump vertex + for (k = 0; k < numvertex; ++k) { + break ; + kk = ind[k]; + node = &vlist[kk]; + if (node -> isroot) continue ; + if (node->distance == HUGEDIS) + fprintf (fff, "##"); + y = node -> time ; + if (y<0) y=-1 ; + fprintf (fff, "vertex %12s %9.0f\n", node->name, y) ; + } + //2 dump label + for (k = 0; k < numvertex; ++k) { + kk = ind[k]; + node = &vlist[kk]; + if (node->isdead) + continue; + if (node->label == NULL) + continue; + fprintf (fff, "label %12s", node->name); + fprintf (fff, " %20s", node->label); + t = node->popsize; + if ((totpop > 0) || (t > 0)) { + fprintf (fff, " %5d", t); + } + fprintf (fff, "\n"); + } +// dump ledge, redge + for (k = 0; k < numvertex; ++k) { + kk = ind[k]; + node = &vlist[kk]; + if (node->isdead) + continue; + edge = (EDGE *) node->eleft; + + if (edge != NULL) { + xnode = (NODE *) node->left; + fprintf (fff, "edge %12s", edge->name); + fprintf (fff, " %12s", node->name); + fprintf (fff, " %12s", xnode->name); + fprintf (fff, sform, edge->val); + if (edge -> theta >= 0) { + fprintf (fff, sformx, edge->theta); + } + fprintf (fff, "\n"); + } + edge = (EDGE *) node->eright; + + if (edge != NULL) { + edge->val = MAX (edge->val, 0.0); + xnode = (NODE *) node->right; + fprintf (fff, "edge %12s", edge->name); + fprintf (fff, " %12s", node->name); + fprintf (fff, " %12s", xnode->name); + fprintf (fff, sform, edge->val); + if (edge -> theta > 0) { + fprintf (fff, sformx, edge->theta); + } + fprintf (fff, "\n"); + } + } + for (k = 0; k < numvertex; ++k) { + kk = ind[k]; + node = &vlist[kk]; + if (node->isdead) + continue; + t = node->numwind; + if (t == 0) + continue; + fprintf (fff, "admix %12s ", node->name); + for (j = 0; j < t; ++j) { + vind = node->windex[j]; + xnode = &vlist[vind]; + fprintf (fff, "%10s ", xnode->name); + } + for (j = 0; j < t; ++j) { + fprintf (fff, sform, node->wmix[j]); + } + fprintf (fff, "\n"); + } + if (graphname != NULL) + fclose (fff); + free (dd); + free (ind); +} void dumpgraph (char *graphname) { @@ -1779,6 +1931,11 @@ dumpgraph (char *graphname) char sss[100] ; double y ; + if (outformat == 2) { + dumpgraphnew(graphname) ; + return ; + } + if (hires) { strcpy (sform, " %12.6f"); strcpy (sformx, ":%.6f") ; @@ -2793,6 +2950,7 @@ int calcscript(char **string) int na[10], nb[10], isleaf, ispub ; NODE *node, *son, *daughter ; char sss[MAXSTR] ; + int pubroot = NO ; nv = numvertex ; ZALLOC(vtable, nv, int) ; @@ -2836,6 +2994,7 @@ int calcscript(char **string) sprintf(string[n], "root: %s", node -> name) ; ++n ; vtable[kroot] = -3 ; + pubroot = YES ; } for (k=0; k #include #include +#include #include #include @@ -24,7 +25,7 @@ // (YRI, CEU, Papua, .... ) -#define WVERSION "651" +#define WVERSION "700" // gsimplify option added // calctime added @@ -103,6 +104,7 @@ void dump1 (FILE * dumpfile, double *ww, int n); void loadpars (char *loadname, double *www, int nwts, double *xxans, int nedge); void read1 (FILE * loadfile, double *ww, int n); +int usage (char *prog, int exval); int main (int argc, char **argv) @@ -241,12 +243,22 @@ readcommands (int argc, char **argv) phandle *ph; char str[5000]; char *tempname; - int n; + int n, t; - while ((i = getopt (argc, argv, "p:r:g:o:d:x:s:hvVt")) != -1) { + if (argc == 1) { usage(basename(argv[0]), 1); } + while ((i = getopt (argc, argv, "p:r:g:o:d:x:s:HhvVtf")) != -1) { switch (i) { + case 'h': + usage(basename(argv[0]), 0); + break ; + + case 'f': + setoutformat(2) ; + break; + + case 'p': parname = strdup (optarg); break; @@ -275,16 +287,10 @@ readcommands (int argc, char **argv) delpop = strdup (optarg); break; - case 'h': + case 'H': calchash = YES; break; -/** - case 's': - gsimp = YES; - break; -*/ - case 'v': printf ("version: %s\n", WVERSION); break; @@ -323,7 +329,31 @@ readcommands (int argc, char **argv) getstring (ph, "edgename:", &edgename); getdbl (ph, "valbreak:", &valbreak); getint(ph, "calctime:", &calctime) ; + t = 1 ; + getint(ph, "outformat:", &t) ; + setoutformat(t) ; writepars (ph); } + +int usage (char *prog, int exval) +{ + + (void)fprintf(stderr, "Usage: %s [options] \n", prog); + (void)fprintf(stderr, " -h ... Print this message and exit.\n"); + (void)fprintf(stderr, " -p ... use parameters from .\n"); + (void)fprintf(stderr, " -r ... use as root name.\n"); + (void)fprintf(stderr, " -g ... use as graph name.\n"); + (void)fprintf(stderr, " -o ... use as out graph name.\n"); + (void)fprintf(stderr, " -d ... use as dot graph name.\n"); + (void)fprintf(stderr, " -s ... use as script name.\n"); + (void)fprintf(stderr, " -x ... delete population .\n"); + (void)fprintf(stderr, " -H ... toggle hash calculation ON.\n"); + (void)fprintf(stderr, " -v ... print version and exit.\n"); + (void)fprintf(stderr, " -V ... toggle verbose mode ON.\n"); + (void)fprintf(stderr, " -f ... new output format (edge not ledge etc.\n") ; + + exit(exval); +}; + diff --git a/src/qpsubs.c b/src/qpsubs.c index 29e2c47..18fe073 100644 --- a/src/qpsubs.c +++ b/src/qpsubs.c @@ -1,4 +1,6 @@ #include "qpsubs.h" +#include "mcio.h" + extern int fancynorm, verbose, plotmode, outnum; extern FILE *fstdetails; @@ -23,6 +25,10 @@ static char **aalist; static int aanum = -1; void loadaa (SNP * cupt, int *xindex, int *xtypes, int nrows, int numeg); void destroyaa (); +static int fancyf4 = YES ; + +static int *xblock, *xbsize ; +static int xnblock ; void printsc (int tpat[3][4], double tscore[3], char **eglist, double ymin) @@ -72,6 +78,12 @@ setallsnpsmode (int mode) allsnpsmode = mode; } +void +setfancyf4 (int mode) +{ + fancyf4 = mode; +} + void settsc (int tpat[3][4], double tscore[3], int rpat[3][4], double rscore[3]) /// process rscore and return scores in tscore with tscore[0] best @@ -1530,6 +1542,88 @@ dohzg (double *top, double *bot, SNP ** xsnplist, int *xindex, int *xtypes, } + +void +setblocksf (int *block, int *bsize, int *nblock, SNP ** snpm, int numsnps, + double blocklen, char *fname) +// block, bsize are first element and block length +// must have been allocated if not NULL +{ + int n = 0, i, t; + int chrom, xsize, lchrom, olds, numbl; + double fpos, dis, gpos; + SNP *cupt; + + + if (fname==NULL) { + setblocks (block, bsize, nblock, snpm, numsnps, blocklen) ; + return ; + } + + getblocks (fname, snpm, numsnps) ; + + numbl = 0 ; + for (i = 0; i < numsnps; i++) { + cupt = snpm[i] ; + numbl = MAX(numbl, cupt -> tagnumber) ; + } + + ivzero(bsize, numbl+1) ; + ivclear(block, numsnps + 9999, numbl+1) ; + + fpos = -1.0e20; + for (i = 0; i < numsnps; i++) { + cupt = snpm[i] ; + if (cupt->ignore) continue; + if (cupt->isfake) continue; + t = cupt -> tagnumber ; + if (t<0) continue ; + ++bsize[t] ; + block[t] = MIN(block[t], i) ; + } + + *nblock = numbl; + printf("blockname: %s numblocks: %d\n", fname, numbl) ; + return; +} + +int +setblocksz (int **pblock, int **pbsize, SNP ** snpm, int numsnps, + double blocklen, char *fname) +// block, bsize are first element and block length +{ + + int n ; + int *tblock, *tbsize, tnblock ; + + ZALLOC(tblock, numsnps+20, int) ; + ZALLOC(tbsize, numsnps+20, int) ; + + + ivclear(tblock, -1, numsnps) ; + + if (fname == NULL) setblocks(tblock, tbsize, &tnblock, snpm, numsnps, blocklen) ; + else setblocksf(tblock, tbsize, &tnblock, snpm, numsnps, blocklen, fname) ; + + n = tnblock + 10; + + ZALLOC(xblock, n, int) ; + ZALLOC(xbsize, n, int) ; + + copyiarr(tblock, xblock, n) ; + copyiarr(tbsize, xbsize, n) ; + free(tblock) ; + free(tbsize) ; + + *pblock = xblock ; + *pbsize = xbsize ; + + return tnblock ; + +} + + + void setblocks (int *block, int *bsize, int *nblock, SNP ** snpm, int numsnps, double blocklen) @@ -1549,10 +1643,8 @@ setblocks (int *block, int *bsize, int *nblock, SNP ** snpm, int numsnps, for (i = 0; i < numsnps; i++) { cupt = snpm[i]; cupt->tagnumber = -1; - if (cupt->ignore) - continue; - if (cupt->isfake) - continue; + if (cupt->ignore) continue; + if (cupt->isfake) continue; chrom = cupt->chrom; gpos = cupt->genpos; dis = gpos - fpos; @@ -1569,6 +1661,12 @@ setblocks (int *block, int *bsize, int *nblock, SNP ** snpm, int numsnps, olds = i; xsize = 0; } + +/** + if (i<10000) { + printf("zzt %d %d %d \n", i, cupt -> tagnumber, n) ; + } +*/ cupt->tagnumber = n; ++xsize; } @@ -4200,6 +4298,98 @@ oldf3yyx (double *estmat, SNP * cupt, free2Dint (&ccc, nrows); free2Dint (&ccx, numeg + 1); return kret; +} + +double +hfix (int *x) +{ +// correction factor counts in x +double ya, yb, yt, h; +ya = (double) x[0]; +yb = (double) x[1]; +yt = ya + yb; +if (yt <= 1.5) +fatalx ("(hfix)\n"); +h = ya * yb / (yt * (yt - 1.0)); +return h / yt; +} + + + +int +getf4 (int **xx, int *indx, double *ans) +{ + +int a, i; +double ya, yb, y0, y1, ytot, ff[4]; +double h0, h1; +int isok, f4mode ; +// f4mode == NO => f2, or f3 + +*ans = 0.0; +if (indx == NULL) { +*ans = 1.0; +return 2; +} + +isok = f4mode = YES ; + + if (indx[0] == indx[2]) f4mode = NO ; + if (indx[0] == indx[3]) f4mode = NO ; + if (indx[1] == indx[2]) f4mode = NO ; + if (indx[1] == indx[3]) f4mode = NO ; + +for (i = 0; i < 4; ++i) { + ff[i] = -100*(i+1) ; // silly value ; + a = indx[i]; + if (a < 0) { + *ans = 1.0; + return 2; + } + + + *ans = 0 ; + y0 = (double) xx[a][0]; + y1 = (double) xx[a][1]; + ytot = y0 + y1; + if (ytot <= 0.01) { + isok = NO ; + continue ; + } + ff[i] = y0 / ytot; +} + +if ((isok == NO) && (f4mode == NO)) return -1 ; +if ((isok == NO) && (fancyf4 == NO)) return -1 ; + +ya = fabs(ff[0]-ff[1]) ; +yb = fabs(ff[2]-ff[3]) ; + +if (f4mode && (MIN(ya, yb) < .00001)) { + return 1 ; +} +/** + Note that if pop1 is missing and ff[2]=ff[3] then f4 is zero +*/ + +if (isok == NO) return -1 ; + +*ans = (ff[0] - ff[1]) * (ff[2] - ff[3]); +if (f4mode == YES) return 1 ; + +a = indx[0]; +h0 = hfix (xx[a]); +a = indx[1]; +h1 = hfix (xx[a]); +if (indx[0] == indx[2]) +*ans -= h0; +if (indx[0] == indx[3]) +*ans += h0; +if (indx[1] == indx[3]) +*ans -= h1; +if (indx[1] == indx[2]) +*ans += h1; +return 1; } diff --git a/src/qpsubs.h b/src/qpsubs.h index 291005a..0664e45 100644 --- a/src/qpsubs.h +++ b/src/qpsubs.h @@ -37,6 +37,7 @@ void getrawcol(int *rawcol, SNP *cupt, int *xindex, int nrows) ; void getrawcolx(int **ccc, SNP *cupt, int *xindex, int nrows, Indiv **indm) ; void getmixstr(int k, char *sss) ; void setpopsizes(int *sizes, char **eglist, int numeg) ; +void setoutformat(int outformat) ; void putewts(double *ewts) ; void getewts(double *ewts) ; void getezero(int *zpat) ; @@ -111,7 +112,9 @@ void gethscore(double *hscore, double *scores, int a, int b, int c, int d, int numeg) ; double qhdiff(double *scores, int a, int b, int c, int d, int numeg) ; +void setblocksf(int *block, int *bsize, int *nblock, SNP **snpm, int numsnps, double blocklen, char *fname) ; void setblocks(int *block, int *bsize, int *nblock, SNP **snpm, int numsnps, double blocklen) ; +int setblocksz (int **pblock, int **pbsize, SNP ** snpm, int numsnps, double blocklen, char *fname) ; int numblocks(SNP **snpm, int numsnps, double blocklen) ; void setmgpos(SNP **snpm, int numsnps, double *maxgdis) ; void setgfromp(SNP **snpm, int numsnps) ; @@ -263,6 +266,11 @@ void superreest(double *s2, int *xvlist, int nxvlist, int **xelist, int nxelist, int **admixv, int *admixedge, int nxalist) ; void setadmfix(char *fixname) ; + +void setfancyf4 (int mode) ; +double hfix(int *aa) ; +int getf4 (int **xx, int *indx, double *ans) ; + void fstcolinb(double *estn, double *estd, SNP *cupt, int *xindex, int *xtypes, int nrows, int numeg) ; double doinbreed(double *inb, double *inbest, double *inbsig, SNP **xsnplist, int *xindex, int *xtypes,