Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace ukb_common with ukbb_common #59

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion assign_phecodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import hail as hl
from hail.utils import hadoop_open
from ukbb_pan_ancestry import *
from ukb_common import *
from ukbb_common import *


def checkpoint_tmp(hail_obj, tmppath='gs://ukbb-diverse-temp-30day/', tmpname=None, overwrite=True):
Expand Down
2 changes: 1 addition & 1 deletion get_timings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s", level='INFO', filename='saige_pipeline.log')

from ukb_common import *
from ukbb_common import *
from ukbb_pan_ancestry import *

logger = logging.getLogger("saige_pan_ancestry")
Expand Down
2 changes: 1 addition & 1 deletion get_timings_null_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s", level='INFO', filename='saige_pipeline.log')

from ukb_common import *
from ukbb_common import *

from ukbb_pan_ancestry import *

Expand Down
2 changes: 1 addition & 1 deletion heritability/import_heritability.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__author__ = 'Rahul Gupta'

import hail as hl
from ukb_common.resources.generic import PHENO_KEY_FIELDS
from ukbb_common.resources.generic import PHENO_KEY_FIELDS


def get_h2_flat_file():
Expand Down
2 changes: 1 addition & 1 deletion load_timings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s", level='INFO', filename='saige_pipeline.log')

from ukb_common import *
from ukbb_common import *
from ukbb_pan_ancestry import *

logger = logging.getLogger("saige_pan_ancestry")
Expand Down
12 changes: 6 additions & 6 deletions plink_clump_hail.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
import argparse
import hail as hl
import sys
import ukb_common
import ukbb_common
from ukbb_pan_ancestry import POPS, bucket, get_clumping_results_path, get_meta_analysis_results_path #get_pheno_manifest_path
#from ukb_common import mwzj_hts_by_tree
#from ukbb_common import mwzj_hts_by_tree

ldprune_dir = f'{bucket}/ld_prune'

Expand Down Expand Up @@ -72,7 +72,7 @@ def tsv_to_ht(args):
pos=ht.pos,
reference_genome='GRCh37'),
alleles = ht.varid.split(':')[2:4])
ht = ht.annotate_globals(**{k: getattr(args, k) for k in ukb_common.PHENO_KEY_FIELDS})
ht = ht.annotate_globals(**{k: getattr(args, k) for k in ukbb_common.PHENO_KEY_FIELDS})
ht = ht.drop('contig','varid','pos')
ht.describe()
# ht.select().show()
Expand Down Expand Up @@ -142,7 +142,7 @@ def mwzj_hts_by_tree(all_hts, temp_dir, globals_for_col_key,
debug=False, inner_mode = 'overwrite', repartition_final: int = None,
read_if_exists = False):
r'''
Adapted from ukb_common mwzj_hts_by_tree()
Adapted from ukbb_common mwzj_hts_by_tree()
Uses read_clump_ht() instead of read_table()
'''
chunk_size = int(len(all_hts) ** 0.5) + 1
Expand All @@ -153,7 +153,7 @@ def mwzj_hts_by_tree(all_hts, temp_dir, globals_for_col_key,
checkpoint_kwargs = {inner_mode: not read_if_exists,
'_read_if_exists': read_if_exists} #
if repartition_final is not None:
intervals = ukb_common.get_n_even_intervals(repartition_final)
intervals = ukbb_common.get_n_even_intervals(repartition_final)
checkpoint_kwargs['_intervals'] = intervals

if debug: print(f'Running chunk size {chunk_size}...')
Expand Down Expand Up @@ -231,7 +231,7 @@ def join_clump_hts(pop, not_pop, max_pops, high_quality=False, overwrite=False):
temp_dir = ('gs://ukbb-diverse-temp-30day/nb-temp/'+
'max_pops' if max_pops else f'{"not_" if not_pop else ""}{pop}'+
f'{"-hq" if high_quality else ""}')
globals_for_col_key = ukb_common.PHENO_KEY_FIELDS
globals_for_col_key = ukbb_common.PHENO_KEY_FIELDS
mt = mwzj_hts_by_tree(all_hts=all_hts,
temp_dir=temp_dir,
globals_for_col_key=globals_for_col_key)
Expand Down
2 changes: 1 addition & 1 deletion pre_process_saige_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import argparse
from gnomad.utils.vep import *
from gnomad.utils.filtering import *
from ukb_common import *
from ukbb_common import *
from ukbb_pan_ancestry import *


Expand Down
2 changes: 1 addition & 1 deletion resources/results.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .generic import *
from ukb_common.resources.results import *
from ukbb_common.resources.results import *


def get_gene_intervals_path(reference: str = 'GRCh37'):
Expand Down
4 changes: 2 additions & 2 deletions saige_pan_ancestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s", level='INFO', filename='saige_pipeline.log')

from gnomad.utils import slack
from ukb_common import *
from ukbb_common import *
import time
import re

from ukbb_pan_ancestry import *
from ukb_common.utils.saige_pipeline import *
from ukbb_common.utils.saige_pipeline import *

logger = logging.getLogger("saige_pan_ancestry")
logger.addHandler(logging.StreamHandler(sys.stderr))
Expand Down
2 changes: 1 addition & 1 deletion summary_statistics_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pprint import pprint

from gnomad.utils import slack
from ukb_common import *
from ukbb_common import *
from ukbb_pan_ancestry import *

def remove_phenos_from_analysis(mt: hl.MatrixTable):
Expand Down
2 changes: 1 addition & 1 deletion utils/phenotype_loading.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ukb_common import *
from ukbb_common import *
from ukbb_pan_ancestry.resources import *


Expand Down
2 changes: 1 addition & 1 deletion utils/results_loading.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from ukb_common import *
from ukbb_common import *
from ukbb_pan_ancestry.resources import *


Expand Down
2 changes: 1 addition & 1 deletion website/docs/batch-pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ This file can then be loaded using:
python load_phenotype_data.py --add_dataset TRAITTYPE_NAME
```

If you do not have the google cloud connector installed, you can do this as a submission (`hailctl dataproc submit load_phenotype_data.py --add_dataset TRAITTYPE_NAME`); note that in either case, you will need to have the `ukb_common` and `ukbb_pan_ancestry` packages on your path. This script will make a backup copy of the existing phenotype MatrixTable and overwrite the master file with a new one including the new phenotype(s).
If you do not have the Google Cloud connector installed, you can do this as a submission (`hailctl dataproc submit load_phenotype_data.py --add_dataset TRAITTYPE_NAME`); note that in either case, you will need to have the `ukbb_common` and `ukbb_pan_ancestry` packages on your path. This script will make a backup copy of the existing phenotype MatrixTable and overwrite the master file with a new one including the new phenotype(s).

#### Step 2 - Run tests

Expand Down
4 changes: 2 additions & 2 deletions website/docs/hail-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ gsutil -u your_project_id ls gs://ukb-diverse-pops-public/sumstats_release

## Using the libraries and files

The files on Google Cloud Platform can be accessed by cloning the [ukbb_pan_ancestry](https://github.com/atgu/ukbb_pan_ancestry) and the [ukb_common](https://github.com/Nealelab/ukb_common) repos and accessing them programmatically. We recommend using these functions, as they apply our QC metrics (e.g. the raw file contains 7,271 phenotypes, but use of this function will return 7,221 phenotypes after removing low-quality ones) and include convenience metrics such as lambda GC.
The files on Google Cloud Platform can be accessed by cloning the [ukbb_pan_ancestry](https://github.com/atgu/ukbb_pan_ancestry) and the [ukbb_common](https://github.com/Nealelab/ukbb_common) repos and accessing them programmatically. We recommend using these functions, as they apply our QC metrics (e.g. the raw file contains 7,271 phenotypes, but using these functions returns 7,221 phenotypes after removing low-quality ones) and include convenience metrics such as lambda GC.

```
%%bash
git clone https://github.com/atgu/ukbb_pan_ancestry
git clone https://github.com/Nealelab/ukb_common
git clone https://github.com/Nealelab/ukbb_common
```


Expand Down