diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 0eb5fe4..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014-2015 Dana Farber Cancer Institute - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md deleted file mode 100644 index 9b32a3a..0000000 --- a/README.md +++ /dev/null @@ -1,7 +0,0 @@ -BreaKmer -======== - -A method to identify genomic structural variation in target regions/genes from reference-aligned high-throughput sequence data. It uses a “kmer” strategy to assemble misaligned sequence reads for predicting insertions, deletions, inversions, tandem duplications, and translocations at base-pair resolution. 
- -Documentation: -https://github.com/ccgd-profile/BreaKmer diff --git a/VERSION b/VERSION deleted file mode 100644 index a3dce6c..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -v0.0.2 diff --git a/breakmer.cfg b/breakmer.cfg deleted file mode 100644 index b681ce4..0000000 --- a/breakmer.cfg +++ /dev/null @@ -1,20 +0,0 @@ -analysis_name= -targets_bed_file= -sample_bam_file= -analysis_dir= -reference_data_dir= -cutadapt_config_file= -cutadapt= -jellyfish= -blat= -blat_port= -gfclient= -gfserver= -fatotwobit= -reference_fasta= -gene_annotation_file= -kmer_size= -other_regions_file= -repeat_mask_file= -alternate_fastas= -normal_bam_file= diff --git a/breakmer/__init__.py b/breakmer/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/breakmer/annotation/__init__.py b/breakmer/annotation/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/breakmer/annotation/sv_annotation.py b/breakmer/annotation/sv_annotation.py deleted file mode 100644 index f56dadb..0000000 --- a/breakmer/annotation/sv_annotation.py +++ /dev/null @@ -1,224 +0,0 @@ -#! 
/usr/bin/local/python -# -*- coding: utf-8 -*- - -import os -import shutil -import subprocess -import breakmer.utils as utils - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -class Exon: - def __init__(self, values): - self.chr = '' - self.start = '' - self.stop = '' - self.featureType = '' - self.set_values(values) - - def set_values(self, values): - """ """ - self.chr, self.src, featureType, self.start, self.stop, fill, self.strand, fill2, meta = values - self.start = int(self.start) - self.stop = int(self.stop) - self.featureType = featureType - - -class Transcript: - def __init__(self, values): - self.chr = '' - self.src = '' - self.start = '' - self.stop = '' - self.strand = '' - self.id = '' - self.geneName = '' - self.geneId = '' - self.geneStatus = '' - self.len = 0 - self.exons = [] - self.set_values(values) - - def set_values(self, values): - """ """ - self.chr, self.src, featureType, self.start, self.stop, fill, self.strand, fill2, meta, dist = values - self.start = int(self.start) - self.stop = int(self.stop) - meta = meta.split(';') - self.id = meta[1].split(' ')[2].lstrip('"').rstrip('"') - self.geneId = meta[0].split(' ')[1].lstrip('"').rstrip('"') - self.geneName = meta[4].split(' ')[2].lstrip('"').rstrip('"') - self.geneStatus = meta[3].split(' ')[2].lstrip('"').rstrip('"') - self.len = int(self.stop) - int(self.start) - - def get_exons(self, annotationFn, tmpFilePath): - """ """ - # Grep the exons and UTRs from the annotationFn - exonSelect = '$3 == "exon"' #' || $3 == "UTR")' - outFn = os.path.join(tmpFilePath, self.id + '.exons') - cmd = 'cat ' + annotationFn + " | awk '" + exonSelect + "' | grep '" + self.id + "' > " + os.path.join(tmpFilePath, self.id + '.exons') - os.system(cmd) - # ' 'bedtools multicov -bams ' + args.bam + ' -bed ' + args.intervals - for line in open(outFn, 'r'): - self.exons.append(Exon(line.strip().split('\t'))) - os.remove(outFn) - - -def 
annotate_event(svEventResult, contigMeta): - """ """ - if svEventResult.is_filtered(): - svEventResult.annotated = False - else: - svEventResult.annotated = True - # Make sure annotation file is sorted for bedtools use. - bedtools = contigMeta.params.get_param('bedtools') - annotationFn = contigMeta.params.get_param('gene_annotation_file') - brkptBedFn = os.path.join(contigMeta.path, contigMeta.id + '_breakpoints.bed') - - # Dictionary with 'targets' and 'other' breakpoint lists - # Deletions have two breakpoints in reference. - # Insertions have one breakpoint in reference. - # Rearrangements have breakpoints for each segment that is rearranged. - # genomicBrkpts = svEventResult.get_genomic_brkpts() - bpMap = write_brkpt_bed_file(brkptBedFn, svEventResult.blatResults) - # print 'sv_annotation.py bpMap', bpMap - outputFiles = run_bedtools(bedtools, annotationFn, brkptBedFn, contigMeta.path) - trxMap = parse_bedtools_output(outputFiles) - store_annotations(svEventResult, bpMap, trxMap, annotationFn, contigMeta.params, contigMeta.path) - # Remove temporary bedtools output files. - # print 'annotate_event, sv_annotation.py', svEventResult - # svEventResult.set_annotations() - # print 'svEvent annotated', svEventResult.annotated - - -def store_annotations(svEventResult, bpMap, trxMap, annotationFn, params, tmpFilePath): - for bpKey in bpMap: - blatResult, svBrkptIdx, coordIdx = bpMap[bpKey] - # print 'sv_annotation store_annotations', bpKey, bpMap[bpKey] - if bpKey not in trxMap: - print 'Missing a breakpoint annotation', bpKey - svEventResult.set_failed_annotation() - svEventResult.set_filtered('Breakpoints are not fully annotated. 
Typically due to non-primary chromosome.') - else: - svBreakpoint = blatResult.get_sv_brkpts()[svBrkptIdx] - trxMappings = trxMap[bpKey] - intersect = trxMap[bpKey]['intersect'] - upstream = trxMap[bpKey]['upstream'] - downstream = trxMap[bpKey]['downstream'] - # print 'Intersect', intersect - # print 'Downstream', downstream - # print 'Upstream', upstream - if intersect is not None: - trx, dist = intersect - if params.get_param('generate_image') or True: - trx.get_exons(annotationFn, tmpFilePath) - # print blatResult, blatResult.get_sv_brkpts() - blatResult.get_sv_brkpts()[svBrkptIdx].store_annotation([trx], [dist], coordIdx) - else: - upTrx = None - upDist = None - downTrx = None - downDist = None - if upstream is not None: - upTrx, upDist = upstream - if downstream is not None: - downTrx, downDist = downstream - # print 'Up', upTrx.id, upDist - # print 'Down', downTrx.id, downDist - if params.get_param('generate_image') or True: - if upTrx is not None: - upTrx.get_exons(annotationFn, tmpFilePath) - if downTrx is not None: - downTrx.get_exons(annotationFn, tmpFilePath) - blatResult.get_sv_brkpts()[svBrkptIdx].store_annotation([upTrx, downTrx], [upDist, downDist], coordIdx) - - -def write_brkpt_bed_file(bpBedFn, blatResults): - """ """ - bpMap = {} - bpBedFile = open(bpBedFn, 'w') - bpIter = 1 - for queryStartCoord, blatResult in blatResults: - svBreakpoints = blatResult.get_sv_brkpts() - svBrkptIdx = 0 - for svBreakpoint in svBreakpoints: - chrom = svBreakpoint.chrom - brkptCoords = svBreakpoint.genomicCoords - # print 'write_brkpt_bed_file', chrom, brkptCoords, svBreakpoint.svType - # brkptKey = 'BP' + str(bpIter) + '|' + chrom + ':' + '-'.join([str(x) for x in brkptCoords]) - coordIdx = 0 - for coord in brkptCoords: - bpKey = chrom + ':' + str(coord) + '_BP' + str(bpIter) + '_' + str(svBrkptIdx) - # print 'write_brkpt_bed_file', bpKey - bpStr = [chrom, coord, int(coord) + 1, bpKey] - bpBedFile.write('\t'.join([str(x) for x in bpStr]) + '\n') - bpMap[bpKey] = 
(blatResult, svBrkptIdx, coordIdx) - coordIdx += 1 - svBrkptIdx += 1 - bpIter += 1 - bpBedFile.close() - cmd = 'sort -k1,1 -k2,2n %s > %s' % (bpBedFn, bpBedFn + '.sorted') - os.system(cmd) - shutil.move(bpBedFn + '.sorted', bpBedFn) - return bpMap - - -def run_bedtools(bedtools, annotationFn, brkptBedFn, tmpFilePath): - """ """ - - # Identify the transcripts first - trxSelect = '$3 == "transcript"' - knownGeneSelect = 'gene_status "KNOWN"' - - outputFiles = {'intersect': os.path.join(tmpFilePath, 'bedtools.intersect.out'), - 'upstream': os.path.join(tmpFilePath, 'bedtools.upstream.out'), - 'downstream': os.path.join(tmpFilePath, 'bedtools.downstream.out')} - # Intersecting transcripts - cmd = 'cat ' + annotationFn + " | awk '" + trxSelect + "' | grep '" + knownGeneSelect + "' | " + bedtools + ' intersect -wo -a %s -b - > %s' % (brkptBedFn, outputFiles['intersect']) - os.system(cmd) - # Upstream transcripts - cmd = 'cat ' + annotationFn + " | awk '" + trxSelect + "' | grep '" + knownGeneSelect + "' | " + bedtools + ' closest -D a -id -a %s -b - > %s' % (brkptBedFn, outputFiles['upstream']) - os.system(cmd) - # Downstream transcripts - cmd = 'cat ' + annotationFn + " | awk '" + trxSelect + "' | grep '" + knownGeneSelect + "' | " + bedtools + ' closest -D a -iu -a %s -b - > %s' % (brkptBedFn, outputFiles['downstream']) - os.system(cmd) - return outputFiles - - -def parse_bedtools_file(fn, fileKey, trxMap): - for line in open(fn, 'r'): - line = line.strip() - linesplit = line.split('\t') - bpChrom, bpStart, bpEnd, bpKey = linesplit[0:4] - - # No value found for this breakpoint. This could be due to the chromsome not existing in the - # annotation file. 
- if linesplit[4] == '.': - return - - if bpKey not in trxMap: - trxMap[bpKey] = {'intersect': None, 'upstream': None, 'downstream': None} - trx = Transcript(linesplit[4:]) - dist = int(linesplit[-1]) - checkStorage = ((fileKey != 'intersect') and (trxMap[bpKey]['intersect'] is None)) or (fileKey == 'intersect') - if checkStorage: - if trxMap[bpKey][fileKey] is None: - trxMap[bpKey][fileKey] = [trx, dist] - else: - # Check if trx is longer (i.e. canonical) vs. current stored - if trx.len > trxMap[bpKey][fileKey][0].len: - trxMap[bpKey][fileKey] = [trx, dist] - - -def parse_bedtools_output(outputFileDict): - """ """ - trxMap = {} - # Map each bp to a transcript (or two) if it is intergenic. - parse_bedtools_file(outputFileDict['intersect'], 'intersect', trxMap) - parse_bedtools_file(outputFileDict['upstream'], 'upstream', trxMap) - parse_bedtools_file(outputFileDict['downstream'], 'downstream', trxMap) - return trxMap diff --git a/breakmer/assembly/__init__.py b/breakmer/assembly/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/breakmer/assembly/assembler.py b/breakmer/assembly/assembler.py deleted file mode 100644 index 5920260..0000000 --- a/breakmer/assembly/assembler.py +++ /dev/null @@ -1,271 +0,0 @@ -#! /usr/bin/local/python -# -*- coding: utf-8 -*- - -import re -import logging -from collections import OrderedDict -import breakmer.assembly.contig as contig_assembler -import breakmer.assembly.utils as assemblyUtils - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -def init_assembly(kmers, fqRecs, kmerLen, rcThresh, readLen): - """Entry function for assemblying a contiguous sequence from - a pool of sample only kmers and the reads that contain them. - A kmer tracker object is instantiated containing all the kmer seqs and - their associated counts. 
These are sorted by - Args: - kmers: Dictionary of kmers only in the sample key = kmer, value = count in reads - fqRecs: Dictionary with sequence values as keys and a list of fq_read objects. - kmerLen: Integer of kmer size. - rcThresh: Integer representing the minimum readcount threshold for keeping a contig. - readLen: Integer of the read length. - Return: - contigs: List of contig objects. - """ - logger = logging.getLogger('breakmer.assembly.assembler') - contigs = [] - - # Return if no kmers to analyze. - if len(kmers) == 0: - logger.info('No kmers to built contigs, returning.') - return contigs - - # Store kmers in KmerTracker object. - kmerTracker = KmerTracker() - for kmer in kmers: - kmerTracker.add_kmer(kmer, kmers[kmer]) - - # While there are kmers to analyze continue to build contigs. - contigBuffer = ContigBuffer() - # Sort all the kmers by count and store in order. - kmerTracker.set_all_kmer_values() - # Check if there are any kmers left to seed the build process. - while kmerTracker.has_mers(): - # Update the set of kmers to consider for building. - kmerTracker.update_kmer_set() - # Get kmer seed for new contig. - kmer, kmer_count = kmerTracker.get_kmer() - # Only analyze contigs that exist in 2 or more reads. - if kmer_count < 2: - continue - logger.info('Initiating kmer %s, found in %d reads' % (kmer, kmer_count)) - setup_contigs(kmer, fqRecs, kmerLen, kmerTracker, contigBuffer) - - # Deal with buffered contig objects that need to be grown or completed. - while len(contigBuffer.contigs) > 0: - contig = contigBuffer.get_contig() - contig.grow(fqRecs, kmerTracker, kmerLen, contigBuffer) - if contig.check_invalid(rcThresh, readLen): - logger.info('Contig did not meet the read count threshold %d, with %d or contig length (%d) < readLen (%d)' % (rcThresh, len(contig.reads), len(contig.seq), readLen)) - else: - logger.info('Adding contig to buffer') - contigs.append(contig) - - # Clean up the data to free up memory. 
- contigBuffer.remove_kmers(kmerTracker) - contigBuffer.remove_reads(fqRecs) - return contigs - - -def setup_contigs(kmerSeq, fqRecs, kmerLen, kmerTracker, contigBuffer): - """Create a contig instance starting with a seed kmer and associated reads. - First find the reads containing the kmerSeq value, iterate through reads and - either create a new contig or add to existing contig. - Args: - kmerSeq: String of kmer sequence. - fqRecs: Dictionary with sequence values as keys and a list of fq_read objects. - kmerLen: Integer of kmer size. - kmerTracker: KmerTracker object that contains all the kmer values. - contigBuffer: ContigBuffer object to track the buffered contig objects. - Return: None - """ - logger = logging.getLogger('breakmer.assembly.assembler') - contig = None - # Find all reads with kmer sequence passed in. - # kmerReads contains a list of tuples. - # 1. fq_read object defined in breakmer.utils.py - # 2. Starting position of the kmer match in the read sequence - # 3. Boolean that a match was found. - # 4. Length of the read sequence. - # 5. Number of reads with this sequence. - kmerReads = assemblyUtils.find_reads(kmerSeq, fqRecs.items(), set()) - contigBuffer.add_used_mer(kmerSeq) - kmerObj = assemblyUtils.Kmer(kmerSeq, kmerTracker.get_count(kmerSeq), kmerTracker.kmerSeqs, kmerLen) - for readVals in kmerReads: - read, kmerPos, matchFound, seqLen, nReadsWithSeq = readVals - readAlignValues = {'read': read, - 'align_pos': kmerPos, - 'nreads': nReadsWithSeq} - contigBuffer.add_used_read(read.id) - # If no contig, build one. - if not contig: - contig = contig_assembler.Contig(kmerObj, readAlignValues) - contigBuffer.add_contig(read, contig) - # Check if read should be added to the existing contig. - else: - contig.check_read(kmerObj, readAlignValues, 'setup') - if contig: - contig.finalize(fqRecs, kmerTracker, contigBuffer, 'setup') - - -class ContigBuffer: - """A class to track the used kmers and reads and their relation to contigs. 
- Attributes: - used_kmers: Set of kmer sequences that have been used to build contigs. - used_reads: Set of read IDs that have been used to build contigs. - contigs: OrderedDict to track reads and the contigs they contribute to. - """ - def __init__(self): - self.used_kmers = set() - self.used_reads = set() - self.contigs = OrderedDict() - - def add_contig(self, read, contig): - """Add read to contigs dict with contig object it is connected to. - Set key to read ID and value to the contig object. Set the read used to True. - Args: - read: fq_read object - contig: Contig object. - Return: - None - """ - # Tie a contig to the seed read ID and store in dictionary. - if read.id not in self.contigs and not read.used: - self.contigs[read.id] = contig - read.used = True - - def remove_contig(self, read_id): - """Remove read ID from contigs dictionary. - Args: - read_id: String of read ID. - Return: None - """ - if read_id in self.contigs: - del self.contigs[read_id] - - def get_contig(self): - """Return the contig associated with the first record the contigs dictionary. - Delete the entry. - Args: None - Return: - Contig object to grow or complete. - """ - read_id = self.contigs.keys()[0] - contig = self.contigs[read_id] - del self.contigs[read_id] - return contig - - def add_used_read(self, read_id): - """Add read ID to used set. - Args: - read_id: String for read ID. - Return: None - """ - self.used_reads.add(read_id) - - def add_used_mer(self, kmer_seq): - """Add kmer sequence to used set. - Args: - kmer_seq: String for kmer sequence. - Return: None - """ - self.used_kmers.add(kmer_seq) - - def remove_kmers(self, kmer_tracker): - """Remove used kmer sequences from the kmer tracking object and reset used - kmer set. - Args: - kmer_tracker: KmerTracker object. - Return: None - """ - map(kmer_tracker.remove_kmer, list(self.used_kmers)) - self.used_kmers = set() - - def remove_reads(self, fqReads): - """Remove the used reads from the fq_reads dictionary. 
- Args: - fqReads: Dictionary of fq_reads. - Return: None - """ - del_used = filter(lambda x: x in fqReads, list(self.used_reads)) - map(fqReads.__delitem__, del_used) - self.used_reads = set() - - -class KmerTracker: - """Wrapper class for storing the kmer objects. Useful for adding - and extracting kmers. - Attributes: - kmers: List of tuples containing kmer count, kmer, kmer object. - orderedKmers: OrderedDict object containing kmer seq as key and kmer count as value. - The top values are the most frequence kmer values. - kmerSeqs: Set of kmer seq values that exist in orderedKmers. - """ - def __init__(self): - self.kmers = [] - self.orderedKmers = OrderedDict() - self.kmerSeqs = set() - - def add_kmer(self, mer, count): - """Add a kmer object to the list. Stores a tuple with kmer count and kmer sequence string. - This allows easy sorting. - Args: - mer: String kmer sequence value. - count: Integer of number of reads kmer is within. - Return: - None - """ - if len(set(mer)) > 1: - self.kmers.append((int(count), mer)) - - def set_all_kmer_values(self): - """Sort the kmer list by number of reads (descending) first and then - by sequence value and store them in an ordered dictionary. - Args: - None - Return: - None - """ - kmersSorted = sorted(self.kmers, key=lambda x: (int(x[0]), x[1]), reverse=True) - for kmer in kmersSorted: - self.orderedKmers[kmer[1]] = kmer[0] - - def has_mers(self): - """Check if there are any kmers left in the dictionary. - Args: - None - Return: - True if there are items in the dictionary and the counts of those items are > 1. - False if there are no items in the dictionary or the counts of those items are <= 1. - """ - if len(self.orderedKmers) > 0 and max(self.orderedKmers.values()) > 1: - return True - else: - return False - - def update_kmer_set(self): - """Update the set of kmer values. The orderedKmers dictionary - dynamically changes as kmers are taken out. 
- Args: - None - Return: - None - """ - self.kmerSeqs = set(self.orderedKmers.keys()) - - def get_kmer(self): - """Return the first kmer in the ordered dictionary""" - return self.orderedKmers.items()[0] - - def get_count(self, kmerSeq): - """Return the number of reads the kmer_seq is within.""" - return self.orderedKmers[kmerSeq] - - def remove_kmer(self, kmerSeq): - """Delete the record associated with kmer sequence.""" - del self.orderedKmers[kmerSeq] diff --git a/breakmer/assembly/contig.py b/breakmer/assembly/contig.py deleted file mode 100644 index 7a7762a..0000000 --- a/breakmer/assembly/contig.py +++ /dev/null @@ -1,992 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os -import sys -import logging -import shutil -import pysam -import breakmer.assembly.olc as olcAssembly -import breakmer.assembly.utils as assemblyUtils -import breakmer.realignment.realigner as realigner -import breakmer.caller.sv_caller as sv_caller -import breakmer.utils as utils -import breakmer.annotation.sv_annotation as annotator -import breakmer.plotting.sv_viz as svplotter - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -def get_read_kmers(new_seq, kmerLen, kmer_seqs, order='for'): - """Return new sample kmers from the existing contig sequence that can help extend - the contig sequence. - All the k-length mers are determined from the new_seq. These kmer sequences are put - into a set and intersected with the kmer sequences in the sample, ordered according - to the position of the kmer in the new_seq string and returned. - Args: - new_seq: String of the contig sequence to create kmers from. - kmerLen: Integer of the kmer length - kmer_seqs: The set of kmer sequences from the pool of extracted reads. - order: String for the direction to order the new set of kmer sequences. A None - value indicates no ordering. - - Return: - kmers: List of tuples containing: - 1. String kmer seq - 2. 
Integer kmer position - 3. Boolean if kmer seq is in the first half of the sequence - 4. Integer of position distance to middle of sequence - 5. String of how to order tuples in the list - """ - m = len(new_seq) / 2 - kmers = map(lambda x: (new_seq[x:x + kmerLen], x, int(x < m), abs(x - m), order), range(0, (len(new_seq) - kmerLen))) - ks = set(map(lambda x: x[0], kmers)) - ss = ks & kmer_seqs - kmers = filter(lambda x: x[0] in ss, kmers) - if order == 'rev': - kmers.reverse() - elif order == 'mid': - kmers = sorted(kmers, key=lambda x: (x[2], x[3])) - return kmers - - -class AssemblyRead: - """Wrapper class for a sequence read used in a contig assembly. This will - track meta information about the sequence read. - Attributes: - read: fq_read object - redundant: Boolean to indicate whether the read is duplicated. - alignChecked: Boolean to indicate if the read has been checked against - the contig sequence. - aligned: Boolean to indicate if the read aligned to the contig sequence. - """ - def __init__(self, read, redundant, checked, aligned): - self.read = read - self.redundant = redundant - self.alignChecked = checked - self.aligned = aligned - - -class ReadBatch: - """A class to track the reads that are being considered for building a contig - sequence. - Attributes: - delete: Set of fq_read objects to remove from further analysis. - alt: List of tuples containing (fq_read object, integer of nreads with the same sequence) - reads: List of AssemblyRead objects containing the reads used to build a contig. - mer_pos_d: Dictionary containing kmer position information. DEPRECATED - """ - def __init__(self, read, mer_pos): - self.delete = set() - self.alt = [] - self.reads = [AssemblyRead(read, False, True, True)] - # self.mer_pos_d = {mer_pos: [0]} DEPRECATED - - def check_kmer_read(self, kmer_read_align_pos, read): - """Adds AssemblyRead to reads list. Note that the check for add_to_pos_d is deprecated. 
- Args: - kmer_read_align_pos: Integer of the position the kmer sequence found - in the read sequence. - read: fq_read object. - Return: None - """ - check = True - redund_read = False - add_read = True - add_to_pos_d = False - - """ - # Deprecated code. - if add_read : - if add_to_pos_d : - if pos not in self.mer_pos_d : - self.mer_pos_d[pos] = [] - self.mer_pos_d[pos].append(len(self.reads)) - self.reads.append(AssemblyRead(read, False, check, False)) - return check - """ - self.reads.append(AssemblyRead(read, False, check, False)) - - def set_last_read_aligned(self): - """Sets the last read added to the reads list as aligned.""" - self.reads[-1].aligned = True - - def clean(self, fq_reads, contigBuffer, last_keep_read): - """Remove all data from data structures. - Iterate through reads in delete set and delete them from the fq dictionary. - Check if the delete reads are in the contigBuffer contig dictionary. If the - contig associated with the read is not setup then delete the read from the dictionary. - Args: - fq_reads: Dictionary containing the extracted reads. - contigBuffer: ContigBuffer object. - last_keep_read: fq_read object kept for further use. - Return: None - """ - - map(fq_reads.__delitem__, map(lambda x: x.seq, list(self.delete))) - for read_id in filter(lambda x: x in contigBuffer.contigs, list(self.delete)): - if not contigBuffer.contigs[read_id].setup: - del contigBuffer.contigs[read_id] - self.delete = set() - self.alt = [] - self.reads = [last_keep_read] - self.mer_pos_d = {} - - -class ContigCounts: - """A class to track the number of read sequences that support a consensus sequence. - - Initially set counts for the first read in the contig. - Attributes: - indel_only: List of integers, providing the count for number of indel only reads support - the given position in the consensus sequence. - others: List of integers, providing the count of non indel only reads that are assembled - at a given position of the consensus sequence. 
- """ - - def __init__(self, read, nreads): - self.indel_only = [0] * len(read.seq) - self.others = [0] * len(read.seq) - self.set_counts(0, len(read.seq), nreads, read.indel_only) - - def get_counts(self, p1, p2, sv_type): - """Return the counts for a range of positions in the consensus sequence. - If the positions are the same, then return the counts for the single position. - Args: - p1: Integer indicating the first position. - p2: Integer indicating the second position. - sv_type: String indicating what kind of event the count is intended to support. - Return: - counts: List of integers for counts of reads assembled at the provided range. - """ - - # print 'Contig stored counts, indel_only', self.indel_only - # print 'Contig stored counts, others', self.others - counts = [] - if sv_type == 'indel' or sv_type == 'rearr': - if p1 == p2: - counts = self.indel_only[p1] + self.others[p1] - else: - counts = map(lambda (x, y): x + y, zip(self.indel_only[p1:p2], self.others[p1:p2])) - else: - if p1 == p2: - counts = self.others[p1] - else: - counts = self.others[p1:p2] - return counts - - def get_total_reads(self): - """Return the total read count supporting a contig sequence.""" - return max(self.indel_only) + max(self.others) - - def set_superseq(self, read, nreads, start, end): - """The read sequence is a super sequence to the current contig sequence. - The count vectors need to be adjusted accordingly based on the read. - Temporary count vectors are created for the read sequence and the number - of reads with this sequence. The current count vectors are then added into - the temporary vectors and then set as the new count vectors. - Args: - read: fq_read object. - nreads: Integer for number of reads with read sequence. - start: Integer for start alignment position of the current contig in - the read sequence. - end: Integer for the end alignment position of the current contig in - the read sequence. 
- Return: None - """ - tmp_indel_only = [0] * len(read.seq) - tmp_others = [0] * len(read.seq) - if read.indel_only: - tmp_indel_only = [nreads] * len(read.seq) - else: - tmp_others = [nreads] * len(read.seq) - tmp_indel_only[start:end] = map(lambda (x, y): x + y, zip(tmp_indel_only[start:end], self.indel_only)) - tmp_others[start:end] = map(lambda (x, y): x + y, zip(tmp_others[start:end], self.others)) - self.indel_only = tmp_indel_only - self.others = tmp_others - - def set_counts(self, start, end, nreads, indel_only): - """Add the read count to the stored contig sequence count vectors. - With paired end reads, there are reads that can contribute to indels only - or to all types of variation. The counts are added according to how the - read has been defined. - Args: - start: Integer of the start of the sequence to add count. - end: Integer of the end of the sequence to add count. - nreads: Integer of total number of reads that should be added. - indel_only: Boolean to indicate if read should only support indels. - Return: None - """ - if indel_only: - self.indel_only[start:end] = map(lambda x: x + nreads, self.indel_only[start:end]) - else: - self.others[start:end] = map(lambda x: x + nreads, self.others[start:end]) - - def extend_counts(self, extend_size, nreads, indel_only, direction): - """Increase the size of the count vectors when the contig sequence is grown. - If the direction is 'post', the count vectors must be increased at the end. - If the direction is 'pre', the count vectors must be increased at the beginning. - Args: - extend_size: Integer for number of positions to increase the vectors. - nreads: Integer for count to add to the count vectors. - indel_only: Boolean to indicate if count should only be added to indel only vector. - direction: String to indicate which side the count vector is extended. 
- Return: None - """ - fill_counts = [0] * extend_size - ecounts = [nreads] * extend_size - if indel_only: - if direction == 'post': - self.indel_only.extend(ecounts) - self.others.extend(fill_counts) - else: - ecounts.extend(self.indel_only) - self.indel_only = ecounts - fill_counts.extend(self.others) - self.others = fill_counts - else: - if direction == 'post': - self.indel_only.extend(fill_counts) - self.others.extend(ecounts) - else: - ecounts.extend(self.others) - self.others = ecounts - fill_counts.extend(self.indel_only) - self.indel_only = fill_counts - - -class Builder: - """A class to perform all the contig building functions and store temporary data structures. - Attributes: - read_batch: ReadBatch object - seq: String of the consensus sequence. - counts: ContigCounts object to manage all the read counts supporting the consensus sequence. - checked_kmers: List of kmer sequences that had previously been checked while building the contig. - kmerLen: Integer of the kmer length. - kmers: List of kmer sequences that have contributed to building the contig. - kmer_locs: List of integers representing the positions of the kmers in the contig seq. - """ - - def __init__(self, kmerObj, readAlignValues): - """ - Args: - kmerObj: A Kmer object containing seq and count information for a kmer. - readAlignValues: A dictionary containing information about where a kmer exists in a read. - """ - self.read_batch = ReadBatch(readAlignValues['read'], readAlignValues['align_pos']) - self.seq = readAlignValues['read'].seq - self.counts = ContigCounts(readAlignValues['read'], readAlignValues['nreads']) - self.checked_kmers = [kmerObj.seq] - self.kmerLen = kmerObj.kmerLen - self.kmers = [] - self.kmer_locs = [] - - def check_read(self, kmerObj, readAlignValues, alignType): - """Determine if the read should be added to the assembly or not. - If the read aligns to the contig, set the fq_read status to used and indicate - the AssemblyRead has been aligned. 
If the kmer is in more than 1 read and the - current read has not been used in any other contigs then store for later - analysis to build another contig possibly. Otherwise, discard the read for - further analysis. - Args: - kmerObj: Kmer object containing kmer seq specific values. - readAlignValues: Dictionary containing: - - 'read': fq_read object that contains kmer sequence. - - 'align_pos': Integer position of kmer in read sequence - - 'nreads': Integer of number of reads with the same sequence. - type: String indicating the state of this function. - Return: - hit: String value 'remove' or ''. - """ - hit = '' - self.read_batch.check_kmer_read(readAlignValues['align_pos'], readAlignValues['read']) - if self.check_align(kmerObj, readAlignValues, alignType): - hit = 'remove' - readAlignValues['read'].used = True - self.read_batch.set_last_read_aligned() - elif kmerObj.counts > 2 and not readAlignValues['read'].used: - self.read_batch.alt.append((readAlignValues['read'], readAlignValues['nreads'])) - else: - self.read_batch.delete.add(readAlignValues['read']) - return hit - - def check_align(self, kmerObj, readAlignValues, alignType='setup'): - """Check the alignment of the read sequence to the contig sequence. - The read sequence must match at least 25% of the shortest sequence between - the contig and the read and an identity at least 90%. If there is clear - alignment between the read sequence and the contig sequence, then the - assembly consensus sequence is appropriately changed. - Args: - kmerObj: Dictionary containing: - - 'seq': String kmer sequence value. - - 'counts': Integer of reads containing kmer sequence. - - 'kmer_set': Set with all kmer sequences. - - 'len': Integer for kmer length. - readAlignValues: Dictionary containing: - - 'read': fq_read object that contains kmer sequence. - - 'align_pos': Integer position of kmer in read sequence - - 'nreads': Integer of number of reads with the same sequence. 
- type: String indicating the state of this function. - Return: - match: Boolean indicating if the read aligns sufficiently with the - contig sequence and will be added. - """ - match = False - queryRead = readAlignValues['read'] - - minScore = float(min(len(self.seq), len(queryRead.seq))) / 4.0 - alignManager = olcAssembly.AlignManager(self.seq, queryRead.seq, minScore, 0.90) - - if alignManager.check_align_thresholds(): - # Alignment fails thresholds. - return False - if alignManager.same_seqs(): - # Read and contigs sequences are the same. - return True - if alignManager.same_max_scores(): - # Alignments both ways had equal scores. - match = True - if alignManager.read_is_superseq(): - # Check if the read sequence fully contains the contig sequence. - self.set_superseq(queryRead, readAlignValues['nreads'], alignManager.get_alignment_values(0, 'i'), alignManager.get_alignment_values(0, 'prei')) - if alignType == 'grow': - # Contig sequence has changed, set the kmers. - self.set_kmers(kmerObj.kmerSeqSet) - # Check if the contig sequence full contains the read sequence. - elif alignManager.read_is_subseq(): - self.add_subseq(alignManager.get_alignment_values(1, 'i'), alignManager.get_alignment_values(1, 'prei'), readAlignValues['nreads'], queryRead.indel_only) - # There appears to be overlap, figure out how to assemble. - else: - match = False - indx1 = alignManager.get_kmer_align_indices(0, kmerObj.seq) - indx2 = alignManager.get_kmer_align_indices(1, kmerObj.seq) - if indx1[0] > -1 and indx1[1] > -1: - # Read overlaps off front of contig sequence. - if (indx2[0] == -1 and indx2[1] == -1) or (abs(indx2[0] - indx2[1]) > abs(indx1[0] - indx1[1])): - match = True - self.contig_overlap_read(alignManager.get_alignment(0), queryRead, readAlignValues['nreads'], kmerObj.kmerSeqSet, alignType) - elif indx2[0] > -1 and indx2[1] > -1: - # Read overlaps off end of contig sequence. 
- if (indx1[0] == -1 and indx1[1] == -1) or (abs(indx2[0] - indx2[1]) < abs(indx1[0] - indx1[1])): - match = True - self.read_overlap_contig(alignManager.get_alignment(1), queryRead, readAlignValues['nreads'], kmerObj.kmerSeqSet, alignType) - # Read sequence overlaps off the front of the contig sequence. - elif alignManager.better_align(): - match = True - self.contig_overlap_read(alignManager.get_alignment(0), queryRead, readAlignValues['nreads'], kmerObj.kmerSeqSet, alignType) - # Read sequence overlaps off the end of the contig sequence. - else: - match = True - self.read_overlap_contig(alignManager.get_alignment(1), queryRead, readAlignValues['nreads'], kmerObj.kmerSeqSet, alignType) - return match - - def set_superseq(self, read, nreads, start, end): - """The read sequence contains the current contig sequence. - Args: - read: fq_read object. - nreads: Integer for the number of reads with the same sequence as the read passed in. - start: Integer for the start position the contig sequence aligns to the read sequence. - end: Integer for the end position the contig sequence aligns to the read sequence. - Return: None - """ - self.seq = read.seq - self.counts.set_superseq(read, nreads, start, end) - - def add_subseq(self, start, end, nreads, indel_only): - """The read checked against the contig was found to be a subsequence of the - contig. The nreads with the checked sequence are added to the count vectors. - Args: - start: Integer for start of sequence to add counts. - end: Integer for end of sequence to add counts. - nreads: Integer of number of reads to add. - indel_only: Boolean to indicate whether to add to indel only count vector. - Return: None - """ - self.counts.set_counts(start, end, nreads, indel_only) - - def add_postseq(self, post_seq, start, end, nreads, indel_only): - """Sequence is appended to the end of the current contig sequence. The - read support vectors are appropriately incremented. 
- Args: - post_seq: String of sequence to add to the end of the assembled contig. - start: Integer for start position of contig to add counts. - end: Integer for end position of the contig to add counts. - nreads: Integer for number of reads to add to count vectors. - indel_only: Boolean to indicate whether read only supports indel events. - Return: None - """ - self.seq += post_seq - self.counts.set_counts(start, end, nreads, indel_only) - self.counts.extend_counts(len(post_seq), nreads, indel_only, 'post') - - def add_preseq(self, pre_seq, start, end, nreads, indel_only): - """Sequence is append to the front of the current contig sequence. The read - support vectors are appropriately changed. - Args: - pre_seq: String of sequence to add to the front of the assembly contig. - start: Integer for start position of contig to add counts. - end: Integer for end position of contig to add counts. - nreads: Integer for number of reads to add to count vectors. - indel_only: Boolean to indicate whether read only supports indel events. - Return: None - """ - self.seq = pre_seq + self.seq - self.counts.set_counts(start, end, nreads, indel_only) - self.counts.extend_counts(len(pre_seq), nreads, indel_only, 'pre') - - def finalize_reads(self, contig_reads, fq_recs, contigBuffer): - """Sort out the reads to keep for reporting and remove the others. - Aligned and non-redundant reads are removed from the contig read set. The - variables in read_batch are cleared. - Args: - contig_reads: Set of fq_read objects. - fq_recs: Dictionary of fq_read objects. - contigBuffer: ContigBuffer class object. 
- Return: - contig_reads: Set of fq_reads objects - """ - rm_reads = map(lambda y: y.read, filter(lambda x: x.redundant, self.read_batch.reads)) - keep_reads = filter(lambda x: x.aligned and not x.redundant, self.read_batch.reads) - add_reads = map(lambda y: y.read, keep_reads) - # Merge add_reads into contig_reads - contig_reads = contig_reads | set(add_reads) - # Remove rm_reads - contig_reads = contig_reads - set(rm_reads) - self.read_batch.clean(fq_recs, contigBuffer, keep_reads[-1]) - return contig_reads - - def contig_overlap_read(self, alignment, query_read, nreads, kmer_seqs, assemblyType): - """Assembled consensus and read sequences, where the consensus end overlaps - with the read sequence beginning. - Args: - alignment: olc.Align object - query_read: fq_read object - nreads: Integer for number of reads to add to count vectors. - kmer_seqs: Set of kmer sequence values. - type: String for source of call to function. - Return: None - """ - if alignment.prej == len(self.seq) and alignment.j == 0: - self.set_superseq(query_read, nreads, alignment.i, alignment.prei) - if assemblyType == 'grow': - self.set_kmers(kmer_seqs) - else: - post_seq = query_read.seq[alignment.prei:] - nseq = self.seq[(len(self.seq) - (self.kmerLen - 1)):] + post_seq - self.add_postseq(post_seq, alignment.j, alignment.prej, nreads, query_read.indel_only) - if assemblyType == 'grow': - nkmers = get_read_kmers(nseq, self.kmerLen, kmer_seqs, 'for') - self.kmers.extend(nkmers) - - def read_overlap_contig(self, alignment, query_read, nreads, kmer_seqs, type): - """Assemble consensus and read sequences togheter, where the consensus - beginning overlaps the read sequence end. - Args: - alignment: olc.Align object - query_read: fq_read object - nreads: Integer for number of reads to add to count vectors. - kmer_seqs: Set of kmer sequence values. - type: String for source of call to function. 
- Return: None - """ - - if alignment.prej == len(query_read.seq) and alignment.j == 0: - self.add_subseq(alignment.i, alignment.prei, nreads, query_read.indel_only) - else: - pre_seq = query_read.seq[0:alignment.j] - nseq = pre_seq + self.seq[0:(self.kmerLen - 1)] - self.add_preseq(query_read.seq[0:alignment.j], alignment.i, alignment.prei, nreads, query_read.indel_only) - if type == 'grow': - nkmers = get_read_kmers(nseq, self.kmerLen, kmer_seqs, 'rev') - self.kmers.extend(nkmers) - - def check_alternate_reads(self, kmerTracker, contigBuffer, contigKmers): - """Iterate through the buffered reads that were not aligned to the contig - and determine if a new contig should be created. - Args: - kmerTracker: KmerTracker object contains all the kmer sequence values. - contigBuffer: ContigBuffer object - contigKmers: List of kmer sequence used in the contig assembly. - """ - newContigs = [] - kmerSet = set() - for read, nreads in self.read_batch.alt: - altKmers = get_read_kmers(read.seq, self.kmerLen, kmerTracker.kmerSeqs, '') - altKmerSeqs = set(map(lambda x: x[0], altKmers)) - newKmers = set(altKmerSeqs) - set(contigKmers) - contigBuffer.used_kmers - kmerSet - if len(newKmers) > 0: - for kmerSeq in list(newKmers): - readCount = kmerTracker.get_count(kmerSeq) - if readCount > 1: - kmerPos = read.seq.find(kmerSeq) - kmerObj = assemblyUtils.Kmer(kmerSeq, kmerTracker.get_count(kmerSeq), kmerTracker.kmerSeqs, self.kmerLen) - read_align_values = {'read': read, - 'align_pos': kmerPos, - 'nreads': nreads} - newContigs.append((read, Contig(kmerObj, read_align_values))) - kmerSet = kmerSet | newKmers - break - return newContigs - - def set_kmers(self, kmer_seqs): - """Wrapper function to get_read_kmers function to parse a sequence string - and generate all relevant kmers from the sequence. - Args: - kmer_seqs: Set of kmer sequences. 
- Return: None - """ - self.kmers = get_read_kmers(str(self.seq), self.kmerLen, kmer_seqs, 'mid') - - def set_kmer_locs(self): - """Add the start alignment positions of each kmer sequence in the kmers list - to the kmer_locs list. - Args: None - Return: None - """ - self.kmer_locs = [0] * len(self.seq) - for kmer in self.kmers: - kmerPos = self.seq.find(kmer[0]) - self.kmer_locs[kmerPos:(kmerPos + self.kmerLen)] = map(lambda x: x + 1, self.kmer_locs[kmerPos:(kmerPos + self.kmerLen)]) - - def refresh_kmers(self): - """Return a list of kmer_sequences that have not been checked already. - Args: None - Return: - List of kmer sequences. - """ - return filter(lambda x: x[0] not in set(self.checked_kmers), self.kmers) - - def get_seq(self): - """Return the final consensus sequence.""" - return self.seq - - def get_kmers(self): - """Return the final kmer sequence list.""" - return self.kmers - - def get_kmer_locs(self): - """Return the final kmer locations list.""" - return self.kmer_locs - - def get_total_reads(self): - """Return total number of reads supporting contig.""" - return self.counts.get_total_reads() - - -class Meta: - """A class to track the contig information for downstream calling and writing - to file. - Attributes: - params: Param object with all BreaKmer parameters. - path: String of path to write all files. - id: String for contig ID. - target_region: Tuple containing target information: - 1. String chromosome ID - 2. Integer of target region start position. - 3. Integer of target region end position. - 4. String target region name. - 5. List of target intervals tuples. - fq_fn: String of the fastq file containing sequence reads used to build contig. - fa_fn: String of the fasta file containing the contig sequence. 
- """ - - def __init__(self): - self.loggingName = 'breakmer.assembly.contig' - self.params = None - self.path = None - self.id = None - self.chr = None - self.start = None - self.end = None - self.targetName = None - self.regionBuffer = 0 - self.fq_fn = None - self.fa_fn = None - self.readVariation = None - - def set_values(self, contigId, params, queryRegionValues, contigPath, readVariation): - """Sets the contig values after contig has been compeleted and ready for - realignment. - Args: - contigId: String containing contid ID. - params: Param object. - queryRegionValues: Tuple containing the target region information - contigPath: String of the path to the contig directory to store files. - Return: None - """ - self.params = params - self.id = contigId - self.readVariation = readVariation - self.chr = queryRegionValues[0] - self.start = int(queryRegionValues[1]) - self.end = int(queryRegionValues[2]) - self.targetName = queryRegionValues[3] - self.regionBuffer = queryRegionValues[5] - self.path = os.path.join(contigPath, self.id) - logger = logging.getLogger('breakmer.assembly.contig') - utils.log(self.loggingName, 'info', 'Setting up contig path %s' % self.path) - - if not os.path.exists(self.path): - os.makedirs(self.path) - self.fq_fn = os.path.join(contigPath, self.id, self.id + '.fq') - self.fa_fn = os.path.join(contigPath, self.id, self.id + '.fa') - - def get_target_region_coordinates(self): - """ """ - return (self.chr, self.start, self.end, self.targetName, self.regionBuffer) - - def write_files(self, cluster_fn, kmers, reads, seq): - """Write cluster, read fastq, and contig fasta files. - Args: - cluster_fn: String of the file to write kmer clusters to. - kmers: List of kmers used in the building of the contig. - reads: List of reads used in the building of the contig. - seq: String of contig sequence. 
- Return: None - """ - logger = logging.getLogger('breakmer.assembly.contig') - cluster_f = open(cluster_fn, 'w') - cluster_f.write(self.id + ' ' + str(len(kmers)) + '\n') - cluster_f.write(','.join([x[0] for x in kmers]) + '\n') - cluster_f.write(','.join([x.id for x in reads]) + '\n\n') - cluster_f.close() - assembly_fq = open(self.fq_fn, 'w') - logger.info('Writing reads containing kmers to fastq %s' % self.fq_fn) - for read in reads: - assembly_fq.write(read.id + '\n' + read.seq + '\n+\n' + read.qual + '\n') - assembly_fq.close() - logger.info('Writing contig fasta file for blatting %s' % self.fa_fn) - blat_f = open(self.fa_fn, 'w') - blat_f.write('>' + self.id + '\n' + seq) - blat_f.close() - - def write_result(self, svEventResult, outputPath): - resultFn = os.path.join(self.path, self.id + "_svs.out") - utils.log(self.loggingName, 'info', 'Writing %s result file %s' % (self.id, resultFn)) - resultFile = open(resultFn, 'w') - - # A string of output values for writing to file. - headerStr, formattedResultValuesStr = svEventResult.get_formatted_output_values() - resultFile.write(headerStr + '\n' + formattedResultValuesStr + '\n') - resultFile.close() - shutil.copyfile(resultFn, os.path.join(outputPath, self.id + "_svs.out")) - - def write_bam(self, outputPath, svBamReadsFn, reads): - bamOutFn = os.path.join(outputPath, self.id + "_reads.bam") - utils.log(self.loggingName, 'info', 'Writing contig reads bam file %s' % bamOutFn) - bam_out_sorted_fn = os.path.join(outputPath, self.id + "_reads.sorted.bam") - bamFile = pysam.Samfile(svBamReadsFn, 'rb') - bam_out_f = pysam.Samfile(bamOutFn, 'wb', template=bamFile) - for bam_read in bamFile.fetch(): - for read in reads: - rid, idx = read.id.lstrip("@").split("/") - ridx, indel_only_read = idx.split("_") - if (bam_read.qname == rid) and ((ridx == '2' and bam_read.is_read2) or (ridx == '1' and bam_read.is_read1)): - bam_out_f.write(bam_read) - bamFile.close() - bam_out_f.close() - utils.log(self.loggingName, 'info', 
'Sorting bam file %s to %s' % (bamOutFn, bam_out_sorted_fn)) - pysam.sort(bamOutFn, bam_out_sorted_fn.replace('.bam', '')) - utils.log(self.loggingName, 'info', 'Indexing bam file %s' % bam_out_sorted_fn) - pysam.index(bam_out_sorted_fn) - return bam_out_sorted_fn - - -class Contig: - """Interface class to assemble a contig and store data all the relevant data - for the assembly. - Attributes: - meta: Meta class object to store all the interface related data. - kmer_locs: List of integers indicating the start alignment position of the kmers - in the contig sequence. - setup: Boolean to indicate whether a contig has gone through the setup process. - build: Builder class object that handles all the assembly functions. - seq: String for assembled sequence. - kmers: List of kmer sequences used to build contig. - reads: Set of read IDs that have been used to build contig. - buffer: Set of read IDs used in a batch of processing for building a contig. This is flushed. - """ - - def __init__(self, kmerObj, readAlignValues): - self.meta = Meta() - self.setup = False - self.builder = Builder(kmerObj, readAlignValues) - self.seq = None - self.kmers = [] - self.kmer_locs = [] - self.reads = set() - self.buffer = set([readAlignValues['read'].id]) - self.svEventResult = None - self.realignment = None - - def check_read(self, kmerObj, readAlignValues, fncType='setup'): - """Check if the read passed in can be added to the current contig. - Wrapper function to Builder class check_read function. - Args: - kmerObj: Instance of Kmer object with attributes for kmer sequence. - readAlignValues: Dictionary containing: - - 'read': fq_read object that contains kmer sequence. - - 'align_pos': Integer position of kmer in read sequence - - 'nreads': Integer of number of reads with the same sequence. - fncType: String indicating the state of this function. - Return: - String containing 'hit' or '' indicating that read matched contig seq - or did not, respectively. 
- """ - self.buffer.add(readAlignValues['read'].id) - return self.builder.check_read(kmerObj, readAlignValues, fncType) - - def check_invalid(self, read_count_thresh, read_len): - """Determine if the finished contig sequence meets minimum requirements for - length and read count support. - Args: - read_count_thresh: Integer for minimum reads that must support a contig. - read_len: Integer for read length. - Return: - Boolean indicating whether it meets thresholds or not. - """ - # print 'contig.py check_valid', self.get_total_read_support(), int(read_count_thresh), len(self.seq), int(read_len) - if (self.get_total_read_support() < int(read_count_thresh)) or (len(self.seq) <= int(read_len)): - return True - else: - return False - - def finalize(self, fq_recs, kmerTracker, contigBuffer, source='setup'): - """Finish an assembly and add the buffered contigs that were created from - non-aligned reads to the contigBuffer. - Args: - fq_recs: Dicionary of fq_read objects. - kmerTracker: KmerTracker object with all kmer sequence values. - contigBuffer: ContigBuffer object. - source: String for the source of function call. - Return: None - """ - if source == 'setup': - self.set_kmers(kmerTracker.kmerSeqs) - # Get alternate read kmers and see if any are different from contig kmers. - new_contigs = self.builder.check_alternate_reads(kmerTracker, contigBuffer, self.kmers) - for new_contig in new_contigs: - contigBuffer.add_contig(new_contig[0], new_contig[1]) - self.reads = self.builder.finalize_reads(self.reads, fq_recs, contigBuffer) - - def set_kmers(self, kmer_seqs): - """Wrapper function to Builder class set_kmers function. - Args: - kmer_seqs: Set of all kmer sequences. - Return: None - """ - self.setup = True - self.builder.set_kmers(kmer_seqs) - - def set_kmer_locs(self): - """Wrapper function to Builder class set_kmer_locs function. 
- Args: None - Return: None - """ - self.builder.set_kmer_locs() - - def refresh_kmers(self): - """A wrapper function to Builder class refresh kmers. - Args: None - Return: - List of kmers that have not been previously checked. - """ - return self.builder.refresh_kmers() - - def get_kmer_reads(self, kmer_values, read_items): - """ - Args: - kmer_values: Tuple containing the alignment information of a kmer sequence - in a read sequence. - 1. String kmer sequence. - 2. Integer kmer alignment position in sequence. - 3. Boolean whether kmer align position is below midpoint of sequence. - 4. Integer of absolute difference between align position and midpoint. - 5. String of the order for tuples in a list. - read_items: List of tuples for sequence reads: - 1. String read sequence. - 2. List of fq_read objects with read sequence. - Return: - reads: List of tuples containing: - 1. read object, - 2. start position of kmer match in read seq - 3. Boolean that a match was found. - 4. Length of the read sequence. - 5. Number of reads with this sequence. - """ - kmer, kmerPos, lessThanHalf, dist_half, order = kmer_values - read_order = 'for' - if order == 'mid': - if lessThanHalf == 0: - read_order = 'rev' - elif order == 'for': - read_order = 'rev' - reads = assemblyUtils.find_reads(kmer, read_items, self.buffer, read_order) - return reads - - def grow(self, fqRecs, kmerTracker, kmerLen, contigBuffer): - """Iterates through new sample only kmers in a contig assembly and tries to - add more relevant reads to extend the contig assembly sequence. - For each 'new' kmer, assess the reads that have the kmer. When this function - is complete, the contig is done assemblying. - Args: - fqRecs: Dictionary of fq_read objects key = sequence, value = list of fq_reads - kmerTracker: KmerTracker object containing all the kmer sequences. - kmerLen: Integer of kmer size. - contigBuffer: ContigBuffer object. 
- Return: None - """ - logger = logging.getLogger('breakmer.assembly.contig') - if not self.setup: - self.set_kmers(kmerTracker.kmerSeqs) - newKmers = self.refresh_kmers() - while len(newKmers) > 0: - iter = 0 - for kmer_lst in newKmers: - kmerSeq, kmerPos, lessThanHalf, dist_half, order = kmer_lst - reads = self.get_kmer_reads(kmer_lst, fqRecs.items()) - contigBuffer.add_used_mer(kmerSeq) - kmerObj = assemblyUtils.Kmer(kmerSeq, kmerTracker.get_count(kmerSeq), kmerTracker.kmerSeqs, kmerLen) - for read_lst in reads: - read, kmerPos, bool, rlen, nreads = read_lst - contigBuffer.add_used_read(read.id) - readAlignValues = {'read': read, - 'align_pos': kmerPos, - 'nreads': nreads} - hit = self.check_read(kmerObj, readAlignValues, 'grow') - if hit == 'remove': - contigBuffer.remove_contig(read.id) - self.finalize(fqRecs, kmerTracker, contigBuffer, 'grow') - self.builder.checked_kmers.append(kmerSeq) - iter += 1 - newKmers = self.refresh_kmers() - logger.debug("%d kmers left to check" % len(newKmers)) - self.set_kmer_locs() - self.set_final_values() - logger.info('Contig done with contig seq %s. Supported by %d read(s).' % (self.seq, len(self.reads))) - logger.info('Read IDs: %s' % (",".join([x.id for x in list(self.reads)]))) - - def set_meta_information(self, contig_id, params, query_region_values, contig_path, kmer_cluster_fn, readVariation): - """Sets the contig ID, params, target region values and contig path variables for later use. - Output files are also written with the contig assembly information. - Args: - contig_id: String containing contid ID. - params: Param object. - query_region_values: Tuple containing the target region information - contig_path: String of the path to the contig directory to store files. - kmer_cluster_fn: String of the path to write the kmer clustering information to. 
- Return: None - """ - self.meta.set_values(contig_id, params, query_region_values, contig_path, readVariation) - self.meta.write_files(kmer_cluster_fn, self.kmers, self.reads, self.seq) - - def set_final_values(self): - """Set the seq, kmers, kmer_locs variables when the contig is done assemblying. - Args: None - Return: None - """ - self.seq = self.builder.get_seq() - self.kmers = self.builder.get_kmers() - self.kmer_locs = self.builder.get_kmer_locs() - - def query_ref(self, targetRefFns): - """ - Args: - Return: - """ - self.realignment = realigner.RealignManager(self.meta.params, targetRefFns) - self.realignment.realign(self) - - def make_calls(self): - """ - """ - contigCaller = sv_caller.ContigCaller(self.realignment, self, self.meta.params) - self.svEventResult = contigCaller.call_svs() - - def filter_calls(self): - """ - """ - if self.svEventResult is not None: - svFilter = self.meta.params.filter - svFilter.check_filters(self.svEventResult) - - def annotate_calls(self): - """ """ - if self.svEventResult and self.meta.params.get_param('gene_annotation_file') and self.meta.params.get_param('bedtools'): - annotator.annotate_event(self.svEventResult, self.meta) - - def output_calls(self, outputPath, svReadsBamFn): - """ """ - if self.svEventResult: - self.meta.write_result(self.svEventResult, outputPath) - readBamFn = self.meta.write_bam(outputPath, svReadsBamFn, self.reads) - if self.meta.params.get_param('generate_image') and not self.svEventResult.is_filtered(): - # Generate image if option is set and the result is not being filtered out. 
- svplotter.generate_pileup_img(self.svEventResult, readBamFn, outputPath, self.get_id()) - - def get_total_read_support(self): - """Return the total read count supporting assembly.""" - return self.builder.get_total_reads() - - def get_contig_len(self): - """Return length of contig sequence.""" - return len(self.seq) - - def get_kmer_locs(self): - """Return the kmer locations in the contig sequence.""" - return self.kmer_locs - - def has_fa_fn(self): - """Check if fasta file has been written for contig.""" - return self.meta.fa_fn - - def get_path(self): - """Return file path to contig results""" - return self.meta.path - - def get_id(self): - """Return contig id""" - return self.meta.id - - def get_target_name(self): - """ """ - return self.meta.targetName - - def get_contig_count_tracker(self): - """ """ - return self.builder.counts - - def get_disc_reads(self): - """ """ - return self.meta.readVariation.get_disc_reads() - - def get_read_variation(self): - """ """ - return self.meta.readVariation - - def get_var_reads(self, sampleType): - """ """ - return self.meta.readVariation.get_var_reads(sampleType) - - def get_sample_bam_fn(self): - """ """ - return self.meta.params.get_param('sample_bam_file') - - def get_target_region_coordinates(self): - """ """ - return self.meta.get_target_region_coordinates() - - def get_chr(self): - """ """ - return self.meta.chr - - def get_target_start(self): - """ """ - return self.meta.start - - def get_target_buffer(self): - """ """ - return self.meta.regionBuffer diff --git a/breakmer/assembly/olc.py b/breakmer/assembly/olc.py deleted file mode 100644 index 52b6047..0000000 --- a/breakmer/assembly/olc.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - -match_award = 1 -mismatch_penalty = -2 -gap_penalty = -2 - - -# Creates empty matrix with all zeros -def zeros(shape): - 
retval = [] - for x in range(shape[0]): - retval.append([]) - for y in range(shape[1]): - retval[-1].append(0) - return retval - - -# No substituition matrix, just simple linear gap penalty model -def match_score(alpha, beta): - if alpha == beta: - return match_award - elif alpha == '-' or beta == '-': - return gap_penalty - else: - return mismatch_penalty - - -def nw(seq1, seq2): - global max_score - # lengths of two sequences - m = len(seq1) - n = len(seq2) - - # Generate DP (Dynamic Programmed) table and traceback path pointer matrix - # the DP table - score = zeros((n + 1, m + 1)) - for i in range(n + 1): - score[i][0] = 0 - for j in range(m + 1): - score[0][j] = 0 - - # Traceback matrix - # to store the traceback path - pointer = zeros((n + 1, m + 1)) - for i in range(n + 1): - pointer[i][0] = 1 - for j in range(m + 1): - pointer[0][j] = 2 - - # Calculate DP table and mark pointers - for i in range(1, n + 1): - for j in range(1, m + 1): - score_diagonal = score[i - 1][j - 1] + match_score(seq1[j - 1], seq2[i - 1]) - score_up = score[i][j - 1] + gap_penalty - score_left = score[i - 1][j] + gap_penalty - score[i][j] = max(score_left, score_up, score_diagonal) - - if score[i][j] == score_diagonal: - # 3 means trace diagonal - pointer[i][j] = 3 - elif score[i][j] == score_up: - # 2 means trace left - pointer[i][j] = 2 - elif score[i][j] == score_left: - # 1 means trace up - pointer[i][j] = 1 - - # Finding the right-most match which represents a longest overlap - # Note that .index() will find the index of the first item in the list that matches, - # so if you had several identical "max" values, the index returned would be the one for the first. 
- max_i = -200 - for ii in range(n + 1): - if score[ii][-1] >= max_i: - max_i = score[ii][-1] - i = ii - - prei = i - prej = j - # Traceback, follow pointers in the traceback matrix - # initial sequences - align1, align2 = '', '' - while 1: - if pointer[i][j] == 3: - align1 = seq1[j - 1] + align1 - align2 = seq2[i - 1] + align2 - i -= 1 - j -= 1 - elif pointer[i][j] == 2: - # 2 means trace left - align2 = '-' + align2 - align1 = seq1[j - 1] + align1 - j -= 1 - elif pointer[i][j] == 1: - # 1 means trace up - align2 = seq2[i - 1] + align2 - align1 = '-' + align1 - i -= 1 - if (i == 0 or j == 0): - break - - return (align1, align2, prej, j, prei, i, max_i) - - -class Align: - """ - """ - def __init__(self, seq1, seq2): - self.seq1 = seq1 - self.seq2 = seq2 - self.align1 = None - self.align2 = None - self.prej = None - self.j = None - self.prei = None - self.i = None - self.max = None - self.ident = None - self.align() - - def align(self): - self.align1, self.align2, self.prej, self.j, self.prei, self.i, self.max = nw(self.seq1, self.seq2) - self.ident = round(float(self.max) / float(self.prej - self.j), 2) - - -class AlignManager: - """ - """ - def __init__(self, seq1, seq2, scoreThresh, identThresh): - self.seq1 = seq1 - self.seq2 = seq2 - self.scoreThresh = scoreThresh - self.identThresh = identThresh - self.aligns = None - self.align_seqs() - - def align_seqs(self): - """ - """ - self.aligns = (Align(self.seq1, self.seq2), Align(self.seq2, self.seq1)) - - def check_align_thresholds(self): - align1Check = self.aligns[0].max < self.scoreThresh or self.aligns[0].ident < self.identThresh - align2Check = self.aligns[1].max < self.scoreThresh or self.aligns[1].ident < self.identThresh - return align1Check and align2Check - - def same_seqs(self): - hitEnds = self.aligns[0].j == 0 and self.aligns[0].i == 0 - equalLens = len(self.seq1) == len(self.seq2) - return self.same_max_scores() and hitEnds and equalLens - - def same_max_scores(self): - return self.aligns[0].max == 
self.aligns[1].max - - def read_is_superseq(self): - """Check if seq2 is a superseq of seq1 - This function should be called based on the assumption that - seq1 is checked against seq2. - Args: None - Return: Boolean of check - """ - return len(self.seq1) < len(self.seq2) or (self.aligns[0].prej == len(self.seq1) and self.aligns[0].j == 0) - - def read_is_subseq(self): - """Check if seq2 is a subseq of seq1 - Args: None - Return: Boolean of check - """ - return len(self.seq1) > len(self.seq2) or (self.aligns[1].prej == len(self.seq2) and self.aligns[1].j == 0) - - def better_align(self): - """Check if align1 or align2 has a better score. - """ - return self.aligns[0].max > self.aligns[1].max - - def get_alignment(self, index): - """Return align object""" - return self.aligns[index] - - def get_alignment_values(self, index, value): - """Return a specific value from the alignment results - Args: - index: Integer index of the alignment - 0,1 - value: String value for the alignment value - i, prei - """ - returnVal = None - alignment = self.aligns[index] - if value == 'i': - returnVal = alignment.i - elif value == 'prei': - returnVal = alignment.prei - return returnVal - - def get_kmer_align_indices(self, align_index, kmer_seq): - """Return the alignment index of the kmer sequence with the sequences. - """ - return (self.aligns[align_index].align1.replace('-', '').find(kmer_seq), self.aligns[align_index].align2.replace('-', '').find(kmer_seq)) diff --git a/breakmer/assembly/utils.py b/breakmer/assembly/utils.py deleted file mode 100644 index 2272382..0000000 --- a/breakmer/assembly/utils.py +++ /dev/null @@ -1,82 +0,0 @@ -#! /usr/bin/local/python -# -*- coding: utf-8 -*- - -import re -import logging -from collections import OrderedDict - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -class Kmer: - """Class to track value associated with a particular kmer sequence. 
- Attributes: - - """ - def __init__(self, seq, counts, kmerSeqSet, kmerLen): - self.seq = seq - self.counts = counts - self.kmerSeqSet = kmerSeqSet - self.kmerLen = kmerLen - - -def find_reads(kmerSeq, readItems, usedReads, order='for'): - """Return a list of tuples containing information from reads with the kmer sequence. - First search all the read sequences for the given kmer sequence. Then, - filter out used reads and order them according to position of the kmer - sequence in the read sequence. - Args: - kmerSeq: String of kmer sequence. - readItems: List of fq_recs (key, value) tuples. - usedReads: Set of read IDs that have been previously used. - order: String indicating how the list of the identified reads - should be ordered. - Return: - kmerReads: List of tuples containing: - 1. read object, - 2. start position of kmer match in read seq - 3. Boolean that a match was found. - 4. Length of the read sequence. - 5. Number of reads with this sequence. - """ - kmerReads = [] - # Filter all the reads not containing the kmerSeq - mappedReads = filter(lambda x: x[2], map(read_search, [kmerSeq] * len(readItems), readItems)) - # Extract the read ids of the reads containing kmerSeq - mappedReadIds = map(lambda x: x[0].id, mappedReads) - filterIds = set(mappedReadIds) - set(usedReads) - matchedReads = filter(lambda x: (x[0].id in filterIds), mappedReads) - if order == 'rev': - kmerReads = sorted(matchedReads, key=lambda z: (-z[1], -z[3])) - else: - kmerReads = sorted(matchedReads, key=lambda z: (z[1], -z[3])) - return kmerReads - - -def read_search(kmerSeq, readItems): - """Return a tuple containing information regarding the alignment of the kmerSeq - in a sequence read. - This uses regex searching function re.search to determine if the kmerSeq - is contained in the read sequence. If so, then it returns a 5 element - tuple about information regarding this alignment. If no match, then return - a 3 element tuple with None values. 
- Args: - kmerSeq: String of kmer sequence. - readItems: List of fq_recs (key, value) tuples. - Return: - searchResult: Tuple of result information. - 1. read object, - 2. start position of kmer match in read seq - 3. Boolean that a match was found. - 4. Length of the read sequence. - 5. Number of reads with this sequence. - """ - searchResult = (None, None, None) - seq, reads = readItems - x = re.search(kmerSeq, seq) - if x: - searchResult = (reads[0], x.start(), True, len(reads[0].seq), len(reads)) - return searchResult diff --git a/breakmer/caller/__init__.py b/breakmer/caller/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/breakmer/caller/filter.py b/breakmer/caller/filter.py deleted file mode 100644 index dfe14e4..0000000 --- a/breakmer/caller/filter.py +++ /dev/null @@ -1,228 +0,0 @@ -#! /usr/bin/local/python -# -*- coding: utf-8 -*- - -import breakmer.utils as utils - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -class Filter: - """ - """ - - def __init(self): - self.name = None - self.svType = None - self.breakpoints = None - self.description = None - - def set_values(self, values): - name, svType, breakpoints, description = values - self.name = name.lower() - self.svType = svType - self.breakpoints = self.parse_breakpoints(breakpoints) - self.description = description - - def parse_breakpoints(self, breakpoints): - """ """ - breakpointList = breakpoints.split(',') - brkpts = [] - for breakpoint in breakpointList: - chrom, bps = breakpoint.split(':') - bpSplit = bps.split('-') - brkptList = [chrom] - for bp in bpSplit: - brkptList.append(bp) - brkpts.append(brkptList) - return brkpts - - -class ResultFilter: - """ - """ - - def __init__(self, filterFn, params): - self.loggingName = 'breakmer.caller.filter' - self.filterFn = filterFn - self.filters = [] - self.params = params - self.setup() - - def setup(self): - if self.filterFn: - for line in 
open(self.filterFn, 'rU'): - line = line.strip() - resultFilter = Filter() - resultFilter.set_values(line.split('\t')) - self.filters.append(resultFilter) - -# Filters for events -# 1. Contig complexity - # avg_comp, comp_vec = calc_contig_complexity(self.contig_seq) - # brkpt_rep_filt = False - # brkpt_rep_filt = brkpt_rep_filt or (comp_vec[qb[0]] < (avg_comp / 2)) - # brkpt_rep_filt = brkpt_rep_filt or (len(filter(lambda x: x, brkpts['f'])) > 0) - - def check_filters(self, svEvent): - """ - """ - if len(self.filters) > 0: - # Check if event is in the pre-defined filters - self.check_defined_filters(svEvent) - - # print 'check filters', svEvent.resultValues.svType, svEvent.resultValues.svSubtype - if svEvent.svType == 'indel': - self.filter_indel(svEvent) - elif svEvent.resultValues.svType == 'rearrangement': - if svEvent.resultValues.svSubtype == 'trl': - # print 'Checking trl filters' - self.filter_trl(svEvent) - else: - self.filter_rearr(svEvent) - - def check_defined_filters(self, svEvent): - """The event must match - 1. target name - 2. SV type (indel, trl, rearrangement_inversion, rearrangement_tandem_dup) - 3. breakpoints - 4. 
description - """ - for SVFilter in self.filters: - nameMatch = svEvent.contig.get_target_name().lower() == SVFilter.name - typeMatch = svEvent.svType == SVFilter.svType - eventBrkpts = svEvent.get_genomic_brkpts() - # print 'Event breakpoints', eventBrkpts - ebps = [] - for key in eventBrkpts: - ebps.extend(eventBrkpts[key]) - bpMatches = True - for eventBrkpt in ebps: - # Should be a tuple with chr, bp1, bp2 or chr, bp1 - # print 'Event breakpoint', eventBrkpt - match = False - for filterBrkpt in SVFilter.breakpoints: - if len(eventBrkpt) == len(filterBrkpt): - bpMatch = True - # Check chroms - bpMatch = eventBrkpt[0].replace('chr', '') == filterBrkpt[0].replace('chr', '') - if bpMatch: - for v1, v2 in zip(eventBrkpt[1:], filterBrkpt[1:]): - if int(v1) != int(v2): - bpMatch = False - break - if bpMatch: - match = True - break - bpMatches = bpMatches and match - # print nameMatch, typeMatch, bpMatches - if nameMatch and typeMatch and bpMatches: - svEvent.set_filtered('Matched input filter variant') - - def filter_indel(self, svEvent): - """ """ - indelSizeThresh = int(self.params.get_param('indel_size')) - utils.log(self.loggingName, 'info', 'Checking if blat result contains an indel variant') - blatResult = svEvent.blatResults[0][1] - keep_br = blatResult.valid and blatResult.alignFreq < 2 and blatResult.in_target and (blatResult.indel_maxevent_size[0] >= indelSizeThresh) - utils.log(self.loggingName, 'debug', 'Keep blat result %r' % keep_br) - - # Determine the uniqueness of the realignment. 
- svFilterValues = svEvent.resultValues.filterValues - uniqRealignment = svFilterValues.realignFreq < 2 - indelSize = svFilterValues.maxEventSize >= indelSizeThresh - brkptCoverages = svFilterValues.brkptCoverages[0] >= self.params.get_sr_thresh('indel') - minFlankMatches = min(svFilterValues.flankMatchPercents) >= 10.0 - - if uniqRealignment and indelSize and brkptCoverages and minFlankMatches: - utils.log(self.loggingName, 'debug', 'Indel meets basic filtering requirements.') - else: - utils.log(self.loggingName, 'debug', 'Indel filtered due to non-unique realignment (%r), less than input size threshold (%r), low coverage at breakpoints (%r), or contig edge realignment not long enough (%r), filter status set to True.' % (uniqRealignment, indelSize, brkptCoverages, minFlankMatches)) - filterReasons = [] - if not uniqRealignment: - filterReasons.append('Non-unique realignment (%d) > 2' % svFilterValues.realignFreq) - if not indelSize: - filterReasons.append('Max indel size (%d) is less than %d' % (svFilterValues.maxEventSize, indelSizeThresh)) - if not brkptCoverages: - filterReasons.append('Minimum coverage at breakpoints (%d) less than input threshold %d' % (svFilterValues.brkptCoverages[0], self.params.get_sr_thresh('indel'))) - if not minFlankMatches: - filterReasons.append('Minimum percentage of contig sequence that realigns to the reference to the left or right of the indel event less than 10.0 percent (%d)' % min(svFilterValues.flankMatchPercents)) - svEvent.set_filtered(','.join(filterReasons)) - - def filter_rearr(self, svEvent): - # in_ff, span_ff = filter_by_feature(brkpts, query_region, params.opts['keep_intron_vars']) - # filter = (min(brkpt_counts['n']) < params.get_sr_thresh('rearrangement')) or self.blatResultsSorted[0][1] < params.get_min_segment_length('rearr') or (in_ff and span_ff) or (disc_read_count < 1) or (rearr_type == 'rearrangement') or (min(brkpt_kmers) == 0) - svFilterValues = svEvent.resultValues.filterValues - # print 
self.params.get_sr_thresh('rearrangement') - # print svFilterValues.brkptCoverages - missingQueryCoverage = svFilterValues.missingQueryCoverage < self.params.get_min_segment_length('rearr') - brkptCoverages = svFilterValues.brkptCoverages[0] >= self.params.get_sr_thresh('rearrangement') - minSegmentLen = svFilterValues.minSegmentLen >= self.params.get_min_segment_length('rearr') - # discReadCount = svEvent.resultValues.discReadCount >= 0 - minBrkptKmers = svFilterValues.minBrkptKmers > 0 - - if brkptCoverages and minSegmentLen and minBrkptKmers: - utils.log(self.loggingName, 'info', 'Rearrangement meets basic filtering requirements.') - else: - filteredReasons = [] - if not missingQueryCoverage: - logMsg = 'No realignment for %d bases in the contig sequence, more than threshold %d.' % (svFilterValues.missingQueryCoverage, self.params.get_min_segment_length('rearr')) - utils.log(self.loggingName, 'info', logMsg) - filteredReasons.append(logMsg) - if not brkptCoverages: - logMsg = 'Minimum coverage at breakpoints (%d) less than input threshold %d.' % (svFilterValues.brkptCoverages[0], self.params.get_sr_thresh('rearrangement')) - utils.log(self.loggingName, 'info', logMsg) - filteredReasons.append(logMsg) - if not minSegmentLen: - logMsg = 'The minimum realigned segment length (%d) is less than the input threshold %d.' % (svFilterValues.minSegmentLen, self.params.get_min_segment_length('rearr')) - utils.log(self.loggingName, 'info', logMsg) - filteredReasons.append(logMsg) - # if not discReadCount: - # logMsg = 'The number of discordant read pairs supporting the event (%d) is less than 1.' % svEvent.resultValues.discReadCount - # utils.log(self.loggingName, 'info', logMsg) - # filteredReasons.append(logMsg) - if not minBrkptKmers: - logMsg = 'There were no variant kmers at the one or more of the breakpoint locations.' 
- utils.log(self.loggingName, 'info', logMsg) - filteredReasons.append(logMsg) - svEvent.set_filtered(','.join(filteredReasons)) - - def filter_trl(self, svEvent): - svFilterValues = svEvent.resultValues.filterValues - maxBrkptCoverages = svFilterValues.brkptCoverages[1] >= self.params.get_sr_thresh('trl') - minBrkptCoverages = svFilterValues.brkptCoverages[0] >= self.params.get_sr_thresh('trl') - discReadCount = svEvent.resultValues.discReadCount - minSegmentLen = svFilterValues.minSegmentLen >= self.params.get_min_segment_length('trl') - minBrkptKmers = svFilterValues.minBrkptKmers > 0 - minSeqComplexity = svFilterValues.seqComplexity >= 25.0 - startEndMissingQueryCoverage = svFilterValues.startEndMissingQueryCoverage <= 5.0 - maxSegmentOverlap = svFilterValues.maxSegmentOverlap < 5 - maxAlignFreq = max(svFilterValues.realignFreq) < 10 - nReadStrands = svFilterValues.nReadStrands > 1 - maxRealignmentGaps = svFilterValues.maxRealignmentGap - - strictFilter = [minSeqComplexity, startEndMissingQueryCoverage, minSegmentLen, maxRealignmentGaps, maxAlignFreq, nReadStrands] - nStrictFiltersFail = 0 - for f in strictFilter: - if not f: - nStrictFiltersFail += 1 - - # print 'max breakpoint coverages', maxBrkptCoverages - if not maxBrkptCoverages: - logMsg = 'Maximum breakpoint coverages (%d) did not meet input threshold %d.' % (svFilterValues.brkptCoverages[1], self.params.get_sr_thresh('trl')) - utils.log(self.loggingName, 'info', logMsg) - svEvent.set_filtered(logMsg) - else: - if discReadCount >= 2: - utils.log(self.loggingName, 'info', 'Translocation event passed all basic requirements. Filter set 1.') - elif discReadCount == 0 and nStrictFiltersFail <= 1: - utils.log(self.loggingName, 'info', 'Translocation event passed all basic requirements. Filter set 2.') - elif (discReadCount < 2 and discReadCount > 0) and (minSegmentLen and minBrkptCoverages and minBrkptKmers): - utils.log(self.loggingName, 'info', 'Translocation event passed all basic requirements. 
Filter set 3.') - else: - logMsg = 'Translocation failed to pass filters.' - filteredReasons = [logMsg] - utils.log(self.loggingName, 'info', logMsg) - svEvent.set_filtered(logMsg) diff --git a/breakmer/caller/sv_caller.py b/breakmer/caller/sv_caller.py deleted file mode 100644 index 795a1e6..0000000 --- a/breakmer/caller/sv_caller.py +++ /dev/null @@ -1,955 +0,0 @@ -#! /usr/bin/local/python -# -*- coding: utf-8 -*- - -import sys -import os -import math -import pysam -import breakmer.utils as utils - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -class FilterValues: - """ - """ - def __init__(self): - self.maxEventSize = None - self.realignFreq = None - self.brkptCoverages = None - self.flankMatchPercents = None - self.minSegmentLen = None - self.minBrkptKmers = None - self.seqComplexity = None - self.startEndMissingQueryCoverage = None - self.missingQueryCoverage = None - self.maxSegmentOverlap = None - # self.maxMeanCoverage = None - self.nReadStrands = None - self.maxRealignmentGap = None - self.deletedSeqs = None - self.insertedSeqs = None - - def set_indel_values(self, blatResult, brkptCoverages): - """ """ - self.realignFreq = blatResult.alignFreq - self.maxEventSize = blatResult.indel_maxevent_size[0] - self.deletedSeqs = blatResult.get_indel_seqs('del') - self.insertedSeqs = blatResult.get_indel_seqs('ins') - self.brkptCoverages = [min(brkptCoverages), max(brkptCoverages)] - self.flankMatchPercents = [] - for flankMatch in blatResult.indel_flank_match: - self.flankMatchPercents.append(round((float(flankMatch) / float(blatResult.get_seq_size('query'))) * 100, 2)) - - def set_trl_values(self, svEvent): - """ """ - blatResult = svEvent.blatResultsSorted[0][0] - breakpoints = svEvent.brkpts - self.minSegmentLen = blatResult.get_nmatch_total() - # Set the min to be the surrounding area of breakpoints, and max to be the direct breakpoints - self.brkptCoverages = 
[min(breakpoints.counts['n']), max(breakpoints.counts['d'])] - self.minBrkptKmers = min(breakpoints.kmers) - # Sequence complexity of the shortest blat aligned sequence - self.seqComplexity = svEvent.get_seq_complexity() - self.startEndMissingQueryCoverage = svEvent.get_startend_missing_query_coverage() - self.missingQueryCoverage = svEvent.get_missing_query_coverage() - self.maxSegmentOverlap = max(blatResult.seg_overlap) - self.nReadStrands = svEvent.check_read_strands() - self.maxRealignmentGap = max(blatResult.gaps.get_gap_sizes()) - # Use this value to determine the uniqueness of the realignment - self.realignFreq = svEvent.get_realign_freq() - - def set_rearr_values(self, svEvent): - """ """ - breakpoints = svEvent.brkpts - blatResult = svEvent.blatResultsSorted[0][0] - self.brkptCoverages = [min(breakpoints.counts['n']), max(breakpoints.counts['d'])] - self.minBrkptKmers = min(breakpoints.kmers) - self.minSegmentLen = blatResult.get_nmatch_total() - self.missingQueryCoverage = svEvent.get_missing_query_coverage() - self.maxSegmentOverlap = max(blatResult.seg_overlap) - self.realignFreq = svEvent.get_realign_freq() - - def get_formatted_output_values(self, svType, svSubtype): - """ """ - outputValues = {} - if svType == 'indel': - outputValues['maxeventSize'] = self.maxEventSize - outputValues['realignFreq'] = self.realignFreq - # Store the minimum value. 
- outputValues['breakpointCoverages'] = self.brkptCoverages[0] - outputValues['minSeqEdgeRealignmentPercent'] = min(self.flankMatchPercents) - outputValues['deletedSequences'] = self.deletedSeqs - outputValues['insertedSequences'] = self.insertedSeqs - elif svType == 'rearrangement': - outputValues['minBrkptKmers'] = self.minBrkptKmers - outputValues['minSegmentLen'] = self.minSegmentLen - outputValues['missingQueryCoverage'] = self.missingQueryCoverage - outputValues['maxSegmentOverlap'] = self.maxSegmentOverlap - outputValues['realignFreq'] = ",".join([str(x) for x in self.realignFreq]) - if svSubtype == 'trl': - outputValues['breakpointCoverages'] = ",".join([str(x) for x in self.brkptCoverages]) - outputValues['sequenceComplexity'] = self.seqComplexity - outputValues['startEndMissingQueryCoverage'] = self.startEndMissingQueryCoverage - outputValues['nReadStrands'] = self.nReadStrands - outputValues['maxRealignmentGapSize'] = self.maxRealignmentGap - - outputList = [] - for key, value in outputValues.items(): - outputList.append(key + '=' + str(value)) - return ';'.join(outputList) - - -class SVResult: - """ - """ - def __init__(self): - self.loggingName = 'breakmer.caller.sv_caller' - self.fullBreakpointStr = None - self.targetBreakpointStr = None - self.alignCigar = None - self.totalMismatches = None - self.strands = None - self.totalMatching = None - self.svType = '' - self.svSubtype = None - self.splitReadCount = None - self.nKmers = None - self.discReadCount = None - self.contigId = None - self.contigSeq = None - self.targetName = None - self.breakpointCoverageDepth = None - self.description = None - self.genes = None - self.repeatOverlapPercent = None - self.realignmentUniqueness = None - self.filtered = {'status': False, 'reason': []} - self.filterValues = FilterValues() - - def format_indel_values(self, svEvent): - """ - """ - - self.targetName = svEvent.contig.get_target_name() - self.contigSeq = svEvent.get_contig_seq() - self.contigId = 
svEvent.get_contig_id() - blatResult = svEvent.blatResults[0][1] - self.genes = blatResult.get_gene_anno() - self.repeatOverlapPercent = 0.0 - self.totalMatching = blatResult.get_nmatch_total() - self.realignmentUniqueness = blatResult.alignFreq - self.totalMismatches = blatResult.get_nmatches('mismatch') - self.strands = blatResult.strand - self.fullBreakpointStr = svEvent.get_brkpt_str('target') - self.targetBreakpointStr = svEvent.get_brkpt_str('target') - self.breakpointCoverageDepth = svEvent.get_brkpt_depths() - # List of insertion or deletion sizes that coorespond with the breakpoints - self.description = blatResult.indel_sizes - self.alignCigar = blatResult.cigar - self.svType = 'indel' - contigCountTracker = svEvent.contig.get_contig_count_tracker() - contigBrkpts = [] - for x in blatResult.breakpts.contigBreakpoints: - for bp in x: - contigBrkpts.append(bp) - self.splitReadCount = [contigCountTracker.get_counts(x, x, 'indel') for x in contigBrkpts] - self.filterValues.set_indel_values(blatResult, self.splitReadCount) - - def format_rearrangement_values(self, svEvent): - """ """ - utils.log(self.loggingName, 'info', 'Resolving SVs call from blat results') - # Sort the stored blat results by the number of matches to the reference sequence. 
- blatResSorted = sorted(svEvent.blatResults, key=lambda x: x[0]) - resultValid = {'valid': True, 'repeatValid': True} - maxRepeat = 0.0 - - self.totalMatching = [] - self.repeatOverlapPercent = [] - self.realignmentUniqueness = [] - self.genes = [] - self.alignCigar = [] - self.strands = [] - self.totalMismatches = [] - - for i, blatResultTuple in enumerate(blatResSorted): - blatResult = blatResultTuple[1] - resultValid['valid'] = resultValid['valid'] and blatResult.valid - maxRepeat = max(maxRepeat, blatResult.repeat_overlap) - self.repeatOverlapPercent.append(blatResult.repeat_overlap) - self.realignmentUniqueness.append(blatResult.alignFreq) - self.totalMatching.append(blatResult.get_nmatch_total()) - self.genes.append(blatResult.get_gene_anno()) - self.alignCigar.append(blatResult.cigar) - self.strands.append(blatResult.strand) - self.totalMismatches.append(blatResult.get_nmatches('mismatch')) - svEvent.brkpts.update_brkpt_info(blatResult, i, i == (len(blatResSorted) - 1)) - - # Sort the blatResultsSorted list by the lowest matching result to the highest matching result - svEvent.blatResultsSorted = sorted(svEvent.blatResultsSorted, key=lambda x: x[1]) - if svEvent.brkpts.diff_chr(): - # translocation event - # print 'sv_caller.py format_rearrangement_values(), set trl values', svEvent.contig.meta.id, svEvent.contig.seq - svEvent.set_brkpt_counts('trl') - self.discReadCount = svEvent.get_disc_read_count() - self.svType = 'rearrangement' - self.svSubtype = 'trl' - self.filterValues.set_trl_values(svEvent) - else: - svEvent.set_brkpt_counts('rearr') - self.svType, self.svSubtype, self.discReadCount = svEvent.define_rearr() - self.genes = list(set(self.genes)) - self.description = svEvent.rearrDesc - self.filterValues.set_rearr_values(svEvent) - self.realignmentUniqueness = self.filterValues.realignFreq - self.targetName = svEvent.contig.get_target_name() - self.fullBreakpointStr = svEvent.get_brkpt_str('all') - self.targetBreakpointStr = 
svEvent.get_brkpt_str('target') - self.breakpointCoverageDepth = svEvent.get_brkpt_depths() - self.splitReadCount = svEvent.get_splitread_count() - self.contigSeq = svEvent.get_contig_seq() - self.contigId = svEvent.get_contig_id() - - def set_filtered(self, filterReason): - """ """ - self.filtered['status'] = True - self.filtered['reason'].append(filterReason) - - def get_old_formatted_output_values(self): - """ """ - headerStr = ['genes', - 'target_breakpoints', - 'align_cigar', - 'mismatches', - 'strands', - 'rep_overlap_segment_len', - 'sv_type', - 'split_read_count', - 'nkmers', - 'disc_read_count', - 'breakpoint_coverages', - 'contig_id', - 'contig_seq' - ] - - brkptStr = ','.join([str(x) for x in item]) - if self.svType == 'indel': - brkptStr += ' (' + ','.join([str(x) for x in self.descript]) + ')' - - repOverlap_segLen_hitFreq = [] - for i in self.totalMatching: - repOverlap_segLen_hitFreq.append('0.0:' + str(matchLen) + ':0.0') - - nkmers = '0' - - outList = [self.targetName, - self.brkptStr, - self.alignCigar, - self.totalMismatches, - self.strands, - repOverlap_segLen_hitFreq, - self.svType, - self.splitReadCount, - nkmers, - self.discReadCount, - self.breakpointCoverageDepth, - self.contigId, - self.contigSeq, - ] - - outListStr = [] - for item in outList: - if not isinstance(item, list): - outListStr.append(str(item)) - else: - outListStr.append(','.join([str(x) for x in item])) - - formattedFilterValsStr = self.filterValues.get_formatted_output_values(self.svType, self.svSubtype) - outListStr.append(formattedFilterValsStr) - return ('\t'.join(headerStr), '\t'.join(outListStr)) - - def get_formatted_output_values(self): - """ """ - headerStr = ['Target_Name', - 'SV_type', - 'SV_subtype', - 'Description', - 'All_genomic_breakpoints', - 'Target_genomic_breakpoints', - 'Split_read_counts', - 'Discordant_read_counts', - 'Read_depth_at_genomic_breakpoints', - 'Align_cigar', - 'Strands', - 'Total_mismatches', - 'Total_matching', - 'Realignment_uniqueness', 
- 'Contig_ID', - 'Contig_length', - 'Contig_sequence', - 'Filtered', - 'Filtered_reason', - 'Filter_values' - ] - - outList = [self.targetName, - self.svType, - self.svSubtype, - self.description, - self.fullBreakpointStr, - self.targetBreakpointStr, - self.splitReadCount, - self.discReadCount, - self.breakpointCoverageDepth, - self.alignCigar, - self.strands, - self.totalMismatches, - self.totalMatching, - self.realignmentUniqueness, - self.contigId, - len(self.contigSeq), - self.contigSeq, - self.filtered['status'], - ','.join(self.filtered['reason']) - ] - - outListStr = [] - for item in outList: - if not isinstance(item, list): - outListStr.append(str(item)) - else: - outListStr.append(','.join([str(x) for x in item])) - - formattedFilterValsStr = self.filterValues.get_formatted_output_values(self.svType, self.svSubtype) - outListStr.append(formattedFilterValsStr) - return ('\t'.join(headerStr), '\t'.join(outListStr)) - - def is_filtered(self): - """ """ - return self.filtered['status'] - - -class SVBreakpoints: - def __init__(self): - self.loggingName = 'breakmer.caller.sv_caller' - self.t = {'target': None, 'other': None} - self.formatted = [] - self.r = [] - self.q = [[0, 0], []] - self.chrs = [] - self.brkptStr = [] - self.tcoords = [] - self.qcoords = [] - self.f = [] - self.counts = {'n': [], 'd': [], 'b': []} - self.kmers = [] - # Standard format for storing genomic breakpoints for outputtting rsults - # List of tuples containing ('chr#', bp1, bp2), there will be multiple bp for deletions and - # only one bp for insertions or rearrangment breakpoints. - self.genomicBrkpts = {'target': [], 'other': [], 'all': []} - - def update_brkpt_info(self, br, i, last_iter): - """Infer the breakpoint information from the blat result for rearrangments. 
- """ - chrom = 'chr' + br.get_seq_name('ref') - ts, te = br.get_coords('ref') - qs, qe = br.get_coords('query') - targetKey = 'target' if br.in_target else 'other' - self.chrs.append(br.get_seq_name('ref')) - self.tcoords.append((ts, te)) - self.qcoords.append((qs, qe)) - tbrkpt = [] - filt_rep_start = None - if i == 0: - self.q[0] = [max(0, qs - 1), qe] - self.q[1].append([qe, qe - self.q[0][0], None]) - tbrkpt = [te] - filt_rep_start = br.filter_reps_edges[0] - if br.strand == '-': - tbrkpt = [ts] - filt_rep_start = br.filter_reps_edges[0] - self.genomicBrkpts[targetKey].append((chrom, tbrkpt[0])) - self.genomicBrkpts['all'].append((chrom, tbrkpt[0])) - br.set_sv_brkpt((chrom, tbrkpt[0]), 'rearrangement', targetKey) - elif last_iter: - self.q[1][-1][2] = qe - self.q[1][-1][0] - self.q[1].append([qs, qs - self.q[0][0], qe - qs]) - tbrkpt = [ts] - filt_rep_start = br.filter_reps_edges[0] - if br.strand == '-': - tbrkpt = [te] - filt_rep_start = br.filter_reps_edges[1] - self.genomicBrkpts[targetKey].append((chrom, tbrkpt[0])) - self.genomicBrkpts['all'].append((chrom, tbrkpt[0])) - br.set_sv_brkpt((chrom, tbrkpt[0]), 'rearrangement', targetKey) - else: - self.q[1][-1][2] = qe - self.q[1][-1][1] - self.q[1].append([qs, qs - self.q[0][0], qe - qs]) - self.q[1].append([qe, qe - qs, None]) - self.q[0] = [qs, qe] - tbrkpt = [ts, te] - self.genomicBrkpts[targetKey].append((chrom, ts, te)) - self.genomicBrkpts['all'].append((chrom, ts, te)) - if br.strand == '+': - br.set_sv_brkpt((chrom, ts, te), 'rearrangement', targetKey) - if br.strand == '-': - filt_rep_start = br.filter_reps_edges[1] - tbrkpt = [te, ts] - self.genomicBrkpts[targetKey].append((chrom, te, ts)) - self.genomicBrkpts['all'].append((chrom, te, ts)) - br.set_sv_brkpt((chrom, te, ts), 'rearrangement', targetKey) - - self.brkptStr.append('chr' + str(br.get_seq_name('ref')) + ":" + "-".join([str(x) for x in tbrkpt])) - self.r.extend(tbrkpt) - self.f.append(filt_rep_start) - self.t[targetKey] = 
(br.get_seq_name('ref'), tbrkpt[0]) - self.formatted.append('chr' + str(br.get_seq_name('ref')) + ":" + "-".join([str(x) for x in tbrkpt])) - - def set_indel_brkpts(self, blatResult): - """ """ - # List of tuples for indel breakpoints parsed from the blat result ('chr#', bp1, bp2) - self.genomicBrkpts['target'] = blatResult.get_genomic_brkpts() - for brkpt in self.genomicBrkpts['target']: - blatResult.set_sv_brkpt(brkpt, 'indel', 'target') - - def diff_chr(self): - """Determine if the stored realignment results are on multiple chromosomes - indicating a - translocation event. - """ - # print 'Rearr chrs', self.chrs, len(set(self.chrs)) - if len(set(self.chrs)) == 1: - return False - else: - return True - - def get_target_brkpt(self, key): - """ """ - return self.genomicBrkpts['target'] # target[key] - - def get_brkpt_str(self, targetKey): - """ """ - if targetKey is None: - brkptStr = ','.join(self.brkptStr) # self.genomicBrkpts['all'] - # for key in self.genomicBrkpts: - # outStr = self.get_brkpt_str(key) - # if brkptStr == '': - # brkptStr = outStr - # elif outStr != '': - # brkptStr += ',' + outStr - return brkptStr - else: - brkptStr = [] - for genomicBrkpts in self.genomicBrkpts[targetKey]: - chrom = genomicBrkpts[0] - bps = genomicBrkpts[1:] - brkptStr.append(chrom + ':' + '-'.join([str(x) for x in bps])) - return ','.join(brkptStr) - - def get_brkpt_depths(self, sampleBamFn): - """ """ - depths = [] - bamfile = pysam.Samfile(sampleBamFn, 'rb') - for genomicBrkpt in self.genomicBrkpts['all']: - chrom = genomicBrkpt[0].strip('chr') - bps = genomicBrkpt[1:] - for bp in bps: - alignedDepth = 0 - alignedReads = bamfile.fetch(str(chrom), int(bp), int(bp) + 1) - for alignedRead in alignedReads: - if alignedRead.is_duplicate or alignedRead.is_qcfail or alignedRead.is_unmapped or alignedRead.mapq < 10: - continue - alignedDepth += 1 - depths.append(alignedDepth) - return depths - - def get_splitread_count(self): - """ """ - return self.counts['b'] - - def 
set_counts(self, svType, contig): - """ """ - - contigCountTracker = contig.get_contig_count_tracker() - # print 'SV Breakpoint object set_counts(), self.q', self.q - # print 'self.tcoords', self.tcoords - # print 'self.qcoords', self.qcoords - # print 'self.counts', self.counts - for qb in self.q[1]: - left_idx = qb[0] - min(qb[1], 5) - right_idx = qb[0] + min(qb[2], 5) - # print 'qb', qb - # print 'left idx', left_idx - # print 'right idx', right_idx - bc = contigCountTracker.get_counts(left_idx, right_idx, svType) - self.counts['n'].append(min(bc)) - self.counts['d'].append(min(contigCountTracker.get_counts((qb[0] - 1), (qb[0] + 1), svType))) - self.counts['b'].append(contigCountTracker.get_counts(qb[0], qb[0], svType)) - self.kmers.append(contig.get_kmer_locs()[qb[0]]) - utils.log(self.loggingName, 'debug', 'Read count around breakpoint %d : %s' % (qb[0], ",".join([str(x) for x in bc]))) - # print 'Stored counts', self.counts - utils.log(self.loggingName, 'debug', 'Kmer count around breakpoints %s' % (",".join([str(x) for x in self.kmers]))) - - -class SVEvent: - def __init__(self, blatResult, contig, svType): - self.loggingName = 'breakmer.caller.sv_caller' - self.svType = svType - self.svSubtype = '' - self.events = [] - self.blatResults = [] - self.blatResultsSorted = [] - self.annotated = False - self.failed_annotation = False - self.qlen = 0 - self.nmatch = 0 - self.in_target = False - self.contig = contig - self.valid = True - self.in_rep = True - self.querySize = None - self.queryCoverage = [0] * len(contig.seq) - self.brkpts = SVBreakpoints() - self.rearrDesc = None - self.resultValues = SVResult() - self.add(blatResult) - - def add(self, blatResult): - queryStartCoord = blatResult.alignVals.get_coords('query', 0) - queryEndCoord = blatResult.alignVals.get_coords('query', 1) - self.blatResults.append((queryStartCoord, blatResult)) - - # Add the number of hits to the query region - for i in range(queryStartCoord, queryEndCoord): - self.queryCoverage[i] 
+= 1 - if not self.querySize: - self.querySize = blatResult.get_seq_size('query') - self.qlen += blatResult.get_query_span() - self.nmatch += blatResult.get_nmatch_total() - self.in_target = self.in_target or blatResult.in_target - self.in_rep = self.in_rep and (blatResult.repeat_overlap > 75.0) - self.valid = self.valid and blatResult.valid - self.blatResultsSorted.append((blatResult, blatResult.get_nmatch_total())) - - def result_valid(self): - valid = False - if (len(self.blatResults) > 1) and self.in_target: - valid = True - return valid - - def check_annotated(self): - """ """ - return self.annotated and not self.failed_annotation - - def has_annotations(self): - """ """ - return self.annotated - - def get_genomic_brkpts(self): - """ """ - - return self.brkpts.genomicBrkpts - - def check_previous_add(self, br): - ncoords = br.get_coords('query') - prev_br, prev_nmatch = self.blatResultsSorted[-1] - prev_coords = prev_br.get_coords('query') - if ncoords[0] == prev_coords[0] and ncoords[1] == prev_coords[1]: - n_nmatch = br.get_nmatch_total() - if abs(prev_nmatch - n_nmatch) < 10: - if not prev_br.in_target and br.in_target: - self.blatResultsSorted[-1] = (br, n_nmatch) - self.blatResults[-1] = (ncoords[0], br) - self.in_target = True - - def format_indel_values(self): - """ - """ - self.brkpts.set_indel_brkpts(self.blatResults[0][1]) - self.resultValues.format_indel_values(self) - - def format_rearr_values(self): - """ - """ - self.resultValues.format_rearrangement_values(self) - - def get_disc_read_count(self): - """Get the number of discordant read pairs that contribute evidence to a detected translocation - event between a target region and another genomic location. - - It calls the check_inter_readcounts in breakmer.processor.bam_handler module with the target and - 'other' breakpoints. - - Args: - None - Returns: - discReadCount (int): The number of discordant read pairs that support a detected event with - specified breakpoints. 
- - This needs to deal with the situation below where the are more than two realignment results. - In this general scenario, the target breakpoint nearest the non-target breakpoint needs to be - passed to the check_inter_readcounts function. - - Example 1: - [blatResult1 (target), blatResult2 (non-target)] - most common scenario. - - Example 2: - [blatResult1 (target), blatResult2 (target), blatResult3 (non-target)] - """ - - # Sort the blat results by lowest to highest query coordinate value. - querySortedResults = sorted(self.blatResults, key=lambda x: x[0]) - inTarget = [None, None] # Tracks the in_target state of the last realignment result and the breakpoint of that result. - targetBrkpt = None # Track the target breakpoint nearest the non-target breakpoint result. - - # Iterate through realignment results starting with the lowest query coordinate hit. - # If there is a state change for in_target status between the last result and the current result, - # then store the in_target breakpoint. 
- for resultTuple in querySortedResults: - result = resultTuple[1] - if inTarget[0] is None: - inTarget = [result.in_target, result.tend()] - else: - if result.in_target != inTarget[0]: - targetBrkpt = inTarget[1] - if result.in_target: - targetBrkpt = result.tstart() - break - - varReads = self.contig.get_var_reads('sv') - discReadCount = 0 - # print self.get_genomic_brkpts()['target'][0] - targetBrkptValues = self.get_genomic_brkpts()['target'][0] - discReadCount = varReads.check_inter_readcounts(targetBrkptValues[0], targetBrkpt, self.get_genomic_brkpts()['other']) - return discReadCount - - def get_brkpt_str(self, targetKey=None): - """ """ - return self.brkpts.get_brkpt_str(targetKey) - - def get_brkpt_depths(self): - """ - """ - return self.brkpts.get_brkpt_depths(self.contig.get_sample_bam_fn()) - - def get_splitread_count(self): - """ """ - return self.brkpts.get_splitread_count() - - def set_filtered(self, filterReason): - """ """ - self.resultValues.set_filtered(filterReason) - - def get_missing_query_coverage(self): - """ """ - return len(filter(lambda y: y, map(lambda x: x == 0, self.queryCoverage))) - - def get_formatted_output_values(self): - """ """ - return self.resultValues.get_formatted_output_values() - - def get_contig_seq(self): - """ """ - return self.contig.seq - - def get_contig_id(self): - """ """ - return self.contig.get_id() - - def set_brkpt_counts(self, svType): - """ """ - self.brkpts.set_counts(svType, self.contig) - - def check_overlap(self, coord1, coord2): - contained = False - if coord1[0] >= coord2[0] and coord1[1] <= coord2[1]: - contained = True - elif coord2[0] >= coord1[0] and coord2[1] <= coord1[1]: - contained = True - return contained - - def which_rearr(self, varReads, tcoords, qcoords, strands, brkpts): - rearrValues = {'discReadCount': None, 'svType': 'rearrangement', 'svSubType': None, 'hit': False} - if not self.check_overlap(tcoords[0], tcoords[1]): - utils.log(self.loggingName, 'debug', 'Checking rearrangement 
svType, strand1 %s, strand2 %s, breakpt1 %d, breakpt %d' % (strands[0], strands[1], brkpts[0], brkpts[1])) - if (strands[0] != strands[1]): # and (brkpts[0] < brkpts[1]): - # Inversion - # Get discordantly mapped read-pairs - utils.log(self.loggingName, 'debug', 'Inversion event identified.') - rearrValues['hit'] = True - rearrValues['svSubType'] = 'inversion' - rearrValues['discReadCount'] = varReads.check_inv_readcounts(brkpts) - elif (strands[0] == strands[1]): - tgap = brkpts[1] - brkpts[0] - qgap = qcoords[1][0] - qcoords[0][1] - if tgap < 0: - utils.log(self.loggingName, 'debug', 'Tandem duplication event identified.') - rearrValues['hit'] = True - rearrValues['svSubType'] = 'tandem_dup' - rearrValues['discReadCount'] = varReads.check_td_readcounts(brkpts) - elif tgap > qgap: - # Gapped deletion from Blast result - utils.log(self.loggingName, 'debug', 'Deletion event identified.') - rearrValues['hit'] = True - rearrValues['svType'] = 'indel' - rearrValues['indelSize'] = 'D' + str(tgap) - else: - # Gapped insertion from Blast result - utils.log(self.loggingName, 'debug', 'Insertion event identified.') - rearrValues['hit'] = True - rearrValues['svType'] = 'indel' - rearrValues['indelSize'] = 'I' + str(qgap) - return rearrValues - - def define_rearr(self): - """ """ - varReads = self.contig.get_var_reads('sv') - strands = self.resultValues.strands - brkpts = self.brkpts.r - tcoords = self.brkpts.tcoords - qcoords = self.brkpts.qcoords - svType = 'rearrangement' - svSubType = None - rs = 0 - hit = False - rearrHits = {} - for i in range(1, len(self.blatResults)): - vals = self.which_rearr(varReads, tcoords[(i - 1):(i + 1)], qcoords[(i - 1):(i + 1)], strands[(i - 1):(i + 1)], brkpts[(i - 1):(i + 1)]) - if vals['hit']: - if vals['svType'] not in rearrHits: - rearrHits[vals['svType']] = [] - rearrHits[vals['svType']].append(vals) - - if 'rearrangement' not in rearrHits: - utils.log(self.loggingName, 'debug', 'Error in realignment parsing. 
Indel found without rearrangement event.') - - rearrHit = False - for rearr in rearrHits: - for i, rr in enumerate(rearrHits[rearr]): - if rearr == 'rearrangement': - if not rearrHit: - svSubType = rearrHits[rearr][i]['svSubType'] - rs = int(rearrHits[rearr][i]['discReadCount']) - rearrHit = True - else: - svSubType = None - if self.rearrDesc is None: - self.rearrDesc = [svSubType] - self.rearrDesc.append(rearrHits[rearr][i]['svSubType']) - else: - if self.rearrDesc is None: - self.rearrDesc = [] - self.rearrDesc.append(rearrHits[rearr][i]['indelSize']) - - if svSubType is None: - utils.log(self.loggingName, 'debug', 'Not inversion or tandem dup, checking for odd read pairs around breakpoints') - rs = varReads.check_other_readcounts(brkpts) - - return svType, svSubType, rs - - def get_max_meanCoverage(self): - """Return the highest mean hit frequency among all blat results stored. - """ - maxAlignFreq = 0 - for blatResult, nBasesAligned in self.blatResultsSorted: - if int(blatResult.alignFreq) > int(maxAlignFreq): - maxAlignFreq = int(blatResult.alignFreq) - - def get_realign_freq(self): - """ - """ - realignFreqs = [] - for blatResult, nBasesAligned in self.blatResultsSorted: - realignFreqs.append(int(blatResult.alignFreq)) - return realignFreqs - - def check_read_strands(self): - """ - """ - same_strand = False - strands = [] - for read in self.contig.reads: - strand = read.id.split("/")[1] - strands.append(strand) - if len(set(strands)) == 1: - same_strand = True - utils.log(self.loggingName, 'debug', 'Checking read strands for contig reads %s' % (",".join([read.id for read in self.contig.reads]))) - utils.log(self.loggingName, 'debug', 'Reads are on same strand: %r' % same_strand) - return len(set(strands)) - - def get_seq_complexity(self): - """Get the 3-mer complexity of the shortest aligned blat sequence. 
- """ - blatResult, nBasesAligned = self.blatResultsSorted[0] - alignedSeq = self.contig.seq[blatResult.qstart():blatResult.qend()] - merSize = 3 - utils.log(self.loggingName, 'debug', 'Checking sequence complexity of blat result segment %s using %d-mers' % (alignedSeq, merSize)) - nmers = {} - totalMersPossible = len(alignedSeq) - 2 - for i in range(len(alignedSeq) - (merSize - 1)): - nmers[str(alignedSeq[i:i + merSize]).upper()] = True - complexity = round((float(len(nmers)) / float(totalMersPossible)) * 100, 4) - utils.log(self.loggingName, 'debug', 'Complexity measure %f, based on %d unique %d-mers observed out of a total of %d %d-mers possible' % (complexity, len(nmers), merSize, totalMersPossible, merSize)) - return complexity - - def get_startend_missing_query_coverage(self): - """Calculate the percentage of the contig sequence that is not realigned to the reference, only examining the - beginning and end of the contig sequence. - """ - missingCov = 0 - for i in self.queryCoverage: - if i == 0: - missingCov += 1 - else: - break - for i in reversed(self.queryCoverage): - if i == 0: - missingCov += 1 - else: - break - percentMissing = round((float(missingCov) / float(len(self.contig.seq))) * 100, 4) - utils.log(self.loggingName, 'debug', 'Calculated %f missing coverage of blat query sequence at beginning and end' % percentMissing) - return percentMissing - - def is_filtered(self): - """""" - return self.resultValues.is_filtered() - - def set_filtered(self, filterReason): - """ """ - self.resultValues.set_filtered(filterReason) - - def set_annotations(self): - """ """ - self.annotated = True - - def set_failed_annotation(self): - """ """ - self.failed_annotation = True - - -class ContigCaller: - """ - """ - def __init__(self, realignment, contig, params): - self.realignment = realignment - self.contig = contig - self.params = params - self.clippedQs = [] - self.svEvent = None - self.loggingName = 'breakmer.caller.sv_caller' - - def call_svs(self): - """ """ - - 
if not self.realignment.has_results(): - utils.log(self.loggingName, 'info', 'No blat results file exists, no calls for %s.' % self.contig.get_id()) - else: - utils.log(self.loggingName, 'info', 'Making variant calls from blat results %s' % self.realignment.get_result_fn()) - if self.check_indels(): - self.svEvent.format_indel_values() - elif self.check_svs(): - self.svEvent.format_rearr_values() - return self.svEvent - - def check_indels(self): - """ """ - hasIndel = False - blatResults = self.realignment.get_blat_results() - for i, blatResult in enumerate(blatResults): - if i == 0 and blatResult.check_indel(len(blatResults)): - hasIndel = True - utils.log(self.loggingName, 'info', 'Contig has indel, returning %r' % hasIndel) - self.svEvent = SVEvent(blatResult, self.contig, 'indel') - return hasIndel - else: - utils.log(self.loggingName, 'debug', 'Storing clipped blat result start %d, end %d' % (blatResult.qstart(), blatResult.qend())) - self.clippedQs.append((blatResult.qstart(), blatResult.qend(), blatResult, i)) - utils.log(self.loggingName, 'info', 'Contig does not have indel, return %r' % hasIndel) - return hasIndel - - def check_svs(self): - """ """ - utils.log(self.loggingName, 'info', 'Checking for SVs') - gaps = [(0, self.realignment.get_qsize())] - if len(self.clippedQs) > 1: - utils.log(self.loggingName, 'debug', 'Iterating through %d clipped blat results.' 
% len(self.clippedQs)) - mergedClip = [0, None] - for i, clippedQs in enumerate(self.clippedQs): - qs, qe, blatResult, idx = clippedQs - utils.log(self.loggingName, 'debug', 'Blat result with start %d, end %d, chrom %s' % (qs, qe, blatResult.get_seq_name('ref'))) - gaps = self.iter_gaps(gaps, self.clippedQs[i], i) - if self.svEvent.qlen > mergedClip[0]: - mergedClip = [self.svEvent.qlen, self.svEvent] - self.svEvent = mergedClip[1] - else: - utils.log(self.loggingName, 'info', 'There are no more than 1 clipped blat results, not continuing with SVs calling.') - if self.svEvent and self.svEvent.result_valid(): - return True - else: - self.svEvent = None - return False - - def iter_gaps(self, gaps, clippedQuerySeqVals, iterIdx): - """ """ - new_gaps = [] - qs, qe, blatResult, idx = clippedQuerySeqVals - hit = False - for gap in gaps: - gs, ge = gap - utils.log(self.loggingName, 'debug', 'Gap coords %d, %d' % (gs, ge)) - startWithinGap = (qs >= gs and qs <= ge) - endWithinGap = (qe <= ge and qe >= gs) - gapEdgeDistStart = (qs <= gs) and ((gs - qs) < 15) - gapEdgeDistEnd = (qe >= ge) and ((qe - ge) < 15) - if startWithinGap or endWithinGap or (gapEdgeDistStart and (endWithinGap or gapEdgeDistEnd)) or (gapEdgeDistEnd and (startWithinGap or gapEdgeDistStart)): - ngap = [] - if qs > gs: - if (qs - 1 - gs) > 10: - ngap.append((gs, qs - 1)) - if qe < ge: - if (ge - qe + 1) > 10: - ngap.append((qe + 1, ge)) - if iterIdx == 0: - utils.log(self.loggingName, 'debug', 'Creating SV event from blat result with start %d, end %d' % (qs, qe)) - self.svEvent = SVEvent(blatResult, self.contig, 'rearrangement') - new_gaps.extend(ngap) - hit = True - elif self.check_add_br(qs, qe, gs, ge, blatResult): - utils.log(self.loggingName, 'debug', 'Adding blat result to event') - new_gaps.extend(ngap) - self.svEvent.add(blatResult) - hit = True - else: - new_gaps.append(gap) - else: - new_gaps.append(gap) - utils.log(self.loggingName, 'debug', 'New gap coords %s' % (",".join([str(x) for x in 
new_gaps]))) - if not hit: - self.svEvent.check_previous_add(blatResult) - return new_gaps - - def check_add_br(self, qs, qe, gs, ge, blatResult): - """ """ - utils.log(self.loggingName, 'info', 'Checking to add blat result with start %d, end %d' % (qs, qe)) - add = False - # Calc % of segment overlaps with gap - over_perc = round((float(min(qe, ge) - max(qs, gs)) / float(qe - qs)) * 100) - # Check overlap with other aligned segments - ov_right = 0 - if qe > ge: - ov_right = abs(qe - ge) - ov_left = 0 - if qs < gs: - ov_left = abs(qs - gs) - blatResult.set_segment_overlap(ov_left, ov_right) - max_seg_overlap = max(ov_right, ov_left) - utils.log(self.loggingName, 'debug', 'Blat query segment overlaps gap by %f' % over_perc) - utils.log(self.loggingName, 'debug', 'Max segment overlap %f' % max_seg_overlap) - utils.log(self.loggingName, 'debug', 'Event in target %r and blat result in target %r' % (self.svEvent.in_target, blatResult.in_target)) - if over_perc >= 50 and (max_seg_overlap < 15 or (blatResult.in_target and self.svEvent.in_target)): - add = True - utils.log(self.loggingName, 'debug', 'Add blat result to SV event %r' % add) - return add diff --git a/breakmer/params.py b/breakmer/params.py deleted file mode 100644 index 093b659..0000000 --- a/breakmer/params.py +++ /dev/null @@ -1,539 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os -import sys -import logging -import random -import subprocess -import time -import pysam -import shutil -import breakmer.utils as utils -import breakmer.caller.filter as resultfilter - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -class ParamManager: - """ParamManager class stores all the input specifications provided to the program to run. These include - file paths, thresholds, directories, etc... - - Attributes: - opts (dict): Containing parameter options and input values as key-values. 
- gene_annotations (Annotation): Tracks the annotation information. - targets (dict): Target region coordinates, key-values. - paths (dict): Dictionary containing the top level directories for the analysis output. - logging_name (str): Logging string name object for logging messages. - """ - - def __init__(self, arguments): - """Initialize ParamManager class. - - Args: - fncCmd (str): The command to execute - run / prepare_reference_data / start_blat_server. - arguments (dict): The argparse dictionary object from the command line parameters. - Returns: - None - Raises: - None - """ - - self.loggingName = 'breakmer.params' - self.opts = {} - self.filter = None - self.targets = {} - self.paths = {} - self.fncCmd = arguments.fncCmd - self.set_params(arguments) - - def set_params(self, arguments): - """Organize and format all input parameters into class variables to access - later. Specific instances of parameters are checked and set. All parameters that are - set are logged. The target objects are set along with the paths. - - Args: - arguments (dict): The argparse dictionary object from the command line options. - Returns: - None - Raises: - None - """ - - self.parse_opts(arguments) # Parse the config file and command line parameters into the self.opts dictionary. - utils.setup_logger(self.get_param('analysis_dir', True), 'breakmer') # Create logging object. - utils.log(self.loggingName, 'info', 'Setting up parameters') - - # Log all parameters passed in, warn for poor paths - for paramKey, paramValue in self.opts.items(): - utils.log(self.loggingName, 'info', '%s = %s' % (paramKey, paramValue)) - - self.set_targets() - self.paths['ref_data'] = os.path.abspath(os.path.normpath(self.opts['reference_data_dir'])) # Path to target reference sequence fast files. - self.set_param('reference_fasta_dir', os.path.split(self.opts['reference_fasta'])[0]) # Path to genome fasta file. - - # If only preseting the reference data no need to continue. 
- if self.fncCmd == 'prepare_reference_data': - self.set_insertsize_thresh() # Set the expected insert size threshold from the properly mapped read pairs. - utils.log(self.loggingName, 'info', 'Preset reference data option set! Only the reference data directory will be setup.') - return - - # Setup directories - self.paths['analysis'] = os.path.abspath(os.path.normpath(self.opts['analysis_dir'])) - self.paths['output'] = os.path.join(self.paths['analysis'], 'output') - if 'targets_dir' in self.opts: - self.paths['targets'] = os.path.abspath(os.path.normpath(self.opts['targets_dir'])) - else: - self.paths['targets'] = os.path.join(self.paths['analysis'], 'targets') - - # Create all the paths. - for path in self.paths: - utils.log(self.loggingName, 'info', 'Creating %s directory (%s)' % (path, self.paths[path])) - if not os.path.exists(self.paths[path]): - os.makedirs(self.paths[path]) - - # If starting the blat server then return. - if self.fncCmd == 'start_blat_server': - utils.log(self.loggingName, 'info', 'Starting the blat server.') - return - - self.check_binaries() # Check if Jellyfish and Cutadapt work. - self.filter = resultfilter.ResultFilter(self.get_param('filterList'), self) # Instantiate the filter class. - self.set_insertsize_thresh() # Set the expected insert size threshold from the properly mapped read pairs. - - def parse_opts(self, arguments): - """Formats input parameters into self.opts dictionary. It first parses the configuration file and stores the key, values in the self.opts dictionary. - It will exit with an error if the configuration file does not have lines in the proper format (i.e., key=value). - It will also iterate through the command line paramaters and store the keys and values in the opts dictionary. - A final check is performed for the required parameters depending on the parameters that have been passed. 
- - Sanity check for required params - Required when preset_ref_data = True: - - reference_data_dir - - reference_fasta - - targets_bed_file - - Required when preset_ref_data = False - - analysis_name - - targets_bed_file - - sample_bam_file - - analysis_dir - - reference data_dir - - cutadapt_config_file - - reference_fasta - - gene_annotation_file - - Args: - arguments (dict): The argparse dictionary object from the command line options. - Returns: - None - Raises: - None - """ - - for line in open(arguments.config_fn, 'rU'): - line = line.strip() - if line == '' or line.find('#') > -1: # Allow for blank lines and comments - continue - linesplit = line.split("=") - if len(linesplit) == 1: # Make sure the lines in the configuration file are set properly. - err_msg = 'Config line', line, ' not set correctly. Exiting.' - print err_msg - utils.log(self.loggingName, 'error', err_msg) - sys.exit(1) - else: - key, value = linesplit - self.set_param(key, value) # Store key-value in opts dictionary. - - # Store all the arguments into the self.opts dictionary. - for opt in vars(arguments): - if (self.get_param(opt) is not None) and (vars(arguments)[opt] is None): - utils.log(self.loggingName, 'info', 'Parameter %s is set in config file and not on the command line. Using config file value %s.' % (opt, self.get_param(opt))) - else: - self.set_param(opt, vars(arguments)[opt]) - - # Check that the required parameters are set. - required = ['analysis_name', - 'targets_bed_file', - 'sample_bam_file', - 'analysis_dir', - 'reference_data_dir', - 'cutadapt_config_file', - 'reference_fasta', - 'gene_annotation_file'] - if self.fncCmd == 'prepare_reference_data': - required = ['reference_data_dir', 'reference_fasta', 'targets_bed_file'] - - for req in required: - self.get_param(req, True) - - def check_binaries(self): - """Check the required binaries. - There are six required binaries to perform the complete analysis (blat, gfserver, - gfclient, fatotwobit, cutadapt, jellyfish). 
Each binary is checked whether - the path provided in the configuration file has an executable file attached or - if no path was provided that the binary is on the path. Cutadapt and Jellyfish - are also tested using small set of hardcoded data. - - Args: - None - Returns: - None - Raises: - None - """ - - binaries = ('blat', - 'gfserver', - 'gfclient', - 'fatotwobit', - 'cutadapt', - 'jellyfish') - for binaryName in binaries: - binaryPath = self.get_param(binaryName) - if binaryPath is not None: - binaryCheck = utils.which(binaryPath) # Use the binary path specified in the config file. - else: - binaryCheck = utils.which(binaryName) # Perform a which on the server to see if the binary is in the path. - self.set_param(binaryName, binaryCheck) # Store the result in the opts dictionary. - if not binaryCheck: # No binary found or specified. Throw an error. - print 'Missing path/executable for', binaryName - utils.log(self.loggingName, 'error', 'Missing path/executable for %s' % binaryName) - sys.exit(1) - utils.log(self.loggingName, 'info', '%s path = %s' % (binaryName, binaryCheck)) - utils.log(self.loggingName, 'info', 'All the required binaries have been checked successfully!') - - # Test cutadapt and jellyfish binaries - testDir = os.path.join(self.paths['analysis'], 'bin_test') - testFq = os.path.join(testDir, 'test.fq') - if not os.path.exists(testDir): - os.makedirs(testDir) - - fqFile = open(testFq, 'w') - fqFile.write("@H91H9ADXX140327:1:2102:19465:23489/2\nCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATC\n+\n69EEEFBAFBFABCCFFBEFFFDDEEHHDGH@FEFEFCAGGCDEEEBGEEBCGBCCGDFGCBBECFFEBDCDCEDEEEAABCCAEC@>>BB?@C\n@H91H9ADXX140327:2:2212:12198:89759/2\nTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGA\n+\nA@C>C;?AB@BBACDBCAABBDDCDDCDEFCDDDDEBBFCEABCGDBDEEF>@GBGCEDGEDGCGFECAACFEGDFFGFECB@DFGCBABFAECEB?=") - fqFile.close() - - cleanFq, returnCode = utils.test_cutadapt(testFq, 
self.get_param('cutadapt'), self.get_param('cutadapt_config_file')) - if cleanFq: - utils.log(self.loggingName, 'info', 'Test cutadapt ran successfully') - jfish_prgm, rc = utils.test_jellyfish(self.get_param('jellyfish'), cleanFq, testDir) - if returnCode != 0: - utils.log(self.loggingName, 'error', '%s unable to run successfully, exit code %s. Check installation and correct version.' % (jfish_prgm, str(returnCode))) - sys.exit(1) - else: - utils.log(self.loggingName, 'info', 'Test jellyfish ran successfully') - else: - utils.log(self.loggingName, 'error', 'Cutadapt failed to run, exit code %s. Check installation and version.' % str(returnCode)) - sys.exit(1) - shutil.rmtree(testDir) # Remove the test directory. - - def set_insertsize_thresh(self): - """Store the insert sizes for a small number of "properly mapped" reads - and determine an upperbound cutoff to use to determine discordantly mapped read - pairs. - - Args: - None - Returns: - None - Raises: - None - """ - - nSampleReads = 100000 - bamF = pysam.Samfile(self.get_param('sample_bam_file'), 'rb') - testReads = bamF.fetch() - insertSizes = [] - readIter = 0 - for read in testReads: - if read.is_duplicate or read.mapq == 0: - continue - proper_map = read.flag == 83 or read.flag == 99 - if read.is_read1 and proper_map: # Sample the read and store the insert size to its partner. - readIter += 1 - insertSizes.append(abs(read.tlen)) - if 'readLen' not in self.opts: # Store the read length if it is not already stored. - self.set_param('readLen', read.rlen) - if readIter == nSampleReads: - break - isMedian = utils.median(insertSizes) - isSD = utils.stddev(utils.remove_outliers(insertSizes)) # Calculate the standard deviation of the sample read pairs insert sizes. - self.set_param('insertsize_thresh', isMedian + (5 * isSD)) # Set the threshold to be median + 5 standard deviations. - - def set_targets(self): - """Parse the targets bed file and store them in a dictionary. Limit to a gene - list if input. 
- - A list of genes can be passed in by the user to limit the analysis. This will - limit which targets are stored in the dictionary as the target bed file is parsed. - The target bed file is a tab-delimited text file that should have at minimum, - four columns (chromosome, start, end, name) with an optional fourth column - containing a coding feature (i.e., exon or intron). Each row is either a tiled - region with sequencing coverage or it is just a region to analyze by BreaKmer. - The name can be applied to multiple rows, and if multiple tiled regions are input - with the same name they are aggregated together under the same key. - - Store the target information in the self.target dictionary with the name as the key - and a list of tuples of interval genomic locations as the values. - self.target[gene_name] = [(chrom, start_bp, end_bp, name, feature),...] - - Args: - None - Returns: - None - Raises: - None - """ - - # Get the gene list file path if it exists. - geneList = self.get_param('gene_list') - regionList = None - if geneList: - regionList = [] - # Each line contains a gene name. - for line in open(geneList, 'r'): - regionList.append(line.strip().upper()) - - utils.log(self.loggingName, 'info', 'Parsing target list') - # TODO: Check to make sure there aren't duplicate genes. - cur_region = ['', []] - for target in open(self.get_param('targets_bed_file'), 'rU'): - # Each target is formatted like a bed, chr bp1 bp2 name - target = target.strip() - targetsplit = target.split() - chrm, bp1, bp2, name = targetsplit[0:4] - if regionList: - if name.upper() not in regionList: - continue - # Allow a fifth column containing indication of what type of region it is. - # Typically exon/intron designation. This will be deprecated. 
- feature = None if len(targetsplit) <= 4 else targetsplit[4] - self.targets.setdefault(name.upper(), []) - self.targets[name.upper()].append((chrm, int(bp1), int(bp2), name, feature)) - # print 'Targets', self.targets - utils.log(self.loggingName, 'info', '%d targets' % len(self.targets)) - - def check_blat_server(self): - """Run a test query on the specified blat server to make sure it is running. - - Args: - None - Returns: - serverSuccess (boolean): Indicates whether the test ran without errors. - Raises: - None - """ - - testDir = os.path.join(self.paths['analysis'], 'blatserver_test') - testFaFn = os.path.join(testDir, 'test.fa') - if not os.path.exists(testDir): - os.makedirs(testDir) - testFa = open(testFaFn, 'w') - testFa.write('>test\nCCAAGGGAGACTTCAAGCAGAAAATCTTTAAGGGACCCTTGCATAGCCAGAAGTCCTTTTCAGGCTGATGTACATAAAATATTTAGTAGCCAGGACAGTAGAAGGACTGAAGAGTGAGAGGAGCTCCCAGGGCCTGGAAAGGCCACTTTGTAAGCTCATTCTTG') - testFa.close() - - resultFn = os.path.join(testDir, 'blatserver_test.psl') - cmd = '%s -t=dna -q=dna -out=psl -minScore=20 -nohead %s %d %s %s %s' % (self.get_param('gfclient'), self.get_param('blat_hostname'), self.get_param('blat_port'), self.get_param('reference_fasta_dir'), testFaFn, resultFn) - utils.log(self.loggingName, 'info', 'Blat server test system command %s' % cmd) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - utils.log(self.loggingName, 'info', 'Realignment output file %s' % resultFn) - serverSuccess = True - if errors != '': - serverSuccess = False - utils.log(self.loggingName, 'info', 'Realignment errors %s' % errors) - return serverSuccess - - def start_blat_server(self): - """Fire up a blat server instance using a random port number and localhost. - The required files to start a blat server are first checked and created, if - necessary. These include a genome-wide reference fasta file and a 2bit - file generated from that fasta file. 
The faToTwoBit program is used if the - 2bit file needs to be generated on the fly. The gfServer is started and - we wait while the server is successfully started. - - Args: - None - Return: - None - """ - - if self.fncCmd == 'prepare_reference_data': # Do not start blat server for this function. - return - elif self.fncCmd == 'start_blat_server': - port = self.get_param('blat_port') - hostname = self.get_param('blat_hostname') - self.set_param('blat_hostname', hostname) - self.set_param('blat_port', port) - # If no port is specified for this function, then randomly select a port between 8000-9500. - if port is None: - self.set_param('blat_port', random.randint(8000, 9500)) - utils.log(self.loggingName, 'info', 'Starting blat server on port %d on host %s.' % (self.get_param('blat_port'), self.get_param('blat_hostname'))) - elif self.fncCmd == 'run': # Start the blat server if it is not already running. - if not self.get_param('start_blat_server'): # Start blat server option is not set. Check that one is running, if not, start it. - port = self.get_param('blat_port') - hostname = self.get_param('blat_hostname') - self.set_param('blat_hostname', hostname) - if port is None: # No port is specified for a server that should be running. It will start a new one on a random numbered port. - utils.log(self.loggingName, 'debug', 'BreaKmer set to run and start_blat_server is set to False, but no blat server port is specified. Setting blat port to random value and starting blat server.') - self.set_param('blat_port', random.randint(8000, 9500)) - else: # Blat server is already running in this instance. Check it to make sure with a test blat. - self.set_param('blat_port', int(self.get_param('blat_port'))) - if self.check_blat_server(): # Both port and hostname are specified. Check that the server is running. - return - else: - utils.log(self.loggingName, 'debug', 'Blat server with port %d and hostname %s did not pass test query. Please check specifications.' 
% (self.get_param('blat_port'), self.get_param('blat_hostname'))) - - self.set_param('reference_fasta_dir', os.path.split(self.get_param('reference_fasta'))[0]) - refFastaName = os.path.basename(self.get_param('reference_fasta').split(".fa")[0]) - - self.set_param('blat_2bit', os.path.join(self.get_param('reference_fasta_dir'), refFastaName + ".2bit")) - if not os.path.exists(self.get_param('blat_2bit')): # Create 2bit file to use for running the blat server. - utils.log(self.loggingName, 'info', 'Creating 2bit from %s reference fasta' % refFastaName + ".fa") - curdir = os.getcwd() - os.chdir(self.get_param('reference_fasta_dir')) - cmd = '%s %s %s' % (self.get_param('fatotwobit'), refFastaName + ".fa", refFastaName + ".2bit") - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - os.chdir(curdir) - - curdir = os.getcwd() - os.chdir(self.get_param('reference_fasta_dir')) - # Start gfServer, change dir to 2bit file, gfServer start localhost 8000 .2bit - self.set_param('gfserver_log', os.path.join(self.paths['output'], 'gfserver_%d.log' % self.get_param('blat_port'))) - cmd = '%s -canStop -log=%s -stepSize=5 start %s %d %s &' % (self.get_param('gfserver'), self.get_param('gfserver_log'), self.get_param('blat_hostname'), self.get_param('blat_port'), refFastaName + ".2bit") - utils.log(self.loggingName, 'info', "Starting gfServer %s" % cmd) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - startTime = time.time() - while not utils.server_ready(self.get_param('gfserver_log')): # Wait for the blat server to initiate. Timeout if it has not started in 15 minutes. 
- newTime = time.time() - waitTime = newTime - startTime - if waitTime > 1000: - utils.log(self.loggingName, 'error', 'gfServer wait time exceeded ~15 minutes, exiting') - sys.exit(1) - utils.log(self.loggingName, 'info', 'Waiting for blat gfServer to load reference seq') - time.sleep(60) - utils.log(self.loggingName, 'info', 'Server ready!') - os.chdir(curdir) - - def get_target_names(self): - """Get a list of target names. - - Args: - None - Returns: - A list of the target region names that were defined from the input bed file (list). - """ - - return self.targets.keys() - - def get_target_intervals(self, targetName): - """Return the stored intervals for a specific target. - """ - - if targetName in self.targets: - return self.targets[targetName] - else: - utils.log(self.loggingName, 'debug', '%s target name not in target dictionary.' % targetName) - sys.exit(1) - - def get_kmer_size(self): - """Get the input kmer size. This should be an integer value. - - Args: - None - Returns: - kmer size (int): Kmer size that was input to use. - Raises: - TypeError when the kmer_size is not an integer. - """ - - try: - int(self.get_param('kmer_size')) - except ValueError: - print 'The specified kmer size is not an integer.' - raise - else: - return int(self.get_param('kmer_size')) - - def get_min_segment_length(self, type): - """Get the input segment length limit. This should be an integer value. - - Args: - type (str): The variant type to get the minimum segment length - trl / rearr - Returns: - min_seg_len (int) - Raises: - TypeError when the value is not an integer. - """ - - try: - int(self.get_param(type + '_minseg_len')) - except ValueError: - print 'The specified minsegment limit is not an integer.' - raise - else: - return int(self.get_param(type + '_minseg_len')) - - def get_sr_thresh(self, type): - """Get the threshold input for the number of reads that are required to - support a structural variant event. 
- - Args: - type (str): The variant type to get the read support threshold. - Returns: - Integer of the split read threshold for specific events. - Raises: - """ - - if type == 'min': - return min(self.get_sr_thresh('trl'), self.get_sr_thresh('rearrangement'), self.get_sr_thresh('indel')) - else: - if type == 'trl': - return int(self.get_param('trl_sr_thresh')) - elif type == 'rearrangement': - return int(self.get_param('rearr_sr_thresh')) - elif type == 'indel': - return int(self.get_param('indel_sr_thresh')) - - def get_param(self, key, required=False): - """Get the parameter value in the self.opts dictionary. - - If the parameer is required to be availale, then exit the program - and throw an error. - Args: - key (str): The key in the opts dictionary to access the parameter value. - required: Boolean value to indicate if the key should be required to - be in the dictionary or not. - Returns: - value (int, str, boolean): The value of the parameter if it is found. If the parameter is - required and not found the program will exit with error. If the parameter is - not required and not found, it will return None. - Raises: - None - """ - - value = None - if key in self.opts: - value = self.opts[key] - elif required: - utils.log(self.loggingName, 'error', 'Missing required parameter %s, exiting.' % key) - sys.exit(1) - return value - - def set_param(self, key, value): - """Set the parameter value in the self.opts dict. 
- - Args: - key (str): Dictionary key - value (int/str/boolean): Value to store - Returns: - None - Raises: - None - """ - - self.opts[key] = value diff --git a/breakmer/plotting/ b/breakmer/plotting/ deleted file mode 100644 index 7771dfa..0000000 --- a/breakmer/plotting/ +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/breakmer/plotting/__init__.py b/breakmer/plotting/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/breakmer/plotting/sv_viz.py b/breakmer/plotting/sv_viz.py deleted file mode 100644 index c9da6fc..0000000 --- a/breakmer/plotting/sv_viz.py +++ /dev/null @@ -1,1162 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os -import sys -import re -import pysam -from math import log -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -from matplotlib.backends.backend_pdf import PdfPages -from matplotlib import patches -import breakmer.assembly.olc as olcAssembly - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -class TrxBrkpt: - def __init__(self, distToTrx, svBrkpt, brkptIdx, svType): - self.dist = distToTrx - self.svBrkpt = svBrkpt - self.brkptIdx = brkptIdx - self.svType = svType - - def get_genomic_coord(self): - return int(self.svBrkpt.genomicCoords[self.brkptIdx]) - - -class AnnoTrx: - def __init__(self, trx, trxDist, svBreakpoint, brkptIdx, svType): - self.trx = trx - self.svType = svType - self.brkpts = [TrxBrkpt(trxDist, svBreakpoint, brkptIdx, svType)] - - def add_brkpt(self, trxDist, svBreakpoint, brkptIdx, svType): - self.brkpts.append(TrxBrkpt(trxDist, svBreakpoint, brkptIdx, svType)) - - -def check_add_trx(trx, trxItems, trxIds, trxDist, svBreakpoint, brkptIdx, svType): - if trx.id in trxIds: - idx = trxIds.index(trx.id) - # print 'Adding breakpoint', brkptIdx, svBreakpoint, trxDist - trxItems[idx].add_brkpt(trxDist, svBreakpoint, brkptIdx, svType) - else: - # print 
'Adding breakpoint', brkptIdx, svBreakpoint, trxDist - trxItems.append(AnnoTrx(trx, trxDist, svBreakpoint, brkptIdx, svType)) - trxIds.append(trx.id) - return trxItems, trxIds - - -class Segment: - def __init__(self, alignResult, segmentColor, segmentIdx, nSegments): - """ """ - self.alignResult = alignResult - self.color = segmentColor - self.queryCoordinates = [alignResult.qstart(), alignResult.qend()] - self.genomicCoordinates = [alignResult.tstart(), alignResult.tend()] - self.chromName = alignResult.get_seq_name('ref') - self.indelCoordinates = alignResult.breakpts.contigBreakpoints - self.indelSizes = alignResult.indel_sizes - self.strand = alignResult.strand - self.alignLen = alignResult.get_query_span() - self.idx = segmentIdx - self.nSegments = nSegments - self.genomicCoords = alignResult.alignVals.get_coords('ref') - - def get_len(self): - """ """ - return self.queryCoordinates[1] - self.queryCoordinates[0] - - def get_segment_trxs(self): - svBreakpoints = self.alignResult.get_sv_brkpts() - # print 'sv_viz.py breakpoints', svBreakpoints - # Determine the number of transcripts for this segment based on the sv breakpoints - trxItems = [] - trxIds = [] - for svBreakpoint in svBreakpoints: - annotatedTrxsDict = svBreakpoint.annotated_trxs - dKeys = annotatedTrxsDict.keys() - dKeys.sort() - # print 'sv_viz.py breakpoint location', svBreakpoint.chrom, svBreakpoint.genomicCoords, dKeys - # If svBreakpoint type is 'indel' then there can be two trxs associated with the svBreakpoint - # If the type is 'rearrangement' then there should only be one - this needs to be inferred by the realignment - # strand and the index of the segment. - brkptTrxs = [] - if svBreakpoint.svType == 'rearrangement': - if len(dKeys) > 1: - addLeft = True - addRight = True - # This indicates that the segment is in the middle - key inner transcripts if the breakpoints are intergenic. 
- leftBpTrxList, leftBpDistList = annotatedTrxsDict[0] - rightBpTrxList, rightBpDistList = annotatedTrxsDict[1] - keepIdx = 0 - if len(leftBpTrxList) > 1: - # Take the inner trxs - if self.alignResult != '-': - keepIdx = 1 - if leftBpTrxList[keepIdx] is None: - addLeft = False - # Check if the choosen transcript overlaps the segment. - # The start coord of the transcript should be < than the right breakpoint coord. - elif leftBpTrxList[keepIdx].start > svBreakpoint.genomicCoords[1]: - addLeft = False - else: - if leftBpTrxList[keepIdx] is None: - addLeft = False - # Check if the choosen transcript overlaps the segment. - # The end coord of the transcript should be > than the right breakpoint coord. - elif leftBpTrxList[keepIdx].stop < svBreakpoint.genomicCoords[1]: - addLeft = False - if addLeft: - trxItems, trxIds = check_add_trx(leftBpTrxList[keepIdx], trxItems, trxIds, leftBpDistList[keepIdx], svBreakpoint, 0, 'rearr') - # print 'Left brkpt items', trxItems, trxIds - keepIdx = 0 - if len(rightBpTrxList) > 1: - if self.alignResult == '-': - # Take the inner trxs - keepIdx = 1 - if rightBpTrxList[keepIdx] is None: - addRight = False - # Check if the choosen transcript overlaps the segment. - # The end coord of the transcript should be > than the left breakpoint coord. - elif rightBpTrxList[keepIdx].start > svBreakpoint.genomicCoords[0]: - addRight = False - else: - if rightBpTrxList[keepIdx] is None: - addRight = False - # Check if the choosen transcript overlaps the segment. - # The end coord of the transcript should be > than the left breakpoint coord. 
- elif rightBpTrxList[keepIdx].stop < svBreakpoint.genomicCoords[0]: - addRight = False - # Right - if addRight: - trxItems, trxIds = check_add_trx(rightBpTrxList[keepIdx], trxItems, trxIds, rightBpDistList[keepIdx], svBreakpoint, 1, 'rearr') - # print 'Right brkpt items', trxItems, trxIds - else: - add = True - # Single breakpoint - trxList, distList = annotatedTrxsDict[0] - # print 'Selecting transcripts', trxList, distList, self.idx, self.nSegments, self.alignResult.strand - if len(trxList) > 1: - # Pick which transcript to keep based on strands, breakpoint is outside of a transcript - trx = trxList[0] - trxDist = distList[0] - # print 'Index get_segment_trxs sv_viz.py', self.idx, self.nSegments, trxList, distList - if self.idx == 0: - # First - if self.alignResult.strand == '-': - # Get downstream gene - trx = trxList[1] - trxDist = distList[1] - if trx is None: - add = False - else: - trx = trxList[0] - trxDist = distList[0] - if trx is None: - add = False - elif self.idx == (self.nSegments - 1): - if self.alignResult.strand == '-': - # Get upstream gene - trx = trxList[0] - trxDist = distList[0] - if trx is None: - add = False - else: - trx = trxList[1] - trxDist = distList[1] - if trx is None: - add = False - # print 'trx', trx, trxDist - if add: - trxItems, trxIds = check_add_trx(trx, trxItems, trxIds, trxDist, svBreakpoint, 0, 'rearr') - # print 'trxItems, trxIds', trxItems, trxIds - else: - if trxList[0] is not None: - # lands in a single trancript - trxItems, trxIds = check_add_trx(trxList[0], trxItems, trxIds, distList[0], svBreakpoint, 0, 'rearr') - elif svBreakpoint.svType == 'indel': - # print 'sv_viz.py', annotatedTrxsDict, dKeys - if len(dKeys) == 1: - # Insertion with one genomic breakpoint - trxList, distList = annotatedTrxsDict[0] - trxItems, trxIds = check_add_trx(trxList[0], trxItems, trxIds, distList[0], svBreakpoint, 0, 'ins') - else: - # Deletion with two genomic breakpoints, if intergenic then keep the outer transcripts - # print 
annotatedTrxsDict - leftBpTrxList, leftBpDistList = annotatedTrxsDict[0] - rightBpTrxList, rightBpDistList = annotatedTrxsDict[1] - # Take the first trx no matter what - trx = leftBpTrxList[0] - trxDist = leftBpDistList[0] - trxItems, trxIds = check_add_trx(trx, trxItems, trxIds, trxDist, svBreakpoint, 0, 'del') - keepIdx = 0 - if len(rightBpTrxList) > 1: - # Take the outer trx - keepIdx = 1 - trx = rightBpTrxList[keepIdx] - trxDist = rightBpDistList[keepIdx] - trxItems, trxIds = check_add_trx(trx, trxItems, trxIds, trxDist, svBreakpoint, 1, 'del') - # print 'Returning items', trxItems, trxIds - return trxItems, trxIds - - -class AlignSegments: - def __init__(self, svEventResult): - """ """ - self.svEventResult = svEventResult - self.segments = [] - self.colors = ['green', 'orange', 'blue', 'orange', 'purple'] - self.orderedSeqs = None - self.readsSampled = False - self.setup() - - def has_annotations(self): - """ """ - return self.svEventResult.check_annotated() - - def setup(self): - """ """ - realignResults = sorted(self.svEventResult.blatResults, key=lambda x: x[0]) - for i, blatResult in enumerate(realignResults): - # print i, blatResult[1].alignVals.query - self.segments.append(Segment(blatResult[1], self.colors[i], i, len(self.svEventResult.blatResults))) - - def get_contig_seq(self): - """ """ - return self.svEventResult.contig.seq - - def get_contig_id(self): - """ """ - return self.svEventResult.contig.get_id() - - def set_orderedseqs(self, orderedSeqValues): - """ """ - self.orderedSeqs = orderedSeqValues[0] - self.readsSampled = orderedSeqValues[1] - - def get_segment_color(self, nucIter): - """ """ - colors = ['grey'] - for segment in self.segments: - # print nucIter - # print segment.queryCoordinates - if (nucIter >= segment.queryCoordinates[0]) and (nucIter < segment.queryCoordinates[1]): - colors.append(segment.color) - # print colors - if len(colors) > 2: - colors = 'black' - elif len(colors) == 1: - colors = colors[0] - else: - colors = colors[1] 
def pile_reads(reads, contigSeq):
    """Place reads under the contig sequence and subsample them for plotting.

    Each read is located in ``contigSeq`` with ``str.find``. When the read is
    not an exact substring, it is aligned in both directions with
    ``olcAssembly.nw`` and the offset/sequence are taken from the alignment
    tuple with the larger last element.
    NOTE(review): the exact layout of the tuple returned by ``olcAssembly.nw``
    is not visible here; the indices used (1, 3, 0, 5, -1) are kept as in the
    original code - confirm against the aligner.

    Args:
        reads: Iterable of objects exposing a ``seq`` string attribute.
        contigSeq (str): The assembled contig sequence.
    Returns:
        Tuple ``(placedReads, readsSampled)``. ``placedReads`` is a list of
        ``(offset, paddedSeq)`` tuples sorted by offset, where ``paddedSeq`` is
        the read sequence left-padded with ``offset`` spaces. ``readsSampled``
        is True when more than 50 reads were placed and only a regularly
        spaced subset is returned.
    """

    placedReads = []
    for read in reads:
        seq = read.seq
        idx = contigSeq.find(seq)
        if idx == -1:
            aln1 = olcAssembly.nw(contigSeq, read.seq)
            aln2 = olcAssembly.nw(read.seq, contigSeq)
            idx = aln1[3]
            seq = aln1[1]
            if aln1[-1] < aln2[-1]:
                idx = aln2[5]
                seq = aln2[0]
        placedReads.append((idx, ' ' * idx + seq))
    # list.sort is stable, so reads sharing an offset keep their input order.
    placedReads.sort(key=lambda placed: placed[0])

    if len(placedReads) <= 50:
        return (placedReads, False)

    # Floor division keeps the stride an integer on Python 2 and 3 alike; with
    # true division the stride became a float that the sampling counter never
    # matched, so no read was returned at all.
    sampleIdx = len(placedReads) // 50
    # Skip `sampleIdx` reads, keep one, repeat.
    return (placedReads[sampleIdx::sampleIdx + 1], True)
yCoord, xOffset, segmentManager) - plot_pileup_seq(ax, yCoord, xOffset, segmentManager) - plot_segments(ax, yCoord + 1, xOffset, segmentManager) - plot_indel_track(ax, yCoord + 1, xOffset, segmentManager) - plot_annotation_track(ax, yCoord + 5, xOffset, segmentManager) - plot_global_trx_track(ax, yCoord + 7, xOffset, segmentManager) -# annoYidx = seqYidx + len(cSeq.segments) + 1 -# # Vertical breakpojnt lines, colors match the segments. -# brkptLines = [] -# contigGenomicCoords = [] -# ycoord = -seqPlotSize-1 -# for i, seg in enumerate(cSeq.segments): -# coords = seg.coords -# gCoords = seg.genomicBrkpts -# yoffset = float(i)/float(5) -# seg.yidx = annoYidx - yoffset -# plt.plot((xoffset+coords[0], xoffset+coords[0]+seg.plotLen), (seg.yidx, seg.yidx), color=seg.color, linewidth=2) -# lr = ['left', 'right'] -# for coord, gCoord, leftRight in zip(coords, gCoords, lr): -# print cSeq.contigBrkpts -# if coord in cSeq.contigBrkpts: -# brkptLines.append((coord, seg.color, seg.yidx)) -# contigGenomicCoords.append((coord, gCoord, leftRight, ycoord)) -# ycoord -= 0.8 - -# brkptIter = 0 -# bLines = sorted(brkptLines, key=lambda x: x[0]) -# print 'Blines', bLines -# bIdxs = [] -# for coord, scolor, syidx in bLines: -# xidx = coord + xoffset + brkptIter -# if coord in bIdxs : -# xidx = coord + xoffset -# bIdxs.append(coord) -# plt.plot((xidx, xidx), (-seqPlotSize-3, syidx), color=scolor, linewidth=2) -# brkptIter += 1 - -# for coord, gCoord, leftRight, coordYidx in contigGenomicCoords: -# xidx = coord + xoffset -# if leftRight == 'right' : -# xidx += 0.5 -# ax.text(xidx, coordYidx, gCoord, ha=leftRight, va='bottom', size=12) - -# pa = plotAnnot(svRes, annot, cSeq) -# pa.get_coding_features() - -# exonXoffset = xoffset -# iter = 0 -# for segAnnot in pa.segAnnots: -# for exCoords in segAnnot.mappedExons: -# ycoord = annoYidx - (float(iter)/float(5)) -# rect = patches.Rectangle((segAnnot.segment.coords[0]+exonXoffset+exCoords[0], ycoord), exCoords[1]-exCoords[0], 1, 
def plot_contig_seq(ax, seqYidx, xOffset, segmentManager):
    """Write the contig sequence along a single row of the plot.

    A "5'" label is written at x = 1, every character returned by
    ``segmentManager.get_contig_seq()`` is written one unit apart starting at
    ``xOffset``, and a "3'" label is written one unit after the last
    character. All text is drawn through ``add_seq_text``.

    Args:
        ax: Axes-like object forwarded to ``add_seq_text``.
        seqYidx: y coordinate used for every character of the row.
        xOffset: x coordinate of the first contig character.
        segmentManager: Object providing ``get_contig_seq()``.
    Returns:
        None
    """
    step = 1
    # Add 5' designation to contig sequence.
    add_seq_text(ax, 1, seqYidx, "5'")
    nextX = xOffset
    for base in segmentManager.get_contig_seq():
        add_seq_text(ax, nextX, seqYidx, base)
        nextX += step
    # Add 3' designation right after the final nucleotide.
    add_seq_text(ax, nextX, seqYidx, "3'")
def plot_indel_track(ax, yCoord, xOffset, segmentManager):
    """Draw a red rectangle and a size label for every indel of every segment.

    For each segment the row position is advanced by ``(i + 0.75) * 0.50``,
    where ``i`` is the segment's index in ``segmentManager.segments``; the
    advance accumulates from one segment to the next. An entry of
    ``segment.indelCoordinates`` with two elements is drawn from its first to
    its second coordinate; any other entry is drawn one unit wide. The label
    text is ``segment.indelSizes[j]`` for the j-th entry.

    Args:
        ax: Axes-like object receiving ``add_patch`` and ``text`` calls.
        yCoord: Starting y coordinate of the track.
        xOffset: Value added to each indel's first coordinate to get its x position.
        segmentManager: Object exposing ``segments``; each segment exposes
            ``indelCoordinates`` and ``indelSizes``.
    Returns:
        None
    """
    trackHeight = 0.25
    for segNum, segment in enumerate(segmentManager.segments):
        spans = segment.indelCoordinates
        yCoord += (segNum + 0.75) * 0.50
        for indelNum, span in enumerate(spans):
            left = xOffset + span[0]
            width = (span[1] - span[0]) if len(span) == 2 else 1
            ax.add_patch(patches.Rectangle((left, yCoord), width, trackHeight, color='red'))
            labelX = left + (float(width) / float(2))
            ax.text(labelX, yCoord + 0.6, segment.indelSizes[indelNum], ha='center', va='top', size=10)
'+': -# exonCode = 'left' -# elif bp.brkptIdx == 0: -# if segStrand == '-': -# exonCode = 'left' -# elif segPos == 'last': -# if bp.svType == 'rearr': -# if segStrand == '-': -# exonCode = 'left' -# return exonCode - - -class AnnotationBrkpt: - def __init__(self, trxBrkpts, segPos, segStrand): - self.segPos = segPos - self.segStrand = segStrand - self.trxBrkpts = trxBrkpts - self.other_brkpts = None - self.bps = [] - self.bounds = [] - self.setup() - - def setup(self): - """ """ - for bp in self.trxBrkpts: - exonCode = 'right' - if self.segPos == 'only': - if bp.svType == 'del': - if bp.brkptIdx == 0: - exonCode = 'left' - elif bp.svType == 'ins': - exonCode = 'all' - elif self.segPos == 'first': - if bp.svType == 'rearr': - if self.segStrand == '+': - exonCode = 'left' - elif self.segPos == 'middle': - if bp.svType == 'rearr': - self.bounds.append(bp.get_genomic_coord()) - if bp.brkptIdx == 1: - if self.segStrand == '+': - exonCode = 'left' - elif bp.brkptIdx == 0: - if self.segStrand == '-': - exonCode = 'left' - elif self.segPos == 'last': - if bp.svType == 'rearr': - if self.segStrand == '-': - exonCode = 'left' - # print 'sv_viz.py setup() adding breakpoint', (bp, bp.get_genomic_coord(), exonCode) - self.bps.append((bp, bp.get_genomic_coord(), exonCode)) - - def add_brkpt(self, trxBrkpts): - """ """ - self.other_brkpts = trxBrkpts - - def select_exons(self, exons): - selectedExons = {} - if len(self.bounds) > 1: - # print 'Bounds', self.bounds - self.bounds.sort() - bpCoordKey = '-'.join([str(x) for x in self.bounds]) - selectedExons[bpCoordKey] = {'coords': []} - selectedExons[bpCoordKey]['coords'].append((self.bounds[0] - 1, self.bounds[0], 'breakpoint', None)) - selectedExons[bpCoordKey]['coords'].append((self.bounds[1] - 1, self.bounds[1], 'breakpoint', None)) - eIter = 1 - maxminCoords = [self.bounds[0], self.bounds[1], self.bounds[0], self.bps[0][2]] - bpOverlap = [False, None] - for exon in exons: - # print 'Check exon', exon.start, exon.stop, 
exon.featureType - add = False - estart = int(exon.start) - estop = int(exon.stop) - exonCoords = [estart, estop] - if (estart >= self.bounds[0] and estart <= self.bounds[1]): - add = True - if estop > self.bounds[1]: - bpOverlap = [True, self.bounds[1] - 1] - exonCoords[1] = self.bounds[1] - elif (estop >= self.bounds[0] and estop <= self.bounds[1]): - add = True - if estart < self.bounds[0]: - bpOverlap = [True, self.bounds[0] - 1] - exonCoords[0] = self.bounds[0] - if add: - # print 'sv_viz.py keep exon', bp, estart, estop, exonCode, exon.featureType - # absDist = abs(bpCoord - int(exonCoords[0])) - # if len(firstLastExons['nearest_exon']) == 0: - # firstLastExons['nearest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # elif absDist < firstLastExons['nearest_exon'][0]: - # firstLastExons['nearest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # if len(firstLastExons['furthest_exon']) == 0: - # firstLastExons['furthest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # elif absDist > firstLastExons['furthest_exon'][0]: - # firstLastExons['furthest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # print 'Adding exon', exonCoords, eIter, bpOverlap[1] - selectedExons[bpCoordKey]['coords'].append([int(exonCoords[0]), int(exonCoords[1]), 'exon' + str(eIter), bpOverlap[1]]) - if maxminCoords[0] > int(exonCoords[0]): - maxminCoords[0] = int(exonCoords[0]) - if maxminCoords[1] < int(exonCoords[1]): - maxminCoords[1] = int(exonCoords[1]) - eIter += 1 - selectedExons[bpCoordKey]['maxmincoords'] = maxminCoords - else: - for bp in self.bps: - maxminCoords = [] - bpObj, bpCoord, exonCode = bp - selectedExons[bpCoord] = {'coords': []} - selectedExons[bpCoord]['coords'].append((bpCoord - 1, bpCoord, 'breakpoint', None)) - if len(maxminCoords) == 0: - maxminCoords = [bpCoord - 1, bpCoord, bpCoord, exonCode] - eIter = 1 - firstLastExons = {'nearest_exon': [], 'furthest_exon': []} - for exon in exons: - bpOverlap = [False, 
None] - # print 'Check exon', exon.start, exon.stop, exon.featureType, exonCode - add = False - estart = int(exon.start) - estop = int(exon.stop) - exonCoords = [estart, estop] - # print 'Exoncoords', exonCoords, bpCoord - if (exonCode == 'left') and (estart <= bpCoord): - # Get all exons with start < bp - if bpCoord < estop: - # Breakpoint intersects with exon, reduce feature count to 2 - bpOverlap = [True, bpCoord - 1] - exonCoords[1] = bpCoord - add = True - elif (exonCode == 'right') and (estop >= bpCoord): - if bpCoord > estart: - bpOverlap = [True, bpCoord - 1] - exonCoords[0] = bpCoord - add = True - elif exonCode == 'all': - # Single insertion in a gene - if bpCoord >= estart and bpCoord <= estop: - bpOverlap = [True, bpCoord - 1] - add = True - if add: - # print 'sv_viz.py keep exon', bp, estart, estop, exonCode, exon.featureType - # absDist = abs(bpCoord - int(exonCoords[0])) - # if len(firstLastExons['nearest_exon']) == 0: - # firstLastExons['nearest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # elif absDist < firstLastExons['nearest_exon'][0]: - # firstLastExons['nearest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # if len(firstLastExons['furthest_exon']) == 0: - # firstLastExons['furthest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # elif absDist > firstLastExons['furthest_exon'][0]: - # firstLastExons['furthest_exon'] = [absDist, len(selectedExons), 'exon' + str(eIter)] - # print 'Adding exon', exonCoords, eIter, bpOverlap[1] - selectedExons[bpCoord]['coords'].append([int(exonCoords[0]), int(exonCoords[1]), 'exon' + str(eIter), bpOverlap[1]]) - if maxminCoords[0] > int(exonCoords[0]): - maxminCoords[0] = int(exonCoords[0]) - if maxminCoords[1] < int(exonCoords[1]): - maxminCoords[1] = int(exonCoords[1]) - eIter += 1 - # selectedExons[bpCoord]['coords'][firstLastExons['nearest_exon'][1]][2] = firstLastExons['nearest_exon'][2] - # 
def get_neighbor_exons(exons):
    """Keep only the features that directly flank each breakpoint marker.

    ``exons`` is walked in the order given. Every item is a 4-element sequence
    ``(start, end, name, bpOverlapCoord)``; items whose name is 'breakpoint'
    are markers. For each marker, up to two items immediately before it and up
    to two items immediately after it are kept. The marker itself is emitted
    as ``(start, start + 1, 'breakpoint', None)`` unless some item reported the
    marker's start as its ``bpOverlapCoord``.

    Each breakpoint owns its own neighbour lists. Items lying between two
    markers count as "after" the first marker and "before" the second one.
    (Sharing a single list between all markers made later markers report the
    items that follow the first marker instead of their own neighbours.)

    Args:
        exons: Ordered iterable of ``(start, end, name, bpOverlapCoord)`` items.
    Returns:
        List of the kept items in marker order (before-items, marker,
        after-items). Non-marker items are de-duplicated by name; markers are
        always kept. Empty when ``exons`` contains no marker.
    """
    # Local import keeps this block self-contained; OrderedDict gives a
    # deterministic marker order on Python 2 as well.
    from collections import OrderedDict

    neighbors = OrderedDict()  # marker start -> {'left': [...], 'right': [...]}
    pendingLeft = []           # items seen since the previous marker (or the list start)
    currentBp = None
    bpOverlaps = set()
    for exon in exons:
        start, end, name, bpOverlapCoord = exon
        if name == 'breakpoint':
            neighbors[start] = {'left': pendingLeft, 'right': []}
            currentBp = start
            pendingLeft = []
            continue
        # The item precedes whichever marker comes next ...
        pendingLeft.append(exon)
        # ... and follows the most recent marker, if there is one.
        if currentBp is not None:
            neighbors[currentBp]['right'].append(exon)
        if bpOverlapCoord is not None:
            bpOverlaps.add(bpOverlapCoord)

    selected = []
    for bpStart, sides in neighbors.items():
        selected.extend(sides['left'][-2:])
        # A marker that falls inside an exon is represented by that exon.
        if bpStart not in bpOverlaps:
            selected.append((bpStart, bpStart + 1, 'breakpoint', None))
        selected.extend(sides['right'][:2])

    seenNames = set()
    uniqList = []
    for item in selected:
        if item[2] == 'breakpoint' or item[2] not in seenNames:
            uniqList.append(item)
            seenNames.add(item[2])
    return uniqList
parsedExons = [] - for exon in exons: - parsedExons.append((int(exon.start), int(exon.stop), 'exon')) - - bpPlotBins = [] - for brkpt in brkpts: - # print 'SV breakpoints for segTrx', brkpt.dist, brkpt.svBrkpt.chrom, brkpt.svBrkpt.svType, brkpt.svBrkpt.genomicCoords[brkpt.brkptIdx], brkpt.brkptIdx, segment.strand - gCoord = brkpt.get_genomic_coord() - # exonCode = get_exon_code(brkpt, segmentPos, segment.strand) - if gCoord < trx.start or gCoord > trx.stop: - parsedExons.append((int(gCoord) - 1, int(gCoord), 'breakpoint')) - else: - for i, exon in enumerate(exons): - if gCoord >= exon.start and gCoord <= exon.stop: - # within exon - # print 'Gcoord, exon', gCoord, exon.start, exon.stop, i - bpPlotBins.append(('exon', i)) - break - elif gCoord < exon.start: - # print 'Gcoord, exon.start', gCoord, exon.start, i - bpPlotBins.append(('intron', i - 1)) - break - - newExons = sorted(parsedExons, key=lambda x: x[0]) - - binSize = trxLen / (2 * len(newExons) - 1) - offset = trxOffset - ycoord = int(yCoord) - (float(segTrxIter) / float(5)) - # labelStr = trx.geneName + ':' + trx.id + ' (' + trx.strand + ')' - # ax.text(trxOffset + (float(trxLen) / float(2)), yCoord + 2, labelStr, ha='center', va='center', size=12) - trxElements = [] - # print 'New exons', newExons - # print 'Offset', offset, binSize - exonHit = False - for i, exon in enumerate(newExons): - rectLen = binSize - start = offset - color = segment.color - height = 0.35 - exonStr = exon[2] - # print 'start', start - if exon[2] == 'breakpoint': - rectLen = 0.5 - height = 5 - exonStr = '' - if i == (len(newExons) - 1) and segmentPos == 'first': - start += binSize - ax.vlines(x=start, ymin=yCoord - 0.35, ymax=yCoord + 0.35, color='grey', linewidth=1.5, zorder=2) - if int(exon[0]) >= int(trx.start) and int(exon[1]) <= int(trx.stop): - trxElements.append(start) - offset += binSize + rectLen + (binSize - rectLen) - if exon[2] != 'breakpoint': - # print 'Plotting rectangle', start, yCoord, rectLen, height - rect = 
patches.Rectangle((start, yCoord - 0.125), rectLen, height, color=color) - ax.add_patch(rect) - if exonStr != '': - if not exonHit: - tStart = trx.chr.replace('chr', '') + ':' + str(trx.start) - tStop = trx.chr.replace('chr', '') + ':' + str(trx.stop) - ax.text(start, yCoord - 0.35, tStart, ha='left', va='center', size=8) - ax.text(trxOffset + trxLen, yCoord - 0.35, tStop, ha='right', va='center', size=8) - exonLabel = 'exon1' - if trx.strand == '-': - exonLabel = 'exon' + str(len(exons)) - ax.text(start, yCoord + 0.4, exonLabel, ha='left', va='center', size=8) - exonHit = True - - # exstart = exon[0] - # exend = exon[1] - # if segment.strand == '-': - # exstart = exon[1] - # exend = exon[0] - # exstart = segment.chromName + ':' + str(exstart) - # exend = segment.chromName + ':' + str(exend) - # ax.text(start, yCoord - 0.45, str(exstart), ha='left', va='center', size=8) - # ax.text(start + binSize, yCoord - 0.45, str(exend), ha='right', va='center', size=8) - if int(exon[0]) >= int(trx.start) and int(exon[1]) <= int(trx.stop): - trxElements.append(start) - trxElements.append(start + binSize) - # print 'trxElements', exon, trxElements - segTrxIter += 1 - # This guarantees that intergenic breakpoints don't appear to be in the transcript. 
- # print 'TRX elements', trxElements, trxOffset, trxLen - trxMin = max(min(trxElements), trxOffset) - trxMax = min(max(trxElements), trxOffset + trxLen) - # print 'TRX max, min', trxMin, trxMax - # print 'Rectangle', trxMin, yCoord, trxMax - trxMin - rect = patches.Rectangle((trxMin, yCoord), trxMax - trxMin, 0.125, color=segment.color) - ax.add_patch(rect) - - for bp in bpPlotBins: - # print 'BP', bp - add = -(float(binSize) / float(2)) - inc = 1 - if bp[0] == 'exon': - add = (float(binSize) / float(2)) - inc = 0 - start = trxOffset + (binSize * 2 * (bp[1] + inc)) + add - # print 'Start coord', start - ax.vlines(x=start, ymin=yCoord - 0.35, ymax=yCoord + 0.35, color='grey', linewidth=1.5, zorder=2) - - -def plot_annotation_track(ax, yCoord, xOffset, segmentManager): - """ - """ - - # Only plot the annotations if available! - if not segmentManager.has_annotations(): - return - - # Sort the segments increasing in query coordinate. - # segStarts = [segment.queryCoordinates[0], segment for segment in segmentManager.segments] - # for i, segment in enumerate(segmentManager.segments): - # segStarts.append((segment.queryCoordinates[0], segment)) - sortedSegs = sorted([(segment.queryCoordinates[0], segment) for segment in segmentManager.segments], key=lambda x: x[0]) - - for i, segmentTuple in enumerate(sortedSegs): - # print 'sv_viz.py plot_annotation_track segment', i - # print 'sv_viz.py plot_annotation_track segment', segmentTuple - segment = segmentTuple[1] - segmentPos = 'only' - if len(sortedSegs) > 1: - if i == 0: - segmentPos = 'first' - elif i > 0 and i < (len(sortedSegs) - 1): - segmentPos = 'middle' - elif i == (len(sortedSegs) - 1): - segmentPos = 'last' - # print 'segment position', segmentPos, 'segmentStrand', segment.strand - - segTrxs, segTrxIds = segment.get_segment_trxs() - # print 'Segment transcript ids', segTrxIds, segTrxs - segLen = segment.get_len() - segStart, segEnd = segment.queryCoordinates - reverse = False - if segment.strand == '-': - 
reverse = True - - # Set the x-coordinate offset value for the transcript that will be plotted. - trxOffset = segStart + xOffset - if (segmentPos == 'first' or segmentPos == 'only'): - # Increase the offset by 3 units for the '...' for the first and - # last transcript plots. - trxOffset += 3 - - if len(segTrxs) == 0: - trxLen = segLen - rect = patches.Rectangle((trxOffset, yCoord), trxLen, 0.125, color='grey') - ax.add_patch(rect) - else: - # Iterate through the segment transcripts. - for segTrxIter, segTrx in enumerate(segTrxs): - # print 'segTRX svtype', segTrx.svType, segTrx.trx.exons - - # For the first and last segments, use ... at the beginning and end. - if (segmentPos == 'first' or segmentPos == 'only') and segTrxIter == 0: - # Decrease the segment length by 3 units for each '.' - segLen = segLen - 3 - for i in range(3): - rect = patches.Rectangle((trxOffset - 3 + i, yCoord), 0.25, 0.1, color=segment.color) - ax.add_patch(rect) - if segTrxIter == (len(segTrxs) - 1) and (segmentPos == 'last' or segmentPos == 'only'): - # Last segment and trx - segLen = segLen - 3 - for i in range(3): - rect = patches.Rectangle((trxOffset + 0.5 + segLen + i, yCoord), 0.2, 0.1, color=segment.color) - ax.add_patch(rect) - - # Transcript plot length is the length of the segment divided by the number of transcripts - # corresponding to that segment (i.e. 2 transcripts for an intergenic breakpoint.) - trxLen = float(segLen) / float(len(segTrxs)) - # print 'TRX len', trxLen - # Increment the transcript x-coordinate by transcript iterator * length. - trxOffset += segTrxIter * (trxLen) - # print 'TRX offset', trxOffset - trx = segTrx.trx - brkpts = segTrx.brkpts - trx_reverse = False - if trx.strand == '-': - trx_reverse = True - - # Sort the exons increasing in genome coordinate if the transcript is coded on the + strand. - # Sort in decreasing order if the transcript is coded on the - strand. 
- exons = sorted(trx.exons, key=lambda x: x.start, reverse=trx_reverse) - - # for brkpt in brkpts: - # print 'SV breakpoints for segTrx', brkpt.dist, brkpt.svBrkpt.chrom, brkpt.svBrkpt.svType, brkpt.svBrkpt.genomicCoords[brkpt.brkptIdx], brkpt.brkptIdx, segment.strand - - abrkpt = determine_annotation_brkpts(segTrx.brkpts, segmentPos, segment.strand) - selectedExons = abrkpt.select_exons(exons) - # print selectedExons - - # genomicLen = log(abs(maxminCoords[0] - maxminCoords[1]), 2) - # bpUnits = float(trxLen) / float(genomicLen) - - mergedExons = [] - for item in selectedExons: - mergedExons.extend(selectedExons[item]['coords']) - allExons = sorted(mergedExons, key=lambda x: x[0], reverse=reverse) - - # Only grab the nearest two (or one) exons to the breakpoints - plotExons = sorted(get_neighbor_exons(allExons), key=lambda x: x[0], reverse=reverse) - - ''' - Annotated exons are uniformly plotted across the transcript length. - The introns between exons are given the same amount of space as well. - There must be 3 units for every two exons - this is including breakpoints that do - not land within exons. - - binsize = transcript_length / (2 * number_exons) - 1 - 1 exon = 2 * 1 - 1 = 1 bin (note this scenario is unlikely.) - 2 exons = 2 * 2 - 1 = 3 bins (i.e. 
2 exons and 1 intron) - 3 exons = 2 * 3 - 1 = 5 bins - - Example for the first segment where the breakpoint lands in a transcript intron - exon1 exon2 breakpoint - |||||||| |||||||| | - ---------------------------------------- - ''' - binSize = trxLen / (2 * len(plotExons) - 1) - offset = trxOffset - ycoord = int(yCoord) - (float(segTrxIter) / float(5)) - labelStr = trx.geneName + ':' + trx.id + ' (' + trx.strand + ')' - ax.text(trxOffset + (float(trxLen) / float(2)), yCoord + 1.25, labelStr, ha='center', va='center', size=12) - trxElements = [] - - # print 'Plot exons', plotExons - # print 'Offset$$$', offset, binSize, trxLen, (2 * len(plotExons) - 1) - for i, exon in enumerate(plotExons): - # exon list contains: exon.start, exon.stop, exon.number or breakpoint, breakpoint genomic coordinate - rectLen = binSize - start = offset - color = segment.color - height = 0.5 - exonStr = exon[2] - # print 'start$$$', start - if exon[2] != 'breakpoint': - rect = patches.Rectangle((start, yCoord - 0.1875), rectLen, height, color=segment.color) - ax.add_patch(rect) - ax.text(start + (float(binSize) / float(2)), yCoord + 0.45, exonStr, ha='center', va='center', size=8) - # if exonStr != '': - exstart = exon[0] - exend = exon[1] - if segment.strand == '-': - exstart = exon[1] - exend = exon[0] - exstart = segment.chromName + ':' + str(exstart) - exend = segment.chromName + ':' + str(exend) - ax.text(start, yCoord - 0.45, str(exstart), ha='left', va='center', size=8) - ax.text(start + binSize, yCoord - 0.45, str(exend), ha='right', va='center', size=8) - if int(exon[0]) >= int(trx.start) and int(exon[1]) <= int(trx.stop): - trxElements.append(start) - trxElements.append(start + binSize) - # print 'trxElements', exon, trxElements - - if exon[2] == 'breakpoint' or exon[3] is not None: - # The exon lists contain the genomic coordinate for the breakpoint if it - # overlaps an exon element. This checks for that instance and plots the - # '|' for the breakpoint at the end of the exon. 
- exonStr = '' - if i == (len(plotExons) - 1): - # If the breakpoint is the last 'exon' to be plotted - # in the segment, then adjust the start by a binsize. - # print 'Exon', exon, 'adjusting start', start - start += binSize - # print 'New start', start - minCoord = 0.2 - if segTrx.svType != 'rearrangement': - minCoord = yCoord - 0.5 - ax.vlines(x=start, ymin=minCoord, ymax=yCoord + 0.5, color='grey', linewidth=1.5, zorder=2) - - if int(exon[0]) >= int(trx.start) and int(exon[1]) <= int(trx.stop): - trxElements.append(start) - - offset += binSize + rectLen + (binSize - rectLen) - # print 'Rect plot coords', start, yCoord, start + rectLen, binSize - # if exon[3] is not None: - # # The exon lists contain the genomic coordinate for the breakpoint if it - # # overlaps an exon element. This checks for that instance and plots the - # # '|' for the breakpoint at the end of the exon. - # if i == (len(plotExons) - 1): - # start += binSize - # minCoord = 0.2 - # if segTrx.svType != 'rearrangement': - # minCoord = yCoord - 0.5 - # # print minCoord - # ax.vlines(x=start, ymin=minCoord, ymax=yCoord + 0.5, color='grey', linewidth=1.5, zorder=2) - # This guarantees that intergenic breakpoints don't appear to be in the transcript. 
- # print 'trxMin Max', trxElements - - if len(trxElements) == 0: - rect = patches.Rectangle((trxOffset, yCoord), trxLen, 0.125, color='grey') - ax.add_patch(rect) - continue - trxMin = max(min(trxElements), trxOffset) - trxMax = min(max(trxElements), trxOffset + trxLen) - rect = patches.Rectangle((trxMin, yCoord), trxMax - trxMin, 0.125, color=segment.color) - ax.add_patch(rect) - # rect = patches.Rectangle((start, yCoord), 0.1, 5, color='black') - # ax.add_patch(rect) - - - # for exon in selectedExons: - # genomicStart = maxminCoords[2] - # if maxminCoords[3] == 'all': - # genomicStart = int(trx.start) - # # if reverse: - # # genomicStart = maxminCoords[1] - # # ll = [log(int(exon[0]), 2), log(int(exon[1]), 2)] - # e1 = log(max(abs(int(genomicStart) - int(exon[0])), 1), 2) * bpUnits - # e2 = log(max(abs(int(genomicStart) - int(exon[1])), 1), 2) * bpUnits - # if segmentPos == 'first': - # e1 = trxLen - e1 - # e2 = trxLen - e2 - # print 'genomic start', genomicStart, bpUnits - # print 'Exon', exon[0], exon[1], trxOffset + e1, exon[0] - exon[1] - # eCoords = [e1, e2] - # print 'Mapped coords', e1, e2 - # eCoords.sort() - # if reverse: - # print 'Reversing exon coords' - # tmp = e2 - # e2 = e1 - # e1 = tmp - # print 'Mapped exon coords', e1, e2 - # ycoord = int(yCoord) - (float(segTrxIter) / float(5)) - # color = segment.color - # rectLen = e2 - e1 - # if exon[2] == 'breakpoint': - # color = 'black' - # # rectLen = 0.25 - - # rect = patches.Rectangle((trxOffset + e1, ycoord), rectLen, 1, color=color) - # ax.add_patch(rect) - # if exon[2] != '' and exon[2] != 'breakpoint': - # ax.text(trxOffset + e1, ycoord, exon[2], ha='center', va='top', size=8) - # ax.text(trxOffset, yCoord + 1, trx.strand, ha='center', va='top', size=10) - segTrxIter += 1 diff --git a/breakmer/processor/__init__.py b/breakmer/processor/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/breakmer/processor/analysis.py b/breakmer/processor/analysis.py deleted file mode 100644 
index 21bf82a..0000000 --- a/breakmer/processor/analysis.py +++ /dev/null @@ -1,253 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os -import sys -import logging -import time -import math -import multiprocessing -import breakmer.processor.target as target -import breakmer.utils as utils - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -def check_status(results): - """Check the status of the multiple processors running analysis - - Args: - results (list): A list of results from the multiprocessing analysis. - Returns: - notReady (int): An integer value indicating the number of processors not complete. - """ - - notReady = 0 - for r in results: - if not r.ready(): - notReady += 1 - return (notReady) - - -def wait(results): - """Check if the nprocs are complete. - - Args: - results (list): A list of results from the multiprocessing analysis. - Returns: - None - Raises: - None - """ - - njobs = check_status(results) - while njobs > 0: - time.sleep(10) - jobs = check_status(results) - if jobs < njobs: - njobs = jobs - - -def analyze_targets(targetList): - """Analyze a list of targets. - - A list of TargetManager objects are passed in to be analyzed independently. - Each target ref data is set, if necessary, then the reads are extracted, - contigs built, and calls made. - - This function performs all the top level functions on the target regions being analyzed. - - Args: - targetList (list): A list of TargetManager objects, representing target regions. - Returns: - aggregateResults (dict): A dictoinary containing lists of formatted output strings for the - contig-based calls and the discordant-only read clusters. - Raises: - None - """ - - aggregateResults = {'contigs': [], 'discreads': []} # Formatted output strings for contig based calls and discordant read calls are different. 
- for targetRegion in targetList: - # print 'Analyzing', targetRegion.name - utils.log('breakmer.processor.analysis', 'info', 'Analyzing %s' % targetRegion.name) - targetRegion.set_ref_data() - if targetRegion.fnc == 'prepare_reference_data': # Stop here if only preparing ref data. - continue - if not targetRegion.find_sv_reads(): # No SV reads extracted. Exiting. - continue - targetRegion.compare_kmers() # Perform kmer subtraction. - targetRegion.resolve_sv() # Assemble extracted reads and make calls. - if targetRegion.has_results(): - outputs = targetRegion.get_formatted_output() - for key in outputs: - aggregateResults[key].extend(outputs[key]) - targetRegion.complete_analysis() # Write results out to file. - return aggregateResults - - -class RunTracker: - """Class to manage the running of all the target region analyses. - The params object is passed in with all the input information. - The run() function creates the target region objects from the - param inputs and then starts the analysis for each target. - - Args: - params: ParamManager object. - Returns: - None - """ - - def __init__(self, params): - self.params = params - # self.results = [] - self.loggingName = 'breakmer.processor.analysis' - - def run(self): - """Create and analyze the target regions. - The target objects are made and grouped for multiprocessing (if set) - and these are all analyzed independently. This is where the analysis - starts and ends. - - Args: - None - Returns - None - """ - - startTime = time.clock() # Track the run time. - - self.params.start_blat_server() - if self.params.fncCmd == 'start_blat_server': - print 'Server started!' - return - - targetAnalysisList = self.create_targets() - - aggResults = {'contigs': [], 'discreads': []} # Buffer the formatted output strings for each target to write out in batch. - nprocs = int(self.params.get_param('nprocs')) - if nprocs > 1: # Make use of multiprocessing by mapping targets to n jobs. 
- utils.log(self.loggingName, 'info', 'Creating all reference data.') - p = multiprocessing.Pool(nprocs) - multiprocResults = [] - for targetList in targetAnalysisList: - multiprocResults.append(p.apply_async(analyze_targets, (targetList, ))) - wait(multiprocResults) - for multiprocResult in multiprocResults: - a = multiprocResult.get() - aggResults['contigs'].extend(a['contigs']) - aggResults['discreads'].extend(a['discreads']) - else: - aggResults = analyze_targets(targetAnalysisList) - - if self.params.fncCmd == 'prepare_reference_data': - print 'Reference data setup!' - return - - self.write_aggregated_output(aggResults) - utils.log(self.loggingName, 'info', 'Analysis complete in %s' % str(time.clock() - startTime)) - - if not self.params.get_param('keep_blat_server'): # Keep blat server is specified. - cmd = '%s stop %s %d' % (self.params.get_param('gfserver'), self.params.get_param('blat_hostname'), int(self.params.get_param('blat_port'))) - os.system(cmd) - print 'Analysis complete!' - - def create_targets(self): - """Create target objects and group them by the number of - multiprocs that are specified (i.e. n=1 for 1 processor.) - - If multiprocs are used then split the list into n batches: - [1,2,3,4,5,....,20] = [[targetGroup1], [targetGroup2],...] - - N target groups (+1 if there is a remainder). - - Store all the TargetManager instances in self.targets dictionary. - self.targets[] = TargetManager instance. - - Args: - None - Returns: - trgtGroups (list): A list of lists containing target objects. Each toplevel list is - analyzed by a processor. - """ - - nprocs = int(self.params.get_param('nprocs')) - multiprocs = nprocs > 1 - ngroups = nprocs - ntargets = len(self.params.targets) - ntargetsPerGroup = ntargets / nprocs - modval = math.fmod(ntargets, nprocs) - if modval > 0: - ngroups += 1 - trgtGroups = [] - trgtGroup = [] - - # Iterate through the target name list, sorted alphabetically. 
- targetNames = self.params.get_target_names() - targetNames.sort() - for targetName in targetNames: - targetManager = target.TargetManager(targetName, self.params) - if multiprocs: - if len(trgtGroup) == ntargetsPerGroup: - trgtGroups.append(trgtGroup) - trgtGroup = [] - trgtGroup.append(targetManager) - else: - trgtGroups.append(targetManager) - - # For the last batch, check if there are less elements than each group has - # if so, then extend the last group to add them, otherwise create a new batch. - if multiprocs: - if len(trgtGroup) < ntargetsPerGroup: - trgtGroups[-1].extend(trgtGroup) - else: - trgtGroups.append(trgtGroup) - return trgtGroups - - def write_aggregated_output(self, aggregateResults): - """Write the SV calls to a top level file in the specified output directory. - Header is written at the top of the file if option to remove is not - specified. - - The output files are: - /_svs.out - /_discreads.out - - Args: - aggregateResults (dict): A dictionary containing the formatted output string values. - Returns: - None - """ - - # Write assembled contig-based SV calls. 
- if len(aggregateResults['contigs']) > 0: - allResultFn = os.path.join(self.params.paths['output'], self.params.get_param('analysis_name') + "_svs.all.out") - filteredResultFn = os.path.join(self.params.paths['output'], self.params.get_param('analysis_name') + "_svs.out") - utils.log(self.loggingName, 'info', 'Writing %s aggregated results files: all result - %s and filtered results - %s' % (self.params.get_param('analysis_name'), allResultFn, filteredResultFn)) - allResultFile = open(allResultFn, 'w') - filteredResultFile = open(filteredResultFn, 'w') - for i, formattedResultStr in enumerate(aggregateResults['contigs']): - headerStr, formattedResultValuesStr = formattedResultStr - if not self.params.get_param('no_output_header') and i == 0: - allResultFile.write(headerStr + '\n') - filteredResultFile.write(headerStr + '\n') - allResultFile.write(formattedResultValuesStr + '\n') - resultValues = formattedResultValuesStr.split('\t') - if resultValues[-3] != "True": - filteredResultFile.write(formattedResultValuesStr + '\n') - allResultFile.close() - filteredResultFile.close() - - # Write discordant read pair clusters. - if len(aggregateResults['discreads']) > 0: - resultFn = os.path.join(self.params.paths['output'], self.params.get_param('analysis_name') + "_discreads.out") - utils.log(self.loggingName, 'info', 'Writing %s aggregated results file %s' % (self.params.get_param('analysis_name'), resultFn)) - resultFile = open(resultFn, 'w') - for i, formattedResultStr in enumerate(aggregateResults['discreads']): - headerStr, formattedResultValuesStr = formattedResultStr - if not self.params.get_param('no_output_header') and i == 0: - resultFile.write(headerStr + '\n') - resultFile.write(formattedResultValuesStr + '\n') - resultFile.close() diff --git a/breakmer/processor/bam_handler.py b/breakmer/processor/bam_handler.py deleted file mode 100644 index 704b068..0000000 --- a/breakmer/processor/bam_handler.py +++ /dev/null @@ -1,773 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -"""bam_handler.py module - -This module contains the classes and functions to handle the -""" - -import pysam - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -def trim_qual(read, min_qual, min_len): - qual_str = read.qual - q = [] - coords = [0, len(qual_str)] - start = seq_trim(qual_str, min_qual) - if start == len(qual_str): - return None - else: - end = len(qual_str) - seq_trim(qual_str[::-1], min_qual) - lngth = end - start - if lngth < min_len: - return None - nseq = read.seq[start:end] - nqual = qual_str[start:end] - read.seq = nseq - read.qual = nqual - return read - - -def fq_line(read, indel_only, min_len, trim=True): - add_val = '0' - if indel_only: - add_val = '1' - lineout = None - if trim: - read = trim_qual(read, 5, min_len) - if read: - lineout = "@" + get_seq_readname(read) + "_" + add_val + "\n" + read.seq + "\n+\n" + read.qual + "\n" - return lineout - - -def get_seq_readname(read): - """ """ - end = '1' - if read.is_read2: - end = '2' - return read.qname + "/" + end - - -def check_pair_overlap(mate_seq, read, coords, trim_dir): - """ """ - nmisses = 0 - add_clip = True - clip_seq = read.seq[coords[0]:coords[1]] - clip_len = coords[1] - coords[0] - - if abs(read.isize) < len(read.seq): - if abs(len(read.seq) - (abs(read.isize) + 1)) >= clip_len: - add_clip = False - else: - while check_overlap(trim_dir, mate_seq, clip_seq) and nmisses < 5 and len(clip_seq) > 0: - if trim_dir == 'back': - clip_seq = clip_seq[0:(len(clip_seq) - 1)] - else: - clip_seq = clip_seq[1:len(clip_seq)] - nmisses += 1 - if len(clip_seq) == 0 or nmisses == 5: - add_clip = True - else: - add_clip = False - return add_clip - - -def check_overlap(dir, mate_seq, clip_seq): - """ """ - if dir == 'back': - return mate_seq.find(clip_seq) != (len(mate_seq) - len(clip_seq)) - else: - return mate_seq.find(clip_seq) != 0 - - -def get_clip_coords(read): - """This 
will parse a cigar string for a read and determine the coordinates - of the read that are not softclipped by the aligner. - - Read cigar is a list of tuples [(4,5),(0,80),(4,15)] 5 bp clipped in the start, 80 bp matching, 15 bp clipped at the end - Start: coords = [0,0] - Iter 1: coords = [5,5] - Iter 2: coords = [5,85] - Iter 3: coords = [5,85] - - Args: - read: pysam read object. - Return: - clip_coords: List with two integer values indicating the coordinates of - the sequence read that are not clipped. - """ - - clip_coords = [0, len(read.qual)] - # First value is start index, second value is end index. - coords = [0, 0] - for i in range(len(read.cigar)): - code, clen = read.cigar[i] - # Inc coords if not deletion or softclip - if not code == 2 and not code == 4: - coords[1] += clen - # First value is softclip, increment both by clip amount. - if code == 4: - if i == 0: - coords[0] = clen - coords[1] += clen - clip_coords = coords - return clip_coords - - -def seq_trim(qualStr, minQual): - """Find the first position in a list of quality values that is above the minimum - quality value input. - Iterate over the list of quality values, starting at the first position, and - return the position where the quality if greater than minQual. - Args: - qualStr: List of quality values from pysam read object (i.e., read.qual). - These are Phred-based and assumed to be offset by 33. - minQual: Integer value of the minimum acceptable quality - Return: - counter: Integer representing the position in the list. - """ - - counter = 0 - while (ord(qualStr[counter]) - 33) < minQual: - counter += 1 - if counter == len(qualStr): - break - return counter - - -def trim_coords(qualStr, minQual): - """Searches quality values of a sequence read start->end and end->start to - determine if there is a string of low quality sequences. - - Scan along the qualStr and continue while the quality is < minQual and - return the index of the last low quality score in the string. 
- - qualStr = [1-1-1-2-2-2-2-20-20-20-30-30-30] - seq_trim(qualStr, 3) will return 6 for the start and len(qualStr) for the end. - - Args: - qualStr (list): List of quality values from pysam read object (i.e., read.qual). - These are Phred-based and assumed to be offset by 33. - minQual (int): Value of the minimum acceptable Phred quality score. - Return: - three element tuple: - 1. Position start where sequence quality is good (> minQual) - 2. Position end where sequence quality is good (> minQual) - 3. Length of the sequence that has good quality. - """ - - # Scan from the start of the qualStr and stop when the base qual > minQual - start = seq_trim(qualStr, minQual) - if start == len(qualStr): - return (0, 0, 0) - else: - # Reverse qualStr and scan from the start and stop when the base qual > minQual - end = len(qualStr) - seq_trim(qualStr[::-1], minQual) - trimLength = end - start - return (start, end, trimLength) - - -def pe_meta(read): - """Checks if the read is from a proper paired-end mapping, assuming an Illumina - library. - - If the read is mapped in a proper pair, check if it overlaps with its paired read. - - Args: - read: pysam read object - Return: - proper_map: Boolean to indicate that the read-pair is properly mapped - overlap_read: Boolean to indicate that the read-pair overlap (i.e., - insert size < 2*read_len - """ - - properMap = False - overlapReads = False - if (((read.flag == 83 or read.flag == 147) and read.tlen < 0) or ((read.flag == 99 or read.flag == 163) and read.tlen > 0)): - properMap = True - if abs(read.tlen) < (2 * len(read.seq)): - overlapReads = True - return properMap, overlapReads - - -def get_region_reads(bamFile, chrom, start, end): - """Open BAM file using pysam and fetch aligned reads in the - specified region. - - Args: - bamFile (str): Bam file full path, index must be in the same location - chrom (str): Chromosome name for region - start (int): Region's start position. - end (int): Region's end position. 
- Return: - reads (list): List containing pysam read objects - bamF (pysam bam object): Open pysam bam file object. - """ - - bamF = pysam.Samfile(bamFile, 'rb') - reads = bamF.fetch(chrom, start, end) - return (reads, bamF) - - -def get_variant_reads(bamFile, chrom, start, end, insertSizeThresh): - """Get the softclipped, discordant read pairs, and unmapped reads. - These reads are stored in the VarReadTracker object. - - Iterate through all the reads in a region. Skip the duplicates and - qc failed reads. Store all the unmapped reads. All other reads pass - to the check_read function. - - Args: - bamFile (str): Path to the bam file to open, must be indexed! - chrom (str): Chromosome of the region to extract - start (int): Region start location to extract. - end (int): Region end location to extract. - Return: - varReadTracker (VariantReadTracker): VarReadTracker object - """ - - reads, bamF = get_region_reads(bamFile, chrom, start, end) - varReadTracker = VariantReadTracker(bamF, insertSizeThresh) - for read in reads: - skip = False - if read.mate_is_unmapped or read.rnext == -1: - read.mate_is_unmapped = True - if read.is_duplicate or read.is_qcfail: - skip = True - if read.is_unmapped: - varReadTracker.add_unmapped_read(read) - skip = True - if skip: - continue - varReadTracker.check_read(read) - return varReadTracker - - -def get_strand_str(isReverseBoolean): - strand = '+' - if isReverseBoolean: - strand = '-' - return strand - - -def get_strand_key(read, ordered=False): - strands = [] - readStrand = '+' - if read.is_reverse: - readStrand = '-' - mateStrand = '+' - if read.mate_is_reverse: - mateStrand = '-' - strands = [readStrand, mateStrand] - - if ordered: - strands.reverse() - return ':'.join(strands) - - -def cluster_regions(dReadLst, idx, clusterType): - distBuffer = None - clusterLst = [] - for dRead in dReadLst: - if distBuffer is None: - distBuffer = dRead.readLen - # trgtStart = dRead.pos[0] - # mateStart = dRead.pos[1] - # print 'cluster_regions 
dRead', dRead.pos, dRead.readLen, clusterType - if len(clusterLst) == 0: - clusterLst.append([dRead.pos[idx], dRead.pos[idx] + dRead.readLen, [dRead.readInfoStr]]) - # print 'Initial cluster list', clusterLst - else: - # Check overlap - add = False - for i, c in enumerate(clusterLst): - # print 'Checking read pos against cluster region', c, dRead.pos - startWithin = dRead.pos[idx] >= c[0] and dRead.pos[idx] <= c[1] - withinBuffer = dRead.pos[idx] > c[1] and dRead.pos[idx] - c[1] <= distBuffer - # print 'in check', startWithin, withinBuffer - if startWithin or withinBuffer: - readInfoLst = clusterLst[i][2] - readInfoLst.append(dRead.readInfoStr) - # print 'Add read to cluster region', clusterLst[i] - clusterLst[i] = [c[0], dRead.pos[idx] + dRead.readLen, readInfoLst] - add = True - if not add: - # print 'No add, creating new cluster region' - clusterLst.append([dRead.pos[idx], dRead.pos[idx] + dRead.readLen, [dRead.readInfoStr]]) - return clusterLst - - -def get_cluster_membership(item, clusters, idx): - for i, cluster in enumerate(clusters): - # print cluster - # print item.pos - if item.pos[idx] >= cluster[0] and item.pos[idx] <= cluster[1]: - return i - - -class discReadPair: - def __init__(self, read, orderType): - self.pos = [] - self.strands = [] - self.readName = read.qname - self.readLen = read.rlen - self.readInfoStr = '' - # self.read = read - self.set_values(read, orderType) - - def set_values(self, read, orderType): - # print 'bam_handler.py set_values() for discReadPair', read.pos, read.mpos - self.pos = [read.pos, read.mpos] - self.strands = [get_strand_str(read.is_reverse), get_strand_str(read.mate_is_reverse)] - if (orderType == 'ordered') and (read.mpos < read.pos): - # Store the read and mate ordered by chrom alignment position - self.pos.reverse() - self.strands.reverse() - self.readInfoStr = '|'.join([str(x) for x in [read.qname, self.strands[0], self.strands[1], read.tlen, read.mpos]]) - # print 'bam_hanlder.py set_values() readInfoStr', 
self.readInfoStr - - -class discReads: - """ - """ - def __init__(self, insertSizeThresh): - self.reads = {'inter': {}, 'intra': {}} - self.insertSizeThresh = insertSizeThresh - self.checkedIds = {} - self.clusters = {} - self.disc = {} - - def add_inter_discread(self, bam, read): - # print 'bam_handler.py add_inter_discread()', read - dRead = discReadPair(read, 'unordered') - mateRefId = bam.getrname(read.rnext) - if mateRefId not in self.reads['inter']: - self.reads['inter'][mateRefId] = {} - strandKey = get_strand_key(read) - if strandKey not in self.reads['inter'][mateRefId]: - self.reads['inter'][mateRefId][strandKey] = [] - self.reads['inter'][mateRefId][strandKey].append(dRead) - # print 'bam_handler.py add_inter_discread() self.reads inter', mateRefId, strandKey, '\n' - # for dRead in self.reads['inter'][mateRefId][strandKey]: - # print '\t', dRead.readInfoStr - - if mateRefId not in self.disc: - self.disc[mateRefId] = [] - self.disc[mateRefId].append((read.pos, read.mpos)) - # print 'bam_handler.py add_inter_discread() disc dictionary', mateRefId, self.disc[mateRefId] - - def add_intra_discread(self, read, overlapping_reads): - discType = 'other' - dRead = discReadPair(read, True) - disc_ins_size = abs(read.tlen) >= self.insertSizeThresh - strandKey = '' - if (read.is_reverse and read.mate_is_reverse) or (not read.is_reverse and not read.mate_is_reverse): - discType = 'inv' - strandKey = get_strand_key(read) - elif (read.is_reverse and not read.mate_is_reverse and read.pos < read.mpos) or (not read.is_reverse and read.mate_is_reverse and read.pos > read.mpos): - discType = 'td' - strandKey = '-:+' - elif disc_ins_size: - discType = 'dist' - strandKey = get_strand_key(read, True) - elif (read.is_reverse and not read.mate_is_reverse and read.pos < read.mpos) or (not read.is_reverse and read.mate_is_reverse and read.mpos < read.pos): - discType = 'other' - strandKey = get_strand_key(read, True) - else: - dRead = None - - if dRead is None: - return - - if 
discType not in self.reads['intra']: - self.reads['intra'][discType] = {} - if strandKey not in self.reads['intra'][discType]: - self.reads['intra'][discType][strandKey] = [] - self.reads['intra'][discType][strandKey].append(dRead) - - if read.tid not in self.disc: - self.disc[read.tid] = [] - self.disc[read.tid].append((read.pos, read.mpos)) - - def add_read_pair(self, bam, read, overlapping_reads): - """ - Args: - read: - Return: - None - """ - if read.qname not in self.checkedIds: - self.checkedIds[read.qname] = read.qname - else: - return - - if read.mapq == 0 or read.mate_is_unmapped: - return - - # Extract read-pairs that are mapped to different chromosomes or fair apart. - diff_chroms = read.rnext != -1 and read.tid != read.rnext - if read.tid == read.rnext and not overlapping_reads: - self.add_intra_discread(read, overlapping_reads) - elif diff_chroms: - # print 'bam_handler.py add_read_pair(), diff_chroms', diff_chroms, read.rnext, read.tid, read.rnext - self.add_inter_discread(bam, read) - - def cluster_discreads(self): - """self.reads is a dictionary with 3 levels - 1. Inter / intra - 2. Chrom (inter) / inv, td, dist, other (intra) - 3. -:+, -:-, +:+, +:- - 4. 
List of discRead objects - """ - # print 'cluster_discreads()', '*'*25 - for key1 in self.reads: - # print 'key1', key1 - d1 = self.reads[key1] - for key2 in d1: - # print 'key2', key2 - d2 = d1[key2] - interClusterClusters = {} - for key3 in d2: - # print 'key3', key3 - dReadsLst = d2[key3] - # print 'read list', dReadsLst - srt1 = sorted(dReadsLst, key=lambda x: x.pos[0]) - srt2 = sorted(dReadsLst, key=lambda x: x.pos[1]) - c1 = cluster_regions(srt1, 0, 'target') - c2 = cluster_regions(srt2, 1, 'mate') - for item in dReadsLst: - # print 'Disc read pair obj', item.readInfoStr - cIdx1 = get_cluster_membership(item, c1, 0) - cIdx2 = get_cluster_membership(item, c2, 1) - regionPairKey = '|'.join([key1, key2, key3, str(cIdx1), str(cIdx2)]) - # print 'regionPairKey', regionPairKey - leftBrkpt = c1[cIdx1][0] - rightBrkpt = c2[cIdx2][0] - leftStrand, rightStrand = key3.split(':') - if leftStrand == '+': - leftBrkpt = c1[cIdx1][1] - if rightStrand == '+': - rightBrkpt = c2[cIdx2][1] - if regionPairKey not in self.clusters: - self.clusters[regionPairKey] = {'readCount': 0, # Read count for sub cluster, based on strand - 'interClusterCount': 0, # Read count for cluster ignoring strands, this will be used for interchrom clustering. 
- 'leftBounds': c1[cIdx1][0:2], - 'rightBounds': c2[cIdx2][0:2], - 'leftBrkpt': leftBrkpt, - 'rightBrkpt': rightBrkpt, - 'clusterId': len(self.clusters) + 1} - if key1 == 'inter': - # print 'Inter check clustering', interClusterClusters - matchFound = False - for clusterKey in interClusterClusters: - # print 'Checking clustering of inter clusters', clusterKey, self.clusters[clusterKey]['leftBrkpt'], regionPairKey, leftBrkpt - # print 'Checking clustering of inter clusters', clusterKey, self.clusters[clusterKey]['rightBrkpt'], regionPairKey, rightBrkpt - if (abs(self.clusters[clusterKey]['leftBrkpt'] - leftBrkpt) < 1000) and (abs(self.clusters[clusterKey]['rightBrkpt'] - rightBrkpt) < 1000): - # Merge the clusters - interClusterClusters[clusterKey].append(regionPairKey) - matchFound = True - break - if not matchFound: - # print 'No match', regionPairKey - interClusterClusters[regionPairKey] = [regionPairKey] - self.clusters[regionPairKey]['readCount'] += 1 - self.clusters[regionPairKey]['interClusterCount'] += 1 - if len(interClusterClusters) > 0: - for clusterKey in interClusterClusters: - totalCounts = 0 - for cKey in interClusterClusters[clusterKey]: - totalCounts += self.clusters[cKey]['readCount'] - for cKey in interClusterClusters[clusterKey]: - self.clusters[cKey]['interClusterCount'] = totalCounts - self.clusters[cKey]['clusterId'] = self.clusters[clusterKey]['clusterId'] - # print 'Complete clusters', self.clusters - return self.clusters - - def check_inv_readcounts(self, brkpts): - """ """ - brkpt1 = min(brkpts) - brkpt2 = max(brkpts) - counts = 0 - bpBuffer = 50 - # print 'Inversion reads', self.reads['intra']['inv'] - # print 'Brkpts', brkpts - if 'inv' not in self.reads['intra']: - return counts - for strand in self.reads['intra']['inv']: - lStrand, rStrand = strand.split(':') - strandReads = self.reads['intra']['inv'][strand] - for dRead in strandReads: - # print strand, dRead.pos - if lStrand == '+' and rStrand == '+': - if (dRead.pos[0] <= (brkpt1 + 
bpBuffer)) and (dRead.pos[1] <= (brkpt2 + bpBuffer) and dRead.pos[1] >= (brkpt1 - bpBuffer)): - counts += 1 - else: - # print dRead.pos, brkpt1, brkpt2 - if (dRead.pos[0] <= (brkpt2 + bpBuffer) and dRead.pos[0] >= (brkpt1 - bpBuffer)) and dRead.pos[1] >= (brkpt2 - bpBuffer): - counts += 1 - # print 'Counts', counts - return counts - - def check_td_readcounts(self, brkpts): - """ """ - brkpt1 = min(brkpts) - brkpt2 = max(brkpts) - counts = 0 - bpBuffer = 50 - if 'td' not in self.reads['intra']: - return counts - for dRead in self.reads['intra']['td']['-:+']: - if (dRead.pos[0] >= (brkpt1 - bpBuffer) and dRead.pos[0] <= (brkpt2 + bpBuffer)) and (dRead.pos[1] <= (brkpt2 + bpBuffer) and dRead.pos[1] >= (brkpt1 - bpBuffer)): - counts += 1 - return counts - - def check_other_readcounts(self, brkpts): - """ """ - counts = [0] * len(brkpts) - for i in range(len(brkpts)): - b = brkpts[i] - if 'other' not in self.reads['intra']: - return max(counts) - for strand in self.reads['intra']['other']: - lStrand, rStrand = strand.split(':') - strandReads = self.reads['intra']['other'][strand] - for dRead in strandReads: - if abs(dRead.pos[0] - b) <= 300 or abs(dRead.pos[1] - b) <= 300: - counts[i] += 1 - return max(counts) - - def check_inter_readcounts(self, targetBrkptChr, targetBrkptBp, nonTargetBrkpts): - """ """ - # counts = [0] * len(brkpts) - # for i in range(len(brkpts)): - # b = brkpts[i] - # if 'other' not in self.reads['intra']: - # break - # for strand in self.reads['intra']['other']: - # lStrand, rStrand = strand.split(':') - # strandReads = self.reads['intra']['other'][strand] - # for dRead in strandReads: - # if abs(dRead.pos[0] - b) <= 300 or abs(dRead.pos[1] - b) <= 300: - # counts[i] += 1 - # return max(counts) - discReadCount = 0 - # print 'sv_caller.py get_disc_read_count', targetBrkptChr, targetBrkptBp - # print 'Read storage dict', self.reads['inter'] - for otherBrkpts in nonTargetBrkpts: - nonTargetBrkptChr = otherBrkpts[0].replace('chr', '') - 
nonTargetBrkptBps = otherBrkpts[1:] - # print 'Non-target brkpts', nonTargetBrkptChr, nonTargetBrkptBps - for nonTargetBrkptBp in nonTargetBrkptBps: - # print 'non-target brkpt', nonTargetBrkptBp - if nonTargetBrkptChr in self.reads['inter']: - for strand in self.reads['inter'][nonTargetBrkptChr]: - for discReadPair in self.reads['inter'][nonTargetBrkptChr][strand]: - d1 = abs(targetBrkptBp - discReadPair.pos[0]) - d2 = abs(nonTargetBrkptBp - discReadPair.pos[1]) - # print 'distances', d1, d2 - if d1 <= 1000 and d2 <= 1000: - discReadCount += 1 - return discReadCount - - -class VariantReadTracker: - """A class to track the reads that are identified to be 'misaligned' to - the reference sequence. - - Attributes: - pair_indices (dict): Dictionary of a dictionary tracking the index of paired - reads in the valid list. - valid (list): List of read objects that are valid to consider for extraction. - disc (dict): Dictionary of read IDs for read-pairs that are discordantly mapped. - unmapped (dict): Dictionary of unmapped reads with mapped mate in the region. - unmapped_keep (list): List containing names of reads that are mapped but their mate is unmapped and wasn't - kept on the first pass. - inv (list): List of tuples, each containing read-pair information that have alignments - suggestive of an inversion event. - td (list): List of tuples, each containing read-pair information that have alignments - suggestive of a tandem dup event. - other (list): List of tuples, each containing read-pair information that have alignments - suggestive of some uncategorized event. - sv (dict): Dictionary - bam (str): Bam file source the reads came from. 
- """ - - def __init__(self, bamFile, insertSizeThresh): - """ - """ - - self.pair_indices = {} - self.valid = [] - self.discReadTracker = discReads(insertSizeThresh) - self.unmapped = {} - self.unmapped_keep = [] - self.sv = {} - self.bam = bamFile - - def check_read(self, read): - """Stores all reads in the self.pair_indices dictionary if it is - mapped. - - Check if the read is part of a discordantly mapped read pair. - - Check if the read is properly mapped, as indicated by bam encoding, and - whether the read overlaps with its pair. - - self.valid = [(read, proper_map, overlapping_reads), (read, proper_map, overlapping_reads), ...] - self.pair_indices[read.qname][1 (read1)/0 (read2)] = index of read in self.valid - - Args: - read (pysam read obj): An aligned sequence read. - Return: - None - """ - - proper_map, overlapping_reads = pe_meta(read) - if read.qname not in self.pair_indices and not read.mate_is_unmapped: - self.discReadTracker.add_read_pair(self.bam, read, overlapping_reads) - - self.valid.append((read, proper_map, overlapping_reads)) - if read.qname not in self.pair_indices and not read.mate_is_unmapped: - self.pair_indices[read.qname] = {} - if read.qname in self.pair_indices: - self.pair_indices[read.qname][int(read.is_read1)] = len(self.valid) - 1 - - def add_unmapped_read(self, read): - """Add read to unmapped dictionary with name as the key, object as the value. - - Args: - read (pysam read obj): pysam read object. - Return: - None - """ - - self.unmapped[read.qname] = read - - def check_clippings(self, kmer_size, region_start_pos, region_end_pos): - """ - """ - - for read_vals in self.valid: - read, proper_map, overlap_reads = read_vals - if read.cigar or len(read.cigar) > 1: - good_qual_coords = trim_coords(read.qual, 3) # Get the (start, end, length) of the high-quality sequence bases. - clip_coords = get_clip_coords(read) # Get the [start, end] of the non-clipped sequence bases. 
- self.extract_clippings(read_vals, clip_coords, good_qual_coords, kmer_size) - - if (read.pos >= region_start_pos and read.pos <= region_end_pos) and read.mapq > 0 and read.mate_is_unmapped: - self.unmapped_keep.append(read.qname) - - def extract_clippings(self, read_vals, clip_coords, good_qual_coords, kmer_size): - """ - """ - - read, proper_map, overlap_reads = read_vals - clip_seqs = {'clipped': [], 'buffered': []} - - if clip_coords[0] <= good_qual_coords[0] and clip_coords[1] >= good_qual_coords[1]: - return - - new_clip_coords = [0, 0] - add_clip = [False, False] - indel_only = False - start_clip = clip_coords[0] > 0 - end_clip = clip_coords[1] < len(read.qual) - if start_clip and end_clip: - add_clip = [True, True] - else: - if start_clip: - add_clip[0] = True - new_clip_coords = [0, clip_coords[0]] - if overlap_reads and read.is_reverse: - mate_seq = self.valid[self.pair_indices[read.qname][int(read.is_read1)]][0].seq - add_clip[0] = check_pair_overlap(mate_seq, read, [0, clip_coords[0]], 'back') - if proper_map: - if read.is_reverse: - indel_only = True - else: - indel_only = False - elif end_clip: - new_clip_coords = [clip_coords[1], len(read.seq)] - add_clip[1] = True - if overlap_reads and not read.is_reverse: - mate_seq = self.valid[self.pair_indices[read.qname][int(read.is_read1)]][0].seq - add_clip[1] = check_pair_overlap(mate_seq, read, [clip_coords[1], len(read.seq)], 'front') - if proper_map: - if read.is_reverse: - indel_only = indel_only and False - else: - indel_only = indel_only and True - final_add = add_clip[0] or add_clip[1] - if add_clip[0]: - clip_seqs['buffered'].append(read.seq[0:(clip_coords[0] + kmer_size)]) - clip_seqs['clipped'].append(read.seq[0:clip_coords[0]]) - if add_clip[1]: - clip_seqs['buffered'].append(read.seq[(clip_coords[1] - kmer_size):len(read.seq)]) - clip_seqs['clipped'].append(read.seq[clip_coords[1]:len(read.seq)]) - if final_add: - self.sv[get_seq_readname(read)] = (read, clip_seqs, new_clip_coords, indel_only) 
- - def write_seqs(self, clipped_fa, reads_fq, sv_bam, kmer_size): - """ - """ - - for name in self.unmapped_keep: - if name in self.unmapped: - read = self.unmapped[name] - self.sv[get_seq_readname(read)] = (read, None, None, False) - lout = ">" + read.qname + "\n" + str(read.seq) - clipped_fa.write(lout + "\n") - - for name in self.sv: - read, clip_seqs, clip_coords, indel_only = self.sv[name] - if sv_bam: - sv_bam.write(read) - lout = fq_line(read, indel_only, kmer_size, True) - if lout: - reads_fq.write(lout) - if clip_seqs: - for clip in clip_seqs['buffered']: - clipped_fa.write(">" + name + "\n" + clip + "\n") - self.bam.close() - - def clear_sv_reads(self): - """ - """ - - self.sv = None - - def get_disc_reads(self): - """This function needs to be updated to handle the new disc read storage. - """ - - return self.discReadTracker.disc - - def cluster_discreads(self): - """ - """ - - dReadClusters = self.discReadTracker.cluster_discreads() - return dReadClusters - - def check_inv_readcounts(self, brkpts): - """ - """ - - return self.discReadTracker.check_inv_readcounts(brkpts) - - def check_td_readcounts(self, brkpts): - """ """ - return self.discReadTracker.check_td_readcounts(brkpts) - - def check_other_readcounts(self, brkpts): - """ """ - return self.discReadTracker.check_other_readcounts(brkpts) - - def check_inter_readcounts(self, targetChr, targetBps, nonTargetBrkpts): - """ """ - return self.discReadTracker.check_inter_readcounts(targetChr, targetBps, nonTargetBrkpts) diff --git a/breakmer/processor/target.py b/breakmer/processor/target.py deleted file mode 100644 index 72503c7..0000000 --- a/breakmer/processor/target.py +++ /dev/null @@ -1,683 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -import os -import pysam -import shutil -import subprocess -import breakmer.utils as utils -import breakmer.processor.bam_handler as bam_handler -import breakmer.assembly.assembler as assembly - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -def load_kmers(fns, kmers): - """Iterate through the kmer flat files and store them in the kmers dictionary. - Store the kmer sequence string as the key and the count of the number of reads - containing it as the value. - - Args: - fns (str): Filenames of the kmer flat files. - kmers (dict): Dictionary of the kmer, count values, - Returns: - None - Raises: - None - """ - - if not fns: - return kmers - fns = fns.split(',') - for fn in fns: # Iterate through all the jellyfish kmer files and store the kmer as key and count as value. - for line in open(fn, 'rU'): - line = line.strip() - mer, count = line.split() - if mer not in kmers: - kmers[mer] = 0 - kmers[mer] += int(count) - - -class Variation: - """This class handles the storage and interaction of all the variant reads that could - be contributing to the support of a structural variant. - - Attributes: - params (ParamManager): Parameters for breakmer analysis. - loggingName (str): Module name for logging file purposes. - var_reads (dict): Dictionary containing the tumor sample or normal sample variation read objects (breakmer.process.bam_handler.VariantReadTracker). - cleaned_read_recs (dict): Dictionary containing the cleaned reads. - files (dict): Dicionary containing paths to file names needed for analysis. 
- kmer_clusters (list): - kmers (dict): - results (list): - discReadClusters (dict): - discReadFormatted (list): - """ - - def __init__(self, params): - self.loggingName = 'breakmer.processor.target' - self.params = params - self.var_reads = {} - self.cleaned_read_recs = None - self.kmer_clusters = [] - self.kmers = {} - self.results = [] - self.files = {} - # self.svs = {} - self.discReadClusters = {} - self.discReadFormatted = [] - - def setup_cleaned_reads(self, sampleType): - """Initiate the cleaned_read_recs dictionary for sample or normal data. - - Args: - sampleType (str): String indicating the sample type - sv or normal - Returns: - None - Raises: - None - """ - - if not self.cleaned_read_recs: - self.cleaned_read_recs = {} - self.cleaned_read_recs[sampleType] = None - - def get_var_reads(self, sampleType): - """ - """ - - return self.var_reads[sampleType] - - def clear_sv_reads(self, sampleType): - """ - """ - - self.var_reads[sampleType].clear_sv_reads() - - def clear_cleaned_reads(self): - """ - """ - - self.cleaned_read_recs = None - - def continue_analysis_check(self, type): - """ - """ - - check = True - if len(self.cleaned_read_recs[type]) == 0: - check = False - return check - - def get_sv_reads(self, sampleType): - """ - """ - - return self.var_reads[sampleType].sv - - def add_result(self, result): - """ - """ - - self.results.append(result) - - def set_var_reads(self, sampleType, bamFile, chrom, start, end, regionBuffer): - """ - - Args: - sampleType (): - bamFile (): - chrom (): - start (): - end (): - regionBuffer (): - Returns: - None - Raises: - None - """ - - # Get VariantReadTracker object from bam_handler module and extract reads. - self.var_reads[sampleType] = bam_handler.get_variant_reads(bamFile, chrom, start - regionBuffer, end - regionBuffer, self.params.get_param('insertsize_thresh')) - # Iterate through reads that are not perfectly aligned and store necessary information for downstream analysis. 
- # Store the reads with softclipped sequences that are high quality in VariantReadTracker.sv dictionary. - self.var_reads[sampleType].check_clippings(self.params.get_kmer_size(), start, end) - - # Write the bam, fastq, and fasta files with the extracted reads. - svBam = None - if sampleType == 'sv': - svBam = pysam.Samfile(self.files['sv_bam'], 'wb', template=pysam.Samfile(bamFile, 'rb')) - readsFq = open(self.files['%s_fq' % sampleType], 'w') - scFa = open(self.files['%s_sc_unmapped_fa' % sampleType], 'w') - # Write all the stored sequences into files. - self.var_reads[sampleType].write_seqs(scFa, readsFq, svBam, self.params.get_kmer_size()) - readsFq.close() - scFa.close() - - # Close the bam file, sort and index. - if sampleType == 'sv': - svBam.close() - utils.log(self.loggingName, 'info', 'Sorting bam file %s to %s' % (self.files['sv_bam'], self.files['sv_bam_sorted'])) - pysam.sort(self.files['sv_bam'], self.files['sv_bam_sorted'].replace('.bam', '')) - utils.log(self.loggingName, 'info', 'Indexing sorted bam file %s' % self.files['sv_bam_sorted']) - pysam.index(self.files['sv_bam_sorted']) - - def setup_read_extraction_files(self, sampleType, dataPath, name): - """Create file names to store the extracted reads. - This creates four files (for tumor samples): - 1. fastq with extracted reads = sv_fq or normal_fq - 2. fasta file with softclipped sequences = sv_sc_unmapped_fa - 3. bam file with extracted reads = sv_bam - 4. sorted bam file with extracted reads = sv_bam_sorted - - Args: - sampleType (str): The type of input data - sv / normal - dataPath (str): The path to the data files for this target. - name (str): The target name. 
- Returns: - None - Raises: - None - """ - - # Store extracted reads in /__reads.fastq - self.files['%s_fq' % sampleType] = os.path.join(dataPath, name + '_%s_reads.fastq' % sampleType) - # Store softclipped sequences in a fasta file /__sc_seqs.fa - self.files['%s_sc_unmapped_fa' % sampleType] = os.path.join(dataPath, name + '_%s_sc_seqs.fa' % sampleType) - - if sampleType == 'sv': - # Store variant reads in bam formatted file /_sv_reads.bam - self.files['sv_bam'] = os.path.join(dataPath, name + '_sv_reads.bam') - # Store variant reads in sorted bam file - self.files['sv_bam_sorted'] = os.path.join(dataPath, name + '_sv_reads.sorted.bam') - - def clean_reads(self, dataPath, name, sampleType): - """Trim adapter sequences from the extracted reads, format and organize - the cleaned reads into new files. - - Cutadapt is run to trim the adapter sequences from the sequence reads to - remove any 'noise' from the assembly process. The cleaned reads output - from cutadapt are then reprocessed to determine if the softclipped sequences - were trimmed off or not to further filter out reads. - - The softclipped sequences that remain are stored and a new fastq file is written. - - Args: - dataPath (str): The path to the data files for this target. - name (str): The target name. - type (str): A string indicating a tumor ('sv') or normal ('norm') sample being processed. - Return: - check (boolean): A boolean to indicate whether the are any reads left after - cleaning is complete. 
- """ - - cutadapt = self.params.get_param('cutadapt') # Cutadapt binary - cutadaptConfigFn = self.params.get_param('cutadapt_config_file') - utils.log(self.loggingName, 'info', 'Cleaning reads using %s with configuration file %s' % (cutadapt, cutadaptConfigFn)) - self.files['%s_cleaned_fq' % sampleType] = os.path.join(dataPath, name + '_%s_reads_cleaned.fastq' % sampleType) - utils.log(self.loggingName, 'info', 'Writing clean reads to %s' % self.files['%s_cleaned_fq' % sampleType]) - output, errors = utils.run_cutadapt(cutadapt, cutadaptConfigFn, self.files['%s_fq' % sampleType], self.files['%s_cleaned_fq' % sampleType], self.loggingName) - - self.setup_cleaned_reads(sampleType) - self.files['%s_cleaned_fq' % sampleType], self.cleaned_read_recs[sampleType] = utils.get_fastq_reads(self.files['%s_cleaned_fq' % sampleType], self.get_sv_reads(sampleType)) - self.clear_sv_reads(sampleType) - check = self.continue_analysis_check(sampleType) - utils.log(self.loggingName, 'info', 'Clean reads exist %s' % check) - return check - - def set_reference_kmers(self, targetRefFns): - """Set the reference sequence kmers""" - - self.kmers['ref'] = {} - for i in range(len(targetRefFns)): - utils.log(self.loggingName, 'info', 'Indexing kmers for reference sequence %s' % targetRefFns[i]) - self.get_kmers(targetRefFns[i], self.kmers['ref']) - - def set_sample_kmers(self): - """Set the sample kmers - """ - - utils.log(self.loggingName, 'info', 'Indexing kmers for sample sequence %s' % self.files['sv_cleaned_fq']) - self.kmers['case'] = {} - self.kmers['case_sc'] = {} - self.get_kmers(self.files['sv_cleaned_fq'], self.kmers['case']) - self.get_kmers(self.files['sv_sc_unmapped_fa'], self.kmers['case_sc']) - - def get_kmers(self, seqFn, kmerDict): - """Generic function to run jellyfish on a set of sequences - """ - - jellyfish = self.params.get_param('jellyfish') - kmer_size = self.params.get_kmer_size() - # Load the kmers into the kmer dictionary based on keyStr value. 
- load_kmers(utils.run_jellyfish(seqFn, jellyfish, kmer_size), kmerDict) - - def compare_kmers(self, kmerPath, name, readLen, targetRefFns): - """ - """ - - # Set the reference sequence kmers. - self.set_reference_kmers(targetRefFns) - - # Set sample kmers. - self.set_sample_kmers() - # Merge the kmers from the cleaned sample sequences and the unmapped and softclipped sequences. - scKmers = set(self.kmers['case'].keys()) & set(self.kmers['case_sc'].keys()) - # Take the difference from the reference kmers. - sampleOnlyKmers = list(scKmers.difference(set(self.kmers['ref'].keys()))) - # Add normal sample kmers if available. - if self.params.get_param('normal_bam_file'): - normKmers = {} - self.get_kmers(self.files['norm_cleaned_fq'], normKmers) - sampleOnlyKmers = list(set(sampleOnlyKmers).difference(set(normKmers.keys()))) - - # Write case only kmers out to file. - self.files['sample_kmers'] = os.path.join(kmerPath, name + "_sample_kmers.out") - sample_kmer_fout = open(self.files['sample_kmers'], 'w') - kmer_counter = 1 - self.kmers['case_only'] = {} - for mer in sampleOnlyKmers: - sample_kmer_fout.write("\t".join([str(x) for x in [mer, str(self.kmers['case'][mer])]]) + "\n") - self.kmers['case_only'][mer] = self.kmers['case'][mer] - sample_kmer_fout.close() - - # Clean out data structures. 
- self.kmers['ref'] = {} - self.kmers['case'] = {} - self.kmers['case_sc'] = {} - - utils.log(self.loggingName, 'info', 'Writing %d sample-only kmers to file %s' % (len(self.kmers['case_only']), self.files['sample_kmers'])) - self.files['kmer_clusters'] = os.path.join(kmerPath, name + "_sample_kmers_merged.out") - utils.log(self.loggingName, 'info', 'Writing kmer clusters to file %s' % self.files['kmer_clusters']) - - self.kmers['clusters'] = assembly.init_assembly(self.kmers['case_only'], self.cleaned_read_recs['sv'], self.params.get_kmer_size(), self.params.get_sr_thresh('min'), readLen) - self.clear_cleaned_reads() - self.kmers['case_only'] = {} - - def get_disc_reads(self): - """ - """ - - return self.var_reads['sv'].get_disc_reads() - - def write_results(self, outputPath, targetName): - """ - """ - - if len(self.results) > 0: - resultFn = os.path.join(outputPath, targetName + "_svs.out") - utils.log(self.loggingName, 'info', 'Writing %s result file %s' % (targetName, resultFn)) - resultFile = open(resultFn, 'w') - for i, result in enumerate(self.results): - headerStr, formattedResultValuesStr = result.get_formatted_output_values() - if i == 0: - resultFile.write(headerStr + '\n') - resultFile.write(formattedResultValuesStr + '\n') - resultFile.close() - if len(self.discReadClusters) > 0: - resultFn = os.path.join(outputPath, targetName + "_discreads.out") - utils.log(self.loggingName, 'info', 'Writing %s discordant read cluster result file %s' % (targetName, resultFn)) - resultFile = open(resultFn, 'w') - for i, discReadRes in enumerate(self.discReadFormatted): - headerStr, outStr = discReadRes - if i == 0: - resultFile.write(headerStr + '\n') - resultFile.write(outStr + '\n') - resultFile.close() - - def get_formatted_output(self): - """ - """ - - formattedResultsDict = {'contigs': [], 'discreads': []} - if len(self.results) > 0: - for i, result in enumerate(self.results): - formattedResultsDict['contigs'].append(result.get_formatted_output_values()) - if 
len(self.discReadClusters) > 0: - for i, discReadRes in enumerate(self.discReadFormatted): - formattedResultsDict['discreads'].append(discReadRes) - return formattedResultsDict - - def cluster_discreads(self, targetName, targetChrom): - """ - """ - - self.discReadClusters = self.var_reads['sv'].cluster_discreads() - self.discReadFormatted = [] - headerStr = '\t'.join(['Target_name', 'sv_type', 'cluster_id', 'left_breakpoint_estimate', 'right_breakpoint_estimate', 'strands', 'discordant_readpair_count', 'cluster_distance']) - for key in self.discReadClusters: - readCount = self.discReadClusters[key]['readCount'] - k1, k2, k3, c1, c2 = key.split('|') - checkCount = readCount - clusterDist = '0' - if k1 == 'inter': - checkCount = self.discReadClusters[key]['interClusterCount'] - if checkCount < self.params.get_param('discread_only_thresh'): - continue - svType = 'inter-chromosomal' - lChrom = 'chr' + targetChrom.replace('chr', '') - if k1 == 'inter': - rChrom = 'chr' + k2.replace('chr', '') - elif k1 == 'intra': - svType = 'intra-chromosomal_' + k2 - rChrom = lChrom - clusterDist = abs(self.discReadClusters[key]['leftBrkpt'] - self.discReadClusters[key]['rightBrkpt']) - lStrand, rStrand = k3.split(':') - lBrkpt = self.discReadClusters[key]['leftBrkpt'] - rBrkpt = self.discReadClusters[key]['rightBrkpt'] - clusterId = targetName + '_' + str(self.discReadClusters[key]['clusterId']) - outStr = '\t'.join([targetName, svType, clusterId, lChrom + ':' + str(lBrkpt), rChrom + ':' + str(rBrkpt), lStrand + ',' + rStrand, str(readCount), str(clusterDist)]) - self.discReadFormatted.append((headerStr, outStr)) - - -class TargetManager(object): - """TargetManager class handles all the high level information relating to a target. - The analysis is peformed at the target level, so this class contains all the information - necessary to perform an independent analysis. - - Attributes: - params (ParamManager): Parameters for breakmer analysis. 
- loggingName (str): Module name for logging file purposes. - name (str): Target name specified in the input bed file. - chrom (str): Chromosome ID as specified in the input bed file. - start (int): Genomic position for the target region (minimum value among all intervals). - end (int): Genomic position for the target region (maximum value among all intervals). - paths (dict): Contains the analysis paths for this target. - files (dict): Dicionary containing paths to file names needed for analysis. - read_len (int): Length of a single read. - variation (Variation): Stores data for variants identified within the target. - regionBuffer (int): Base pairs to add or subtract from the target region end and start locations. - """ - - def __init__(self, name, params): - self.loggingName = 'breakmer.processor.target' - self.params = params - self.name = name - self.chrom = None - self.start = None - self.end = None - self.paths = {} - self.files = {} - self.readLen = int(params.get_param('readLen')) - self.variation = Variation(params) - self.regionBuffer = 200 - self.setup() - - @property - def values(self): - """Return the defined features of this target - """ - return (self.chrom, self.start, self.end, self.name, self.get_target_intervals(), self.regionBuffer) - - @property - def fnc(self): - """Return the function of the program. - """ - return self.params.fncCmd - - def setup(self): - """Setup the TargetManager object with the input params. - - Define the location (chrom, start, end), file paths, directory paths, and name. - - Args: - None - Returns: - None - """ - - # Define the target boundaries based on the intervals input. - # The target start is the minimum start of the intervals and the end - # is the maximum end of the intervals. 
- intervals = self.params.get_target_intervals(self.name) - for values in intervals: - chrom, start, end = values[0], int(values[1]), int(values[2]) - if self.chrom is None: - self.chrom = chrom - if self.start is None: - self.start = start - elif start < self.start: - self.start = start - if self.end is None: - self.end = end - elif end > self.end: - self.end = end - # print 'Region coords', self.chrom, self.start, self.end - - # Create the proper paths for the target analysis. - ''' - Each target analyzed has a set of directories associated with it. - targets/ - / - data/ - contigs/ - kmers/ - - There is separate directory for each target in the output directory. - output/ - / - ''' - self.add_path('ref_data', os.path.join(self.params.paths['ref_data'], self.name)) - if self.params.fncCmd == 'run': - self.add_path('base', os.path.join(self.params.paths['targets'], self.name)) - self.add_path('data', os.path.join(self.paths['base'], 'data')) - self.add_path('contigs', os.path.join(self.paths['base'], 'contigs')) - self.add_path('kmers', os.path.join(self.paths['base'], 'kmers')) - self.add_path('output', os.path.join(self.params.paths['output'], self.name)) - - ''' - Each target has reference files associated with it. - / - / - _forward_refseq.fa - _reverse_refseq.fa - _forward_refseq.fa_dump - _reverse_refseq.fa_dump - ''' - self.files['target_ref_fn'] = [os.path.join(self.paths['ref_data'], self.name + '_forward_refseq.fa'), os.path.join(self.paths['ref_data'], self.name + '_reverse_refseq.fa')] - # ref_fa_marker_f = open(os.path.join(self.paths['ref_data'], '.reference_fasta'), 'w') - # ref_fa_marker_f.write(self.params.get_param('reference_fasta')) - # ref_fa_marker_f.close() - self.files['ref_kmer_dump_fn'] = [os.path.join(self.paths['ref_data'], self.name + '_forward_refseq.fa_dump'), os.path.join(self.paths['ref_data'], self.name + '_reverse_refseq.fa_dump')] - - def add_path(self, key, path): - """Utility function to create all the output directories. 
- - Args: - key (str): String value to store the file path value. - path (str): File path value. - Returns: - None - Raises: - None - """ - - utils.log(self.loggingName, 'info', 'Creating %s %s path (%s)' % (self.name, key, path)) - self.paths[key] = path - if not os.path.exists(self.paths[key]): - os.makedirs(self.paths[key]) - - def set_ref_data(self): - """Write the reference sequence to a fasta file for this specific target if it does not - exist. - - Args: - None - Returns: - None - Raise: - None - """ - - # Write reference fasta file if needed. - for i in range(len(self.files['target_ref_fn'])): - fn = self.files['target_ref_fn'][i] - direction = "forward" if fn.find("forward") != -1 else "reverse" - utils.log(self.loggingName, 'info', 'Extracting refseq sequence and writing %s' % fn) - utils.extract_refseq_fa(self.values, self.paths['ref_data'], self.params.get_param('reference_fasta'), direction, fn) - - # If using blatn for target realignment, the db must be available. - blastn = self.params.get_param('blast') - if blastn is not None: - # Check if blast db files are available for each target. - if not os.path.isfile(self.files['target_ref_fn'][0] + '.nin'): - makedb = os.path.join(os.path.split(blastn)[0], 'makeblastdb') # Create blast db - cmd = "%s -in %s -dbtype 'nucl' -out %s" % (makedb, self.files['target_ref_fn'][0], self.files['target_ref_fn'][0]) - utils.log(self.loggingName, 'info', 'Creating blast db files for target %s with reference file %s' % (self.name, self.files['target_ref_fn'][0])) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - if errors != '': - utils.log(self.loggingName, 'debug', 'Failed to make blast db files using reference file %s' % self.files['target_ref_fn'][0]) - - def find_sv_reads(self): - """Entry function to extract sequence reads from sample or normal bam file. 
- It extracts and cleans the sample reads from the target region that may - be used to build a variant contig. - - 1. Extract bam reads - 2. Clean reads - - Args: - None - Returns: - check (boolean): Variable to determine if the analysis should continue. It is - False when there are no reads extracted or left after cleaning - and True when there are. - """ - - self.extract_bam_reads('sv') # Extract variant reads. - if self.params.get_param('normal_bam_file'): # Extract reads from normal sample, if input. - self.extract_bam_reads('norm') - self.clean_reads('norm') - check = True - if not self.clean_reads('sv'): # Check if there are any reads left to analyze after cleaning. - shutil.rmtree(self.paths['output']) # Remove the output directory since there is nothing to analyze - check = False - return check - - def extract_bam_reads(self, sampleType): - """Wrapper for Variation extract_bam_reads function. - - Args: - sampleType (str): Indicates a tumor ('sv') or normal ('norm') sample being processed. - Return: - None - """ - - # Create the file paths for the files that will be created from the read extraction. - self.variation.setup_read_extraction_files(sampleType, self.paths['data'], self.name) - bamType = 'sample' - if sampleType == 'norm': - bamType = 'normal' - bamFile = self.params.get_param('%s_bam_file' % bamType) - utils.log(self.loggingName, 'info', 'Extracting bam reads from %s to %s' % (bamFile, self.variation.files['%s_fq' % sampleType])) - self.variation.set_var_reads(sampleType, bamFile, self.chrom, self.start, self.end, self.regionBuffer) - - def clean_reads(self, sampleType): - """Wrapper for Variation clean_reads function. - - Args: - type (str): A string indicating a tumor ('sv') or normal ('norm') sample being processed. - Return: - check (boolean): A boolean to indicate whether the are any reads left after - cleaning is complete. 
- """ - - return self.variation.clean_reads(self.paths['data'], self.name, sampleType) - - def compare_kmers(self): - """Obtain the sample only kmers and initiate assembly of reads with these kmers. - - Args: - None - Returns: - None - """ - - self.variation.compare_kmers(self.paths['kmers'], self.name, self.readLen, self.files['target_ref_fn']) - - def resolve_sv(self): - """Perform operations on the contig object that was generated from the split reads in the target. - - Args: - None - Returns: - None - """ - - iter = 1 - contigs = self.variation.kmers['clusters'] - utils.log(self.loggingName, 'info', 'Resolving structural variants from %d kmer clusters' % len(contigs)) - for contig in contigs: - contigId = self.name + '_contig' + str(iter) - utils.log(self.loggingName, 'info', 'Assessing contig %s, %s' % (contigId, contig.seq)) - contig.set_meta_information(contigId, self.params, self.values, self.paths['contigs'], self.variation.files['kmer_clusters'], self.variation) - contig.query_ref(self.files['target_ref_fn']) - contig.make_calls() - if contig.svEventResult: - contig.filter_calls() - contig.annotate_calls() - contig.output_calls(self.paths['output'], self.variation.files['sv_bam_sorted']) - self.add_result(contig.svEventResult) - else: - utils.log(self.loggingName, 'info', '%s has no structural variant result.' % contigId) - iter += 1 - self.variation.cluster_discreads(self.name, self.chrom) # Cluster discordant reads. 
def complete_analysis(self):
    """Write the results of this target, or remove its output directory when there are none."""

    # Same condition as has_results(); kept in one place so the two cannot drift apart.
    if self.has_results():
        self.variation.write_results(self.paths['output'], self.name)
    else:
        shutil.rmtree(self.paths['output'])


def get_target_intervals(self):
    """Return the intervals stored in the parameters for this target name."""

    return self.params.targets[self.name]


def get_sv_reads(self, type):
    """Wrapper for the Variation get_sv_reads function.

    Args:
        type: Passed through unchanged to Variation.get_sv_reads. The name shadows
              the builtin but is kept because it is part of the call signature.
    """

    return self.variation.get_sv_reads(type)


def clear_sv_reads(self, type):
    """Wrapper for the Variation clear_sv_reads function.

    Args:
        type: Passed through unchanged to Variation.clear_sv_reads.
    """

    self.variation.clear_sv_reads(type)


def clear_cleaned_reads(self):
    """Wrapper for the Variation clear_cleaned_reads function."""

    self.variation.clear_cleaned_reads()


def add_result(self, result):
    """Store a result on the Variation object; falsy results are ignored."""

    if result:
        self.variation.add_result(result)


def has_results(self):
    """Return True when there is at least one result or formatted discordant read entry."""

    return len(self.variation.results) > 0 or len(self.variation.discReadFormatted) > 0


def get_results(self):
    """Return the results stored on the Variation object."""

    return self.variation.results


def get_formatted_output(self):
    """Wrapper for the Variation get_formatted_output function."""

    return self.variation.get_formatted_output()
def _parse_int_csv(field):
    """Convert a comma separated field such as '10,20,' into a list of integers.

    Empty items are skipped, so a trailing comma or a field holding only ','
    (no blocks at all) yields the expected list instead of raising ValueError.
    """
    return [int(item) for item in field.split(",") if item != '']


class Matches:
    """Match counts of a realignment record.

    Attributes:
        match: Value stored under 'matches'.
        mismatch: Value stored under 'mismatches'.
        repeat: Value stored under 'repmatches'.
    """

    def __init__(self, values):
        self.match = values['matches']
        self.mismatch = values['mismatches']
        self.repeat = values['repmatches']

    def get_total_matches(self):
        """Sum all the match values, including the mismatches."""
        return self.match + self.mismatch + self.repeat

    def get_total_matching(self):
        """Sum the values where there is a match (matches plus repeat matches)."""
        return self.match + self.repeat

    def get_mismatches(self):
        """Return the number of mismatches."""
        return self.mismatch

    def get_nmatches(self, matchType):
        """Return one of the counts as an integer.

        Args:
            matchType: 'mismatch' or 'repeat'; any other value returns the match count.
        """
        if matchType == "mismatch":
            nmatches = self.mismatch
        elif matchType == 'repeat':
            nmatches = self.repeat
        else:
            nmatches = self.match
        return int(nmatches)


class Gaps:
    """Track the number and the size of gaps in the realignment

    Attributes:
        ref: List of integers for gaps in reference alignment [number of gaps, total bp of gaps]
        query: List of integers for gaps in query alignment [number of gaps in query sequence, total bp of gaps]
    """

    def __init__(self, values):
        self.ref = [int(values['tNumInsert']), int(values['tBaseInsert'])]
        self.query = [int(values['qNumInsert']), int(values['qBaseInsert'])]

    def get_ngaps(self, alignType):
        """Return the number of gaps.

        Args:
            alignType: 'query' for the query gaps; any other value returns the reference gaps.
        """
        gaps = self.query if alignType == 'query' else self.ref
        return int(gaps[0])

    def get_total_size(self):
        """Return the total bp of gap in the realignment"""
        return self.ref[1] + self.query[1]

    def get_gap_sizes(self):
        """Return a tuple (reference gap bp, query gap bp)."""
        return (self.ref[1], self.query[1])

    def get_total_num_gaps(self):
        """Return the total number of gaps in the realignment"""
        return self.ref[0] + self.query[0]


class AlignFragments:
    """Ungapped alignment blocks of a realignment record.

    Attributes:
        blockSizes: List of block sizes.
        ref: List of (start, end) tuples, one per block, built from 'tStarts'.
        query: List of (start, end) tuples, one per block, built from 'qStarts'.
        count: Number of blocks.
    """

    def __init__(self, values):
        self.blockSizes = _parse_int_csv(values['blockSizes'])
        self.ref = []
        self.query = []
        self.count = len(self.blockSizes)
        self.set_values(values)

    def set_values(self, values):
        """Append the (start, start + blockSize) coordinates of every block to ref and query."""
        refStarts = _parse_int_csv(values['tStarts'])
        queryStarts = _parse_int_csv(values['qStarts'])
        for qstart, tstart, blocksize in zip(queryStarts, refStarts, self.blockSizes):
            self.ref.append((tstart, tstart + blocksize))
            self.query.append((qstart, qstart + blocksize))


class SVBreakpoint:
    """A structural variant breakpoint and its transcript annotations.

    Attributes:
        chrom: First item of the coords passed in.
        genomicCoords: The remaining items of the coords passed in.
        svType: Type of the structural variant, stored as passed in.
        targetKey: Stored as passed in.
        annotated_trxs: Dictionary, coordinate index -> [trxList, distList].
    """

    def __init__(self, coords, svType, targetKey):
        self.chrom = coords[0]
        self.genomicCoords = coords[1:]
        self.svType = svType
        self.targetKey = targetKey
        self.annotated_trxs = {}

    def store_annotation(self, trxList, distList, coordIdx):
        """Store the transcript and distance lists for the coordinate at index coordIdx."""
        self.annotated_trxs[coordIdx] = [trxList, distList]


class Breakpoints:
    """Container for the contig, genomic and SV breakpoints of a realignment result."""

    def __init__(self):
        self.contigBreakpoints = []
        self.genomicBreakpoints = []
        self.svBreakpoints = []

    def add_brkpts(self, which, bp):
        """Add a breakpoint.

        Args:
            which: 'contig' stores bp as a contig breakpoint; any other value
                   stores it as a genomic breakpoint.
            bp: Breakpoint values. Genomic breakpoints are tuples of coordinates
                for indel breakpoints with chromosome number as 'chr#'
                - Deletion = (chr, bp1, bp2)
                - Insertion = (chr, bp1)
        """
        if which == 'contig':
            self.contigBreakpoints.append(bp)
        else:
            self.genomicBreakpoints.append(bp)

    def reverse_breakpts(self, querySeqSize):
        """Replace, in place, the first one or two contig coordinates x of every
        contig breakpoint with querySeqSize - x."""
        for contigBrkpts in self.contigBreakpoints:
            contigBrkpts[0] = querySeqSize - contigBrkpts[0]
            if len(contigBrkpts) > 1:
                contigBrkpts[1] = querySeqSize - contigBrkpts[1]

    def set_sv_brkpt(self, coords, svType, targetKey):
        """Create a SVBreakpoint from the arguments and store it."""
        self.svBreakpoints.append(SVBreakpoint(coords, svType, targetKey))


class AlignValues:
    """Name, size and alignment coordinates of the reference and the query sequence.

    Attributes:
        ref: Dictionary with 'seqName', 'seqSize' and 'alignCoords' ([start, end]) of the reference.
        query: Dictionary with the same keys for the query.
    """

    def __init__(self, values):
        self.ref = {'seqName': values['tName'],
                    'seqSize': int(values['tSize']),
                    'alignCoords': [int(values['tStart']), int(values['tEnd'])]}
        self.query = {'seqName': values['qName'],
                      'seqSize': int(values['qSize']),
                      'alignCoords': [int(values['qStart']), int(values['qEnd'])]}

    def _get_seq(self, alignType):
        """Return the query dictionary for 'query' and the reference dictionary for anything else."""
        return self.query if alignType == 'query' else self.ref

    def get_coords(self, alignType, index=None):
        """Return the coordinate of the alignment for either the reference
        or the query sequence.

        Args:
            alignType: String indicating the reference or query. Only 'query'
                       selects the query; every other value selects the reference.
            index: Integer indicating the start(0) or end(1). When None the
                   [start, end] list is returned.
        """
        coords = self._get_seq(alignType)['alignCoords']
        if index is not None:
            coords = coords[index]
        return coords

    def get_seq_name(self, alignType):
        """Return the sequence name of the query or the reference as a string."""
        return str(self._get_seq(alignType)['seqName'])

    def get_seq_size(self, alignType):
        """Return the sequence size of the query or the reference.

        The size is returned as a string, not an integer.
        """
        return str(self._get_seq(alignType)['seqSize'])
class RealignValues:
    """Parse one record of a realignment program into a common dictionary.

    Attributes:
        program: 'blat' or 'blast'. For any other value valueDict stays empty.
        valueDict: Dictionary of the parsed values, keyed by the BLAT (PSL) field
                   names plus 'deletionSeqs' and 'insertSeqs'.
    """

    def __init__(self, values, program, alignRefFn, querySeq, scope):
        self.program = program
        self.valueDict = {}
        self.set_values(values, alignRefFn, querySeq, scope)

    def set_values(self, values, alignRefFn, querySeq, scope):
        """Fill valueDict from the list of fields of one result record.

        BLAT values (index in values + 1)
        1. matches - Number of matching bases that aren't repeats.
        2. misMatches - Number of bases that don't match.
        3. repMatches - Number of matching bases that are part of repeats.
        4. nCount - Number of 'N' bases.
        5. qNumInsert - Number of inserts in query.
        6. qBaseInsert - Number of bases inserted into query.
        7. tNumInsert - Number of inserts in target.
        8. tBaseInsert - Number of bases inserted into target.
        9. strand - defined as + (forward) or - (reverse) for query strand. In mouse, a second '+' or '-' indicates genomic strand.
        10. qName - Query sequence name.
        11. qSize - Query sequence size.
        12. qStart - Alignment start position in query.
        13. qEnd - Alignment end position in query.
        14. tName - Target sequence name.
        15. tSize - Target sequence size.
        16. tStart - Alignment start position in target.
        17. tEnd - Alignment end position in target.
        18. blockCount - Number of blocks in the alignment.
        19. blockSizes - Comma-separated list of sizes of each block.
        20. qStarts - Comma-separated list of start position of each block in query.
        21. tStarts - Comma-separated list of start position of each block in target.

        BLAST values (index in values + 1)
        1. qName
        2. tName
        3. percentIdent
        4. qSize
        5. matches
        6. misMatches
        7. ngaps
        8. qStart (stored minus one)
        9. qEnd
        10. tStart (stored minus one)
        11. tEnd
        12. evalue
        13. bitScore
        14. gapBp
        15. strand ('plus' is stored as '+', anything else as '-')
        16. qSeq
        17. tSeq

        Args:
            values: List of the fields of one result record.
            alignRefFn: Fasta file of the reference, only read for blat records with scope 'target'.
            querySeq: Query sequence, only used for blat records with scope 'target'.
            scope: 'target' triggers the extraction of the indel sequences for blat records.
        """

        if self.program == 'blat':
            self._set_blat_values(values, alignRefFn, querySeq, scope)
        elif self.program == 'blast':
            self._set_blast_values(values)

    def _set_blat_values(self, values, alignRefFn, querySeq, scope):
        """Store the fields of a BLAT (PSL) record and, for scope 'target', the indel sequences."""
        self.valueDict = {'matches': int(values[0]),
                          'mismatches': int(values[1]),
                          'repmatches': int(values[2]),
                          'ncount': int(values[3]),
                          'qNumInsert': int(values[4]),
                          'qBaseInsert': int(values[5]),
                          'tNumInsert': int(values[6]),
                          'tBaseInsert': int(values[7]),
                          'strand': values[8],
                          'qName': values[9],
                          'qSize': int(values[10]),
                          'qStart': int(values[11]),
                          'qEnd': int(values[12]),
                          'tName': values[13].replace('chr', ''),
                          'tSize': int(values[14]),
                          'tStart': int(values[15]),
                          'tEnd': int(values[16]),
                          'blockCount': int(values[17]),
                          'blockSizes': values[18],
                          'qStarts': values[19],
                          'tStarts': values[20],
                          'deletionSeqs': '',
                          'insertSeqs': ''
                          }

        if scope == 'target':
            insertSeqs, delSeqs = self._get_blat_indel_seqs(alignRefFn, querySeq)
            self.valueDict['deletionSeqs'] = delSeqs
            self.valueDict['insertSeqs'] = insertSeqs

    def _get_blat_indel_seqs(self, alignRefFn, querySeq):
        """Return (insertSeqs, delSeqs), the sequences lying between consecutive BLAT blocks."""
        # Plain 'r' mode: the 'U' flag is rejected by Python 3.11+, and the
        # context manager closes the handle that used to be leaked.
        with open(alignRefFn, "r") as alignRefSeq:
            refTargetSeq = str(SeqIO.read(alignRefSeq, "fasta").seq)

        insertSeqs = []
        delSeqs = []
        qPrev = None  # End of the previous block in the query, None before the first block.
        tPrev = None  # End of the previous block in the reference.
        blocks = zip(self.valueDict['blockSizes'].split(','), self.valueDict['qStarts'].split(','), self.valueDict['tStarts'].split(','))
        for bSize, qStart, tStart in blocks:
            if bSize == '' or qStart == '' or tStart == '':
                continue  # Item produced by the trailing comma of the PSL lists.
            bSize, qStart, tStart = int(bSize), int(qStart), int(tStart)
            if qPrev is not None:
                if qStart - qPrev > 0:
                    # Query bases between two blocks: inserted sequence.
                    insertSeqs.append(querySeq[qPrev:qStart])
                if tStart - tPrev > 0:
                    # Reference bases between two blocks: deleted sequence.
                    delSeqs.append(refTargetSeq[tPrev:tStart])
            qPrev = qStart + bSize
            tPrev = tStart + bSize
        return insertSeqs, delSeqs

    def _set_blast_values(self, values):
        """Store the fields of a BLAST record and derive the BLAT-style block and gap values."""
        self.valueDict = {'qName': values[0],
                          'tName': values[1].replace('chr', ''),
                          'percentIdent': float(values[2]),
                          'qSize': values[3],
                          'matches': int(values[4]),
                          'mismatches': int(values[5]),
                          'ngaps': int(values[6]),
                          'qStart': int(values[7]) - 1,
                          'qEnd': int(values[8]),
                          'tStart': int(values[9]) - 1,
                          'tEnd': int(values[10]),
                          'evalue': float(values[11]),
                          'bitscore': float(values[12]),
                          'gapBp': int(values[13]),
                          'strand': values[14],
                          'qSeq': values[15],
                          'tSeq': values[16],
                          'repmatches': 0,
                          'ncount': 0,
                          'qNumInsert': None,
                          'qBaseInsert': None,
                          'tNumInsert': None,
                          'tBaseInsert': None,
                          'tSize': 0,
                          'blockCount': None,
                          'blockSizes': None,
                          'qStarts': None,
                          'tStarts': None,
                          'deletionSeqs': '',
                          'insertSeqs': ''
                          }

        if self.valueDict['strand'] == 'plus':
            self.valueDict['strand'] = '+'
        else:
            self.valueDict['strand'] = '-'

        blockSizes = []
        blockSize = None  # Size of the block being extended, None while inside a gap.
        qStarts = []
        tStarts = []
        gapKind = None  # 'del' / 'ins' while inside a gap, None while inside a block.
        # NOTE(review): the query positions start at 0 while the reference
        # positions start at tStart - confirm that this asymmetry is intended.
        qIter = 0
        tIter = self.valueDict['tStart']
        qInserts = [0, 0]  # [number of gaps, total bp] of bases only present in the query.
        tInserts = [0, 0]  # [number of gaps, total bp] of bases only present in the reference.
        insertSeqs = []
        delSeqs = []
        for qNuc, tNuc in zip(self.valueDict['qSeq'], self.valueDict['tSeq']):
            if qNuc != '-' and tNuc != '-':
                if blockSize is None:
                    blockSize = 0
                    qStarts.append(qIter)
                    tStarts.append(tIter)
                gapKind = None
                blockSize += 1
                qIter += 1
                tIter += 1
                continue

            # Gap column: close the block that was being extended, if any.
            if blockSize is not None:
                blockSizes.append(blockSize)
                blockSize = None
            kind = 'del' if qNuc == '-' else 'ins'
            # A new gap event also starts when the gap switches sides without an
            # aligned column in between; extending the previous event there
            # would index an empty list or append to the wrong sequence.
            startOfGap = kind != gapKind
            gapKind = kind
            if kind == 'del':
                tIter += 1
                if startOfGap:
                    tInserts[0] += 1
                    delSeqs.append(tNuc)
                else:
                    delSeqs[-1] += tNuc
                tInserts[1] += 1
            else:
                qIter += 1
                if startOfGap:
                    qInserts[0] += 1
                    insertSeqs.append(qNuc)
                else:
                    insertSeqs[-1] += qNuc
                qInserts[1] += 1
        if blockSize is not None:
            blockSizes.append(blockSize)

        self.valueDict['blockCount'] = len(blockSizes)
        self.valueDict['blockSizes'] = ','.join([str(x) for x in blockSizes]) + ','
        self.valueDict['qStarts'] = ','.join([str(x) for x in qStarts]) + ','
        self.valueDict['tStarts'] = ','.join([str(x) for x in tStarts]) + ','
        self.valueDict['qNumInsert'] = qInserts[0]
        self.valueDict['tNumInsert'] = tInserts[0]
        self.valueDict['qBaseInsert'] = qInserts[1]
        self.valueDict['tBaseInsert'] = tInserts[1]
        self.valueDict['deletionSeqs'] = delSeqs
        self.valueDict['insertSeqs'] = insertSeqs

    def adjust_values(self, refName, offset):
        """Adjust the reference name and coordinates for a targeted alignment.

        Args:
            refName: Name stored as 'tName' when it is not None; otherwise the
                     current 'tName' is kept with 'chr' removed.
            offset: Integer added to 'tStart', 'tEnd' and every value of
                    'tStarts'. None is treated as 0.
        """
        rName = self.valueDict['tName'].replace('chr', '')
        if refName is not None:
            rName = refName
        self.valueDict['tName'] = rName

        coordOffset = 0
        if offset is not None:
            coordOffset = offset
        self.valueDict['tStart'] = coordOffset + int(self.valueDict['tStart'])
        self.valueDict['tEnd'] = coordOffset + int(self.valueDict['tEnd'])

        # Empty items are skipped so that a list without blocks (',') does not raise.
        tstarts = [coordOffset + int(x) for x in self.valueDict['tStarts'].split(",") if x != '']
        self.valueDict['tStarts'] = ",".join([str(x) for x in tstarts]) + ","
class BlatResult:
    """One realignment record (BLAT or BLAST) of a contig sequence.

    The record is parsed by RealignValues and exposed through the helper
    objects Matches, Gaps, AlignValues, AlignFragments and Breakpoints. The
    indel breakpoints and a cigar string are derived from the alignment blocks
    when the object is created.
    """

    # Keys of the values dictionary in the column order of a BLAT (PSL) record.
    PSL_FIELDS = ('matches', 'mismatches', 'repmatches', 'ncount',
                  'qNumInsert', 'qBaseInsert', 'tNumInsert', 'tBaseInsert',
                  'strand', 'qName', 'qSize', 'qStart', 'qEnd',
                  'tName', 'tSize', 'tStart', 'tEnd',
                  'blockCount', 'blockSizes', 'qStarts', 'tStarts')

    def __init__(self, resultValues, refName, offset, programName, alignRefFn, querySeq, scope):
        self.loggingName = 'breakmer.realignment.blat_result'
        self.realignProgram = programName
        # The attributes set to None here are filled in by set_values() below.
        self.resultValues = None
        self.values = None
        self.matches = None
        self.gaps = None
        self.alignVals = None
        self.fragments = None
        self.strand = None
        self.breakpts = None
        # Sort results based on alignScore, percentIdent, number of gaps
        self.perc_ident = 0
        self.alignScore = None
        self.ngaps = None

        self.alignFreq = 0.0
        self.seg_overlap = [0, 0]
        self.cigar = ''

        self.genes = ''
        self.in_target = False
        self.valid = True
        self.rep_man = None
        self.in_repeat = False
        self.repeat_overlap = 0.0
        self.repeat_coords = None
        self.filter_reps_edges = [False, False]
        self.interval = None

        self.indel_sizes = []
        self.indel_maxevent_size = [0, '']
        self.indel_flank_match = [0, 0]
        self.set_values(resultValues, refName, offset, alignRefFn, querySeq, scope)

    def set_values(self, resultValues, refName, offset, alignRefFn, querySeq, scope):
        """Parse the record and set up the helper objects and the scores.

        The reference name and coordinates are modified if refName and offset are not None.

        Args:
            resultValues: List of values from a realignment program
            refName: String of chromosome name
            offset: Integer of genomic position for target alignment
            alignRefFn: Passed on to RealignValues.
            querySeq: Passed on to RealignValues.
            scope: Passed on to RealignValues.
        """
        realignVals = RealignValues(resultValues, self.realignProgram, alignRefFn, querySeq, scope)
        realignVals.adjust_values(refName, offset)
        self.resultValues = realignVals.valueDict
        self.values = realignVals.valueDict
        self.matches = Matches(self.values)
        self.gaps = Gaps(self.values)
        self.alignVals = AlignValues(self.values)
        self.fragments = AlignFragments(self.values)
        self.strand = self.values['strand']
        self.breakpts = Breakpoints()

        # Sort results based on alignScore, percentIdent, number of gaps.
        # Values reported by the program itself (BLAST) take precedence over
        # the ones computed from the match counts.
        if 'percentIdent' not in self.values:
            self.perc_ident = 100.0 - self.calcMilliBad()
        else:
            self.perc_ident = self.values['percentIdent']

        if 'bitscore' not in self.values:
            self.alignScore = self.get_nmatch_total() + (float(self.get_nmatch_total()) / float(self.get_seq_size('query')))
        else:
            self.alignScore = self.values['bitscore']

        if 'ngaps' not in self.values:
            self.ngaps = self.get_total_num_gaps()
        else:
            self.ngaps = self.values['ngaps']

        self.set_indel_locs()

    def get_indel_seqs(self, seqType):
        """Return the comma joined deleted ('del') or inserted ('ins') sequences.

        None is returned for any other seqType.
        """
        if seqType == 'del':
            return ','.join(self.values['deletionSeqs'])
        elif seqType == 'ins':
            return ','.join(self.values['insertSeqs'])

    def set_sv_brkpt(self, coords, svType, targetKey):
        """Store a structural variant breakpoint in the Breakpoints object."""
        self.breakpts.set_sv_brkpt(coords, svType, targetKey)

    def get_sv_brkpts(self):
        """Return the list of stored structural variant breakpoints."""
        return self.breakpts.svBreakpoints

    def calcMilliBad(self):
        """Return a badness score of the alignment; perc_ident is 100 minus this value.

        The score is 100 * (mismatches + number of query gaps +
        round(3 * ln(1 + sizeDif))) / total matches, where sizeDif is the
        amount by which the aligned query span exceeds the aligned reference
        span (0 when it does not). 0.0 is returned when either span is not
        positive or when there are no matches.
        """
        badAlign = 0.0
        queryAlignSize = self.qend() - self.qstart()
        refAlignSize = self.tend() - self.tstart()
        minAlignSize = min(queryAlignSize, refAlignSize)
        if minAlignSize <= 0:
            return 0.0
        sizeDif = queryAlignSize - refAlignSize
        if sizeDif < 0:
            sizeDif = 0
        insertFactor = self.gaps.get_ngaps('query')
        totalMatches = self.matches.get_total_matches()
        if totalMatches != 0:
            badAlign = (1000 * (self.matches.get_mismatches() + insertFactor + round(3 * math.log(1 + sizeDif)))) / totalMatches
        return badAlign * 0.1

    def set_realign_freq(self, alignFreq):
        """Store the realignment frequency."""
        self.alignFreq = alignFreq

    def set_segment_overlap(self, right, left):
        """Store the segment overlap as [left, right]."""
        self.seg_overlap = [left, right]

    def qstart(self):
        """Query coordinate start"""
        return int(self.alignVals.get_coords('query', 0))

    def qend(self):
        """Query coordinate end"""
        return int(self.alignVals.get_coords('query', 1))

    def tstart(self):
        """Reference coordinate start"""
        return self.alignVals.get_coords('reference', 0)

    def tend(self):
        """Reference coordinate end"""
        return self.alignVals.get_coords('reference', 1)

    def get_seq_name(self, alignType):
        """Return the seq name of the query or reference. Typically the chromosome number"""
        return self.alignVals.get_seq_name(alignType)

    def get_seq_size(self, alignType):
        """Return the size of the query or reference sequence as an integer."""
        return int(self.alignVals.get_seq_size(alignType))

    def get_query_span(self):
        """Length of query sequence alignment to reference."""
        return self.qend() - self.qstart()

    def get_query_coverage(self):
        """Return percentage of query sequence realigned to reference"""
        return round((float(self.get_query_span()) / float(self.get_seq_size('query'))) * 100, 2)

    def spans_query(self):
        """Return boolean whether full query sequence is aligned"""
        return self.get_seq_size('query') == (self.qend() - self.qstart())

    def get_total_gap_size(self):
        """Return the total bp of gaps in the realignment."""
        return self.gaps.get_total_size()

    def get_total_num_gaps(self):
        """Return the total number of gaps in the realignment."""
        return self.gaps.get_total_num_gaps()

    def get_nmatch_total(self):
        """Return the number of matching bases, including the repeat matches."""
        return self.matches.get_total_matching()

    def get_nmatches(self, matchType):
        """Return the match, 'mismatch' or 'repeat' count as an integer."""
        return int(self.matches.get_nmatches(matchType))

    def sum_indel_flank_matches(self, flank_str):
        """Sum the lengths of all the match ('M') operations in a piece of cigar string."""
        match_sum = 0
        for indx, operation in enumerate(flank_str):
            if operation != "M":
                continue
            # Collect the digits directly in front of the 'M'.
            nmatch = ''
            windx = indx - 1
            while windx > -1 and utils.is_number(flank_str[windx]):
                nmatch = flank_str[windx] + nmatch
                windx = windx - 1
            match_sum += int(nmatch)
        return match_sum

    def set_indel_flank_matches(self):
        """Add the matching bases left and right of the largest indel event to indel_flank_match."""
        if self.indel_maxevent_size[0] > 0:
            # Split the cigar on the largest event, e.g. '12D'; the first and
            # the last piece are the flanks.
            csplit = self.cigar.split(str(self.indel_maxevent_size[0]) + self.indel_maxevent_size[1])
            lflank = csplit[0]
            self.indel_flank_match[0] += self.sum_indel_flank_matches(lflank)
            rflank = csplit[-1]
            self.indel_flank_match[1] += self.sum_indel_flank_matches(rflank)

    def set_indel_locs(self):
        """Derive the indel breakpoints, the indel sizes and the cigar string from the alignment blocks.

        Every pair of consecutive blocks is inspected: query bases between the
        blocks are an insertion, reference bases between them a deletion. The
        contig breakpoints are reversed for results on the '-' strand.
        """
        if self.fragments.count == 0:
            return  # No alignment blocks, nothing to derive.
        chrom = 'chr' + self.get_seq_name('reference')
        for i in range(self.fragments.count - 1):
            if i == 0 and self.fragments.query[i][0] > 0:
                # Unaligned query bases in front of the first block.
                self.cigar = str(self.fragments.query[i][0]) + "S"
            qend1 = int(self.fragments.query[i][1])
            qstart2 = int(self.fragments.query[i + 1][0])
            tend1 = int(self.fragments.ref[i][1])
            tstart2 = int(self.fragments.ref[i + 1][0])
            ins_bp = qstart2 - qend1
            del_bp = tstart2 - tend1
            bp1 = tend1
            bp2 = tstart2
            self.cigar += str(self.fragments.blockSizes[i]) + "M"
            if ins_bp > 0:
                self.breakpts.add_brkpts('genomic', (chrom, bp1))
                self.indel_sizes.append("I" + str(ins_bp))
                self.breakpts.add_brkpts('contig', [qend1, qstart2])
                self.cigar += str(ins_bp) + "I"
                if ins_bp > self.indel_maxevent_size[0]:
                    self.indel_maxevent_size = [ins_bp, "I"]
            if del_bp > 0:
                self.breakpts.add_brkpts('genomic', (chrom, bp1, bp2))
                self.indel_sizes.append("D" + str(del_bp))
                self.breakpts.add_brkpts('contig', [qend1])
                self.cigar += str(del_bp) + "D"
                if del_bp > self.indel_maxevent_size[0]:
                    self.indel_maxevent_size = [del_bp, "D"]

        self.cigar += str(self.fragments.blockSizes[-1]) + "M"

        self.set_indel_flank_matches()
        if self.strand == "-":
            self.breakpts.reverse_breakpts(self.get_seq_size('query'))

    def get_genomic_brkpts(self):
        """Return the list of genomic indel breakpoints, (chr, bp1) or (chr, bp1, bp2) tuples."""
        return self.breakpts.genomicBreakpoints

    def get_brkpt_str(self, with_sizes=False):
        """Return the genomic indel breakpoints as a comma separated string.

        Every breakpoint is written as 'chr:bp1' or 'chr:bp1-bp2'. With
        with_sizes the indel size is appended, e.g. 'chr7:150-154 (D4)'. An
        empty string is returned when there are no breakpoints.
        """
        bp_str = []
        # genomicBreakpoints and indel_sizes are filled side by side in set_indel_locs().
        for brkpt, size in zip(self.breakpts.genomicBreakpoints, self.indel_sizes):
            bstr = str(brkpt[0]) + ":" + "-".join([str(x) for x in brkpt[1:]])
            if with_sizes:
                bstr += " " + "(" + size + ")"
            bp_str.append(bstr)
        return ",".join(bp_str)

    def get_brkpt_locs(self):
        """Return a flat list of the genomic indel breakpoint positions.

        NOTE(review): the chromosome of each breakpoint is left out, assuming the
        callers only need the positions - confirm against the callers.
        """
        brkpt_locs = []
        for brkpt in self.breakpts.genomicBreakpoints:
            brkpt_locs.extend(brkpt[1:])
        return brkpt_locs

    def get_gene_anno(self):
        """Return the gene annotation string."""
        return self.genes

    def get_blat_output(self):
        """Return the parsed values as a tab separated string in BLAT (PSL) column order."""
        return "\t".join([str(self.values[field]) for field in self.PSL_FIELDS])

    def get_len(self):
        """Length of query sequence alignment to reference."""
        return self.qend() - self.qstart()

    def get_coords(self, alignType):
        """Return the [start, end] alignment coordinates of the query ('query') or the reference."""
        return self.alignVals.get_coords(alignType)

    def in_target_region(self, targetRegionCoordinates):
        """Flag the result as in target when it starts or ends within the buffered target region.

        Args:
            targetRegionCoordinates: Sequence of which item 0 is compared to the
                                     reference name, items 1 and 2 are the region
                                     start and end, item 3 is stored as the genes
                                     and item 4 is used as the buffer size.
                                     NOTE(review): set_gene_annotations reads the
                                     buffer size from item 5 - confirm which
                                     index is correct for the tuples passed in.
        """
        refCoordStart, refCoordEnd = self.get_coords('ref')
        regionStart = targetRegionCoordinates[1] - targetRegionCoordinates[4]
        regionEnd = targetRegionCoordinates[2] + targetRegionCoordinates[4]
        start_in = refCoordStart >= regionStart and refCoordStart <= regionEnd
        end_in = refCoordEnd <= regionEnd and refCoordEnd >= regionStart
        if targetRegionCoordinates[0] == self.get_seq_name('reference') and (start_in or end_in):
            self.in_target = True
            self.genes = targetRegionCoordinates[3]

    def set_gene_annotations(self, targetRegionCoordinates, annotations):
        """Set the genes of the result from the target region or the gene annotations.

        A result that starts or ends within the buffered target region gets the
        target name. Otherwise the first annotated gene containing the start of
        the alignment is used; without such a gene the result is labelled
        'intergenic' and marked as not valid.

        Args:
            targetRegionCoordinates: Tuple containing (chr, start, end, name, intervals, regionBufferSize)
            annotations: Object containing gene annotations. If none, then no annotation is done.
        """
        br_start, br_end = self.get_coords('reference')
        regionStart = targetRegionCoordinates[1] - targetRegionCoordinates[5]
        regionEnd = targetRegionCoordinates[2] + targetRegionCoordinates[5]
        start_in = br_start >= regionStart and br_start <= regionEnd
        end_in = br_end <= regionEnd and br_end >= regionStart
        if targetRegionCoordinates[0] == self.get_seq_name('reference') and (start_in or end_in):
            self.in_target = True
            self.genes = targetRegionCoordinates[3]
            return
        if annotations is None:
            return

        chrom = self.get_seq_name('reference')
        if chrom.find('chr') == -1:
            chrom = 'chr' + str(chrom)
        ann_genes = []
        for g in annotations.genes:
            gchrom, gs, ge = annotations.genes[g][0], annotations.genes[g][1], annotations.genes[g][2]
            if chrom == gchrom and int(br_start) >= gs and int(br_start) <= ge:
                ann_genes.append(g)
                break
        if len(ann_genes) == 0:
            ann_genes = ['intergenic']
            self.valid = False
        self.genes = ",".join(ann_genes)

    def check_indel(self, nBlatResults):
        """Return True when the result can contain an indel variant.

        That is the case when the result has gaps and either spans the whole
        query or is the only result and lies in the target region.

        Args:
            nBlatResults: Number of realignment results of the contig.
        """
        indel = False
        utils.log(self.loggingName, 'info', 'Checking if blat result contains an indel variant')
        if (self.spans_query() or (nBlatResults == 1 and self.in_target)) and (self.ngaps > 0):
            utils.log(self.loggingName, 'info', 'Blat result spans query (%r) or only one blat result (%r) and blat result in target (%r)' % (self.spans_query(), (nBlatResults == 1), self.in_target))
            indel = True
        return indel
class AlignParams:
    """Realignment program settings for the 'target' and the 'genome' scope.

    Attributes:
        program: Dictionary, scope -> program name ('blat' or 'blast').
        extension: Dictionary, scope -> extension of the result file.
        binary: Dictionary, scope -> path of the binary to run.
        binaryParams: Dictionary, scope -> extra parameters (hostname and port
                      of the blat server for the genome scope).
        ref: Dictionary, scope -> reference to align against.
    """

    def __init__(self, params, targetRefFns):
        self.program = {'target': 'blat', 'genome': 'blat'}
        self.extension = {'target': 'psl', 'genome': 'psl'}
        self.binary = {'target': None, 'genome': None}
        self.binaryParams = {'target': None, 'genome': None}
        self.ref = {'target': None, 'genome': None}
        self.set_values(params, targetRefFns)

    def set_values(self, params, targetRefFns):
        """Fill in the settings from the parameters.

        The target scope uses blast instead of blat when a 'blast' parameter is set.

        Args:
            params: Object with a get_param(name) method.
            targetRefFns: Sequence of target reference file names; the first item is used.
        """
        self.binary['target'] = params.get_param('blat')
        blast = params.get_param('blast')
        if blast is not None:
            self.program['target'] = 'blast'
            self.binary['target'] = blast
            self.extension['target'] = 'txt'

        self.binary['genome'] = params.get_param('gfclient')
        self.binaryParams['genome'] = {'hostname': params.get_param('blat_hostname'),
                                       'port': int(params.get_param('blat_port'))}
        # Use the forward sequence for blatting targeted sequences
        self.ref['target'] = targetRefFns[0]
        self.ref['genome'] = params.get_param('reference_fasta_dir')

    def get_values(self, type):
        """Return the tuple (program, extension, binary, binaryParams, ref) of a scope.

        Args:
            type: 'target' or 'genome'. The name shadows the builtin but is kept
                  because it is part of the call signature.
        """
        return (self.program[type], self.extension[type], self.binary[type], self.binaryParams[type], self.ref[type])


class RealignManager:
    """Run the realignment of a contig and give access to its results.

    Attributes:
        realignment: Realignment object of the last contig passed to realign(),
                     None when that contig had no fasta file.
        alignParams: AlignParams object.
    """

    def __init__(self, params, targetRefFns):
        self.realignment = None
        self.alignParams = AlignParams(params, targetRefFns)

    def realign(self, contig):
        """Align the contig to the target reference and, without a target hit, to the genome.

        Args:
            contig: Contig object; nothing is done when it has no fasta file.
        """
        # Reset first so that the results of a previous contig are never
        # reported for a contig that could not be realigned.
        self.realignment = None
        if not contig.has_fa_fn():
            return

        self.realignment = Realignment(contig)
        if not self.realignment.align(self.alignParams.get_values('target'), 'target'):
            return
        if not self.realignment.target_aligned():
            self.realignment.align(self.alignParams.get_values('genome'), 'genome')
        else:
            if self.realignment.targetHit and self.alignParams.get_values('target')[0] == 'blast':
                self.realignment.check_record_merge()

    def get_result_fn(self):
        """Return the result file name, or None when there are no results."""
        resultFn = None
        if self.has_results():
            resultFn = self.realignment.get_result_fn()
        return resultFn

    def has_results(self):
        """Return True when a realignment was run and it has results."""
        return self.realignment is not None and self.realignment.has_results()

    def get_blat_results(self):
        """Return the sorted results of the realignment."""
        return self.realignment.get_blat_results()

    def store_clipped_queryseq(self, blatResultValues):
        """Store the values on the realignment object."""
        self.realignment.store_clipped_queryseq(blatResultValues)

    def get_qsize(self):
        """Return the query size stored on the realignment results."""
        return self.realignment.results.querySize


class Realignment:
    """Realignment of a single contig with an external program.

    Attributes:
        scope: Scope of the last alignment, 'target' or 'genome'.
        results: AlignResults object of the last alignment, None without a result file.
        targetHit: True once the target alignment was sufficient.
        resultFn: Path of the result file of the last alignment.
        alignParams: Tuple (program, extension, binary, binaryParams, ref) of the last alignment.
        contig: Contig object being realigned.
        clippedQs: List of the values passed to store_clipped_queryseq().
    """

    def __init__(self, contig):
        self.loggingName = 'breakmer.realignment.realigner'
        self.scope = None
        self.results = None
        self.targetHit = False
        self.resultFn = None
        self.alignParams = None
        self.contig = contig
        self.clippedQs = []

    def align(self, alignParams, scope):
        """Run the realignment program and load its result file.

        Args:
            alignParams: Tuple (program, extension, binary, binaryParams, ref).
            scope: 'target' or 'genome'.
        Returns:
            True when the result file exists after the run, False otherwise.
        """
        self.alignParams = alignParams
        alignProgram, alignExt, alignBinary, binaryParams, alignRef = self.alignParams
        self.scope = scope

        self.resultFn = os.path.join(self.contig.get_path(), '%s_res.%s.%s' % (alignProgram, scope, alignExt))
        utils.log(self.loggingName, 'info', 'Running realignment with %s, storing results in %s' % (alignProgram, self.resultFn))

        cmd = ''
        if alignProgram == 'blast':
            cmd = "%s -task 'blastn-short' -db %s -query %s -evalue 0.01 -out %s -outfmt '7 qseqid sseqid pident qlen length mismatch gapopen qstart qend sstart send evalue bitscore gaps sstrand qseq sseq'" % (alignBinary, alignRef, self.contig.meta.fa_fn, self.resultFn)
        elif alignProgram == 'blat':
            if scope == 'genome':
                # all blat server
                cmd = '%s -t=dna -q=dna -out=psl -minScore=20 -nohead %s %d %s %s %s' % (alignBinary, binaryParams['hostname'], binaryParams['port'], alignRef, self.contig.meta.fa_fn, self.resultFn)
            elif scope == 'target':
                # target
                cmd = '%s -t=dna -q=dna -out=psl -minScore=20 -stepSize=10 -minMatch=2 -repeats=lower -noHead %s %s %s' % (alignBinary, alignRef, self.contig.meta.fa_fn, self.resultFn)

        utils.log(self.loggingName, 'info', 'Realignment system command %s' % cmd)
        # NOTE(review): the command is a string run through the shell; the paths
        # are not quoted, so paths containing spaces or shell characters break it.
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, errors = p.communicate()
        utils.log(self.loggingName, 'info', 'Realignment output file %s' % self.resultFn)
        # Truthiness works for str and bytes; comparing bytes to '' is always unequal.
        if errors:
            utils.log(self.loggingName, 'info', 'Realignment errors %s' % errors)

        if not os.path.isfile(self.resultFn):
            return False
        self.results = AlignResults(alignProgram, scope, self.resultFn, self.contig, alignRef)
        return True

    def get_result_fn(self):
        """Return the result file name, or None when there are no results."""
        if self.results is not None:
            return self.resultFn

    def has_results(self):
        """Return True when a result file was loaded."""
        return self.results is not None

    def target_aligned(self):
        """Return True when no genome alignment is needed.

        That is the case when there was a sufficient target hit or no alignment
        result at all.
        """
        noAlignmentResults = False
        if self.results is None:
            noAlignmentResults = True
        else:
            self.results.modify_blat_result_file()
            if self.results.target_hit():
                self.targetHit = True
                utils.log(self.loggingName, 'debug', 'Top hit contains whole query sequence, indicating an indel variant within the target region.')

        # If there was a sufficient target hit or no alignment at all then return True
        # this effectively prevents a genome alignment.
        return self.targetHit or noAlignmentResults

    def get_blat_results(self):
        """Return the sorted results of the alignment."""
        return self.results.sortedResults

    def store_clipped_queryseq(self, blatResultValues):
        """Append the values to the clippedQs list."""
        self.clippedQs.append(blatResultValues)

    def check_record_merge(self):
        """Merge the indel records of a blast result and reload the merged file as blat results."""
        # Check if need to merge indels from blast results
        if len(self.results.mergedRecords) > 0:
            # Re-write a blast-style file and re-process with merged records.
            mergedFn = self.resultFn + '.merged_recs'
            with open(mergedFn, 'w') as newResultFile:
                for line in self.results.merge_records():
                    newResultFile.write(line)
            self.resultFn = mergedFn
            alignRef = self.alignParams[4]
            self.results = AlignResults('blat', 'blast_target', self.resultFn, self.contig, alignRef)
- self.targetSegmentsSorted = None - self.targetHit = False - self.set_values() - - def set_values(self): - """ """ - if not self.resultFn: - self.hasResults = False - elif len(open(self.resultFn, 'rU').readlines()) == 0: - self.hasResults = False - else: - self.parse_result_file() - - def modify_blat_result_file(self): - """ """ - blatFile = open(self.resultFn + '.mod', 'w') - for result in self.results: - blatFile.write(result.get_blat_output() + "\n") - blatFile.close() - self.resultFn = self.resultFn + '.mod' - - def target_hit(self): - """ """ - if self.hasResults: - cond1 = self.results[0].spans_query() and (self.ngaps > 0) - cond2 = (len(self.results) == 1) and self.get_query_coverage() >= 90.0 and (self.ngaps > 0) - # cond3 = (len(self.results) > 1) and (self.get_query_coverage() >= 90.0) - self.targetHit = cond1 or cond2 # or cond3 - utils.log(self.loggingName, 'debug', 'Checking if query is a target hit or not %r' % self.targetHit) - - if self.targetHit: - if ((len(self.results) > 1) and (self.get_query_coverage() >= 90.0)) and (self.program == 'blast'): - # Check for a gapped Blast result. 
- segments = [] - for i, result in enumerate(self.results): - resultOverlap = 0 - addSegment = True - for segment in segments: - overlapSeg = (result.qstart() >= segment[0] and result.qstart() <= segment[1]) or (result.qend() >= segment[0] and result.qend() <= segment[1]) - containSeg = result.qstart() >= segment[0] and result.qend() >= segment[1] - withinSeg = result.qstart() >= segment[0] and result.qend() <= segment[1] - # print i, result, containSeg, withinSeg, overlapSeg - if containSeg or withinSeg: - addSegment = False - elif overlapSeg: - if (result.qstart() >= segment[0] and result.qstart() <= segment[1]): - overlapBp = segment[1] - result.qstart() - # print 'Overlapbp', overlapBp - if overlapBp < 20: - resultOverlap += overlapBp - addSegment = True - elif (result.qend() >= segment[0] and result.qend() <= segment[1]): - overlapBp = result.qend() - segment[0] - # print 'Overlapbp', overlapBp - if overlapBp < 20: - resultOverlap += overlapBp - addSegment = True - if addSegment and (result.get_query_span() - resultOverlap) > 20: - segments.append((result.qstart(), result.qend(), result)) - self.targetSegmentsSorted = sorted(segments, key=lambda x: x[0]) - for i in range(1, len(self.targetSegmentsSorted)): - lResult = self.targetSegmentsSorted[i - 1][2] - rResult = self.targetSegmentsSorted[i][2] - qgap = rResult.qstart() - lResult.qend() - tgap = rResult.tstart() - lResult.tend() - if (tgap < 0 and (abs(tgap) > abs(qgap))) or (lResult.strand != rResult.strand): - # Tandem dup or inversion - break - else: - self.mergedRecords.append((i - 1, i)) - return self.targetHit - - def parse_result_file(self): - """ """ - refName = None - offset = None - if self.scope == 'target': - # Need to reset the chrom name and coordinates for blat results. 
- refName = self.contig.get_chr() - offset = self.contig.get_target_start() - self.contig.get_target_buffer() - # print 'Offset', offset - - for line in open(self.resultFn, 'r'): - if line.find('#') > -1: - continue - line = line.strip() - parsedResult = blat_result.BlatResult(line.split('\t'), refName, offset, self.program, self.alignRefFn, self.contig.seq, self.scope) - parsedResult.in_target_region(self.contig.get_target_region_coordinates()) - # parsedBlatResult.set_gene_annotations(self.contig.get_target_region_coordinates(), self.contig.get_gene_annotations()) - # parsedBlatResult.set_repeats(self.contig.get_repeat_annotations()) - self.process_blat_result(parsedResult) - self.results.append(parsedResult) - # Update to use class attributes as sorting categories - self.sortedResults = sorted(self.results, key=lambda x: (-x.alignScore, -x.perc_ident, x.get_total_num_gaps())) - - for i, blatResult in enumerate(self.sortedResults): - blatResult.set_realign_freq(self.get_align_freq(blatResult.qstart(), blatResult.qend())) - - if len(self.results) == 0: - self.hasResults = False - - def merge_records(self): - """ """ - mergedResults = [] - mapResults = {} - for mergeIdx in self.mergedRecords: - lResult = self.targetSegmentsSorted[mergeIdx[0]][2].resultValues - rResult = self.targetSegmentsSorted[mergeIdx[1]][2].resultValues - # print 'left', lResult - # print 'right', rResult - newMergedIdx = len(mergedResults) - if mergeIdx[0] in mapResults: - # Merge a result and a previously merged result. 
- lResult = mergedResults[mapResults[mergedIdx[0]]] - newMergedIdx = mapResults[mergedIdx[0]] - mergedResults[newMergedIdx] = self.merge_record_fields(lResult, rResult) - else: - # Merge right and left values - mergedResults.append(self.merge_record_fields(lResult, rResult)) - mapResults[mergeIdx[0]] = newMergedIdx - mapResults[mergeIdx[1]] = newMergedIdx - outputIdx = [] - lOuts = [] - for i, result in enumerate(self.targetSegmentsSorted): - if i in mapResults: - # print 'New result', i, mergedResults[mapResults[i]] - if mapResults[i] not in outputIdx: - outputIdx.append(mapResults[i]) - lOuts.append(self.format_to_blat_output(mergedResults[mapResults[i]])) - else: - # print 'Result', self.targetSegmentsSorted[i][2].resultValues - lOuts.append(self.format_to_blat_output(self.targetSegmentsSorted[i][2].resultValues)) - # print lOuts - return lOuts - - def format_to_blat_output(self, resultValues): - """ """ - outStr = [] - keys = ['matches', - 'mismatches', - 'repmatches', - 'ncount', - 'qNumInsert', - 'qBaseInsert', - 'tNumInsert', - 'tBaseInsert', - 'strand', - 'qName', - 'qSize', - 'qStart', - 'qEnd', - 'tName', - 'tSize', - 'tStart', - 'tEnd', - 'blockCount', - 'blockSizes', - 'qStarts', - 'tStarts'] - for key in keys: - outStr.append(resultValues[key]) - return '\t'.join([str(x) for x in outStr]) + '\n' - - def merge_record_fields(self, lResult, rResult): - """ """ - tGap = rResult['tStart'] - lResult['tEnd'] - qGap = rResult['qStart'] - lResult['qEnd'] - lqEnd = lResult['qEnd'] - - lResult['qStart'] = int(lResult['qStart']) - 1 - rResult['qStart'] = int(rResult['qStart']) - 1 - - if tGap > 1: - # Del - newqStart = str(lResult['qEnd'] - 1) - if qGap > 0: - newqStart = str(rResult['qStart']) - lResult['qStarts'] += newqStart + ',' - lResult['tStarts'] += str(rResult['tStart']) + ',' - lResult['tNumInsert'] += 1 - lResult['tBaseInsert'] += tGap - if qGap > 1: - # Ins - newtStart = str(lResult['tEnd'] - 1) - if tGap > 0: - newtStart = str(rResult['tStart']) - 
lResult['qStarts'] += str(rResult['qStart']) + ',' - lResult['tStarts'] += str(newtStart) + ',' - lResult['qNumInsert'] += 1 - lResult['qBaseInsert'] += qGap - - lResult['blockSizes'] = str(int(lResult['blockSizes'].rstrip(',')) - 1) + ',' - if qGap < 0: - rResult['blockSizes'] = str(int(rResult['blockSizes'].rstrip(',')) + qGap) + ',' - rResult['matches'] += qGap - 1 - keys = ['matches', 'mismatches', 'repmatches', 'ncount', 'qNumInsert', 'qBaseInsert', 'tNumInsert', 'tBaseInsert'] - for key in keys: - lResult[key] += rResult[key] - lResult['qEnd'] = rResult['qEnd'] - lResult['tEnd'] = rResult['tEnd'] - lResult['blockCount'] += 1 - lResult['blockSizes'] += rResult['blockSizes'] - return lResult - - def process_blat_result(self, blatResultObj): - """Summarize metrics from all alignments. - """ - self.nmismatches += blatResultObj.get_nmatches('mismatch') - self.ngaps += blatResultObj.get_total_num_gaps() - if not self.querySize: - self.querySize = blatResultObj.get_seq_size('query') - self.alignmentFreq = [0] * self.querySize - for i in range(blatResultObj.qstart(), blatResultObj.qend()): - self.alignmentFreq[i] += 1 - - def get_query_coverage(self): - nhits = 0 - for i in self.alignmentFreq: - if i > 0: - nhits += 1 - return round((float(nhits) / float(self.querySize)) * 100, 2) - - def get_align_freq(self, s, e): - return float(sum(self.alignmentFreq[s:e])) / float(len(self.alignmentFreq[s:e])) diff --git a/breakmer/utils.py b/breakmer/utils.py deleted file mode 100644 index af33eca..0000000 --- a/breakmer/utils.py +++ /dev/null @@ -1,853 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os -import sys -import glob -import logging -import time -import math -from Bio import SeqIO -import subprocess -from pysam import * - -__author__ = "Ryan Abo" -__copyright__ = "Copyright 2015, Ryan Abo" -__email__ = "ryanabo@gmail.com" -__license__ = "MIT" - - -def which(program): - """Determine the full path to a binary if it is in the system path for execution. 
- - Args: - program (str): Name of the binary to determine path. - Returns: - A file path to the binary file (str) or None if it doesn't exist. - Raises: - None - """ - - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fPath, fName = os.path.split(program) - if fPath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - path = path.strip('"') - exeFile = os.path.join(path, program) - if is_exe(exeFile): - return exeFile - return None - - -def median(lst): - """Returns the median value of a list of values. - - Args: - lst (list): List of numeric values - Returns: - median (numeric): Median value from list. - """ - - lst = sorted(lst) - if len(lst) < 1: - return None - if len(lst) % 2 == 1: - return lst[((len(lst) + 1) / 2) - 1] - else: - return float(sum(lst[(len(lst) / 2) - 1:(len(lst) / 2) + 1])) / 2.0 - - -def percentile(N, percent, key=lambda x: x): - """Find the percentile of a list of values. - - Args: - N (list): List of numeric values. N MUST BE sorted. - percent (float): Percentage value ranging from 0.0 to 1.0. - key (lambda): A function to compute a value from each element of N. 
- Returns: - percentile (numeric): The percentile of the values - """ - - if not N: - return None - N.sort() - k = (len(N) - 1) * percent - f = math.floor(k) - c = math.ceil(k) - if f == c: - return key(N[int(k)]) - d0 = key(N[int(f)]) * (c - k) - d1 = key(N[int(c)]) * (k - f) - return d0 + d1 - - -def remove_outliers(x): - """ - """ - - qnt1 = percentile(x, 0.25) - qnt2 = percentile(x, 0.75) - H = 1.5 * (qnt2 - qnt1) - x.sort() - i = 0 - while (x[i] < (qnt1 - H)): - i += 1 - cut1 = i - - x.sort(reverse=True) - while (x[i] > (qnt2 + H)): - i += 1 - - cut2 = len(x) - i - - x.sort() - return x[cut1:cut2] - - -def mean(lst): - """calculates mean - """ - - sum = 0 - for i in range(len(lst)): - sum += lst[i] - return (sum / len(lst)) - - -def stddev(lst): - """Calculates the standard deviation from the list of input values. - - Args: - lst (list): List of numeric values to calculate standard deviation - Returns: - Standard deviation (float) - Raises: - None - """ - - sum = 0 - mn = mean(lst) - for i in range(len(lst)): - sum += pow((lst[i] - mn), 2) - return math.sqrt(sum / len(lst) - 1) - - -def run_cutadapt(cutadapt, cutadapt_config_f, input_fn, output_fn, logging_src): - """ - """ - - cutadapt_parameters = stringify(cutadapt_config_f) - cmd = '%s %s %s %s > %s' % (sys.executable, cutadapt, cutadapt_parameters, input_fn, output_fn) - log(logging_src, 'debug', 'Cutadapt system command %s' % cmd) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - log(logging_src, 'debug', 'Cutadapt output %s' % output) - log(logging_src, 'debug', 'Cutadapt errors %s' % errors) - return output, errors - - -def log(name, level, msg): - """Write log message to the appropriate level. - - Args: - name (str): The logger name, typically the module name. - level (str): The level of debugging classification. - msg (str): The message to log. 
- Returns: - None - """ - - logger = logging.getLogger(name) - if level == 'info': - logger.info(msg) - elif level == 'debug': - logger.debug(msg) - elif level == 'error': - logger.error(msg) - - -def stringify(fn): - """Turn file contents into a space delimited string - """ - - str = [] - for line in open(fn, 'rU').readlines(): - line = line.strip() - str.append(line) - return ' '.join(str) - - -def create_ref_test_fa(target_fa_in, test_fa_out): - """ - """ - - if not os.path.isfile(get_marker_fn(test_fa_out)): - fa_in = open(target_fa_in, "rU") - fa_out = open(test_fa_out, "w") - - record = SeqIO.read(fa_in, "fasta") - ref_target_seq = str(record.seq) - end = min(len(ref_target_seq), 1500) - start = max(0, len(ref_target_seq) - 1500) - fa_out.write(">" + record.id + "_start\n" + ref_target_seq[0:end] + "\n>" + record.id + "_end\n" + ref_target_seq[start:len(ref_target_seq)] + "\n") - fa_out.close() - - cmd = 'touch %s' % get_marker_fn(test_fa_out) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - return True - else: - return False - - -def test_cutadapt(fqFn, cutadaptBinary, cutadaptConfigFn): - """Test the functionality of Cutadapt on test data. - - Args: - fqFn (str): The full path to the fastq filename to run Cutadapt on. - cutadaptBinary (str): The full path to the Cutadapt binary. - cutadaptConfigFn (str): The full path to the Cutadapt configuration file. - Returns: - A tuple with the clean fastq file and the return code. 
- Raise: - None - """ - - fqCleanBase = os.path.basename(fqFn).split('.')[0] + "_cleaned.fq" - fqCleanFn = os.path.join(os.path.dirname(fqFn), fqCleanBase) - cutadaptParams = stringify(cutadaptConfigFn) - cmd = '%s %s %s %s > %s' % (sys.executable, cutadaptBinary, cutadaptParams, fqFn, fqCleanFn) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - returnCode = p.returncode - if returnCode != 0: - return (None, returnCode) - else: - return (fqCleanFn, returnCode) - - -def test_jellyfish(jfish_bin, fa_fn, analysis_dir): - """ - """ - - cmd = '%s --version' % jfish_bin - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - jfish_version = int(output.split()[1].split('.')[0]) - - kmer_size = 15 - count_fn = os.path.join(analysis_dir, "test_jellyfish_counts") - cmd = '%s count -m %d -s %d -t %d -o %s %s' % (jfish_bin, kmer_size, 100000000, 8, count_fn, fa_fn) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - if p.returncode != 0: - return ("Jellyfish counts", p.returncode) - - if jfish_version < 2: - count_fn += '_0' - dump_fn = os.path.join(analysis_dir, "test_jellyfish_dump") - cmd = '%s dump -c -o %s %s' % (jfish_bin, dump_fn, count_fn) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - if p.returncode != 0: - return ("Jellyfish dump", p.returncode) - return ("Jellyfish", 0) - - -def calc_contig_complexity(seq, N=3, w=6): - """ - """ - - cmers = [] - for i in range(len(seq)): - s = max(0, i - w) - e = min(len(seq), i + w) - cmer = count_nmers(seq[s:e], N) - n = len(cmer) - nmod = float(n) / float(e - s) - cmers.append(round(nmod, 2)) - cmers_mean = sum(cmers) / len(cmers) - return cmers_mean, cmers - - -def count_nmers(seq, N): - """ - """ - - nmers = {} - for i in range(len(seq) 
- (N - 1)): - mer = str(seq[i:i + N]).upper() - if mer not in nmers: - nmers[mer] = 0 - nmers[mer] += 1 - return nmers - - -def is_number(s): - """ - """ - - try: - float(s) - return True - except ValueError: - pass - try: - import unicodedata - unicodedata.numeric(s) - return True - except (TypeError, ValueError): - pass - return False - - -def filter_by_feature(brkpts, query_region, keep_intron_vars): - """ - """ - - in_filter = False - span_filter = False - if not keep_intron_vars: - in_vals, span_vals = check_intervals(brkpts, query_region) - if in_vals[0]: - if 'exon' not in in_vals[1]: - in_filter = True - else: - in_filter = True - if span_vals[0]: - if 'exon' not in span_vals[1]: - span_filter = True - else: - span_filter = True - return in_filter, span_filter - - -def check_intervals(breakpts, query_region): - """ - """ - - in_values = [False, [], []] - span_values = [False, [], []] - for bp in breakpts: - for interval in query_region[4]: - if (int(bp) >= (interval[1] - 20)) and (int(bp) <= (interval[2] + 20)): - in_values[1].append(interval[4]) - in_values[0] = True - in_values[2].append(interval) - if (interval[2] <= max(breakpts)) and (interval[1] >= min(breakpts)): - span_values[0] = True - span_values[1].append(interval[4]) - span_values[2].append(interval) - return in_values, span_values - - -def setup_logger(logFnPath, name): - """Creates the logger object and associated text file to use throughout - the analysis. - It first creates a log.txt file in the specified analyis directory as the - FileHandler. The console handler is then formatted to report the time, - name of the source, level of the message and the message. - Args: - log_fn_path: Absolute path for the directory that will contain the - log file. - name: The name of the package to initially setup the logger object. - Returns: - Nothing is returned. 
- """ - - outputPath = os.path.abspath(logFnPath) - if not os.path.exists(outputPath): - os.makedirs(outputPath) - - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - - # FileHandler - fileHandle = logging.FileHandler(os.path.join(outputPath, 'log.txt'), mode='w') - fileHandle.setLevel(logging.DEBUG) - # ConsoleHandler - consoleHandle = logging.StreamHandler() - consoleHandle.setLevel(logging.ERROR) - - formatter = logging.Formatter(fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') - fileHandle.setFormatter(formatter) - consoleHandle.setFormatter(formatter) - - logger.addHandler(fileHandle) - logger.addHandler(consoleHandle) - - -# def check_repeat_regions(coords, repeat_locs): -# """ -# """ - -# start, end = coords -# seg_len = float(end - start) -# in_repeat = False -# rep_overlap = 0.0 -# rep_coords = [] -# filter_reps_edges = [False, False] -# for rloc in repeat_locs: -# rchr, rbp1, rbp2, rname = rloc -# if (rbp1 >= start and rbp1 <= end) or (rbp2 >= start and rbp2 <= end) or (rbp1 <= start and rbp2 >= end): -# in_repeat = True -# rep_overlap += float(min(rbp2, end) - max(rbp1, start)) -# rep_coords.append((rbp1, rbp2)) -# # Simple or low complexity seq repeat for filtering -# if rname.find(")n") > -1 or rname.find("_rich") > -1: -# if (rbp1 <= start and rbp2 >= start): -# filter_reps_edges[0] = True -# elif (rbp1 <= end and rbp2 >= end): -# filter_reps_edges[1] = True -# roverlap = round((float(min(rep_overlap, seg_len)) / float(seg_len)) * 100, 2) -# return in_repeat, roverlap, rep_coords, filter_reps_edges - - -def get_marker_fn(fn): - """ - """ - - return os.path.join(os.path.split(fn)[0], "." 
+ os.path.basename(fn)) - - -def run_jellyfish(fa_fn, jellyfish, kmer_size): - """ - """ - - logger = logging.getLogger('root') - file_path = os.path.split(fa_fn)[0] - file_base = os.path.basename(fa_fn) - dump_fn = os.path.join(file_path, file_base + "_" + str(kmer_size) + "mers_dump") - dump_marker_fn = get_marker_fn(dump_fn) - if not os.path.isfile(dump_marker_fn): - if not os.path.exists(fa_fn): - logger.info('%s does not exist.' % fa_fn) - dump_fn = None - return dump_fn - cmd = '%s --version' % jellyfish - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - jfish_version = int(output.split()[1].split('.')[0]) - logger.info('Using jellyfish version %d' % jfish_version) - - count_fn = os.path.join(file_path, file_base + "_" + str(kmer_size) + "mers_counts") - logger.info('Running %s on file %s to determine kmers' % (jellyfish, fa_fn)) - cmd = '%s count -m %d -s %d -t %d -o %s %s' % (jellyfish, kmer_size, 100000000, 8, count_fn, fa_fn) - logger.info('Jellyfish counts system command %s' % cmd) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - logger.info('Jellyfish count output %s' % output) - logger.info('Jellyfish count errors %s' % errors) - - if jfish_version < 2: - count_fn += '_0' - cmd = '%s dump -c -o %s %s' % (jellyfish, dump_fn, count_fn) - logger.info('Jellyfish dump system command %s' % cmd) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - logger.info('Jellyfish dump output %s' % output) - logger.info('Jellyfish dump errors %s' % errors) - cmd = 'touch %s' % dump_marker_fn - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - logger.info('Completed jellyfish dump %s, touching marker file %s' % (dump_fn, dump_marker_fn)) - count_fns = 
glob.glob(os.path.join(file_path, "*mers_counts*")) - for cf in count_fns: - os.remove(cf) - else: - logger.info('Jellfish already run and kmers already generated for target.') - return dump_fn - - -def setup_ref_data(setup_params): - """ - """ - - genes = setup_params[0] - rep_mask, ref_fa, altref_fa_fns, ref_path, jfish_path, blat_path, kmer_size = setup_params[1] - logger = logging.getLogger('breakmer.utils') - - for gene in genes: - chr, bp1, bp2, name, intvs = gene - gene_ref_path = os.path.join(ref_path, name) - if rep_mask: - logger.info('Extracting repeat mask regions for target gene %s.' % name) - setup_rmask(gene, gene_ref_path, rep_mask) - - logger.info('Extracting refseq sequence for %s, %s:%d-%d' % (name, chr, bp1, bp2)) - directions = ['forward', 'reverse'] - for dir in directions: - target_fa_fn = os.path.join(gene_ref_path, name + '_' + dir + '_refseq.fa') - ref_fn = extract_refseq_fa(gene, gene_ref_path, ref_fa, dir, target_fa_fn) - run_jellyfish(ref_fn, jfish_path, kmer_size) - if altref_fa_fns: - if not create_ref_test_fa(os.path.join(gene_ref_path, name + '_forward_refseq.fa'), os.path.join(gene_ref_path, name + '_start_end_refseq.fa')): - return - - altref_fns = [] - alt_iter = 1 - altref_fas = altref_fa_fns.split(',') - for altref in altref_fas: - for dir in directions: - fn = os.path.join(gene_ref_path, name + '_' + dir + '_altrefseq_' + str(alt_iter) + '.fa') - marker_fn = get_marker_fn(fn) - if not os.path.isfile(marker_fn): - altref_fns.append((altref, fn, alt_iter)) - alt_iter += 1 - - if len(altref_fns) > 0: - altref_fas = altref_fa_fns.split(',') - alt_iter = 1 - for i in range(len(altref_fns)): - alt_gene_coords = get_altref_genecoords(blat_path, altref_fns[i][0], os.path.join(gene_ref_path, name + '_start_end_refseq.fa'), chr, os.path.join(gene_ref_path, name + '_altref_blat_' + str(altref_fns[i][2]) + '.psl')) - if not alt_gene_coords[2]: - logger.info("No sequence for target gene %s in %s, no reference kmers extracted." 
% (name, altref_fns[i][0])) - alt_iter += 1 - continue - gene = (chr, alt_gene_coords[0][1], alt_gene_coords[1][1], name, intvs) - target_fa_fn = altref_fns[i][1] #os.path.join(gene_ref_path, name + '_' + dir + '_altrefseq_' + str(alt_iter) + '.fa') - ref_fn = extract_refseq_fa(gene, gene_ref_path, altref_fns[i][0], dir, target_fa_fn) - run_jellyfish(ref_fn, jfish_path, kmer_size) - os.remove(os.path.join(gene_ref_path, name + '_start_end_refseq.fa')) - - -def get_fastq_reads(fn, sv_reads): - """ - """ - - # read_len = 0 - filtered_fq_fn = fn.split(".fastq")[0] + "_filtered.fastq" - filt_fq = open(filtered_fq_fn, 'w') - fq_recs = {} -# f = open(fn,'r') -# fq_recs = list(SeqIO.parse(f,'fastq')) - for header, seq, qual in FastqFile(fn): - qname_split = header.lstrip("@").split("_") - indel_only = qname_split[-1] - qname = "_".join(qname_split[0:len(qname_split) - 1]) - if qname in sv_reads: - oseq, sc_seqs, clip_coords, indel_meta = sv_reads[qname] - cleaned_seq = seq - old_seq = oseq.seq - add = True - if str(cleaned_seq) != str(old_seq) and sc_seqs: - sc_clips = sc_seqs['clipped'] - idx = old_seq.find(cleaned_seq) - trimmed_seq = '' - if idx == 0: - trimmed_seq = old_seq[len(cleaned_seq):len(old_seq)] - else: - trimmed_seq = old_seq[0:idx] - sc_lens = 0 - for sc_seq in sc_clips: - sc_lens += len(sc_seq) - if trimmed_seq.find(sc_seq) > -1: - add = False - if len(cleaned_seq) == (len(old_seq) - sc_lens): - for sc_seq in sc_clips: - if cleaned_seq.find(sc_seq) == -1: - # Don't add, just trimmed clipped portion. 
- add = False - if add: - filt_fq.write(header + "\n" + seq + "\n+\n" + qual + "\n") - fr = fq_read(header, seq, qual, indel_meta) - # read_len = max(read_len, len(fr.seq)) - seq = fr.seq - if seq not in fq_recs: - fq_recs[seq] = [] - fq_recs[seq].append(fr) - filt_fq.close() - return filtered_fq_fn, fq_recs - - -def get_fastq_reads_old(fn, sv_reads): - """ - """ - - read_len = 0 - fq_recs = {} - f = open(fn, 'r') -# fq_recs = list(SeqIO.parse(f,'fastq')) - for header, seq, qual in FastqFile(fn): - qname = header.lstrip("@") - if qname in sv_reads: - oseq, sc_seqs, clip_coords = sv_reads[qname] - cleaned_seq = seq - old_seq = oseq.seq - add = True - if str(cleaned_seq) != str(old_seq) and sc_seqs: - idx = old_seq.find(cleaned_seq) - trimmed_seq = '' - if idx == 0: - trimmed_seq = old_seq[len(cleaned_seq):len(old_seq)] - else: - trimmed_seq = old_seq[0:idx] - sc_lens = 0 - for sc_seq in sc_seqs: - sc_lens += len(sc_seq) - if trimmed_seq.find(sc_seq) > -1: - add = False - if len(cleaned_seq) == (len(old_seq) - sc_lens): - for sc_seq in sc_seqs: - if cleaned_seq.find(sc_seq) == -1: - # Don't add, just trimmed clipped portion. 
- add = False -# else: print qname, 'not in sv reads' - if add: - fr = fq_read(header, seq, qual) - read_len = max(read_len, len(fr.seq)) - fq_recs[fr.id] = fr - return fq_recs, read_len - - -# def load_kmers(fns, kmers): -# """ -# """ - -# if not fns: -# return kmers - -# fns = fns.split(",") -# for fn in fns: -# f = open(fn, 'rU') -# for line in f.readlines(): -# line = line.strip() -# mer, count = line.split() -# if mer not in kmers: -# kmers[mer] = 0 -# kmers[mer] += int(count) -# return kmers - - -def extract_refseq_fa(gene_coords, ref_path, ref_fa, direction, target_fa_fn): - """ - """ - - logger = logging.getLogger('breakmer.utils') - chrom = gene_coords[0] - start = gene_coords[1] - end = gene_coords[2] - name = gene_coords[3] - marker_fn = get_marker_fn(target_fa_fn) - - if not os.path.isfile(marker_fn): - ref_d = SeqIO.to_dict(SeqIO.parse(ref_fa, 'fasta')) - seq_str = '' - seq = ref_d[chrom].seq[(start - 200):(end + 200)] - if direction == "reverse": - seq_str = str(seq.reverse_complement()) - else: - seq_str = str(seq) - fa = open(target_fa_fn, 'w') - fa.write('>' + name + '\n' + seq_str + '\n') - fa.close() - cmd = 'touch %s' % marker_fn - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - output, errors = p.communicate() - logger.info('Completed writing refseq fasta file %s, touching marker file %s' % (target_fa_fn, marker_fn)) - else: - logger.info('Refseq sequence fasta (%s) exists already' % target_fa_fn) - return target_fa_fn - - -def seq_trim(qual_str, min_qual): - """ - """ - - counter = 0 - while ord(qual_str[counter]) - 33 < min_qual: - counter += 1 - if counter == len(qual_str): - break - return counter - - -def get_seq_readname(read): - """ - """ - - end = '1' - if read.is_read2: - end = '2' - return read.qname + "/" + end - - -def trim_coords(qual_str, min_qual): - """ - """ - - q = [] - coords = [0, len(qual_str)] - start = seq_trim(qual_str, min_qual) - if start == len(qual_str): - return (0, 0, 0) - 
else: - end = len(qual_str) - seq_trim(qual_str[::-1], min_qual) - lngth = end - start - return (start, end, lngth) - - -def trim_qual(read, min_qual, min_len): - """ - """ - - qual_str = read.qual - q = [] - coords = [0, len(qual_str)] - start = seq_trim(qual_str, min_qual) - if start == len(qual_str): - return None - else: - end = len(qual_str) - seq_trim(qual_str[::-1], min_qual) - lngth = end - start - if lngth < min_len: - return None - nseq = read.seq[start:end] - nqual = qual_str[start:end] - read.seq = nseq - read.qual = nqual - return read - - -def fq_line(read, indel_only, min_len, trim=True): - """ - """ - - add_val = '0' - if indel_only: - add_val = '1' - lineout = None - if trim: - read = trim_qual(read, 5, min_len) - if read: - lineout = "@" + get_seq_readname(read) + "_" + add_val + "\n" + read.seq + "\n+\n" + read.qual + "\n" - return lineout - - -def get_overlap_index_nomm(a, b): - """ - """ - - i = 0 - while a[i:] != b[:len(a[i:])]: - i += 1 - return i - - -def seq_complexity(seq, N): - nmers = {} - total_possible = len(seq) - 2 - for i in range(len(seq) - (N - 1)): - nmers[str(seq[i:i + N]).upper()] = True - complexity = round((float(len(nmers)) / float(total_possible)) * 100, 4) - return complexity - - -def get_overlap_index_mm(a, b): - i = 0 - nmismatch = [0, 0] - match = False - while i < len(a) and not match: - nmismatch = [0, 0] - c = 0 - match_len = min(len(a[i:]), len(b[:len(a[i:])])) - for aa, bb in zip(a[i:],b[:len(a[i:])]): - if aa != bb: - nmismatch[0] += 1 - nmismatch[1] += 1 - else: - nmismatch[0] = 0 - if nmismatch[0] > 1 or nmismatch[1] > 3: - break - c += 1 - if c == match_len: - match = True - i += 1 - return i - 1 - - -def get_read_kmers(seq, l, skmers): - kmers = [] - i = 0 - while (i + l) <= len(seq): - k = seq[i:i + l] - # if k in skmers: - kmers.append(k) - i += 1 - return list(set(kmers) & set(skmers)) - - -def get_overlap_index(a, b): - i = 0 - nmismatch = 10 - while nmismatch > 1: - nmismatch = 0 - for aa, bb in 
zip(a[i:], b[:len(a[i:])]): - if aa != bb: - nmismatch += 1 - i += 1 - return i - 1 - - -def server_ready(f): - logger = logging.getLogger('root') - while not os.path.exists(f): - logger.info("Waiting for log file %s" % f) - time.sleep(10) - ready = False - f = open(f, 'r') - flines = f.readlines() - for line in flines: - if line.find('Server ready for queries') > -1: - ready = True - return ready - - -class fq_read: - """ - """ - def __init__(self, header, seq, qual, indel_only): - self.id = header - self.seq = str(seq) - self.qual = str(qual) - self.used = False - self.dup = False - self.indel_only = indel_only - - -class FastqFile(object): - """ - """ - def __init__(self, f): - if isinstance(f, str): - f = open(f) - self._f = f - - def __iter__(self): - return self - - def next(self): - header, seq, qual_header, qual = [self._f.next() for _ in range(4)] - header = header.strip() - # inst, lane, tile, x, y_end = header.split(':') - seq = seq.strip() - qual = qual.strip() - # bc = None - # y = y_end - # if y.find('/') > -1: - # y, end = y.split('/') - # if y.find('#') > -1: - # y, bc = y.split('#') - # header_dict = {'inst': inst, - # 'lane': int(lane), - # 'tile': int(tile), - # 'x': int(x), - # 'y': int(y), - # 'end': end, - # 'bc': bc} - return (header, seq, qual) diff --git a/cutadapt.cfg b/cutadapt.cfg deleted file mode 100644 index bd055a4..0000000 --- a/cutadapt.cfg +++ /dev/null @@ -1,6 +0,0 @@ --a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC --a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT --g GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT --g TCTACACTCTTTCCCTACACGACGCTCTTCCGATCT --q 5 ---minimum-length 15 diff --git a/example_data/B2M.bam b/example_data/B2M.bam deleted file mode 100644 index 6ed3d84..0000000 Binary files a/example_data/B2M.bam and /dev/null differ diff --git a/example_data/B2M.bam.bai b/example_data/B2M.bam.bai deleted file mode 100644 index 737b4a7..0000000 Binary files a/example_data/B2M.bam.bai and /dev/null differ diff --git 
a/example_data/README b/example_data/README deleted file mode 100644 index aecd000..0000000 --- a/example_data/README +++ /dev/null @@ -1,111 +0,0 @@ -BreaKmer example data README -------- - -To get started in running the example data, first make sure all the BreaKmer software dependencies are installed and running properly on your system. - -The follow files have been provided in the example_data directory to run the example data: -B2M.bam - sorted BAM file containing aligned reads to B2M. -B2M.bam.bai - index of B2M.bam file. -breakmer.cfg - BreaKmer config file. -cutadapt.cfg- Cutadapt configuration file containing the Illumina adapters to trim from sequences. -genes.bed - BED file specifying coordinates for B2M tiled regions. -ref_seq/B2M - all the extracted B2M reference sequences and associated kmer files. - -The following files have been provided in the BreaKmer directory (one level up from example_data directory) to run the example data: -/bin/cutadapt-1.5.tar.gz - uncompress and use binary in bin directory -/bin/jellyfish-1.1.11.tar.gz - uncompress and follow installation instructions, binary should be in the bin directory after successfull install -/bin/blat - blat binary for linux x86 64bit -/bin/gfClient - blat server binary for linux x86 64bit -/bin/gfServer - blat server binary for linux x86 64bit -/ref/ucsc_hg19_refgene.txt -/ref/ucsc_hg19_rmsk.bed - -To analyze the example data follow these steps: - -1. Edit the template configuration file appropriately (breakmer.cfg). - - We have provided all the files necessary to run the example data other than the genome fasta file (see below). - - The configuration file paths need to be changed accordingly. - - To create reference genome fasta file for example data: - a. Downloaded the chromFa.tar.gz file from UCSC Genome Browser (http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz). - b. Make directory named genome in /ref - c. 
Unpack chromFa.tar.gz in /ref/genome (tar -xvzf chromFa.tar.gz) - this will output many reference chromosome sequence fasta files (e.g., chr1.fa, chr2.fa, chr3.fa, etc...) - d. Concatenate all the primary chromosomes together (chr1, chr2, chr3,...chrX, chrY, chrM) in order (i.e., cat chr1.fa chr2.fa ... chrM > hg19_ref_genome.primary-assembly.fa) - this will produce one file containing all the chromosome sequences. - e. Rename the sequences by removing all the 'chr' since these are the names of the chromosomes in our example data. (sed -i 's/chr//g' hg19_ref_genome.primary-assembly.fa) - f. Move reference genome file to /ref - -analysis_name=example -targets_bed_file=/genes.bed -sample_bam_file=/B2M.bam -analysis_dir= -reference_data_dir=/ref_seq -cutadapt=/bin/cutadapt-1.5/bin/cutadapt -cutadapt_config_file=/cutadapt.cfg -jellyfish=/bin/jellyfish-1.1.11/bin/jellyfish -blat=/bin/blat -gfclient=/bin/gfClient -gfserver=/bin/gfServer -fatotwobit=/bin/faToTwoBit -reference_fasta=/ref/ -gene_annotation_file=/ref/ucsc_hg19_refgene.txt -repeat_mask_file=/ref/ucsc_hg19_rmsk.bed -kmer_size=15 - -2. Run BreaKmer - - python /breakmer.py /breakmer.cfg - -3. The output files will be within the designated analysis directory (i.e., example_data) as specified in the configuration file. 
- -The final output file with the results will be within the example_indel_svs.out file with the following: -genes target_breakpoints align_cigar mismatches strands rep_overlap_segment_len sv_type split_read_count nkmers disc_read_count breakpoint_coverages contig_id contig_seq -B2M chr15:45007873-45008026 (D153) 89M153D50M 0 + 0.0:139 indel 12 14 0 176,53 B2M_contig1 AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG - -The results indicates: -genes = B2M - Event occurs in B2M gene -target_breakpoints = chr15:45007873-45008026 (D153) - 153 bp deletion with breakpoints located at 45007873 and 45008026 -align_cigar = 89M153D50M - 89 bp of the contig match the reference, 153 bp are deleted in the contig sequence compared to reference, and 50 bp are matching -mismatches = 0 - No single nucleotide mismatches between the reference sequence and the contig sequence. -strands = + - Contig aligns on the + strand of the reference -rep_overlap_segment_len = 0.0:139 - Two values indications 1. % of repeat overlap 2. Contig segment matching length -sv_type = indel -split_read_count = 12 - Number of split reads supporting the event -nkmers = 14 - Number of sample only kmers used to build the contig -disc_read_count = 0 - No discordantly mapped read pairs extracted for this event. -breakpoint_coverages = 176,53 - Number of non-duplicated aligned sequences at the inferred breakpoint locations -contig_id = B2M_contig1 -contig_seq = AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG - contig sequence assembled containing this event. - -The output of the program should appear in the example_data/output and example_data/targets directories. The follow example files have been provided to check the users output and the expected output. 
- -example_output/ - Directory containing the example output that should be expected with a successful run. - example_summary.out - Example summary output file. - example_indel_svs.out - Example indel output file. - B2M/ - B2M_indel_svs.out - Example indel output file for B2M. - contig1_reads.sorted.bam - Example BAM file containing extracted reads for detected indel event in B2M. - contig1_svs.out - Example output file for SV event found in a contig. - contig1_reads.bam - Unsorted extracted reads in BAM format. - contig_reads.sorted.bam.bai - Index of sorted extracted reads BAM file. -example_targets/ - Directory containing the example targets output. - B2M/ - data/ - B2M_sv_reads.bam - BAM formatted file with extracted reads from the defined B2M region. - B2M_sv_reads.fastq - Fastq formatted file with the extracted reads from the defined B2M region. - B2M_sv_reads.sorted.bam - Sorted BAM of extracted reads. - B2M_sv_reads.sorted.bam.bai - Index of the sorted BAM file. - B2M_sv_reads_cleaned.fastq - Fastq formatted file containing the read sequences after they have been cleaned of adapter and low-quality sequence. - B2M_sv_reads_cleaned_filtered.fastq - Cleaned reads that have been further filtered if the original soft-clipped sequences were removed. - B2M_sv_sc_seqs.fa - Fasta formatted file containing the soft-clipped sequences. - B2M_sv_reads_cleaned_filtered.fastq_15mers_counts_0 - Jellyfish intermediate output file for the 15mers generated from the filtered reads. - B2M_sv_reads_cleaned_filtered.fastq_15mers_dump_0 - Jellyfish final output file of the 15mers generated from the filtered reads. - B2M_sv_sc_seqs.fa_15mers_counts_0 - Jellyfish intermediate output file for the 15mers generated from the unaligned sequences from the extracted reads. - B2M_sv_sc_seqs.fa_15mers_dump_0 - Jellyfish final output file of the 15mers generated from the unaligned sequences from the extracted reads. 
- contigs/ - contig/ - blat_res_target.psl - Raw BLAT output from re-aligning the contig to the target region reference sequence. - blat_res_target.psl.mod - Modified BLAT output with the correct genomic chromosome and coordinates from hit. - contig1.fa - Fast formatted file containing contig1 consensus sequence. - contig1.fq - Fastq formatted file containing the read sequences used for assemblying contig1. - contig1_svs.out - The SV formatted output for contig1. - kmers/ - B2M_sample_kmers.out - Sample only 15-mers with number of reads in which they are found. - B2M_sample_kmers_merged.out - The 15-mers and the IDs of the reads used to assemble contigs. diff --git a/example_data/breakmer.cfg b/example_data/breakmer.cfg deleted file mode 100644 index d39db4b..0000000 --- a/example_data/breakmer.cfg +++ /dev/null @@ -1,16 +0,0 @@ -analysis_name=example -targets_bed_file=/genes.bed -sample_bam_file=/B2M.bam -analysis_dir=/example -reference_data_dir=/data/ref -cutadapt=/cutadapt -cutadapt_config_file=/cutadapt.cfg -jellyfish=/jellyfish -blat=/blat -gfclient=gfClient -gfserver=/gfServer -fatotwobit=/faToTwoBit -reference_fasta= -gene_annotation_file= -repeat_mask_file= -kmer_size=15 diff --git a/example_data/cutadapt.cfg b/example_data/cutadapt.cfg deleted file mode 100644 index bd055a4..0000000 --- a/example_data/cutadapt.cfg +++ /dev/null @@ -1,6 +0,0 @@ --a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC --a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT --g GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT --g TCTACACTCTTTCCCTACACGACGCTCTTCCGATCT --q 5 ---minimum-length 15 diff --git a/example_data/example_output/B2M/B2M_indel_svs.out b/example_data/example_output/B2M/B2M_indel_svs.out deleted file mode 100644 index 1934a29..0000000 --- a/example_data/example_output/B2M/B2M_indel_svs.out +++ /dev/null @@ -1,2 +0,0 @@ -genes target_breakpoints align_cigar mismatches strands rep_overlap_segment_len sv_type split_read_count nkmers disc_read_count contig_id contig_seq -B2M 
chr15:45007873-45008026 (D153) 89M153D50M 0 + 0.0:139 indel 12 14 0 contig1 AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG diff --git a/example_data/example_output/B2M/contig1_reads.bam b/example_data/example_output/B2M/contig1_reads.bam deleted file mode 100644 index fdbb773..0000000 Binary files a/example_data/example_output/B2M/contig1_reads.bam and /dev/null differ diff --git a/example_data/example_output/B2M/contig1_reads.sorted.bam b/example_data/example_output/B2M/contig1_reads.sorted.bam deleted file mode 100644 index fdbb773..0000000 Binary files a/example_data/example_output/B2M/contig1_reads.sorted.bam and /dev/null differ diff --git a/example_data/example_output/B2M/contig1_reads.sorted.bam.bai b/example_data/example_output/B2M/contig1_reads.sorted.bam.bai deleted file mode 100644 index e5e92c7..0000000 Binary files a/example_data/example_output/B2M/contig1_reads.sorted.bam.bai and /dev/null differ diff --git a/example_data/example_output/B2M/contig1_svs.out b/example_data/example_output/B2M/contig1_svs.out deleted file mode 100644 index 6b7d93c..0000000 --- a/example_data/example_output/B2M/contig1_svs.out +++ /dev/null @@ -1 +0,0 @@ -B2M chr15:45007873-45008026 (D153) 89M153D50M 0 + 0.0:139 indel 12 14 0 contig1 AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG \ No newline at end of file diff --git a/example_data/example_output/example_indel_svs.out b/example_data/example_output/example_indel_svs.out deleted file mode 100644 index 1934a29..0000000 --- a/example_data/example_output/example_indel_svs.out +++ /dev/null @@ -1,2 +0,0 @@ -genes target_breakpoints align_cigar mismatches strands rep_overlap_segment_len sv_type split_read_count nkmers disc_read_count contig_id contig_seq -B2M chr15:45007873-45008026 (D153) 89M153D50M 0 + 0.0:139 indel 12 14 0 contig1 
AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG diff --git a/example_data/example_output/example_summary.out b/example_data/example_output/example_summary.out deleted file mode 100644 index eea9c61..0000000 --- a/example_data/example_output/example_summary.out +++ /dev/null @@ -1,2 +0,0 @@ -Target N_contigs Total_variants N_indel N_rearrangement N_trl Trls -B2M 1 1 1 0 0 - diff --git a/example_data/example_targets/B2M/contigs/contig1/blat_res.target.psl b/example_data/example_targets/B2M/contigs/contig1/blat_res.target.psl deleted file mode 100644 index 54774e6..0000000 --- a/example_data/example_targets/B2M/contigs/contig1/blat_res.target.psl +++ /dev/null @@ -1 +0,0 @@ -139 0 0 0 0 0 1 153 + contig1 139 0 139 B2M 5195 4239 4531 2 89,50, 0,89, 4239,4481, diff --git a/example_data/example_targets/B2M/contigs/contig1/blat_res.target.psl.mod b/example_data/example_targets/B2M/contigs/contig1/blat_res.target.psl.mod deleted file mode 100644 index 0b4ca98..0000000 --- a/example_data/example_targets/B2M/contigs/contig1/blat_res.target.psl.mod +++ /dev/null @@ -1 +0,0 @@ -139 0 0 0 0 0 1 153 + contig1 139 0 139 15 5195 45007784 45008076 2 89,50, 0,89, 45007784,45008026, diff --git a/example_data/example_targets/B2M/contigs/contig1/contig1.fa b/example_data/example_targets/B2M/contigs/contig1/contig1.fa deleted file mode 100644 index e174536..0000000 --- a/example_data/example_targets/B2M/contigs/contig1/contig1.fa +++ /dev/null @@ -1,2 +0,0 @@ ->contig1 -AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG \ No newline at end of file diff --git a/example_data/example_targets/B2M/contigs/contig1/contig1.fq b/example_data/example_targets/B2M/contigs/contig1/contig1.fq deleted file mode 100644 index bfd4ed2..0000000 --- a/example_data/example_targets/B2M/contigs/contig1/contig1.fq +++ /dev/null @@ 
-1,48 +0,0 @@ -@H91H9ADXX140327:1:2102:19465:23489/2_1 -CACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATC -+ -69EEEFBAFBFABCCFFBEFFFDDEEHHDGH@FEFEFCAGGCDEEEBGEEBCGBCCGDFGCBBECFFEBDCDCEDEEEAABCCAEC@>>BB?@C -@H91H9ADXX140327:2:1209:17474:13884/1_0 -AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATA -+ ->>CC@EFGG@AFACFFBAEEEDC?@@CDBEEFFGCED?DDDGGGHDEFGEEFFEGDDGCHCDDGHHGHGH@CGBGEFFHECGBGDFFABBFDECFFFFC?@ -@H91H9ADXX140327:2:2202:12128:77556/2_0 -TTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTG -+ -BBD@ACFDFDDFBAFBBFCFDFDBCAABGFGGHCGDFCCCCFFCDFEC?ADFGGDFF?EDEDEBAEFCDDCEAEDDAAEAABEBDD@@@BACBAACBA -@H91H9ADXX140327:2:2212:12984:63612/1_1 -TCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTA -+ -B@;FFGGGDFDGBBBBGFCEEGEDDEFHGFHH@FFFFGCCGHDEFEFCGFFCCHCDEGEFGCCCFEGGFDFDFDGFGGFCCDEEDGECAAFFCEGEECDBB -@H91H9ADXX140327:2:2104:16508:95685/2_1 -CCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG -+ -EEEEAFDFBBBAFFDEFFECDEGFHEGG@FDEEGCBGGCDFDFBFEEC@FBCDGDFFCCCEDFFECDDECFDFEEAACCCBEC@@?CB@ACAA?CACA -@H91H9ADXX140327:1:2215:1194:96327/2_0 -AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGC -+ -ACBD@ABCC?BC?@CD@ADDEDAEA@DEBDCEEFCDBBFDEFFFFDEEGDEEEFGDCGDHBCCGGGFGGGACHCGEFFGECFBFDFFBABEDCADDDD -@H91H9ADXX140327:2:1109:18752:52441/1_0 -TGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGA -+ -BFECFEDDFBCDFDGEEEBBFCCGBGEGEDEC=;FGHHGCGDGDCCCFFDDFFECDDGGGEGG@FEFEFCBGHCEDEFBGEECBGCCDGEFGCCCFDFDDB -@H91H9ADXX140327:2:1214:6650:14917/1_0 -TATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGG -+ -@ABECFDDEB@GBBGBGDFECEEG?GGGHHCHEEBDDDGGDEGGFCDDGFHEGG@FDEEFBCGGDEFEFCGEFCCGCCDGDEFCCCEDDGECFEEEFEDDB 
-@H91H9ADXX140327:2:2212:12198:89759/2_0 -TGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGA -+ ->C;?AB@BBACDBCAABBDDCDDCDEFCDDDDEBBFCEABCGDBDEEF>@GBGCEDGEDGCGFECAACFEGDFFGFECB@DFGCBABFAECEB?= -@H91H9ADXX140327:2:2208:12019:73884/1_0 -GGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGAT -+ -EDBEDEDFBCDGDFEEEEAGDCHBFEGCDDB<8FGHHHCHEGCDCCFFDEGFFCDEGGHDGG@FEFEFCAGGDDFEFBGFECBGCCDGEGGCCCFEFFDAB -@H91H9ADXX140327:1:1211:4032:84920/1_0 -TTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTC -+ -BBCEECCEFEEAGBADECEDEFFDFCCFEEGGGGDFFGEFFFFHECHDHCDDGGGGHGG@CHCHEFGFFDHDHEFGABEFDECFGFGDCBCDFGGDBAF@D -@H91H9ADXX140327:2:1108:11441:47321/2_0 -CTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAA -+ -ACAD>?AB?BACCDBCAADBDEEEDCDEFCEDEDFCCECGCBCGEGFFFF?ADAGDCEGDDFCGEFFBACFEFEEFGFDDBCDFDGBCBFCFDEEEBB diff --git a/example_data/example_targets/B2M/contigs/contig1/contig1_svs.out b/example_data/example_targets/B2M/contigs/contig1/contig1_svs.out deleted file mode 100644 index 6b7d93c..0000000 --- a/example_data/example_targets/B2M/contigs/contig1/contig1_svs.out +++ /dev/null @@ -1 +0,0 @@ -B2M chr15:45007873-45008026 (D153) 89M153D50M 0 + 0.0:139 indel 12 14 0 contig1 AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG \ No newline at end of file diff --git a/example_data/example_targets/B2M/data/.B2M_sv_reads_cleaned_filtered.fastq_15mers_dump b/example_data/example_targets/B2M/data/.B2M_sv_reads_cleaned_filtered.fastq_15mers_dump deleted file mode 100644 index e69de29..0000000 diff --git a/example_data/example_targets/B2M/data/.B2M_sv_sc_seqs.fa_15mers_dump b/example_data/example_targets/B2M/data/.B2M_sv_sc_seqs.fa_15mers_dump deleted file mode 100644 index e69de29..0000000 diff --git 
a/example_data/example_targets/B2M/data/B2M_sv_reads.bam b/example_data/example_targets/B2M/data/B2M_sv_reads.bam deleted file mode 100644 index 97bc6cf..0000000 Binary files a/example_data/example_targets/B2M/data/B2M_sv_reads.bam and /dev/null differ diff --git a/example_data/example_targets/B2M/data/B2M_sv_reads.fastq b/example_data/example_targets/B2M/data/B2M_sv_reads.fastq deleted file mode 100644 index bbf921f..0000000 --- a/example_data/example_targets/B2M/data/B2M_sv_reads.fastq +++ /dev/null @@ -1,52 +0,0 @@ -@H91H9ADXX140327:2:2201:12089:90342/2_0 -TTCTTTCCCTGCTAAAATGTTTCACTGTCCTGAGGACTATTTATAGACAGCTCTAACATGATAACCCTCACTATGTGGAGAACATTGACAGAGTAACA -+ -AACC=>CC@EFGG@AFACFFBAEEEDC?@@CDBEEFFGCED?DDDGGGHDEFGEEFFEGDDGCHCDDGHHGHGH@CGBGEFFHECGBGDFFABBFDECFFFFC?@ -@H91H9ADXX140327:2:2104:16508:95685/2_1 -CCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG -+ -EEEEAFDFBBBAFFDEFFECDEGFHEGG@FDEEGCBGGCDFDFBFEEC@FBCDGDFFCCCEDFFECDDECFDFEEAACCCBEC@@?CB@ACAA?CACA -@H91H9ADXX140327:2:1109:18752:52441/1_0 -TGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGA -+ -BFECFEDDFBCDFDGEEEBBFCCGBGEGEDEC=;FGHHGCGDGDCCCFFDDFFECDDGGGEGG@FEFEFCBGHCEDEFBGEECBGCCDGEFGCCCFDFDDB -@H91H9ADXX140327:2:1108:11441:47321/2_0 -CTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAA -+ -ACAD>?AB?BACCDBCAADBDEEEDCDEFCEDEDFCCECGCBCGEGFFFF?ADAGDCEGDDFCGEFFBACFEFEEFGFDDBCDFDGBCBFCFDEEEBB -@H91H9ADXX140327:1:2102:19465:23489/2_1 -CACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGAT -+ -69EEEFBAFBFABCCFFBEFFFDDEEHHDGH@FEFEFCAGGCDEEEBGEEBCGBCCGDFGCBBECFFEBDCDCEDEEEAABCCAEC@>>BB?@CCC@A -@H91H9ADXX140327:2:2212:12984:63612/1_1 -TCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTA -+ 
-B@;FFGGGDFDGBBBBGFCEEGEDDEFHGFHH@FFFFGCCGHDEFEFCGFFCCHCDEGEFGCCCFEGGFDFDFDGFGGFCCDEEDGECAAFFCEGEECDBB -@H91H9ADXX140327:1:1211:4032:84920/1_0 -TTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTC -+ -BBCEECCEFEEAGBADECEDEFFDFCCFEEGGGGDFFGEFFFFHECHDHCDDGGGGHGG@CHCHEFGFFDHDHEFGABEFDECFGFGDCBCDFGGDBAF@D -@H91H9ADXX140327:2:2212:12198:89759/2_0 -TCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGA -+ -A@C>C;?AB@BBACDBCAABBDDCDDCDEFCDDDDEBBFCEABCGDBDEEF>@GBGCEDGEDGCGFECAACFEGDFFGFECB@DFGCBABFAECEB?= -@H91H9ADXX140327:2:2202:12128:77556/2_0 -TTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTG -+ -BBD@ACFDFDDFBAFBBFCFDFDBCAABGFGGHCGDFCCCCFFCDFEC?ADFGGDFF?EDEDEBAEFCDDCEAEDDAAEAABEBDD@@@BACBAACBA -@H91H9ADXX140327:2:1214:6650:14917/1_0 -TATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGG -+ -@ABECFDDEB@GBBGBGDFECEEG?GGGHHCHEEBDDDGGDEGGFCDDGFHEGG@FDEEFBCGGDEFEFCGEFCCGCCDGDEFCCCEDDGECFEEEFEDDB -@H91H9ADXX140327:1:2215:1194:96327/2_0 -AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGC -+ -ACBD@ABCC?BC?@CD@ADDEDAEA@DEBDCEEFCDBBFDEFFFFDEEGDEEEFGDCGDHBCCGGGFGGGACHCGEFFGECFBFDFFBABEDCADDDD diff --git a/example_data/example_targets/B2M/data/B2M_sv_reads.sorted.bam b/example_data/example_targets/B2M/data/B2M_sv_reads.sorted.bam deleted file mode 100644 index e462848..0000000 Binary files a/example_data/example_targets/B2M/data/B2M_sv_reads.sorted.bam and /dev/null differ diff --git a/example_data/example_targets/B2M/data/B2M_sv_reads.sorted.bam.bai b/example_data/example_targets/B2M/data/B2M_sv_reads.sorted.bam.bai deleted file mode 100644 index 61adb3a..0000000 Binary files a/example_data/example_targets/B2M/data/B2M_sv_reads.sorted.bam.bai and /dev/null differ diff --git a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned.fastq 
b/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned.fastq deleted file mode 100644 index 73f493d..0000000 --- a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned.fastq +++ /dev/null @@ -1,52 +0,0 @@ -@H91H9ADXX140327:2:2201:12089:90342/2_0 -TTCTTTCCCTGCTAAAATGTTTCACTGTCCTGAGGACTATTTATAGACAGCTCTAACATGATAACCCTCACTATGTGGAGAACATTGACAGAGTAACA -+ -AACC=>CC@EFGG@AFACFFBAEEEDC?@@CDBEEFFGCED?DDDGGGHDEFGEEFFEGDDGCHCDDGHHGHGH@CGBGEFFHECGBGDFFABBFDECFFFFC?@ -@H91H9ADXX140327:2:2104:16508:95685/2_1 -CCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG -+ -EEEEAFDFBBBAFFDEFFECDEGFHEGG@FDEEGCBGGCDFDFBFEEC@FBCDGDFFCCCEDFFECDDECFDFEEAACCCBEC@@?CB@ACAA?CACA -@H91H9ADXX140327:2:1109:18752:52441/1_0 -TGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGA -+ -BFECFEDDFBCDFDGEEEBBFCCGBGEGEDEC=;FGHHGCGDGDCCCFFDDFFECDDGGGEGG@FEFEFCBGHCEDEFBGEECBGCCDGEFGCCCFDFDDB -@H91H9ADXX140327:2:1108:11441:47321/2_0 -CTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAA -+ -ACAD>?AB?BACCDBCAADBDEEEDCDEFCEDEDFCCECGCBCGEGFFFF?ADAGDCEGDDFCGEFFBACFEFEEFGFDDBCDFDGBCBFCFDEEEBB -@H91H9ADXX140327:1:2102:19465:23489/2_1 -CACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATC -+ -69EEEFBAFBFABCCFFBEFFFDDEEHHDGH@FEFEFCAGGCDEEEBGEEBCGBCCGDFGCBBECFFEBDCDCEDEEEAABCCAEC@>>BB?@C -@H91H9ADXX140327:2:2212:12984:63612/1_1 -TCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTA -+ -B@;FFGGGDFDGBBBBGFCEEGEDDEFHGFHH@FFFFGCCGHDEFEFCGFFCCHCDEGEFGCCCFEGGFDFDFDGFGGFCCDEEDGECAAFFCEGEECDBB -@H91H9ADXX140327:1:1211:4032:84920/1_0 -TTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTC -+ -BBCEECCEFEEAGBADECEDEFFDFCCFEEGGGGDFFGEFFFFHECHDHCDDGGGGHGG@CHCHEFGFFDHDHEFGABEFDECFGFGDCBCDFGGDBAF@D -@H91H9ADXX140327:2:2212:12198:89759/2_0 
-TGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGA -+ ->C;?AB@BBACDBCAABBDDCDDCDEFCDDDDEBBFCEABCGDBDEEF>@GBGCEDGEDGCGFECAACFEGDFFGFECB@DFGCBABFAECEB?= -@H91H9ADXX140327:2:2202:12128:77556/2_0 -TTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTG -+ -BBD@ACFDFDDFBAFBBFCFDFDBCAABGFGGHCGDFCCCCFFCDFEC?ADFGGDFF?EDEDEBAEFCDDCEAEDDAAEAABEBDD@@@BACBAACBA -@H91H9ADXX140327:2:1214:6650:14917/1_0 -TATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGG -+ -@ABECFDDEB@GBBGBGDFECEEG?GGGHHCHEEBDDDGGDEGGFCDDGFHEGG@FDEEFBCGGDEFEFCGEFCCGCCDGDEFCCCEDDGECFEEEFEDDB -@H91H9ADXX140327:1:2215:1194:96327/2_0 -AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGC -+ -ACBD@ABCC?BC?@CD@ADDEDAEA@DEBDCEEFCDBBFDEFFFFDEEGDEEEFGDCGDHBCCGGGFGGGACHCGEFFGECFBFDFFBABEDCADDDD diff --git a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq b/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq deleted file mode 100644 index 73f493d..0000000 --- a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq +++ /dev/null @@ -1,52 +0,0 @@ -@H91H9ADXX140327:2:2201:12089:90342/2_0 -TTCTTTCCCTGCTAAAATGTTTCACTGTCCTGAGGACTATTTATAGACAGCTCTAACATGATAACCCTCACTATGTGGAGAACATTGACAGAGTAACA -+ -AACC=>CC@EFGG@AFACFFBAEEEDC?@@CDBEEFFGCED?DDDGGGHDEFGEEFFEGDDGCHCDDGHHGHGH@CGBGEFFHECGBGDFFABBFDECFFFFC?@ -@H91H9ADXX140327:2:2104:16508:95685/2_1 -CCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAG -+ -EEEEAFDFBBBAFFDEFFECDEGFHEGG@FDEEGCBGGCDFDFBFEEC@FBCDGDFFCCCEDFFECDDECFDFEEAACCCBEC@@?CB@ACAA?CACA -@H91H9ADXX140327:2:1109:18752:52441/1_0 -TGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGA -+ -BFECFEDDFBCDFDGEEEBBFCCGBGEGEDEC=;FGHHGCGDGDCCCFFDDFFECDDGGGEGG@FEFEFCBGHCEDEFBGEECBGCCDGEFGCCCFDFDDB 
-@H91H9ADXX140327:2:1108:11441:47321/2_0 -CTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAA -+ -ACAD>?AB?BACCDBCAADBDEEEDCDEFCEDEDFCCECGCBCGEGFFFF?ADAGDCEGDDFCGEFFBACFEFEEFGFDDBCDFDGBCBFCFDEEEBB -@H91H9ADXX140327:1:2102:19465:23489/2_1 -CACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATC -+ -69EEEFBAFBFABCCFFBEFFFDDEEHHDGH@FEFEFCAGGCDEEEBGEEBCGBCCGDFGCBBECFFEBDCDCEDEEEAABCCAEC@>>BB?@C -@H91H9ADXX140327:2:2212:12984:63612/1_1 -TCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTA -+ -B@;FFGGGDFDGBBBBGFCEEGEDDEFHGFHH@FFFFGCCGHDEFEFCGFFCCHCDEGEFGCCCFEGGFDFDFDGFGGFCCDEEDGECAAFFCEGEECDBB -@H91H9ADXX140327:1:1211:4032:84920/1_0 -TTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTC -+ -BBCEECCEFEEAGBADECEDEFFDFCCFEEGGGGDFFGEFFFFHECHDHCDDGGGGHGG@CHCHEFGFFDHDHEFGABEFDECFGFGDCBCDFGGDBAF@D -@H91H9ADXX140327:2:2212:12198:89759/2_0 -TGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGA -+ ->C;?AB@BBACDBCAABBDDCDDCDEFCDDDDEBBFCEABCGDBDEEF>@GBGCEDGEDGCGFECAACFEGDFFGFECB@DFGCBABFAECEB?= -@H91H9ADXX140327:2:2202:12128:77556/2_0 -TTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTG -+ -BBD@ACFDFDDFBAFBBFCFDFDBCAABGFGGHCGDFCCCCFFCDFEC?ADFGGDFF?EDEDEBAEFCDDCEAEDDAAEAABEBDD@@@BACBAACBA -@H91H9ADXX140327:2:1214:6650:14917/1_0 -TATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGG -+ -@ABECFDDEB@GBBGBGDFECEEG?GGGHHCHEEBDDDGGDEGGFCDDGFHEGG@FDEEFBCGGDEFEFCGEFCCGCCDGDEFCCCEDDGECFEEEFEDDB -@H91H9ADXX140327:1:2215:1194:96327/2_0 -AAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGC -+ -ACBD@ABCC?BC?@CD@ADDEDAEA@DEBDCEEFCDBBFDEFFFFDEEGDEEEFGDCGDHBCCGGGFGGGACHCGEFFGECFBFDFFBABEDCADDDD diff --git 
a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq_15mers_counts_0 b/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq_15mers_counts_0 deleted file mode 100644 index 0a55891..0000000 Binary files a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq_15mers_counts_0 and /dev/null differ diff --git a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq_15mers_dump b/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq_15mers_dump deleted file mode 100644 index 37e0511..0000000 --- a/example_data/example_targets/B2M/data/B2M_sv_reads_cleaned_filtered.fastq_15mers_dump +++ /dev/null @@ -1,209 +0,0 @@ -CCACTGAAAAAGATG 12 -ATGTTTCACTGTCCT 1 -CCCTGCTAAAATGTT 1 -CACTGAATTCACCCC 9 -TAACATGATAACCCT 1 -ACCCCCACTGAAAAA 11 -TATAGACAGCTCTAA 1 -TCTAACATGATAACC 1 -TAACCCTCACTATGT 1 -GATTGTCAGGGAATG 3 -CTGCATATTGGGATT 8 -AACATTGACAGAGTA 1 -AAAATGTTTCACTGT 1 -ATTGTCAGGGAATGT 3 -GATGAGTATGCCTGC 12 -CAGGGAATGTTCTTA 3 -TTTACAATCTGCATA 11 -TTTCACTGTCCTGAG 1 -CTGAATTCACCCCCA 9 -ACTGAATTCACCCCC 9 -GGAGAACATTGACAG 1 -TACAATCTGCATATT 10 -CATTGACAGAGTAAC 1 -CTAAAATGTTTCACT 1 -TGGTCTTTCTATCTC 3 -AGCTCTAACATGATA 1 -GAACATTGACAGAGT 1 -TTCTTAAAGATCAGA 2 -GGACTATTTATAGAC 1 -TCACCCCCACTGAAA 10 -AATGTTCTTAAAGAT 3 -AACCATGTGACTTTA 12 -AGATGAGTATGCCTG 12 -ACTATTTATAGACAG 1 -ACATGATAACCCTCA 1 -CACTATGTGGAGAAC 1 -AAAGATGAGTATGCC 12 -CTTGTACTACACTGA 8 -TGGGATTGTCAGGGA 5 -TGAGGACTATTTATA 1 -TGCTAAAATGTTTCA 1 -TGGAGAACATTGACA 1 -ACTACACTGAATTCA 9 -TGAATTCACCCCCAC 9 -GACTATTTATAGACA 1 -CCGTGTGAACCATGT 12 -TCTGCATATTGGGAT 9 -AATGTTTCACTGTCC 1 -TTTATAGACAGCTCT 1 -TTCTATCTCTTGTAC 6 -TCACTGTCCTGAGGA 1 -GAATTCACCCCCACT 9 -CATGATAACCCTCAC 1 -TACACTGAATTCACC 9 -ATCTCTTGTACTACA 7 -AGGGAATGTTCTTAA 3 -ATATTGGGATTGTCA 6 -TATCTCTTGTACTAC 7 -TCTTGTACTACACTG 7 -CAATCTGCATATTGG 10 -GTGACTTTACAATCT 12 -TCCTGAGGACTATTT 1 -TTCCCTGCTAAAATG 1 -CAGCTCTAACATGAT 1 -CCTCACTATGTGGAG 1 -ACAATCTGCATATTG 10 
-ACACTGAATTCACCC 9 -AAAAAGATGAGTATG 12 -ACTGAAAAAGATGAG 12 -TCTATCTCTTGTACT 6 -TCAGGGAATGTTCTT 3 -GGGAATGTTCTTAAA 3 -GTGAACCATGTGACT 12 -ATGAGTATGCCTGCC 12 -CCCCCACTGAAAAAG 11 -AGACAGCTCTAACAT 1 -ATGTTCTTAAAGATC 3 -CTGGTCTTTCTATCT 2 -GTCCTGAGGACTATT 1 -GGAATGTTCTTAAAG 3 -TACTACACTGAATTC 9 -GAAAAAGATGAGTAT 12 -TATTGGGATTGTCAG 6 -GTATGCCTGCCGTGT 12 -TTCACTGTCCTGAGG 1 -AAGGACTGGTCTTTC 2 -TTCACCCCCACTGAA 9 -AAATGTTTCACTGTC 1 -GCCGTGTGAACCATG 12 -TGCCGTGTGAACCAT 12 -GGGATTGTCAGGGAA 4 -TCACTATGTGGAGAA 1 -AGGACTGGTCTTTCT 2 -AGTATGCCTGCCGTG 12 -TTGGGATTGTCAGGG 6 -TTTCCCTGCTAAAAT 1 -TTGTACTACACTGAA 8 -ACTATGTGGAGAACA 1 -ATAGACAGCTCTAAC 1 -TAGACAGCTCTAACA 1 -TCTCTTGTACTACAC 7 -TGTCCTGAGGACTAT 1 -TATTTATAGACAGCT 1 -GAGGACTATTTATAG 1 -ATGCCTGCCGTGTGA 12 -TCTTTCCCTGCTAAA 1 -GACTGGTCTTTCTAT 2 -GTGTGAACCATGTGA 12 -TGAGTATGCCTGCCG 12 -ACTGTCCTGAGGACT 1 -AGGACTATTTATAGA 1 -TTAAAGATCAGATTA 2 -CCTGCCGTGTGAACC 12 -CTGAAAAAGATGAGT 12 -AAAAGATGAGTATGC 12 -CTATCTCTTGTACTA 6 -CTTTCTATCTCTTGT 4 -ATCTGCATATTGGGA 10 -AAGATGAGTATGCCT 12 -TTACAATCTGCATAT 10 -CTGCCGTGTGAACCA 12 -CACTGAAAAAGATGA 12 -CTGAGGACTATTTAT 1 -CTAACATGATAACCC 1 -ATTCACCCCCACTGA 9 -ACTTTACAATCTGCA 11 -TGTGGAGAACATTGA 1 -TTCTTTCCCTGCTAA 1 -ATGATAACCCTCACT 1 -AATTCACCCCCACTG 9 -AACCCTCACTATGTG 1 -TGATAACCCTCACTA 1 -GTTCTTAAAGATCAG 2 -CCCTCACTATGTGGA 1 -TATGTGGAGAACATT 1 -GAATGTTCTTAAAGA 3 -CACTGTCCTGAGGAC 1 -CTTTCCCTGCTAAAA 1 -TGTTTCACTGTCCTG 1 -GAGAACATTGACAGA 1 -CTACACTGAATTCAC 9 -CTCTAACATGATAAC 1 -TATGCCTGCCGTGTG 12 -CTGCTAAAATGTTTC 1 -GCATATTGGGATTGT 7 -CCCACTGAAAAAGAT 12 -AGAACATTGACAGAG 1 -CTGTCCTGAGGACTA 1 -TTTCTATCTCTTGTA 5 -TGAAAAAGATGAGTA 12 -CCTGAGGACTATTTA 1 -TAAAATGTTTCACTG 1 -CTATGTGGAGAACAT 1 -CACCCCCACTGAAAA 11 -GGATTGTCAGGGAAT 3 -TCTTTCTATCTCTTG 4 -TGCCTGCCGTGTGAA 12 -AATCTGCATATTGGG 10 -GACAGCTCTAACATG 1 -CTTAAAGATCAGATT 2 -GTCTTTCTATCTCTT 4 -CTCACTATGTGGAGA 1 -GGACTGGTCTTTCTA 2 -CATATTGGGATTGTC 7 -CTCTTGTACTACACT 7 -TAAAGATCAGATTAG 1 -TTGTCAGGGAATGTT 3 -GTACTACACTGAATT 9 -ATGTGGAGAACATTG 1 
-TGACTTTACAATCTG 12 -ATAACCCTCACTATG 1 -GTTTCACTGTCCTGA 1 -GCTAAAATGTTTCAC 1 -TGTACTACACTGAAT 9 -CCATGTGACTTTACA 12 -TGTTCTTAAAGATCA 2 -TGCATATTGGGATTG 8 -TGAACCATGTGACTT 12 -CGTGTGAACCATGTG 12 -GACTTTACAATCTGC 12 -GAGTATGCCTGCCGT 12 -TTATAGACAGCTCTA 1 -ACATTGACAGAGTAA 1 -GTCAGGGAATGTTCT 3 -ACCATGTGACTTTAC 12 -CTATTTATAGACAGC 1 -TCTTAAAGATCAGAT 2 -GTGGAGAACATTGAC 1 -GATAACCCTCACTAT 1 -GCCTGCCGTGTGAAC 12 -ATTGACAGAGTAACA 1 -ACAGCTCTAACATGA 1 -ACTGGTCTTTCTATC 2 -ATTTATAGACAGCTC 1 -GGTCTTTCTATCTCT 4 -AACATGATAACCCTC 1 -ATGTGACTTTACAAT 12 -GCTCTAACATGATAA 1 -CTTTACAATCTGCAT 11 -CATGTGACTTTACAA 12 -ATTGGGATTGTCAGG 6 -TGTGAACCATGTGAC 12 -CCCCACTGAAAAAGA 12 -TGTCAGGGAATGTTC 3 -CCTGCTAAAATGTTT 1 -TGTGACTTTACAATC 12 -TCCCTGCTAAAATGT 1 -ACCCTCACTATGTGG 1 -GAACCATGTGACTTT 12 diff --git a/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa b/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa deleted file mode 100644 index 38ddace..0000000 --- a/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa +++ /dev/null @@ -1,30 +0,0 @@ ->H91H9ADXX140327:2:2201:12089:90342/2 -TTCTTTCCCTGCTAAAATGTTTCACTGTCCTG ->H91H9ADXX140327:2:2201:12089:90342/2 -GAGGACTATTTATAGACAGCTCTAACATGATAACCCTCACTATGTGGAGAACATTGACAGAGTAACA ->H91H9ADXX140327:2:2208:12019:73884/1 -GGTCTTTCTATCTCTTGTACTACACTGAATTCACCC ->H91H9ADXX140327:2:2208:12019:73884/1 -GAACCATGTGACTTTACAATCTGCATATTGGGAT ->H91H9ADXX140327:2:1209:17474:13884/1 -GAACCATGTGACTTTACAATCTGCATA ->H91H9ADXX140327:2:2104:16508:95685/2 -CCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTG ->H91H9ADXX140327:2:1109:18752:52441/1 -GAACCATGTGACTTTACAATCTGCATATTGGGA ->H91H9ADXX140327:2:1108:11441:47321/2 -GAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGGAA ->H91H9ADXX140327:1:2102:19465:23489/2 -CACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTG ->H91H9ADXX140327:2:2212:12984:63612/1 -TCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTG ->H91H9ADXX140327:1:1211:4032:84920/1 -GAACCATGTGACTTTACAATCTGCATATTGGGATTGTC 
->H91H9ADXX140327:2:2212:12198:89759/2 -TCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTACAATCTGCATATTG ->H91H9ADXX140327:2:2202:12128:77556/2 -GAACCATGTGACTTTACAATCTGCATATTGGGATTG ->H91H9ADXX140327:2:1214:6650:14917/1 -GAACCATGTGACTTTACAATCTGCATATTGGGATTGTCAGGG ->H91H9ADXX140327:1:2215:1194:96327/2 -GAACCATGTGACTTTACAATCTGC diff --git a/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa_15mers_counts_0 b/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa_15mers_counts_0 deleted file mode 100644 index 1642308..0000000 Binary files a/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa_15mers_counts_0 and /dev/null differ diff --git a/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa_15mers_dump b/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa_15mers_dump deleted file mode 100644 index d26f1ef..0000000 --- a/example_data/example_targets/B2M/data/B2M_sv_sc_seqs.fa_15mers_dump +++ /dev/null @@ -1,168 +0,0 @@ -CCACTGAAAAAGATG 4 -ATGTTTCACTGTCCT 1 -CCCTGCTAAAATGTT 1 -CACTGAATTCACCCC 1 -TAACATGATAACCCT 1 -ACCCCCACTGAAAAA 3 -TATAGACAGCTCTAA 1 -TCTAACATGATAACC 1 -TAACCCTCACTATGT 1 -CTGCATATTGGGATT 4 -AACATTGACAGAGTA 1 -AAAATGTTTCACTGT 1 -GATGAGTATGCCTGC 4 -TTTACAATCTGCATA 11 -CTGAATTCACCCCCA 1 -ACTGAATTCACCCCC 1 -GGAGAACATTGACAG 1 -TACAATCTGCATATT 10 -CATTGACAGAGTAAC 1 -CTAAAATGTTTCACT 1 -AGCTCTAACATGATA 1 -GAACATTGACAGAGT 1 -GGACTATTTATAGAC 1 -TCACCCCCACTGAAA 2 -AACCATGTGACTTTA 12 -AGATGAGTATGCCTG 4 -ACTATTTATAGACAG 1 -ACATGATAACCCTCA 1 -CACTATGTGGAGAAC 1 -AAAGATGAGTATGCC 4 -CTTGTACTACACTGA 2 -TGGGATTGTCAGGGA 1 -TGCTAAAATGTTTCA 1 -TGGAGAACATTGACA 1 -ACTACACTGAATTCA 2 -TGAATTCACCCCCAC 1 -GACTATTTATAGACA 1 -CCGTGTGAACCATGT 4 -TCTGCATATTGGGAT 5 -AATGTTTCACTGTCC 1 -TTTATAGACAGCTCT 1 -TTCTATCTCTTGTAC 1 -GAATTCACCCCCACT 1 -CATGATAACCCTCAC 1 -TACACTGAATTCACC 2 -ATCTCTTGTACTACA 1 -ATATTGGGATTGTCA 2 -TATCTCTTGTACTAC 1 -TCTTGTACTACACTG 2 -CAATCTGCATATTGG 6 -GTGACTTTACAATCT 12 -TTCCCTGCTAAAATG 1 -CAGCTCTAACATGAT 1 -CCTCACTATGTGGAG 1 
-ACAATCTGCATATTG 10 -ACACTGAATTCACCC 2 -AAAAAGATGAGTATG 4 -ACTGAAAAAGATGAG 4 -TCTATCTCTTGTACT 1 -GTGAACCATGTGACT 4 -ATGAGTATGCCTGCC 4 -CCCCCACTGAAAAAG 3 -AGACAGCTCTAACAT 1 -TACTACACTGAATTC 2 -GAAAAAGATGAGTAT 4 -TATTGGGATTGTCAG 2 -GTATGCCTGCCGTGT 4 -TTCACCCCCACTGAA 1 -AAATGTTTCACTGTC 1 -GCCGTGTGAACCATG 4 -TGCCGTGTGAACCAT 4 -GGGATTGTCAGGGAA 1 -TCACTATGTGGAGAA 1 -AGTATGCCTGCCGTG 4 -TTGGGATTGTCAGGG 2 -TTTCCCTGCTAAAAT 1 -TTGTACTACACTGAA 2 -ACTATGTGGAGAACA 1 -ATAGACAGCTCTAAC 1 -TAGACAGCTCTAACA 1 -TCTCTTGTACTACAC 1 -TATTTATAGACAGCT 1 -GAGGACTATTTATAG 1 -ATGCCTGCCGTGTGA 4 -TCTTTCCCTGCTAAA 1 -GTGTGAACCATGTGA 4 -TGAGTATGCCTGCCG 4 -AGGACTATTTATAGA 1 -CCTGCCGTGTGAACC 4 -CTGAAAAAGATGAGT 4 -AAAAGATGAGTATGC 4 -CTATCTCTTGTACTA 1 -CTTTCTATCTCTTGT 1 -ATCTGCATATTGGGA 6 -AAGATGAGTATGCCT 4 -TTACAATCTGCATAT 10 -CTGCCGTGTGAACCA 4 -CACTGAAAAAGATGA 4 -CTAACATGATAACCC 1 -ATTCACCCCCACTGA 1 -ACTTTACAATCTGCA 11 -TGTGGAGAACATTGA 1 -TTCTTTCCCTGCTAA 1 -ATGATAACCCTCACT 1 -AATTCACCCCCACTG 1 -AACCCTCACTATGTG 1 -TGATAACCCTCACTA 1 -CCCTCACTATGTGGA 1 -TATGTGGAGAACATT 1 -CTTTCCCTGCTAAAA 1 -TGTTTCACTGTCCTG 1 -GAGAACATTGACAGA 1 -CTACACTGAATTCAC 2 -CTCTAACATGATAAC 1 -TATGCCTGCCGTGTG 4 -CTGCTAAAATGTTTC 1 -GCATATTGGGATTGT 3 -CCCACTGAAAAAGAT 4 -AGAACATTGACAGAG 1 -TTTCTATCTCTTGTA 1 -TGAAAAAGATGAGTA 4 -TAAAATGTTTCACTG 1 -CTATGTGGAGAACAT 1 -CACCCCCACTGAAAA 3 -TCTTTCTATCTCTTG 1 -TGCCTGCCGTGTGAA 4 -AATCTGCATATTGGG 6 -GACAGCTCTAACATG 1 -GTCTTTCTATCTCTT 1 -CTCACTATGTGGAGA 1 -CATATTGGGATTGTC 3 -CTCTTGTACTACACT 1 -GTACTACACTGAATT 2 -ATGTGGAGAACATTG 1 -TGACTTTACAATCTG 12 -ATAACCCTCACTATG 1 -GCTAAAATGTTTCAC 1 -TGTACTACACTGAAT 2 -CCATGTGACTTTACA 12 -TGCATATTGGGATTG 4 -TGAACCATGTGACTT 4 -CGTGTGAACCATGTG 4 -GACTTTACAATCTGC 12 -GAGTATGCCTGCCGT 4 -TTATAGACAGCTCTA 1 -ACATTGACAGAGTAA 1 -ACCATGTGACTTTAC 12 -CTATTTATAGACAGC 1 -GTGGAGAACATTGAC 1 -GATAACCCTCACTAT 1 -GCCTGCCGTGTGAAC 4 -ATTGACAGAGTAACA 1 -ACAGCTCTAACATGA 1 -ATTTATAGACAGCTC 1 -GGTCTTTCTATCTCT 1 -AACATGATAACCCTC 1 -ATGTGACTTTACAAT 12 -GCTCTAACATGATAA 1 
-CTTTACAATCTGCAT 11 -CATGTGACTTTACAA 12 -ATTGGGATTGTCAGG 2 -TGTGAACCATGTGAC 4 -CCCCACTGAAAAAGA 4 -CCTGCTAAAATGTTT 1 -TGTGACTTTACAATC 12 -TCCCTGCTAAAATGT 1 -ACCCTCACTATGTGG 1 -GAACCATGTGACTTT 12 diff --git a/example_data/example_targets/B2M/kmers/B2M_sample_kmers.out b/example_data/example_targets/B2M/kmers/B2M_sample_kmers.out deleted file mode 100644 index 220723e..0000000 --- a/example_data/example_targets/B2M/kmers/B2M_sample_kmers.out +++ /dev/null @@ -1,24 +0,0 @@ -TGACTTTACAATCTG 12 -TAAAATGTTTCACTG 1 -CATGTGACTTTACAA 12 -TGCTAAAATGTTTCA 1 -CTTTACAATCTGCAT 11 -AAATGTTTCACTGTC 1 -GCTAAAATGTTTCAC 1 -ATGTGACTTTACAAT 12 -GACTTTACAATCTGC 12 -AAAATGTTTCACTGT 1 -AACCATGTGACTTTA 12 -TTACAATCTGCATAT 10 -TTTACAATCTGCATA 11 -TGTGACTTTACAATC 12 -CTAAAATGTTTCACT 1 -CCTGCTAAAATGTTT 1 -ATGTTTCACTGTCCT 1 -CCATGTGACTTTACA 12 -ACCATGTGACTTTAC 12 -GTGACTTTACAATCT 12 -AATGTTTCACTGTCC 1 -ACTTTACAATCTGCA 11 -CTGCTAAAATGTTTC 1 -TACAATCTGCATATT 10 diff --git a/example_data/example_targets/B2M/kmers/B2M_sample_kmers_merged.out b/example_data/example_targets/B2M/kmers/B2M_sample_kmers_merged.out deleted file mode 100644 index 092ede4..0000000 --- a/example_data/example_targets/B2M/kmers/B2M_sample_kmers_merged.out +++ /dev/null @@ -1,4 +0,0 @@ -contig1 14 -AACCATGTGACTTTA,ACCATGTGACTTTAC,CCATGTGACTTTACA,CATGTGACTTTACAA,ATGTGACTTTACAAT,TGTGACTTTACAATC,GTGACTTTACAATCT,TGACTTTACAATCTG,GACTTTACAATCTGC,ACTTTACAATCTGCA,CTTTACAATCTGCAT,TTTACAATCTGCATA,TTACAATCTGCATAT,TACAATCTGCATATT -@H91H9ADXX140327:1:2102:19465:23489/2_1,@H91H9ADXX140327:2:1209:17474:13884/1_0,@H91H9ADXX140327:2:2202:12128:77556/2_0,@H91H9ADXX140327:2:2212:12984:63612/1_1,@H91H9ADXX140327:2:2104:16508:95685/2_1,@H91H9ADXX140327:1:2215:1194:96327/2_0,@H91H9ADXX140327:2:1109:18752:52441/1_0,@H91H9ADXX140327:2:1214:6650:14917/1_0,@H91H9ADXX140327:2:2212:12198:89759/2_0,@H91H9ADXX140327:2:2208:12019:73884/1_0,@H91H9ADXX140327:1:1211:4032:84920/1_0,@H91H9ADXX140327:2:1108:11441:47321/2_0 - diff --git a/example_data/genes.bed 
b/example_data/genes.bed deleted file mode 100644 index 89f3aea..0000000 --- a/example_data/genes.bed +++ /dev/null @@ -1,3 +0,0 @@ -15 45003745 45003811 B2M exon -15 45007621 45007922 B2M exon -15 45008527 45008540 B2M exon diff --git a/example_data/ref_seq/B2M/.B2M_forward_refseq.fa b/example_data/ref_seq/B2M/.B2M_forward_refseq.fa deleted file mode 100644 index e69de29..0000000 diff --git a/example_data/ref_seq/B2M/.B2M_forward_refseq.fa_15mers_dump b/example_data/ref_seq/B2M/.B2M_forward_refseq.fa_15mers_dump deleted file mode 100644 index e69de29..0000000 diff --git a/example_data/ref_seq/B2M/.B2M_rep_mask.bed b/example_data/ref_seq/B2M/.B2M_rep_mask.bed deleted file mode 100644 index e69de29..0000000 diff --git a/example_data/ref_seq/B2M/.B2M_reverse_refseq.fa b/example_data/ref_seq/B2M/.B2M_reverse_refseq.fa deleted file mode 100644 index e69de29..0000000 diff --git a/example_data/ref_seq/B2M/.B2M_reverse_refseq.fa_15mers_dump b/example_data/ref_seq/B2M/.B2M_reverse_refseq.fa_15mers_dump deleted file mode 100644 index e69de29..0000000 diff --git a/example_data/ref_seq/B2M/.reference_fasta b/example_data/ref_seq/B2M/.reference_fasta deleted file mode 100644 index 211a7ce..0000000 --- a/example_data/ref_seq/B2M/.reference_fasta +++ /dev/null @@ -1 +0,0 @@ -/ifs/rcgroups/ccgd/rpa4/analysis/sv/130808/kmer_region_branch/data/ref/ucsc_hg19_dna.primary-assembly.fa \ No newline at end of file diff --git a/example_data/ref_seq/B2M/B2M_forward_refseq.fa b/example_data/ref_seq/B2M/B2M_forward_refseq.fa deleted file mode 100644 index 608bb80..0000000 --- a/example_data/ref_seq/B2M/B2M_forward_refseq.fa +++ /dev/null @@ -1,2 +0,0 @@ ->B2M 
-CTTCTTAAACATCACGAGACTCTAAGAAAAGGAAACTGAAAACGGGAAAGTCCCTCTCTCTAACCTGGCACTGCGTCGCTGGCTTGGAGACAGGTGACGGTCCCTGCGGGCCTTGTCCTGATTGGCTGGGCACGCGTTTAATATAAGTGGAGGCGTCGCGCTGGCGGGCATTCCTGAAGCTGACAGCATTCGGGCCGAGATGTCTCGCTCCGTGGCCTTAGCTGTGCTCGCGCTACTCTCTCTTTCTGGCCTGGAGGCTATCCAGCGTGAGTCTCTCCTACCCTCCCGCTCTGGTCCTTCCTCTCCCGCTCTGCACCCTCTGTGGCCCTCGCTGTGCTCTCTCGCTCCGTGACTTCCCTTCTCCAAGTTCTCCTTGGTGGCCCGCCGTGGGGCTAGTCCAGGGCTGGATCTCGGGGAAGCGGCGGGGTGGCCTGGGAGTGGGGAAGGGGGTGCGCACCCGGGACGCGCGCTACTTGCCCCTTTCGGCGGGGAGCAGGGGAGACCTTTGGCCTACGGCGACGGGAGGGTCGGGACAAAGTTTAGGGCGTCGATAAGCGTCAGAGCGCCGAGGTTGGGGGAGGGTTTCTCTTCCGCTCTTTCGCGGGGCCTCTGGCTCCCCCAGCGCAGCTGGAGTGGGGGACGGGTAGGCTCGTCCCAAAGGCGCGGCGCTGAGGTTTGTGAACGCGTGGAGGGGCGCTTGGGGTCTGGGGGAGGCGTCGCCCGGGTAAGCCTGTCTGCTGCGGCTCTGCTTCCCTTAGACTGGAGAGCTGTGGACTTCGTCTAGGCGCCCGCTAAGTTCGCATGTCCTAGCACCTCTGGGTCTATGTGGGGCCACACCGTGGGGAGGAAACAGCACGCGacgtttgtagaatgcttggctgtgatacaaagcggtttcgaataattaacttatttgttcccatcacaTGTCACTTTTAAAAAATTataagaactacccgttattgacatctttctgtgtgccaaggactttatgtgctttgcgtcatttaattttgaaaacagttatcttccgccatagataactactatggttatcttctgcctctcacagatgaagaaactaaggcaccgagattttaagaaacttaattacacaggggataaatggcagcaatcgagattgaagtcaagcCTAACCAGGGCTTTTGCGGGAGCGCATGCCTTTTGGCTGTAATTCGTGCATTTTTTTTTAAGAAAAACGCCTGCCTTCTGCGTGAGATTCTCCAGAGCAAACTGGGCGGCATGGGCCCTGTGGTCTTTTCGTACAGAGGGCTTCCTCTTTGGCTCTTTGCCTGGTTGTTTCCAAGATGTACTGTGCCTCTTACTTTCGGTTTTGAAAACATGAGGGggttgggcgtggtagcttacgcctgtaatcccagcacttagggaggccgaggcgggaggatggcttgaggtccgtagttgagaccagcctggccaacatggtgaagcctggtctctacaaaaaataataacaaaaattagccgggtgtggtggctcgtgcctgtggtcccagctgctccggtggctgaggcgggaggatctcttgagcttaggcttttgagctatcatggcgccagtgcactccagcgtgggcaacagagcgagaccctgtctctcaaaaaagaaaaaaaaaaaaaaagaaaGAGAAAAGAAAAGAAAGAAAGAAGTGAAGGTTTGTCAGTCAGGGGAGCTGTAAAACCATTAATAAAGATAATCCAAGATGGTTACCAAGACTGTTGAGGACGCCAGAGATCTTGAGCACTTTCTAAGTACCTGGCAATACACTAAGCGCGCTCACCTTTTCCTCTGGCAAAACATGATCGAAAGCAGAATGTTTTGATCATGAGAAAATTGCATTTAATTTGAATACAATTTATTTACAACATAAAGGATAATGTATATATCACCACCATTACTGGTATTTGCTGGTTATGTTAGATGTCATTTTAAAAAATAACAATCTGATATTTA
AAAAAAAATCTTATTTTGAAAATTTCCAAAGTAATACATGCCATGCATAGACCATTTCTGGAAGATACCACAAGAAACATGTAATGATGATTGCCTCTGAAGGTCTATTTTCCTCCTCTGACCTGTGTGTGGGTTTTGTTTTTGTTTTACTGTGGGCATAAATTAATTTTTCAGTTaagttttggaagcttaaataactctccaaaagtcataaagccagtaactggttgagcccaaattcaaacccagcctgtctgaTACTTGTCCTCTTCTTAGAAAAGATTACAGTGATGCTCTCACAAAATCTTGCCGCCTTCCCTCAAACAGAGAGTTCCAGGCAGGATGAATCTGTGCTCTGATCCCTGAGGCATTTAATATGTTCTTATTATTAGAAGCTCAGATGCAAAGAGCTCTCTTAGCTTTTAATGTTATGAAAAAAATCAGGTCTTCATTAGATTCCCCAATCCACCTCTTGATGGGGCTAGTAGCCTTTCCTTAATGATAGGGTGTTTCTAGAGAGATATATCTGGTCAAGGTGGCCTGGTACTCCTCCTTCTCCCCACAGCCTCCCAGACAAGGAGGAGTAGCTGCCTTTTAGTGATCATGTACCCTGAATATAAGTGTATTTAAAAGAATTTTATACACATATATTTagtgtcaatctgtatatttagtagcactaacacttctcttcattttcaatgaaaaatatagagtttataatattttcttcccacttccccatggatggtctagtcatgcctctcattttggaaagtactgTTTCTGAAACATTAGGCAATATATTCCCAACCTGGCTAGTTTACAGCAATCACCTGTGGATGCTAATTAAAACGCAAATCCCACTGTCACATGCATTACTCCATTTGATCATAATGGAAAgtatgttctgtcccatttgccatagtcctcacctatccctgttgtattttatcgggtccaactcaaccatttaaggtatttgccagctcttgtatgcatttaggtttTGTTTCTTTGTTTTTTAGCTCATGAAATTAGGTACAAAGTCAGAGAGGGGTCTGGCATATAAAACCTCAGCAGAAATAAAGAGGTTTTGTTGTTTGGTAAGAACATACCTTGGGTTggttgggcacggtggctcgtgcctgtaatcccaacactttgggaggccaaggcaggctgatcacttgaagttgggagttcaagaccagcctggccaacatggtgaaatcccgtctctactgaaaatacaaaaattaaccaggcatggtggtgtgtgcctgtagtcccaggaatcacttgaacccaggaggcggaggttgcagtgagctgagatctcaccactgcacactgcactccagcctgggcaatggaatgagattccatcccaaaaaataaaaaaataaaaaaataaagaaCATACCTTGGGTTGATCCACTTAGGAACCTCAGataataacatctgccacgtatagagcaattgctatgtcccaggcactctactagacacttcatacaGTTTAGAAAATCAGATGGGTGTAGATCAAGGCAGGAGCAGGAACCAAAAAGAAAGGCATAAACATAAGAAAAAAAATGGAAGGGGTGGAAACAGAGTACAATAACATGAGTAATTTGATGGGGGCTATTATGAACTGAGAAATGAACTTTGAAAAGTATCTTGGGGCCAAATCATGTAGACTCTTGAGTGATGTGTTAAGGAATGCTATGAGTGCTGAGAGGGCATCAGAAGTCCTTGAGAGCCTCCAGAGAAAGGCTCTTAAAAATGCAGCGCAATCTCCAGTGACAGAAGATACTGCTAGAAATCTGCTAGAAAAAAAACAAAAAAGGCATGTATAGAGGAATTATGAGGGAAAGATACCAAGTCACGGTTTATTCTTCAAAATGGAGGTGGCTTGTTGGGAAGGTGGAAGCTCATTTGGCCAGAGTGGAAATGGAATTGGGAGAAATCGATGACCAAATGTAAACACTTGGTGCCTG
ATATAGCTTGACACCAAGTTAGCCCCAAGTGAAATACCCTGGCAATATTAATGTGTCTTTTCCCGATATTCCTCAGGTACTCCAAAGATTCAGGTTTACTCACGTCATCCAGCAGAGAATGGAAAGTCAAATTTCCTGAATTGCTATGTGTCTGGGTTTCATCCATCCGACATTGAAGTTGACTTACTGAAGAATGGAGAGAGAATTGAAAAAGTGGAGCATTCAGACTTGTCTTTCAGCAAGGACTGGTCTTTCTATCTCTTGTACTACACTGAATTCACCCCCACTGAAAAAGATGAGTATGCCTGCCGTGTGAACCATGTGACTTTGTCACAGCCCAAGATAGTTAAGTGGGGTAAGTCTTACATTCTTTTGTAAGCTGCTGAAAGTTGTGTATGAGTAGTCATATCATAAAGCTGCTTTGATATAAAAAAGgtctatggccatactaccctgaatgagtcccatcccatctgatATAAACAATCTGCATATTGGGATTGTCAGGGAATGTTCTTAAAGATCAGATTAGTGGCACCTGCTGAGATACTGATGCACAGCATGGTTTCTGAACCAGTAGTTTCCCTGCAGTTGAGCAGGGAGCAGCAGCAGCACTTGCACAAATACATATACACTCTTAACACTTCTTACCTACTGGCTTCCTCTAGCTTTTGTGGCAGCTTCAGGTATATTTAGCACTGAACGAACATCTCAAGAAGGTATAGGCCTTTGTTTGTAAGTCCTGCTGTCCTAGCATCCTATAATCCTGGACTTCTCCAGTACTTTCTGGCTGGATTGGTATCTGAGGCTAGTAGGAAGGGCTTGTTCCTGCTGGGTAGCTCTAAACAATGTATTCATGGGTAGGAACAGCAGCCTATTCTGCCAGCCTTATTTCTAACCATTTTAGACATTTGTTAGTACATGGTATTTTAAAAGTAAAACTTAATGTCTTCCTTTTTTTTCTCCACTGTCTTTTTCATAGATCGAGACATGTAAGCAGCATCATGGAGGTAAGTTTTTGACCTTGAGAAAATGTTTTTGTTTCACTGTCCTGAGGACTATTTATAGACAGCTCTAACATGATAACCCTCACTATGTGGAGAACATTGACAGAGTAACATTTTAGCAGGGAAAGAAGAATCCTACAGGGTCATGTTCCCTTCTCCTGTGGAGTGGCATGAAGAAGGTGTATGGCC diff --git a/example_data/ref_seq/B2M/B2M_forward_refseq.fa_15mers_counts_0 b/example_data/ref_seq/B2M/B2M_forward_refseq.fa_15mers_counts_0 deleted file mode 100644 index 85981b6..0000000 Binary files a/example_data/ref_seq/B2M/B2M_forward_refseq.fa_15mers_counts_0 and /dev/null differ diff --git a/example_data/ref_seq/B2M/B2M_forward_refseq.fa_15mers_dump b/example_data/ref_seq/B2M/B2M_forward_refseq.fa_15mers_dump deleted file mode 100644 index 38c56c0..0000000 --- a/example_data/ref_seq/B2M/B2M_forward_refseq.fa_15mers_dump +++ /dev/null @@ -1,5159 +0,0 @@ -AAAAAAAAAAAAAAA 1 -TTCGTGCATTTTTTT 1 -TTACCTACTGGCTTC 1 -TAAGTGGGGTAAGTC 1 -TAGACACTTCATACA 1 -TGCGCACCCGGGACG 1 -CGCTCTGCACCCTCT 1 -ATGGAGGTGGCTTGT 1 -CGTGGTAGCTTACGC 1 -TTCATAGATCGAGAC 1 -ATCCGACATTGAAGT 1 -CAATATTAATGTGTC 1 
-TGTAGACTCTTGAGT 1 -CCACTGAAAAAGATG 1 -AAGTCAGAGAGGGGT 1 -AACACTTTGGGAGGC 1 -CCTCCTCTGACCTGT 1 -AAGGACTTTATGTGC 1 -ATCTGATATTTAAAA 1 -TCTTACTTTCGGTTT 1 -CTCAACCATTTAAGG 1 -GAAAGTTGTGTATGA 1 -GCGTGGTAGCTTACG 1 -AGGCATGGTGGTGTG 1 -CTGAGAAATGAACTT 1 -TGGAGGTGGCTTGTT 1 -GATCTCACCACTGCA 1 -TTGAAAACAGTTATC 1 -GGTAGCTTACGCCTG 1 -AAAGAAGTGAAGGTT 1 -TCAAAATGGAGGTGG 1 -TCATAATGGAAAGTA 1 -TGCCTGTGGTCCCAG 1 -CCCAACCTGGCTAGT 1 -CCCTGCAGTTGAGCA 1 -CACTGAACGAACATC 1 -TATATATCACCACCA 1 -CAAATCATGTAGACT 1 -AAAGTTGTGTATGAG 1 -ATATTCCTCAGGTAC 1 -CAGTTTAGAAAATCA 1 -ATTTAAAAAAAAATC 1 -ATATACACTCTTAAC 1 -AGACCCTGTCTCTCA 1 -GTAGATCAAGGCAGG 1 -TTCCCCAATCCACCT 1 -GGCGGGCATTCCTGA 1 -GCCAGCTCTTGTATG 1 -TCTTGCCGCCTTCCC 1 -CACTGAATTCACCCC 1 -TCCCCCAGCGCAGCT 1 -GTCACGGTTTATTCT 1 -AAAAAATAAAGAACA 1 -TAACATGATAACCCT 1 -GAGGCGTCGCGCTGG 1 -GGTTTATTCTTCAAA 1 -TGGCTAGTTTACAGC 1 -ACAGTTTAGAAAATC 1 -GAGGGCTTCCTCTTT 1 -AAATACAAAAATTAA 1 -ATCAGATGGGTGTAG 1 -TTCTGCCTCTCACAG 1 -GTAAACACTTGGTGC 1 -GGAAAGTCAAATTTC 1 -TCGTGCATTTTTTTT 1 -TACAGTGATGCTCTC 1 -TACACTCTTAACACT 1 -GGTCCCTGCGGGCCT 1 -AGTAATACATGCCAT 1 -CCTACAGGGTCATGT 1 -AATATAGAGTTTATA 1 -AAAAAAATGGAAGGG 1 -ACCCCCACTGAAAAA 1 -AGACTTGTCTTTCAG 1 -TCAGAGCGCCGAGGT 1 -GTTTTTTAGCTCATG 1 -AATACAAAAATTAAC 1 -CAAAGAGCTCTCTTA 1 -GGAAGCTTAAATAAC 1 -CGGGCATTCCTGAAG 1 -GTCGCGCTGGCGGGC 1 -CTGCTAGAAATCTGC 1 -GGGCGTCGATAAGCG 1 -GAAAAGTATCTTGGG 1 -GACATGTAAGCAGCA 1 -ATGAACTGAGAAATG 1 -CCTTTTTTTTCTCCA 1 -TTAACTTATTTGTTC 1 -CAGGAACCAAAAAGA 1 -CCAAGACTGTTGAGG 1 -ATGTATTCATGGGTA 1 -AGGTTTGTGAACGCG 1 -CAAAGTAATACATGC 1 -GCCGTGGGGCTAGTC 1 -CGTTTAATATAAGTG 1 -TAGCCTTTCCTTAAT 1 -ACTAACACTTCTCTT 1 -CAAAGATTCAGGTTT 1 -GGTGGCTCGTGCCTG 2 -ATTTTCTTCCCACTT 1 -ACAGAGAGTTCCAGG 1 -CATACAGTTTAGAAA 1 -CCACCATTACTGGTA 1 -TTGATGGGGGCTATT 1 -TGGCTTGTTGGGAAG 1 -GGGCATAAATTAATT 1 -ATCATAAAGCTGCTT 1 -GGCATAAATTAATTT 1 -TGATCGAAAGCAGAA 1 -TATAGACAGCTCTAA 1 -ATGAGGGGGTTGGGC 1 -GCATAAATTAATTTT 1 -TGGTGGTGTGTGCCT 1 -ACGCGCGCTACTTGC 1 -TGGTAGCTTACGCCT 1 -ATCCCAGCACTTAGG 1 -TTCTGGCTGGATTGG 1 
-GATGGTCTAGTCATG 1 -AGAGCTGTGGACTTC 1 -CTTTTGTAAGCTGCT 1 -AGGGCTGGATCTCGG 1 -GAAATTAGGTACAAA 1 -TGAGGTTTGTGAACG 1 -AATCAGATGGGTGTA 1 -TGAAGCTGACAGCAT 1 -ACATGTCACTTTTAA 1 -TAATCCTGGACTTCT 1 -TCACCACTGCACACT 1 -GGGGAGGCGTCGCCC 1 -CGCGCTACTTGCCCC 1 -TCTGGCATATAAAAC 1 -ACCAGGCATGGTGGT 1 -GGATGAATCTGTGCT 1 -GAAGAAACTAAGGCA 1 -GCCTCTCATTTTGGA 1 -ATGGGCCCTGTGGTC 1 -GTGGATGCTAATTAA 1 -TAGAAATCTGCTAGA 1 -GATAAATGGCAGCAA 1 -TATAGAGGAATTATG 1 -AATCTTGCCGCCTTC 1 -TGGCCTGGTACTCCT 1 -TCTTCATTAGATTCC 1 -ACTTGGTGCCTGATA 1 -TATCATAAAGCTGCT 1 -AGTTTAGGGCGTCGA 1 -GTTATCTTCTGCCTC 1 -TCTGCTAGAAAAAAA 1 -GTCATGCCTCTCATT 1 -CTTTCGGCGGGGAGC 1 -TTTTGAAAACATGAG 1 -GGAGCGCATGCCTTT 1 -ACTCTACTAGACACT 1 -CCTTAGCTGTGCTCG 1 -TTTTCTTCCCACTTC 1 -GCATTCGGGCCGAGA 1 -TGGAGAGAGAATTGA 1 -ATGGCCATACTACCC 1 -TTAGATGTCATTTTA 1 -TATTATTAGAAGCTC 1 -TGTATTCATGGGTAG 1 -TGAGTGCTGAGAGGG 1 -GCCTTTTAGTGATCA 1 -CCTTCTCCAAGTTCT 1 -AGCAATTGCTATGTC 1 -TAAAACTTAATGTCT 1 -GTATTCATGGGTAGG 1 -ATACATATACACTCT 1 -TTTCTAACCATTTTA 1 -AGTTCCAGGCAGGAT 1 -GGCAGGCTGATCACT 1 -GGAGTGGCATGAAGA 1 -TAGCTCTAAACAATG 1 -TGGTCCCAGCTGCTC 1 -GAGAGTTCCAGGCAG 1 -AAAGTATCTTGGGGC 1 -TAGTTTACAGCAATC 1 -GCCGCCTTCCCTCAA 1 -ACAATCTGATATTTA 1 -TCTAACATGATAACC 1 -ATGGGGGCTATTATG 1 -TATGTGTCTGGGTTT 1 -GGGTCCAACTCAACC 1 -GGCGCTTGGGGTCTG 1 -GCATGGTGGTGTGTG 1 -AAGTCAAATTTCCTG 1 -TTTAATATGTTCTTA 1 -TTTCCTTAATGATAG 1 -TTTAGCACTGAACGA 1 -TTTGAGCTATCATGG 1 -TACGGCGACGGGAGG 1 -GCCTCTTACTTTCGG 1 -GCTTGTTGGGAAGGT 1 -CTTGGGTTGATCCAC 1 -AGGTGGAAGCTCATT 1 -ATCTTATTTTGAAAA 1 -AGCTATCATGGCGCC 1 -ACACTTGGTGCCTGA 1 -ATTTTATACACATAT 1 -CCAACACTTTGGGAG 1 -TACAGCAATCACCTG 1 -CCAGGGCTTTTGCGG 1 -GGTTGCAGTGAGCTG 1 -GAGGCCAAGGCAGGC 1 -CTTTCCTTAATGATA 1 -GAAAAAAAAAAAAAA 1 -GTCCAGGGCTGGATC 1 -AACAGAGTACAATAA 1 -GACCTTGAGAAAATG 1 -GTGTCTTTTCCCGAT 1 -CAGAGAGGGGTCTGG 1 -TCGCTCCGTGGCCTT 1 -GAGGAGTAGCTGCCT 1 -CTGTGGACTTCGTCT 1 -ATAGGGTGTTTCTAG 1 -GTTATTGACATCTTT 1 -TAACCCTCACTATGT 1 -GCTTTTGCGGGAGCG 1 -AGTACCTGGCAATAC 1 -TCAGATGGGTGTAGA 1 -TTTCCAAGATGTACT 1 -AAATTCAAACCCAGC 1 
-AGTCCTTGAGAGCCT 1 -GTACTCCTCCTTCTC 1 -TCGATGACCAAATGT 1 -GCTGGCGGGCATTCC 1 -TGTGGTCCCAGCTGC 1 -GAAAACAGTTATCTT 1 -AAGTTGACTTACTGA 1 -CCGTTATTGACATCT 1 -TTCCTCTAGCTTTTG 1 -AGAGGGGTCTGGCAT 1 -AGCAGGGGAGACCTT 1 -CCAAATCATGTAGAC 1 -CAGGTGACGGTCCCT 1 -ATGGATGGTCTAGTC 1 -GGAGGATCTCTTGAG 1 -GTTTCTGAAACATTA 1 -GTGTATGAGTAGTCA 1 -AGAAAATCAGATGGG 1 -TTTACAGCAATCACC 1 -GAAACTTAATTACAC 1 -GATCAGATTAGTGGC 1 -TTCCTGAAGCTGACA 1 -GATTGTCAGGGAATG 1 -TGGGTAGGAACAGCA 1 -AAGCTGCTTTGATAT 1 -GGTTTCTCTTCCGCT 1 -GCCAAGGACTTTATG 1 -CTGCATATTGGGATT 1 -AGGAACCAAAAAGAA 1 -ATGTAAGCAGCATCA 1 -ACCCTGAATGAGTCC 1 -CAAAAGTCATAAAGC 1 -ATAATTAACTTATTT 1 -CCAGAGCAAACTGGG 1 -CCTCAGGTACTCCAA 1 -TCTGGGTTTCATCCA 1 -GCATAAACATAAGAA 1 -TCCACTTAGGAACCT 1 -ATTATGAGGGAAAGA 1 -GTTCCCATCACATGT 1 -CTTCCTCTCCCGCTC 1 -TGAAAACAGTTATCT 1 -CCGCTAAGTTCGCAT 1 -TCTGACCTGTGTGTG 1 -ATCCAGCAGAGAATG 1 -CTATGTGGGGCCACA 1 -CAATACACTAAGCGC 1 -GCTAATTAAAACGCA 1 -CGGGAGCGCATGCCT 1 -ACACTTCTTACCTAC 1 -ACGCGTTTAATATAA 1 -TTGTCTTTCAGCAAG 1 -GAACATACCTTGGGT 2 -CAAGGACTGGTCTTT 1 -GAGGGGCGCTTGGGG 1 -GGGTGTTTCTAGAGA 1 -CTTCTGCGTGAGATT 1 -GTAGCTTACGCCTGT 1 -AAAAAAAAAAGAAAG 1 -AACATTGACAGAGTA 1 -GATCAAGGCAGGAGC 1 -CCGAGGCGGGAGGAT 1 -GGGAAAGTCCCTCTC 1 -GCCCAAATTCAAACC 1 -TTGAGCTATCATGGC 1 -AGAAAAAAAACAAAA 1 -CGCTGTGCTCTCTCG 1 -TGAGAAATGAACTTT 1 -TCTAAACAATGTATT 1 -GTCTGGGTTTCATCC 1 -CAAGACTGTTGAGGA 1 -TCCGTAGTTGAGACC 1 -TATTCCCAACCTGGC 1 -GACATCTTTCTGTGT 1 -GAGGCGGGAGGATCT 1 -TCAAGACCAGCCTGG 1 -CTGAATATAAGTGTA 1 -AGGTTTGTCAGTCAG 1 -CAATCTGATATTTAA 1 -CACGAGACTCTAAGA 1 -TAAGCAGCATCATGG 1 -GAAGTGAAGGTTTGT 1 -TACGCCTGTAATCCC 1 -TTCCAGGCAGGATGA 1 -ACAAAGCGGTTTCGA 1 -GTATTTTAAAAGTAA 1 -CATTTAATATGTTCT 1 -CCAAGATGGTTACCA 1 -TGGTTATCTTCTGCC 1 -GCTTGAGGTCCGTAG 1 -GTTTTGATCATGAGA 1 -TGTTCCCATCACATG 1 -CAAGATGTACTGTGC 1 -GCGCCGAGGTTGGGG 1 -GTCTAGGCGCCCGCT 1 -AATATAAGTGTATTT 1 -AGGAACAGCAGCCTA 1 -GGGAGGCGTCGCCCG 1 -TATTTGCCAGCTCTT 1 -AATCCTACAGGGTCA 1 -ATTGTCAGGGAATGT 1 -AGAGTACAATAACAT 1 -TAAGCGTCAGAGCGC 1 -ATCTGGTCAAGGTGG 1 -CGTGGGCAACAGAGC 1 
-GGCATGAAGAAGGTG 1 -ATTTGCCATAGTCCT 1 -CAGGTTTACTCACGT 1 -ACAGCCTCCCAGACA 1 -GATGAGTATGCCTGC 1 -CAGGGAATGTTCTTA 1 -AATCTGATATTTAAA 1 -AGCCTGTCTGCTGCG 1 -TTTGTTTGTAAGTCC 1 -ACCATTTCTGGAAGA 1 -GACAGGTGACGGTCC 1 -AGAATTTTATACACA 1 -TTTTACTGTGGGCAT 1 -TTTCCTCCTCTGACC 1 -AAAAGATTACAGTGA 1 -ACTCTCCAAAAGTCA 1 -CCCTTTCGGCGGGGA 1 -GGGAAGCGGCGGGGT 1 -AGGCAGGAGCAGGAA 1 -TACATATACACTCTT 1 -GAAGGGGTGGAAACA 1 -CCTCCCAGACAAGGA 1 -GCTCACCTTTTCCTC 1 -TTTGGTAAGAACATA 1 -CCAACATGGTGAAGC 1 -ACAAAAATTAACCAG 1 -CGGTGGCTGAGGCGG 1 -GAGTGCTGAGAGGGC 1 -GCTCTTTGCCTGGTT 1 -CTGGCAATACACTAA 1 -TCAAGAAGGTATAGG 1 -TTTGTCACAGCCCAA 1 -TCATAAAGCTGCTTT 1 -AGATCTCACCACTGC 1 -GCCTGTCTGCTGCGG 1 -CTGTGTGTGGGTTTT 1 -AATTAACTTATTTGT 1 -AGAAGGTATAGGCCT 1 -TAAGGAATGCTATGA 1 -AGGGGGTGCGCACCC 1 -GCTTCCCTTAGACTG 1 -TGGAAAGTATGTTCT 1 -AATTAGCCGGGTGTG 1 -ACATGCATTACTCCA 1 -CTCTGGGTCTATGTG 1 -AGCGAGACCCTGTCT 1 -AGTGTATTTAAAAGA 1 -TGGCCCGCCGTGGGG 1 -AATATATTCCCAACC 1 -TAATCCCAACACTTT 1 -GCTTACGCCTGTAAT 1 -TGTGGCAGCTTCAGG 1 -AGGCATTTAATATGT 1 -AACTGAAAACGGGAA 1 -GCTGCCTTTTAGTGA 1 -TCCCAACACTTTGGG 1 -GCTAGTTTACAGCAA 1 -AAAAACAAAAAAGGC 1 -AATAATAACAAAAAT 1 -GTTTCTAGAGAGATA 1 -ACCAGGGCTTTTGCG 1 -ACCTGGCAATACACT 1 -CTTTCGCGGGGCCTC 1 -TTAGTAGCACTAACA 1 -GGCTCGTGCCTGTAA 1 -ACATGAGTAATTTGA 1 -ACCTTTGGCCTACGG 1 -TGATATTTAAAAAAA 1 -AAGCCTGTCTGCTGC 1 -GCAGGGGAGACCTTT 1 -GTGGTGGCTCGTGCC 1 -GCTGGATTGGTATCT 1 -AGAAATAAAGAGGTT 1 -ACTGCTAGAAATCTG 1 -ATTTCTGGAAGATAC 1 -GATAACTACTATGGT 1 -ACAGAGCGAGACCCT 1 -AGGAAGGGCTTGTTC 1 -TTACTGAAGAATGGA 1 -GCCGGGTGTGGTGGC 1 -TTGAGGTCCGTAGTT 1 -CAAATTCAAACCCAG 1 -CAGGCTGATCACTTG 1 -TTGAATACAATTTAT 1 -ATCCCATCTGATATA 1 -GCTGGTTATGTTAGA 1 -CATTTAATTTGAATA 1 -CGCAGCTGGAGTGGG 1 -CAGTTATCTTCCGCC 1 -TGGCTGGGCACGCGT 1 -TCGCGCTACTCTCTC 1 -AGATCAAGGCAGGAG 1 -CACTTCCCCATGGAT 1 -AGTTCTCCTTGGTGG 1 -TATTTGTTCCCATCA 1 -AAATTTCCAAAGTAA 1 -TGACCTTGAGAAAAT 1 -ACGCCTGCCTTCTGC 1 -TCCTCTGGCAAAACA 1 -GTACTTTCTGGCTGG 1 -GCAGCAATCGAGATT 1 -AAGAAGGTATAGGCC 1 -CATGCCTCTCATTTT 1 -CCTGGCTAGTTTACA 1 -TGTAAGCTGCTGAAA 1 
-TTTCACTGTCCTGAG 1 -TAGTCCAGGGCTGGA 1 -AAACTGGGCGGCATG 1 -AACCATGTGACTTTG 1 -AGACATTTGTTAGTA 1 -GCGGGGCCTCTGGCT 1 -CGTGGGGCTAGTCCA 1 -AGGGCTTCCTCTTTG 1 -CTTCCCACTTCCCCA 1 -GTTATGAAAAAAATC 1 -GCCGAGGTTGGGGGA 1 -CCTGTCTCTCAAAAA 1 -AAAAGTGGAGCATTC 1 -CTAAGTTCGCATGTC 1 -GTCGATAAGCGTCAG 1 -CTGAATTCACCCCCA 1 -CTTTTTCATAGATCG 1 -AAACAAAAAAGGCAT 1 -ACGTTTGTAGAATGC 1 -AAGAAAAAAAAAAAA 1 -GCCTTAGCTGTGCTC 1 -AAATGTTTTTGTTTC 1 -ATTTTCAATGAAAAA 1 -AACATGATCGAAAGC 1 -ATCTGATATAAACAA 1 -GGCACTGCGTCGCTG 1 -CTATTTTCCTCCTCT 1 -TGTAGATCAAGGCAG 1 -TCCCAACCTGGCTAG 1 -CCGTGACTTCCCTTC 1 -TAATACATGCCATGC 1 -TTTTGTAAGCTGCTG 1 -TCTGATATAAACAAT 1 -GGAGTGGGGAAGGGG 1 -GAAGAATGGAGAGAG 1 -ATTTAGCACTGAACG 1 -TTATGAAAAAAATCA 1 -TCTAAGAAAAGGAAA 1 -GCTCTGCACCCTCTG 1 -AAAGTTTAGGGCGTC 1 -ATACCTTGGGTTGGT 1 -ATTGCCTCTGAAGGT 1 -GGGAGCTGTAAAACC 1 -ACTGAATTCACCCCC 1 -GGAAAGATACCAAGT 1 -GGAGAACATTGACAG 1 -TTGAAGTCAAGCCTA 1 -TTCCATCCCAAAAAA 1 -TGGCTGTGATACAAA 1 -GAGTAACATTTTAGC 1 -AGGCCAAGGCAGGCT 1 -CTGCCAGCCTTATTT 1 -CGAGGTTGGGGGAGG 1 -CATGTAGACTCTTGA 1 -GAACATCTCAAGAAG 1 -ACTTTGTCACAGCCC 1 -ATCACATGTCACTTT 1 -GGGAGGAAACAGCAC 1 -CTTTGCGTCATTTAA 1 -TCTCACCACTGCACA 1 -AGTCATGCCTCTCAT 1 -CATTGACAGAGTAAC 1 -GAATCCTACAGGGTC 1 -TATTTAGTGTCAATC 1 -AGTAGCTGCCTTTTA 1 -CTGAAGGTCTATTTT 1 -AGGCATGTATAGAGG 1 -TTGTTAGTACATGGT 1 -TGAAATCCCGTCTCT 1 -TGGTTATGTTAGATG 1 -AACATCACGAGACTC 1 -AGTGCTGAGAGGGCA 1 -TCATACAGTTTAGAA 1 -GATGGTTACCAAGAC 1 -TAAGTTCGCATGTCC 1 -ACCCAGCCTGTCTGA 1 -ACAAAAAAGGCATGT 1 -GTATCTTGGGGCCAA 1 -TGGTCTTTCTATCTC 1 -GACGGGAGGGTCGGG 1 -CGTAGTTGAGACCAG 1 -GGCTTTTGAGCTATC 1 -TCTGGGGGAGGCGTC 1 -GAAGGGCTTGTTCCT 1 -GTGGGTTTTGTTTTT 1 -TAGAAAATCAGATGG 1 -GTCAAATTTCCTGAA 1 -TAGCTGCCTTTTAGT 1 -CGAACATCTCAAGAA 1 -TTTCTGGAAGATACC 1 -TTCTGGAAGATACCA 1 -CAGTTAAGTTTTGGA 1 -GCATGAAGAAGGTGT 1 -GGTAAGAACATACCT 1 -CGTGGAGGGGCGCTT 1 -GGCCAGAGTGGAAAT 1 -AGCTGTGGACTTCGT 1 -CATCTGCCACGTATA 1 -CATAATGGAAAGTAT 1 -CTGGGCAATGGAATG 1 -AAAAAAAGAAAGAGA 1 -TAATGTGTCTTTTCC 1 -AGATACCACAAGAAA 1 -CAAAGTTTAGGGCGT 1 -GTTATCTTCCGCCAT 1 
-AAACTTAATTACACA 1 -GTCTTTTCCCGATAT 1 -AAATGCAGCGCAATC 1 -GAAAATACAAAAATT 1 -TGTAAACACTTGGTG 1 -CAGGCACTCTACTAG 1 -CTCAGATGCAAAGAG 1 -GATACTGCTAGAAAT 1 -CTCTCTAACCTGGCA 1 -GCAAACTGGGCGGCA 1 -CTTGAAGTTGGGAGT 1 -GTGTCTGGGTTTCAT 1 -CATACCTTGGGTTGA 1 -CTTACCTACTGGCTT 1 -CTCTCTCTAACCTGG 1 -ACATGGTGAAATCCC 1 -GAAAGTCAAATTTCC 1 -CTGTGGGCATAAATT 1 -AGAGGGCATCAGAAG 1 -GTGAAGGTTTGTCAG 1 -TGTCACTTTTAAAAA 1 -CCAAGTCACGGTTTA 1 -GGAATCACTTGAACC 1 -TTAGTGTCAATCTGT 1 -CTCTCAAAAAAGAAA 1 -GCTTTTGAGCTATCA 1 -GCTTGACACCAAGTT 1 -GGTGTGTGCCTGTAG 1 -TTTTTCATAGATCGA 1 -CCAGGCAGGATGAAT 1 -ACAATAACATGAGTA 1 -TTTCGCGGGGCCTCT 1 -TGGGAGGCCAAGGCA 1 -CGCCAGTGCACTCCA 1 -GTTTCGAATAATTAA 1 -TGGCTTCCTCTAGCT 1 -CATGCATTACTCCAT 1 -AGCTCTAACATGATA 1 -CTGGTCAAGGTGGCC 1 -GGAGCAGGAACCAAA 1 -GAACATTGACAGAGT 1 -TTCTTAAAGATCAGA 1 -AATTTCCTGAATTGC 1 -CTAGCATCCTATAAT 1 -TCCTCTGACCTGTGT 1 -ACTGGGCGGCATGGG 1 -CTTTCTGGCCTGGAG 1 -CACGCGTTTAATATA 1 -AAGCAGAATGTTTTG 1 -AATTTGAATACAATT 1 -AAAAATTATAAGAAC 1 -TACACATATATTTAG 1 -CTTTTAGTGATCATG 1 -TATTATGAACTGAGA 1 -TCCATCCGACATTGA 1 -AGTGACAGAAGATAC 1 -TTGAGCTTAGGCTTT 1 -AAAAAAAAAGAAAGA 1 -TTCCTGAATTGCTAT 1 -CGGGTCCAACTCAAC 1 -CAGCGTGAGTCTCTC 1 -TGGGTTGGTTGGGCA 1 -GGAGGTAAGTTTTTG 1 -TCATGTTCCCTTCTC 1 -TCAGTTAAGTTTTGG 1 -GTTGGGCGTGGTAGC 1 -GAAAGATACCAAGTC 1 -TGGGCGTGGTAGCTT 1 -TACATTCTTTTGTAA 1 -CTCCTTCTCCCCACA 1 -AAGTTGGGAGTTCAA 1 -AGGTCCGTAGTTGAG 1 -CAGTGAGCTGAGATC 1 -TCTCTCTAACCTGGC 1 -ACTCCATTTGATCAT 1 -CTGCACCCTCTGTGG 1 -CCAGCGCAGCTGGAG 1 -ATTTTAAGAAACTTA 1 -CCCACTGTCACATGC 1 -GGTAGGAACAGCAGC 1 -TTTTGGCTGTAATTC 1 -CCTCAGCAGAAATAA 1 -GCTCATTTGGCCAGA 1 -ATTAATAAAGATAAT 1 -TAATAAAGATAATCC 1 -GGGGAGGGTTTCTCT 1 -CCCCTTTCGGCGGGG 1 -ATTTCTAACCATTTT 1 -TAACAATCTGATATT 1 -AATCCACCTCTTGAT 1 -TGGTTGTTTCCAAGA 1 -TGCTCCGGTGGCTGA 1 -GGGAGGATCTCTTGA 1 -AAAATACAAAAATTA 1 -AGCATTCAGACTTGT 1 -ACATGTAATGATGAT 1 -ATAAGTGGAGGCGTC 1 -CTCAAGAAGGTATAG 1 -TAGTCCCAGGAATCA 1 -CTCTGCACCCTCTGT 1 -GCGGGCATTCCTGAA 1 -GTCATGTTCCCTTCT 1 -GCTATGTGTCTGGGT 1 -TCTTAGAAAAGATTA 1 -CCAAAAGTCATAAAG 1 
-CCAGCAGAGAATGGA 1 -ACCACTGCACACTGC 1 -AAAAACGCCTGCCTT 1 -GCCCGCCGTGGGGCT 1 -TTTTTCTCCACTGTC 1 -GAATGCTATGAGTGC 1 -TGCTAATTAAAACGC 1 -CATGTGACTTTGTCA 1 -TTGATGGGGCTAGTA 1 -CCACAAGAAACATGT 1 -CGTGGCCTTAGCTGT 1 -ATTGAAGTCAAGCCT 1 -AGTTTCCCTGCAGTT 1 -CACTTCTCTTCATTT 1 -TACAGAGGGCTTCCT 1 -CCGCTCTTTCGCGGG 1 -TCCTACCCTCCCGCT 1 -GTTTAATATAAGTGG 1 -GGACTATTTATAGAC 1 -GACAGAGTAACATTT 1 -GTTTACAGCAATCAC 1 -GTACCCTGAATATAA 1 -CTGATCCCTGAGGCA 1 -AGGCGGAGGTTGCAG 1 -GCTGTCCTAGCATCC 1 -AAAATCAGGTCTTCA 1 -CATAAAGGATAATGT 1 -AAAAAATAAAAAAAT 2 -AAGGCGCGGCGCTGA 1 -TCCTCTAGCTTTTGT 1 -AAAAAAGGCATGTAT 1 -TCACCCCCACTGAAA 1 -AGCTGTGCTCGCGCT 1 -ATCGAAAGCAGAATG 1 -TGTTATGAAAAAAAT 1 -AGAAGTGAAGGTTTG 1 -CATAAACATAAGAAA 1 -GTGTGCCTGTAGTCC 1 -TCCTGCTGTCCTAGC 1 -AATTAACCAGGCATG 1 -GTCCCATCCCATCTG 1 -TATTTACAACATAAA 1 -TCTCCTTGGTGGCCC 1 -CCTCTTCTTAGAAAA 1 -CTGAATGAGTCCCAT 1 -CCTGTAGTCCCAGGA 1 -AGCTGGAGTGGGGGA 1 -TTAAAAATGCAGCGC 1 -TTCTCCCCACAGCCT 1 -GCCAGCCTTATTTCT 1 -AGTTAAGTGGGGTAA 1 -GTAAGTCCTGCTGTC 1 -GGAGAGAGAATTGAA 1 -AGCCTGGTCTCTACA 1 -GAATCTGTGCTCTGA 1 -ATCTCGGGGAAGCGG 1 -AAGTCCCTCTCTCTA 1 -CTGTAATTCGTGCAT 1 -ACTTCTCTTCATTTT 1 -CTCCAAAGATTCAGG 1 -GGTGCCTGATATAGC 1 -ATTTTTTTTTAAGAA 1 -ATTGAAGTTGACTTA 1 -TCAGAGAGGGGTCTG 1 -TTGAAAATTTCCAAA 1 -GGTTTGTGAACGCGT 1 -AAGTCAAGCCTAACC 1 -AGAATCCTACAGGGT 1 -TTGCATTTAATTTGA 1 -GACGGGTAGGCTCGT 1 -AGGGGGTTGGGCGTG 1 -GTGGGGCCACACCGT 1 -CTACGGCGACGGGAG 1 -GGAGCATTCAGACTT 1 -CCACACCGTGGGGAG 1 -GCAGCACTTGCACAA 1 -AGGCGGGAGGATGGC 1 -TCTCTTCATTTTCAA 1 -AATTAAAACGCAAAT 1 -AGATGTCATTTTAAA 1 -AACTTTGAAAAGTAT 1 -AAATTAGCCGGGTGT 1 -GTTTCTCTTCCGCTC 1 -CATCTTTCTGTGTGC 1 -TTTAAGAAAAACGCC 1 -AGTCAAGCCTAACCA 1 -AGGTCTATTTTCCTC 1 -CTTGAGGTCCGTAGT 1 -TAACCAGGCATGGTG 1 -CATCCCAAAAAATAA 1 -CGTCGATAAGCGTCA 1 -TGGGAGTGGGGAAGG 1 -CTCTTAAAAATGCAG 1 -CTTAGAAAAGATTAC 1 -TTTACTCACGTCATC 1 -ATATATCACCACCAT 1 -AACCAGGCATGGTGG 1 -CCATGCATAGACCAT 1 -TTGAGCACTTTCTAA 1 -AAGAAGTGAAGGTTT 1 -AACATAAGAAAAAAA 1 -GTGTGCCAAGGACTT 1 -TTAGGCTTTTGAGCT 1 -CTAGTCATGCCTCTC 1 -AGCTCTTGTATGCAT 1 
-GAATGAGATTCCATC 1 -CATTTGCCATAGTCC 1 -GTAATGATGATTGCC 1 -TATGAACTGAGAAAT 1 -AGACACTTCATACAG 1 -TTGGTGCCTGATATA 1 -TAAAAAAATAAAGAA 1 -TGGGGAAGGGGGTGC 1 -AAGCCTGGTCTCTAC 1 -CTTAAATAACTCTCC 1 -TAAAGAGGTTTTGTT 1 -GCTCCGGTGGCTGAG 1 -GTTATGTTAGATGTC 1 -GATTCCATCCCAAAA 1 -TTCTTAGAAAAGATT 1 -GAGGCGGAGGTTGCA 1 -GAGCTGAGATCTCAC 1 -CTCCCCCAGCGCAGC 1 -TGGCATATAAAACCT 1 -TCCAGGGCTGGATCT 1 -AGGAGGAGTAGCTGC 1 -ACTGCACTCCAGCCT 1 -AATACAATTTATTTA 1 -AATGTTCTTAAAGAT 1 -CCTGCGGGCCTTGTC 1 -CCTGATTGGCTGGGC 1 -CTGAAACATTAGGCA 1 -ATCTCACCACTGCAC 1 -ACTGAAAATACAAAA 1 -TGTGGACTTCGTCTA 1 -AAACATGATCGAAAG 1 -AATGGAAAGTCAAAT 1 -AAAACGCAAATCCCA 1 -CTTAGGGAGGCCGAG 1 -AGATGAGTATGCCTG 1 -TCTTCTGCCTCTCAC 1 -GCATCATGGAGGTAA 1 -CTCTGACCTGTGTGT 1 -GGGCGGCATGGGCCC 1 -TCCTGCTGGGTAGCT 1 -CCTGCTGGGTAGCTC 1 -ACCTGCTGAGATACT 1 -AAGAAAGAGAAAAGA 1 -TCTGAGGCTAGTAGG 1 -CTTCCGCTCTTTCGC 1 -GCCCTCGCTGTGCTC 1 -AATCCAAGATGGTTA 1 -AAGCGTCAGAGCGCC 1 -TTCTGAAACATTAGG 1 -AAAAAAAATCTTATT 1 -TATGAGTAGTCATAT 1 -AGGTACTCCAAAGAT 1 -ACCCTGGCAATATTA 1 -TGTCACAGCCCAAGA 1 -TTCCTGCTGGGTAGC 1 -AGCGCGCTCACCTTT 1 -AGTGTCAATCTGTAT 1 -AGAAACTTAATTACA 1 -GTTGTGTATGAGTAG 1 -GAGTAATTTGATGGG 1 -TACCTACTGGCTTCC 1 -TTCGTACAGAGGGCT 1 -AGCAGCACTTGCACA 1 -GGCATTCCTGAAGCT 1 -GCTGATCACTTGAAG 1 -CCGAGATGTCTCGCT 1 -CCATGGATGGTCTAG 1 -TCTATGTGGGGCCAC 1 -CACAGCATGGTTTCT 1 -AAAAATTAGCCGGGT 1 -GGGGAGCAGGGGAGA 1 -TTTAAAAAATTATAA 1 -CCCCAGCGCAGCTGG 1 -AGCTCATGAAATTAG 1 -AAAAAATTATAAGAA 1 -AAAAGAAAGAGAAAA 1 -AATTTTTCAGTTAAG 1 -TGGGGCTAGTAGCCT 1 -TCTATGGCCATACTA 1 -GCAGGGAGCAGCAGC 1 -CGCTCACCTTTTCCT 1 -TTTAAAAAAAAATCT 1 -ACCTCTTGATGGGGC 1 -TGGGCATAAATTAAT 1 -TTTTGGAAGCTTAAA 1 -ACCCTCTGTGGCCCT 1 -AGTTAAGTTTTGGAA 1 -TAGGGTGTTTCTAGA 1 -TGCATTTAATTTGAA 1 -TGAAGGTTTGTCAGT 1 -AACCATTAATAAAGA 1 -AAATTAGGTACAAAG 1 -GTGAAATACCCTGGC 1 -GCATTTAATTTGAAT 1 -TAGATTCCCCAATCC 1 -TAAAGCTGCTTTGAT 1 -GCCCGCTAAGTTCGC 1 -TGAAAGTTGTGTATG 1 -GAGCTATCATGGCGC 1 -CTACAAAAAATAATA 1 -GCTGAGGCGGGAGGA 1 -GTACAGAGGGCTTCC 1 -TATTCTTCAAAATGG 1 -TCTCCTACCCTCCCG 1 -AGATGGTTACCAAGA 1 
-CAGTGACAGAAGATA 1 -ATGTAATGATGATTG 1 -GCTTAGGCTTTTGAG 1 -AAAACTTAATGTCTT 1 -GTGATGCTCTCACAA 1 -AGCAAACTGGGCGGC 1 -GCTGCTCCGGTGGCT 1 -GACCCTGTCTCTCAA 1 -ACTATTTATAGACAG 1 -TAACACTTCTCTTCA 1 -TTAAGGAATGCTATG 1 -ATAACTACTATGGTT 1 -TAGATCAAGGCAGGA 1 -CTCTGCTTCCCTTAG 1 -AGCAAGGACTGGTCT 1 -ACAAAATCTTGCCGC 1 -GCACTGCGTCGCTGG 1 -GGCCATACTACCCTG 1 -GGGAGGCCAAGGCAG 1 -CCCGATATTCCTCAG 1 -ACATGATAACCCTCA 1 -CGATGACCAAATGTA 1 -TTTTAAGAAACTTAA 1 -CTGCTGTCCTAGCAT 1 -CATTCAGACTTGTCT 1 -TCATGTACCCTGAAT 1 -CCTTCTCCCCACAGC 1 -AGTTTACAGCAATCA 1 -GGGCTTCCTCTTTGG 1 -TGCTGGGTAGCTCTA 1 -GCCCGGGTAAGCCTG 1 -AAATCCCACTGTCAC 1 -TTACGCCTGTAATCC 1 -CAGAGATCTTGAGCA 1 -AGTAGTTTCCCTGCA 1 -GAGTGATGTGTTAAG 1 -CACTATGTGGAGAAC 1 -TAATGGAAAGTATGT 1 -GAACCTCAGATAATA 1 -CCAAAGTAATACATG 1 -CTGTCCTAGCATCCT 1 -TATCGGGTCCAACTC 1 -AATGGAATTGGGAGA 1 -CCTTGTCCTGATTGG 1 -GTCTCGCTCCGTGGC 1 -CGACGTTTGTAGAAT 1 -GCTTCCTCTAGCTTT 1 -ATGTTTTGATCATGA 1 -GCATATAAAACCTCA 1 -AGGGGAGACCTTTGG 1 -TTTTGTGGCAGCTTC 1 -CTTGAGCACTTTCTA 1 -GCAAAGAGCTCTCTT 1 -GACATTGAAGTTGAC 1 -AGGAAACTGAAAACG 1 -GTGGCATGAAGAAGG 1 -AAAGATGAGTATGCC 1 -AGTGATGTGTTAAGG 1 -CTTGTATGCATTTAG 1 -GAAGAAGGTGTATGG 1 -GTACTGTTTCTGAAA 1 -GTATGAGTAGTCATA 1 -TTGGCCTACGGCGAC 1 -CTTGTACTACACTGA 1 -TTTAATATAAGTGGA 1 -AGCCTGGCCAACATG 2 -ACGCCTGTAATCCCA 1 -ACCTACTGGCTTCCT 1 -ACTCTTAACACTTCT 1 -TTGATCCACTTAGGA 1 -CCTGTGGAGTGGCAT 1 -GAGATTCCATCCCAA 1 -ATCTTGGGGCCAAAT 1 -TACTGAAGAATGGAG 1 -AACGAACATCTCAAG 1 -AAAGCCAGTAACTGG 1 -TCGTACAGAGGGCTT 1 -AGAAGGTGTATGGCC 1 -TCGTGCCTGTGGTCC 1 -TCTCCTGTGGAGTGG 1 -CATACCTTGGGTTGG 1 -GCAGCTGGAGTGGGG 1 -GGCCTTAGCTGTGCT 1 -GATGGCTTGAGGTCC 1 -GCTTAAATAACTCTC 1 -ACATCTTTCTGTGTG 1 -CCAGCTGCTCCGGTG 1 -GTTTGTAGAATGCTT 1 -TGGGATTGTCAGGGA 1 -AGGAAACAGCACGCG 1 -GGACAAAGTTTAGGG 1 -TGCATTACTCCATTT 1 -TTGGGTTGATCCACT 1 -GAGACCTTTGGCCTA 1 -TGAGGACTATTTATA 1 -GATATAAACAATCTG 1 -TTGGCTGGGCACGCG 1 -CCTCTGGCAAAACAT 1 -CTACCCTGAATGAGT 1 -GGCTCTTAAAAATGC 1 -AAAAGGAAACTGAAA 1 -TGGTCCTTCCTCTCC 1 -TGTTGTATTTTATCG 1 -ACTACCCTGAATGAG 1 -GCGTGGAGGGGCGCT 1 
-TGTGGCCCTCGCTGT 1 -TCGTCCCAAAGGCGC 1 -TGGAGAACATTGACA 1 -AGTGAGCTGAGATCT 1 -TTGCCAGCTCTTGTA 1 -CTCCAGTGACAGAAG 1 -ATGTGTCTTTTCCCG 1 -TCAACCATTTAAGGT 1 -AGAACATACCTTGGG 2 -TATATTTAGCACTGA 1 -ACTACACTGAATTCA 1 -AAAAAGAAAGGCATA 1 -CTTTTCGTACAGAGG 1 -CCAGTACTTTCTGGC 1 -TGCGGCTCTGCTTCC 1 -AAACAGAGAGTTCCA 1 -CATCCCATCTGATAT 1 -TAGTTAAGTGGGGTA 1 -CCTGTCTGATACTTG 1 -TGGGGCTAGTCCAGG 1 -CTGAGATACTGATGC 1 -AAAAATTAACCAGGC 1 -CTCGCGCTACTCTCT 1 -TCTGCCACGTATAGA 1 -AAAGTCAAATTTCCT 1 -CCACCTCTTGATGGG 1 -AGGTGACGGTCCCTG 1 -AAGAAGAATCCTACA 1 -TTCTCCAAGTTCTCC 1 -GCGTGGGCAACAGAG 1 -GGCAATATTAATGTG 1 -GAGCGAGACCCTGTC 1 -ATGAAGAAGGTGTAT 1 -CCAAGGCAGGCTGAT 1 -AGATTACAGTGATGC 1 -AAGTCCTGCTGTCCT 1 -ATAAAAAAATAAAGA 1 -CCAAAAAATAAAAAA 1 -GCTGAAAGTTGTGTA 1 -GTTGTATTTTATCGG 1 -TGACACCAAGTTAGC 1 -GCTGTGGACTTCGTC 1 -GATTGGTATCTGAGG 1 -AATATGTTCTTATTA 1 -AATTTTATACACATA 1 -CTGGCTTGGAGACAG 1 -AAGGTCTATGGCCAT 1 -ATAAAGAGGTTTTGT 1 -CCTGTGGTCCCAGCT 1 -CTCACCACTGCACAC 1 -ACAGCCCAAGATAGT 1 -GTCTGGCATATAAAA 1 -TGTAAGCAGCATCAT 1 -CTTAAAAATGCAGCG 1 -GGGGCCACACCGTGG 1 -CTCCAAAAGTCATAA 1 -TCACTTGAAGTTGGG 1 -TATGTGCTTTGCGTC 1 -TGAATTCACCCCCAC 1 -TCAGATTAGTGGCAC 1 -CTCGCTCCGTGGCCT 1 -GCAATATATTCCCAA 1 -AAAATAACAATCTGA 1 -ATATATCTGGTCAAG 1 -AAGGCACCGAGATTT 1 -CTGCGTCGCTGGCTT 1 -TTCCCTTCTCCAAGT 1 -CATAAATTAATTTTT 1 -TCTAGCTTTTGTGGC 1 -TTTTTTTAAGAAAAA 1 -GCTGTGCTCTCTCGC 1 -TTTGCGGGAGCGCAT 1 -TCACTTTTAAAAAAT 1 -GCCAGAGATCTTGAG 1 -ATTTATTTACAACAT 1 -TTTAGGGCGTCGATA 1 -TGTAAAACCATTAAT 1 -GTAGGAAGGGCTTGT 1 -AAAATGTTTTTGTTT 1 -AGACTCTTGAGTGAT 1 -ACAATGTATTCATGG 1 -AAAACATGATCGAAA 1 -AGAATGGAGAGAGAA 1 -GTAATCCCAACACTT 1 -CTCAAACAGAGAGTT 1 -AAAAAAAAAAAAAAG 1 -AGAAAAGATTACAGT 1 -CCGCCATAGATAACT 1 -TCTTAAACATCACGA 1 -ATACACTCTTAACAC 1 -AATCTGTGCTCTGAT 1 -TAGGCTTTTGAGCTA 1 -CTATGTCCCAGGCAC 1 -ACTTGAACCCAGGAG 1 -TTGATCATAATGGAA 1 -CTCACCTTTTCCTCT 1 -GTGGGCATAAATTAA 1 -GGAGGCCGAGGCGGG 1 -ACAATTTATTTACAA 1 -TGCTGAGATACTGAT 1 -TGTTTCCAAGATGTA 1 -TGTTGGGAAGGTGGA 1 -GGTATATTTAGCACT 1 -ACATGGTATTTTAAA 1 -CTGTCCCATTTGCCA 1 
-GCAGCAGCACTTGCA 1 -TTTCTAGAGAGATAT 1 -TGCTGAAAGTTGTGT 1 -GCCTGCCTTCTGCGT 1 -GGCCACACCGTGGGG 1 -CAGGCAGGATGAATC 1 -AGAGAGATATATCTG 1 -GTGTGTGCCTGTAGT 1 -AAGGTATTTGCCAGC 1 -CCTATAATCCTGGAC 1 -TCCCTCTCTCTAACC 1 -AGTGATCATGTACCC 1 -CCAAGATGTACTGTG 1 -CAAACCCAGCCTGTC 1 -CACTCCAGCCTGGGC 1 -ACGCCAGAGATCTTG 1 -GAGGTAAGTTTTTGA 1 -CCCTCAAACAGAGAG 1 -TAAAAATGCAGCGCA 1 -TATGAGGGAAAGATA 1 -TTCCCTTAGACTGGA 1 -CCGGGTAAGCCTGTC 1 -TCACATGTCACTTTT 1 -TTAAAAGTAAAACTT 1 -GCTTTGATATAAAAA 1 -AAATCAGATGGGTGT 1 -GGAGACAGGTGACGG 1 -AAAAAGTGGAGCATT 1 -TACTCCTCCTTCTCC 1 -ATAAGAAAAAAAATG 1 -TCTTTCGCGGGGCCT 1 -AAAATCAGATGGGTG 1 -TGCAAAGAGCTCTCT 1 -GCACTTGCACAAATA 1 -CATGGTGAAGCCTGG 1 -AAAAATAAAAAAATA 2 -AACATGTAATGATGA 1 -ATCTTGAGCACTTTC 1 -CGCGCTCACCTTTTC 1 -CTATCCAGCGTGAGT 1 -GACTATTTATAGACA 1 -CCGTGTGAACCATGT 1 -TCTGCATATTGGGAT 1 -AGTGGGGGACGGGTA 1 -ATGAGTAATTTGATG 1 -GGCACGCGTTTAATA 1 -GCGCCCGCTAAGTTC 1 -ATAAACATAAGAAAA 1 -ATTTAAAAGAATTTT 1 -GTGCCAAGGACTTTA 1 -AGGATGAATCTGTGC 1 -CACAAATACATATAC 1 -TTAATATGTTCTTAT 1 -CAAAGTCAGAGAGGG 1 -GGTGGCCTGGTACTC 1 -CTCAAAAAAGAAAAA 1 -TCTGTGTGCCAAGGA 1 -TAGGCGCCCGCTAAG 1 -GCTCCCCCAGCGCAG 1 -CGCTGGCGGGCATTC 1 -CATGGCGCCAGTGCA 1 -TTTCTGGCTGGATTG 1 -GAATTGGGAGAAATC 1 -TGCATTTAGGTTTTG 1 -TCGGTTTTGAAAACA 1 -CTTAACACTTCTTAC 1 -AGATGCAAAGAGCTC 1 -GGGAGAAATCGATGA 1 -TTGGAAAGTACTGTT 1 -GGCCCTGTGGTCTTT 1 -ACACTCTTAACACTT 1 -GTGGCCTGGGAGTGG 1 -TGAAAAAAATCAGGT 1 -AATTACACAGGGGAT 1 -GTATTTGCTGGTTAT 1 -ACTGTGGGCATAAAT 1 -CGGGGAAGCGGCGGG 1 -CTTGAACCCAGGAGG 1 -GCTTGGGGTCTGGGG 1 -AGGGGCGCTTGGGGT 1 -AGGAGCAGGAACCAA 1 -GGGGGAGGGTTTCTC 1 -TCCCGATATTCCTCA 1 -TTGGCTCTTTGCCTG 1 -TTTATAGACAGCTCT 1 -TACAGGGTCATGTTC 1 -AAACAGCACGCGACG 1 -CACCTTTTCCTCTGG 1 -ACTCCTCCTTCTCCC 1 -GAGACTCTAAGAAAA 1 -GTGGAGGCGTCGCGC 1 -CGTCGCGCTGGCGGG 1 -TGCATTTTTTTTTAA 1 -GGGAGGGTTTCTCTT 1 -ACTTCATACAGTTTA 1 -CATCACATGTCACTT 1 -TGCAGTGAGCTGAGA 1 -AGGATAATGTATATA 1 -AGAATGGAAAGTCAA 1 -TCACCTGTGGATGCT 1 -TTCTTTGTTTTTTAG 1 -GGGAGGATGGCTTGA 1 -CTTGGTGGCCCGCCG 1 -AACGCCTGCCTTCTG 1 -AGGGAAAGAAGAATC 1 
-AGGTATTTGCCAGCT 1 -CCTTATTTCTAACCA 1 -AGCTTAAATAACTCT 1 -AGGGTGTTTCTAGAG 1 -TGAACTTTGAAAAGT 1 -AAAGAGGTTTTGTTG 1 -TGGGTTTTGTTTTTG 1 -GCACAGCATGGTTTC 1 -CTCACAGATGAAGAA 1 -TGGACTTCGTCTAGG 1 -TGTATTTAAAAGAAT 1 -TTCCGCCATAGATAA 1 -AAATCGATGACCAAA 1 -GAAAATCAGATGGGT 1 -GATGCACAGCATGGT 1 -ACATCTGCCACGTAT 1 -ACTTTATGTGCTTTG 1 -ATCTCAAGAAGGTAT 1 -CCCGTTATTGACATC 1 -ATAAGAACTACCCGT 1 -CATAGATAACTACTA 1 -AGATACTGATGCACA 1 -TTCTATCTCTTGTAC 1 -ACAGAGTAACATTTT 1 -CTACTCTCTCTTTCT 1 -TCCAAGATGTACTGT 1 -GAAAAAAAATGGAAG 1 -TGAAAAAGTGGAGCA 1 -CATACTACCCTGAAT 1 -TGTCTTTTCCCGATA 1 -TGCTTGGCTGTGATA 1 -CTTGAGAAAATGTTT 1 -CTGTTGTATTTTATC 1 -ACTTGAAGTTGGGAG 1 -TCACTGTCCTGAGGA 1 -AGATGGGTGTAGATC 1 -GGCGCCCGCTAAGTT 1 -TCTCTACAAAAAATA 1 -GGACTTCGTCTAGGC 1 -AAAAAAACAAAAAAG 1 -GCGTCAGAGCGCCGA 1 -CTGTGCTCTGATCCC 1 -AAGATCAGATTAGTG 1 -AAAGAAAAAAAAAAA 1 -ATACAGTTTAGAAAA 1 -ATGGAGAGAGAATTG 1 -TTGCAGTGAGCTGAG 1 -AGTTAGCCCCAAGTG 1 -GGCTGGATTGGTATC 1 -TGAAATTAGGTACAA 1 -TTAAAAGAATTTTAT 1 -CTGAACCAGTAGTTT 1 -TAGGCAATATATTCC 1 -TGTCCCATTTGCCAT 1 -GGATTGGTATCTGAG 1 -CCCTCTGTGGCCCTC 1 -CCGCCTTCCCTCAAA 1 -TTGAAGTTGGGAGTT 1 -TGCTGAGAGGGCATC 1 -GAATTCACCCCCACT 1 -ACTGGCTTCCTCTAG 1 -CGCCCGCTAAGTTCG 1 -GAAATCCCGTCTCTA 1 -CTGTTGAGGACGCCA 1 -AAAGTATGTTCTGTC 1 -TCATGAAATTAGGTA 1 -AACATGAGGGGGTTG 1 -ACTGTCACATGCATT 1 -TTGCCATAGTCCTCA 1 -ACACTTCTCTTCATT 1 -CTTTTGTGGCAGCTT 1 -CGTACAGAGGGCTTC 1 -GTTGGGCACGGTGGC 1 -GTTGAGCCCAAATTC 1 -ATCTGTGCTCTGATC 1 -ATGTAGACTCTTGAG 1 -TAGACCATTTCTGGA 1 -CATTACTCCATTTGA 1 -ATTTGATGGGGGCTA 1 -TTTGCCATAGTCCTC 1 -TCTCTCTTTCTGGCC 1 -AGGTGGCTTGTTGGG 1 -TTGTTTGGTAAGAAC 1 -TGTGGTGGCTCGTGC 1 -GGTACAAAGTCAGAG 1 -CATTTGGCCAGAGTG 1 -CATGATAACCCTCAC 1 -TACACTGAATTCACC 1 -ATCTCTTGTACTACA 1 -GGTCTGGCATATAAA 1 -TGCACACTGCACTCC 1 -CAAATCCCACTGTCA 1 -TGCCATGCATAGACC 1 -CCGGGACGCGCGCTA 1 -CAGATTAGTGGCACC 1 -TCTGATACTTGTCCT 1 -AAACTGAAAACGGGA 1 -GGAAACTGAAAACGG 1 -TTGAGGACGCCAGAG 1 -AGGTTGCAGTGAGCT 1 -GTCAATCTGTATATT 1 -TAGACTGGAGAGCTG 1 -TTTCTGAAACATTAG 1 -GCACACTGCACTCCA 1 -ATAATGGAAAGTATG 1 
-TCCCGTCTCTACTGA 1 -TACTTTCTGGCTGGA 1 -GTGGGGGACGGGTAG 1 -AGGGAATGTTCTTAA 1 -TTTCTGAACCAGTAG 1 -AATGAAAAATATAGA 1 -TTCCAAAGTAATACA 1 -TTTTGAAAACAGTTA 1 -ACTTCCCCATGGATG 1 -GGGGTGGAAACAGAG 1 -AGCTTACGCCTGTAA 1 -ACCATTTAAGGTATT 1 -GCCTCCCAGACAAGG 1 -TAGTAGGAAGGGCTT 1 -CTTGAGCTTAGGCTT 1 -CGTTTGTAGAATGCT 1 -CAGCATCATGGAGGT 1 -GCGGAGGTTGCAGTG 1 -AGTAACTGGTTGAGC 1 -CTACCCTCCCGCTCT 1 -TTAGTGATCATGTAC 1 -TCTGGTCCTTCCTCT 1 -AAAGATCAGATTAGT 1 -CAAACAGAGAGTTCC 1 -GCTCTGATCCCTGAG 1 -CCAGCCTGTCTGATA 1 -CCTATCCCTGTTGTA 1 -ATTGGTATCTGAGGC 1 -TGTTTGGTAAGAACA 1 -CCTTGAGAGCCTCCA 1 -AAATGGAAGGGGTGG 1 -GCCGAGGCGGGAGGA 1 -AAGGATAATGTATAT 1 -AATGAGTCCCATCCC 1 -GGAGGAAACAGCACG 1 -CTACTATGGTTATCT 1 -CTGTGCTCGCGCTAC 1 -CATGTACCCTGAATA 1 -ACATAAGAAAAAAAA 1 -ATATTGGGATTGTCA 1 -TTGTAAGTCCTGCTG 1 -GTCAGGGGAGCTGTA 1 -GATGTCTCGCTCCGT 1 -CAGGGGAGACCTTTG 1 -GGCTCTGCTTCCCTT 1 -AGCCTCCCAGACAAG 1 -CATTACTGGTATTTG 1 -AAAAATAAAGAACAT 1 -CAACAGAGCGAGACC 1 -TCTGATATTTAAAAA 1 -TATCTCTTGTACTAC 1 -GTCCCAGCTGCTCCG 1 -TTCGCGGGGCCTCTG 1 -AAGAAAAGGAAACTG 1 -TTCCCGATATTCCTC 1 -AAACAGAGTACAATA 1 -GTACAATAACATGAG 1 -CAGGGAAAGAAGAAT 1 -AGCCTGGGCAATGGA 1 -GTCTGCTGCGGCTCT 1 -GCCCCAAGTGAAATA 1 -CTTTTGAGCTATCAT 1 -AATCTTATTTTGAAA 1 -TGAAGTTGACTTACT 1 -ACTGAACGAACATCT 1 -TAGTCATGCCTCTCA 1 -AAGGCATGTATAGAG 1 -TCATTTTGGAAAGTA 1 -TAAGGTATTTGCCAG 1 -TTTGGCTCTTTGCCT 1 -TAACTGGTTGAGCCC 1 -AACCAGTAGTTTCCC 1 -TGTAATCCCAACACT 1 -AAGTTCTCCTTGGTG 1 -GCGTGAGATTCTCCA 1 -GCAAGGACTGGTCTT 1 -GACTGGAGAGCTGTG 1 -GGAATGCTATGAGTG 1 -TCTTGTACTACACTG 1 -AGGGAAAGATACCAA 1 -GGTTTTGTTTTTGTT 1 -ATTAGTGGCACCTGC 1 -TTACCAAGACTGTTG 1 -GCCTTTTGGCTGTAA 1 -TTGGTAAGAACATAC 1 -CTGATCACTTGAAGT 1 -CCTGGACTTCTCCAG 1 -TCTCCAGAGCAAACT 1 -GGCTAGTAGGAAGGG 1 -GCAATTGCTATGTCC 1 -TTAAATAACTCTCCA 1 -TTCCGCTCTTTCGCG 1 -CCCTTCTCCTGTGGA 1 -GTAGACTCTTGAGTG 1 -TTGTAAGCTGCTGAA 1 -TTAATTTGAATACAA 1 -AGGCACCGAGATTTT 1 -TTTTAGCAGGGAAAG 1 -TTAAGTGGGGTAAGT 1 -ACTGAGAAATGAACT 1 -CTGGCTTCCTCTAGC 1 -TTACATTCTTTTGTA 1 -GAGCACTTTCTAAGT 1 -CCACTGTCACATGCA 1 -TCCTCCTCTGACCTG 1 
-CAATCTGCATATTGG 1 -TTTTGATCATGAGAA 1 -GTTGATCCACTTAGG 1 -TGGGCCCTGTGGTCT 1 -TACATGGTATTTTAA 1 -AATTGCATTTAATTT 1 -AAAATTGCATTTAAT 1 -CCATCCCATCTGATA 1 -GGAATGAGATTCCAT 1 -TTGGGAAGGTGGAAG 1 -TGGTATTTTAAAAGT 1 -TTCTTTTGTAAGCTG 1 -ATTCCCCAATCCACC 1 -TAAAAGTAAAACTTA 1 -TTTTGACCTTGAGAA 1 -AAATAAAGAACATAC 1 -AAAAGAAAAAAAAAA 1 -TCTCGCTCCGTGGCC 1 -ACCTGTGGATGCTAA 1 -CGTCTCTACTGAAAA 1 -AATCGATGACCAAAT 1 -GTGGACTTCGTCTAG 1 -TTTAGCAGGGAAAGA 1 -CGCTAAGTTCGCATG 1 -GATAGTTAAGTGGGG 1 -GCAAAACATGATCGA 1 -AGCGGCGGGGTGGCC 1 -TTTGTTTTTTAGCTC 1 -ATATATTCCCAACCT 1 -AAAATATAGAGTTTA 1 -TGGGTAGCTCTAAAC 1 -TGACTTCCCTTCTCC 1 -GACTTCCCTTCTCCA 1 -AGAAATGAACTTTGA 1 -AAGAACTACCCGTTA 1 -AATCACTTGAACCCA 1 -AGAACTACCCGTTAT 1 -TCCTGAGGACTATTT 1 -GGTGAAATCCCGTCT 1 -ATGGGTGTAGATCAA 1 -ATAGCTTGACACCAA 1 -GACTGTTGAGGACGC 1 -CCTGCAGTTGAGCAG 1 -TGTATGCATTTAGGT 1 -CTTATTATTAGAAGC 1 -TTGTTTTTGTTTTAC 1 -TCTTCTTAGAAAAGA 1 -TGAGGTCCGTAGTTG 1 -CAGAGAGTTCCAGGC 1 -ACACAGGGGATAAAT 1 -ATTATAAGAACTACC 1 -CCATTTAAGGTATTT 1 -TTTAAGAAACTTAAT 1 -AGCAGAATGTTTTGA 1 -AAAGATTACAGTGAT 1 -GGTCATGTTCCCTTC 1 -TCCGTGACTTCCCTT 1 -TTTCGGCGGGGAGCA 1 -GAAAAGGAAACTGAA 1 -GTACATGGTATTTTA 1 -CATAGTCCTCACCTA 1 -ACAGAGTACAATAAC 1 -AAAACCATTAATAAA 1 -GTGGGGTAAGTCTTA 1 -GGGCTTGTTCCTGCT 1 -GTCAAGGTGGCCTGG 1 -GTCAGAGAGGGGTCT 1 -TAGTCCTCACCTATC 1 -GTGTATTTAAAAGAA 1 -GCAGAGAATGGAAAG 1 -TTGGGGCCAAATCAT 1 -GCTTGGAGACAGGTG 1 -GATAAGCGTCAGAGC 1 -CAGCTCTAACATGAT 1 -CCCAGCGCAGCTGGA 1 -GGGGTCTGGGGGAGG 1 -ACACTAAGCGCGCTC 1 -AAAAAAGGTCTATGG 1 -CCTCACTATGTGGAG 1 -CGGCGGGGAGCAGGG 1 -GGGCTATTATGAACT 1 -GGTCTAGTCATGCCT 1 -GCGGTTTCGAATAAT 1 -AATCATGTAGACTCT 1 -CGAGACATGTAAGCA 1 -ACAAAGTCAGAGAGG 1 -CAACACTTTGGGAGG 1 -TCTTTTTCATAGATC 1 -GGCTATTATGAACTG 1 -TTAGGCAATATATTC 1 -TAGAGGAATTATGAG 1 -CACTAAGCGCGCTCA 1 -GCTTTGCGTCATTTA 1 -AGAGTTCCAGGCAGG 1 -TTTAAGGTATTTGCC 1 -AAGAAACATGTAATG 1 -TGGAGAGCTGTGGAC 1 -AAGTGAAGGTTTGTC 1 -ACGGTTTATTCTTCA 1 -ACAATCTGCATATTG 1 -TGTTAGTACATGGTA 1 -TGTTGAGGACGCCAG 1 -TTTTATCGGGTCCAA 1 -TCATGCCTCTCATTT 1 -AGCTTCAGGTATATT 1 
-GCCATACTACCCTGA 1 -AGTTCAAGACCAGCC 1 -GAGGGAAAGATACCA 1 -TTGAAAAGTATCTTG 1 -CCGGGTGTGGTGGCT 1 -TTGAGAAAATGTTTT 1 -AGCACTGAACGAACA 1 -AATAAAAAAATAAAA 1 -GAATTGAAAAAGTGG 1 -CAAGTTAGCCCCAAG 1 -GTCCTAGCACCTCTG 1 -CCATCCCAAAAAATA 1 -CGCTCTGGTCCTTCC 1 -ACACTGAATTCACCC 1 -CCCAGCACTTAGGGA 1 -CACCTCTTGATGGGG 1 -CACCTGTGGATGCTA 1 -GTAGCTCTAAACAAT 1 -TTATGAGGGAAAGAT 1 -AGAAACTAAGGCACC 1 -TATACACTCTTAACA 1 -CATGGGTAGGAACAG 1 -GCAATCACCTGTGGA 1 -AGTGGAGGCGTCGCG 1 -GGTATTTGCTGGTTA 1 -GGGAGTGGGGAAGGG 1 -AATTAATTTTTCAGT 1 -ATTTTAGCAGGGAAA 1 -TATTTTGAAAATTTC 1 -CGCACCCGGGACGCG 1 -TCCCAAAGGCGCGGC 1 -CATAGATCGAGACAT 1 -ACAGGGGATAAATGG 1 -CTAAGAAAAGGAAAC 1 -GAGTCTCTCCTACCC 1 -GGGGCGCTTGGGGTC 1 -AAAAAGATGAGTATG 1 -ACTGAAAAAGATGAG 1 -CAGCGTGGGCAACAG 1 -TAAGAAAAAAAATGG 1 -TGAAAACGGGAAAGT 1 -TGCCGCCTTCCCTCA 1 -GAGGCGTCGCCCGGG 1 -CGCTACTTGCCCCTT 1 -CTCGTGCCTGTGGTC 1 -TGGATGGTCTAGTCA 1 -CCCAATCCACCTCTT 1 -CGTGCATTTTTTTTT 1 -CAAGGACTTTATGTG 1 -CTCTACTAGACACTT 1 -TGGAGGGGCGCTTGG 1 -AGGCGTCGCGCTGGC 1 -ACCAAGACTGTTGAG 1 -CAGGTACTCCAAAGA 1 -AAAATAATAACAAAA 1 -CTGCCACGTATAGAG 1 -TGCTGGTTATGTTAG 1 -GGAATTATGAGGGAA 1 -AGACTGGAGAGCTGT 1 -GCCTGGGCAATGGAA 1 -GCTCTGGTCCTTCCT 1 -AAAATAAAAAAATAA 2 -AAGAAACTAAGGCAC 1 -CGTCCCAAAGGCGCG 1 -GGAGGCGTCGCCCGG 1 -GAGAAATGAACTTTG 1 -GGAAATGGAATTGGG 1 -TTTATTTACAACATA 1 -AGCACTTTCTAAGTA 1 -CTCCAGAGAAAGGCT 1 -ATCTTCCGCCATAGA 1 -TACCACAAGAAACAT 1 -TCCTTGGTGGCCCGC 1 -CTGGGCGGCATGGGC 1 -TCTATCTCTTGTACT 1 -AGCATCCTATAATCC 1 -TGGAAACAGAGTACA 1 -CTATAATCCTGGACT 1 -GGCTGGGCACGCGTT 1 -TCAGGGAATGTTCTT 1 -GCTTGTTCCTGCTGG 1 -GGGAATGTTCTTAAA 1 -TGGATCTCGGGGAAG 1 -GGGCCAAATCATGTA 1 -CCCAACACTTTGGGA 1 -ATCGATGACCAAATG 1 -TTCCCTTCTCCTGTG 1 -CTTGAGAGCCTCCAG 1 -TCATGGCGCCAGTGC 1 -GTGAACCATGTGACT 1 -GCATCAGAAGTCCTT 1 -CAGAAATAAAGAGGT 1 -TTCCCAACCTGGCTA 1 -GGAGGATGGCTTGAG 1 -CTCAGGTACTCCAAA 1 -TTCTGAACCAGTAGT 1 -AAGGAAACTGAAAAC 1 -AAAAAAATCTTATTT 1 -TAAGCTGCTGAAAGT 1 -ATGAGTATGCCTGCC 1 -TGAATACAATTTATT 1 -CATTCTTTTGTAAGC 1 -GTATTTAAAAGAATT 1 -TTGCTATGTGTCTGG 1 -GATTCTCCAGAGCAA 1 
-ATACATGCCATGCAT 1 -GTAGGCTCGTCCCAA 1 -TGTCTTTCAGCAAGG 1 -CACTTCTTACCTACT 1 -AATAATTAACTTATT 1 -TTGTTTCACTGTCCT 1 -TGGGGGACGGGTAGG 1 -ATATCACCACCATTA 1 -GGGCTGGATCTCGGG 1 -CAAGTCACGGTTTAT 1 -ATCCCACTGTCACAT 1 -GGCTGTAATTCGTGC 1 -TGGACTTCTCCAGTA 1 -CTTAGCTGTGCTCGC 1 -AGTAATTTGATGGGG 1 -TGGTCAAGGTGGCCT 1 -GGTTATGTTAGATGT 1 -CTCTTCCGCTCTTTC 1 -GAGAAATCGATGACC 1 -GTGGCTTGTTGGGAA 1 -AATCTCCAGTGACAG 1 -GTTTAGGGCGTCGAT 1 -ATTTGGCCAGAGTGG 1 -GTCTAGTCATGCCTC 1 -GATACCAAGTCACGG 1 -TGTGCTCTCTCGCTC 1 -CTCATTTGGCCAGAG 1 -TCTGTGGCCCTCGCT 1 -CCCCCACTGAAAAAG 1 -GATGAAGAAACTAAG 1 -AGACAGCTCTAACAT 1 -AGAGTGGAAATGGAA 1 -GGGTCGGGACAAAGT 1 -TCAATCTGTATATTT 1 -AAACAGTTATCTTCC 1 -TTGTTTCCAAGATGT 1 -CCTGAAGCTGACAGC 1 -CACTTGAACCCAGGA 1 -TGTGACTTTGTCACA 1 -GGACGCCAGAGATCT 1 -TGGCGGGCATTCCTG 1 -TCCCAGCACTTAGGG 1 -TGACAGAAGATACTG 1 -TTATTTTGAAAATTT 1 -GGTAGGCTCGTCCCA 1 -TTGACATCTTTCTGT 1 -TGTCCTAGCACCTCT 1 -ATATAAACAATCTGC 1 -CGATAAGCGTCAGAG 1 -GAAACATTAGGCAAT 1 -GATATTTAAAAAAAA 1 -TTTGTTTCTTTGTTT 1 -GGAGGCCAAGGCAGG 1 -CAAATTTCCTGAATT 1 -ATTTCCAAAGTAATA 1 -CTTTGATATAAAAAA 1 -TTAGAAAATCAGATG 1 -TACTCCATTTGATCA 1 -GTGCTCTCTCGCTCC 1 -AGAGAGAATTGAAAA 1 -AAGAAGGTGTATGGC 1 -CTGAAAGTTGTGTAT 1 -GGCGTCGCGCTGGCG 1 -ATGTTCTTAAAGATC 1 -CTGGTCTTTCTATCT 1 -GATATATCTGGTCAA 1 -CCTGGCACTGCGTCG 1 -GTCCTGAGGACTATT 1 -AAAAAAAAATCTTAT 1 -TGATGGGGGCTATTA 1 -GACGGTCCCTGCGGG 1 -TAAAACGCAAATCCC 1 -GGCAATGGAATGAGA 1 -TAGATAACTACTATG 1 -TGGGCGGCATGGGCC 1 -CCTGAATATAAGTGT 1 -CTGGTATTTGCTGGT 1 -ACCATGTGACTTTGT 1 -AAACATGTAATGATG 1 -GGGCACGCGTTTAAT 1 -CACATGCATTACTCC 1 -ATGGCTTGAGGTCCG 1 -AGCGCCGAGGTTGGG 1 -CCAAGTTAGCCCCAA 1 -TTATTTACAACATAA 1 -AAAAAAGAAAGAGAA 1 -ATTAATTTTTCAGTT 1 -CACTCTTAACACTTC 1 -ATAAAGCCAGTAACT 1 -GTCCTTGAGAGCCTC 1 -AAATTAACCAGGCAT 1 -AAAGTCCCTCTCTCT 1 -CCATCCGACATTGAA 1 -GTGACTTTGTCACAG 1 -GGAATGTTCTTAAAG 1 -TATGTTAGATGTCAT 1 -GTGGCTCGTGCCTGT 2 -CTGTCTCTCAAAAAA 1 -TACTACACTGAATTC 1 -AGCCCAAATTCAAAC 1 -TAAACAATGTATTCA 1 -CGGGGCCTCTGGCTC 1 -AAGATGGTTACCAAG 1 -ATTCCTCAGGTACTC 1 -TAAATAACTCTCCAA 1 
-ATTGGCTGGGCACGC 1 -CGCGCTACTCTCTCT 1 -AAAACAAAAAAGGCA 1 -CACGTATAGAGCAAT 1 -ATACCACAAGAAACA 1 -GTGGAGGGGCGCTTG 1 -ATCATGAGAAAATTG 1 -TCGGGGAAGCGGCGG 1 -GTTCGCATGTCCTAG 1 -GATACAAAGCGGTTT 1 -GTAGTCATATCATAA 1 -CTGATACTTGTCCTC 1 -GAAAAACGCCTGCCT 1 -TGGTATCTGAGGCTA 1 -ATACCAAGTCACGGT 1 -GCTTCCTCTTTGGCT 1 -TCCTAGCACCTCTGG 1 -GGCGGGAGGATCTCT 1 -CCCGGGTAAGCCTGT 1 -TTCATGGGTAGGAAC 1 -GAGCCCAAATTCAAA 1 -TCACCTTTTCCTCTG 1 -CTTCTCCAAGTTCTC 1 -ACTGATGCACAGCAT 1 -GACACTTCATACAGT 1 -AATAACTCTCCAAAA 1 -GCACTTAGGGAGGCC 1 -GCCTTGTCCTGATTG 1 -GACAGAAGATACTGC 1 -AAGTATCTTGGGGCC 1 -CAGAGTGGAAATGGA 1 -TCCCTCAAACAGAGA 1 -TTAAGTTTTGGAAGC 1 -TATGTCCCAGGCACT 1 -CCAGCCTGGCCAACA 2 -TACCTTGGGTTGGTT 1 -AAGGAATGCTATGAG 1 -TCCCTTAGACTGGAG 1 -GTGCCTGTAATCCCA 1 -GTGGCAGCTTCAGGT 1 -CAGCAATCACCTGTG 1 -TTTGTTCCCATCACA 1 -GGGTAAGCCTGTCTG 1 -CAGCAATCGAGATTG 1 -GGGGGTGCGCACCCG 1 -ATTGCTATGTGTCTG 1 -ATTTGTTAGTACATG 1 -GGAGGGGCGCTTGGG 1 -ACAGATGAAGAAACT 1 -CTAGGCGCCCGCTAA 1 -CAGCCTCCCAGACAA 1 -CCCGCTCTGGTCCTT 1 -CAATGGAATGAGATT 1 -CCTGAATGAGTCCCA 1 -CTGCTTCCCTTAGAC 1 -TTTTTTCTCCACTGT 1 -TTTCAGTTAAGTTTT 1 -GAAAAAGATGAGTAT 1 -TTTTGAAAATTTCCA 1 -CGCGCTGGCGGGCAT 1 -GTCCTCACCTATCCC 1 -GGAGGTGGCTTGTTG 1 -ATATAAGTGTATTTA 1 -GGGTCTGGCATATAA 1 -CACGGTGGCTCGTGC 1 -GGTGGCCCGCCGTGG 1 -AAATCATGTAGACTC 1 -GGATCTCGGGGAAGC 1 -CTCGTCCCAAAGGCG 1 -TCAGCAAGGACTGGT 1 -ATCACCACCATTACT 1 -ACATATACACTCTTA 1 -GGCGTCGATAAGCGT 1 -TGAACGCGTGGAGGG 1 -TTCTTATTATTAGAA 1 -GGAAGCGGCGGGGTG 1 -GCACCGAGATTTTAA 1 -ACACTTTGGGAGGCC 1 -ACTCTTGAGTGATGT 1 -CTGCGGCTCTGCTTC 1 -CTTATTTGTTCCCAT 1 -CATTTGTTAGTACAT 1 -TGAAATACCCTGGCA 1 -AGGCCTTTGTTTGTA 1 -TCTTAACACTTCTTA 1 -GGGTTGGTTGGGCAC 1 -TTTAGTGATCATGTA 1 -CCTCCTTCTCCCCAC 1 -GTCGCCCGGGTAAGC 1 -AAGGCATAAACATAA 1 -CCCGCCGTGGGGCTA 1 -GAGAATGGAAAGTCA 1 -AGATACTGCTAGAAA 1 -GAGATGTCTCGCTCC 1 -CTTGAGTGATGTGTT 1 -TTTACTGTGGGCATA 1 -ACATATATTTAGTGT 1 -TGCACAGCATGGTTT 1 -CGCCGTGGGGCTAGT 1 -TTACACAGGGGATAA 1 -ATGAACTTTGAAAAG 1 -ACTCTCTCTTTCTGG 1 -GCGGCATGGGCCCTG 1 -TATAGAGTTTATAAT 1 -TAGATGTCATTTTAA 1 
-TATTGGGATTGTCAG 1 -AGATTAGTGGCACCT 1 -AATCCCACTGTCACA 1 -AAGTACCTGGCAATA 1 -GTCTTTTTCATAGAT 1 -GGTGTAGATCAAGGC 1 -TGAATCTGTGCTCTG 1 -CCTGTGGTCTTTTCG 1 -TATAATATTTTCTTC 1 -TGTGCCTGTAGTCCC 1 -TAGACTCTTGAGTGA 1 -TAAAAGAATTTTATA 1 -ATGCTAATTAAAACG 1 -GATCCACTTAGGAAC 1 -GAACCAGTAGTTTCC 1 -ACCTGTGTGTGGGTT 1 -TCCTCTTCTTAGAAA 1 -CTACTAGACACTTCA 1 -GTATGCCTGCCGTGT 1 -TCTTTTGTAAGCTGC 1 -AAAAAACAAAAAAGG 1 -TTATAATATTTTCTT 1 -CTCTGATCCCTGAGG 1 -TTACTGTGGGCATAA 1 -GGGGTTGGGCGTGGT 1 -CATGTCCTAGCACCT 1 -CACAGGGGATAAATG 1 -TCAAACCCAGCCTGT 1 -ATCAGATTAGTGGCA 1 -TCCTGTGGAGTGGCA 1 -CCCATGGATGGTCTA 1 -GAGGAAACAGCACGC 1 -TTCACTGTCCTGAGG 1 -AAGGACTGGTCTTTC 1 -ATGGTTACCAAGACT 1 -GTGCCTGATATAGCT 1 -GGCAGGATGAATCTG 1 -ATGAAATTAGGTACA 1 -CGGGTGTGGTGGCTC 1 -GGTCTATGGCCATAC 1 -AAGTTTTGGAAGCTT 1 -AGCTCTCTTAGCTTT 1 -ATGAAGAAACTAAGG 1 -AAAAAATAATAACAA 1 -GTTTAGAAAATCAGA 1 -TAACATGAGTAATTT 1 -CGATATTCCTCAGGT 1 -TGTCCTAGCATCCTA 1 -GGAGTGGGGGACGGG 1 -AACATCTCAAGAAGG 1 -GTGGGGCTAGTCCAG 1 -CATATATTTAGTGTC 1 -TGCCTGGTTGTTTCC 1 -CTCCTACCCTCCCGC 1 -GTCACAGCCCAAGAT 1 -GAAGGTTTGTCAGTC 1 -TATTCTGCCAGCCTT 1 -GTTAAGTTTTGGAAG 1 -GGGTTGGGCGTGGTA 1 -ACCTTGAGAAAATGT 1 -AGCACGCGACGTTTG 1 -TAGCATCCTATAATC 1 -TGTGATACAAAGCGG 1 -GTCTATTTTCCTCCT 1 -TTCACCCCCACTGAA 1 -AACGCAAATCCCACT 1 -TAAACATAAGAAAAA 1 -TTAGCTTTTAATGTT 1 -TCCCACTGTCACATG 1 -CTAAGCGCGCTCACC 1 -TCTCCCCACAGCCTC 1 -AGGCAATATATTCCC 1 -CAGATGCAAAGAGCT 1 -TATTTAAAAGAATTT 1 -TGAGAGGGCATCAGA 1 -GAAGCCTGGTCTCTA 1 -GTTTTTGACCTTGAG 1 -GAGGCTAGTAGGAAG 1 -GGGGAAGCGGCGGGG 1 -AGCTGACAGCATTCG 1 -GGGCCTCTGGCTCCC 1 -ATGTGTTAAGGAATG 1 -TTTGGAAAGTACTGT 1 -ATGCATAGACCATTT 1 -AACAATCTGCATATT 1 -ATAGACCATTTCTGG 1 -TAGAATGCTTGGCTG 1 -TAGAAGCTCAGATGC 1 -CCGAGATTTTAAGAA 1 -GCCGTGTGAACCATG 1 -CTGGTACTCCTCCTT 1 -CCGCTCTGCACCCTC 1 -GCGCGCTACTTGCCC 1 -AAGAGGTTTTGTTGT 1 -GCTGTGATACAAAGC 1 -AGATTCCCCAATCCA 1 -TGCCGTGTGAACCAT 1 -TCTCACAAAATCTTG 1 -TTTTCCCGATATTCC 1 -GGCTTGTTCCTGCTG 1 -CTGGCCTGGAGGCTA 1 -CACCCTCTGTGGCCC 1 -GTCTTCCTTTTTTTT 1 -GGGGCTAGTAGCCTT 1 -GGGATTGTCAGGGAA 1 
-TGGAGTGGCATGAAG 1 -AAAGTCATAAAGCCA 1 -AAGGCAGGAGCAGGA 1 -TGGGAGAAATCGATG 1 -GCCTGGTTGTTTCCA 1 -TCTATTTTCCTCCTC 1 -CTGACAGCATTCGGG 1 -AGATATATCTGGTCA 1 -CTTCTCCAGTACTTT 1 -CCCAAGTGAAATACC 1 -AAAGAGCTCTCTTAG 1 -TCAGGTACTCCAAAG 1 -CCTTGGTGGCCCGCC 1 -TCACCACCATTACTG 1 -ATTCTGCCAGCCTTA 1 -AAACATCACGAGACT 1 -TTCGGCGGGGAGCAG 1 -TCACTATGTGGAGAA 1 -CCCCAATCCACCTCT 1 -AAAAGAAAAGAAAGA 1 -GGAGCAGGGGAGACC 1 -AAATACATATACACT 1 -GTGGTAGCTTACGCC 1 -AAAAAATAACAATCT 1 -ATGGAATTGGGAGAA 1 -AACCAAAAAGAAAGG 1 -GTAAGTTTTTGACCT 1 -GGTTGGTTGGGCACG 1 -TGCCTGATATAGCTT 1 -TTTTCGTACAGAGGG 1 -AACAGCACGCGACGT 1 -GGACGGGTAGGCTCG 1 -CCAGGAATCACTTGA 1 -AGGACTGGTCTTTCT 1 -CCACTTAGGAACCTC 1 -GCTATCATGGCGCCA 1 -CCATAGATAACTACT 1 -TATCCAGCGTGAGTC 1 -AATGCAGCGCAATCT 1 -CAGGGGAGCTGTAAA 1 -CTTTTAAAAAATTAT 1 -GTGGTCCCAGCTGCT 1 -TCTTTTCCCGATATT 1 -TCTGATCCCTGAGGC 1 -TGAAAAATATAGAGT 1 -TAATGTCTTCCTTTT 1 -AAATCAGGTCTTCAT 1 -GGAAACAGAGTACAA 1 -AGCACCTCTGGGTCT 1 -TGTTGTTTGGTAAGA 1 -GCTATGAGTGCTGAG 1 -TTGTTTGTAAGTCCT 1 -TACTCACGTCATCCA 1 -ACAGGGTCATGTTCC 1 -GCACCTCTGGGTCTA 1 -GGAATTGGGAGAAAT 1 -TTGGGAGAAATCGAT 1 -CCCATTTGCCATAGT 1 -GGCTAGTTTACAGCA 1 -AAAAGTCATAAAGCC 1 -TTTTTTTTCTCCACT 1 -TTGAGCCCAAATTCA 1 -AGTATGCCTGCCGTG 1 -AACATGGTGAAGCCT 1 -GAAGCTTAAATAACT 1 -TACTATGGTTATCTT 1 -ATCCAAGATGGTTAC 1 -ATGGTTATCTTCTGC 1 -TAGTACATGGTATTT 1 -TAGGAAGGGCTTGTT 1 -TCCTGGACTTCTCCA 1 -GGTAAGTCTTACATT 1 -GGAGCTGTAAAACCA 1 -CTGAAAACGGGAAAG 1 -TTGCTGGTTATGTTA 1 -CTTAATGATAGGGTG 1 -GAAAACGGGAAAGTC 1 -CTCTTCTTAGAAAAG 1 -CGTGCCTGTAATCCC 1 -GTGGAGTGGCATGAA 1 -CTGGTCCTTCCTCTC 1 -CTGCTGGGTAGCTCT 1 -AGCCTATTCTGCCAG 1 -GAGCCTCCAGAGAAA 1 -AAATCTGCTAGAAAA 1 -TGGCACCTGCTGAGA 1 -CCTGGCCAACATGGT 2 -TCCTCTCCCGCTCTG 1 -ATCCCAACACTTTGG 1 -TACAACATAAAGGAT 1 -TTGGTGGCCCGCCGT 1 -CAGTACTTTCTGGCT 1 -TTTTTTTTAAGAAAA 1 -GTAGAATGCTTGGCT 1 -ACACCAAGTTAGCCC 1 -TTGGGATTGTCAGGG 1 -GAAGTCCTTGAGAGC 1 -TTCAGACTTGTCTTT 1 -TAGCACTAACACTTC 1 -TCAGGTCTTCATTAG 1 -TTTCGGTTTTGAAAA 1 -TTAATTACACAGGGG 1 -CTGCACACTGCACTC 1 -TTGTACTACACTGAA 1 -CACTTTGGGAGGCCA 1 
-TTGTCCTGATTGGCT 1 -AGTAGCCTTTCCTTA 1 -CTCTGTGGCCCTCGC 1 -GTAGTTGAGACCAGC 1 -GAGAAAAGAAAAGAA 1 -CAACATGGTGAAGCC 1 -GGTACTCCAAAGATT 1 -TCACGTCATCCAGCA 1 -GGTTTCGAATAATTA 1 -CCAAGATAGTTAAGT 1 -CCTCGCTGTGCTCTC 1 -TATATTCCCAACCTG 1 -AAGATTACAGTGATG 1 -AAATAACTCTCCAAA 1 -CCAATCCACCTCTTG 1 -AAAAAAAAAAAGAAA 1 -CCCTCTCTCTAACCT 1 -CAGGCATGGTGGTGT 1 -TATGAGTGCTGAGAG 1 -ATGGTATTTTAAAAG 1 -CACATATATTTAGTG 1 -GCGGGAGGATGGCTT 1 -TATATCACCACCATT 1 -AGTCCTCACCTATCC 1 -AATGGAGAGAGAATT 1 -TTTGCGTCATTTAAT 1 -AAGCCAGTAACTGGT 1 -ACTATGTGGAGAACA 1 -AGCAATCACCTGTGG 1 -ACGGCGACGGGAGGG 1 -GAGGCTATCCAGCGT 1 -ATAGACAGCTCTAAC 1 -GTTCCAGGCAGGATG 1 -AGCTTTTAATGTTAT 1 -TAGACAGCTCTAACA 1 -GGTCTGGGGGAGGCG 1 -AGAAAGAAAGAAGTG 1 -CCTACTGGCTTCCTC 1 -GGTATAGGCCTTTGT 1 -CGCGGGGCCTCTGGC 1 -CACTAACACTTCTCT 1 -ATAAAAAAGGTCTAT 1 -TCCTGATTGGCTGGG 1 -AAACCATTAATAAAG 1 -AGGAATGCTATGAGT 1 -CGAATAATTAACTTA 1 -GCTCCGTGACTTCCC 1 -CAGCCTGTCTGATAC 1 -GGCACCTGCTGAGAT 1 -TGGCAATACACTAAG 1 -ATAATAACATCTGCC 1 -TGAGATTCTCCAGAG 1 -TGGCCCTCGCTGTGC 1 -TCTCTTGTACTACAC 1 -CAACATGGTGAAATC 1 -CCCTGAATATAAGTG 1 -ATACTTGTCCTCTTC 1 -AGTGCACTCCAGCGT 1 -AAGCTGCTGAAAGTT 1 -TGTTCTGTCCCATTT 1 -AACAGTTATCTTCCG 1 -TGTCCTGAGGACTAT 1 -ATATTTAGTAGCACT 1 -TGAGTCTCTCCTACC 1 -TAGGGAGGCCGAGGC 1 -AAATACCCTGGCAAT 1 -ATGAGTGCTGAGAGG 1 -CGCCTTCCCTCAAAC 1 -AGGCGCGGCGCTGAG 1 -AGAGCAATTGCTATG 1 -TATTTATAGACAGCT 1 -TAATGATAGGGTGTT 1 -AGAGAGGGGTCTGGC 1 -TGTTTTACTGTGGGC 1 -ACATTTTAGCAGGGA 1 -AAGTTTTTGACCTTG 1 -ATTTGCCAGCTCTTG 1 -GAAACTGAAAACGGG 1 -CATGTTCCCTTCTCC 1 -CATGCCTTTTGGCTG 1 -AATATAAGTGGAGGC 1 -GGGACGGGTAGGCTC 1 -TCATTTTAAAAAATA 1 -CTGGTTGAGCCCAAA 1 -GCCTATTCTGCCAGC 1 -CTCCTTGGTGGCCCG 1 -ACCCTGTCTCTCAAA 1 -CACTTGAAGTTGGGA 1 -GCAGGGAAAGAAGAA 1 -AGAAGTCCTTGAGAG 1 -ATGTACTGTGCCTCT 1 -ATTTACAACATAAAG 1 -AAAATGGAAGGGGTG 1 -GTTGGGAAGGTGGAA 1 -TAATCCCAGCACTTA 1 -ATTCCATCCCAAAAA 1 -TTTCTCCACTGTCTT 1 -ATAGAGCAATTGCTA 1 -TTTCATAGATCGAGA 1 -CTCTTGATGGGGCTA 1 -AGCTGAGATCTCACC 1 -CCTACGGCGACGGGA 1 -CTGGAAGATACCACA 1 -GAGACAGGTGACGGT 1 -CCCCAAGTGAAATAC 1 
-ATCATGGAGGTAAGT 1 -TCCAGTACTTTCTGG 1 -TGTCTGCTGCGGCTC 1 -GGCACTCTACTAGAC 1 -TCCAGGCAGGATGAA 1 -AGCTTGACACCAAGT 1 -ACTTCTTACCTACTG 1 -AAACACTTGGTGCCT 1 -TTGAGACCAGCCTGG 1 -TAGACATTTGTTAGT 1 -AGCCTTTCCTTAATG 1 -CATCAGAAGTCCTTG 1 -CCACGTATAGAGCAA 1 -GAAAAAAATCAGGTC 1 -GTGATACAAAGCGGT 1 -TAACTTATTTGTTCC 1 -CGGGAGGGTCGGGAC 1 -GGAAAGAAGAATCCT 1 -TATAAAACCTCAGCA 1 -GACGCGCGCTACTTG 1 -ACTTATTTGTTCCCA 1 -AAATGGCAGCAATCG 1 -TGCCAGCCTTATTTC 1 -TACCCTGAATATAAG 1 -ATGTGTCTGGGTTTC 1 -GAGCGCCGAGGTTGG 1 -GATTGCCTCTGAAGG 1 -AACTCAACCATTTAA 1 -GTCGGGACAAAGTTT 1 -TTCAGTTAAGTTTTG 1 -GAGTAGTCATATCAT 1 -GGTTGAGCCCAAATT 1 -AAAGATAATCCAAGA 1 -GGCGTCGCCCGGGTA 1 -TTCCTCTCCCGCTCT 1 -GAAGTTGGGAGTTCA 1 -TGGAGACAGGTGACG 1 -TGGAAATGGAATTGG 1 -TTAAACATCACGAGA 1 -CCTGCTGTCCTAGCA 1 -TGGGTTTCATCCATC 1 -CTTCCCTCAAACAGA 1 -ATATAAAAAAGGTCT 1 -AGAGCTCTCTTAGCT 1 -TTAATGTTATGAAAA 1 -CCATCACATGTCACT 1 -GGCTTTTGCGGGAGC 1 -GACAAGGAGGAGTAG 1 -GAGTCCCATCCCATC 1 -GATAATCCAAGATGG 1 -GCTATGTCCCAGGCA 1 -GACTTTATGTGCTTT 1 -TAGGAACAGCAGCCT 1 -GATGGGTGTAGATCA 1 -TCGTCTAGGCGCCCG 1 -GTCATTTAATTTTGA 1 -TGATGATTGCCTCTG 1 -GAGGACTATTTATAG 1 -ACGGTGGCTCGTGCC 1 -TGTGTGGGTTTTGTT 1 -GCCTGGTCTCTACAA 1 -AATAACATGAGTAAT 1 -GCTATCCAGCGTGAG 1 -CTTGACACCAAGTTA 1 -GGTTTTGTTTCTTTG 1 -TTCTAACCATTTTAG 1 -AGGGCATCAGAAGTC 1 -ATTCTCCAGAGCAAA 1 -CTGGGAGTGGGGAAG 1 -GCTCTGCTTCCCTTA 1 -CTGAGATCTCACCAC 1 -TGTGGATGCTAATTA 1 -TTAATTTTGAAAACA 1 -ATGCCTGCCGTGTGA 1 -AATGGCAGCAATCGA 1 -GGGTTTCATCCATCC 1 -ACGCGTGGAGGGGCG 1 -GGCGACGGGAGGGTC 1 -CAAGGAGGAGTAGCT 1 -TACAAAGCGGTTTCG 1 -CTTAGGCTTTTGAGC 1 -CGTCATCCAGCAGAG 1 -TCCTCAGGTACTCCA 1 -TCTACTAGACACTTC 1 -TTCAATGAAAAATAT 1 -GCCTGTGGTCCCAGC 1 -CTCTCATTTTGGAAA 1 -TAGCTCATGAAATTA 1 -ATGAATCTGTGCTCT 1 -TAAAGCCAGTAACTG 1 -GTCCCAAAGGCGCGG 1 -CCCATCTGATATAAA 1 -TCCTGAATTGCTATG 1 -AGCTCAGATGCAAAG 1 -TAGTTTCCCTGCAGT 1 -ACTCCAGCGTGGGCA 1 -ATAAAACCTCAGCAG 1 -CCAGCACTTAGGGAG 1 -AGTCATAAAGCCAGT 1 -ATAAAGCTGCTTTGA 1 -TGCGGGAGCGCATGC 1 -AAAAATAACAATCTG 1 -GGTCTCTACAAAAAA 1 -CCTGGGAGTGGGGAA 1 -GGCCTGGAGGCTATC 1 
-GGGCACGGTGGCTCG 1 -TCAGATGCAAAGAGC 1 -TTGAAAAAGTGGAGC 1 -TAAAAAATAACAATC 1 -CAATAACATGAGTAA 1 -TCTGCCTCTCACAGA 1 -GCTGAGATCTCACCA 1 -CACAAAATCTTGCCG 1 -TCCCAGGAATCACTT 1 -CAGCTCTTGTATGCA 1 -CCTTCCTCTCCCGCT 1 -AAAAAGGCATGTATA 1 -AATACATGCCATGCA 1 -TAACACTTCTTACCT 1 -AAAGTGGAGCATTCA 1 -ATTTAGTGTCAATCT 1 -AAGTAAAACTTAATG 1 -TCGGGTCCAACTCAA 1 -AAGGTTTGTCAGTCA 1 -GACTGGTCTTTCTAT 1 -GGTGTTTCTAGAGAG 1 -AGGATGGCTTGAGGT 1 -GCTGTGCTCGCGCTA 1 -AAGTGGAGCATTCAG 1 -ATCCCGTCTCTACTG 1 -CCCAGCTGCTCCGGT 1 -ATTCGTGCATTTTTT 1 -CTCCGTGGCCTTAGC 1 -CCAGGCACTCTACTA 1 -GTGTGAACCATGTGA 1 -CCTCTTTGGCTCTTT 1 -TGAACCAGTAGTTTC 1 -TGGATTGGTATCTGA 1 -TGGGGAGGAAACAGC 1 -CACCAAGTTAGCCCC 1 -GTAACATTTTAGCAG 1 -TGAGTATGCCTGCCG 1 -CAGCAGAAATAAAGA 1 -CACTCCAGCGTGGGC 1 -AAAGAAAGAGAAAAG 1 -AATAACAAAAATTAG 1 -ATGAGGGAAAGATAC 1 -TGTTTCTAGAGAGAT 1 -CTTTATGTGCTTTGC 1 -CATCCGACATTGAAG 1 -GCTCGTCCCAAAGGC 1 -CCGCTCTGGTCCTTC 1 -GATTGAAGTCAAGCC 1 -CTTTCGGTTTTGAAA 1 -GAACTACCCGTTATT 1 -AAGAAAGAAAGAAGT 1 -AGTGGCACCTGCTGA 1 -TTCTTAAACATCACG 1 -TTTGAAAACAGTTAT 1 -ACAGGTGACGGTCCC 1 -TTCCTCTGGCAAAAC 1 -GTCAGAGCGCCGAGG 1 -GTGATCATGTACCCT 1 -TATTTTATCGGGTCC 1 -AAACCTCAGCAGAAA 1 -CAATCTCCAGTGACA 1 -CTTAGGAACCTCAGA 1 -TATCTTGGGGCCAAA 1 -CCGTCTCTACTGAAA 1 -TGGCCAGAGTGGAAA 1 -TGAGCTTAGGCTTTT 1 -TAATTTGAATACAAT 1 -AAGAAAGAAGTGAAG 1 -ACTACCCGTTATTGA 1 -CTGGCAATATTAATG 1 -TTTTTGTTTTACTGT 1 -TTTTGTTGTTTGGTA 1 -GTTCAAGACCAGCCT 1 -ACTTGTCTTTCAGCA 1 -TCCCAGGCACTCTAC 1 -ATGGGGCTAGTAGCC 1 -AGAAAAAAAAAAAAA 1 -ACCCGGGACGCGCGC 1 -AAGCCTAACCAGGGC 1 -ACTGTCCTGAGGACT 1 -CCTGTGTGTGGGTTT 1 -GGGGGAGGCGTCGCC 1 -GCATCCTATAATCCT 1 -CTCGCTGTGCTCTCT 1 -ACAGCAGCCTATTCT 1 -TGGTACTCCTCCTTC 1 -GGTGGAAGCTCATTT 1 -AGATAATCCAAGATG 1 -CTCTGAAGGTCTATT 1 -GCTAGAAATCTGCTA 1 -ATTATTAGAAGCTCA 1 -TGCACTCCAGCGTGG 1 -GCCATAGATAACTAC 1 -GACAGCATTCGGGCC 1 -AGGACTATTTATAGA 1 -AGCAGAAATAAAGAG 1 -ATTACTCCATTTGAT 1 -TGACGGTCCCTGCGG 1 -AATCAGGTCTTCATT 1 -CCCTGAGGCATTTAA 1 -AAAAAGAAAGAGAAA 1 -TGTATATATCACCAC 1 -CTGATATTTAAAAAA 1 -AAAAAATCTTATTTT 1 -CAGCAGCAGCACTTG 1 
-ATAATATTTTCTTCC 1 -TTAGCTGTGCTCGCG 1 -GCCTACGGCGACGGG 1 -TTTCTGGCCTGGAGG 1 -TACAAAGTCAGAGAG 1 -AGAAAGGCATAAACA 1 -GGGGCTATTATGAAC 1 -GGTATTTGCCAGCTC 1 -CGTCAGAGCGCCGAG 1 -CTTTCTGTGTGCCAA 1 -ATGCTTGGCTGTGAT 1 -AATGCTTGGCTGTGA 1 -AGACTCTAAGAAAAG 1 -TTAAAGATCAGATTA 1 -GGAGCAGCAGCAGCA 1 -GAGGTTTGTGAACGC 1 -CAGCTGCTCCGGTGG 1 -TTGCCCCTTTCGGCG 1 -TATTAATGTGTCTTT 1 -TAATTTTTCAGTTAA 1 -GACACCAAGTTAGCC 1 -TATTAGAAGCTCAGA 1 -TGAATTGCTATGTGT 1 -AAAAATCAGGTCTTC 1 -TGGTCTTTTCGTACA 1 -CCGTGGGGCTAGTCC 1 -ATATTCCCAACCTGG 1 -TGCTCTGATCCCTGA 1 -TGTGGGTTTTGTTTT 1 -GCCTTCTGCGTGAGA 1 -CTCTAAACAATGTAT 1 -AAGGGGTGGAAACAG 1 -GAGAAAATGTTTTTG 1 -ATGGTCTAGTCATGC 1 -TGCCTCTCATTTTGG 1 -GTGCTTTGCGTCATT 1 -ATGATGATTGCCTCT 1 -TAATATAAGTGGAGG 1 -AAGATTCAGGTTTAC 1 -TTTCTTTGTTTTTTA 1 -ATAAAGATAATCCAA 1 -GCATTTAATATGTTC 1 -CTTAAACATCACGAG 1 -AAGTATGTTCTGTCC 1 -AGAAAGAAGTGAAGG 1 -AAAGGATAATGTATA 1 -TTCCCATCACATGTC 1 -TGAACCCAGGAGGCG 1 -TTTCCTCTGGCAAAA 1 -CCTTTGGCCTACGGC 1 -CAAGCCTAACCAGGG 1 -GGGCCCTGTGGTCTT 1 -TTTAATTTGAATACA 1 -TAGGAACCTCAGATA 1 -AGGTATAGGCCTTTG 1 -TGCTATGTGTCTGGG 1 -GAGAGATATATCTGG 1 -CGGGACGCGCGCTAC 1 -TCCCACTTCCCCATG 1 -TCGCTGGCTTGGAGA 1 -TTGCGGGAGCGCATG 1 -CACGTCATCCAGCAG 1 -TGCGGGCCTTGTCCT 1 -TGCTTTGATATAAAA 1 -ATTAGCCGGGTGTGG 1 -AACATTTTAGCAGGG 1 -CCTGCCGTGTGAACC 1 -ATTTGCTGGTTATGT 1 -CTGAAAAAGATGAGT 1 -GCATTTTTTTTTAAG 1 -AAAAGAAAGGCATAA 1 -ATTCGGGCCGAGATG 1 -TTAATTTTTCAGTTA 1 -CGGTTTATTCTTCAA 1 -GCAGCTTCAGGTATA 1 -CCATTAATAAAGATA 1 -TCATTTGGCCAGAGT 1 -CCCTCCCGCTCTGGT 1 -TGGGCACGGTGGCTC 1 -TTGGGGTCTGGGGGA 1 -ATAAATGGCAGCAAT 1 -GTGCGCACCCGGGAC 1 -GGATGCTAATTAAAA 1 -TCGTGCCTGTAATCC 1 -ACTGTCTTTTTCATA 1 -TGTTAGATGTCATTT 1 -TATCTGAGGCTAGTA 1 -TGTTTCTGAAACATT 1 -GTCATTTTAAAAAAT 1 -CAGAAGATACTGCTA 1 -GAGCAATTGCTATGT 1 -TTGGAAGCTTAAATA 1 -ACCTTGGGTTGGTTG 1 -AACTACTATGGTTAT 1 -GCTCTTTCGCGGGGC 1 -CGCTGGCTTGGAGAC 1 -CTCCAGCGTGGGCAA 1 -TCCGTGGCCTTAGCT 1 -TAAGAACTACCCGTT 1 -CTGCACTCCAGCCTG 1 -AACTTAATGTCTTCC 1 -TTTTCATAGATCGAG 1 -TGTCCTCTTCTTAGA 1 -AGCCAGTAACTGGTT 1 -GCGGGGAGCAGGGGA 1 
-TCAAGGCAGGAGCAG 1 -TCATTTAATTTTGAA 1 -TCCTTCTCCCCACAG 1 -AAAAGATGAGTATGC 1 -TTGGGAGTTCAAGAC 1 -TCCCTGAGGCATTTA 1 -CTATCTCTTGTACTA 1 -CACATGTCACTTTTA 1 -GTAATTTGATGGGGG 1 -CCTTAATGATAGGGT 1 -CTGGAGTGGGGGACG 1 -GTCTTCATTAGATTC 1 -GCCTAACCAGGGCTT 1 -CAGCCTGGCCAACAT 2 -TGTCACATGCATTAC 1 -CTCTTTCGCGGGGCC 1 -ATAACAAAAATTAGC 1 -CAACTCAACCATTTA 1 -GTTTATAATATTTTC 1 -TAGATCGAGACATGT 1 -GAAGCTGACAGCATT 1 -ACTTCTCCAGTACTT 1 -GCGCACCCGGGACGC 1 -TCACGAGACTCTAAG 1 -TTAGCACTGAACGAA 1 -CTTTCTATCTCTTGT 1 -AATAAAAAAATAAAG 1 -GATGGGGGCTATTAT 1 -AAGATACCACAAGAA 1 -CCTGAGGCATTTAAT 1 -CATAAAGCTGCTTTG 1 -TTTTGGAAAGTACTG 1 -CCTAACCAGGGCTTT 1 -AGGAACCTCAGATAA 1 -AGTTTATAATATTTT 1 -TGGCAAAACATGATC 1 -CTCTACAAAAAATAA 1 -TCGCGCTGGCGGGCA 1 -TTATTGACATCTTTC 1 -CCTGTAATCCCAGCA 1 -AGGTCTATGGCCATA 1 -TACTCCAAAGATTCA 1 -GCAGTGAGCTGAGAT 1 -GCAGGAACCAAAAAG 1 -GATGACCAAATGTAA 1 -CTGTCTGATACTTGT 1 -AGCACTTGCACAAAT 1 -GGAGGGTCGGGACAA 1 -TGAAACATTAGGCAA 1 -TTTTTTTCTCCACTG 1 -TTTTTCAGTTAAGTT 1 -ATCTGCATATTGGGA 1 -CCTGGTCTCTACAAA 1 -AAGATGAGTATGCCT 1 -TGGGTTGATCCACTT 1 -AACAGAGAGTTCCAG 1 -CTAAGGCACCGAGAT 1 -GGAGGCGGAGGTTGC 1 -TATATTTAGTAGCAC 1 -AGTGAAGGTTTGTCA 1 -AGACCTTTGGCCTAC 1 -ATTAGAAGCTCAGAT 1 -TCTCTTCCGCTCTTT 1 -GGGTTGATCCACTTA 1 -TCTTAGCTTTTAATG 1 -AGCCTCCAGAGAAAG 1 -CTCATTTTGGAAAGT 1 -TAGTCATATCATAAA 1 -GAGAGGGGTCTGGCA 1 -GGGAGCAGCAGCAGC 1 -TGTCTCGCTCCGTGG 1 -CTGCCGTGTGAACCA 1 -TTCTCCAGAGCAAAC 1 -GGCTTGAGGTCCGTA 1 -TTTTCTCCACTGTCT 1 -TCTTGAGCTTAGGCT 1 -ACATGATCGAAAGCA 1 -GAAAGGCTCTTAAAA 1 -TTCTAGAGAGATATA 1 -ATGTCATTTTAAAAA 1 -CGCCTGCCTTCTGCG 1 -CGGCTCTGCTTCCCT 1 -AGTACAATAACATGA 1 -CCAACCTGGCTAGTT 1 -AAGATAGTTAAGTGG 1 -AGCCCAAGATAGTTA 1 -ACGAACATCTCAAGA 1 -GGCCTCTGGCTCCCC 1 -GTGTCAATCTGTATA 1 -TATTTCTAACCATTT 1 -CTGTCTTTTTCATAG 1 -AGATGTCTCGCTCCG 1 -CATGGGCCCTGTGGT 1 -CCCAAAGGCGCGGCG 1 -CCACTGTCTTTTTCA 1 -AGCGCAGCTGGAGTG 1 -CTTGTCCTCTTCTTA 1 -ACTTGCACAAATACA 1 -TCGAATAATTAACTT 1 -AAGTCATAAAGCCAG 1 -CAGCATTCGGGCCGA 1 -GACTTTGTCACAGCC 1 -GGCCAAGGCAGGCTG 1 -GAGAGCTGTGGACTT 1 -GTTCTGTCCCATTTG 1 
-TCACGGTTTATTCTT 1 -ATAATCCTGGACTTC 1 -GAAAGAAAGAAGTGA 1 -AAGAGCTCTCTTAGC 1 -TGGCGCCAGTGCACT 1 -TCCGACATTGAAGTT 1 -GAAGCGGCGGGGTGG 1 -AATGTGTCTTTTCCC 1 -TCCCGCTCTGGTCCT 1 -AAATTAATTTTTCAG 1 -TAAAGAACATACCTT 1 -GAGGGGGTTGGGCGT 1 -AATTCAAACCCAGCC 1 -GCTCTAAACAATGTA 1 -GGTCAAGGTGGCCTG 1 -ATTTTATCGGGTCCA 1 -GGGTAGCTCTAAACA 1 -AGTAGCACTAACACT 1 -CCTCTGAAGGTCTAT 1 -TTTAGAAAATCAGAT 1 -GGTTTCTGAACCAGT 1 -TGGCTTGAGGTCCGT 1 -AGTCAGGGGAGCTGT 1 -GGTCCTTCCTCTCCC 1 -TAGCTTACGCCTGTA 1 -CACAGCCCAAGATAG 1 -TTTGTAGAATGCTTG 1 -TCACATGCATTACTC 1 -TTCTGTGTGCCAAGG 1 -CACTGAAAAAGATGA 1 -CTGAGGCGGGAGGAT 1 -AAAACGCCTGCCTTC 1 -TGTAATCCCAGCACT 1 -TAGCCGGGTGTGGTG 1 -GCCTGTAATCCCAGC 1 -TTAGAAAAGATTACA 1 -TTAATATAAGTGGAG 1 -ACCCTGAATATAAGT 1 -TGGTGAAGCCTGGTC 1 -TCTGGAAGATACCAC 1 -TAGTAGCCTTTCCTT 1 -TCTACAAAAAATAAT 1 -CAGTCAGGGGAGCTG 1 -AGGATCTCTTGAGCT 1 -GCATGTCCTAGCACC 1 -TAACCATTTTAGACA 1 -ACTTGCCCCTTTCGG 1 -AGGAGGCGGAGGTTG 1 -GGTTTTGAAAACATG 1 -ATAATCCAAGATGGT 1 -GAAATCTGCTAGAAA 1 -GGGAGCAGGGGAGAC 1 -CAAGACCAGCCTGGC 1 -GTATATTTAGTAGCA 1 -TTTATCGGGTCCAAC 1 -CTGAGGACTATTTAT 1 -CCTGATATAGCTTGA 1 -GGCGGGGTGGCCTGG 1 -GACCAAATGTAAACA 1 -CATTTTAGACATTTG 1 -AGTACTTTCTGGCTG 1 -TATTTTCTTCCCACT 1 -GTAATTCGTGCATTT 1 -ATCCCTGTTGTATTT 1 -GATAATGTATATATC 1 -AAGAACATACCTTGG 2 -GGGCGCTTGGGGTCT 1 -TTGGCTGTAATTCGT 1 -CAGCTTCAGGTATAT 1 -CTGTGGTCTTTTCGT 1 -CTTTGTTTTTTAGCT 1 -CATTGAAGTTGACTT 1 -CTAACATGATAACCC 1 -TGATGCTCTCACAAA 1 -TAATTAACTTATTTG 1 -TGGTGGCTCGTGCCT 1 -ATCTTCTGCCTCTCA 1 -TATTTTAAAAGTAAA 1 -TGGAGTGGGGGACGG 1 -ATAGAGGAATTATGA 1 -TCTCTCGCTCCGTGA 1 -TGATCCACTTAGGAA 1 -ATGCATTACTCCATT 1 -GGTCCGTAGTTGAGA 1 -CCAGCCTGGGCAATG 1 -CCAAGTTCTCCTTGG 1 -CGCGTGGAGGGGCGC 1 -TTGGGGGAGGGTTTC 1 -ATTTAGGTTTTGTTT 1 -ATGTCCTAGCACCTC 1 -TGCTTTGCGTCATTT 1 -GGCGCGGCGCTGAGG 1 -AATGGAAAGTATGTT 1 -CATGGAGGTAAGTTT 1 -AACAGCAGCCTATTC 1 -CAAAAAAGAAAAAAA 1 -GAGGGCATCAGAAGT 1 -CTATCATGGCGCCAG 1 -TTGACCTTGAGAAAA 1 -GTATTTTATCGGGTC 1 -GCCTTATTTCTAACC 1 -TGAGCACTTTCTAAG 1 -CGCTCCGTGACTTCC 1 -GTTTTTGTTTCACTG 1 -GAAGTTGACTTACTG 1 
-GGCATTTAATATGTT 1 -TTGAAGTTGACTTAC 1 -CGTGACTTCCCTTCT 1 -CTATGGCCATACTAC 1 -ATTCACCCCCACTGA 1 -TTTTGTTTCTTTGTT 1 -TGATATAAACAATCT 1 -GTATATTTAGCACTG 1 -TTATCTTCCGCCATA 1 -CTGCCTCTCACAGAT 1 -AGAAATCTGCTAGAA 1 -CTTCGTCTAGGCGCC 1 -CTCTCCAAAAGTCAT 1 -GCCAACATGGTGAAG 1 -ACAGCATTCGGGCCG 1 -GATATAGCTTGACAC 1 -CAGCACTTAGGGAGG 1 -CTTGCCCCTTTCGGC 1 -AATGAGATTCCATCC 1 -TAAGAACATACCTTG 1 -TCGATAAGCGTCAGA 1 -TGCCAGCTCTTGTAT 1 -AGCCTAACCAGGGCT 1 -TCAGAAGTCCTTGAG 1 -GAATGGAGAGAGAAT 1 -AAATTATAAGAACTA 1 -TTTAAAAGTAAAACT 1 -CCTCTTGATGGGGCT 1 -AGGCTCTTAAAAATG 1 -TATGGCCATACTACC 1 -TCTCTTGAGCTTAGG 1 -CGCTTGGGGTCTGGG 1 -CGCTACTCTCTCTTT 1 -GTCCAACTCAACCAT 1 -GGCTGTGATACAAAG 1 -TGGTGGCCCGCCGTG 1 -ACATTTGTTAGTACA 1 -GGCGGCATGGGCCCT 1 -AATAAAGAGGTTTTG 1 -ATTCAGGTTTACTCA 1 -CTTCCGCCATAGATA 1 -GCACTTTCTAAGTAC 1 -TGGGCACGCGTTTAA 1 -ATATAAAACCTCAGC 1 -TCTCACAGATGAAGA 1 -GGCACGGTGGCTCGT 1 -TCCCATCTGATATAA 1 -GCCAGTGCACTCCAG 1 -CAGGAGGCGGAGGTT 1 -CACTGCACACTGCAC 1 -GAAACAGCACGCGAC 1 -AATGATAGGGTGTTT 1 -GCAGGCTGATCACTT 1 -GCTCTCACAAAATCT 1 -TTTAGACATTTGTTA 1 -GGATCTCTTGAGCTT 1 -TTATCGGGTCCAACT 1 -ACTAAGGCACCGAGA 1 -AGGGGTGGAAACAGA 1 -AGTTGACTTACTGAA 1 -CTTTGGCCTACGGCG 1 -GGTAAGTTTTTGACC 1 -GTAAAACCATTAATA 1 -GTAGCCTTTCCTTAA 1 -AATAACAATCTGATA 1 -GAAATGAACTTTGAA 1 -TGATGTGTTAAGGAA 1 -CAGGAATCACTTGAA 1 -GAGGTTGGGGGAGGG 1 -CAAAATGGAGGTGGC 1 -CCTCTCTCTAACCTG 1 -CATTTTGGAAAGTAC 1 -CATTTAATTTTGAAA 1 -GAAACTAAGGCACCG 1 -CCCATCACATGTCAC 1 -GTGAGCTGAGATCTC 1 -TCTGTCCCATTTGCC 1 -GTTTGTAAGTCCTGC 1 -GGCATATAAAACCTC 1 -GCCTGGGAGTGGGGA 1 -GTAGCACTAACACTT 1 -CAATCTGTATATTTA 1 -CTGATATAAACAATC 1 -TGCCTCTGAAGGTCT 1 -TTTCATCCATCCGAC 1 -GGGAAAGAAGAATCC 1 -CTGTGGAGTGGCATG 1 -TTTTTAGCTCATGAA 1 -ATTACAGTGATGCTC 1 -ACCTTGGGTTGATCC 1 -GAGCAGGGAGCAGCA 1 -GACCTGTGTGTGGGT 1 -GTCCCAGGCACTCTA 1 -CTAACCAGGGCTTTT 1 -TTAACACTTCTTACC 1 -CAATTTATTTACAAC 1 -GTGGCCTGGTACTCC 1 -GTTTTGGAAGCTTAA 1 -TGTGGAGAACATTGA 1 -CGTCATTTAATTTTG 1 -TGTTCTTATTATTAG 1 -ATTTTAAAAGTAAAA 1 -AGCAATCGAGATTGA 1 -TGAGGGGGTTGGGCG 1 -TATATCTGGTCAAGG 1 
-ACATAAAGGATAATG 1 -AGCCTTATTTCTAAC 1 -CCTGTCTGCTGCGGC 1 -CTGGATTGGTATCTG 1 -AACATGGTGAAATCC 1 -ATATTTAGTGTCAAT 1 -TTTAGCTCATGAAAT 1 -ATTTAATTTGAATAC 1 -TAAGTCCTGCTGTCC 1 -ATGGAGGTAAGTTTT 1 -CGCTGAGGTTTGTGA 1 -GGGGAGACCTTTGGC 1 -TTGTTCCTGCTGGGT 1 -GTTGGGAGTTCAAGA 1 -TGATCATGTACCCTG 1 -GATCTCTTGAGCTTA 1 -ATATCTGGTCAAGGT 1 -TCTGGTCAAGGTGGC 1 -CATGATCGAAAGCAG 1 -CCGCCGTGGGGCTAG 1 -ATGATAACCCTCACT 1 -CCCTTCTCCAAGTTC 1 -CAAACTGGGCGGCAT 1 -CCTTTGTTTGTAAGT 1 -ATGAGTCCCATCCCA 1 -GTCCCTGCGGGCCTT 1 -TTTGGAAGCTTAAAT 1 -CTGGCGGGCATTCCT 1 -CTTCATTAGATTCCC 1 -GTGCACTCCAGCGTG 1 -TAATGTATATATCAC 1 -TTATGAACTGAGAAA 1 -AATTTCCAAAGTAAT 1 -GGTTTGTCAGTCAGG 1 -TGGAATGAGATTCCA 1 -TGTGTGCCTGTAGTC 1 -GTCATCCAGCAGAGA 1 -AAAGTAATACATGCC 1 -GCACAAATACATATA 1 -GGCGGGGAGCAGGGG 1 -TCACTTGAACCCAGG 1 -GTCGCTGGCTTGGAG 1 -GAGCTGTGGACTTCG 1 -GCATTCCTGAAGCTG 1 -CAAAAAATAAAAAAA 1 -CGGTGGCTCGTGCCT 1 -GGGTAGGCTCGTCCC 1 -ATTCTTTTGTAAGCT 1 -CATGGTATTTTAAAA 1 -CGCGGCGCTGAGGTT 1 -CAAGATAGTTAAGTG 1 -CACACTGCACTCCAG 1 -ATTACACAGGGGATA 1 -GCTGGGCACGCGTTT 1 -CTTCAAAATGGAGGT 1 -CCTCTGGGTCTATGT 1 -TTAAAACGCAAATCC 1 -GCAATATTAATGTGT 1 -AATCCCAGCACTTAG 1 -TAAGAAAAACGCCTG 1 -GCACGGTGGCTCGTG 1 -CCCCATGGATGGTCT 1 -AGGACTTTATGTGCT 1 -TGGGTCTATGTGGGG 1 -GAAAGTACTGTTTCT 1 -ATCATAATGGAAAGT 1 -CCCAAATTCAAACCC 1 -AAACATAAGAAAAAA 1 -GTGGAAACAGAGTAC 1 -AGAAATCGATGACCA 1 -TATTTTCCTCCTCTG 1 -CAGCCCAAGATAGTT 1 -TTTGTTGTTTGGTAA 1 -TAGCACCTCTGGGTC 1 -TTTTAGACATTTGTT 1 -CTGACCTGTGTGTGG 1 -GGTTTCATCCATCCG 1 -TCACAAAATCTTGCC 1 -CTGATGCACAGCATG 1 -CTCCAGTACTTTCTG 1 -AGCCGGGTGTGGTGG 1 -CATGGTTTCTGAACC 1 -GGGGACGGGTAGGCT 1 -AAAAAGGTCTATGGC 1 -AGATAATAACATCTG 1 -GAATACAATTTATTT 1 -AAAAAAAACAAAAAA 1 -CTCGGGGAAGCGGCG 1 -CACTGCACTCCAGCC 1 -ACTGTTTCTGAAACA 1 -AATACATATACACTC 1 -ATGCCTCTCATTTTG 1 -AATGTATTCATGGGT 1 -AATTCACCCCCACTG 1 -CTTCCCTTAGACTGG 1 -TCTTATTTTGAAAAT 1 -TTTTCCTCCTCTGAC 1 -GGTGGAAACAGAGTA 1 -ATAGATAACTACTAT 1 -TCATGGAGGTAAGTT 1 -CTTCCCTTCTCCAAG 1 -CTATGGTTATCTTCT 1 -GTGAAGCCTGGTCTC 1 -CGCATGCCTTTTGGC 1 -ACAAATACATATACA 1 
-AACTTATTTGTTCCC 1 -ATATCATAAAGCTGC 1 -ACTGTGCCTCTTACT 1 -TAACCAGGGCTTTTG 1 -AGTTATCTTCCGCCA 1 -CTCCCCACAGCCTCC 1 -TGGAAGCTCATTTGG 1 -GGAAACAGCACGCGA 1 -AATCCCGTCTCTACT 1 -ATGGAAGGGGTGGAA 1 -AGCTGCTCCGGTGGC 1 -ATGCCTTTTGGCTGT 1 -AGGAGTAGCTGCCTT 1 -TTCTTCAAAATGGAG 1 -ATCCTACAGGGTCAT 1 -CAATCGAGATTGAAG 1 -GAACCCAGGAGGCGG 1 -ATTGGGAGAAATCGA 1 -GGTCTATGTGGGGCC 1 -GTAATCCCAGCACTT 1 -GTTAGATGTCATTTT 1 -AAGTGAAATACCCTG 1 -AGGCATAAACATAAG 1 -AGCGCAATCTCCAGT 1 -AAATGGAGGTGGCTT 1 -TATCTGGTCAAGGTG 1 -CCTCTCACAGATGAA 1 -GTGACTTCCCTTCTC 1 -GAATAATTAACTTAT 1 -CAGCAGCCTATTCTG 1 -AAAGCGGTTTCGAAT 1 -CTCGTGCCTGTAATC 1 -CTGCCTTCTGCGTGA 1 -GGAAGGGCTTGTTCC 1 -CTTGGCTGTGATACA 1 -AACCCTCACTATGTG 1 -CAGAGCGCCGAGGTT 1 -TGAAGGTCTATTTTC 1 -ATGCTCTCACAAAAT 1 -AAGTTTAGGGCGTCG 1 -CCACAGCCTCCCAGA 1 -CCCAGGCACTCTACT 1 -GATCACTTGAAGTTG 1 -TCCCCAATCCACCTC 1 -ATCTTTCTGTGTGCC 1 -ACAAAAATTAGCCGG 1 -CGGGCCTTGTCCTGA 1 -TTAGAAGCTCAGATG 1 -TGCTCGCGCTACTCT 1 -AACAATGTATTCATG 1 -AGACTGTTGAGGACG 1 -TGTGGTCTTTTCGTA 1 -GCGGCGCTGAGGTTT 1 -TTTGTAAGTCCTGCT 1 -GCTTTTGTGGCAGCT 1 -TCCAAAGTAATACAT 1 -TGCCAAGGACTTTAT 1 -TTTGAAAACATGAGG 1 -AATGAACTTTGAAAA 1 -CAAAGGCGCGGCGCT 1 -CACCATTACTGGTAT 1 -CTGGCTAGTTTACAG 1 -GTTTTGTTGTTTGGT 1 -GAAAATTTCCAAAGT 1 -TGGGCAATGGAATGA 1 -CCTCAGATAATAACA 1 -TGATAACCCTCACTA 1 -GAGGTTTTGTTGTTT 1 -ATAAAAAAATAAAAA 1 -GAATATAAGTGTATT 1 -TTGCCTGGTTGTTTC 1 -CCCTGAATGAGTCCC 1 -TCGCATGTCCTAGCA 1 -TTGGGCACGGTGGCT 1 -ATACAAAAATTAACC 1 -CCACTGCACACTGCA 1 -TGAAAATACAAAAAT 1 -TAAGAAAAGGAAACT 1 -CTACCCGTTATTGAC 1 -AATTGGGAGAAATCG 1 -CTGTGGCCCTCGCTG 1 -ACCGAGATTTTAAGA 1 -GGCGTGGTAGCTTAC 1 -CGGCATGGGCCCTGT 1 -TGGCTGGATTGGTAT 1 -ACTGCACACTGCACT 1 -AGCTGTAAAACCATT 1 -CAATGAAAAATATAG 1 -GAATTATGAGGGAAA 1 -GGCCCGCCGTGGGGC 1 -TGCCTTTTGGCTGTA 1 -CCATTTCTGGAAGAT 1 -TAAATGGCAGCAATC 1 -CTAAACAATGTATTC 1 -AAGAAACTTAATTAC 1 -TTTGTGGCAGCTTCA 1 -CACTTAGGAACCTCA 1 -CTCACGTCATCCAGC 1 -TGAAGAATGGAGAGA 1 -CCGTAGTTGAGACCA 1 -GGCATCAGAAGTCCT 1 -GTTCTTAAAGATCAG 1 -CTGAAGCTGACAGCA 1 -GAGCAGGAACCAAAA 1 -ATAAGCGTCAGAGCG 1 
-CTTAATGTCTTCCTT 1 -TAAACATCACGAGAC 1 -ACCTGGCACTGCGTC 1 -CGCCTGTAATCCCAG 1 -TGTTCCCTTCTCCTG 1 -GCACCCGGGACGCGC 1 -AAAAGGTCTATGGCC 1 -AGATGTACTGTGCCT 1 -TTGAGAGCCTCCAGA 1 -TTAACCAGGCATGGT 1 -ATATAGAGTTTATAA 1 -TGGTCTCTACAAAAA 1 -GGCCGAGATGTCTCG 1 -GGAGGCGTCGCGCTG 1 -GAATGAGTCCCATCC 1 -TGAGGCATTTAATAT 1 -CCCTCACTATGTGGA 1 -GAGTTTATAATATTT 1 -AGGGTTTCTCTTCCG 1 -GGGTGCGCACCCGGG 1 -ATGTGACTTTGTCAC 1 -CAAGGCAGGAGCAGG 1 -GTATAGAGCAATTGC 1 -TGGCTGTAATTCGTG 1 -TAACCTGGCACTGCG 1 -CACCTATCCCTGTTG 1 -AAAGAAAGAAGTGAA 1 -GCGCTTGGGGTCTGG 1 -TACTGAAAATACAAA 1 -GCCTGTAGTCCCAGG 1 -CATCCTATAATCCTG 1 -TATGTGGAGAACATT 1 -TCCCTGCAGTTGAGC 1 -TGTGAACGCGTGGAG 1 -GGTCCCAGCTGCTCC 1 -TCTCATTTTGGAAAG 1 -GAATGTTCTTAAAGA 1 -TTGGGAGGCCAAGGC 1 -GAGTGGAAATGGAAT 1 -GGTTATCTTCTGCCT 1 -ACGAGACTCTAAGAA 1 -CACTGTCCTGAGGAC 1 -GGTTGGGCACGGTGG 1 -TAGTTGAGACCAGCC 1 -GACTCTAAGAAAAGG 1 -GTCACATGCATTACT 1 -TGAGGGAAAGATACC 1 -TGTTTCACTGTCCTG 1 -CATTTCTGGAAGATA 1 -GGGAGTTCAAGACCA 1 -CTCTTCATTTTCAAT 1 -TTGTATTTTATCGGG 1 -TGATCATGAGAAAAT 1 -GAAAATGTTTTTGTT 1 -AAAGGAAACTGAAAA 1 -TGGTAAGAACATACC 1 -CCTGGTACTCCTCCT 1 -CGGGAGGATCTCTTG 1 -CCCGGGACGCGCGCT 1 -GGGTAAGTCTTACAT 1 -GGGAAGGTGGAAGCT 1 -CTTGGGGCCAAATCA 1 -AAGGTGGCCTGGTAC 1 -GCGCAATCTCCAGTG 1 -GAGGGGTCTGGCATA 1 -ACATTGAAGTTGACT 1 -TAAAAAAATAAAAAA 1 -AGAAAGGCTCTTAAA 1 -ACAGCACGCGACGTT 1 -GCCTCTCACAGATGA 1 -TGCCTCTCACAGATG 1 -TTCAGGTATATTTAG 1 -GGCCTTTGTTTGTAA 1 -TGCGTGAGATTCTCC 1 -TGTTTCTTTGTTTTT 1 -AATTAGGTACAAAGT 1 -GGGTCATGTTCCCTT 1 -CATAGACCATTTCTG 1 -AGATACCAAGTCACG 1 -CTGTGGTCCCAGCTG 1 -TTATAAGAACTACCC 1 -CCCACAGCCTCCCAG 1 -AGTTGTGTATGAGTA 1 -ATCTGCTAGAAAAAA 1 -AATTGCTATGTCCCA 1 -GGCTAGTAGCCTTTC 1 -CCTCTAGCTTTTGTG 1 -AAAACAGTTATCTTC 1 -CTGGCAAAACATGAT 1 -GAACGCGTGGAGGGG 1 -ACAAAAAATAATAAC 1 -GTCCGTAGTTGAGAC 1 -CATGTAATGATGATT 1 -CAACCTGGCTAGTTT 1 -CCTCACCTATCCCTG 1 -AACACTTCTTACCTA 1 -GCTGAGATACTGATG 1 -CCAGGCATGGTGGTG 1 -TGTCAGTCAGGGGAG 1 -CGCAATCTCCAGTGA 1 -CTCCATTTGATCATA 1 -CCCTGGCAATATTAA 1 -TGTGCTCTGATCCCT 1 -CATTAATAAAGATAA 1 -TCTTCCGCCATAGAT 1 
-AAGGAGGAGTAGCTG 1 -GTAAGCTGCTGAAAG 1 -GCACTGAACGAACAT 1 -AATGGAAGGGGTGGA 1 -AGAGCGCCGAGGTTG 1 -AATCTGCTAGAAAAA 1 -ATGAAAAAAATCAGG 1 -ATTTTAAAAAATAAC 1 -GTGACGGTCCCTGCG 1 -TCTTCAAAATGGAGG 1 -GAGGTTGCAGTGAGC 1 -AATGTTTTGATCATG 1 -AGATCTTGAGCACTT 1 -GTACAAAGTCAGAGA 1 -TAATGTTATGAAAAA 1 -GATACCACAAGAAAC 1 -TGAAAATTTCCAAAG 1 -GAACAGCAGCCTATT 1 -TTCGTCTAGGCGCCC 1 -ATAACTCTCCAAAAG 1 -TGGTATTTGCTGGTT 1 -GCTTGGCTGTGATAC 1 -CTAACCATTTTAGAC 1 -GAAGCTCAGATGCAA 1 -CCAGTAACTGGTTGA 1 -TTTAATTTTGAAAAC 1 -TGTATGAGTAGTCAT 1 -TTAGATTCCCCAATC 1 -CTCTCTTTCTGGCCT 1 -CTACAGGGTCATGTT 1 -GTATAGGCCTTTGTT 1 -GGTTTTGTTGTTTGG 1 -GGTGACGGTCCCTGC 1 -GCTCGCGCTACTCTC 1 -ATCATGGCGCCAGTG 1 -GTTTACTCACGTCAT 1 -TGGTGTGTGCCTGTA 1 -CCAACATGGTGAAAT 1 -GTGCTCTGATCCCTG 1 -GCACCTGCTGAGATA 1 -TGCCATAGTCCTCAC 1 -CCCATCCCATCTGAT 1 -GCTCATGAAATTAGG 1 -CCTTTTAGTGATCAT 1 -TGGCTCGTGCCTGTG 1 -TGAGAAAATTGCATT 1 -GAGAACATTGACAGA 1 -GCCAAATCATGTAGA 1 -CCTGTGGATGCTAAT 1 -CTACACTGAATTCAC 1 -GTCCTGCTGTCCTAG 1 -GTATTTGCCAGCTCT 1 -TATGCATTTAGGTTT 1 -TGGCAGCTTCAGGTA 1 -TGAGGCTAGTAGGAA 1 -GAATTTTATACACAT 1 -GGCTGATCACTTGAA 1 -GGAGACCTTTGGCCT 1 -AAAAGGCATGTATAG 1 -ATAATGTATATATCA 1 -AATTATAAGAACTAC 1 -TCTAAGTACCTGGCA 1 -TGCACCCTCTGTGGC 1 -GAGCTGTAAAACCAT 1 -TCTGGCTGGATTGGT 1 -AAACGCAAATCCCAC 1 -TAAAGATAATCCAAG 1 -AAAATCTTGCCGCCT 1 -TAATATGTTCTTATT 1 -CCTGGAGGCTATCCA 1 -CCACTTCCCCATGGA 1 -CCCCCAGCGCAGCTG 1 -CATATACACTCTTAA 1 -CTCTAACATGATAAC 1 -ACTTTTAAAAAATTA 1 -ATAATAACAAAAATT 1 -TGCCTCTTACTTTCG 1 -TTCTTCCCACTTCCC 1 -GAGATACTGATGCAC 1 -ATAGAGTTTATAATA 1 -TTCAGCAAGGACTGG 1 -CTGAGGCTAGTAGGA 1 -GCATTACTCCATTTG 1 -ATCACGAGACTCTAA 1 -TTAGTACATGGTATT 1 -GGCTGAGGCGGGAGG 1 -CGGGACAAAGTTTAG 1 -CATGAAATTAGGTAC 1 -TGTCTCTCAAAAAAG 1 -AGCATGGTTTCTGAA 1 -CTGTCACATGCATTA 1 -AGTCATATCATAAAG 1 -TTGCTATGTCCCAGG 1 -AGAGTTTATAATATT 1 -TCAGTCAGGGGAGCT 1 -ATTAGGTACAAAGTC 1 -CTAGAAATCTGCTAG 1 -TCTTCATTTTCAATG 1 -AGATCAGATTAGTGG 1 -GCCTGGCCAACATGG 2 -AGATGAAGAAACTAA 1 -TGTAATTCGTGCATT 1 -GGCTCGTGCCTGTGG 1 -CATCCATCCGACATT 1 -GCCACACCGTGGGGA 1 
-GCAGCAGCAGCACTT 1 -AGGCTGATCACTTGA 1 -ATGTTCTTATTATTA 1 -CCCGCTAAGTTCGCA 1 -CTCTTGTATGCATTT 1 -CTCTGGCTCCCCCAG 1 -ATGGTGAAATCCCGT 1 -ATCGGGTCCAACTCA 1 -GGCAGGAGCAGGAAC 1 -TCACAGCCCAAGATA 1 -TGTGCTTTGCGTCAT 1 -GCACGCGTTTAATAT 1 -CATGGTGAAATCCCG 1 -CAGACTTGTCTTTCA 1 -GAGGCGGGAGGATGG 1 -ATATGTTCTTATTAT 1 -GTGTAGATCAAGGCA 1 -ACTTTCGGTTTTGAA 1 -AATCACCTGTGGATG 1 -GTACTCCAAAGATTC 1 -TGCACTCCAGCCTGG 1 -GCATGGGCCCTGTGG 1 -AGCTTAGGCTTTTGA 1 -AACATTAGGCAATAT 1 -ACTTACTGAAGAATG 1 -TAGAGTTTATAATAT 1 -TCCTTAATGATAGGG 1 -TATTGACATCTTTCT 1 -GCTAGTAGGAAGGGC 1 -TTTCAATGAAAAATA 1 -GGGTGGCCTGGGAGT 1 -TCGGCGGGGAGCAGG 1 -CTCTTACTTTCGGTT 1 -CTTAGCTTTTAATGT 1 -TCTGGCCTGGAGGCT 1 -GCCTGTCTGATACTT 1 -CGGGTAGGCTCGTCC 1 -TTGGGTTGGTTGGGC 1 -CCTACCCTCCCGCTC 1 -GGCTTGGAGACAGGT 1 -AAGCTCAGATGCAAA 1 -TTCCCACTTCCCCAT 1 -AGAGAGTTCCAGGCA 1 -TGTTCCTGCTGGGTA 1 -TGGCATGAAGAAGGT 1 -CTGCCTTTTAGTGAT 1 -GCGGGGTGGCCTGGG 1 -ACAAGGAGGAGTAGC 1 -AGCGTGAGTCTCTCC 1 -GTCCTAGCATCCTAT 1 -GAGGCCGAGGCGGGA 1 -CTCAGATAATAACAT 1 -AGGCGGGAGGATCTC 1 -TAAAAAAGGTCTATG 1 -ACCATTAATAAAGAT 1 -GCTGCTGAAAGTTGT 1 -AAAATAAAGAACATA 1 -ACTTTGGGAGGCCAA 1 -CGGCGACGGGAGGGT 1 -GTTAGTACATGGTAT 1 -ACTGGAGAGCTGTGG 1 -ATATAAGTGGAGGCG 1 -GTAAGTCTTACATTC 1 -TACCCTGAATGAGTC 1 -TCTCTTTCTGGCCTG 1 -GGCCTTGTCCTGATT 1 -CGTTATTGACATCTT 1 -TGTCTTTTTCATAGA 1 -TATAAGAACTACCCG 1 -GACTTCGTCTAGGCG 1 -CTAGTAGGAAGGGCT 1 -CTCTTTCTGGCCTGG 1 -GGCAGCTTCAGGTAT 1 -ATCCTGGACTTCTCC 1 -TCTTTGTTTTTTAGC 1 -GCAGGAGCAGGAACC 1 -ATGTTCCCTTCTCCT 1 -ACAGAGGGCTTCCTC 1 -GCTGGGTAGCTCTAA 1 -TTTGTTAGTACATGG 1 -GGAGAGCTGTGGACT 1 -GTTTTGAAAACATGA 1 -TGGGAGTTCAAGACC 1 -ACTGCGTCGCTGGCT 1 -CACCTGCTGAGATAC 1 -GCGGCTCTGCTTCCC 1 -AACCCAGCCTGTCTG 1 -AAAAGAAAGAAAGAA 1 -CTGCTGAGATACTGA 1 -CAATATATTCCCAAC 1 -CTTCTGCCTCTCACA 1 -TATAAGTGGAGGCGT 1 -TATACACATATATTT 1 -GGGCTAGTAGCCTTT 1 -GAGCAGGGGAGACCT 1 -CAGGGAGCAGCAGCA 1 -AACCTGGCACTGCGT 1 -TATGCCTGCCGTGTG 1 -CAGGGGATAAATGGC 1 -TTTTAAGAAAAACGC 1 -TGCTTCCCTTAGACT 1 -GAATCACTTGAACCC 1 -CCATAGTCCTCACCT 1 -CCGACATTGAAGTTG 1 -TAGTAGCACTAACAC 1 
-TGCTATGAGTGCTGA 1 -TCCCATTTGCCATAG 1 -TCCCCATGGATGGTC 1 -TTAATGTGTCTTTTC 1 -AAAACATGAGGGGGT 1 -TTTGCCAGCTCTTGT 1 -AATACCCTGGCAATA 1 -GCGTCATTTAATTTT 1 -AGCTGCTTTGATATA 1 -CTTCCTTTTTTTTCT 1 -CTTTTTTTTCTCCAC 1 -CATATCATAAAGCTG 1 -AACATGAGTAATTTG 1 -TTTTGTTTTACTGTG 1 -TTCCAAGATGTACTG 1 -ATTCAAACCCAGCCT 1 -TTAGACATTTGTTAG 1 -GCTGGCTTGGAGACA 1 -TCTGCTTCCCTTAGA 1 -GCCTCCAGAGAAAGG 1 -CACCTCTGGGTCTAT 1 -GGCCGAGGCGGGAGG 1 -GAAATACCCTGGCAA 1 -GAAAAGAAAGAAAGA 1 -AGACAAGGAGGAGTA 1 -GAGTACAATAACATG 1 -TAAAACCATTAATAA 1 -TGTGTTAAGGAATGC 1 -TAGGCTCGTCCCAAA 1 -GCATATTGGGATTGT 1 -CACTTTCTAAGTACC 1 -TTCCTTAATGATAGG 1 -TCTTTCTGGCCTGGA 1 -AAGACCAGCCTGGCC 1 -GGCGGAGGTTGCAGT 1 -AAGCGGCGGGGTGGC 1 -AAGATACTGCTAGAA 1 -AGATTCAGGTTTACT 1 -TCATGTAGACTCTTG 1 -CTACTGGCTTCCTCT 1 -TTGGCTGTGATACAA 1 -CTGTGTGCCAAGGAC 1 -TAAGTTTTTGACCTT 1 -AAGTTGTGTATGAGT 1 -TTCTAAGTACCTGGC 1 -TCCCATCCCATCTGA 1 -CACTGTCACATGCAT 1 -TTAATGTCTTCCTTT 1 -CCCAGGAGGCGGAGG 1 -ACCAAGTCACGGTTT 1 -ATAAAGGATAATGTA 1 -TTGTTTTTTAGCTCA 1 -TTTCAGCAAGGACTG 1 -AAAAATATAGAGTTT 1 -GCACTCTACTAGACA 1 -CGGGGTGGCCTGGGA 1 -CCCAAGATAGTTAAG 1 -TTTTAGTGATCATGT 1 -GGAAGGGGGTGCGCA 1 -CTTACATTCTTTTGT 1 -GAATTGCTATGTGTC 1 -ATAAAGAACATACCT 1 -GTCTTACATTCTTTT 1 -GATTACAGTGATGCT 1 -CGAGATTGAAGTCAA 1 -CCCACTGAAAAAGAT 1 -CTCTCCCGCTCTGCA 1 -GTCTATGGCCATACT 1 -GCCCAAGATAGTTAA 1 -GGACTTTATGTGCTT 1 -CGTGAGATTCTCCAG 1 -GATCCCTGAGGCATT 1 -GGAGGTTGCAGTGAG 1 -AGAACATTGACAGAG 1 -GGGTGTGGTGGCTCG 1 -AAGGTGGAAGCTCAT 1 -TATTTGCTGGTTATG 1 -TACTACCCTGAATGA 1 -TGTCTGATACTTGTC 1 -TGAGATACTGATGCA 1 -GTTTTACTGTGGGCA 1 -TACTTGTCCTCTTCT 1 -GCTCCGTGGCCTTAG 1 -AGCCCCAAGTGAAAT 1 -AAATAATAACAAAAA 1 -TCATAGATCGAGACA 1 -TACTGTGGGCATAAA 1 -AGGGTCGGGACAAAG 1 -TAAGTTTTGGAAGCT 1 -ACTTCCCTTCTCCAA 1 -CTGTCCTGAGGACTA 1 -GTCTCTACAAAAAAT 1 -CAGAGGGCTTCCTCT 1 -TACATGCCATGCATA 1 -CATGTCACTTTTAAA 1 -TCAAAAAAGAAAAAA 1 -TGTCCCAGGCACTCT 1 -TTCCCTGCAGTTGAG 1 -AGAGCGAGACCCTGT 1 -TATCCCTGTTGTATT 1 -AGTCTCTCCTACCCT 1 -AACCTCAGATAATAA 1 -AAAATGCAGCGCAAT 1 -ACCCGTTATTGACAT 1 -TGCTGTCCTAGCATC 1 
-GCGTCGATAAGCGTC 1 -AAACGCCTGCCTTCT 1 -GTGCCTGTGGTCCCA 1 -GTGGGCAACAGAGCG 1 -CAGAAGTCCTTGAGA 1 -AAGCTCATTTGGCCA 1 -AGGTAAGTTTTTGAC 1 -CGGGAAAGTCCCTCT 1 -GAGAGGGCATCAGAA 1 -AGGCGTCGCCCGGGT 1 -CTATGTGTCTGGGTT 1 -GTGTTAAGGAATGCT 1 -GTAAGCCTGTCTGCT 1 -CGGGCCGAGATGTCT 1 -GGAACCAAAAAGAAA 1 -TCGGGACAAAGTTTA 1 -TTCCTCTTTGGCTCT 1 -CCAGGGCTGGATCTC 1 -CAGTGCACTCCAGCG 1 -AGGTGGCCTGGTACT 1 -TCTCAAAAAAGAAAA 1 -CATCCAGCAGAGAAT 1 -AGGGGTCTGGCATAT 1 -TCTGAAGGTCTATTT 1 -CATGTATAGAGGAAT 1 -TCACCTATCCCTGTT 1 -GGTGGTGTGTGCCTG 1 -ACCAAATGTAAACAC 1 -TGAGTAGTCATATCA 1 -ATGGGTAGGAACAGC 1 -TAATTCGTGCATTTT 1 -TGGCCTACGGCGACG 1 -CTTTTAATGTTATGA 1 -CTGTAATCCCAACAC 1 -TAAAAAATTATAAGA 1 -GGAAAGTATGTTCTG 1 -TAGCTTTTAATGTTA 1 -TGACCTGTGTGTGGG 1 -CAGATGGGTGTAGAT 1 -AACTGGTTGAGCCCA 1 -CCTTCTCCTGTGGAG 1 -TGTACCCTGAATATA 1 -TTCGAATAATTAACT 1 -GCGAGACCCTGTCTC 1 -ATAAATTAATTTTTC 1 -AGAAAAGAAAGAAAG 1 -CCAGGAGGCGGAGGT 1 -GGGGGACGGGTAGGC 1 -ATACCTTGGGTTGAT 1 -CAGCGCAATCTCCAG 1 -TTTCTATCTCTTGTA 1 -GCAGCGCAATCTCCA 1 -GGGTGTAGATCAAGG 1 -TAATTTTGAAAACAG 1 -GTACTGTGCCTCTTA 1 -GTTTTGTTTTTGTTT 1 -AAGTGGAGGCGTCGC 1 -TAAATTAATTTTTCA 1 -GGGGTCTGGCATATA 1 -TTCATACAGTTTAGA 1 -CTTCTTAAACATCAC 1 -CTCCCGCTCTGCACC 1 -CTCCTCTGACCTGTG 1 -TATTTAGTAGCACTA 1 -ATCCAGCGTGAGTCT 1 -TCTCCAGTGACAGAA 1 -TTGTAGAATGCTTGG 1 -TGTATATTTAGTAGC 1 -AATGGAGGTGGCTTG 1 -TAACAAAAATTAGCC 1 -TGATCACTTGAAGTT 1 -CTTCTCCCCACAGCC 1 -TATGAAAAAAATCAG 1 -ATCAAGGCAGGAGCA 1 -AGCTGCCTTTTAGTG 1 -TCCACTGTCTTTTTC 1 -CATTAGGCAATATAT 1 -TGTAGAATGCTTGGC 1 -TTAGGTACAAAGTCA 1 -GGCAACAGAGCGAGA 1 -TAGGTACAAAGTCAG 1 -CCCTTAGACTGGAGA 1 -GATCATAATGGAAAG 1 -ATGGCGCCAGTGCAC 1 -TCCAGCGTGAGTCTC 1 -CTTACTTTCGGTTTT 1 -AACGGGAAAGTCCCT 1 -AAAATTAGCCGGGTG 1 -GCATGTATAGAGGAA 1 -AATATTTTCTTCCCA 1 -CCTGGCAATACACTA 1 -GATGCTAATTAAAAC 1 -GAGACCAGCCTGGCC 1 -TTTTAAAAAATAACA 1 -TCTTGGGGCCAAATC 1 -TTTTGTTTTTGTTTT 1 -TCTTGAGTGATGTGT 1 -TGCCTGTAATCCCAA 1 -ACAGCAATCACCTGT 1 -TCTCTACTGAAAATA 1 -GATTCAGGTTTACTC 1 -AAAGAAAGAAAGAAG 1 -GTGGCCTTAGCTGTG 1 -AACTAAGGCACCGAG 1 -TCTGGCTCCCCCAGC 1 
-CAGCGCAGCTGGAGT 1 -ATACTGATGCACAGC 1 -AAATTGCATTTAATT 1 -AAAATTATAAGAACT 1 -GTCCTCTTCTTAGAA 1 -CTCTACTGAAAATAC 1 -TGGCTCGTGCCTGTA 1 -TGAAAAAGATGAGTA 1 -TTGACACCAAGTTAG 1 -AATACACTAAGCGCG 1 -ATGCATTTAGGTTTT 1 -CTGTGCCTCTTACTT 1 -GAAGGGGGTGCGCAC 1 -TACCAAGTCACGGTT 1 -GTCTGATACTTGTCC 1 -GAGTGGCATGAAGAA 1 -GGCAGCAATCGAGAT 1 -GCGGGAGGATCTCTT 1 -GCAGAAATAAAGAGG 1 -AGAGAAAAGAAAAGA 1 -AGTCCAGGGCTGGAT 1 -TAATTAAAACGCAAA 1 -TGGAAAGTCAAATTT 1 -TGCTAGAAATCTGCT 1 -CTGGGCACGCGTTTA 1 -TGCACAAATACATAT 1 -ACCATTTTAGACATT 1 -GGAACCTCAGATAAT 1 -ACTCCAAAGATTCAG 1 -TTTGGCCAGAGTGGA 1 -CAAAACATGATCGAA 1 -CATGAGTAATTTGAT 1 -GGTCGGGACAAAGTT 1 -TCTTTGGCTCTTTGC 1 -TTCATTAGATTCCCC 1 -GTTACCAAGACTGTT 1 -GCCCCTTTCGGCGGG 1 -GTGTTTCTAGAGAGA 1 -ACATGAGGGGGTTGG 1 -ATGGAAAGTCAAATT 1 -GCATTCAGACTTGTC 1 -CTCCAAGTTCTCCTT 1 -CCTGAGGACTATTTA 1 -GAAAAAGTGGAGCAT 1 -ATTAACCAGGCATGG 1 -AATTCGTGCATTTTT 1 -GGGAAAGATACCAAG 1 -TTGCACAAATACATA 1 -AGACATGTAAGCAGC 1 -TCAGGTATATTTAGC 1 -CACAGCCTCCCAGAC 1 -CTCCACTGTCTTTTT 1 -GCACCCTCTGTGGCC 1 -CTGAAAATACAAAAA 1 -ATGCCATGCATAGAC 1 -AAAGAATTTTATACA 1 -AGTCAAATTTCCTGA 1 -TTTTCCTCTGGCAAA 1 -TGTGCCTCTTACTTT 1 -GTTAAGTGGGGTAAG 1 -TTCAAGACCAGCCTG 1 -TGAGTCCCATCCCAT 1 -CTAGCTTTTGTGGCA 1 -AAAAAGAAAAAAAAA 1 -ACTCACGTCATCCAG 1 -CCTCTCATTTTGGAA 1 -CACCACTGCACACTG 1 -GGCTTCCTCTTTGGC 1 -GCTGGATCTCGGGGA 1 -CAGCAGCACTTGCAC 1 -GCGCTGGCGGGCATT 1 -CTTTTGCGGGAGCGC 1 -TGGTTGGGCACGGTG 1 -TCCAAGTTCTCCTTG 1 -AAAAAAATCAGGTCT 1 -CTTGTCCTGATTGGC 1 -ATGCTATGAGTGCTG 1 -ACATTCTTTTGTAAG 1 -ATTGCATTTAATTTG 1 -ACGCGACGTTTGTAG 1 -TGCTCTCACAAAATC 1 -AACTCTCCAAAAGTC 1 -GCGCTACTTGCCCCT 1 -AGATAACTACTATGG 1 -CTTCATTTTCAATGA 1 -CTATGTGGAGAACAT 1 -GATTAGTGGCACCTG 1 -GTGAGTCTCTCCTAC 1 -TTCATCCATCCGACA 1 -TAAGCCTGTCTGCTG 1 -CCAAGTGAAATACCC 1 -AGATAGTTAAGTGGG 1 -CCTCCAGAGAAAGGC 1 -TCCAACTCAACCATT 1 -ATATTTTCTTCCCAC 1 -TTACAGCAATCACCT 1 -GAAGGTCTATTTTCC 1 -AACAAAAAAGGCATG 1 -CTGCGTGAGATTCTC 1 -CCAACTCAACCATTT 1 -ACCTGGCTAGTTTAC 1 -AGCATTCGGGCCGAG 1 -GTTGCAGTGAGCTGA 1 -TCATCCATCCGACAT 1 -ACCACCATTACTGGT 1 
-AGAGGTTTTGTTGTT 1 -CCTGCTGAGATACTG 1 -CTGTGGATGCTAATT 1 -GATATAAAAAAGGTC 1 -TGTGCTCGCGCTACT 1 -CACCCCCACTGAAAA 1 -GAGTTCAAGACCAGC 1 -GAGGAATTATGAGGG 1 -TGTACTGTGCCTCTT 1 -CTGCGGGCCTTGTCC 1 -GCGTTTAATATAAGT 1 -GGCCCTCGCTGTGCT 1 -TGCATAGACCATTTC 1 -ATGAGTAGTCATATC 1 -GGGCGTGGTAGCTTA 1 -ACAGAAGATACTGCT 1 -CATCACGAGACTCTA 1 -GATGGGGCTAGTAGC 1 -AGCGTGGGCAACAGA 1 -GCATGCCTTTTGGCT 1 -GCCTGGAGGCTATCC 1 -ATACACTAAGCGCGC 1 -AGCAGCCTATTCTGC 1 -CTCTGGTCCTTCCTC 1 -TAACATTTTAGCAGG 1 -ACGCAAATCCCACTG 1 -AGTGGGGAAGGGGGT 1 -CGGCGCTGAGGTTTG 1 -TGTTTTTGTTTCACT 1 -TTCCCCATGGATGGT 1 -GTCTATGTGGGGCCA 1 -CTGTGATACAAAGCG 1 -CTGCTGAAAGTTGTG 1 -GCACTCCAGCGTGGG 1 -ACTAAGCGCGCTCAC 1 -TATAAAAAAGGTCTA 1 -AAATCCCGTCTCTAC 1 -TTGCGTCATTTAATT 1 -TCTAACCATTTTAGA 1 -AAAAAAATAAAGAAC 1 -TTAGCCCCAAGTGAA 1 -AAGAAAAGAAAGAAA 1 -GGATTGTCAGGGAAT 1 -AATATTAATGTGTCT 1 -ATGTTTTTGTTTCAC 1 -TCTTTGCCTGGTTGT 1 -TGAAGTTGGGAGTTC 1 -CGGGAGGATGGCTTG 1 -AGGCCGAGGCGGGAG 1 -GGGTCTATGTGGGGC 1 -AGATTGAAGTCAAGC 1 -CCAGTGACAGAAGAT 1 -TAGAGCAATTGCTAT 1 -CATTAGATTCCCCAA 1 -ATACAATTTATTTAC 1 -GGGTTTTGTTTTTGT 1 -GACTCTTGAGTGATG 1 -GTATATATCACCACC 1 -CCAGCGTGGGCAACA 1 -ATTTAGTAGCACTAA 1 -ATCTTGCCGCCTTCC 1 -TGAAGAAACTAAGGC 1 -AAATGAACTTTGAAA 1 -TCATATCATAAAGCT 1 -GCCAGAGTGGAAATG 1 -GAAAGCAGAATGTTT 1 -AAAGAAAGGCATAAA 1 -CACCGAGATTTTAAG 1 -GTTAGCCCCAAGTGA 1 -GGCGGGAGGATGGCT 1 -ATGTCTCGCTCCGTG 1 -CAGATAATAACATCT 1 -TCAGACTTGTCTTTC 1 -AGTCAGAGAGGGGTC 1 -CAGCCTGGGCAATGG 1 -CTCCAGAGCAAACTG 1 -CCTTCCCTCAAACAG 1 -GCCAACATGGTGAAA 1 -GGAGGGTTTCTCTTC 1 -CAGAGTAACATTTTA 1 -TGACAGCATTCGGGC 1 -GCTCAGATGCAAAGA 1 -GTAAGCAGCATCATG 1 -TTTAGTAGCACTAAC 1 -TTGTCAGTCAGGGGA 1 -CCTGGTTGTTTCCAA 1 -AGCAGGGAGCAGCAG 1 -ATGGTGAAGCCTGGT 1 -GTCTTTCAGCAAGGA 1 -CTTCCTCTTTGGCTC 1 -TTAGCTCATGAAATT 1 -CCTTTTCCTCTGGCA 1 -CAGAGAATGGAAAGT 1 -GGTGAAGCCTGGTCT 1 -GAGGGTCGGGACAAA 1 -AAAGAGAAAAGAAAA 1 -ATTTGATCATAATGG 1 -CTTTTCCTCTGGCAA 1 -AAACTTAATGTCTTC 1 -CGAGACTCTAAGAAA 1 -GTTTTGTTTCTTTGT 1 -GAGACCCTGTCTCTC 1 -CAGGGTCATGTTCCC 1 -ATCTCCAGTGACAGA 1 -TAAGTACCTGGCAAT 1 
-CTGGTTGTTTCCAAG 1 -TGGCTTGGAGACAGG 1 -CCCTCGCTGTGCTCT 1 -CATGAGGGGGTTGGG 1 -GAAAAGAAAAGAAAG 1 -GGTACTCCTCCTTCT 1 -TACTGCTAGAAATCT 1 -AGGCAGGATGAATCT 1 -TTTGTTTCACTGTCC 1 -AAGTGGGGTAAGTCT 1 -TTGTTTTACTGTGGG 1 -CAGACAAGGAGGAGT 1 -TCTTTCTATCTCTTG 1 -CCTCTTACTTTCGGT 1 -AAATAAAAAAATAAA 2 -CCCAGCCTGTCTGAT 1 -CAATCACCTGTGGAT 1 -ACTCCAGCCTGGGCA 1 -GGCAATATATTCCCA 1 -TCCTCTTTGGCTCTT 1 -ATGTTAGATGTCATT 1 -TCGCCCGGGTAAGCC 1 -GCAATCTCCAGTGAC 1 -AGGGGATAAATGGCA 1 -TGCCTGCCGTGTGAA 1 -GAATGCTTGGCTGTG 1 -ACTTAATGTCTTCCT 1 -CGCTCTTTCGCGGGG 1 -TGGTGCCTGATATAG 1 -TTTCTCTTCCGCTCT 1 -GTGGGGAAGGGGGTG 1 -GGGCAACAGAGCGAG 1 -CTAGAAAAAAAACAA 1 -AGGTATATTTAGCAC 1 -TTGAGCAGGGAGCAG 1 -CCCAAAAAATAAAAA 1 -CTCCAGCCTGGGCAA 1 -TGATGGGGCTAGTAG 1 -ACCAAGTTAGCCCCA 1 -TCCCTTCTCCTGTGG 1 -TTAGGGAGGCCGAGG 1 -ATACAAAGCGGTTTC 1 -TTTGGCTGTAATTCG 1 -TACCCGTTATTGACA 1 -AACAATCTGATATTT 1 -GGTAAGCCTGTCTGC 1 -ATATTTAGCACTGAA 1 -AACACTTCTCTTCAT 1 -TGAGAGCCTCCAGAG 1 -TTTGACCTTGAGAAA 1 -TTAGACTGGAGAGCT 1 -TGTTTTGATCATGAG 1 -TTCTGGCCTGGAGGC 1 -ACATCTCAAGAAGGT 1 -TCGAAAGCAGAATGT 1 -ACCCTCCCGCTCTGG 1 -TATGGTTATCTTCTG 1 -TGAGTAATTTGATGG 1 -AATCTGCATATTGGG 1 -CTACTTGCCCCTTTC 1 -TGGCCATACTACCCT 1 -AATAACATCTGCCAC 1 -TGGCTGAGGCGGGAG 1 -GGCGCCAGTGCACTC 1 -GGGGCCTCTGGCTCC 1 -TCAGATAATAACATC 1 -AAAAAAAAAAAAGAA 1 -CTCTCCTACCCTCCC 1 -AAATATAGAGTTTAT 1 -CGTATAGAGCAATTG 1 -TAGAAAAGATTACAG 1 -GACTTACTGAAGAAT 1 -CTTATTTCTAACCAT 1 -ATTACTGGTATTTGC 1 -GCTAAGTTCGCATGT 1 -AGAAAGAGAAAAGAA 1 -GGGGGTTGGGCGTGG 1 -ATTTTGAAAACAGTT 1 -AGCTCATTTGGCCAG 1 -GACAGCTCTAACATG 1 -ACAGTTATCTTCCGC 1 -GAAGGTATAGGCCTT 1 -AAAAAAAAAAAAAGA 1 -CTTAAAGATCAGATT 1 -TCCAGAGAAAGGCTC 1 -CTTGCACAAATACAT 1 -GTTTTTGTTTTACTG 1 -GCAGCATCATGGAGG 1 -AAGACTGTTGAGGAC 1 -TCCCTTCTCCAAGTT 1 -CAGTAGTTTCCCTGC 1 -CAACATAAAGGATAA 1 -ACTACTATGGTTATC 1 -GGGAGCGCATGCCTT 1 -CGGTCCCTGCGGGCC 1 -GTCTTTCTATCTCTT 1 -AAAGTAAAACTTAAT 1 -GCGTCGCTGGCTTGG 1 -AGTTGAGACCAGCCT 1 -AAATTTCCTGAATTG 1 -GTGGGGAGGAAACAG 1 -ATGTGGGGCCACACC 1 -GGGAAGGGGGTGCGC 1 -AACCAGGGCTTTTGC 1 -TAAAAAAAAATCTTA 1 
-TAATTTGATGGGGGC 1 -ACTAGACACTTCATA 1 -CTGAGAGGGCATCAG 1 -GACGCCAGAGATCTT 1 -TGAACTGAGAAATGA 1 -CCTTTCGGCGGGGAG 1 -TGCGTCATTTAATTT 1 -GGAGGAGTAGCTGCC 1 -GGTCCAACTCAACCA 1 -ACTGGTATTTGCTGG 1 -ATTTTAGACATTTGT 1 -CACTTGCACAAATAC 1 -GCAGCCTATTCTGCC 1 -GAGATTTTAAGAAAC 1 -CTGGCCAACATGGTG 2 -CAGCACGCGACGTTT 1 -GAACTGAGAAATGAA 1 -TCTCCACTGTCTTTT 1 -AGAAACATGTAATGA 1 -AGGGTCATGTTCCCT 1 -TCTCAAGAAGGTATA 1 -GTATCTGAGGCTAGT 1 -AAACATGAGGGGGTT 1 -AAAGGCATAAACATA 1 -AGTGAAATACCCTGG 1 -CTCACTATGTGGAGA 1 -ATGTTATGAAAAAAA 1 -TTCTTACCTACTGGC 1 -TAAGTGGAGGCGTCG 1 -GCCTGGTACTCCTCC 1 -AAACGGGAAAGTCCC 1 -ATGATTGCCTCTGAA 1 -TACAAAAAATAATAA 1 -TCCTATAATCCTGGA 1 -AGCACTTAGGGAGGC 1 -AATCCTGGACTTCTC 1 -AGAAAATGTTTTTGT 1 -TTCTCTTCATTTTCA 1 -ACGGGAAAGTCCCTC 1 -AGGTCTTCATTAGAT 1 -CTATGAGTGCTGAGA 1 -ACCTATCCCTGTTGT 1 -GGCAAAACATGATCG 1 -GACCATTTCTGGAAG 1 -ATGTACCCTGAATAT 1 -ACCTCAGATAATAAC 1 -TGATAGGGTGTTTCT 1 -AGAGGAATTATGAGG 1 -ACCTCAGCAGAAATA 1 -AGAGGGCTTCCTCTT 1 -CACTTCATACAGTTT 1 -TTTAGTGTCAATCTG 1 -TCGCTCCGTGACTTC 1 -GGACTGGTCTTTCTA 1 -CCCAGGAATCACTTG 1 -CATTTAAGGTATTTG 1 -GATCTTGAGCACTTT 1 -CCAGCGTGAGTCTCT 1 -TCTGCTGCGGCTCTG 1 -TCTTATTATTAGAAG 1 -TCATAAAGCCAGTAA 1 -GAGTGGGGAAGGGGG 1 -TAATATTTTCTTCCC 1 -GTAAAACTTAATGTC 1 -ATGAGAAAATTGCAT 1 -TGGAATTGGGAGAAA 1 -TCCAGAGCAAACTGG 1 -TCCCAAAAAATAAAA 1 -CCCCACAGCCTCCCA 1 -TTGCCGCCTTCCCTC 1 -ATAGATCGAGACATG 1 -TTATGTGCTTTGCGT 1 -GAGGTCCGTAGTTGA 1 -GCTGTAAAACCATTA 1 -GAATGGAAAGTCAAA 1 -GCTCTCTTAGCTTTT 1 -CTGGCTCCCCCAGCG 1 -AGTTCGCATGTCCTA 1 -AGTACTGTTTCTGAA 1 -CAGCCTATTCTGCCA 1 -GATGTGTTAAGGAAT 1 -TGAGATTCCATCCCA 1 -AAGGTATAGGCCTTT 1 -ACGTCATCCAGCAGA 1 -AAGAATGGAGAGAGA 1 -ATGGCAGCAATCGAG 1 -GCTGTAATTCGTGCA 1 -CGAGATTTTAAGAAA 1 -CCTGTAATCCCAACA 1 -TGCTATGTCCCAGGC 1 -CTTTGTTTGTAAGTC 1 -TTCGGGCCGAGATGT 1 -AGTCCCAGGAATCAC 1 -TAGGGCGTCGATAAG 1 -GGTCTATTTTCCTCC 1 -GTTGTTTGGTAAGAA 1 -CTTCTCCTGTGGAGT 1 -CGAGACCCTGTCTCT 1 -GGCTCGTCCCAAAGG 1 -AAGTCCTTGAGAGCC 1 -TCATTTTCAATGAAA 1 -AGCAGCAGCAGCACT 1 -TTGGAGACAGGTGAC 1 -TTTAGGTTTTGTTTC 1 -TGACCAAATGTAAAC 1 
-AAGGCTCTTAAAAAT 1 -ACGGTCCCTGCGGGC 1 -CTATCCCTGTTGTAT 1 -ACACTGCACTCCAGC 1 -GCCCTGTGGTCTTTT 1 -ATTTTGGAAAGTACT 1 -TGATTGGCTGGGCAC 1 -CAGGAGCAGGAACCA 1 -GGGAGACCTTTGGCC 1 -TGCCACGTATAGAGC 1 -AAAGCTGCTTTGATA 1 -GCTAGTAGCCTTTCC 1 -TGGCACTGCGTCGCT 1 -CATATTGGGATTGTC 1 -GAGGGTTTCTCTTCC 1 -TCAGGTTTACTCACG 1 -TTTCTAAGTACCTGG 1 -AACAGAGCGAGACCC 1 -GCCTGTAATCCCAAC 1 -CTGGATCTCGGGGAA 1 -GAAACAGAGTACAAT 1 -AGGAATTATGAGGGA 1 -TAGTGGCACCTGCTG 1 -CGCATGTCCTAGCAC 1 -TGGAGGCTATCCAGC 1 -TGTAAGTCCTGCTGT 1 -ACACATATATTTAGT 1 -CTCTTGTACTACACT 1 -CCTAGCACCTCTGGG 1 -AAGATAATCCAAGAT 1 -TGAGCTGAGATCTCA 1 -ACCCAGGAGGCGGAG 1 -TTTGATCATGAGAAA 1 -GCCTTCCCTCAAACA 1 -CCAGCCTTATTTCTA 1 -AATCCCAACACTTTG 1 -TAAAGATCAGATTAG 1 -TTATACACATATATT 1 -GCATGGTTTCTGAAC 1 -GAGTGGGGGACGGGT 1 -GGGGAGGAAACAGCA 1 -ATCCACTTAGGAACC 1 -GTTGGTTGGGCACGG 1 -TTATTTGTTCCCATC 1 -ATGATAGGGTGTTTC 1 -TGGTTTCTGAACCAG 1 -GTTGAGACCAGCCTG 1 -CAGCCTTATTTCTAA 1 -GCTTCAGGTATATTT 1 -TTAAAAAAAAATCTT 1 -TATTTAAAAAAAAAT 1 -GATCGAAAGCAGAAT 1 -GGGTGGAAACAGAGT 1 -GGGGTGGCCTGGGAG 1 -AAGATGTACTGTGCC 1 -GCTCTTGTATGCATT 1 -TTCAAACCCAGCCTG 1 -GAGCTCTCTTAGCTT 1 -TTCATTTTCAATGAA 1 -GACCTTTGGCCTACG 1 -AAGAGAAAAGAAAAG 1 -CCAAAGGCGCGGCGC 1 -AGAAAATTGCATTTA 1 -TGATACAAAGCGGTT 1 -TTGTCACAGCCCAAG 1 -GGTATTTTAAAAGTA 1 -CAGGTCTTCATTAGA 1 -ATAGTTAAGTGGGGT 1 -GAAAGAAGAATCCTA 1 -TTTTAAAAGTAAAAC 1 -CCCTGTTGTATTTTA 1 -AAGTCTTACATTCTT 1 -AAATCTTATTTTGAA 1 -CCTCTCCCGCTCTGC 1 -AGAGCAAACTGGGCG 1 -CTTTGCCTGGTTGTT 1 -TTGTCAGGGAATGTT 1 -CCTCAAACAGAGAGT 1 -CTGTGCTCTCTCGCT 1 -GATAGGGTGTTTCTA 1 -AGTTTTTGACCTTGA 1 -GGGCATCAGAAGTCC 1 -AAGCTGACAGCATTC 1 -AAAAAATGGAAGGGG 1 -CAGATGAAGAAACTA 1 -CTCTTGAGCTTAGGC 1 -ATGAAAAATATAGAG 1 -CTTCTTACCTACTGG 1 -AGAAAAGAAAAGAAA 1 -TGCCTGTAGTCCCAG 1 -GATGCAAAGAGCTCT 1 -AAAAAAAATGGAAGG 1 -TGAAAACATGAGGGG 1 -CCTTGAGAAAATGTT 1 -GGGAGGGTCGGGACA 1 -GTACTACACTGAATT 1 -ATTTTCCTCCTCTGA 1 -GTCCCATTTGCCATA 1 -AGGGAGGCCGAGGCG 1 -CTGATATAGCTTGAC 1 -AATTATGAGGGAAAG 1 -AACCATTTTAGACAT 1 -ACCGTGGGGAGGAAA 1 -TAGCAGGGAAAGAAG 1 -CCGTGGCCTTAGCTG 1 
-AAAGTACTGTTTCTG 1 -ACTCAACCATTTAAG 1 -TTGAAAACATGAGGG 1 -TGAGATCTCACCACT 1 -ATCGAGATTGAAGTC 1 -ATGTGGAGAACATTG 1 -CTTACTGAAGAATGG 1 -AGAATGCTTGGCTGT 1 -GGTCTTTTCGTACAG 1 -AAAGATTCAGGTTTA 1 -GCTACTCTCTCTTTC 1 -TGAGGCGGGAGGATC 1 -AAAGAACATACCTTG 1 -TTAGGAACCTCAGAT 1 -TCTGCCAGCCTTATT 1 -TAATCCAAGATGGTT 1 -TCATTAGATTCCCCA 1 -CGCGCGCTACTTGCC 1 -CTCTTAACACTTCTT 1 -TTGACTTACTGAAGA 1 -GCGCTACTCTCTCTT 1 -CAGAATGTTTTGATC 1 -TTTTAATGTTATGAA 1 -CATGCATAGACCATT 1 -CTGAACGAACATCTC 1 -GAGCAGCAGCAGCAC 1 -GTCCTGATTGGCTGG 1 -TCTTACCTACTGGCT 1 -TCTCGCTCCGTGACT 1 -CCAAATGTAAACACT 1 -AGTCCCATCCCATCT 1 -GGCACCGAGATTTTA 1 -TATCACCACCATTAC 1 -CTGCTTTGATATAAA 1 -TCTCCAGTACTTTCT 1 -GTCCTTCCTCTCCCG 1 -CTGATTGGCTGGGCA 1 -TCTGTATATTTAGTA 1 -TGATTGCCTCTGAAG 1 -AGCCTGTCTGATACT 1 -AGAGTAACATTTTAG 1 -TCCCAGCTGCTCCGG 1 -TTGAACCCAGGAGGC 1 -TACTAGACACTTCAT 1 -GAGATATATCTGGTC 1 -CATCATGGAGGTAAG 1 -ATAACCCTCACTATG 1 -ATAACAATCTGATAT 1 -TATTCCTCAGGTACT 1 -CTTTGGGAGGCCAAG 1 -TGCTGCGGCTCTGCT 1 -CATCTGATATAAACA 1 -CTGTAATCCCAGCAC 1 -CTCTAACCTGGCACT 1 -AGCAGGGAAAGAAGA 1 -TAAGAAACTTAATTA 1 -AAAGCAGAATGTTTT 1 -AATTGAAAAAGTGGA 1 -AACCATTTAAGGTAT 1 -GCGGGCCTTGTCCTG 1 -CTCCGGTGGCTGAGG 1 -AGGGAGCAGCAGCAG 1 -GAGAAAATTGCATTT 1 -ACTTTCTGGCTGGAT 1 -GAGTAGCTGCCTTTT 1 -TTAAGAAAAACGCCT 1 -CGGTTTCGAATAATT 1 -TCCGGTGGCTGAGGC 1 -TGATGCACAGCATGG 1 -CTAACCTGGCACTGC 1 -TAACTCTCCAAAAGT 1 -GGCAATACACTAAGC 1 -CTTCAGGTATATTTA 1 -AAGTACTGTTTCTGA 1 -TGAGCCCAAATTCAA 1 -CTTGTCTTTCAGCAA 1 -TTTCCTGAATTGCTA 1 -TGGAGGTAAGTTTTT 1 -TATCATGGCGCCAGT 1 -GTATAGAGGAATTAT 1 -TAAAGGATAATGTAT 1 -AAAATCTTATTTTGA 1 -GGACGCGCGCTACTT 1 -CCTGGGCAATGGAAT 1 -GCTGAGGTTTGTGAA 1 -GATCTCGGGGAAGCG 1 -GGCTTCCTCTAGCTT 1 -GCCACGTATAGAGCA 1 -CAGGGCTGGATCTCG 1 -ACATCACGAGACTCT 1 -GGAAGGGGTGGAAAC 1 -GGCATGGTGGTGTGT 1 -ACTTCGTCTAGGCGC 1 -CTCTAAGAAAAGGAA 1 -AGGCTATCCAGCGTG 1 -TACTGTGCCTCTTAC 1 -GGCCAACATGGTGAA 2 -TGAGAAAATGTTTTT 1 -TTCGCATGTCCTAGC 1 -TACTGTTTCTGAAAC 1 -GTCCCAGGAATCACT 1 -GCTTTTAATGTTATG 1 -CAGCTGGAGTGGGGG 1 -AAGTGTATTTAAAAG 1 -TTTCGTACAGAGGGC 1 
-GCCTTTGTTTGTAAG 1 -GTGCCTCTTACTTTC 1 -GATCATGTACCCTGA 1 -TCTTGAGCACTTTCT 1 -TTTCCCTGCAGTTGA 1 -ATACTGCTAGAAATC 1 -GTGAGATTCTCCAGA 1 -ATGTTCTGTCCCATT 1 -GCGCAGCTGGAGTGG 1 -TCCAGTGACAGAAGA 1 -GGTCTTCATTAGATT 1 -GTGGTGTGTGCCTGT 1 -GTTTCACTGTCCTGA 1 -TATAATCCTGGACTT 1 -TGTACTACACTGAAT 1 -AAAAAAATAAAAAAA 1 -TGTGGGCATAAATTA 1 -CGTCTAGGCGCCCGC 1 -AACCCAGGAGGCGGA 1 -GTCATAAAGCCAGTA 1 -GGCATGTATAGAGGA 1 -AAACTAAGGCACCGA 1 -AGACCAGCCTGGCCA 2 -ATGCACAGCATGGTT 1 -GCGGCGGGGTGGCCT 1 -TCTGAAACATTAGGC 1 -TGGTGAAATCCCGTC 1 -CCTGAATTGCTATGT 1 -AGTAGGAAGGGCTTG 1 -GGGAGGCCGAGGCGG 1 -CCAGAGAAAGGCTCT 1 -CAGTAACTGGTTGAG 1 -TCCCATCACATGTCA 1 -TTCCTTTTTTTTCTC 1 -GGAGTTCAAGACCAG 1 -GCTAGAAAAAAAACA 1 -CACAAGAAACATGTA 1 -TTTATGTGCTTTGCG 1 -TGCCCCTTTCGGCGG 1 -ACCAGTAGTTTCCCT 1 -AGTAGTCATATCATA 1 -GTCACTTTTAAAAAA 1 -CATTTAGGTTTTGTT 1 -AGCAGCATCATGGAG 1 -TTCAAAATGGAGGTG 1 -AGAATGTTTTGATCA 1 -ATCATGTACCCTGAA 1 -AATGTCTTCCTTTTT 1 -ACCTCTGGGTCTATG 1 -TTGATCATGAGAAAA 1 -GATGTCATTTTAAAA 1 -GGGGGCTATTATGAA 1 -ATGTATAGAGGAATT 1 -AGATCGAGACATGTA 1 -TCATGAGAAAATTGC 1 -CAGGATGAATCTGTG 1 -TAACATCTGCCACGT 1 -TTTGATGGGGGCTAT 1 -TCTCCAAAAGTCATA 1 -GAAGATACCACAAGA 1 -AGTGGCATGAAGAAG 1 -CTTTTCCCGATATTC 1 -TGGCTCTTTGCCTGG 1 -AAAACCTCAGCAGAA 1 -TCCGCTCTTTCGCGG 1 -GTGAACGCGTGGAGG 1 -TTGAGTGATGTGTTA 1 -TTTCTGTGTGCCAAG 1 -AAAAAAAAGAAAGAG 1 -TTTGTTTTACTGTGG 1 -CGACGGGAGGGTCGG 1 -AAGCGGTTTCGAATA 1 -TGTTTTTGTTTTACT 1 -TGGTTGAGCCCAAAT 1 -ATCACTTGAACCCAG 1 -CAGAGTACAATAACA 1 -GCCAGTAACTGGTTG 1 -GTTCCCTTCTCCTGT 1 -GTGGCCCGCCGTGGG 1 -TCGCTGTGCTCTCTC 1 -TGGGGTAAGTCTTAC 1 -GTTGAGCAGGGAGCA 1 -ATATTAATGTGTCTT 1 -CCCACTTCCCCATGG 1 -ACACCGTGGGGAGGA 1 -TAAACACTTGGTGCC 1 -TTTTTGACCTTGAGA 1 -TCTTTCAGCAAGGAC 1 -AAAAATGCAGCGCAA 1 -CTTCATACAGTTTAG 1 -CTCTTGAGTGATGTG 1 -GTTTATTCTTCAAAA 1 -GAGCGCATGCCTTTT 1 -TGGCAGCAATCGAGA 1 -CGTGAGTCTCTCCTA 1 -TGTAATGATGATTGC 1 -TAAAACCTCAGCAGA 1 -CAGCAGAGAATGGAA 1 -CCTCCCGCTCTGGTC 1 -GTGGAGCATTCAGAC 1 -ATCTGTATATTTAGT 1 -GAAAGGCATAAACAT 1 -TATAGCTTGACACCA 1 -CCATTTGATCATAAT 1 -AGTGGGGTAAGTCTT 1 
-TATAGAGCAATTGCT 1 -TTTTTGTTTCACTGT 1 -AAATGTAAACACTTG 1 -CAGAGCGAGACCCTG 1 -GGTTGGGCGTGGTAG 1 -GAAAGAAGTGAAGGT 1 -TCCTCCTTCTCCCCA 1 -TTTTGTTTCACTGTC 1 -TGTTCTTAAAGATCA 1 -GTGTGGTGGCTCGTG 1 -TAGCTGTGCTCGCGC 1 -CAGAGAAAGGCTCTT 1 -TCCACCTCTTGATGG 1 -TATCTTCTGCCTCTC 1 -CAACCATTTAAGGTA 1 -GTTTCATCCATCCGA 1 -TGGAAGGGGTGGAAA 1 -GTGGAAATGGAATTG 1 -GTAGTTTCCCTGCAG 1 -ACTTAGGAACCTCAG 1 -CATAAGAAAAAAAAT 1 -CATAAAGCCAGTAAC 1 -CCAGTGCACTCCAGC 1 -GTAGTCCCAGGAATC 1 -GCCGAGATGTCTCGC 1 -TGGGGTCTGGGGGAG 1 -ACATACCTTGGGTTG 2 -TTTGGGAGGCCAAGG 1 -AATGCTATGAGTGCT 1 -TTCTGTCCCATTTGC 1 -TGAAGAAGGTGTATG 1 -TCTTTTCGTACAGAG 1 -CGTCGCCCGGGTAAG 1 -CTCACCTATCCCTGT 1 -AAACATTAGGCAATA 1 -TTATTTCTAACCATT 1 -TCCCTGCGGGCCTTG 1 -GTTGACTTACTGAAG 1 -AAGCAGCATCATGGA 1 -TTCCTCAGGTACTCC 1 -GAACCAAAAAGAAAG 1 -GGAACAGCAGCCTAT 1 -TCAAGCCTAACCAGG 1 -TGCATATTGGGATTG 1 -TGAACCATGTGACTT 1 -CAGTTGAGCAGGGAG 1 -GAAAGTATGTTCTGT 1 -GTTCCTGCTGGGTAG 1 -TGCTCTCTCGCTCCG 1 -CCTTCTGCGTGAGAT 1 -TCCAGCCTGGGCAAT 1 -TATATTTAGTGTCAA 1 -AGCTTTTGTGGCAGC 1 -GGTGGCCTGGGAGTG 1 -CTGGCACTGCGTCGC 1 -GAAGCTCATTTGGCC 1 -GAGGACGCCAGAGAT 1 -AGTTTAGAAAATCAG 1 -TGTTAAGGAATGCTA 1 -CTGGAGAGCTGTGGA 1 -TGAAAAGTATCTTGG 1 -CTCTCTCTTTCTGGC 1 -CTTGTTGGGAAGGTG 1 -CTGAATTGCTATGTG 1 -TAGAAAAAAAACAAA 1 -CGTGTGAACCATGTG 1 -AATTTTGAAAACAGT 1 -TGTGTGCCAAGGACT 1 -CTTTTGGCTGTAATT 1 -AAATAACAATCTGAT 1 -TTTGTTTTTGTTTTA 1 -CAAGAAACATGTAAT 1 -CCAAAGATTCAGGTT 1 -GGGGAAGGGGGTGCG 1 -ACATGGTGAAGCCTG 1 -CCTGGCAATATTAAT 1 -CCATTTTAGACATTT 1 -CAAATGTAAACACTT 1 -GCATAGACCATTTCT 1 -AAAAAAGAAAAAAAA 1 -GAGCAAACTGGGCGG 1 -GAGATTGAAGTCAAG 1 -CTGTAGTCCCAGGAA 1 -AAGAAAGGCATAAAC 1 -TCAAGGTGGCCTGGT 1 -GCTCTCTCGCTCCGT 1 -CAATGTATTCATGGG 1 -GAAACATGTAATGAT 1 -TCTAACCTGGCACTG 1 -TGAATATAAGTGTAT 1 -AGACCATTTCTGGAA 1 -AGTATGTTCTGTCCC 1 -CCTTGGGTTGGTTGG 1 -AGCGTCAGAGCGCCG 1 -GAGGTGGCTTGTTGG 1 -TACAATAACATGAGT 1 -GGGGATAAATGGCAG 1 -TGTGTGTGGGTTTTG 1 -GTGTGGGTTTTGTTT 1 -TGACTTACTGAAGAA 1 -AATCGAGATTGAAGT 1 -AGATTTTAAGAAACT 1 -GCGCTCACCTTTTCC 1 -CCATGTGACTTTGTC 1 -TGGGCAACAGAGCGA 1 
-CTGGGGGAGGCGTCG 1 -AGTATCTTGGGGCCA 1 -CTGTATATTTAGTAG 1 -TAGTGTCAATCTGTA 1 -TCAGCAGAAATAAAG 1 -CCATTTGCCATAGTC 1 -TTAAAAAATTATAAG 1 -AGATTCCATCCCAAA 1 -AACACTTGGTGCCTG 1 -TCCTTCCTCTCCCGC 1 -TTACAGTGATGCTCT 1 -CCCGTCTCTACTGAA 1 -TGATATAGCTTGACA 1 -GACCAGCCTGGCCAA 2 -GAGTATGCCTGCCGT 1 -CCAAAAAGAAAGGCA 1 -TAGCTTTTGTGGCAG 1 -AGCTGCTGAAAGTTG 1 -TTTTAGCTCATGAAA 1 -TTATTCTTCAAAATG 1 -TGTCTGGGTTTCATC 1 -TAAACAATCTGCATA 1 -CACACCGTGGGGAGG 1 -TCAGGGGAGCTGTAA 1 -TCTTCCTTTTTTTTC 1 -GGGCCGAGATGTCTC 1 -GGTATCTGAGGCTAG 1 -CACCCGGGACGCGCG 1 -GCCATAGTCCTCACC 1 -TTGTGGCAGCTTCAG 1 -TGCTAGAAAAAAAAC 1 -GAAAAGATTACAGTG 1 -CCTCTGACCTGTGTG 1 -AGAGAAAGGCTCTTA 1 -AAAATTAACCAGGCA 1 -CAGTGATGCTCTCAC 1 -ATCCTATAATCCTGG 1 -AAAGAAAAGAAAGAA 1 -GTTAAGGAATGCTAT 1 -TTTATTCTTCAAAAT 1 -GAACTTTGAAAAGTA 1 -CCGTGGGGAGGAAAC 1 -GCTCGTGCCTGTAAT 1 -TGGCCTGGAGGCTAT 1 -ATACTACCCTGAATG 1 -CTTTGGCTCTTTGCC 1 -TGGATGCTAATTAAA 1 -CTGTCTGCTGCGGCT 1 -TTCTCCACTGTCTTT 1 -TACCTTGGGTTGATC 1 -AAGTAATACATGCCA 1 -CTCTGGCAAAACATG 1 -TTAATGATAGGGTGT 1 -GAGGCATTTAATATG 1 -AATCTGTATATTTAG 1 -GAATGTTTTGATCAT 1 -AATTTGATGGGGGCT 1 -CTTTGAAAAGTATCT 1 -GAAGATACTGCTAGA 1 -GAGATCTTGAGCACT 1 -GCCAAGGCAGGCTGA 1 -AAAAATCTTATTTTG 1 -CAAGATGGTTACCAA 1 -TTATAGACAGCTCTA 1 -CGGAGGTTGCAGTGA 1 -GCAGTTGAGCAGGGA 1 -TTTCGAATAATTAAC 1 -TAAGGCACCGAGATT 1 -CTCTCGCTCCGTGAC 1 -TATAAACAATCTGCA 1 -TCACAGATGAAGAAA 1 -CAGCAAGGACTGGTC 1 -GGTTGGGGGAGGGTT 1 -TCATCCAGCAGAGAA 1 -TGTGGGGCCACACCG 1 -ATTCTTCAAAATGGA 1 -AGGTTGGGGGAGGGT 1 -TCAATGAAAAATATA 1 -TTTTCAGTTAAGTTT 1 -ATGCAAAGAGCTCTC 1 -CTGGGTAGCTCTAAA 1 -GTCATATCATAAAGC 1 -GATACTTGTCCTCTT 1 -AAAAGTAAAACTTAA 1 -GGGTTTCTCTTCCGC 1 -GCGCCAGTGCACTCC 1 -AAGGGCTTGTTCCTG 1 -ATTATGAACTGAGAA 1 -AGAGATATATCTGGT 1 -CGGGTAAGCCTGTCT 1 -TTATGTTAGATGTCA 1 -CCAGACAAGGAGGAG 1 -GGGACGCGCGCTACT 1 -CGGTTTTGAAAACAT 1 -TTCTCTTCCGCTCTT 1 -TTCTCCAGTACTTTC 1 -TTGACAGAGTAACAT 1 -ATATTTAAAAAAAAA 1 -AACCTCAGCAGAAAT 1 -GGTTTACTCACGTCA 1 -CCTTTTGGCTGTAAT 1 -TAAGCGCGCTCACCT 1 -TGTGCCAAGGACTTT 1 -AGACAGGTGACGGTC 1 -TTGTATGCATTTAGG 1 
-ACAACATAAAGGATA 1 -GAGGATCTCTTGAGC 1 -CATTCGGGCCGAGAT 1 -GACATTTGTTAGTAC 1 -GCCTGATATAGCTTG 1 -AAGAAAAACGCCTGC 1 -AGTAAAACTTAATGT 1 -AGAAAAACGCCTGCC 1 -GGCATGGGCCCTGTG 1 -TACTCTCTCTTTCTG 1 -ACATTGACAGAGTAA 1 -CATGGTGGTGTGTGC 1 -AACTACCCGTTATTG 1 -GCGGGAGCGCATGCC 1 -CTTCCTCTAGCTTTT 1 -TTTTTTAAGAAAAAC 1 -GTCAAGCCTAACCAG 1 -CATCTCAAGAAGGTA 1 -TAGCCCCAAGTGAAA 1 -GCCTCTGAAGGTCTA 1 -GCTGGAGTGGGGGAC 1 -TATTCATGGGTAGGA 1 -TCGAGATTGAAGTCA 1 -AGGCTCGTCCCAAAG 1 -AACATAAAGGATAAT 1 -CTCCTCCTTCTCCCC 1 -TTTGCTGGTTATGTT 1 -GAAATCGATGACCAA 1 -GGGGTAAGTCTTACA 1 -AATAAAGATAATCCA 1 -ATTTTTCAGTTAAGT 1 -TGACTTTGTCACAGC 1 -GTAATACATGCCATG 1 -CTTCTCTTCATTTTC 1 -TTCCCTCAAACAGAG 1 -TGGCTCCCCCAGCGC 1 -TCTCTCAAAAAAGAA 1 -ATAAGTGTATTTAAA 1 -TGACAGAGTAACATT 1 -ATTCATGGGTAGGAA 1 -ACGGGAGGGTCGGGA 1 -TTGTTGTTTGGTAAG 1 -GTCAGGGAATGTTCT 1 -GCTAGTCCAGGGCTG 1 -ACTTAATTACACAGG 1 -TCAAACAGAGAGTTC 1 -CACCACCATTACTGG 1 -AACTGGGCGGCATGG 1 -TGGTTACCAAGACTG 1 -CAGCATGGTTTCTGA 1 -CTCTTTGCCTGGTTG 1 -CTAATTAAAACGCAA 1 -TGGGGGAGGGTTTCT 1 -CGCGTTTAATATAAG 1 -CTGCTGCGGCTCTGC 1 -TTTAATGTTATGAAA 1 -TTTTCAATGAAAAAT 1 -GTCTCTACTGAAAAT 1 -TGGAAGATACCACAA 1 -GTGATGTGTTAAGGA 1 -TGCCTTTTAGTGATC 1 -TTTAAAAAATAACAA 1 -GTGCATTTTTTTTTA 1 -AGCACTAACACTTCT 1 -CAGCACTTGCACAAA 1 -ACTATGGTTATCTTC 1 -CGTCGCTGGCTTGGA 1 -CTCTTTGGCTCTTTG 1 -TGAGCTATCATGGCG 1 -CCAGAGTGGAAATGG 1 -TGTATTTTATCGGGT 1 -TGGCCTTAGCTGTGC 1 -TGTGGAGTGGCATGA 1 -CTATTTATAGACAGC 1 -AAAAATGGAAGGGGT 1 -AACCTGGCTAGTTTA 1 -CAGAGCAAACTGGGC 1 -GCAATGGAATGAGAT 1 -GTGGCTGAGGCGGGA 1 -TCTTAAAGATCAGAT 1 -GAGATCTCACCACTG 1 -AGTCTTACATTCTTT 1 -TCTCGGGGAAGCGGC 1 -CCTTAGACTGGAGAG 1 -CCTGCCTTCTGCGTG 1 -TTCTGCGTGAGATTC 1 -TATAGGCCTTTGTTT 1 -CTGCTAGAAAAAAAA 1 -GGAAAGTCCCTCTCT 1 -CTGGACTTCTCCAGT 1 -AGAAGATACTGCTAG 1 -TCCTCACCTATCCCT 1 -GGAAGCTCATTTGGC 1 -GCTCGTGCCTGTGGT 1 -AAAAAATCAGGTCTT 1 -TACTGGCTTCCTCTA 1 -TTGTTTCTTTGTTTT 1 -GCAATCGAGATTGAA 1 -CAAGTTCTCCTTGGT 1 -TTTAAAAGAATTTTA 1 -GTGGAGAACATTGAC 1 -TCTCTTAGCTTTTAA 1 -CTCCGTGACTTCCCT 1 -CATTCCTGAAGCTGA 1 -GATGCTCTCACAAAA 1 
-CTTGCCGCCTTCCCT 1 -CGGCGGGGTGGCCTG 1 -CAGGTATATTTAGCA 1 -GTAGCTGCCTTTTAG 1 -TGATCATAATGGAAA 1 -TGTATAGAGGAATTA 1 -AACTGAGAAATGAAC 1 -ACTCTAAGAAAAGGA 1 -TCCATCCCAAAAAAT 1 -TACACTAAGCGCGCT 1 -CTAACACTTCTCTTC 1 -TCTTAAAAATGCAGC 1 -TTTTTAAGAAAAACG 1 -CATTTTCAATGAAAA 1 -AAAATGGAGGTGGCT 1 -ATATATTTAGTGTCA 1 -GGTAGCTCTAAACAA 1 -CATGAGAAAATTGCA 1 -CTATTCTGCCAGCCT 1 -TTGGTATCTGAGGCT 1 -AAAAATAATAACAAA 1 -GATAACCCTCACTAT 1 -ATGCAGCGCAATCTC 1 -ACAAGAAACATGTAA 1 -AATAAAGAACATACC 1 -GGAAGGTGGAAGCTC 1 -TTTGTAAGCTGCTGA 1 -GCCTGCCGTGTGAAC 1 -AGAAAAGGAAACTGA 1 -ATACCCTGGCAATAT 1 -TGACATCTTTCTGTG 1 -TTGGCCAGAGTGGAA 1 -AGAATTGAAAAAGTG 1 -TCTTACATTCTTTTG 1 -TTTGAATACAATTTA 1 -AATTGCTATGTGTCT 1 -ATTGACAGAGTAACA 1 -CACTTTTAAAAAATT 1 -TGTGTCTGGGTTTCA 1 -CAAAGCGGTTTCGAA 1 -AAGCGCGCTCACCTT 1 -CGCCATAGATAACTA 1 -GTCTCTCCTACCCTC 1 -ATTAATGTGTCTTTT 1 -CATATAAAACCTCAG 1 -TCCTTTTTTTTCTCC 1 -GTGGTCTTTTCGTAC 1 -CAAGAAGGTATAGGC 1 -ATTTCCTGAATTGCT 1 -GAGTTCCAGGCAGGA 1 -GCACTAACACTTCTC 1 -CAAAAATTAGCCGGG 1 -TCCCGCTCTGCACCC 1 -AGAGAATTGAAAAAG 1 -TCTGCGTGAGATTCT 1 -ATTCAGACTTGTCTT 1 -TCTCTCCTACCCTCC 1 -TACAAAAATTAACCA 1 -ATCAGAAGTCCTTGA 1 -TGGCAATATTAATGT 1 -GAGACATGTAAGCAG 1 -AAGAATCCTACAGGG 1 -TCATGGGTAGGAACA 1 -CTGTTTCTGAAACAT 1 -TTTATAATATTTTCT 1 -GTTGTTTCCAAGATG 1 -CACTGTCTTTTTCAT 1 -ACCAAAAAGAAAGGC 1 -GTTTCTTTGTTTTTT 1 -TGTAGTCCCAGGAAT 1 -ACAGCTCTAACATGA 1 -GGTGGCTTGTTGGGA 1 -TATCTTCCGCCATAG 1 -TAGGTTTTGTTTCTT 1 -TTGGGCGTGGTAGCT 1 -ATGAGATTCCATCCC 1 -CGCTCCGTGGCCTTA 1 -ATGTATATATCACCA 1 -AAGGTCTATTTTCCT 1 -GTTTGGTAAGAACAT 1 -AGGCGCCCGCTAAGT 1 -AGAGATCTTGAGCAC 1 -TGGGGGCTATTATGA 1 -ATTTAATTTTGAAAA 1 -GATTTTAAGAAACTT 1 -CAAATACATATACAC 1 -CTAGAGAGATATATC 1 -TACAGTTTAGAAAAT 1 -CGCGACGTTTGTAGA 1 -GGGGCCAAATCATGT 1 -TGAGTGATGTGTTAA 1 -AAACCCAGCCTGTCT 1 -GAAATAAAGAGGTTT 1 -AATGTTATGAAAAAA 1 -TGCAGCGCAATCTCC 1 -TGCCTTCTGCGTGAG 1 -ACTGGTTGAGCCCAA 1 -TCTCCAAGTTCTCCT 1 -TTTTGAGCTATCATG 1 -CTCTCACAAAATCTT 1 -ATGACCAAATGTAAA 1 -TACTGGTATTTGCTG 1 -TTGTTCCCATCACAT 1 -GTATGCATTTAGGTT 1 -AAAGATACCAAGTCA 1 
-CCCAGACAAGGAGGA 1 -GGCTCCCCCAGCGCA 1 -ATCTGAGGCTAGTAG 1 -AGTCACGGTTTATTC 1 -AAAACGGGAAAGTCC 1 -TTAGTGGCACCTGCT 1 -GTGTGTGGGTTTTGT 1 -TTACAACATAAAGGA 1 -GGCCTGGGAGTGGGG 1 -ATGTCTTCCTTTTTT 1 -GAAGAATCCTACAGG 1 -CATGTAAGCAGCATC 1 -GGAAAGTACTGTTTC 1 -TCCAAAGATTCAGGT 1 -CTGAGGTTTGTGAAC 1 -ACTTAGGGAGGCCGA 1 -GCGACGTTTGTAGAA 1 -ATTTGTTCCCATCAC 1 -GGGCCTTGTCCTGAT 1 -GCTACTTGCCCCTTT 1 -TTTTGCGGGAGCGCA 1 -ACTGAAGAATGGAGA 1 -TACCAAGACTGTTGA 1 -TACCCTCCCGCTCTG 1 -CTGTAAAACCATTAA 1 -TCCTAGCATCCTATA 1 -TGAGCAGGGAGCAGC 1 -CAATCCACCTCTTGA 1 -CATGAAGAAGGTGTA 1 -ATTAACTTATTTGTT 1 -CTACTGAAAATACAA 1 -ACTTGTCCTCTTCTT 1 -AGCTCTAAACAATGT 1 -GATCGAGACATGTAA 1 -CCGATATTCCTCAGG 1 -TCTCCCGCTCTGCAC 1 -GCAACAGAGCGAGAC 1 -GAGAGAATTGAAAAA 1 -CTGCAGTTGAGCAGG 1 -CAATTGCTATGTCCC 1 -TTAGCCGGGTGTGGT 1 -GCGCGGCGCTGAGGT 1 -GATACTGATGCACAG 1 -GCACGCGACGTTTGT 1 -CAAGGCAGGCTGATC 1 -GCCTCTGGCTCCCCC 1 -ATGTCACTTTTAAAA 1 -CTTTCTAAGTACCTG 1 -TCTAGAGAGATATAT 1 -ATTAGATTCCCCAAT 1 -GAAGGTGGAAGCTCA 1 -GGAGAAATCGATGAC 1 -GCAAATCCCACTGTC 1 -GTTCTTATTATTAGA 1 -CTTGATGGGGCTAGT 1 -ACAGTGATGCTCTCA 1 -TCCAAGATGGTTACC 1 -TTACTGGTATTTGCT 1 -AGGAATCACTTGAAC 1 -CATTTTAAAAAATAA 1 -ATCCCAAAAAATAAA 1 -ACTGGTCTTTCTATC 1 -AGTGGAAATGGAATT 1 -ACAGCATGGTTTCTG 1 -GAAAATTGCATTTAA 1 -GCAGGATGAATCTGT 1 -TGGCCTGGGAGTGGG 1 -GATAATAACATCTGC 1 -TCGCGGGGCCTCTGG 1 -TGGTCTAGTCATGCC 1 -TCCCAGACAAGGAGG 1 -CTTGGGTTGGTTGGG 1 -GTGCTGAGAGGGCAT 1 -ATACACATATATTTA 1 -CTAGACACTTCATAC 1 -TTTTTTTTTAAGAAA 1 -GGAGGCTATCCAGCG 1 -ATTTTGAAAATTTCC 1 -AGGGCGTCGATAAGC 1 -GCGTCGCGCTGGCGG 1 -CACGGTTTATTCTTC 1 -ACCAGCCTGGCCAAC 2 -TATGTGGGGCCACAC 1 -CTGGAGGCTATCCAG 1 -CCTCTGGCTCCCCCA 1 -AGAAAAAAAATGGAA 1 -GGCCAAATCATGTAG 1 -GTCTCTCAAAAAAGA 1 -ATATAGCTTGACACC 1 -GGTGTGGTGGCTCGT 1 -GCGCATGCCTTTTGG 1 -CACTTAGGGAGGCCG 1 -AAAGGCATGTATAGA 1 -GCTATTATGAACTGA 1 -CAAAAAGAAAGGCAT 1 -TGGGAAGGTGGAAGC 1 -CCTATTCTGCCAGCC 1 -TAGTGATCATGTACC 1 -CTTTCTGGCTGGATT 1 -GAACGAACATCTCAA 1 -TCTGAACCAGTAGTT 1 -TGGAAGCTTAAATAA 1 -TAGGCCTTTGTTTGT 1 -CCAAGGACTTTATGT 1 -AATGTAAACACTTGG 1 
-GCCTTTCCTTAATGA 1 -ATTTATAGACAGCTC 1 -ATTCCTGAAGCTGAC 1 -AATGTTTTTGTTTCA 1 -GACTTGTCTTTCAGC 1 -ACCTTTTCCTCTGGC 1 -CCATTACTGGTATTT 1 -GTACCTGGCAATACA 1 -AGATTCTCCAGAGCA 1 -ATAAACAATCTGCAT 1 -CTGGCATATAAAACC 1 -TACAATTTATTTACA 1 -ACTTTGAAAAGTATC 1 -AGGCAGGCTGATCAC 1 -TCCTGAAGCTGACAG 1 -CGTGGGGAGGAAACA 1 -AAAAGAATTTTATAC 1 -AAGGCAGGCTGATCA 1 -TCCAGCGTGGGCAAC 1 -GAAGTCAAGCCTAAC 1 -TCGAGACATGTAAGC 1 -TTTACAACATAAAGG 1 -GCCATGCATAGACCA 1 -GGCTATCCAGCGTGA 1 -CTTGGGGTCTGGGGG 1 -CTCTCACAGATGAAG 1 -AGGTACAAAGTCAGA 1 -CTCCTGTGGAGTGGC 1 -ATTTAAGGTATTTGC 1 -GGGGAGCTGTAAAAC 1 -ATGGTTTCTGAACCA 1 -CTCATGAAATTAGGT 1 -ACTTTCTAAGTACCT 1 -TTATTATTAGAAGCT 1 -GGGCTAGTCCAGGGC 1 -AGGTTTTGTTTCTTT 1 -ACGGGTAGGCTCGTC 1 -GTGAAATCCCGTCTC 1 -TTTGATCATAATGGA 1 -TTGATATAAAAAAGG 1 -CTGAAGAATGGAGAG 1 -CATTTGATCATAATG 1 -AAAGGTCTATGGCCA 1 -GTGACAGAAGATACT 1 -TTAAGGTATTTGCCA 1 -TTGTGTATGAGTAGT 1 -GCATTTAGGTTTTGT 1 -CATTTTTTTTTAAGA 1 -GGTCTTTCTATCTCT 1 -CCGGTGGCTGAGGCG 1 -TCTGTGCTCTGATCC 1 -GCGTCGCCCGGGTAA 1 -CTTGGTGCCTGATAT 1 -TAGCTTGACACCAAG 1 -TCTGCACCCTCTGTG 1 -CTAGTTTACAGCAAT 1 -ATCAGGTCTTCATTA 1 -TCTTGTATGCATTTA 1 -ATCCACCTCTTGATG 1 -TGATCCCTGAGGCAT 1 -TAATTACACAGGGGA 1 -GGCTCTTTGCCTGGT 1 -AAGAATTTTATACAC 1 -GTGCTCGCGCTACTC 1 -GATATTCCTCAGGTA 1 -TACCCTGGCAATATT 1 -AGTTGAGCAGGGAGC 1 -AAGTCACGGTTTATT 1 -CTCTTAGCTTTTAAT 1 -CAGGGCTTTTGCGGG 1 -TTCCTCCTCTGACCT 1 -CCAGTAGTTTCCCTG 1 -ACATTAGGCAATATA 1 -CGAGGCGGGAGGATG 1 -AACATCTGCCACGTA 1 -TTTCCCGATATTCCT 1 -ATGTGCTTTGCGTCA 1 -TCCAAAAGTCATAAA 1 -GCTGCTTTGATATAA 1 -CCTTGGGTTGATCCA 1 -GGATAAATGGCAGCA 1 -AGGTTTTGTTGTTTG 1 -AACTTAATTACACAG 1 -GATTCCCCAATCCAC 1 -ATTTAATATGTTCTT 1 -CCTTTCCTTAATGAT 1 -GCTGACAGCATTCGG 1 -GAGAGCCTCCAGAGA 1 -GTATGTTCTGTCCCA 1 -CTAGTCCAGGGCTGG 1 -GGATAATGTATATAT 1 -AAAGAAGAATCCTAC 1 -GTGGCACCTGCTGAG 1 -ACCATTACTGGTATT 1 -AGCAGGAACCAAAAA 1 -GGTGGCTGAGGCGGG 1 -CCAGAGATCTTGAGC 1 -TCGGGCCGAGATGTC 1 -ACCACAAGAAACATG 1 -AGCAGCAGCACTTGC 1 -CTTAGACTGGAGAGC 1 -GGCATAAACATAAGA 1 -TTGTCCTCTTCTTAG 1 -ATCGAGACATGTAAG 1 -TGTCCTGATTGGCTG 1 
-AGGACGCCAGAGATC 1 -GCGCTGAGGTTTGTG 1 -GAGCATTCAGACTTG 1 -TACACAGGGGATAAA 1 -GAAATGGAATTGGGA 1 -ATTCCCAACCTGGCT 1 -TCTTGATGGGGCTAG 1 -CTCGCTCCGTGACTT 1 -AGTGGAGCATTCAGA 1 -CCAAATTCAAACCCA 1 -AAAAGTATCTTGGGG 1 -TGGGGCCAAATCATG 1 -AAATAAAGAGGTTTT 1 -ATGGAAAGTATGTTC 1 -GGGACAAAGTTTAGG 1 -CACAGATGAAGAAAC 1 -TATGTTCTTATTATT 1 -TTATTAGAAGCTCAG 1 -CTTAATTACACAGGG 1 -GAGCTTAGGCTTTTG 1 -AGCGGTTTCGAATAA 1 -TTCTGCCAGCCTTAT 1 -AACATGATAACCCTC 1 -CTGGGTCTATGTGGG 1 -ATTGACATCTTTCTG 1 -GTTGAGGACGCCAGA 1 -AGAGAATGGAAAGTC 1 -TTAGCAGGGAAAGAA 1 -ATGATCGAAAGCAGA 1 -CTTCCCCATGGATGG 1 -TAACTACTATGGTTA 1 -GGGCATTCCTGAAGC 1 -GGCTTGTTGGGAAGG 1 -CCCTGTCTCTCAAAA 1 -GGCTAGTCCAGGGCT 1 -AGTTTTGGAAGCTTA 1 -TCTAGTCATGCCTCT 1 -TCCCCACAGCCTCCC 1 -TAATAACATCTGCCA 1 -CCATACTACCCTGAA 1 -TTACTTTCGGTTTTG 1 -CTCACAAAATCTTGC 1 -TCCCTGTTGTATTTT 1 -TTTGAAAAGTATCTT 1 -CCGAGGTTGGGGGAG 1 -AGTGATGCTCTCACA 1 -CAAGTGAAATACCCT 1 -GCTGCGGCTCTGCTT 1 -TTAGGGCGTCGATAA 1 -CTTGGAGACAGGTGA 1 -TCTACTGAAAATACA 1 -TTAATAAAGATAATC 1 -ATTTGAATACAATTT 1 -ACATGCCATGCATAG 1 -GACGTTTGTAGAATG 1 -CGGGGAGCAGGGGAG 1 -TTTTATACACATATA 1 -TTACTCACGTCATCC 1 -CTGCTCCGGTGGCTG 1 -CGCAAATCCCACTGT 1 -CTCCCAGACAAGGAG 1 -GGCCTACGGCGACGG 1 -ATAGTCCTCACCTAT 1 -CTCTAGCTTTTGTGG 1 -TACTTGCCCCTTTCG 1 -GCTCTAACATGATAA 1 -GTCTGGGGGAGGCGT 1 -GTGCCTGTAGTCCCA 1 -CCAGCTCTTGTATGC 1 -GGTGCGCACCCGGGA 1 -GGGGCTAGTCCAGGG 1 -ACATGTAAGCAGCAT 1 -TTAAGAAACTTAATT 1 -ATCACCTGTGGATGC 1 -GGCCTGGTACTCCTC 1 -GATGAATCTGTGCTC 1 -GGGCTTTTGCGGGAG 1 -TGTGTCTTTTCCCGA 1 -CCATCTGATATAAAC 1 -CGCCAGAGATCTTGA 1 -TGGGTGTAGATCAAG 1 -GGAGTAGCTGCCTTT 1 -CAAGGTGGCCTGGTA 1 -AGTACATGGTATTTT 1 -GCACTCCAGCCTGGG 1 -GGGATAAATGGCAGC 1 -AATGTATATATCACC 1 -AGTTGGGAGTTCAAG 1 -GAGGATGGCTTGAGG 1 -ATGGTGGTGTGTGCC 1 -CTTTCAGCAAGGACT 1 -CTTTGTCACAGCCCA 1 -CTGAGGCATTTAATA 1 -AGGGCTTTTGCGGGA 1 -CGAAAGCAGAATGTT 1 -TAGAGAGATATATCT 1 -CGCCGAGGTTGGGGG 1 -CCCGCTCTGCACCCT 1 -GTAAGAACATACCTT 1 -TTCTCCTTGGTGGCC 1 -AGAAGAATCCTACAG 1 -TGTGTATGAGTAGTC 1 -CGACATTGAAGTTGA 1 -GAAAAAAAACAAAAA 1 -ATCACTTGAAGTTGG 1 
-GACTTCTCCAGTACT 1 -TTTCTTCCCACTTCC 1 -TATAAGTGTATTTAA 1 -CTATTATGAACTGAG 1 -ATTGGGATTGTCAGG 1 -TACTGATGCACAGCA 1 -TGTCAATCTGTATAT 1 -TGTGAACCATGTGAC 1 -AGTCCTGCTGTCCTA 1 -AGGGGAGCTGTAAAA 1 -GCGACGGGAGGGTCG 1 -TGAACGAACATCTCA 1 -AGTCCCTCTCTCTAA 1 -AAGTTCGCATGTCCT 1 -GCAGAATGTTTTGAT 1 -GGGCAATGGAATGAG 1 -ATTGAAAAAGTGGAG 1 -ATCATGTAGACTCTT 1 -CATTTTAGCAGGGAA 1 -GTGGCCCTCGCTGTG 1 -TCAAATTTCCTGAAT 1 -TTTCCAAAGTAATAC 1 -TGTTTTTTAGCTCAT 1 -TGTCATTTTAAAAAA 1 -TTAGGTTTTGTTTCT 1 -TTAAAAAATAACAAT 1 -CTCAGCAGAAATAAA 1 -TTTGTGAACGCGTGG 1 -TCCATTTGATCATAA 1 -GGCTGGATCTCGGGG 1 -GAGAGAGAATTGAAA 1 -CCCCACTGAAAAAGA 1 -CCTAGCATCCTATAA 1 -GCTCTTAAAAATGCA 1 -TCTTCCGCTCTTTCG 1 -AGCGCATGCCTTTTG 1 -TATTTAGCACTGAAC 1 -GGATGGCTTGAGGTC 1 -AAATCTTGCCGCCTT 1 -GTAACTGGTTGAGCC 1 -AGGTTTACTCACGTC 1 -AGAGCCTCCAGAGAA 1 -GTTTCCCTGCAGTTG 1 -TAGCACTGAACGAAC 1 -ATAACATCTGCCACG 1 -TTCGGTTTTGAAAAC 1 -TTTGATATAAAAAAG 1 -GAAAAATATAGAGTT 1 -GCTGAGAGGGCATCA 1 -GTTTGTCAGTCAGGG 1 -CTCCCGCTCTGGTCC 1 -TGAGGACGCCAGAGA 1 -GGGTCTGGGGGAGGC 1 -GGGCCACACCGTGGG 1 -TTTGGCCTACGGCGA 1 -CTGGTCTCTACAAAA 1 -AATGGAATGAGATTC 1 -ATTAAAACGCAAATC 1 -GAAAGTCCCTCTCTC 1 -CTCTCTCGCTCCGTG 1 -CACTGCGTCGCTGGC 1 -AAATGGAATTGGGAG 1 -AGGCTAGTAGGAAGG 1 -CAAAAATTAACCAGG 1 -CGAGATGTCTCGCTC 1 -ACACTTCATACAGTT 1 -ATCTGCCACGTATAG 1 -TGAGACCAGCCTGGC 1 -GGTTACCAAGACTGT 1 -TGATACTTGTCCTCT 1 -TGTCAGGGAATGTTC 1 -GGGGTGCGCACCCGG 1 -CTGGTTATGTTAGAT 1 -TTGTTGGGAAGGTGG 1 -ATGGAATGAGATTCC 1 -TCTGGCAAAACATGA 1 -CAAAATCTTGCCGCC 1 -TGAATGAGTCCCATC 1 -GGAAGATACCACAAG 1 -GAGATTCTCCAGAGC 1 -CTGGGTTTCATCCAT 1 -TCTTTCTGTGTGCCA 1 -GATCATGAGAAAATT 1 -TTTTTTAGCTCATGA 1 -CATGGATGGTCTAGT 1 -AAACAATGTATTCAT 1 -ACTGTTGAGGACGCC 1 -TCCTTGAGAGCCTCC 1 -TAAGTCTTACATTCT 1 -AAGATACCAAGTCAC 1 -TTTGTCAGTCAGGGG 1 -AACATACCTTGGGTT 2 -CTTATTTTGAAAATT 1 -CTCTCTTAGCTTTTA 1 -TTGTGAACGCGTGGA 1 -GTTCTCCTTGGTGGC 1 -TGAAGCCTGGTCTCT 1 -AACGCGTGGAGGGGC 1 -CCTCTGTGGCCCTCG 1 -AGCAGAGAATGGAAA 1 -TAAGTGTATTTAAAA 1 -CAAAAAATAATAACA 1 -CTTGTTCCTGCTGGG 1 -AAGTTAGCCCCAAGT 1 -TGATATAAAAAAGGT 1 
-TGGGGCCACACCGTG 1 -TGGCCAACATGGTGA 2 -GGATGGTCTAGTCAT 1 -TGGAGGCGTCGCGCT 1 -TATGTTCTGTCCCAT 1 -GATGTACTGTGCCTC 1 -AAAGTCAGAGAGGGG 1 -GATTGGCTGGGCACG 1 -ATGTCCCAGGCACTC 1 -AGAAGCTCAGATGCA 1 -TTTATACACATATAT 1 -TGGGGGAGGCGTCGC 1 -GTTGGGGGAGGGTTT 1 -GGTTGTTTCCAAGAT 1 -TGCAGTTGAGCAGGG 1 -CTAGCACCTCTGGGT 1 -TGGAGCATTCAGACT 1 -GGGTAGGAACAGCAG 1 -TAATGATGATTGCCT 1 -TCCGCCATAGATAAC 1 -GTTTCTGAACCAGTA 1 -TTATCTTCTGCCTCT 1 -GCGTGAGTCTCTCCT 1 -GATGATTGCCTCTGA 1 -CACTCTACTAGACAC 1 -TACTTTCGGTTTTGA 1 -ATCTCTTGAGCTTAG 1 -AAGAAAAAAAATGGA 1 -AACAAAAATTAGCCG 1 -TTTGCCTGGTTGTTT 1 -AGGCACTCTACTAGA 1 -CAAAAAAGGCATGTA 1 -ATGTAAACACTTGGT 1 -GCAATACACTAAGCG 1 -TCTGGGTCTATGTGG 1 -GTCCCTCTCTCTAAC 1 -AGTAACATTTTAGCA 1 -ACTGAAAACGGGAAA 1 -CACGCGACGTTTGTA 1 -GCGCGCTCACCTTTT 1 -GAAAGAGAAAAGAAA 1 -TAATAACAAAAATTA 1 -TGCGTCGCTGGCTTG 1 -GGCGCTGAGGTTTGT 1 -TACCTGGCAATACAC 1 -TTCTCCTGTGGAGTG 1 -GAAAACATGAGGGGG 1 -TTGCCTCTGAAGGTC 1 -GGTTGATCCACTTAG 1 -TTCAGGTTTACTCAC 1 -TTTTAAAAAATTATA 1 -AAGCTTAAATAACTC 1 -GAGAAAGGCTCTTAA 1 -AATGATGATTGCCTC 1 -CTTACGCCTGTAATC 1 -AGCATCATGGAGGTA 1 -CGCCCGGGTAAGCCT 1 -CTTCTTAGAAAAGAT 1 -AAAATTTCCAAAGTA 1 -AAGGGGGTGCGCACC 1 -TCTTCCCACTTCCCC 1 -CCCTGCGGGCCTTGT 1 -CTAGTAGCCTTTCCT 1 -GTAGGAACAGCAGCC 1 -TCCAGCAGAGAATGG 1 -ATAACATGAGTAATT 1 -TTTGAAAATTTCCAA 1 -AGGCTTTTGAGCTAT 1 -GTGGAAGCTCATTTG 1 -ACCCTCACTATGTGG 1 -TGGAAAGTACTGTTT 1 -GGACTTCTCCAGTAC 1 -ATCCATCCGACATTG 1 -CACCGTGGGGAGGAA 1 -CTGGCTGGATTGGTA 1 -ACGTATAGAGCAATT 1 -AATTTATTTACAACA 1 -ATTGCTATGTCCCAG 1 -GTCAGTCAGGGGAGC 1 -ATTAGGCAATATATT 1 -TGTTTGTAAGTCCTG 1 -ATCCCTGAGGCATTT 1 -CATGCCATGCATAGA 1 -GACAAAGTTTAGGGC 1 -TCTAGGCGCCCGCTA 1 -AAAGGCGCGGCGCTG 1 -TGAAGTCAAGCCTAA 1 -CACTTGGTGCCTGAT 1 -TCTCTAACCTGGCAC 1 -GTTTGTGAACGCGTG 1 -GTTTCCAAGATGTAC 1 -GAGAATTGAAAAAGT 1 -TTGGTTGGGCACGGT 1 -TGTCTTCCTTTTTTT 1 -CTAAGTACCTGGCAA 1 -TCCTACAGGGTCATG 1 -AGGGCTTGTTCCTGC 1 -AAACAATCTGCATAT 1 -AAAGGCTCTTAAAAA 1 -ATAGGCCTTTGTTTG 1 -CCCTGTGGTCTTTTC 1 -GAACCATGTGACTTT 1 -CCTGTTGTATTTTAT 1 -CGTGCCTGTGGTCCC 1 -ACAAAGTTTAGGGCG 1 
-TTACTCCATTTGATC 1 -GTCTTTTCGTACAGA 1 diff --git a/example_data/ref_seq/B2M/B2M_rep_mask.bed b/example_data/ref_seq/B2M/B2M_rep_mask.bed deleted file mode 100644 index afb5a5b..0000000 --- a/example_data/ref_seq/B2M/B2M_rep_mask.bed +++ /dev/null @@ -1,9 +0,0 @@ -15 45004404 45004472 MIRc -15 45004490 45004698 MIR -15 45004914 45005216 AluJr -15 45005720 45005802 MIRb -15 45006197 45006327 Charlie7 -15 45006447 45006554 MER102c -15 45006670 45006965 AluSz -15 45006998 45007064 MIRb -15 45007979 45008022 5S diff --git a/example_data/ref_seq/B2M/B2M_reverse_refseq.fa b/example_data/ref_seq/B2M/B2M_reverse_refseq.fa deleted file mode 100644 index b265136..0000000 --- a/example_data/ref_seq/B2M/B2M_reverse_refseq.fa +++ /dev/null @@ -1,2 +0,0 @@ ->B2M -GGCCATACACCTTCTTCATGCCACTCCACAGGAGAAGGGAACATGACCCTGTAGGATTCTTCTTTCCCTGCTAAAATGTTACTCTGTCAATGTTCTCCACATAGTGAGGGTTATCATGTTAGAGCTGTCTATAAATAGTCCTCAGGACAGTGAAACAAAAACATTTTCTCAAGGTCAAAAACTTACCTCCATGATGCTGCTTACATGTCTCGATCTATGAAAAAGACAGTGGAGAAAAAAAAGGAAGACATTAAGTTTTACTTTTAAAATACCATGTACTAACAAATGTCTAAAATGGTTAGAAATAAGGCTGGCAGAATAGGCTGCTGTTCCTACCCATGAATACATTGTTTAGAGCTACCCAGCAGGAACAAGCCCTTCCTACTAGCCTCAGATACCAATCCAGCCAGAAAGTACTGGAGAAGTCCAGGATTATAGGATGCTAGGACAGCAGGACTTACAAACAAAGGCCTATACCTTCTTGAGATGTTCGTTCAGTGCTAAATATACCTGAAGCTGCCACAAAAGCTAGAGGAAGCCAGTAGGTAAGAAGTGTTAAGAGTGTATATGTATTTGTGCAAGTGCTGCTGCTGCTCCCTGCTCAACTGCAGGGAAACTACTGGTTCAGAAACCATGCTGTGCATCAGTATCTCAGCAGGTGCCACTAATCTGATCTTTAAGAACATTCCCTGACAATCCCAATATGCAGATTGTTTATatcagatgggatgggactcattcagggtagtatggccatagacCTTTTTTATATCAAAGCAGCTTTATGATATGACTACTCATACACAACTTTCAGCAGCTTACAAAAGAATGTAAGACTTACCCCACTTAACTATCTTGGGCTGTGACAAAGTCACATGGTTCACACGGCAGGCATACTCATCTTTTTCAGTGGGGGTGAATTCAGTGTAGTACAAGAGATAGAAAGACCAGTCCTTGCTGAAAGACAAGTCTGAATGCTCCACTTTTTCAATTCTCTCTCCATTCTTCAGTAAGTCAACTTCAATGTCGGATGGATGAAACCCAGACACATAGCAATTCAGGAAATTTGACTTTCCATTCTCTGCTGGATGACGTGAGTAAACCTGAATCTTTGGAGTACCTGAGGAATATCGGGAAAAGACACATTAATATTGCCAGGGTATTTCACTTGGGGCTAACTTGGTGTCAAGCTATATCAGGCACCAAGTGTTTACATTTGGTCATCGATTTCTCCCAATTCC
ATTTCCACTCTGGCCAAATGAGCTTCCACCTTCCCAACAAGCCACCTCCATTTTGAAGAATAAACCGTGACTTGGTATCTTTCCCTCATAATTCCTCTATACATGCCTTTTTTGTTTTTTTTCTAGCAGATTTCTAGCAGTATCTTCTGTCACTGGAGATTGCGCTGCATTTTTAAGAGCCTTTCTCTGGAGGCTCTCAAGGACTTCTGATGCCCTCTCAGCACTCATAGCATTCCTTAACACATCACTCAAGAGTCTACATGATTTGGCCCCAAGATACTTTTCAAAGTTCATTTCTCAGTTCATAATAGCCCCCATCAAATTACTCATGTTATTGTACTCTGTTTCCACCCCTTCCATTTTTTTTCTTATGTTTATGCCTTTCTTTTTGGTTCCTGCTCCTGCCTTGATCTACACCCATCTGATTTTCTAAACtgtatgaagtgtctagtagagtgcctgggacatagcaattgctctatacgtggcagatgttattatCTGAGGTTCCTAAGTGGATCAACCCAAGGTATGttctttatttttttatttttttattttttgggatggaatctcattccattgcccaggctggagtgcagtgtgcagtggtgagatctcagctcactgcaacctccgcctcctgggttcaagtgattcctgggactacaggcacacaccaccatgcctggttaatttttgtattttcagtagagacgggatttcaccatgttggccaggctggtcttgaactcccaacttcaagtgatcagcctgccttggcctcccaaagtgttgggattacaggcacgagccaccgtgcccaaccAACCCAAGGTATGTTCTTACCAAACAACAAAACCTCTTTATTTCTGCTGAGGTTTTATATGCCAGACCCCTCTCTGACTTTGTACCTAATTTCATGAGCTAAAAAACAAAGAAACAaaacctaaatgcatacaagagctggcaaataccttaaatggttgagttggacccgataaaatacaacagggataggtgaggactatggcaaatgggacagaacatacTTTCCATTATGATCAAATGGAGTAATGCATGTGACAGTGGGATTTGCGTTTTAATTAGCATCCACAGGTGATTGCTGTAAACTAGCCAGGTTGGGAATATATTGCCTAATGTTTCAGAAAcagtactttccaaaatgagaggcatgactagaccatccatggggaagtgggaagaaaatattataaactctatatttttcattgaaaatgaagagaagtgttagtgctactaaatatacagattgacactAAATATATGTGTATAAAATTCTTTTAAATACACTTATATTCAGGGTACATGATCACTAAAAGGCAGCTACTCCTCCTTGTCTGGGAGGCTGTGGGGAGAAGGAGGAGTACCAGGCCACCTTGACCAGATATATCTCTCTAGAAACACCCTATCATTAAGGAAAGGCTACTAGCCCCATCAAGAGGTGGATTGGGGAATCTAATGAAGACCTGATTTTTTTCATAACATTAAAAGCTAAGAGAGCTCTTTGCATCTGAGCTTCTAATAATAAGAACATATTAAATGCCTCAGGGATCAGAGCACAGATTCATCCTGCCTGGAACTCTCTGTTTGAGGGAAGGCGGCAAGATTTTGTGAGAGCATCACTGTAATCTTTTCTAAGAAGAGGACAAGTAtcagacaggctgggtttgaatttgggctcaaccagttactggctttatgacttttggagagttatttaagcttccaaaacttAACTGAAAAATTAATTTATGCCCACAGTAAAACAAAAACAAAACCCACACACAGGTCAGAGGAGGAAAATAGACCTTCAGAGGCAATCATCATTACATGTTTCTTGTGGTATCTTCCAGAAATGGTCTATGCATGGCATGTATTACTTTGGAAATTTTCAAAATAAGATTTTTTTTTAAATATCAGATTGTTATTTTTTAAAATGACATCTAACATAACCA
GCAAATACCAGTAATGGTGGTGATATATACATTATCCTTTATGTTGTAAATAAATTGTATTCAAATTAAATGCAATTTTCTCATGATCAAAACATTCTGCTTTCGATCATGTTTTGCCAGAGGAAAAGGTGAGCGCGCTTAGTGTATTGCCAGGTACTTAGAAAGTGCTCAAGATCTCTGGCGTCCTCAACAGTCTTGGTAACCATCTTGGATTATCTTTATTAATGGTTTTACAGCTCCCCTGACTGACAAACCTTCACTTCTTTCTTTCTTTTCTTTTCTCtttctttttttttttttttcttttttgagagacagggtctcgctctgttgcccacgctggagtgcactggcgccatgatagctcaaaagcctaagctcaagagatcctcccgcctcagccaccggagcagctgggaccacaggcacgagccaccacacccggctaatttttgttattattttttgtagagaccaggcttcaccatgttggccaggctggtctcaactacggacctcaagccatcctcccgcctcggcctccctaagtgctgggattacaggcgtaagctaccacgcccaaccCCCTCATGTTTTCAAAACCGAAAGTAAGAGGCACAGTACATCTTGGAAACAACCAGGCAAAGAGCCAAAGAGGAAGCCCTCTGTACGAAAAGACCACAGGGCCCATGCCGCCCAGTTTGCTCTGGAGAATCTCACGCAGAAGGCAGGCGTTTTTCTTAAAAAAAAATGCACGAATTACAGCCAAAAGGCATGCGCTCCCGCAAAAGCCCTGGTTAGgcttgacttcaatctcgattgctgccatttatcccctgtgtaattaagtttcttaaaatctcggtgccttagtttcttcatctgtgagaggcagaagataaccatagtagttatctatggcggaagataactgttttcaaaattaaatgacgcaaagcacataaagtccttggcacacagaaagatgtcaataacgggtagttcttatAATTTTTTAAAAGTGACAtgtgatgggaacaaataagttaattattcgaaaccgctttgtatcacagccaagcattctacaaacgtCGCGTGCTGTTTCCTCCCCACGGTGTGGCCCCACATAGACCCAGAGGTGCTAGGACATGCGAACTTAGCGGGCGCCTAGACGAAGTCCACAGCTCTCCAGTCTAAGGGAAGCAGAGCCGCAGCAGACAGGCTTACCCGGGCGACGCCTCCCCCAGACCCCAAGCGCCCCTCCACGCGTTCACAAACCTCAGCGCCGCGCCTTTGGGACGAGCCTACCCGTCCCCCACTCCAGCTGCGCTGGGGGAGCCAGAGGCCCCGCGAAAGAGCGGAAGAGAAACCCTCCCCCAACCTCGGCGCTCTGACGCTTATCGACGCCCTAAACTTTGTCCCGACCCTCCCGTCGCCGTAGGCCAAAGGTCTCCCCTGCTCCCCGCCGAAAGGGGCAAGTAGCGCGCGTCCCGGGTGCGCACCCCCTTCCCCACTCCCAGGCCACCCCGCCGCTTCCCCGAGATCCAGCCCTGGACTAGCCCCACGGCGGGCCACCAAGGAGAACTTGGAGAAGGGAAGTCACGGAGCGAGAGAGCACAGCGAGGGCCACAGAGGGTGCAGAGCGGGAGAGGAAGGACCAGAGCGGGAGGGTAGGAGAGACTCACGCTGGATAGCCTCCAGGCCAGAAAGAGAGAGTAGCGCGAGCACAGCTAAGGCCACGGAGCGAGACATCTCGGCCCGAATGCTGTCAGCTTCAGGAATGCCCGCCAGCGCGACGCCTCCACTTATATTAAACGCGTGCCCAGCCAATCAGGACAAGGCCCGCAGGGACCGTCACCTGTCTCCAAGCCAGCGACGCAGTGCCAGGTTAGAGAGAGGGACTTTCCCGTTTTCAGTTTCCTTTTCTTAGAGTCTCGTGATGTTTAAGAAG diff --git 
a/example_data/ref_seq/B2M/B2M_reverse_refseq.fa_15mers_counts_0 b/example_data/ref_seq/B2M/B2M_reverse_refseq.fa_15mers_counts_0 deleted file mode 100644 index 1a3a268..0000000 Binary files a/example_data/ref_seq/B2M/B2M_reverse_refseq.fa_15mers_counts_0 and /dev/null differ diff --git a/example_data/ref_seq/B2M/B2M_reverse_refseq.fa_15mers_dump b/example_data/ref_seq/B2M/B2M_reverse_refseq.fa_15mers_dump deleted file mode 100644 index a17b98f..0000000 --- a/example_data/ref_seq/B2M/B2M_reverse_refseq.fa_15mers_dump +++ /dev/null @@ -1,5159 +0,0 @@ -TAGCAATTGCTCTAT 1 -GTCCACAGCTCTCCA 1 -AGTGCTACTAAATAT 1 -TGCAATTTTCTCATG 1 -AGGACTATGGCAAAT 1 -CCTAAGTGGATCAAC 1 -TGCTGGATGACGTGA 1 -CACAGAGGGTGCAGA 1 -GCTCCCCGCCGAAAG 1 -GGATAGCCTCCAGGC 1 -GATTCTTCTTTCCCT 1 -ATAAAGTCCTTGGCA 1 -TTCAGTAAGTCAACT 1 -AACTTCAAGTGATCA 1 -GGCAGGCGTTTTTCT 1 -CCAGCGCGACGCCTC 1 -ACCAGCAAATACCAG 1 -ACCTGAATCTTTGGA 1 -CACCCATCTGATTTT 1 -TGGGACTCATTCAGG 1 -ATCAAAGCAGCTTTA 1 -GCTAATTTTTGTTAT 1 -CTGCAACCTCCGCCT 1 -GTCCCGACCCTCCCG 1 -TCTAGCAGATTTCTA 1 -ATTGAAAATGAAGAG 1 -CATTTATCCCCTGTG 1 -ATCTCTCTAGAAACA 1 -CAGGTTGGGAATATA 1 -CCTGTGTAATTAAGT 1 -TTGGTATCTTTCCCT 1 -CACCGGAGCAGCTGG 1 -GTCGCGTGCTGTTTC 1 -CCCTGCTAAAATGTT 1 -CATCTGAGCTTCTAA 1 -AGACAGTGGAGAAAA 1 -AAAAACATTTTCTCA 1 -CTGGGATTACAGGCG 1 -CAGGGTATTTCACTT 1 -TGGTCTTGAACTCCC 1 -TTGCTGCCATTTATC 1 -CTCGGTGCCTTAGTT 1 -CAGTAGGTAAGAAGT 1 -CACATCACTCAAGAG 1 -CATACTCATCTTTTT 1 -AAGGCTGGCAGAATA 1 -CCATCTTGGATTATC 1 -GGGATTACAGGCGTA 1 -AGATGTTATTATCTG 1 -CTCTCCATTCTTCAG 1 -ATGACTAGACCATCC 1 -AAGATTTTTTTTTAA 1 -CGTTCAGTGCTAAAT 1 -AGTCCTTGCTGAAAG 1 -CCACAGAGGGTGCAG 1 -ACCTCAAGCCATCCT 1 -ACCGGAGCAGCTGGG 1 -CAACCCAAGGTATGT 2 -TAGAGCTGTCTATAA 1 -ACAGGGCCCATGCCG 1 -AATGACATCTAACAT 1 -AGCGCCGCGCCTTTG 1 -CTTTTCTAAGAAGAG 1 -AGGTTCCTAAGTGGA 1 -TAGAAAGACCAGTCC 1 -AATCTCATTCCATTG 1 -CGGACCTCAAGCCAT 1 -CTATATCAGGCACCA 1 -TACTTTTCAAAGTTC 1 -GTAGCGCGCGTCCCG 1 -TGACCCTGTAGGATT 1 -CTCCCCCAGACCCCA 1 -TCCATTTCCACTCTG 1 -TGTATTCAAATTAAA 1 -TTCCATTTTTTTTCT 1 -AGAGACAGGGTCTCG 1 
-CCTTTCTCTGGAGGC 1 -CCCCACTCCCAGGCC 1 -GGAGCCAGAGGCCCC 1 -TGCATGTGACAGTGG 1 -TCTATGGCGGAAGAT 1 -TTGGATTATCTTTAT 1 -ACAAGTATCAGACAG 1 -TCACCATGTTGGCCA 2 -GATGGATGAAACCCA 1 -AATGGTTTTACAGCT 1 -TCTGTTGCCCACGCT 1 -TCACTGTAATCTTTT 1 -TTAGAGCTGTCTATA 1 -AAAAATGCACGAATT 1 -AGGCCTATACCTTCT 1 -TGACCAGATATATCT 1 -TAAATGGTTGAGTTG 1 -TCAAGATCTCTGGCG 1 -TTAATTATTCGAAAC 1 -ATGAATACATTGTTT 1 -CTGAAAAATTAATTT 1 -TGTTTCCTCCCCACG 1 -ATAAGTTAATTATTC 1 -GCTCAACTGCAGGGA 1 -TGGGACCACAGGCAC 1 -TAGAGTCTCGTGATG 1 -AAAGACAGTGGAGAA 1 -CCTGGGACATAGCAA 1 -ATCAGATTGTTATTT 1 -AAGTCACATGGTTCA 1 -TCCACGCGTTCACAA 1 -TCCCTCATAATTCCT 1 -GTGCAGAGCGGGAGA 1 -ACTTAACTATCTTGG 1 -AGTTATCTATGGCGG 1 -TCAAATGGAGTAATG 1 -AGGCACGAGCCACCG 1 -CACGAATTACAGCCA 1 -ACACAGAAAGATGTC 1 -GGAGCGAGACATCTC 1 -TTCCTAAGTGGATCA 1 -GCCAGAAAGAGAGAG 1 -CCATGTACTAACAAA 1 -CTGATCTTTAAGAAC 1 -TACAACAGGGATAGG 1 -TTTTGAAGAATAAAC 1 -GGAAATTTTCAAAAT 1 -CGAGCACAGCTAAGG 1 -CGAAAGGGGCAAGTA 1 -ATCTCAGCAGGTGCC 1 -AAATTAAATGCAATT 1 -AGCTGTCTATAAATA 1 -CAATTTTCTCATGAT 1 -GTGTTAGTGCTACTA 1 -TACACAACTTTCAGC 1 -CACAGGCACGAGCCA 1 -GAGCTAAAAAACAAA 1 -GGTTCAGAAACCATG 1 -AACTTCAATGTCGGA 1 -TATTATAAACTCTAT 1 -CTGAATGCTCCACTT 1 -TTTAAATACACTTAT 1 -AGGCAGCTACTCCTC 1 -GGAGATTGCGCTGCA 1 -AGCCGCAGCAGACAG 1 -GCCACCACACCCGGC 1 -CCTAAGCTCAAGAGA 1 -GCAGCTGGGACCACA 1 -TACAAAAGAATGTAA 1 -TTTCTTTTCTCTTTC 1 -AGCTCTTTGCATCTG 1 -ACTTGGTATCTTTCC 1 -ATGCCTTTCTTTTTG 1 -ATGAGCTTCCACCTT 1 -CTGGAGATTGCGCTG 1 -AAGAACATTCCCTGA 1 -ACTAGCCCCACGGCG 1 -ACCTGAAGCTGCCAC 1 -CTGGCAAATACCTTA 1 -CATGTGATGGGAACA 1 -TAAATATACAGATTG 1 -CATCCATGGGGAAGT 1 -AATAGCCCCCATCAA 1 -GGGCTAACTTGGTGT 1 -TGCTAGGACATGCGA 1 -GGCTGGAGTGCAGTG 1 -GTGCCTGGGACATAG 1 -TGCCAGGTACTTAGA 1 -CCCAACCAACCCAAG 1 -TTACCCCACTTAACT 1 -GGACCAGAGCGGGAG 1 -TTGTGAGAGCATCAC 1 -ACAAACCTCAGCGCC 1 -GTATCTTCCAGAAAT 1 -TTTTTTTTTTTCTTT 1 -GCTGCTGTTCCTACC 1 -TCCTCCCGCCTCAGC 1 -AGTTCTTATAATTTT 1 -CTGAAAGACAAGTCT 1 -ATATACCTGAAGCTG 1 -CCCTGGACTAGCCCC 1 -TGCTGCTGCTGCTCC 1 -GCACGAGCCACCACA 1 -GCCCCCATCAAATTA 1 -CGGGCGACGCCTCCC 1 
-GTTTACATTTGGTCA 1 -GCTCAAGAGATCCTC 1 -GCTGGGATTACAGGC 1 -TTCTTTTTGGTTCCT 1 -GAAAGTACTGGAGAA 1 -TTTTATTTTTTGGGA 1 -TGACTTGGTATCTTT 1 -GGCCCCAAGATACTT 1 -CTAAAAGGCAGCTAC 1 -ACCCAAGGTATGTTC 2 -CAAAATGAGAGGCAT 1 -GATTTGCGTTTTAAT 1 -GGCCATAGACCTTTT 1 -TCGGATGGATGAAAC 1 -GCCTTGATCTACACC 1 -TTTAAAAGTGACATG 1 -CCACACCCGGCTAAT 1 -CGCCTCCTGGGTTCA 1 -CTAATTTCATGAGCT 1 -AAAAAAGGAAGACAT 1 -ATTCCATTGCCCAGG 1 -GATAAAATACAACAG 1 -CTAGACCATCCATGG 1 -CAAAGCAGCTTTATG 1 -CCCCAAGATACTTTT 1 -ACCGTGCCCAACCAA 1 -GCTCTGACGCTTATC 1 -AAACCTCAGCGCCGC 1 -CAAAAGCCTAAGCTC 1 -TTTTCTTTTTTGAGA 1 -GGTGAGGACTATGGC 1 -AAAACATTCTGCTTT 1 -AATGGTTGAGTTGGA 1 -AATGTCTAAAATGGT 1 -TCAGGGTACATGATC 1 -AAGGGAAGTCACGGA 1 -AGTCAACTTCAATGT 1 -TTCTTGTGGTATCTT 1 -TATTGCCAGGGTATT 1 -TAAATGACGCAAAGC 1 -AATGCACGAATTACA 1 -GGTCTATGCATGGCA 1 -CCCATGAATACATTG 1 -TTGGGATGGAATCTC 1 -ACACACCACCATGCC 1 -ATCTTTCCCTCATAA 1 -CCGGGCGACGCCTCC 1 -GCATTCTACAAACGT 1 -TGGGTTTGAATTTGG 1 -CATTCCCTGACAATC 1 -TGCGCTGCATTTTTA 1 -CGCCTCAGCCACCGG 1 -GACTGACAAACCTTC 1 -AGGCCACCCCGCCGC 1 -ACCTTCCCAACAAGC 1 -ATGTTCTCCACATAG 1 -ACATCTAACATAACC 1 -GCCTCCCCCAGACCC 1 -CAGAGGCCCCGCGAA 1 -TTTAGAGCTACCCAG 1 -TGGTTGAGTTGGACC 1 -ATATGTGTATAAAAT 1 -ATTTTTTGGGATGGA 1 -AATATCGGGAAAAGA 1 -TGAGCTAAAAAACAA 1 -TGTCAGCTTCAGGAA 1 -TGGGATTTGCGTTTT 1 -AAATGCACGAATTAC 1 -TTGGGACGAGCCTAC 1 -GGACCTCAAGCCATC 1 -GCTCTGTTGCCCACG 1 -GCTTACATGTCTCGA 1 -TCTTTGGAGTACCTG 1 -ATTTCTCAGTTCATA 1 -GGGCAAGTAGCGCGC 1 -TCAAAGCAGCTTTAT 1 -TGCGCACCCCCTTCC 1 -AGGCATGCGCTCCCG 1 -TCAAAGTTCATTTCT 1 -AAACCTAAATGCATA 1 -CATGCGCTCCCGCAA 1 -GTGGGGAGAAGGAGG 1 -TATATTTTTCATTGA 1 -ACGAGCCACCGTGCC 1 -CAGGCCACCTTGACC 1 -CCAGTTTGCTCTGGA 1 -ATGGAGTAATGCATG 1 -AGCAGATTTCTAGCA 1 -GCCCCACGGCGGGCC 1 -TATGTTCTTACCAAA 1 -AAATGACGCAAAGCA 1 -ATGATTTGGCCCCAA 1 -AGACCACAGGGCCCA 1 -CGGTGCCTTAGTTTC 1 -TCTCTGTTTGAGGGA 1 -AGTCACGGAGCGAGA 1 -TTTATATCAGATGGG 1 -CTAAATGCATACAAG 1 -GTAGCGCGAGCACAG 1 -CGCCTCGGCCTCCCT 1 -TTTGACTTTCCATTC 1 -GCAAAGAGCCAAAGA 1 -AGGGCCACAGAGGGT 1 -GATTTTTTTCATAAC 1 -TGGGACAGAACATAC 1 
-TAAAATGGTTAGAAA 1 -CCTCCCCACGGTGTG 1 -GGGTGCAGAGCGGGA 1 -GGCCAAATGAGCTTC 1 -GAGGGTTATCATGTT 1 -GACCTTTTTTATATC 1 -GGGACAGAACATACT 1 -CTGAGGTTCCTAAGT 1 -CTCAGCTCACTGCAA 1 -GATTTCACCATGTTG 1 -GTGACATGTGATGGG 1 -TCTTATAATTTTTTA 1 -AAGATCTCTGGCGTC 1 -CCCTCATAATTCCTC 1 -CAAGCGCCCCTCCAC 1 -TAATCTTTTCTAAGA 1 -GCGTAAGCTACCACG 1 -TTTGGAGTACCTGAG 1 -TAGTAGTTATCTATG 1 -CTTTTCAAAGTTCAT 1 -TCTCAACTACGGACC 1 -TGTTTGAGGGAAGGC 1 -TTTCACTTGGGGCTA 1 -TCTAATGAAGACCTG 1 -AAACACCCTATCATT 1 -TACAGGCACGAGCCA 1 -TTTTCAAAGTTCATT 1 -TAAAAGGCAGCTACT 1 -TCTATAAATAGTCCT 1 -ATAACATTAAAAGCT 1 -TCTTAAAAAAAAATG 1 -TTCCACCCCTTCCAT 1 -TTCAATGTCGGATGG 1 -GCTCAACCAGTTACT 1 -TGCCACTAATCTGAT 1 -TCTATGCATGGCATG 1 -GAAAAGACACATTAA 1 -AGGCAATCATCATTA 1 -TTACATGTCTCGATC 1 -GCCCCGCGAAAGAGC 1 -GCTTTCGATCATGTT 1 -CACACGGCAGGCATA 1 -ATTTTTGTTATTATT 1 -GCGTGCCCAGCCAAT 1 -CCTCCATGATGCTGC 1 -AAAGAGCGGAAGAGA 1 -GCCAAAGAGGAAGCC 1 -TGCCTTTCTTTTTGG 1 -TCCAGCCAGAAAGTA 1 -TAATTTCATGAGCTA 1 -AAAGACCAGTCCTTG 1 -TACCATGTACTAACA 1 -TCTTTTCTTTTCTCT 1 -TAAACCGTGACTTGG 1 -AGAGAAGTGTTAGTG 1 -AATACACTTATATTC 1 -CAGAAACAGTACTTT 1 -TTGGTAACCATCTTG 1 -AAACAAAGGCCTATA 1 -GAAGAGAAACCCTCC 1 -GGAGGAAAATAGACC 1 -TACATTTGGTCATCG 1 -CAGCTCTCCAGTCTA 1 -ACTCCACAGGAGAAG 1 -CACGCCCAACCCCCT 1 -TCCCAATATGCAGAT 1 -CGCGTTCACAAACCT 1 -ACAGTACTTTCCAAA 1 -AGTGTTTACATTTGG 1 -CCATGAATACATTGT 1 -ATTATCTGAGGTTCC 1 -CAGGCTTACCCGGGC 1 -GGTTGAGTTGGACCC 1 -CAGAAAGATGTCAAT 1 -GTGGGATTTGCGTTT 1 -GGCCACGGAGCGAGA 1 -GATGAAACCCAGACA 1 -CTCTATACGTGGCAG 1 -TATGGCCATAGACCT 1 -ACATGATCACTAAAA 1 -TTCGATCATGTTTTG 1 -GAGGAGGAAAATAGA 1 -ATCGACGCCCTAAAC 1 -CAAGTGCTGCTGCTG 1 -GTCGGATGGATGAAA 1 -GACCCGATAAAATAC 1 -CTACGGACCTCAAGC 1 -GCTAGGACATGCGAA 1 -GAGCTTCCACCTTCC 1 -AGTACCTGAGGAATA 1 -AGTACATCTTGGAAA 1 -CAGTGGGATTTGCGT 1 -ACATTAAAAGCTAAG 1 -TTTACTTTTAAAATA 1 -AGGGTGCAGAGCGGG 1 -TTACAAACAAAGGCC 1 -GCAGAGCCGCAGCAG 1 -TACCCGGGCGACGCC 1 -GTTAAGAGTGTATAT 1 -TTTGCTCTGGAGAAT 1 -CACGGAGCGAGAGAG 1 -ACCTCCATTTTGAAG 1 -ATAATAAGAACATAT 1 -GGTTGGGAATATATT 1 -GTGTATATGTATTTG 1 
-GGAAGAAAATATTAT 1 -CCAACCCCCTCATGT 1 -AGGCTTCACCATGTT 1 -TTTTTTTTTAAATAT 1 -TTTTCAGTTTCCTTT 1 -GACCACAGGCACGAG 1 -ACGTCGCGTGCTGTT 1 -ATTTTTTTTTAAATA 1 -TTTGTGCAAGTGCTG 1 -CTGTTTGAGGGAAGG 1 -TTTTTTGAGAGACAG 1 -GACCATCCATGGGGA 1 -TAAGATTTTTTTTTA 1 -GCTGTTCCTACCCAT 1 -GGCCCGCAGGGACCG 1 -CCCGCCTCAGCCACC 1 -AATGAAGACCTGATT 1 -CATTCTGCTTTCGAT 1 -GTGTTTACATTTGGT 1 -CTCTAGAAACACCCT 1 -CATTAATATTGCCAG 1 -CCAGCGACGCAGTGC 1 -CAGACCCCAAGCGCC 1 -CTCCAGGCCAGAAAG 1 -AATTCAGGAAATTTG 1 -CCTCCCTAAGTGCTG 1 -GCGTCCTCAACAGTC 1 -TTTTTTAAATATCAG 1 -CTAGAGGAAGCCAGT 1 -GCCATGATAGCTCAA 1 -TAGTGCTACTAAATA 1 -TGTAGAGACCAGGCT 1 -CGGGTGCGCACCCCC 1 -ATTGTTTATATCAGA 1 -TGAAGAGAAGTGTTA 1 -CCACGCTGGAGTGCA 1 -GTAAAACAAAAACAA 1 -CAACAAGCCACCTCC 1 -CACTTCTTTCTTTCT 1 -ATCACTGTAATCTTT 1 -TGAGATCTCAGCTCA 1 -AAAAGACCACAGGGC 1 -GTCAGAGGAGGAAAA 1 -AAGGCAGGCGTTTTT 1 -TGCCAGGGTATTTCA 1 -GCTCTTTGCATCTGA 1 -ATGTCGGATGGATGA 1 -GACGCCCTAAACTTT 1 -TCATACACAACTTTC 1 -AAAGAGAGAGTAGCG 1 -TCATGTTATTGTACT 1 -AATGTTCTCCACATA 1 -ATCAGACAGGCTGGG 1 -GGCTTGACTTCAATC 1 -CTAGACGAAGTCCAC 1 -GAGCGAGACATCTCG 1 -GCTTATCGACGCCCT 1 -TTCTCCCAATTCCAT 1 -ATAAGGCTGGCAGAA 1 -CAACTTTCAGCAGCT 1 -TTTGAAGAATAAACC 1 -ATGTTTATGCCTTTC 1 -CTTGAGATGTTCGTT 1 -GTCACGGAGCGAGAG 1 -TATATACATTATCCT 1 -GGCAGATGTTATTAT 1 -AAACATTCTGCTTTC 1 -TCAACTACGGACCTC 1 -GCAGTGGTGAGATCT 1 -TGAGCTTCTAATAAT 1 -AGGCTGCTGTTCCTA 1 -CAATGTCGGATGGAT 1 -GAGAATCTCACGCAG 1 -ACCATCTTGGATTAT 1 -AGATTTTGTGAGAGC 1 -CAAAATAAGATTTTT 1 -ACATCTCGGCCCGAA 1 -CTAGCAGATTTCTAG 1 -AGATAACTGTTTTCA 1 -ATGGCCATAGACCTT 1 -GGACATAGCAATTGC 1 -CTTCCAAAACTTAAC 1 -GCTCCCCTGACTGAC 1 -TGAAACAAAAACATT 1 -GCTCCCTGCTCAACT 1 -GTACCTGAGGAATAT 1 -CCTGGGTTCAAGTGA 1 -AGAAAGTGCTCAAGA 1 -CAATATGCAGATTGT 1 -TGTCTCGATCTATGA 1 -CTTAGAGTCTCGTGA 1 -GAGGCAGAAGATAAC 1 -TCCTTGCTGAAAGAC 1 -AGCAGGTGCCACTAA 1 -GACAGTGGGATTTGC 1 -TAGAAAGTGCTCAAG 1 -CTCTTTGCATCTGAG 1 -GCACACACCACCATG 1 -GCGCCATGATAGCTC 1 -CATACAAGAGCTGGC 1 -CACAGCTAAGGCCAC 1 -CTCTCAGCACTCATA 1 -AAATACAACAGGGAT 1 -AAGATGTCAATAACG 1 -GACGCCTCCACTTAT 1 
-ACCATCCATGGGGAA 1 -GCGTTCACAAACCTC 1 -ACAGCTCCCCTGACT 1 -ATCTTTATTAATGGT 1 -AATTTTCAAAATAAG 1 -AAGGACTTCTGATGC 1 -CTGTAGGATTCTTCT 1 -CCACTTATATTAAAC 1 -AATCCCAATATGCAG 1 -CAGTTTGCTCTGGAG 1 -ACTCATTCAGGGTAG 1 -AGAGAGCTCTTTGCA 1 -ACACCCTATCATTAA 1 -ACATTCTGCTTTCGA 1 -AGCTACTCCTCCTTG 1 -CCCCCTTCCCCACTC 1 -CAGTATCTCAGCAGG 1 -AGGAACAAGCCCTTC 1 -GTTTTTCTTAAAAAA 1 -TCTTTTTTTTTTTTT 1 -ATATACAGATTGACA 1 -TATGTATTTGTGCAA 1 -CTTTGCATCTGAGCT 1 -GACACTAAATATATG 1 -GCACAGCTAAGGCCA 1 -CGAAGTCCACAGCTC 1 -CTTCCTACTAGCCTC 1 -TCTACAAACGTCGCG 1 -ACTTCAATGTCGGAT 1 -AAAGTGCTCAAGATC 1 -CAGTTTCCTTTTCTT 1 -GAAAGATGTCAATAA 1 -GGTATCTTTCCCTCA 1 -AGGCCAAAGGTCTCC 1 -TGGGAACAAATAAGT 1 -CATGCCGCCCAGTTT 1 -CGGGAAAAGACACAT 1 -TCAAGGTCAAAAACT 1 -AAGGCAGCTACTCCT 1 -AGAAAATATTATAAA 1 -GGGATGGAATCTCAT 1 -TACAAGAGATAGAAA 1 -TTGTTATTATTTTTT 1 -GCACTCATAGCATTC 1 -AACAAAAACATTTTC 1 -AGAAAAAAAAGGAAG 1 -AAGATAACTGTTTTC 1 -TCAGATGGGATGGGA 1 -ATCTGAGGTTCCTAA 1 -TCTCAGCAGGTGCCA 1 -TTCTTAAAAAAAAAT 1 -ACATTCCCTGACAAT 1 -ATACACCTTCTTCAT 1 -ATCTGATTTTCTAAA 1 -ATCTCACGCAGAAGG 1 -AGGGATAGGTGAGGA 1 -CATGCTGTGCATCAG 1 -CATTTTTTTTCTTAT 1 -TGTGGTATCTTCCAG 1 -AAAGACAAGTCTGAA 1 -TTTCCTTTTCTTAGA 1 -CAGAAAGTACTGGAG 1 -ATTCTTCAGTAAGTC 1 -AGCACATAAAGTCCT 1 -TACACTTATATTCAG 1 -TGGGGAGAAGGAGGA 1 -AAAGGCCTATACCTT 1 -AAAGGAAGACATTAA 1 -GAATATATTGCCTAA 1 -TGGTTCACACGGCAG 1 -GTAGGTAAGAAGTGT 1 -CCTCAGGACAGTGAA 1 -TGGAAATTTTCAAAA 1 -ATTTGACTTTCCATT 1 -AATAAGGCTGGCAGA 1 -CTGGGTTCAAGTGAT 1 -TGGCCTCCCAAAGTG 1 -GAGCCAGAGGCCCCG 1 -TCGAAACCGCTTTGT 1 -CATTCTCTGCTGGAT 1 -CAGAATAGGCTGCTG 1 -TACTCATCTTTTTCA 1 -ATCCTTTATGTTGTA 1 -CTCTGGAGGCTCTCA 1 -TCATTCAGGGTAGTA 1 -GTTACTCTGTCAATG 1 -ACATGTTTCTTGTGG 1 -TTCCCCACTCCCAGG 1 -TAAAATGTTACTCTG 1 -CTGGAGGCTCTCAAG 1 -GAAGCTGCCACAAAA 1 -GAATGCTCCACTTTT 1 -TCTTCCAGAAATGGT 1 -GGACTTCTGATGCCC 1 -AAGATAACCATAGTA 1 -TAAGGCTGGCAGAAT 1 -ATTCCATTTCCACTC 1 -TATCACAGCCAAGCA 1 -CCTAATGTTTCAGAA 1 -GAGGAGTACCAGGCC 1 -AATTGCTCTATACGT 1 -CTTTCCCTCATAATT 1 -CTACCCATGAATACA 1 -TTTTAAGAGCCTTTC 1 -ACCAAGGAGAACTTG 1 
-CCACTTTTTCAATTC 1 -AAGTATCAGACAGGC 1 -TAGTCCTCAGGACAG 1 -TTCGAAACCGCTTTG 1 -AAGAGGAAGCCCTCT 1 -CCAAGCATTCTACAA 1 -AGCTCAAGAGATCCT 1 -AAGAGAGCTCTTTGC 1 -AGCCCCATCAAGAGG 1 -AAGCTAAGAGAGCTC 1 -TGGGCTGTGACAAAG 1 -TGGGAGGCTGTGGGG 1 -GTTACTGGCTTTATG 1 -CAAGAGATAGAAAGA 1 -CCCTCCCGTCGCCGT 1 -CAAAAACATTTTCTC 1 -CTTCAGTAAGTCAAC 1 -AGGAAAAGGTGAGCG 1 -TCAGCACTCATAGCA 1 -TACTGGCTTTATGAC 1 -GTTTTCAAAACCGAA 1 -GGCTGGCAGAATAGG 1 -TTCTAATAATAAGAA 1 -ACCCCCTTCCCCACT 1 -ACAGTCTTGGTAACC 1 -CTCGGCCTCCCTAAG 1 -ATGTTCTTACCAAAC 1 -AAGCCTAAGCTCAAG 1 -CAAAACATTCTGCTT 1 -TGTTTATGCCTTTCT 1 -ATCTTTGGAGTACCT 1 -AGTAGGTAAGAAGTG 1 -CTTTTTCAGTGGGGG 1 -CAAGCCAGCGACGCA 1 -ACTCTGTTTCCACCC 1 -ACAAACAAAGGCCTA 1 -TAATTATTCGAAACC 1 -GCTGTCAGCTTCAGG 1 -ATATTGCCTAATGTT 1 -GACACATTAATATTG 1 -ACTGTTTTCAAAATT 1 -GCAAAAGCCCTGGTT 1 -TGTAATTAAGTTTCT 1 -CCCAACAAGCCACCT 1 -TTAATGGTTTTACAG 1 -TTTTATTTTTTTATT 1 -CAGTATCTTCTGTCA 1 -AAGGGAACATGACCC 1 -TAAAAGTGACATGTG 1 -TTCTTATGTTTATGC 1 -GATAGGTGAGGACTA 1 -GATATGACTACTCAT 1 -CTTTCTTTTTTTTTT 1 -ATTGCCAGGGTATTT 1 -GGTTATCATGTTAGA 1 -CCCGTTTTCAGTTTC 1 -GGAGCAGCTGGGACC 1 -GGAGGCTCTCAAGGA 1 -AGTTCATTTCTCAGT 1 -TTGTATTCAAATTAA 1 -TAGCGCGAGCACAGC 1 -CCGCTTTGTATCACA 1 -CCTTAACACATCACT 1 -TCTTTTAAATACACT 1 -ACCAGGCAAAGAGCC 1 -CATGTATTACTTTGG 1 -GGGAACAAATAAGTT 1 -CTTCTTGAGATGTTC 1 -CCTTCACTTCTTTCT 1 -GCAAGTAGCGCGCGT 1 -GCAAAGCACATAAAG 1 -AGTAGAGTGCCTGGG 1 -AACTGCAGGGAAACT 1 -AGTCCAGGATTATAG 1 -GAGATTGCGCTGCAT 1 -AAGGACCAGAGCGGG 1 -CAAGTATCAGACAGG 1 -TTACAGGCGTAAGCT 1 -TAGGACATGCGAACT 1 -TTTTGGGATGGAATC 1 -AAAATGAGAGGCATG 1 -CTCTGTTTCCACCCC 1 -TACCAGGCCACCTTG 1 -TCTTGAACTCCCAAC 1 -TCAGATACCAATCCA 1 -ATCTCGGTGCCTTAG 1 -GGTACTTAGAAAGTG 1 -TCAAGCTATATCAGG 1 -TTTTTTTTAAATATC 1 -AAGAATGTAAGACTT 1 -GAGTAATGCATGTGA 1 -TTTCTCAAGGTCAAA 1 -GGTAACCATCTTGGA 1 -ATGGGACAGAACATA 1 -AACCTCAGCGCCGCG 1 -CTTATAATTTTTTAA 1 -TTCATGCCACTCCAC 1 -CTGGGACCACAGGCA 1 -TTATTTTTTTATTTT 2 -ATTCTACAAACGTCG 1 -AGCCATCCTCCCGCC 1 -CTCAGCGCCGCGCCT 1 -TGAAGACCTGATTTT 1 -TCTCTCCATTCTTCA 1 -GAGCTGGCAAATACC 1 
-CAATTCTCTCTCCAT 1 -CCTCCAGGCCAGAAA 1 -ACCAATCCAGCCAGA 1 -ATATCGGGAAAAGAC 1 -GTTTGCTCTGGAGAA 1 -TTGGAAATTTTCAAA 1 -AAACAAAGAAACAAA 1 -CAAGGCCCGCAGGGA 1 -AATTAAATGCAATTT 1 -CTGGGTTTGAATTTG 1 -GCAGAAGATAACCAT 1 -GAGGCTGTGGGGAGA 1 -TGTATTTGTGCAAGT 1 -AGACATCTCGGCCCG 1 -GGTATGTTCTTTATT 1 -GTTGGCCAGGCTGGT 2 -TAGCAATTCAGGAAA 1 -CGTGCCCAGCCAATC 1 -AAGATTTTGTGAGAG 1 -TATTTCACTTGGGGC 1 -CTTACCTCCATGATG 1 -GTGGCCCCACATAGA 1 -TCTCGATCTATGAAA 1 -AGAATGTAAGACTTA 1 -TACATGATTTGGCCC 1 -AGAGACGGGATTTCA 1 -ACAGTACATCTTGGA 1 -CGCAAAAGCCCTGGT 1 -GCTGTGACAAAGTCA 1 -TAGGAGAGACTCACG 1 -TCATGCCACTCCACA 1 -AGGGCCCATGCCGCC 1 -ATGACTACTCATACA 1 -GTTGCCCACGCTGGA 1 -GGATTATCTTTATTA 1 -AGCCCTCTGTACGAA 1 -AAAGTAAGAGGCACA 1 -TCCCAACAAGCCACC 1 -TTTGGGACGAGCCTA 1 -TCTCTCTAGAAACAC 1 -GAAGGCGGCAAGATT 1 -AAAGAAACAAAACCT 1 -TTATTATCTGAGGTT 1 -ACAAACCTTCACTTC 1 -TTAATTTTTGTATTT 1 -TTATATGCCAGACCC 1 -CTAAGTGCTGGGATT 1 -GACACATAGCAATTC 1 -TTTTACAGCTCCCCT 1 -TTTTGGTTCCTGCTC 1 -AGCTCACTGCAACCT 1 -GAAGAGGACAAGTAT 1 -AGAGAGGGACTTTCC 1 -AAAAGCTAAGAGAGC 1 -GTCATCGATTTCTCC 1 -TGTGAGAGCATCACT 1 -TTAAAATACCATGTA 1 -TTGAATTTGGGCTCA 1 -TCTATATTTTTCATT 1 -AATGCCCGCCAGCGC 1 -AAGCTACCACGCCCA 1 -GCATGCGCTCCCGCA 1 -CTCGGCCCGAATGCT 1 -CTTAGTGTATTGCCA 1 -GCGGGAGGGTAGGAG 1 -TGTAGGATTCTTCTT 1 -ATACCAGTAATGGTG 1 -CTCACTGCAACCTCC 1 -CCTTTCTTTTTGGTT 1 -GAAACCCTCCCCCAA 1 -GGAACATGACCCTGT 1 -ACAACAGGGATAGGT 1 -GAAACTACTGGTTCA 1 -GTGTGCAGTGGTGAG 1 -ATCTTCTGTCACTGG 1 -TAGACCCAGAGGTGC 1 -TAAAAAACAAAGAAA 1 -CTTCTAATAATAAGA 1 -TCTGCTGGATGACGT 1 -AAATTACTCATGTTA 1 -CAGCCAAAAGGCATG 1 -CAGCACTCATAGCAT 1 -GCAGCTACTCCTCCT 1 -CTGCCTTGGCCTCCC 1 -TATATTGCCTAATGT 1 -CCCTAAGTGCTGGGA 1 -GGCCACAGAGGGTGC 1 -AGGTCTCCCCTGCTC 1 -AGCAGAGCCGCAGCA 1 -AATATATTGCCTAAT 1 -AAACCCACACACAGG 1 -ACGAATTACAGCCAA 1 -ATGGTCTATGCATGG 1 -AATGCATGTGACAGT 1 -GTTTATGCCTTTCTT 1 -TATTTTTCATTGAAA 1 -CTTTATTTTTTTATT 1 -CATCATTACATGTTT 1 -TTATAAACTCTATAT 1 -TTAAGTTTCTTAAAA 1 -AGCTTCTAATAATAA 1 -AAGTAAGAGGCACAG 1 -CTAGAAACACCCTAT 1 -ATTTTTTTTCTTATG 1 -ACTTTTCAAAGTTCA 1 
-TAAGAGGCACAGTAC 1 -TTCCACCTTCCCAAC 1 -CAAAAACAAAACCCA 1 -GGACCCGATAAAATA 1 -TTCAAAACCGAAAGT 1 -AGATACTTTTCAAAG 1 -AACATACTTTCCATT 1 -GGCGTCCTCAACAGT 1 -GCCAGACCCCTCTCT 1 -GGCGTAAGCTACCAC 1 -AGCACAGCTAAGGCC 1 -GCGTCCCGGGTGCGC 1 -TCTTTATTTCTGCTG 1 -CTGCTAAAATGTTAC 1 -TGCAAGTGCTGCTGC 1 -ACTTCTGATGCCCTC 1 -GCACAGTACATCTTG 1 -TGCTTTCGATCATGT 1 -CCAGGCCACCTTGAC 1 -TACCCATGAATACAT 1 -CTAAATATACCTGAA 1 -TCTGACTTTGTACCT 1 -GAGGAAGCCAGTAGG 1 -AGAGCGGGAGAGGAA 1 -ACGGGTAGTTCTTAT 1 -ATGCCTCAGGGATCA 1 -ATTCTCTCTCCATTC 1 -AGTTGGACCCGATAA 1 -AAAGTCCTTGGCACA 1 -TAGCATTCCTTAACA 1 -TACCTCCATGATGCT 1 -GAGCAGCTGGGACCA 1 -AACAAGCCACCTCCA 1 -GCTCCCGCAAAAGCC 1 -TTCTGCTGAGGTTTT 1 -CCAGTAATGGTGGTG 1 -GAAAAAAAAGGAAGA 1 -CACACAGAAAGATGT 1 -GCCAGTAGGTAAGAA 1 -AGCAGCTTACAAAAG 1 -TATCTCTCTAGAAAC 1 -TCTTCTTTCCCTGCT 1 -AGTCCTCAGGACAGT 1 -CCACAGGGCCCATGC 1 -AGGATTCTTCTTTCC 1 -TACAGCCAAAAGGCA 1 -AGCCACCACACCCGG 1 -TAGTTTCTTCATCTG 1 -CACAGATTCATCCTG 1 -TTAGAGAGAGGGACT 1 -TGTTCTTACCAAACA 1 -GTCTCCCCTGCTCCC 1 -CCCGATAAAATACAA 1 -GGCAGGCATACTCAT 1 -TGATTGCTGTAAACT 1 -AACCTAAATGCATAC 1 -ACATTGTTTAGAGCT 1 -AATGGTTAGAAATAA 1 -CAGGAGAAGGGAACA 1 -TGGGGCTAACTTGGT 1 -CACTAAAAGGCAGCT 1 -ATCCTCCCGCCTCGG 1 -GTCCTTGGCACACAG 1 -ATAAACCGTGACTTG 1 -AACGGGTAGTTCTTA 1 -TGAGGTTCCTAAGTG 1 -ATCATGTTAGAGCTG 1 -GCTGTCTATAAATAG 1 -GAGATGTTCGTTCAG 1 -CTCCCAACTTCAAGT 1 -CTTTTAAATACACTT 1 -AGGAAAATAGACCTT 1 -GTCTAGTAGAGTGCC 1 -TTGTGCAAGTGCTGC 1 -AGCTTCAGGAATGCC 1 -TCATTTCTCAGTTCA 1 -AGGCGTAAGCTACCA 1 -GCCGAAAGGGGCAAG 1 -ACAGCTCTCCAGTCT 1 -GACTTACAAACAAAG 1 -GTATGAAGTGTCTAG 1 -TTGTAGAGACCAGGC 1 -AAGACAGTGGAGAAA 1 -CGGTGTGGCCCCACA 1 -TTACTTTGGAAATTT 1 -CGAATTACAGCCAAA 1 -CTGCTCCTGCCTTGA 1 -GCGCCCCTCCACGCG 1 -AATTTTTGTTATTAT 1 -TGTATGAAGTGTCTA 1 -TCTTTTCTCTTTCTT 1 -AAAAGAATGTAAGAC 1 -TGGATTGGGGAATCT 1 -CCCATCAAGAGGTGG 1 -CCCGAATGCTGTCAG 1 -GCCTCCCTAAGTGCT 1 -CGGCGGGCCACCAAG 1 -CAGTGTAGTACAAGA 1 -AGAGCGGGAGGGTAG 1 -CACCAAGGAGAACTT 1 -CCGCGAAAGAGCGGA 1 -GAGCGCGCTTAGTGT 1 -GGAAGCAGAGCCGCA 1 -GACATGTGATGGGAA 1 -GACCAGATATATCTC 1 
-CTTTCAGCAGCTTAC 1 -CAGCCCTGGACTAGC 1 -GATTATCTTTATTAA 1 -TCTTTATTTTTTTAT 1 -GTTATTTAAGCTTCC 1 -CCTGCCTTGGCCTCC 1 -TTTATATCAAAGCAG 1 -TTTCCCTCATAATTC 1 -AAAAGGTGAGCGCGC 1 -AAGCTGCCACAAAAG 1 -CCCCATCAAGAGGTG 1 -TATCAGATTGTTATT 1 -AGCCTAAGCTCAAGA 1 -ATGCATGGCATGTAT 1 -ATCTGAGCTTCTAAT 1 -ACAGGTCAGAGGAGG 1 -GTGAAACAAAAACAT 1 -GCGTTTTAATTAGCA 1 -CAAAGTGTTGGGATT 1 -ACCATGCCTGGTTAA 1 -TCCCCTGCTCCCCGC 1 -GCTAACTTGGTGTCA 1 -AGCGGGCGCCTAGAC 1 -TTCCTGGGACTACAG 1 -AAGCAGCTTTATGAT 1 -ATAGACCTTCAGAGG 1 -TTTCTTTCTTTTCTT 1 -AAACGTCGCGTGCTG 1 -CAGCAGGAACAAGCC 1 -GAATATCGGGAAAAG 1 -ATTGTACTCTGTTTC 1 -TGACAGTGGGATTTG 1 -ATTAGCATCCACAGG 1 -ACTAAAAGGCAGCTA 1 -GTGCTACTAAATATA 1 -CTTGGTGTCAAGCTA 1 -CTCGGCGCTCTGACG 1 -TGGAGTACCTGAGGA 1 -ATCGGGAAAAGACAC 1 -GTATCTTCTGTCACT 1 -AGCAGACAGGCTTAC 1 -TCCAAGCCAGCGACG 1 -CAGCAGCTTACAAAA 1 -TTCTTTTCTTTTCTC 1 -TACTCATGTTATTGT 1 -TGACATCTAACATAA 1 -CACCTTCTTCATGCC 1 -TAGTGTATTGCCAGG 1 -TTGATCTACACCCAT 1 -TCAGAGCACAGATTC 1 -ACTTGGAGAAGGGAA 1 -ACAAAGTCACATGGT 1 -TTACATTTGGTCATC 1 -GCGGAAGAGAAACCC 1 -TTATCGACGCCCTAA 1 -TTTTTTTTCTTTTTT 1 -CATTCCTTAACACAT 1 -ACAGTGAAACAAAAA 1 -CGCCTTTGGGACGAG 1 -GCCACCTCCATTTTG 1 -GGAATATATTGCCTA 1 -CCCCTGACTGACAAA 1 -GCCACTAATCTGATC 1 -GACGCAAAGCACATA 1 -ATAGGTGAGGACTAT 1 -TATAAAATTCTTTTA 1 -GCATACTCATCTTTT 1 -CCATTTCCACTCTGG 1 -TTATAATTTTTTAAA 1 -TCTGGCGTCCTCAAC 1 -ATCTCGATTGCTGCC 1 -CAAATACCAGTAATG 1 -TCTTACCAAACAACA 1 -ACTGAAAAATTAATT 1 -GAGGGCCACAGAGGG 1 -ATTCTTCTTTCCCTG 1 -CGAGAGAGCACAGCG 1 -TTCCACTCTGGCCAA 1 -AACAAATAAGTTAAT 1 -TAGAGACGGGATTTC 1 -ACCCTGTAGGATTCT 1 -TATCATTAAGGAAAG 1 -ATTAAGTTTCTTAAA 1 -GGATTTCACCATGTT 1 -GCTTCACCATGTTGG 1 -CCTTTTCTTAGAGTC 1 -CCAACCTCGGCGCTC 1 -AAGTGACATGTGATG 1 -GGCGGAAGATAACTG 1 -TGTTCTCCACATAGT 1 -GTAATGCATGTGACA 1 -ATATTCAGGGTACAT 1 -ATATCAGATTGTTAT 1 -CAGGCAAAGAGCCAA 1 -ATAAATAGTCCTCAG 1 -AGAAGTGTTAGTGCT 1 -GAGTCTACATGATTT 1 -GTAAACTAGCCAGGT 1 -GATTTTTTTTTAAAT 1 -CTCCACATAGTGAGG 1 -CAGTGGAGAAAAAAA 1 -TTCTTCTTTCCCTGC 1 -TGGTATCTTTCCCTC 1 -CTCCACTTATATTAA 1 -AGCATCACTGTAATC 1 
-TGTCTGGGAGGCTGT 1 -AGTGCACTGGCGCCA 1 -ACACCTTCTTCATGC 1 -TGTGCAAGTGCTGCT 1 -ACCAAGTGTTTACAT 1 -GCCATCCTCCCGCCT 1 -AAATACCATGTACTA 1 -GGGATTACAGGCACG 1 -AGCTCTCCAGTCTAA 1 -GGGGAAGTGGGAAGA 1 -GTGACAAAGTCACAT 1 -CTTCCCCGAGATCCA 1 -AGGCACGAGCCACCA 1 -TCTGAATGCTCCACT 1 -CAGAAGGCAGGCGTT 1 -GGCATGTATTACTTT 1 -GGTCTCGCTCTGTTG 1 -TTGCTCTGGAGAATC 1 -AGTAGCGCGAGCACA 1 -GGACAGCAGGACTTA 1 -TTTCAGTGGGGGTGA 1 -ATGGTGGTGATATAT 1 -AATTCAGTGTAGTAC 1 -ACTAGACCATCCATG 1 -ATGCATGTGACAGTG 1 -GAGTTGGACCCGATA 1 -CTGGCGTCCTCAACA 1 -GCGGAAGATAACTGT 1 -ACTTTGTACCTAATT 1 -GTGGGGGTGAATTCA 1 -ACAGGCTTACCCGGG 1 -CTCTATACATGCCTT 1 -AGAGACTCACGCTGG 1 -CTTAACTATCTTGGG 1 -GCGGCAAGATTTTGT 1 -CAGGGTAGTATGGCC 1 -TGCCCACAGTAAAAC 1 -GCGCTGGGGGAGCCA 1 -GAGAGACAGGGTCTC 1 -AAAGGCATGCGCTCC 1 -AATTTTCTCATGATC 1 -GCCCTCTGTACGAAA 1 -GGGATAGGTGAGGAC 1 -AGTAATGCATGTGAC 1 -GACAAGGCCCGCAGG 1 -CTCCCCGCCGAAAGG 1 -TCCCTGACAATCCCA 1 -ATTTAAGCTTCCAAA 1 -CGCCCTAAACTTTGT 1 -CTAACATAACCAGCA 1 -CCTACTAGCCTCAGA 1 -AGATGGGATGGGACT 1 -AATGCTGTCAGCTTC 1 -CTCAGATACCAATCC 1 -GAGATAGAAAGACCA 1 -ACAGATTGACACTAA 1 -TGCCTGGTTAATTTT 1 -AGGCCCCGCGAAAGA 1 -CTTAGTTTCTTCATC 1 -GCTGTGGGGAGAAGG 1 -TCAGTGTAGTACAAG 1 -TCCCCGCCGAAAGGG 1 -CACACAGGTCAGAGG 1 -CGGAGCGAGACATCT 1 -CGCGCGTCCCGGGTG 1 -TCATCCTGCCTGGAA 1 -AGGACAGCAGGACTT 1 -ACCTTCACTTCTTTC 1 -TGACTGACAAACCTT 1 -TGAGGGTTATCATGT 1 -AACTACGGACCTCAA 1 -ACTCATGTTATTGTA 1 -GCAGAGCGGGAGAGG 1 -ATCATTACATGTTTC 1 -GATCTCTGGCGTCCT 1 -CCTGACAATCCCAAT 1 -CACAGCCAAGCATTC 1 -TAACTGAAAAATTAA 1 -GCTTGACTTCAATCT 1 -TTCAAATTAAATGCA 1 -AATTTCATGAGCTAA 1 -AGGGAAACTACTGGT 1 -AAGGTATGTTCTTTA 1 -AAGCTAGAGGAAGCC 1 -CATCACTCAAGAGTC 1 -CATGCCTTTTTTGTT 1 -GTGCCCAGCCAATCA 1 -CTAACAAATGTCTAA 1 -AGTCTTGGTAACCAT 1 -GCTGGAGTGCAGTGT 1 -CGTCCCCCACTCCAG 1 -AGCCCTGGACTAGCC 1 -TAGGCCAAAGGTCTC 1 -ACTGGCGCCATGATA 1 -ATGCGCTCCCGCAAA 1 -ACATCACTCAAGAGT 1 -GGAGAAGGGAACATG 1 -TTCTGATGCCCTCTC 1 -ATTGGGGAATCTAAT 1 -GAAATAAGGCTGGCA 1 -AACTTACCTCCATGA 1 -GTGTATAAAATTCTT 1 -ACTTTCCATTATGAT 1 -GTCCAGGATTATAGG 1 -AAACCGAAAGTAAGA 1 
-TTATATTCAGGGTAC 1 -ACAGGCACGAGCCAC 2 -TTGGGATTACAGGCA 1 -CTTAACACATCACTC 1 -AAAATCTCGGTGCCT 1 -TGCAGTGTGCAGTGG 1 -GATGGAATCTCATTC 1 -AGACCATCCATGGGG 1 -TCCCCACTCCCAGGC 1 -GAACTTAGCGGGCGC 1 -TTCCCCGAGATCCAG 1 -ATGAGAGGCATGACT 1 -TTCGTTCAGTGCTAA 1 -GGAATCTCATTCCAT 1 -AAGGTGAGCGCGCTT 1 -CAGAGCGGGAGGGTA 1 -TCTTTCTTTCTTTTC 1 -GGTGATTGCTGTAAA 1 -ACCCCGCCGCTTCCC 1 -ACAGAACATACTTTC 1 -TTCACAAACCTCAGC 1 -TTCAGGAAATTTGAC 1 -TCTTAGAGTCTCGTG 1 -TGTACGAAAAGACCA 1 -ATTCGAAACCGCTTT 1 -GACATAGCAATTGCT 1 -GGAGTAATGCATGTG 1 -TATTTAAGCTTCCAA 1 -CACTAATCTGATCTT 1 -CATCCACAGGTGATT 1 -GATTACAGGCACGAG 1 -ATACAGATTGACACT 1 -CACTTATATTCAGGG 1 -CTCTGCTGGATGACG 1 -AGAAACAAAACCTAA 1 -GCGCGACGCCTCCAC 1 -GCGCCGCGCCTTTGG 1 -ACCACAGGCACGAGC 1 -AGAAGGGAAGTCACG 1 -CCTTCTTGAGATGTT 1 -AGCTACCCAGCAGGA 1 -AGCGACGCAGTGCCA 1 -CCCAGACACATAGCA 1 -CATGATCAAAACATT 1 -AGGTATGTTCTTACC 1 -CCACCGGAGCAGCTG 1 -GGGAAGGCGGCAAGA 1 -AGGAGAAGGGAACAT 1 -GAGCGAGAGAGCACA 1 -TTTTGTATTTTCAGT 1 -ACCATGTACTAACAA 1 -CAGTAAGTCAACTTC 1 -CTCACGCTGGATAGC 1 -GAGCATCACTGTAAT 1 -TGGAACTCTCTGTTT 1 -ATGCCTGGTTAATTT 1 -GGTGAGATCTCAGCT 1 -CCACCACACCCGGCT 1 -GTTATCATGTTAGAG 1 -AGTGTTAGTGCTACT 1 -GATTCATCCTGCCTG 1 -ATATCTCTCTAGAAA 1 -CCGGAGCAGCTGGGA 1 -GAAGGGAAGTCACGG 1 -ATGCCGCCCAGTTTG 1 -CATGATTTGGCCCCA 1 -CGCTTTGTATCACAG 1 -GTTCACACGGCAGGC 1 -TACCAATCCAGCCAG 1 -GAATGCTGTCAGCTT 1 -TTTCGATCATGTTTT 1 -AGTAGAGACGGGATT 1 -GAGGAAGGACCAGAG 1 -CAGGTGATTGCTGTA 1 -TTTCAGTTTCCTTTT 1 -TTATTTTTTAAAATG 1 -AATCTCGGTGCCTTA 1 -CACCTTGACCAGATA 1 -ATACAACAGGGATAG 1 -TGATGGGAACAAATA 1 -TGGTATCTTCCAGAA 1 -ATTGCCAGGTACTTA 1 -TCAGGAAATTTGACT 1 -ATTGCTCTATACGTG 1 -CTATACATGCCTTTT 1 -AGCTACCACGCCCAA 1 -GAGGTTCCTAAGTGG 1 -ACTTCAAGTGATCAG 1 -AAACCTCTTTATTTC 1 -CATAATTCCTCTATA 1 -GTAGAGACCAGGCTT 1 -CTCAACTGCAGGGAA 1 -ATTAAATGCCTCAGG 1 -GGATTACAGGCACGA 1 -GCACGAGCCACCGTG 1 -ACTCATACACAACTT 1 -GCTACTAAATATACA 1 -TTCATCTGTGAGAGG 1 -TTTAAGAACATTCCC 1 -CTGCTCCCTGCTCAA 1 -GGACTATGGCAAATG 1 -AGAAGATAACCATAG 1 -GGCACCAAGTGTTTA 1 -TATAAATAGTCCTCA 1 -CCCAACCCCCTCATG 1 
-ATCTAACATAACCAG 1 -GAGTAAACCTGAATC 1 -ATGTTCGTTCAGTGC 1 -GCATACAAGAGCTGG 1 -CTGACGCTTATCGAC 1 -CGAGGGCCACAGAGG 1 -CAAAACCTAAATGCA 1 -TCCTGCTCCTGCCTT 1 -GGAATATCGGGAAAA 1 -AAGCACATAAAGTCC 1 -TGGGGAATCTAATGA 1 -CATGTTTCTTGTGGT 1 -CTCCCGCCTCGGCCT 1 -GTGCCAGGTTAGAGA 1 -TTGAACTCCCAACTT 1 -AATTACTCATGTTAT 1 -AGTTTTACTTTTAAA 1 -TGAAGCTGCCACAAA 1 -CACTCCCAGGCCACC 1 -TTATCTGAGGTTCCT 1 -CCGCAGGGACCGTCA 1 -TGGCAGATGTTATTA 1 -AAGAATAAACCGTGA 1 -ATTCCTGGGACTACA 1 -ACATGTCTCGATCTA 1 -CCCCACTTAACTATC 1 -TTCTCCACATAGTGA 1 -CAATCCCAATATGCA 1 -GTTTTCAGTTTCCTT 1 -AGACACATTAATATT 1 -GCAACCTCCGCCTCC 1 -AAGTAGCGCGCGTCC 1 -TCCCCGAGATCCAGC 1 -ATTTCTGCTGAGGTT 1 -ATATTAAACGCGTGC 1 -TACATTGTTTAGAGC 1 -CACTTTTTCAATTCT 1 -GGCACGAGCCACCAC 1 -TATTATTTTTTGTAG 1 -TTTTTTTCTAGCAGA 1 -ATGTATTACTTTGGA 1 -TTTTTCTTTTTTGAG 1 -CCTACCCATGAATAC 1 -AGTTTGCTCTGGAGA 1 -ACACTTATATTCAGG 1 -CGAGCCTACCCGTCC 1 -TTCAGGAATGCCCGC 1 -ACAAGTCTGAATGCT 1 -AAGCTTCCAAAACTT 1 -AGAAACCATGCTGTG 1 -GAAGGGAACATGACC 1 -TAAGGAAAGGCTACT 1 -TCCATTCTTCAGTAA 1 -ATTATAGGATGCTAG 1 -CATGAATACATTGTT 1 -ATTAAATGACGCAAA 1 -TTTTCATTGAAAATG 1 -TCCAGGATTATAGGA 1 -GATCTCAGCTCACTG 1 -TGGCCAAATGAGCTT 1 -TCTAGAAACACCCTA 1 -TGTTCTTTATTTTTT 1 -CCACTTAACTATCTT 1 -ACGCAAAGCACATAA 1 -ATGGCAAATGGGACA 1 -CCATCCTCCCGCCTC 1 -ACAGGGATAGGTGAG 1 -ACCTAAATGCATACA 1 -GGTGGATTGGGGAAT 1 -TTTCTCTGGAGGCTC 1 -CATTAAAAGCTAAGA 1 -TTTTTTATATCAAAG 1 -CTGAGGAATATCGGG 1 -GCCCCTCCACGCGTT 1 -GCCTTGGCCTCCCAA 1 -ACAGTGGAGAAAAAA 1 -CCAGGCTGGAGTGCA 1 -GTTCTCCACATAGTG 1 -TTTCTCTTTCTTTTT 1 -AATAAGATTTTTTTT 1 -AGGGTTATCATGTTA 1 -GCAGTATCTTCTGTC 1 -AGTAAAACAAAAACA 1 -CTAATCTGATCTTTA 1 -GACAGTGAAACAAAA 1 -CTCTGTTTGAGGGAA 1 -TGGCCCCACATAGAC 1 -CACGCGTTCACAAAC 1 -GCTTCAGGAATGCCC 1 -TAGAGGAAGCCAGTA 1 -GGATTCTTCTTTCCC 1 -AGCTGGCAAATACCT 1 -ATCTTTTCTAAGAAG 1 -TGCTGTGCATCAGTA 1 -ACTGTATGAAGTGTC 1 -TTTTAAAATACCATG 1 -AGGAGAACTTGGAGA 1 -ACCTCGGCGCTCTGA 1 -CAAACCTCAGCGCCG 1 -GTGAGATCTCAGCTC 1 -TTCCTCCCCACGGTG 1 -TACGGACCTCAAGCC 1 -GATAACCATAGTAGT 1 -CCTGGAACTCTCTGT 1 -CAAGCCACCTCCATT 1 
-GGACCACAGGCACGA 1 -CCAGGCCACCCCGCC 1 -CAGGCTGGTCTTGAA 1 -CCATCCATGGGGAAG 1 -AAGTGATCAGCCTGC 1 -CGGCTAATTTTTGTT 1 -AAAGCCCTGGTTAGG 1 -TTTATGCCTTTCTTT 1 -AATCTAATGAAGACC 1 -CAGCTCCCCTGACTG 1 -ACCTCCATGATGCTG 1 -TTCCCTGCTAAAATG 1 -AGAAAGAGAGAGTAG 1 -TTTTTCTAGCAGATT 1 -CTAAATATATGTGTA 1 -CATAGACCCAGAGGT 1 -AATTCCATTTCCACT 1 -TCATAATTCCTCTAT 1 -ATTCCTTAACACATC 1 -ACTCCCAACTTCAAG 1 -AAACCCTCCCCCAAC 1 -ATGGATGAAACCCAG 1 -TTCTACAAACGTCGC 1 -CAGTAGAGACGGGAT 1 -CCAGAAAGTACTGGA 1 -GAATAAACCGTGACT 1 -ACTAGCCAGGTTGGG 1 -AAAGGTCTCCCCTGC 1 -GGCTACTAGCCCCAT 1 -TCAGGCACCAAGTGT 1 -TCCATGATGCTGCTT 1 -TCAGTATCTCAGCAG 1 -GATATATACATTATC 1 -ATCCACAGGTGATTG 1 -GGATTGGGGAATCTA 1 -TTATCTATGGCGGAA 1 -GCCGCTTCCCCGAGA 1 -CAGCAAATACCAGTA 1 -GGAGAAAAAAAAGGA 1 -AAGTGGATCAACCCA 1 -ATAAACTCTATATTT 1 -CCTAGACGAAGTCCA 1 -GTTATTATTTTTTGT 1 -GGGACTACAGGCACA 1 -TTTTGGAGAGTTATT 1 -CGTGAGTAAACCTGA 1 -GAACTCTCTGTTTGA 1 -GGAAGCCCTCTGTAC 1 -TATCCTTTATGTTGT 1 -AGCCCCACGGCGGGC 1 -GAGGGTGCAGAGCGG 1 -ACAGGGTCTCGCTCT 1 -GGGAAGCAGAGCCGC 1 -TGGTTCCTGCTCCTG 1 -GCCCATGCCGCCCAG 1 -TGCTCCTGCCTTGAT 1 -TTAAATATCAGATTG 1 -AATCTTTGGAGTACC 1 -CGGCCTCCCTAAGTG 1 -CATTCAGGGTAGTAT 1 -GGAACAAATAAGTTA 1 -TATTTTTTGTAGAGA 1 -AGATCTCTGGCGTCC 1 -ATAACCATAGTAGTT 1 -AAATAGTCCTCAGGA 1 -TGTGACAGTGGGATT 1 -AAAGTGTTGGGATTA 1 -TTTTTTATTTTTTGG 1 -AGTGTCTAGTAGAGT 1 -AGGGAAGCAGAGCCG 1 -GCCACCGTGCCCAAC 1 -AAATGGTTAGAAATA 1 -AATAAACCGTGACTT 1 -CTGCTGAGGTTTTAT 1 -ACGAAGTCCACAGCT 1 -AGAAACCCTCCCCCA 1 -ACATACTTTCCATTA 1 -TCAAGAGTCTACATG 1 -GGACTCATTCAGGGT 1 -GCGCGAGCACAGCTA 1 -AATAAGAACATATTA 1 -AGAGGCAGAAGATAA 1 -AACAACAAAACCTCT 1 -ATCTTGGAAACAACC 1 -TACCAAACAACAAAA 1 -AGATAACCATAGTAG 1 -TCCCGTCGCCGTAGG 1 -GCCACGGAGCGAGAC 1 -CAGGGAAACTACTGG 1 -AGGCACCAAGTGTTT 1 -AGACAGGCTGGGTTT 1 -GCCATAGACCTTTTT 1 -ATGCTGTCAGCTTCA 1 -TATGCCCACAGTAAA 1 -TGTTTCAGAAACAGT 1 -GACCCAGAGGTGCTA 1 -TTCATCCTGCCTGGA 1 -CGCGCCTTTGGGACG 1 -AAATGAGAGGCATGA 1 -CATGACTAGACCATC 1 -CTAATGTTTCAGAAA 1 -ATTTCACCATGTTGG 1 -CCACCTTGACCAGAT 1 -GCCTTTGGGACGAGC 1 -AACAGTCTTGGTAAC 1 
-CTTAGCGGGCGCCTA 1 -CCCTCCCCCAACCTC 1 -CTTTGTCCCGACCCT 1 -TTGCGCTGCATTTTT 1 -GTTTGAGGGAAGGCG 1 -TTTGGCCCCAAGATA 1 -AATAGGCTGCTGTTC 1 -TGTTTTGCCAGAGGA 1 -TTTGTATTTTCAGTA 1 -TCAGGACAAGGCCCG 1 -TTTTGCCAGAGGAAA 1 -TGGCCATAGACCTTT 1 -CAGGAACAAGCCCTT 1 -CCAAGATACTTTTCA 1 -ATGCAATTTTCTCAT 1 -TTTATTTTTTGGGAT 1 -TTGGAGAAGGGAAGT 1 -TACTCTGTTTCCACC 1 -TTTTTTATTTTTTTA 1 -ACTTAGAAAGTGCTC 1 -CTAGGACAGCAGGAC 1 -CTAGCCCCATCAAGA 1 -GTGCACTGGCGCCAT 1 -TTTAATTAGCATCCA 1 -TGTATAAAATTCTTT 1 -CAGCTTCAGGAATGC 1 -AAGCCCTCTGTACGA 1 -AGAACATTCCCTGAC 1 -AACCGCTTTGTATCA 1 -ATCCAGCCCTGGACT 1 -TAAATAAATTGTATT 1 -CCCACAGTAAAACAA 1 -GAGAGAGTAGCGCGA 1 -TATGTGTATAAAATT 1 -GAAATTTGACTTTCC 1 -ACTTTTGGAGAGTTA 1 -ACAGCCAAAAGGCAT 1 -TTTCTCATGATCAAA 1 -CAGACAGGCTTACCC 1 -AATACCATGTACTAA 1 -AAGACAAGTCTGAAT 1 -TAAGAGAGCTCTTTG 1 -AAGAGAGAGTAGCGC 1 -GCTACCACGCCCAAC 1 -GAGAAGGAGGAGTAC 1 -ATAAAATACAACAGG 1 -ACTGGAGAAGTCCAG 1 -ATATCAAAGCAGCTT 1 -ATGCACGAATTACAG 1 -AAGCTATATCAGGCA 1 -AATTAATTTATGCCC 1 -TATATTAAACGCGTG 1 -AGGAAATTTGACTTT 1 -AAATAGACCTTCAGA 1 -TTGCCCACGCTGGAG 1 -TGACTTTGTACCTAA 1 -CTCCATTTTGAAGAA 1 -AGCTAGAGGAAGCCA 1 -CAAGTGTTTACATTT 1 -TTAAGAACATTCCCT 1 -GAGAGGCATGACTAG 1 -TCCTTAACACATCAC 1 -CAGAAACCATGCTGT 1 -AAGTCCAGGATTATA 1 -TTTTTTGGGATGGAA 1 -TCTCCAAGCCAGCGA 1 -TCTGGGAGGCTGTGG 1 -GGTGATATATACATT 1 -AATGGGACAGAACAT 1 -AGTGCTCAAGATCTC 1 -GCAGAATAGGCTGCT 1 -GGGAGAGGAAGGACC 1 -GTCACCTGTCTCCAA 1 -AAGCCATCCTCCCGC 1 -CGTTTTCAGTTTCCT 1 -GTTAATTTTTGTATT 1 -ATCTATGAAAAAGAC 1 -TTGGCACACAGAAAG 1 -TCTAGCAGTATCTTC 1 -TCCTGGGACTACAGG 1 -TAAGGCCACGGAGCG 1 -ACACATAGCAATTCA 1 -TGCCTGGAACTCTCT 1 -ACAGCAGGACTTACA 1 -AAATGCCTCAGGGAT 1 -TCGTGATGTTTAAGA 1 -GCCCGAATGCTGTCA 1 -TGCAGTGGTGAGATC 1 -TCAAAACCGAAAGTA 1 -CTACTAGCCCCATCA 1 -TCTATACATGCCTTT 1 -TTACCTCCATGATGC 1 -AGATTGTTATTTTTT 1 -CGCTGGGGGAGCCAG 1 -CCCCCAACCTCGGCG 1 -AGGTTAGAGAGAGGG 1 -GAGGCATGACTAGAC 1 -TCACTGGAGATTGCG 1 -CCACGGTGTGGCCCC 1 -TGATTTTCTAAACTG 1 -ATAGCATTCCTTAAC 1 -GTACATCTTGGAAAC 1 -GCTGGCAGAATAGGC 1 -TTTTTGTTTTTTTTC 1 -TATATCAGGCACCAA 1 
-ATCAGGACAAGGCCC 1 -ACTCCCAGGCCACCC 1 -GGTATCTTCCAGAAA 1 -TTCAAGTGATCAGCC 1 -GGTATTTCACTTGGG 1 -TTCTCTGCTGGATGA 1 -TCTAGTAGAGTGCCT 1 -ACCTTCTTCATGCCA 1 -TTAATTAGCATCCAC 1 -GGTATGTTCTTACCA 1 -AAGTCCACAGCTCTC 1 -GTCTCAACTACGGAC 1 -CTCCCGCCTCAGCCA 1 -CCATTGCCCAGGCTG 1 -GATGCTAGGACAGCA 1 -AGGAAGACATTAAGT 1 -ACATAGTGAGGGTTA 1 -GCTCCACTTTTTCAA 1 -GTTCCTGCTCCTGCC 1 -TGGCATGTATTACTT 1 -AAGAGGCACAGTACA 1 -ACGGTGTGGCCCCAC 1 -CTGGCAGAATAGGCT 1 -GTATTCAAATTAAAT 1 -CCCCACGGTGTGGCC 1 -TTTCTCCCAATTCCA 1 -CTCTCCAGTCTAAGG 1 -GCTAAGAGAGCTCTT 1 -GTACCTAATTTCATG 1 -AGACGGGATTTCACC 1 -TTATCCCCTGTGTAA 1 -TCCTTGTCTGGGAGG 1 -TATATGTGTATAAAA 1 -TGTTATTATCTGAGG 1 -GAGTGTATATGTATT 1 -CTTTATGATATGACT 1 -GACATGCGAACTTAG 1 -GACATTAAGTTTTAC 1 -GTAAGAGGCACAGTA 1 -GAGGGAAGGCGGCAA 1 -AGAACTTGGAGAAGG 1 -AACTTTGTCCCGACC 1 -CACTGTAATCTTTTC 1 -TCCCGGGTGCGCACC 1 -TTTCTTTTTTTTTTT 1 -CACTGCAACCTCCGC 1 -TACTAACAAATGTCT 1 -GACAGGCTTACCCGG 1 -CCCAGCCAATCAGGA 1 -TGGGCTCAACCAGTT 1 -AAGTTTCTTAAAATC 1 -CGGAGCGAGAGAGCA 1 -ATCCCCTGTGTAATT 1 -CAGAAGATAACCATA 1 -GCAGGCGTTTTTCTT 1 -TAATATTGCCAGGGT 1 -GTCGCCGTAGGCCAA 1 -CCAAACAACAAAACC 1 -AGACCTTCAGAGGCA 1 -GTCACATGGTTCACA 1 -CTGACTTTGTACCTA 1 -GGACAAGGCCCGCAG 1 -CATCTGATTTTCTAA 1 -TTTTTTTAAATATCA 1 -TATTCGAAACCGCTT 1 -TATTTTCAGTAGAGA 1 -TGTTCGTTCAGTGCT 1 -GAAACCGCTTTGTAT 1 -TGCACTGGCGCCATG 1 -GAAGACATTAAGTTT 1 -ATAGACCTTTTTTAT 1 -TTCACCATGTTGGCC 2 -ACACCCATCTGATTT 1 -TTTGTTATTATTTTT 1 -ATTTTTTGTAGAGAC 1 -TTTTTTTTTTCTTTT 1 -GTTTCTTCATCTGTG 1 -CATGTTATTGTACTC 1 -TCCACCCCTTCCATT 1 -GTGTCAAGCTATATC 1 -AAAAGGCAGCTACTC 1 -GGTTCACACGGCAGG 1 -TCCACATAGTGAGGG 1 -TATCAGGCACCAAGT 1 -ATTTGGCCCCAAGAT 1 -AGACAAGTCTGAATG 1 -TCTAAGGGAAGCAGA 1 -GGTTAGGCTTGACTT 1 -CCAGAAAGAGAGAGT 1 -TTTTCAAAATTAAAT 1 -TGCCTCAGGGATCAG 1 -CGTAGGCCAAAGGTC 1 -CTCTGTTGCCCACGC 1 -TAGAGTGCCTGGGAC 1 -TCAATCTCGATTGCT 1 -AATAACGGGTAGTTC 1 -TCGATTTCTCCCAAT 1 -AAGTGTTAAGAGTGT 1 -TGTATTTTCAGTAGA 1 -GTGAGGACTATGGCA 1 -TCCACAGCTCTCCAG 1 -TTAAAAGCTAAGAGA 1 -GGTGCAGAGCGGGAG 1 -ATCTCAGCTCACTGC 1 -CTGGATGACGTGAGT 1 
-TCAACCCAAGGTATG 1 -CATGCGAACTTAGCG 1 -AAGCCACCTCCATTT 1 -CTTTTCTTTTCTCTT 1 -TGGGATGGGACTCAT 1 -GTAGTTATCTATGGC 1 -CTTTGGAAATTTTCA 1 -CCCAAGCGCCCCTCC 1 -CATGTTTTCAAAACC 1 -ACTTGGTGTCAAGCT 1 -CAGCTTTATGATATG 1 -TTAATATTGCCAGGG 1 -GTACCAGGCCACCTT 1 -TGACGTGAGTAAACC 1 -AGCAGGAACAAGCCC 1 -GAACTCCCAACTTCA 1 -AAATCTCGGTGCCTT 1 -ACCCAGCAGGAACAA 1 -GAAGTGGGAAGAAAA 1 -AGCTTACAAAAGAAT 1 -CCCCCAGACCCCAAG 1 -CCATTTTTTTTCTTA 1 -AACTCTATATTTTTC 1 -GAGCCGCAGCAGACA 1 -ACCTGAGGAATATCG 1 -GCCGCAGCAGACAGG 1 -TGGCAAATACCTTAA 1 -GGGAAGTGGGAAGAA 1 -AGGGACTTTCCCGTT 1 -GTTTTATATGCCAGA 1 -TTACCAAACAACAAA 1 -AGAGCGGAAGAGAAA 1 -TTTTTGTTATTATTT 1 -TTGTTTAGAGCTACC 1 -CACCGTGCCCAACCA 1 -CCAAGCCAGCGACGC 1 -TACATGTTTCTTGTG 1 -AATTTATGCCCACAG 1 -TGGCACACAGAAAGA 1 -GAGTGCACTGGCGCC 1 -ATCAAAACATTCTGC 1 -TGCGCTGGGGGAGCC 1 -TGACAAACCTTCACT 1 -CCACTCTGGCCAAAT 1 -TGTTTTTTTTCTAGC 1 -CACAGTACATCTTGG 1 -GCGCCTAGACGAAGT 1 -TTTGCCAGAGGAAAA 1 -CCTGGTTAGGCTTGA 1 -GACTTTGTACCTAAT 1 -TACAGCTCCCCTGAC 1 -AAACTGTATGAAGTG 1 -CCGCCTCCTGGGTTC 1 -GACTATGGCAAATGG 1 -GCGCGCTTAGTGTAT 1 -AATTCCTCTATACAT 1 -TGAGGACTATGGCAA 1 -GTTCAAGTGATTCCT 1 -TGAAAAAGACAGTGG 1 -CTTGCTGAAAGACAA 1 -CTCTCTGTTTGAGGG 1 -GGAATCTAATGAAGA 1 -CCCCATCAAATTACT 1 -CAAGCATTCTACAAA 1 -CACATAAAGTCCTTG 1 -ATTAAAAGCTAAGAG 1 -GGAGCGAGAGAGCAC 1 -CTGTACGAAAAGACC 1 -ATCATGTTTTGCCAG 1 -AGCCTGCCTTGGCCT 1 -CGCCTCCCCCAGACC 1 -CCTCCTTGTCTGGGA 1 -CTCAGGGATCAGAGC 1 -AATTAGCATCCACAG 1 -CCCCACATAGACCCA 1 -TTGACACTAAATATA 1 -AGTCTAAGGGAAGCA 1 -ATAGCTCAAAAGCCT 1 -TCTTTTTGGTTCCTG 1 -TTCAGTTTCCTTTTC 1 -GGATTACAGGCGTAA 1 -TAGCCCCCATCAAAT 1 -ATACACTTATATTCA 1 -GTTCGTTCAGTGCTA 1 -TCCTCCCCACGGTGT 1 -TTTTTTTTTTTTTTC 1 -AGGGACCGTCACCTG 1 -CCTTCTTCATGCCAC 1 -GGCAAAGAGCCAAAG 1 -AGGTAAGAAGTGTTA 1 -ACGGACCTCAAGCCA 1 -TTCAATTCTCTCTCC 1 -CAGAACATACTTTCC 1 -TTTCTTTTTTGAGAG 1 -TTCCTACCCATGAAT 1 -GGCCAGGCTGGTCTC 1 -AAAGAATGTAAGACT 1 -ACCAGATATATCTCT 1 -CTCCACTTTTTCAAT 1 -CACGCTGGATAGCCT 1 -CACACCACCATGCCT 1 -GATGGGATGGGACTC 1 -TTTTGAGAGACAGGG 1 -GTTCATAATAGCCCC 1 -GTTTTAATTAGCATC 1 
-CAAATACCTTAAATG 1 -TCACGGAGCGAGAGA 1 -AGAGAGAGGGACTTT 1 -TGGTTAATTTTTGTA 1 -GGGATTTGCGTTTTA 1 -TCAAAAGCCTAAGCT 1 -GTCAACTTCAATGTC 1 -GTGCCCAACCAACCC 1 -TGGATCAACCCAAGG 1 -TGTGACAAAGTCACA 1 -AGCGCGACGCCTCCA 1 -CCACATAGACCCAGA 1 -GGCGTTTTTCTTAAA 1 -CAGGCACACACCACC 1 -GTGACTTGGTATCTT 1 -CATGATGCTGCTTAC 1 -ATTACAGGCACGAGC 1 -AAAAAAATGCACGAA 1 -GAGGAAAAGGTGAGC 1 -GCCTCCCAAAGTGTT 1 -TTTTAAATACACTTA 1 -ATCAAGAGGTGGATT 1 -TGAGAGGCAGAAGAT 1 -TTGGCCCCAAGATAC 1 -GAAAGACCAGTCCTT 1 -GACAGTGGAGAAAAA 1 -GAGGCACAGTACATC 1 -CTGTCAGCTTCAGGA 1 -GCCCACGCTGGAGTG 1 -ACAGGCTGGGTTTGA 1 -AGCCACCGGAGCAGC 1 -GAATCTCATTCCATT 1 -AGATTCATCCTGCCT 1 -CTGTCACTGGAGATT 1 -TGTATATGTATTTGT 1 -CTGGTTCAGAAACCA 1 -AGAGCCGCAGCAGAC 1 -ACCACGCCCAACCCC 1 -ACTAGCCCCATCAAG 1 -TACTCATACACAACT 1 -GCCTATACCTTCTTG 1 -TTTTAATTAGCATCC 1 -TGTACCTAATTTCAT 1 -TTATCCTTTATGTTG 1 -CTCTCTGACTTTGTA 1 -AGAGGAAGCCAGTAG 1 -TCATCGATTTCTCCC 1 -GTCCCCCACTCCAGC 1 -TCGATTGCTGCCATT 1 -TATGTTCTTTATTTT 1 -CTATATTTTTCATTG 1 -AAGTTCATTTCTCAG 1 -GTCAAAAACTTACCT 1 -TATCTTTCCCTCATA 1 -TAATTAAGTTTCTTA 1 -TGCGAACTTAGCGGG 1 -GCTCTCCAGTCTAAG 1 -ACGGAGCGAGAGAGC 1 -TTTTGTTATTATTTT 1 -GACATCTCGGCCCGA 1 -CACCCCCTTCCCCAC 1 -TTTCTTTTTGGTTCC 1 -TTAATTTATGCCCAC 1 -TACTTAGAAAGTGCT 1 -CTAGGACATGCGAAC 1 -GTACTTAGAAAGTGC 1 -CAGCTAAGGCCACGG 1 -AGGGAAGTCACGGAG 1 -CGGCGCTCTGACGCT 1 -ATTATCCTTTATGTT 1 -ATGCCCGCCAGCGCG 1 -TAAAAGCTAAGAGAG 1 -TCTTTTTCAGTGGGG 1 -GTTTCCACCCCTTCC 1 -TAACTGTTTTCAAAA 1 -TTCTTTATTTTTTTA 1 -AGTATCTTCTGTCAC 1 -GTGCGCACCCCCTTC 1 -AACATATTAAATGCC 1 -CCACGGCGGGCCACC 1 -GGTTCAAGTGATTCC 1 -AGCGGAAGAGAAACC 1 -GGTGGTGATATATAC 1 -CTACCCAGCAGGAAC 1 -TGCTTACATGTCTCG 1 -AAAAAAAAGGAAGAC 1 -AGACCCCTCTCTGAC 1 -GAGATCCAGCCCTGG 1 -CTTGATCTACACCCA 1 -CTCATTCCATTGCCC 1 -AAGCCCTGGTTAGGC 1 -TCCAAAATGAGAGGC 1 -GATGCTGCTTACATG 1 -TTGCCAGAGGAAAAG 1 -TCAGTTTCCTTTTCT 1 -ATTACTTTGGAAATT 1 -TGGTAACCATCTTGG 1 -TCTGATCTTTAAGAA 1 -ATGCCCTCTCAGCAC 1 -AGTGGGATTTGCGTT 1 -GCCAGAAAGTACTGG 1 -GTCTCCAAGCCAGCG 1 -AGCCCTGGTTAGGCT 1 -CCAGACCCCAAGCGC 1 -TTTTTCTTATGTTTA 1 
-GGACCGTCACCTGTC 1 -CTTATATTCAGGGTA 1 -CAATCAGGACAAGGC 1 -AAGGGGCAAGTAGCG 1 -CAGGTACTTAGAAAG 1 -CCATGTTGGCCAGGC 2 -ACCCGATAAAATACA 1 -CTTTTGGAGAGTTAT 1 -CACCACCATGCCTGG 1 -AACATTAAAAGCTAA 1 -TATCGGGAAAAGACA 1 -CTTTCCATTCTCTGC 1 -CCCTATCATTAAGGA 1 -CCTTGGCCTCCCAAA 1 -TGAGCTTCCACCTTC 1 -TTCATTGAAAATGAA 1 -CCAGTTACTGGCTTT 1 -GATCATGTTTTGCCA 1 -TGATGCTGCTTACAT 1 -TTTTTATTTTTTTAT 1 -CCAACCCAAGGTATG 1 -CTGGATAGCCTCCAG 1 -CTGTGAGAGGCAGAA 1 -GCTCTGGAGAATCTC 1 -ATATTGCCAGGGTAT 1 -CGAGCCACCACACCC 1 -TAGGACAGCAGGACT 1 -TGCTGCTGCTCCCTG 1 -GACGTGAGTAAACCT 1 -GGTCAAAAACTTACC 1 -GGGGAGCCAGAGGCC 1 -CCCAGGCCACCCCGC 1 -AGCATTCCTTAACAC 1 -GCTTACAAAAGAATG 1 -GAGAGGGACTTTCCC 1 -AACCAGCAAATACCA 1 -CCCTCTCTGACTTTG 1 -CTCGATCTATGAAAA 1 -CAGATTGTTATTTTT 1 -TCTTAAAATCTCGGT 1 -TATATCTCTCTAGAA 1 -CATTAAGTTTTACTT 1 -ATGGTTCACACGGCA 1 -GGAACTCTCTGTTTG 1 -AAAGTGACATGTGAT 1 -CCAAGGAGAACTTGG 1 -GGGTATTTCACTTGG 1 -AGGAATATCGGGAAA 1 -TCTCAAGGACTTCTG 1 -TCTCCATTCTTCAGT 1 -GAAACCCAGACACAT 1 -GCCAGCGACGCAGTG 1 -TATGAAGTGTCTAGT 1 -ATATCAGATGGGATG 1 -GAAACAACCAGGCAA 1 -CGCCTCCACTTATAT 1 -GTATGTTCTTACCAA 1 -TCGTTCAGTGCTAAA 1 -TGGGATTACAGGCAC 1 -GGGGGAGCCAGAGGC 1 -TGTTTATATCAGATG 1 -TAAATACACTTATAT 1 -TTTCCCTGCTAAAAT 1 -AAGCCAGCGACGCAG 1 -TTATGCCCACAGTAA 1 -TTAGAGTCTCGTGAT 1 -TTGTCTGGGAGGCTG 1 -GTTTATATCAGATGG 1 -ACCTTTTTTATATCA 1 -GGGCCACAGAGGGTG 1 -CTACCACGCCCAACC 1 -CTTTTTTTTTTTTTT 1 -CAAATGTCTAAAATG 1 -GAGCGGGAGAGGAAG 1 -CCACGCGTTCACAAA 1 -AAGACACATTAATAT 1 -GGAGGCTGTGGGGAG 1 -TTTGTGAGAGCATCA 1 -CAGTCTAAGGGAAGC 1 -CAACTGCAGGGAAAC 1 -TAATTTTTGTATTTT 1 -TAAGAACATTCCCTG 1 -GGAAAAGACACATTA 1 -ACTCCAGCTGCGCTG 1 -AGCTCAAAAGCCTAA 1 -AAATTCTTTTAAATA 1 -AGCAATTCAGGAAAT 1 -TTCAAAGTTCATTTC 1 -TTTTTAAAATGACAT 1 -TTCCCTGACAATCCC 1 -CAAAAGGCATGCGCT 1 -ATGCTGCTTACATGT 1 -GCAGGAACAAGCCCT 1 -TCACTCAAGAGTCTA 1 -TCGATCATGTTTTGC 1 -TCTACATGATTTGGC 1 -GAGCCACCACACCCG 1 -TGGATGACGTGAGTA 1 -CTTGGCACACAGAAA 1 -GCAGACAGGCTTACC 1 -TCAGTGGGGGTGAAT 1 -GTATTGCCAGGTACT 1 -CTACACCCATCTGAT 1 -TGCTGTCAGCTTCAG 1 -CCAAAGGTCTCCCCT 1 
-GTTTGAATTTGGGCT 1 -TGACTACTCATACAC 1 -TTGACTTTCCATTCT 1 -TTCTCTTTCTTTTTT 1 -GAGGAAGCCCTCTGT 1 -GGCTTTATGACTTTT 1 -GCCACAGAGGGTGCA 1 -ATGGTTTTACAGCTC 1 -TGTTGTAAATAAATT 1 -CCAATCAGGACAAGG 1 -ACATAGCAATTGCTC 1 -GGCTGCTGTTCCTAC 1 -CTAAGAGAGCTCTTT 1 -ACCGAAAGTAAGAGG 1 -GCTAGAGGAAGCCAG 1 -CCAGGGTATTTCACT 1 -TCTATACGTGGCAGA 1 -ATCTAATGAAGACCT 1 -TGCAACCTCCGCCTC 1 -CCCGGCTAATTTTTG 1 -AGGATTATAGGATGC 1 -AAGTGTTTACATTTG 1 -AGAGTGTATATGTAT 1 -TTCTCTGGAGGCTCT 1 -ATATATTGCCTAATG 1 -GGGTAGTATGGCCAT 1 -AAGGCTACTAGCCCC 1 -ATATTAAATGCCTCA 1 -ATTACTCATGTTATT 1 -TTTCAGAAACAGTAC 1 -AATACATTGTTTAGA 1 -GGCTAATTTTTGTTA 1 -TCAAAAACTTACCTC 1 -TCAACTTCAATGTCG 1 -TTAAGCTTCCAAAAC 1 -AACCATGCTGTGCAT 1 -CTAGCCTCAGATACC 1 -AACTAGCCAGGTTGG 1 -AGCAGTATCTTCTGT 1 -GGCAAATGGGACAGA 1 -CATTGTTTAGAGCTA 1 -CCTCAGCGCCGCGCC 1 -TACTCTGTCAATGTT 1 -GAAACAGTACTTTCC 1 -CTAATGAAGACCTGA 1 -GTGATTGCTGTAAAC 1 -ACTTTCCATTCTCTG 1 -ATCATCATTACATGT 1 -AAAGGGGCAAGTAGC 1 -AGTGGGAAGAAAATA 1 -TCTTTCCCTCATAAT 1 -AAAGTTCATTTCTCA 1 -ATGAAGAGAAGTGTT 1 -GATCAACCCAAGGTA 1 -AACTGTATGAAGTGT 1 -ATCACTCAAGAGTCT 1 -GTTTCTTGTGGTATC 1 -CTTTCCCGTTTTCAG 1 -CGCTTAGTGTATTGC 1 -CAAGTAGCGCGCGTC 1 -TAATGAAGACCTGAT 1 -TCCTCCTTGTCTGGG 1 -AATCTCACGCAGAAG 1 -GGGTTTGAATTTGGG 1 -GGAGAATCTCACGCA 1 -AGGGAACATGACCCT 1 -GTGGAGAAAAAAAAG 1 -GGCCTCCCAAAGTGT 1 -CGAAACCGCTTTGTA 1 -TCCTCAACAGTCTTG 1 -AGCACAGATTCATCC 1 -TGTCTAGTAGAGTGC 1 -CACAGCTCTCCAGTC 1 -CTGACAATCCCAATA 1 -TTTTCTAAGAAGAGG 1 -ATAGGATGCTAGGAC 1 -GTGCAGTGGTGAGAT 1 -TTAAATGCAATTTTC 1 -CAGATGTTATTATCT 1 -AAAACATTTTCTCAA 1 -AGGGTAGGAGAGACT 1 -AACCCCCTCATGTTT 1 -TATTTTTTGGGATGG 1 -CACCATGTTGGCCAG 2 -ACTGACAAACCTTCA 1 -ATGCCACTCCACAGG 1 -AGAGCTACCCAGCAG 1 -TTCCTTTTCTTAGAG 1 -TCACTGCAACCTCCG 1 -GGGTAGTTCTTATAA 1 -CAAACAAAGGCCTAT 1 -CAATTCCATTTCCAC 1 -GCTACTAGCCCCATC 1 -ACTGTAATCTTTTCT 1 -TCAATGTTCTCCACA 1 -GGATTATAGGATGCT 1 -GGCTTCACCATGTTG 1 -TGGAGTGCACTGGCG 1 -ATGTTACTCTGTCAA 1 -AACAAAACCTCTTTA 1 -ATTAATTTATGCCCA 1 -GGCTGGTCTCAACTA 1 -ACTCATCTTTTTCAG 1 -GCCCCACATAGACCC 1 -TGCTCCACTTTTTCA 1 
-GCCCTAAACTTTGTC 1 -AACATTCCCTGACAA 1 -TTCCAAAATGAGAGG 1 -ACATATTAAATGCCT 1 -TTTTTTGTAGAGACC 1 -GTTCCTAAGTGGATC 1 -GCACGAATTACAGCC 1 -AATCTCGATTGCTGC 1 -CATTTCCACTCTGGC 1 -CTTTTTTGTTTTTTT 1 -GTATTTGTGCAAGTG 1 -TGGAAACAACCAGGC 1 -TGCTCTGGAGAATCT 1 -GACGAAGTCCACAGC 1 -GTGTAGTACAAGAGA 1 -AGAGAGTAGCGCGAG 1 -CCCTCTGTACGAAAA 1 -CAGAGCGGGAGAGGA 1 -GGCCAGGCTGGTCTT 1 -GAGGTGGATTGGGGA 1 -CACAAACCTCAGCGC 1 -CGCGCTTAGTGTATT 1 -TTTCCACCCCTTCCA 1 -CAGGCTGGGTTTGAA 1 -TTAACTGAAAAATTA 1 -TTTTTTTTTTTTTTT 1 -GAATTCAGTGTAGTA 1 -TCTTTCCCTGCTAAA 1 -AATGTCGGATGGATG 1 -GAGTTATTTAAGCTT 1 -ACTAAATATATGTGT 1 -GCCAGCGCGACGCCT 1 -TTTTTTCTTATGTTT 1 -CTTTATGTTGTAAAT 1 -GGTCAGAGGAGGAAA 1 -GGATGGATGAAACCC 1 -AAACAAAAACATTTT 1 -CAGACAGGCTGGGTT 1 -GCAGGACTTACAAAC 1 -CCCAAGGTATGTTCT 2 -CGCCTAGACGAAGTC 1 -TGTATTACTTTGGAA 1 -GGAGTACCAGGCCAC 1 -AAGCAGAGCCGCAGC 1 -AGCTGGGACCACAGG 1 -TGATCAAAACATTCT 1 -TGTAATCTTTTCTAA 1 -TCCCTAAGTGCTGGG 1 -AAAACCTCTTTATTT 1 -TATTTGTGCAAGTGC 1 -CAATTCAGGAAATTT 1 -GCCCCATCAAGAGGT 1 -CCACAGGTGATTGCT 1 -GGCTGGTCTTGAACT 1 -AATACCAGTAATGGT 1 -AAGAAGTGTTAAGAG 1 -GCTCCTGCCTTGATC 1 -CAAGAGTCTACATGA 1 -AGGACTTCTGATGCC 1 -GGCAATCATCATTAC 1 -TTACAAAAGAATGTA 1 -GCCACTCCACAGGAG 1 -TGTTGGCCAGGCTGG 2 -CTACAAACGTCGCGT 1 -TCTAACATAACCAGC 1 -TCAGCTCACTGCAAC 1 -ATTCAGGAAATTTGA 1 -ATCTTTTTCAGTGGG 1 -CCTGCTCCCCGCCGA 1 -AGAGCTGTCTATAAA 1 -CATCCTGCCTGGAAC 1 -GTTCATTTCTCAGTT 1 -ACGGCAGGCATACTC 1 -GAGAGCACAGCGAGG 1 -AACCCAGACACATAG 1 -ACGCCCAACCCCCTC 1 -TGGACTAGCCCCACG 1 -CCCACGGCGGGCCAC 1 -TTGGACCCGATAAAA 1 -CACTCATAGCATTCC 1 -CAGGACTTACAAACA 1 -GGTTCCTGCTCCTGC 1 -TTTGGGCTCAACCAG 1 -ATTTTGAAGAATAAA 1 -TCAAATTAAATGCAA 1 -AAAAGCCTAAGCTCA 1 -CCGTTTTCAGTTTCC 1 -CTAAGTGGATCAACC 1 -GCATTCCTTAACACA 1 -GAGAGACTCACGCTG 1 -CTAAGCTCAAGAGAT 1 -CTCCCCTGACTGACA 1 -TTTAAATATCAGATT 1 -AAGGTCAAAAACTTA 1 -TTTTGTTTTTTTTCT 1 -TCCCGCAAAAGCCCT 1 -CTGGAGAAGTCCAGG 1 -CCTTGATCTACACCC 1 -CATTCTTCAGTAAGT 1 -CTTCAGGAATGCCCG 1 -AGAGATCCTCCCGCC 1 -CTTGGGCTGTGACAA 1 -GTGAATTCAGTGTAG 1 -CTGCTGTTCCTACCC 1 -CTGTAATCTTTTCTA 1 
-TTTCATGAGCTAAAA 1 -TGTAGTACAAGAGAT 1 -TAACGGGTAGTTCTT 1 -TCGATCTATGAAAAA 1 -GAATCTCACGCAGAA 1 -GTTCTTACCAAACAA 1 -CTCCCGCAAAAGCCC 1 -AATGGAGTAATGCAT 1 -ATGGCGGAAGATAAC 1 -ATATGACTACTCATA 1 -CGCACCCCCTTCCCC 1 -ATCTCATTCCATTGC 1 -AATGTAAGACTTACC 1 -AAGAGTGTATATGTA 1 -ACATCTTGGAAACAA 1 -TCTGTCAATGTTCTC 1 -GGCTGTGGGGAGAAG 1 -AGGAAGCCCTCTGTA 1 -AGCCAGCGACGCAGT 1 -TCACACGGCAGGCAT 1 -TTTACATTTGGTCAT 1 -GGCTCAACCAGTTAC 1 -GGGACATAGCAATTG 1 -ATCAGAGCACAGATT 1 -CGCCCAGTTTGCTCT 1 -TGAAAATGAAGAGAA 1 -AGCGGGAGAGGAAGG 1 -CATCACTGTAATCTT 1 -TTGGTCATCGATTTC 1 -GCCACCGGAGCAGCT 1 -TAAATATATGTGTAT 1 -AAACAAAACCTAAAT 1 -AGGAGGAAAATAGAC 1 -ACAGCGAGGGCCACA 1 -GTAGGATTCTTCTTT 1 -TACCTTAAATGGTTG 1 -AAAAGCTAGAGGAAG 1 -GGCGGGCCACCAAGG 1 -TGTGATGGGAACAAA 1 -ATGTATTTGTGCAAG 1 -ATAGCAATTCAGGAA 1 -TACAGATTGACACTA 1 -CTCCCTAAGTGCTGG 1 -CACTTATATTAAACG 1 -TTGTATTTTCAGTAG 1 -CCAGGCAAAGAGCCA 1 -GAACAAGCCCTTCCT 1 -CTCATCTTTTTCAGT 1 -ACCCGTCCCCCACTC 1 -GCGCTTAGTGTATTG 1 -TTCTTACCAAACAAC 1 -TTTGAGAGACAGGGT 1 -TTCATAACATTAAAA 1 -CTAGCAGTATCTTCT 1 -GGGAATATATTGCCT 1 -CGGCAGGCATACTCA 1 -GCTTCCCCGAGATCC 1 -TGCCCAGCCAATCAG 1 -GACTTTCCCGTTTTC 1 -TTTGGAGAGTTATTT 1 -CTTTTTCAATTCTCT 1 -AAGCATTCTACAAAC 1 -GTTTCCTTTTCTTAG 1 -AGAGACCAGGCTTCA 1 -TAACATTAAAAGCTA 1 -GATTTGGCCCCAAGA 1 -AGTAAACCTGAATCT 1 -CTGCAGGGAAACTAC 1 -CAGGCGTAAGCTACC 1 -TGAATCTTTGGAGTA 1 -GATACCAATCCAGCC 1 -CTTTATTAATGGTTT 1 -ACCTCTTTATTTCTG 1 -CCCACGCTGGAGTGC 1 -CCGTAGGCCAAAGGT 1 -AAGCTCAAGAGATCC 1 -TAGTTCTTATAATTT 1 -TAGGCTGCTGTTCCT 1 -GGAAGACATTAAGTT 1 -CAGGGATAGGTGAGG 1 -AATATTGCCAGGGTA 1 -TATGCATGGCATGTA 1 -CAAGAGCTGGCAAAT 1 -CAAGTGATCAGCCTG 1 -ATCCTGCCTGGAACT 1 -ACCAGTTACTGGCTT 1 -CTAATTTTTGTTATT 1 -AAATGGTCTATGCAT 1 -TGCTCAAGATCTCTG 1 -AGCACTCATAGCATT 1 -CAATTGCTCTATACG 1 -GCCTGGTTAATTTTT 1 -CAATCCAGCCAGAAA 1 -AAACAAAACCCACAC 1 -CAAGTGATTCCTGGG 1 -CCCCCATCAAATTAC 1 -TGCCACAAAAGCTAG 1 -GCTTCCACCTTCCCA 1 -AAAAAATGCACGAAT 1 -CCAAGGTATGTTCTT 2 -AATATGCAGATTGTT 1 -TCTCTGGCGTCCTCA 1 -GAGCACAGCTAAGGC 1 -TTCACACGGCAGGCA 1 -TCAGTAAGTCAACTT 1 
-CTGAGGTTTTATATG 1 -AGCTAAAAAACAAAG 1 -TCAGGACAGTGAAAC 1 -TATCCCCTGTGTAAT 1 -GAGACCAGGCTTCAC 1 -CTGCCACAAAAGCTA 1 -GTGCTCAAGATCTCT 1 -AAGAAAATATTATAA 1 -CTGAATCTTTGGAGT 1 -AAATGAAGAGAAGTG 1 -GATTGCTGCCATTTA 1 -TGCATACAAGAGCTG 1 -ATTGTATTCAAATTA 1 -TATGCAGATTGTTTA 1 -AACTGTTTTCAAAAT 1 -GCCAGGCTGGTCTTG 1 -CACTAAATATATGTG 1 -CTGCGCTGGGGGAGC 1 -GCAGGGAAACTACTG 1 -GTTAGAGCTGTCTAT 1 -AATAAGTTAATTATT 1 -GCAATTCAGGAAATT 1 -CCTTAGTTTCTTCAT 1 -GCACAGATTCATCCT 1 -CCACCGTGCCCAACC 1 -GCAAATACCAGTAAT 1 -ACAAAACCTCTTTAT 1 -CCAGAGGCCCCGCGA 1 -ATGTTTCAGAAACAG 1 -TGGCAAATGGGACAG 1 -TCACGCAGAAGGCAG 1 -CTCCAGCTGCGCTGG 1 -TTACATGTTTCTTGT 1 -CCAATATGCAGATTG 1 -AAGTACTGGAGAAGT 1 -AGTATCAGACAGGCT 1 -ATGATCAAAACATTC 1 -CTGGGACATAGCAAT 1 -GCTCTCAAGGACTTC 1 -GGGTACATGATCACT 1 -GAAAAGACCACAGGG 1 -AGAATCTCACGCAGA 1 -TTGGCCAGGCTGGTC 2 -CCTCCTGGGTTCAAG 1 -GTGATTCCTGGGACT 1 -CCACCCCTTCCATTT 1 -TTAGTGCTACTAAAT 1 -GCCCTCTCAGCACTC 1 -AAAGGCAGCTACTCC 1 -AAGACCACAGGGCCC 1 -CTTTTCTCTTTCTTT 1 -CTGAGCTTCTAATAA 1 -GGCAAGTAGCGCGCG 1 -ATGGGAACAAATAAG 1 -GGGACTCATTCAGGG 1 -CTGTTGCCCACGCTG 1 -TCAGCCTGCCTTGGC 1 -CCTTGCTGAAAGACA 1 -CTTGGTAACCATCTT 1 -GCAGTGTGCAGTGGT 1 -TCTCCACATAGTGAG 1 -GTTGGGATTACAGGC 1 -GAGGCTCTCAAGGAC 1 -TTATGATCAAATGGA 1 -GATACTTTTCAAAGT 1 -CACCCCGCCGCTTCC 1 -AACCAACCCAAGGTA 1 -CTCCCCCAACCTCGG 1 -CCATGATGCTGCTTA 1 -TTTCCATTCTCTGCT 1 -CCTCATGTTTTCAAA 1 -ATATTATAAACTCTA 1 -AGAGCTGGCAAATAC 1 -TCTTGAGATGTTCGT 1 -GCGCTCCCGCAAAAG 1 -TTGCGTTTTAATTAG 1 -ATAAGATTTTTTTTT 1 -CCATAGACCTTTTTT 1 -TTCTTGAGATGTTCG 1 -AACTATCTTGGGCTG 1 -TCCACTTTTTCAATT 1 -GGTTTTACAGCTCCC 1 -ATTAATGGTTTTACA 1 -TTAGGCTTGACTTCA 1 -GAATTTGGGCTCAAC 1 -AGTATCTCAGCAGGT 1 -AACTCCCAACTTCAA 1 -AGCGCGCTTAGTGTA 1 -CTATCTTGGGCTGTG 1 -TCTTTTCTAAGAAGA 1 -CGTTCACAAACCTCA 1 -TAGAAATAAGGCTGG 1 -ATGATCACTAAAAGG 1 -CCAAAATGAGAGGCA 1 -GCCCCAAGATACTTT 1 -ATACTTTCCATTATG 1 -TGAATACATTGTTTA 1 -ATCTTGGGCTGTGAC 1 -GAAACACCCTATCAT 1 -TTTAAGCTTCCAAAA 1 -CCCCTTCCCCACTCC 1 -ATATGCAGATTGTTT 1 -CCAGCCAGAAAGTAC 1 -GAGAAGGGAAGTCAC 1 -TCAGAGGAGGAAAAT 1 
-TATGACTACTCATAC 1 -TTCTTAAAATCTCGG 1 -AAGTGCTGGGATTAC 1 -ACCTAATTTCATGAG 1 -GGGGGTGAATTCAGT 1 -ATTAAGGAAAGGCTA 1 -GGAACAAGCCCTTCC 1 -TGGATTATCTTTATT 1 -ACTTGGGGCTAACTT 1 -TGGTTAGGCTTGACT 1 -TTCTAAGAAGAGGAC 1 -TAACAAATGTCTAAA 1 -CGAACTTAGCGGGCG 1 -GGGAAAAGACACATT 1 -TCTCTGCTGGATGAC 1 -GATAGAAAGACCAGT 1 -AGTGAAACAAAAACA 1 -TCCATTTTGAAGAAT 1 -CTCCTGCCTTGATCT 1 -TATACCTTCTTGAGA 1 -ATACAAGAGCTGGCA 1 -TGCCCTCTCAGCACT 1 -ACCCACACACAGGTC 1 -AACAAAACCTAAATG 1 -AGAGATAGAAAGACC 1 -AACCAGTTACTGGCT 1 -CAGACACATAGCAAT 1 -AGAGGGTGCAGAGCG 1 -TAAAAAAAAATGCAC 1 -CATATTAAATGCCTC 1 -TTTCTTCATCTGTGA 1 -ATCAGATGGGATGGG 1 -AATATACAGATTGAC 1 -AACAACCAGGCAAAG 1 -ACCCCAAGCGCCCCT 1 -CTGTCTATAAATAGT 1 -TCAAAATTAAATGAC 1 -AAATATATGTGTATA 1 -GCCAAAGGTCTCCCC 1 -GTTTTGCCAGAGGAA 1 -AAAATATTATAAACT 1 -ATTCATCCTGCCTGG 1 -CAGGGTACATGATCA 1 -AATCAGGACAAGGCC 1 -AGTGGTGAGATCTCA 1 -CCAGTCCTTGCTGAA 1 -TGGTGGTGATATATA 1 -GGCCATACACCTTCT 1 -AGTTACTGGCTTTAT 1 -GCTTCCAAAACTTAA 1 -AAACCTGAATCTTTG 1 -GCTGAGGTTTTATAT 1 -TTTCTTATGTTTATG 1 -CAAGCCCTTCCTACT 1 -CATCGATTTCTCCCA 1 -GCCAGGTACTTAGAA 1 -CAAAAGAATGTAAGA 1 -CACAGGGCCCATGCC 1 -TCCTGGGTTCAAGTG 1 -AGGAGTACCAGGCCA 1 -ATCACAGCCAAGCAT 1 -CTTCTTTCCCTGCTA 1 -GGTTTTATATGCCAG 1 -AACTCTCTGTTTGAG 1 -GCGCCTTTGGGACGA 1 -TTCAAAATAAGATTT 1 -AGGTCAGAGGAGGAA 1 -GAGAACTTGGAGAAG 1 -CAGAGGTGCTAGGAC 1 -GACGCAGTGCCAGGT 1 -GCTTAGTGTATTGCC 1 -TAATAAGAACATATT 1 -TCTGCTGAGGTTTTA 1 -TTCTTCATCTGTGAG 1 -TCTTGGTAACCATCT 1 -ATGTTGGCCAGGCTG 2 -ACGGCGGGCCACCAA 1 -GAAGCCAGTAGGTAA 1 -ATTAAATGCAATTTT 1 -GTCTGAATGCTCCAC 1 -TTTATTAATGGTTTT 1 -AACGCGTGCCCAGCC 1 -AATTTGACTTTCCAT 1 -TTCTTTTTTTTTTTT 1 -GAGTACCTGAGGAAT 1 -GAGACTCACGCTGGA 1 -ACGCTTATCGACGCC 1 -TGGCGTCCTCAACAG 1 -CAGTGGGGGTGAATT 1 -GGCACAGTACATCTT 1 -TCTTGTGGTATCTTC 1 -ATATGTATTTGTGCA 1 -ATAACGGGTAGTTCT 1 -TTTCATAACATTAAA 1 -AGGCGGCAAGATTTT 1 -TTACCCGGGCGACGC 1 -TCTAAGAAGAGGACA 1 -AGTTTCTTCATCTGT 1 -GGCCAAAGGTCTCCC 1 -CTGATGCCCTCTCAG 1 -GATCAGAGCACAGAT 1 -TTCCAAAACTTAACT 1 -TGGAGAAGGGAAGTC 1 -GGGAGAAGGAGGAGT 1 -TGGCCCCAAGATACT 1 
-TTTCCATTATGATCA 1 -GCGGGCCACCAAGGA 1 -GCTATATCAGGCACC 1 -GGTGCCACTAATCTG 1 -TTAAATGCCTCAGGG 1 -AATATCAGATTGTTA 1 -ATATCAGGCACCAAG 1 -AGAGTAGCGCGAGCA 1 -AAGAGTCTACATGAT 1 -AAAGTACTGGAGAAG 1 -AAAAGACAGTGGAGA 1 -GCAGATTTCTAGCAG 1 -TTTTCTCAAGGTCAA 1 -TATTGCCTAATGTTT 1 -GGGTCTCGCTCTGTT 1 -AGACAGGCTTACCCG 1 -TCTCCCCTGCTCCCC 1 -CCCTTCCTACTAGCC 1 -GCCTCAGGGATCAGA 1 -CAGTGCCAGGTTAGA 1 -GCAAATGGGACAGAA 1 -TCTGAGCTTCTAATA 1 -TTGGAGAGTTATTTA 1 -TAACATAACCAGCAA 1 -AGCTGCGCTGGGGGA 1 -CCCATCTGATTTTCT 1 -AGATAGAAAGACCAG 1 -TAAATGCAATTTTCT 1 -CGAGATCCAGCCCTG 1 -CGCTTCCCCGAGATC 1 -TTGTGGTATCTTCCA 1 -GACTTCTGATGCCCT 1 -TGAGTAAACCTGAAT 1 -TTCTCAGTTCATAAT 1 -TTGGGCTCAACCAGT 1 -CTTACATGTCTCGAT 1 -GAGGTTTTATATGCC 1 -TATACATTATCCTTT 1 -TGTCTCCAAGCCAGC 1 -CCCCCACTCCAGCTG 1 -CAAAACCGAAAGTAA 1 -TAGACCTTCAGAGGC 1 -CCCGCAAAAGCCCTG 1 -AGTGACATGTGATGG 1 -TCTGTACGAAAAGAC 1 -AACCTGAATCTTTGG 1 -GGCCCATGCCGCCCA 1 -GAAGTGTTAAGAGTG 1 -CCATTTATCCCCTGT 1 -ACAGTAAAACAAAAA 1 -GAGAGGCAGAAGATA 1 -AAAATGCACGAATTA 1 -CCGAGATCCAGCCCT 1 -AGAACATACTTTCCA 1 -AGATTTTTTTTTAAA 1 -ATCTATGGCGGAAGA 1 -AGATGTTCGTTCAGT 1 -TCCTCAGGACAGTGA 1 -CCAGGCCAGAAAGAG 1 -AATCCAGCCAGAAAG 1 -TTATGCCTTTCTTTT 1 -AGTCTGAATGCTCCA 1 -GTCTACATGATTTGG 1 -GGAGGAGTACCAGGC 1 -GTCTCGCTCTGTTGC 1 -AGTTCATAATAGCCC 1 -AGAAAGACCAGTCCT 1 -GAGATCCTCCCGCCT 1 -AGTGATCAGCCTGCC 1 -CCTGCCTTGATCTAC 1 -ATATTTTTCATTGAA 1 -TAGACCTTTTTTATA 1 -AATGCAATTTTCTCA 1 -AACCCTCCCCCAACC 1 -TACACCTTCTTCATG 1 -GCCTAAGCTCAAGAG 1 -GCCTCCACTTATATT 1 -TAAACTAGCCAGGTT 1 -ACCTTCTTGAGATGT 1 -CTGTGCATCAGTATC 1 -ACTTAACTGAAAAAT 1 -ACGGAGCGAGACATC 1 -GGAGAGGAAGGACCA 1 -CAGGCTGGTCTCAAC 1 -CACCTCCATTTTGAA 1 -GATCACTAAAAGGCA 1 -GAGAGTTATTTAAGC 1 -TTTACAGCTCCCCTG 1 -TTATGTTGTAAATAA 1 -TGCTGAAAGACAAGT 1 -CTTTTTTATATCAAA 1 -ACCTCAGCGCCGCGC 1 -AGTCCACAGCTCTCC 1 -GTTCAGAAACCATGC 1 -TTGGGAATATATTGC 1 -AGAGCCAAAGAGGAA 1 -TTTGTACCTAATTTC 1 -TGGTGATATATACAT 1 -CCACAGCTCTCCAGT 1 -CATTTTTAAGAGCCT 1 -GGTCTTGAACTCCCA 1 -TCAAGTGATCAGCCT 1 -CCCCTGTGTAATTAA 1 -TCGGGAAAAGACACA 1 -CCTGGGACTACAGGC 1 
-TCACAAACCTCAGCG 1 -TTGCCCAGGCTGGAG 1 -TTTTTAAATATCAGA 1 -CATCTCGGCCCGAAT 1 -CTGGAGTGCAGTGTG 1 -AAAACCGAAAGTAAG 1 -TGCATCTGAGCTTCT 1 -GTTATTATCTGAGGT 1 -ACAGATTCATCCTGC 1 -GAGCTGTCTATAAAT 1 -ATAAGAACATATTAA 1 -AAATGTCTAAAATGG 1 -TAAATTGTATTCAAA 1 -AGTATGGCCATAGAC 1 -GATTGCTGTAAACTA 1 -TCGGCGCTCTGACGC 1 -ATTTTCTCATGATCA 1 -AGAGCACAGCGAGGG 1 -TAACCAGCAAATACC 1 -AATTTGGGCTCAACC 1 -AAGACTTACCCCACT 1 -CCTCTTTATTTCTGC 1 -CATTGAAAATGAAGA 1 -TAGTATGGCCATAGA 1 -GAAAGGGGCAAGTAG 1 -TAATGGTGGTGATAT 1 -CAACAAAACCTCTTT 1 -TTTTTGTATTTTCAG 1 -GATGGGACTCATTCA 1 -TGCAGATTGTTTATA 1 -GCTGGTCTTGAACTC 1 -TCGCCGTAGGCCAAA 1 -AGCTGCCACAAAAGC 1 -CAGAGCCGCAGCAGA 1 -CAAAACCTCTTTATT 1 -TGCCTTTTTTGTTTT 1 -ACCTCCGCCTCCTGG 1 -GATCAGCCTGCCTTG 1 -AGGTGAGCGCGCTTA 1 -TGCCAGGTTAGAGAG 1 -GTTATTGTACTCTGT 1 -GCCTTAGTTTCTTCA 1 -TCCTGCCTGGAACTC 1 -AGGTGATTGCTGTAA 1 -GGAAGAGAAACCCTC 1 -ATACCTTCTTGAGAT 1 -TTTATATGCCAGACC 1 -ATCAGCCTGCCTTGG 1 -TACTTTCCATTATGA 1 -ACATGCGAACTTAGC 1 -CAGCCTGCCTTGGCC 1 -GCGCTCTGACGCTTA 1 -CAACCAGGCAAAGAG 1 -CAAACCTTCACTTCT 1 -CCCCAACCTCGGCGC 1 -TTGTTTTTTTTCTAG 1 -AGTGTGCAGTGGTGA 1 -TGAGCGCGCTTAGTG 1 -GCCGTAGGCCAAAGG 1 -GCTTTATGACTTTTG 1 -CTTAAAATCTCGGTG 1 -AAATTTGACTTTCCA 1 -ACTAAATATACAGAT 1 -CGGAGCAGCTGGGAC 1 -AGACCAGGCTTCACC 1 -CCCGTCGCCGTAGGC 1 -AAGTCACGGAGCGAG 1 -AAAACAAAACCCACA 1 -CTTTGTATCACAGCC 1 -AGGGTAGTATGGCCA 1 -GCGCGTCCCGGGTGC 1 -ACTGGAGATTGCGCT 1 -GCGACGCCTCCACTT 1 -GAAAAAGACAGTGGA 1 -GGATGGGACTCATTC 1 -GAAGATAACCATAGT 1 -CTCTGGCCAAATGAG 1 -TCAGGAATGCCCGCC 1 -TCAAGGACTTCTGAT 1 -TCTGTTTGAGGGAAG 1 -GTAATTAAGTTTCTT 1 -CCTAATTTCATGAGC 1 -TTTTCAAAACCGAAA 1 -CTCCATTCTTCAGTA 1 -GGCAGAAGATAACCA 1 -GTTCAGTGCTAAATA 1 -TTAAATGGTTGAGTT 1 -TCCTTTTCTTAGAGT 1 -ATTTGGTCATCGATT 1 -CTATGGCGGAAGATA 1 -TACCCCACTTAACTA 1 -AGACCTTTTTTATAT 1 -GAAGTCCACAGCTCT 1 -GGGATTTCACCATGT 1 -CCGCCTCAGCCACCG 1 -TTGTACTCTGTTTCC 1 -CATGTCTCGATCTAT 1 -AGCTATATCAGGCAC 1 -TAAAATCTCGGTGCC 1 -CTGTATGAAGTGTCT 1 -TACTTTTAAAATACC 1 -ATTTATCCCCTGTGT 1 -CTCCCAGGCCACCCC 1 -GACCAGTCCTTGCTG 1 -AGATCTCAGCTCACT 1 
-CCCAGTTTGCTCTGG 1 -GAGACAGGGTCTCGC 1 -ACTCTATATTTTTCA 1 -ATGACGCAAAGCACA 1 -GGGCCACCAAGGAGA 1 -CAGTACATCTTGGAA 1 -ACAAAACCCACACAC 1 -GAAATTTTCAAAATA 1 -CACAGGAGAAGGGAA 1 -CCAGAGGTGCTAGGA 1 -TGATTTGGCCCCAAG 1 -CTTCCCAACAAGCCA 1 -CCCAGGCTGGAGTGC 1 -TCAGAAACAGTACTT 1 -AGGTATGTTCTTTAT 1 -CTGCATTTTTAAGAG 1 -TTTTGTGAGAGCATC 1 -CTTGACTTCAATCTC 1 -TTTATGTTGTAAATA 1 -GGACAAGTATCAGAC 1 -TTCTTTCCCTGCTAA 1 -GTTTAGAGCTACCCA 1 -CTTTTCTTAGAGTCT 1 -TTGGGGAATCTAATG 1 -CCCCGCCGAAAGGGG 1 -GTCAATGTTCTCCAC 1 -AAAATTAAATGACGC 1 -GCTGCTTACATGTCT 1 -CCAGGCTTCACCATG 1 -GATCAAATGGAGTAA 1 -TGCAGGGAAACTACT 1 -GCTGGGGGAGCCAGA 1 -TTCAAAATTAAATGA 1 -ATACCATGTACTAAC 1 -GCTCACTGCAACCTC 1 -TATACAGATTGACAC 1 -ATGTTAGAGCTGTCT 1 -AACTGAAAAATTAAT 1 -AAGGAAAGGCTACTA 1 -GCCATTTATCCCCTG 1 -TTCAGCAGCTTACAA 1 -CAAGCTATATCAGGC 1 -TGACACTAAATATAT 1 -CCATGCCTGGTTAAT 1 -CCCACGGTGTGGCCC 1 -TTATTTCTGCTGAGG 1 -CTCAGCCACCGGAGC 1 -AGTGTATTGCCAGGT 1 -TTATTAATGGTTTTA 1 -GGAAGGACCAGAGCG 1 -GCTGCATTTTTAAGA 1 -AGCCAGAGGCCCCGC 1 -ACTGCAACCTCCGCC 1 -ACCCTCCCCCAACCT 1 -TGGGAAGAAAATATT 1 -TCATAGCATTCCTTA 1 -TTCAGAGGCAATCAT 1 -ACATGATTTGGCCCC 1 -GGCCTATACCTTCTT 1 -TGTTGCCCACGCTGG 1 -CGCTCTGACGCTTAT 1 -CTCATAGCATTCCTT 1 -GTATCTCAGCAGGTG 1 -CATTTTCTCAAGGTC 1 -TCTCGGCCCGAATGC 1 -CCATGATAGCTCAAA 1 -CTTCCAGAAATGGTC 1 -GTATCACAGCCAAGC 1 -AAGGGAAGCAGAGCC 1 -AAAAGTGACATGTGA 1 -CAGCGAGGGCCACAG 1 -CCGCCTCGGCCTCCC 1 -TGCTCTATACGTGGC 1 -TGCCAGACCCCTCTC 1 -GTAAGACTTACCCCA 1 -ACAGCTAAGGCCACG 1 -TTTCTAGCAGTATCT 1 -TCCTACCCATGAATA 1 -CATCAAATTACTCAT 1 -AGGCTACTAGCCCCA 1 -CGGGAGGGTAGGAGA 1 -TCCAGAAATGGTCTA 1 -AAGACCTGATTTTTT 1 -GGCGACGCCTCCCCC 1 -TTCCCTCATAATTCC 1 -CAACCTCGGCGCTCT 1 -CTGGTTAGGCTTGAC 1 -ATGCAGATTGTTTAT 1 -AGGCAGAAGATAACC 1 -GTTCCTACCCATGAA 1 -TTAAATGACGCAAAG 1 -ACCCAGAGGTGCTAG 1 -GGCCTCCCTAAGTGC 1 -AACATGACCCTGTAG 1 -GTGAGAGGCAGAAGA 1 -TATTCAGGGTACATG 1 -AATTTTTGTATTTTC 1 -TGCATCAGTATCTCA 1 -GCCAATCAGGACAAG 1 -GGAGTGCAGTGTGCA 1 -TGCTGAGGTTTTATA 1 -AATCTTTTCTAAGAA 1 -TTGCTGTAAACTAGC 1 -TCATTCCATTGCCCA 1 -GGTTCCTAAGTGGAT 1 
-TGCCATTTATCCCCT 1 -TACAGGCGTAAGCTA 1 -AGCAGCTGGGACCAC 1 -TTTTTTAAAATGACA 1 -CCCTAAACTTTGTCC 1 -ATGACCCTGTAGGAT 1 -TAGACCATCCATGGG 1 -TGACATGTGATGGGA 1 -CTTATATTAAACGCG 1 -GCCTCGGCCTCCCTA 1 -AATGGTCTATGCATG 1 -TAATGTTTCAGAAAC 1 -CATACTTTCCATTAT 1 -TGAGAGGCATGACTA 1 -GAATCTAATGAAGAC 1 -CCCTGACAATCCCAA 1 -AGAATAAACCGTGAC 1 -CAGATACCAATCCAG 1 -CCCAGCAGGAACAAG 1 -TTTAAGAGCCTTTCT 1 -AAATAAATTGTATTC 1 -ACAAACGTCGCGTGC 1 -TCTGGAGAATCTCAC 1 -TGTGAGAGGCAGAAG 1 -TTAAGAGTGTATATG 1 -TGACAAAGTCACATG 1 -CACCTGTCTCCAAGC 1 -TGAATGCTCCACTTT 1 -TGTTTAGAGCTACCC 1 -CTGTGGGGAGAAGGA 1 -CTTTAAGAACATTCC 1 -AAACAAAAACAAAAC 1 -ATCAGGCACCAAGTG 1 -TAATGGTTTTACAGC 1 -TGGGGAAGTGGGAAG 1 -ACAAAAGCTAGAGGA 1 -CGCTGGATAGCCTCC 1 -CAGGGTCTCGCTCTG 1 -CTAAACTGTATGAAG 1 -TGCACGAATTACAGC 1 -CCTAAGTGCTGGGAT 1 -ATAACTGTTTTCAAA 1 -GTTCTTATAATTTTT 1 -CGAGACATCTCGGCC 1 -AGCCAATCAGGACAA 1 -CATGTTGGCCAGGCT 2 -CCTGCTCCTGCCTTG 1 -CCGTCGCCGTAGGCC 1 -GTACTGGAGAAGTCC 1 -GAAAGTGCTCAAGAT 1 -CCCGGGTGCGCACCC 1 -CCACTCCAGCTGCGC 1 -ACCGTGACTTGGTAT 1 -CCATTATGATCAAAT 1 -ATCCTCCCGCCTCAG 1 -CGCCGTAGGCCAAAG 1 -ACAAAAACAAAACCC 1 -CATAAAGTCCTTGGC 1 -TAGCGGGCGCCTAGA 1 -ACGGGATTTCACCAT 1 -CACTCCACAGGAGAA 1 -CTTAAAAAAAAATGC 1 -GATTGACACTAAATA 1 -GAGGGACTTTCCCGT 1 -AACAAAGGCCTATAC 1 -TTAGAAAGTGCTCAA 1 -CTGCTGGATGACGTG 1 -CCCGCGAAAGAGCGG 1 -CCAGCCCTGGACTAG 1 -CCTGTAGGATTCTTC 1 -CTTCTGATGCCCTCT 1 -CCATACACCTTCTTC 1 -GTCAATAACGGGTAG 1 -AGTGTATATGTATTT 1 -CATAGCAATTGCTCT 1 -AATGACGCAAAGCAC 1 -GTGGTGAGATCTCAG 1 -CGAAAAGACCACAGG 1 -CCTCTCAGCACTCAT 1 -TACACCCATCTGATT 1 -CCACCAAGGAGAACT 1 -TATGGCAAATGGGAC 1 -CCCATCAAATTACTC 1 -ATAGTAGTTATCTAT 1 -ATACATTGTTTAGAG 1 -AAGAGAAACCCTCCC 1 -CCAAAGAGGAAGCCC 1 -TAATAATAAGAACAT 1 -GCTGCTGCTCCCTGC 1 -ACTCATAGCATTCCT 1 -GGGAGGGTAGGAGAG 1 -TGATATATACATTAT 1 -TATTGCCAGGTACTT 1 -ACTTTTTCAATTCTC 1 -GTAGTACAAGAGATA 1 -ATGCGAACTTAGCGG 1 -AGCAGGACTTACAAA 1 -AGGACATGCGAACTT 1 -AAATTAATTTATGCC 1 -TGGTTTTACAGCTCC 1 -CACGCAGAAGGCAGG 1 -ATACATTATCCTTTA 1 -AAACCTTCACTTCTT 1 -CCCCGCGAAAGAGCG 1 -ACGTGGCAGATGTTA 1 
-TGTTAGAGCTGTCTA 1 -ATGTTATTGTACTCT 1 -CCATCAAATTACTCA 1 -CATAGTAGTTATCTA 1 -CTGTCTCCAAGCCAG 1 -AGCCACCTCCATTTT 1 -TCACGCTGGATAGCC 1 -CAAGGTATGTTCTTT 1 -AGTCACATGGTTCAC 1 -CCGAATGCTGTCAGC 1 -TGTCAAGCTATATCA 1 -GAAGTCCAGGATTAT 1 -GTAGAGACGGGATTT 1 -TTTTTGGTTCCTGCT 1 -AAATACACTTATATT 1 -TTTGTTTTTTTTCTA 1 -ATTTTTAAGAGCCTT 1 -CTTCCATTTTTTTTC 1 -AAAATGACATCTAAC 1 -GTATCTTTCCCTCAT 1 -AGGACAGTGAAACAA 1 -AGATATATCTCTCTA 1 -TTTTCTCATGATCAA 1 -AAAATGGTTAGAAAT 1 -AAACCATGCTGTGCA 1 -ACTCTGTCAATGTTC 1 -TTGCCAGGTACTTAG 1 -TAAGTTTCTTAAAAT 1 -TTACAGCCAAAAGGC 1 -TAAGAAGTGTTAAGA 1 -TCAAAACATTCTGCT 1 -CCCGAGATCCAGCCC 1 -TACCTGAGGAATATC 1 -TTCAGTGTAGTACAA 1 -ATTTTTTAAAATGAC 1 -AGCCAAAAGGCATGC 1 -AGTGCTGGGATTACA 1 -CCCCTCTCTGACTTT 1 -TGTTTCTTGTGGTAT 1 -AAACAGTACTTTCCA 1 -CCTCAGGGATCAGAG 1 -CACCTTCCCAACAAG 1 -GACTACAGGCACACA 1 -CAGGGACCGTCACCT 1 -ATCTGTGAGAGGCAG 1 -GGCGCCTAGACGAAG 1 -GAAAATGAAGAGAAG 1 -TTGTATCACAGCCAA 1 -TCAACAGTCTTGGTA 1 -TTTTTAAAAGTGACA 1 -TCTTTCTTTTCTTTT 1 -GAAGCCCTCTGTACG 1 -TCACCTGTCTCCAAG 1 -AAAAGACACATTAAT 1 -CCTCGGCGCTCTGAC 1 -TGCGTTTTAATTAGC 1 -GCTGGGACCACAGGC 1 -GTAGTATGGCCATAG 1 -AAAACTTACCTCCAT 1 -GTCTATAAATAGTCC 1 -TTTTCTTATGTTTAT 1 -AAAGAGGAAGCCCTC 1 -CCGACCCTCCCGTCG 1 -GCTGAAAGACAAGTC 1 -AACCATAGTAGTTAT 1 -ACTTACCCCACTTAA 1 -GATGTTCGTTCAGTG 1 -AGTCTCGTGATGTTT 1 -CGTCGCGTGCTGTTT 1 -TATCGACGCCCTAAA 1 -GTGCAAGTGCTGCTG 1 -CTGTTTCCTCCCCAC 1 -AAGTGATTCCTGGGA 1 -GGTTAGAGAGAGGGA 1 -CTGTGACAAAGTCAC 1 -ACCCGGCTAATTTTT 1 -AAGGCCTATACCTTC 1 -TTTCTCAGTTCATAA 1 -CCTCAGCCACCGGAG 1 -CCTTGTCTGGGAGGC 1 -TCCCCCAACCTCGGC 1 -CAATCATCATTACAT 1 -AAACTAGCCAGGTTG 1 -CAGTAAAACAAAAAC 1 -GAGCCACCGTGCCCA 1 -CACGAGCCACCACAC 1 -CTTTCCCTGCTAAAA 1 -CTCAACTACGGACCT 1 -TCTTTGCATCTGAGC 1 -CATAATAGCCCCCAT 1 -TAGGTAAGAAGTGTT 1 -ACAGTGGGATTTGCG 1 -CAAAAGCCCTGGTTA 1 -GTCACTGGAGATTGC 1 -CCAGGTTGGGAATAT 1 -CGTGATGTTTAAGAA 1 -AGAGAGCACAGCGAG 1 -TTCTTTCTTTCTTTT 1 -AACCTCCGCCTCCTG 1 -CAACCCCCTCATGTT 1 -TTTGGTCATCGATTT 1 -TCATCTTTTTCAGTG 1 -AAATGACATCTAACA 1 -CCATTTTGAAGAATA 1 -ATACCTTAAATGGTT 1 
-TTCCATTGCCCAGGC 1 -AACCTCTTTATTTCT 1 -GCAATTTTCTCATGA 1 -TAGACGAAGTCCACA 1 -TCTCGCTCTGTTGCC 1 -CCCTGCTCCCCGCCG 1 -TCTTGGATTATCTTT 1 -GGACATGCGAACTTA 1 -AGAGGAGGAAAATAG 1 -AGAGCATCACTGTAA 1 -CCTCAAGCCATCCTC 1 -TCCACTTATATTAAA 1 -GACCACAGGGCCCAT 1 -CAAAAGCTAGAGGAA 1 -CAGTAATGGTGGTGA 1 -CTATGAAAAAGACAG 1 -TAAATGCCTCAGGGA 1 -CCTCCACTTATATTA 1 -CCCGCCAGCGCGACG 1 -GGATGGAATCTCATT 1 -TATTATCTGAGGTTC 1 -AGGCGTTTTTCTTAA 1 -TGCCTTGGCCTCCCA 1 -TTACTCATGTTATTG 1 -CCCCAAGCGCCCCTC 1 -AACCTCGGCGCTCTG 1 -CTCGATTGCTGCCAT 1 -GCTGCTGCTGCTCCC 1 -GCTAAATATACCTGA 1 -TCATGTTAGAGCTGT 1 -TTGCCAGGGTATTTC 1 -GCACTGGCGCCATGA 1 -CCTGAGGAATATCGG 1 -CAAACGTCGCGTGCT 1 -TGAGGGAAGGCGGCA 1 -GTATAAAATTCTTTT 1 -AAAGCAGCTTTATGA 1 -GCAGGTGCCACTAAT 1 -TAGAGCTACCCAGCA 1 -CGATTTCTCCCAATT 1 -ATAGAAAGACCAGTC 1 -GACAAACCTTCACTT 1 -AAAAACAAAGAAACA 1 -ACCTTGACCAGATAT 1 -GTATCAGACAGGCTG 1 -TGTGGGGAGAAGGAG 1 -GAAAATAGACCTTCA 1 -TTTCATTGAAAATGA 1 -TCTTGGAAACAACCA 1 -ATGTTTTGCCAGAGG 1 -ACTTACAAACAAAGG 1 -TAGGTGAGGACTATG 1 -GGAGGGTAGGAGAGA 1 -AGAGGAAGCCCTCTG 1 -TACTTTCCAAAATGA 1 -GCAATCATCATTACA 1 -TATAGGATGCTAGGA 1 -ACATTAATATTGCCA 1 -CACAGCGAGGGCCAC 1 -ACCAAACAACAAAAC 1 -AAACTTTGTCCCGAC 1 -TAAGTTTTACTTTTA 1 -TCTGATGCCCTCTCA 1 -CGTGGCAGATGTTAT 1 -TTCTCATGATCAAAA 1 -GGGACCGTCACCTGT 1 -TAGTGAGGGTTATCA 1 -ATCCCAATATGCAGA 1 -TGGTCATCGATTTCT 1 -GCAGCAGACAGGCTT 1 -GCTGCCACAAAAGCT 1 -AGATTTCTAGCAGTA 1 -TATGCCTTTCTTTTT 1 -AACCGTGACTTGGTA 1 -ATTTCTAGCAGTATC 1 -AAAATTAATTTATGC 1 -ATGTGTATAAAATTC 1 -TGCATGGCATGTATT 1 -TACTGGTTCAGAAAC 1 -GAAGATAACTGTTTT 1 -ATAACCAGCAAATAC 1 -CTCCTCCTTGTCTGG 1 -GGTGCCTTAGTTTCT 1 -TTTCAAAACCGAAAG 1 -GATATATCTCTCTAG 1 -TGCGCTCCCGCAAAA 1 -TGACGCTTATCGACG 1 -CATTATCCTTTATGT 1 -ATAATTCCTCTATAC 1 -AGCATCCACAGGTGA 1 -GCGACGCCTCCCCCA 1 -AGACATTAAGTTTTA 1 -CTAAGAAGAGGACAA 1 -AAAATGTTACTCTGT 1 -TCCCAGGCCACCCCG 1 -CGCCGCGCCTTTGGG 1 -GCCTCAGATACCAAT 1 -AAGCCAGTAGGTAAG 1 -TGTACTAACAAATGT 1 -TTTCCCGTTTTCAGT 1 -AGCACAGCGAGGGCC 1 -CAGTACTTTCCAAAA 1 -CCTAAACTTTGTCCC 1 -ATTTATGCCCACAGT 1 -TCAAGTGATTCCTGG 1 
-AGCCACCGTGCCCAA 1 -ATGAAGACCTGATTT 1 -TATTTTTTAAAATGA 1 -ACCCCTCTCTGACTT 1 -ATCAAATTACTCATG 1 -TAATGCATGTGACAG 1 -ATGGGATGGGACTCA 1 -TTTTCTAAACTGTAT 1 -TGCTACTAAATATAC 1 -GTTAGAAATAAGGCT 1 -TGGTCTCAACTACGG 1 -GTAGGCCAAAGGTCT 1 -ATTGCTGCCATTTAT 1 -CATTTTGAAGAATAA 1 -ACTAATCTGATCTTT 1 -CTGGCGCCATGATAG 1 -GTCTAAGGGAAGCAG 1 -TAAATAGTCCTCAGG 1 -CATTTGGTCATCGAT 1 -CCCTGGTTAGGCTTG 1 -AAAAACTTACCTCCA 1 -CGGGCGCCTAGACGA 1 -CAAATGGGACAGAAC 1 -GCTAGGACAGCAGGA 1 -CCTTTTTTGTTTTTT 1 -GGAATGCCCGCCAGC 1 -AGGATGCTAGGACAG 1 -CTGCTTTCGATCATG 1 -TACATTATCCTTTAT 1 -TTGCATCTGAGCTTC 1 -AGGCAAAGAGCCAAA 1 -CAATGTTCTCCACAT 1 -TTTATCCCCTGTGTA 1 -AGGTGGATTGGGGAA 1 -GCCACCAAGGAGAAC 1 -GCATGTATTACTTTG 1 -TATACCTGAAGCTGC 1 -GACGCTTATCGACGC 1 -CCTTTTTTATATCAA 1 -GTAAATAAATTGTAT 1 -GTGAGGGTTATCATG 1 -CATCCTCCCGCCTCG 1 -GGATAGGTGAGGACT 1 -TGTCTAAAATGGTTA 1 -TTAGAAATAAGGCTG 1 -TGGTTAGAAATAAGG 1 -CGAGCCACCGTGCCC 1 -CGCAGCAGACAGGCT 1 -GCCTGGGACATAGCA 1 -GCCAAAAGGCATGCG 1 -CAGCCAGAAAGTACT 1 -ATGCATACAAGAGCT 1 -ACGTGAGTAAACCTG 1 -GCCACAAAAGCTAGA 1 -TGGAATCTCATTCCA 1 -GGCAGCTACTCCTCC 1 -CCCCTCATGTTTTCA 1 -AACGTCGCGTGCTGT 1 -CAAGGACTTCTGATG 1 -TGGAGTGCAGTGTGC 1 -AGCCTCCAGGCCAGA 1 -AGCAGCTTTATGATA 1 -GTTGGGAATATATTG 1 -ACATAGCAATTCAGG 1 -CTCATAATTCCTCTA 1 -GACAGAACATACTTT 1 -CTTCCCCACTCCCAG 1 -AAGTGCTCAAGATCT 1 -CACCACACCCGGCTA 1 -TGCCCGCCAGCGCGA 1 -TGTGTAATTAAGTTT 1 -TATTCAAATTAAATG 1 -CATGACCCTGTAGGA 1 -AAATGAGCTTCCACC 1 -GAGAAGTGTTAGTGC 1 -AAGGTCTCCCCTGCT 1 -CTAGTAGAGTGCCTG 1 -CTAACTTGGTGTCAA 1 -CCAGCCAATCAGGAC 1 -ACACTAAATATATGT 1 -CTCTGGCGTCCTCAA 1 -ATTTTCAAAATAAGA 1 -TTCAGAAACAGTACT 1 -CTGTTCCTACCCATG 1 -GTAGTTCTTATAATT 1 -TATGCCAGACCCCTC 1 -GGCGCCATGATAGCT 1 -CTGTAAACTAGCCAG 1 -TCTCATTCCATTGCC 1 -CCCGTCCCCCACTCC 1 -TTAAAAGTGACATGT 1 -CCCCAGACCCCAAGC 1 -GTACTAACAAATGTC 1 -GACTAGACCATCCAT 1 -CGGGATTTCACCATG 1 -TTTATTTCTGCTGAG 1 -TTTGTCCCGACCCTC 1 -GAACTTGGAGAAGGG 1 -GTGATCAGCCTGCCT 1 -TCCCCTGTGTAATTA 1 -GGGGCTAACTTGGTG 1 -CAACTTCAAGTGATC 1 -ATTAAGTTTTACTTT 1 -AGCTTCCAAAACTTA 1 -CACACACCACCATGC 1 
-TAATTAGCATCCACA 1 -TTGCCTAATGTTTCA 1 -AAGTCCTTGGCACAC 1 -TTATGACTTTTGGAG 1 -TACAAGAGCTGGCAA 1 -GATGTTATTATCTGA 1 -ATTATCTTTATTAAT 1 -CTCTTTCTTTTTTTT 1 -CCTCCCAAAGTGTTG 1 -GTATGGCCATAGACC 1 -CCTGAAGCTGCCACA 1 -ACCCTATCATTAAGG 1 -TGCTAGGACAGCAGG 1 -TCCACCTTCCCAACA 1 -TACTAGCCTCAGATA 1 -ACAGGCACACACCAC 1 -CCAATTCCATTTCCA 1 -ATTACAGCCAAAAGG 1 -TTTTTCATAACATTA 1 -GATGACGTGAGTAAA 1 -AATAAATTGTATTCA 1 -TTTTTATATCAAAGC 1 -TCAAAATAAGATTTT 1 -CACAAAAGCTAGAGG 1 -ATTACAGGCGTAAGC 1 -GGGTAGGAGAGACTC 1 -CGCGTCCCGGGTGCG 1 -ATTATTTTTTGTAGA 1 -AATGCTCCACTTTTT 1 -GGGAAGAAAATATTA 1 -AAACAACAAAACCTC 1 -CAGGTTAGAGAGAGG 1 -GAGAGGAAGGACCAG 1 -AGCTTTATGATATGA 1 -AAAACCCACACACAG 1 -CTCTCTCCATTCTTC 1 -TCCTTTATGTTGTAA 1 -AGTCTACATGATTTG 1 -ATTCAAATTAAATGC 1 -GCTCTATACGTGGCA 1 -TGTAAATAAATTGTA 1 -AGAAATGGTCTATGC 1 -ATCTCGGCCCGAATG 1 -CGATCATGTTTTGCC 1 -AAATTTTCAAAATAA 1 -ATTCTCTGCTGGATG 1 -GTCCTCAGGACAGTG 1 -CTGTGTAATTAAGTT 1 -AAATACCTTAAATGG 1 -GACAATCCCAATATG 1 -CATGCCACTCCACAG 1 -ACACATCACTCAAGA 1 -TAGAGAGAGGGACTT 1 -AAATGCAATTTTCTC 1 -AACACATCACTCAAG 1 -TTTCTAAACTGTATG 1 -AACAGTACTTTCCAA 1 -TGGAGAATCTCACGC 1 -AGTGAGGGTTATCAT 1 -GGCCCCACATAGACC 1 -TTTTTTCTTTTTTGA 1 -TTTTTAAGAGCCTTT 1 -TGGATAGCCTCCAGG 1 -ACAGAGGGTGCAGAG 1 -GGCGCTCTGACGCTT 1 -ACCTTAAATGGTTGA 1 -GGGATGGGACTCATT 1 -CAAAGAAACAAAACC 1 -GTGTTAAGAGTGTAT 1 -GCCAGGGTATTTCAC 1 -AACACCCTATCATTA 1 -GCATGTGACAGTGGG 1 -GACCGTCACCTGTCT 1 -GGAAGGCGGCAAGAT 1 -TCAGTAGAGACGGGA 1 -CCTTCCCAACAAGCC 1 -CCATCAAGAGGTGGA 1 -ACCCCACTTAACTAT 1 -TACATGTCTCGATCT 1 -GGAGAACTTGGAGAA 1 -CAGCGCCGCGCCTTT 1 -GCCTTTCTTTTTGGT 1 -AAAGCACATAAAGTC 1 -GTTGGACCCGATAAA 1 -CAGTGTGCAGTGGTG 1 -AATATTATAAACTCT 1 -TTATATCAAAGCAGC 1 -TAAACTGTATGAAGT 1 -AAGAGATAGAAAGAC 1 -GAATTACAGCCAAAA 1 -TATCTTCCAGAAATG 1 -TTATTTAAGCTTCCA 1 -GGACTACAGGCACAC 1 -GCACATAAAGTCCTT 1 -ACCGCTTTGTATCAC 1 -ACGCTGGATAGCCTC 1 -CTCATACACAACTTT 1 -AAAGGTGAGCGCGCT 1 -CATGATAGCTCAAAA 1 -TGCCTGGGACATAGC 1 -TTTTTTTATTTTTTT 1 -TTATTCGAAACCGCT 1 -GGTAAGAAGTGTTAA 1 -GATGTCAATAACGGG 1 -GTTTCCTCCCCACGG 1 
-AAATGCATACAAGAG 1 -AGAGTCTCGTGATGT 1 -GACGCCTCCCCCAGA 1 -TGCTGTAAACTAGCC 1 -TAAGACTTACCCCAC 1 -CTCCTTGTCTGGGAG 1 -TTTCTTAAAAAAAAA 1 -TGCCTTGATCTACAC 1 -AGACACATAGCAATT 1 -TTCTTTTCTCTTTCT 1 -GGTACATGATCACTA 1 -CCAAATGAGCTTCCA 1 -GCCCGCAGGGACCGT 1 -CATGTTTTGCCAGAG 1 -CTCCCCACGGTGTGG 1 -AAATATACAGATTGA 1 -GAGAGAGGGACTTTC 1 -CCACAGTAAAACAAA 1 -TGATCTTTAAGAACA 1 -CAAAACTTAACTGAA 1 -GAATGCCCGCCAGCG 1 -TACTAAATATACAGA 1 -ATGTTGTAAATAAAT 1 -GATCTTTAAGAACAT 1 -TGAAGAATAAACCGT 1 -CGTTTTTCTTAAAAA 1 -CCAGAAATGGTCTAT 1 -TGCTCCCTGCTCAAC 1 -CACATAGTGAGGGTT 1 -TAAAGTCCTTGGCAC 1 -CTCTCTAGAAACACC 1 -TCTCAGCTCACTGCA 1 -CCATGCTGTGCATCA 1 -TGTTTTCAAAATTAA 1 -AGCGAGACATCTCGG 1 -TCTCAGTTCATAATA 1 -TTTTAAATATCAGAT 1 -TCATGTTTTGCCAGA 1 -GTGACAGTGGGATTT 1 -TTAGTTTCTTCATCT 1 -TATGTTTATGCCTTT 1 -TTAACTATCTTGGGC 1 -GCGTGCTGTTTCCTC 1 -TCTCGATTGCTGCCA 1 -CCCCACTCCAGCTGC 1 -CTCCCAAAGTGTTGG 1 -GTATTACTTTGGAAA 1 -AAATATACCTGAAGC 1 -CTAAGGCCACGGAGC 1 -AATGAGAGGCATGAC 1 -CGCCGAAAGGGGCAA 1 -TACTTTGGAAATTTT 1 -TTATGTTTATGCCTT 1 -TCATGTTTTCAAAAC 1 -GAATCTTTGGAGTAC 1 -GAGTGCAGTGTGCAG 1 -CAGCAGACAGGCTTA 1 -TCAGCAGCTTACAAA 1 -GCTACCCAGCAGGAA 1 -CCCCCTCATGTTTTC 1 -TTCAGGGTAGTATGG 1 -AGGAAAGGCTACTAG 1 -AAGGTATGTTCTTAC 1 -AGCTAAGAGAGCTCT 1 -TCCCAAAGTGTTGGG 1 -CCCTTCCCCACTCCC 1 -AGGACCAGAGCGGGA 1 -GCTGGATGACGTGAG 1 -CAGCCAATCAGGACA 1 -ATAGCCTCCAGGCCA 1 -AACCATCTTGGATTA 1 -TTTTTGGGATGGAAT 1 -TATGGCGGAAGATAA 1 -CTCCCCTGCTCCCCG 1 -ATTTGCGTTTTAATT 1 -ATATATACATTATCC 1 -TGGGACATAGCAATT 1 -CCTCCCGCCTCGGCC 1 -GACTTACCCCACTTA 1 -GAGAGAGCACAGCGA 1 -AAAGACACATTAATA 1 -ACTCAAGAGTCTACA 1 -TTTCTTGTGGTATCT 1 -GTAAGAAGTGTTAAG 1 -GGCCACCCCGCCGCT 1 -TGGGTTCAAGTGATT 1 -TTGAGATGTTCGTTC 1 -GCCTTTCTCTGGAGG 1 -GCAGGCATACTCATC 1 -GTCTAAAATGGTTAG 1 -ACATAAAGTCCTTGG 1 -TCTTTCTTTTTTTTT 1 -CCATGGGGAAGTGGG 1 -CTTTGGGACGAGCCT 1 -GCAGATTGTTTATAT 1 -ACAAAACCTAAATGC 1 -CACAGGTGATTGCTG 1 -GACAGGGTCTCGCTC 1 -GGCTAACTTGGTGTC 1 -GGAAGTGGGAAGAAA 1 -TACGTGGCAGATGTT 1 -TCAGACAGGCTGGGT 1 -CCTTTGGGACGAGCC 1 -ATACATGCCTTTTTT 1 -TCCCGACCCTCCCGT 1 
-TACATCTTGGAAACA 1 -GTGCTGTTTCCTCCC 1 -GCTTTGTATCACAGC 1 -CAGCGACGCAGTGCC 1 -GGATTTGCGTTTTAA 1 -GTTAGTGCTACTAAA 1 -ACATTTTCTCAAGGT 1 -TCTCAAGGTCAAAAA 1 -AGCGCGCGTCCCGGG 1 -TTGCTGAAAGACAAG 1 -AGCTCCCCTGACTGA 1 -CCAGTAGGTAAGAAG 1 -CACTTGGGGCTAACT 1 -TTTCTAGCAGATTTC 1 -CCACCATGCCTGGTT 1 -TCAGCGCCGCGCCTT 1 -TGGAGATTGCGCTGC 1 -AGGCTTGACTTCAAT 1 -CTTACCCCACTTAAC 1 -TCTTTATTAATGGTT 1 -AAAGACCACAGGGCC 1 -CTTTCGATCATGTTT 1 -TTCACTTGGGGCTAA 1 -TGCTCCCCGCCGAAA 1 -GATTTTCTAAACTGT 1 -TTTTATATCAAAGCA 1 -CAATAACGGGTAGTT 1 -CTCATGATCAAAACA 1 -ACCATGTTGGCCAGG 2 -CTTACAAACAAAGGC 1 -TGTAAGACTTACCCC 1 -AAGTGTTAGTGCTAC 1 -CTGCTTACATGTCTC 1 -ATGAGCTAAAAAACA 1 -GTCAGCTTCAGGAAT 1 -CTTCATGCCACTCCA 1 -CCGCGCCTTTGGGAC 1 -GTGCATCAGTATCTC 1 -AAGACATTAAGTTTT 1 -TGGTGAGATCTCAGC 1 -TCTTCTGTCACTGGA 1 -ATTTGGGCTCAACCA 1 -GCTAAAATGTTACTC 1 -TGTATCACAGCCAAG 1 -ATCTACACCCATCTG 1 -CCTCCACGCGTTCAC 1 -AGCCAGGTTGGGAAT 1 -ATGAAACCCAGACAC 1 -AAACAACCAGGCAAA 1 -CTGGCTTTATGACTT 1 -CAGATGGGATGGGAC 1 -CCCTTCCATTTTTTT 1 -TGACTTTCCATTCTC 1 -ATTCAGGGTACATGA 1 -GTGTCTAGTAGAGTG 1 -TACCCGTCCCCCACT 1 -TTTCTGCTGAGGTTT 1 -CTCAAGAGTCTACAT 1 -TGATTTTTTTCATAA 1 -TAACTATCTTGGGCT 1 -AAATATTATAAACTC 1 -TATCAGATGGGATGG 1 -TTTTCAAAATAAGAT 1 -ACATGGTTCACACGG 1 -AAGAGGACAAGTATC 1 -CATTGCCCAGGCTGG 1 -CCACAGGCACGAGCC 1 -CACATGGTTCACACG 1 -CTCAAGGTCAAAAAC 1 -TGATCAAATGGAGTA 1 -CTTTTTTGAGAGACA 1 -AATCTGATCTTTAAG 1 -GACTACTCATACACA 1 -TCAATGTCGGATGGA 1 -TTTTTCAATTCTCTC 1 -ATGGGACTCATTCAG 1 -TGCCTTAGTTTCTTC 1 -ACTTTCCAAAATGAG 1 -AGAAGGCAGGCGTTT 1 -CCAGCTGCGCTGGGG 1 -CAGTGCTAAATATAC 1 -ATTCCCTGACAATCC 1 -ACAGGTGATTGCTGT 1 -AGTTTCTTAAAATCT 1 -TCCCCTGACTGACAA 1 -CCACTCCCAGGCCAC 1 -TGCTGCCATTTATCC 1 -CTGTTTTCAAAATTA 1 -CACTCAAGAGTCTAC 1 -GTGATATATACATTA 1 -GCAGCTTTATGATAT 1 -TGTACTCTGTTTCCA 1 -AAAAAGACAGTGGAG 1 -AGGAAGGACCAGAGC 1 -CCCCTCCACGCGTTC 1 -CACACACAGGTCAGA 1 -TTCATTTCTCAGTTC 1 -AGTTTCCTTTTCTTA 1 -ATCTTTAAGAACATT 1 -TTAGAGCTACCCAGC 1 -AACTTAGCGGGCGCC 1 -TTTGCATCTGAGCTT 1 -ATTTCACTTGGGGCT 1 -GCAGCTTACAAAAGA 1 -GAGAGCATCACTGTA 1 
-GAGAGTAGCGCGAGC 1 -TCGCTCTGTTGCCCA 1 -ACATGCCTTTTTTGT 1 -AGGCTGGAGTGCAGT 1 -ATTCAGGGTAGTATG 1 -TATCTTCTGTCACTG 1 -CAACAGGGATAGGTG 1 -GAGGCAATCATCATT 1 -CCTGATTTTTTTCAT 1 -CAGAAATGGTCTATG 1 -TCCATGGGGAAGTGG 1 -ACACCCGGCTAATTT 1 -AGATTGCGCTGCATT 1 -GTGCCTTAGTTTCTT 1 -CTTTCTTTTCTTTTC 1 -TTTTTTGTTTTTTTT 1 -ATTTTTTAAAAGTGA 1 -TTTGGTTCCTGCTCC 1 -CTCGTGATGTTTAAG 1 -CAGAGGAGGAAAATA 1 -GTCCTCAACAGTCTT 1 -CGTAAGCTACCACGC 1 -TACAGGCACACACCA 1 -CACGGTGTGGCCCCA 1 -CATGTGACAGTGGGA 1 -CAGGCACGAGCCACC 2 -GCCCTTCCTACTAGC 1 -CTAATAATAAGAACA 1 -AAAATTCTTTTAAAT 1 -GCACAGCGAGGGCCA 1 -ATGACATCTAACATA 1 -TGACTTCAATCTCGA 1 -TCCTACTAGCCTCAG 1 -TTTTACTTTTAAAAT 1 -CTACTAAATATACAG 1 -TGTCAATGTTCTCCA 1 -GGTGCGCACCCCCTT 1 -AATTATTCGAAACCG 1 -TGCTAAAATGTTACT 1 -CCAATCCAGCCAGAA 1 -ACCCGGGCGACGCCT 1 -CTCAGCACTCATAGC 1 -TCCTCCCGCCTCGGC 1 -AGGCACACACCACCA 1 -AGAGGACAAGTATCA 1 -AAGTGCTGCTGCTGC 1 -CTCAAGATCTCTGGC 1 -TGGCGGAAGATAACT 1 -AGGTTTTATATGCCA 1 -CAAAGAGGAAGCCCT 1 -CACACCCGGCTAATT 1 -TTTTTTTCTTTTTTG 1 -AGCGAGGGCCACAGA 1 -GGGTGCGCACCCCCT 1 -TATGATCAAATGGAG 1 -CAGGGCCCATGCCGC 1 -ATCACTAAAAGGCAG 1 -CGAATGCTGTCAGCT 1 -GTGGATCAACCCAAG 1 -GCCTTTTTTGTTTTT 1 -TGCCACTCCACAGGA 1 -ATCTGATCTTTAAGA 1 -CCAGCAGGAACAAGC 1 -CTACTCATACACAAC 1 -GAACATACTTTCCAT 1 -CATAACCAGCAAATA 1 -AACAAAGAAACAAAA 1 -CCTTAAATGGTTGAG 1 -GGGAACATGACCCTG 1 -GCATCAGTATCTCAG 1 -AGGCTGGTCTTGAAC 1 -ACCCATCTGATTTTC 1 -AGGTTGGGAATATAT 1 -CAAGCCATCCTCCCG 1 -TTTCTTTTCTTTTCT 1 -AAATAAGATTTTTTT 1 -ATATGCCAGACCCCT 1 -GACTTTTGGAGAGTT 1 -AGTAAGAGGCACAGT 1 -CTGCCATTTATCCCC 1 -GGCCCCGCGAAAGAG 1 -GAAAGAGAGAGTAGC 1 -AAAGAGCCAAAGAGG 1 -TTTCAGCAGCTTACA 1 -CTCTATATTTTTCAT 1 -CTCATTCAGGGTAGT 1 -ACAAGCCACCTCCAT 1 -CTACATGATTTGGCC 1 -AGGCCCGCAGGGACC 1 -TTTTAAAATGACATC 1 -GGGACGAGCCTACCC 1 -GTAGGAGAGACTCAC 1 -CGTCGCCGTAGGCCA 1 -TTCTTCATGCCACTC 1 -ATTTCCACTCTGGCC 1 -CACCAAGTGTTTACA 1 -GTATGTTCTTTATTT 1 -CAAGAGATCCTCCCG 1 -TGATGCCCTCTCAGC 1 -CAGGCCACCCCGCCG 1 -AGATCCAGCCCTGGA 1 -TGCATTTTTAAGAGC 1 -GCCCTGGTTAGGCTT 1 -CTCAGTTCATAATAG 1 -CTGCTCCCCGCCGAA 1 
-CCAAGCGCCCCTCCA 1 -GGTCTCAACTACGGA 1 -GCTGGCAAATACCTT 1 -ATTGACACTAAATAT 1 -ACTAACAAATGTCTA 1 -TTATCTTTATTAATG 1 -TTTCCAAAATGAGAG 1 -CATCTAACATAACCA 1 -GCTGCTCCCTGCTCA 1 -TAAGTCAACTTCAAT 1 -GGCAAATACCTTAAA 1 -AGAAGTGTTAAGAGT 1 -TTCCTCTATACATGC 1 -TAAAATACAACAGGG 1 -ATACTTTTCAAAGTT 1 -GTGTAATTAAGTTTC 1 -CATTCTACAAACGTC 1 -ACGCAGTGCCAGGTT 1 -AGGTCAAAAACTTAC 1 -GCTGCGCTGGGGGAG 1 -ATGTCTAAAATGGTT 1 -CATGGCATGTATTAC 1 -CTCAACCAGTTACTG 1 -CCAACTTCAAGTGAT 1 -ATTGCGCTGCATTTT 1 -TAGCAGTATCTTCTG 1 -TATATGTATTTGTGC 1 -GCAAATACCTTAAAT 1 -CTCAGCAGGTGCCAC 1 -TTGAGTTGGACCCGA 1 -AAAATACAACAGGGA 1 -CCAAAACTTAACTGA 1 -CTCGCTCTGTTGCCC 1 -GCAGGGACCGTCACC 1 -ACACAACTTTCAGCA 1 -CAGGAAATTTGACTT 1 -GATTGTTATTTTTTA 1 -CCATTCTCTGCTGGA 1 -CATCAAGAGGTGGAT 1 -CTTGTGGTATCTTCC 1 -TGTGTATAAAATTCT 1 -CTTACAAAAGAATGT 1 -AAACTTACCTCCATG 1 -AGATTGTTTATATCA 1 -CTCTGGAGAATCTCA 1 -AAGTTAATTATTCGA 1 -GGACTAGCCCCACGG 1 -GAAGGAGGAGTACCA 1 -TCCCAACTTCAAGTG 1 -CGCAGGGACCGTCAC 1 -CCCATGCCGCCCAGT 1 -TCCAGCCCTGGACTA 1 -GCTGCCATTTATCCC 1 -GGCTGTGACAAAGTC 1 -TTGAGGGAAGGCGGC 1 -TTTCAGTAGAGACGG 1 -AAAATACCATGTACT 1 -GATGCCCTCTCAGCA 1 -ACCAACCCAAGGTAT 1 -TTTGTAGAGACCAGG 1 -TGGGGGAGCCAGAGG 1 -AGAGTCTACATGATT 1 -ATGTTTCTTGTGGTA 1 -GAACATTCCCTGACA 1 -AAACCGCTTTGTATC 1 -ATTAATATTGCCAGG 1 -CGGCAAGATTTTGTG 1 -TAAGAGTGTATATGT 1 -GGCATGCGCTCCCGC 1 -CACGGCAGGCATACT 1 -ATCGATTTCTCCCAA 1 -CGTCCTCAACAGTCT 1 -ATTTTTGTATTTTCA 1 -TCAATAACGGGTAGT 1 -AGAAATAAGGCTGGC 1 -CTCCCTGCTCAACTG 1 -GGAAGCCAGTAGGTA 1 -ATGGTTAGAAATAAG 1 -TTTTTTTATTTTTTG 1 -TTCAGTAGAGACGGG 1 -TGACAATCCCAATAT 1 -ATTTTCTCAAGGTCA 1 -ATGATCAAATGGAGT 1 -TTTTTCATTGAAAAT 1 -AAAACCTAAATGCAT 1 -TCACTTGGGGCTAAC 1 -AGAGGCATGACTAGA 1 -TTTTCTAGCAGATTT 1 -AAGGCCACGGAGCGA 1 -GGTGCTAGGACATGC 1 -CAACTTCAATGTCGG 1 -CCACCTCCATTTTGA 1 -AGCCAGTAGGTAAGA 1 -TGGAGAGTTATTTAA 1 -TGAGGTTTTATATGC 1 -GCATCTGAGCTTCTA 1 -AAGTCAACTTCAATG 1 -CACGGAGCGAGACAT 1 -CGACGCCCTAAACTT 1 -AAAAGCCCTGGTTAG 1 -CAAGGTATGTTCTTA 1 -GGATGCTAGGACAGC 1 -TCAACCAGTTACTGG 1 -GTCTTGGTAACCATC 1 -CGACGCCTCCCCCAG 1 
-AAAAAAAATGCACGA 1 -AGTGTTAAGAGTGTA 1 -AGACCCAGAGGTGCT 1 -AGGCTGGCAGAATAG 1 -GGGATCAGAGCACAG 1 -TACCACGCCCAACCC 1 -AGGCTCTCAAGGACT 1 -TTACTGGCTTTATGA 1 -TTTTCAGTAGAGACG 1 -TAGTTATCTATGGCG 1 -CTTCAGAGGCAATCA 1 -GCGAGAGAGCACAGC 1 -GAGTACCAGGCCACC 1 -GAAGTGTTAGTGCTA 1 -TATCAGACAGGCTGG 1 -ATTATTCGAAACCGC 1 -AACAAAAACAAAACC 1 -GTTTTACTTTTAAAA 1 -CCTCCCCCAACCTCG 1 -ATTTCTCCCAATTCC 1 -GTTTCAGAAACAGTA 1 -CTTGTCTGGGAGGCT 1 -ATAGTGAGGGTTATC 1 -ACGCCTCCCCCAGAC 1 -AAAATGAAGAGAAGT 1 -AAAGCCTAAGCTCAA 1 -CCTACCCGTCCCCCA 1 -AAGAGCCAAAGAGGA 1 -CCTTCCTACTAGCCT 1 -TACCAGTAATGGTGG 1 -CAAGGAGAACTTGGA 1 -AACAGGGATAGGTGA 1 -AATCATCATTACATG 1 -GAGCACAGATTCATC 1 -AACCGAAAGTAAGAG 1 -TTTCTTAGAGTCTCG 1 -CTCTGACGCTTATCG 1 -TAGGATGCTAGGACA 1 -CTTGAACTCCCAACT 1 -GTATTTTCAGTAGAG 1 -CCAAGTGTTTACATT 1 -GGGAAACTACTGGTT 1 -TGGGGGTGAATTCAG 1 -AATATATGTGTATAA 1 -TCTAAACTGTATGAA 1 -ATGATGCTGCTTACA 1 -CCCACTCCAGCTGCG 1 -AATACCTTAAATGGT 1 -TTTTTTCATAACATT 1 -TCCTAAGTGGATCAA 1 -AACATTTTCTCAAGG 1 -GAGTAGCGCGAGCAC 1 -GGCAAGATTTTGTGA 1 -ATGGAATCTCATTCC 1 -CATACACCTTCTTCA 1 -ATTTTTCATTGAAAA 1 -TCTTCATGCCACTCC 1 -AGGTGCTAGGACATG 1 -AAGCCCTTCCTACTA 1 -AGTAGCGCGCGTCCC 1 -GCGAAAGAGCGGAAG 1 -GCGGGCGCCTAGACG 1 -CTAAAATGGTTAGAA 1 -AACAAATGTCTAAAA 1 -GCATGACTAGACCAT 1 -TCGGTGCCTTAGTTT 1 -AAATACCAGTAATGG 1 -TATTACTTTGGAAAT 1 -CCCAGACCCCAAGCG 1 -CTAAAAAACAAAGAA 1 -ACTTAGCGGGCGCCT 1 -AAGATACTTTTCAAA 1 -CGCGTGCCCAGCCAA 1 -CACTCCAGCTGCGCT 1 -AACTTTCAGCAGCTT 1 -GTAAGTCAACTTCAA 1 -ACCAGTCCTTGCTGA 1 -CTCCAAGCCAGCGAC 1 -GCATCCACAGGTGAT 1 -AGGACAAGTATCAGA 1 -CAGTTACTGGCTTTA 1 -AAGAGCTGGCAAATA 1 -ATCAAATGGAGTAAT 1 -ATGCTCCACTTTTTC 1 -CTCTGTCAATGTTCT 1 -CGAAAGTAAGAGGCA 1 -GAATACATTGTTTAG 1 -CAAATGAGCTTCCAC 1 -GTAATGGTGGTGATA 1 -TTGACCAGATATATC 1 -AACTTGGTGTCAAGC 1 -TTATTTTTTGGGATG 1 -CCAGAGGAAAAGGTG 1 -GACTTCAATCTCGAT 1 -ATAAAATTCTTTTAA 1 -AGGCACAGTACATCT 1 -GTACTCTGTTTCCAC 1 -CCTATCATTAAGGAA 1 -AAACTTAACTGAAAA 1 -TTACTTTTAAAATAC 1 -ACAAAGGCCTATACC 1 -AGTGCCTGGGACATA 1 -CATAGCATTCCTTAA 1 -TTTGAATTTGGGCTC 1 -GGTGTCAAGCTATAT 1 
-AAAGCTAGAGGAAGC 1 -AGCAAATACCAGTAA 1 -TTTTCTTAGAGTCTC 1 -TGTTTACATTTGGTC 1 -GCATGGCATGTATTA 1 -TTTTTTCTAGCAGAT 1 -CTTGGGGCTAACTTG 1 -TATCAAAGCAGCTTT 1 -ACCACCATGCCTGGT 1 -TTGTAAATAAATTGT 1 -CAGGCCAGAAAGAGA 1 -GGACAGAACATACTT 1 -TAGTAGAGTGCCTGG 1 -TATGAAAAAGACAGT 1 -CTAAATATACAGATT 1 -AAACATTTTCTCAAG 1 -AAGCGCCCCTCCACG 1 -GGTCTCCCCTGCTCC 1 -GACAAAGTCACATGG 1 -CCAAAGTGTTGGGAT 1 -CTTTATTTCTGCTGA 1 -GAGGTGCTAGGACAT 1 -AGTACCAGGCCACCT 1 -TTCATGAGCTAAAAA 1 -ATACTCATCTTTTTC 1 -ACAACAAAACCTCTT 1 -CCCACATAGACCCAG 1 -TTTTCTCTTTCTTTT 1 -ATCAGTATCTCAGCA 1 -CTGGTCTCAACTACG 1 -CCCTCTCAGCACTCA 1 -TACCTTCTTGAGATG 1 -TCATCTGTGAGAGGC 1 -GAATGTAAGACTTAC 1 -TGTCAATAACGGGTA 1 -ATTTGTGCAAGTGCT 1 -CAGGACAAGGCCCGC 1 -CAGCAGGACTTACAA 1 -CAGGCTGGAGTGCAG 1 -TTTTAAAAGTGACAT 1 -TTCCTTAACACATCA 1 -CAACCAACCCAAGGT 1 -TTCAAGTGATTCCTG 1 -CTGGGAGGCTGTGGG 1 -CCCAACCTCGGCGCT 1 -CTTGGAAACAACCAG 1 -TGTTTTCAAAACCGA 1 -CTTCAAGTGATCAGC 1 -CGTGCCCAACCAACC 1 -CATCTTGGATTATCT 1 -GCCAGAGGCCCCGCG 1 -TACAAACAAAGGCCT 1 -GTGCTGGGATTACAG 1 -GCTGTGCATCAGTAT 1 -ATGTTTTCAAAACCG 1 -CCTGGTTAATTTTTG 1 -GGTAGTATGGCCATA 1 -AGAAACACCCTATCA 1 -CTCAAAAGCCTAAGC 1 -CCCCTGCTCCCCGCC 1 -CCCGCCGCTTCCCCG 1 -AAGTTTTACTTTTAA 1 -CAATCTCGATTGCTG 1 -AGCATTCTACAAACG 1 -GACTAGCCCCACGGC 1 -ACTCTCTGTTTGAGG 1 -GGCCAGAAAGAGAGA 1 -GTGCTAAATATACCT 1 -TAGCATCCACAGGTG 1 -CCGCCCAGTTTGCTC 1 -TCATAACATTAAAAG 1 -TATCATGTTAGAGCT 1 -CTCCTGGGTTCAAGT 1 -GGTAGTTCTTATAAT 1 -AGTTAATTATTCGAA 1 -GCTGGTCTCAACTAC 1 -TATATTCAGGGTACA 1 -CAGGTCAGAGGAGGA 1 -GAAAGGCTACTAGCC 1 -TTACAGCTCCCCTGA 1 -GTAAACCTGAATCTT 1 -ACTTTGTCCCGACCC 1 -TGCTGCTTACATGTC 1 -CCTTGGCACACAGAA 1 -TTAACACATCACTCA 1 -AGTCCTTGGCACACA 1 -CCTCTGTACGAAAAG 1 -AGCGAGAGAGCACAG 1 -AAGGCATGCGCTCCC 1 -TGATCAGCCTGCCTT 1 -ACCAGGCTTCACCAT 1 -ACAGAAAGATGTCAA 1 -GGAGTACCTGAGGAA 1 -ACACATTAATATTGC 1 -AGCCTTTCTCTGGAG 1 -TCAGGGTAGTATGGC 1 -AAGTGTCTAGTAGAG 1 -CAAGATCTCTGGCGT 1 -GGGCGACGCCTCCCC 1 -TGTCTATAAATAGTC 1 -GTCTGGGAGGCTGTG 1 -CAAATTAAATGCAAT 1 -CGATCTATGAAAAAG 1 -AAGAAACAAAACCTA 1 -AGACCAGTCCTTGCT 1 
-TCATAATAGCCCCCA 1 -GCGAGACATCTCGGC 1 -ACCCAGACACATAGC 1 -CGCCGCTTCCCCGAG 1 -GACAAGTCTGAATGC 1 -AGGACAAGGCCCGCA 1 -GAAGTGTCTAGTAGA 1 -CAGCTACTCCTCCTT 1 -GCAGATGTTATTATC 1 -GGATCAACCCAAGGT 1 -CTCAAGCCATCCTCC 1 -CATAGACCTTTTTTA 1 -TCATGATCAAAACAT 1 -TCCATTGCCCAGGCT 1 -TTTCAAAGTTCATTT 1 -CGGGCCACCAAGGAG 1 -CTTTCTTTCTTTTCT 1 -GTTTTCAAAATTAAA 1 -ACTCACGCTGGATAG 1 -CTGCCTTGATCTACA 1 -GGGACCACAGGCACG 1 -GCTGGATAGCCTCCA 1 -TTTTTGTAGAGACCA 1 -GAAGAAAATATTATA 1 -GAGAAAAAAAAGGAA 1 -AAAACAAAAACAAAA 1 -CATTTCTCAGTTCAT 1 -AGTTATTTAAGCTTC 1 -GTATATGTATTTGTG 1 -CGACGCCTCCACTTA 1 -GGATGAAACCCAGAC 1 -TCTGACGCTTATCGA 1 -TGCCCAACCAACCCA 1 -CTTACCAAACAACAA 1 -AGTGCAGTGTGCAGT 1 -GAACAAATAAGTTAA 1 -TTATATCAGATGGGA 1 -CAGCAGGTGCCACTA 1 -CTCCCAATTCCATTT 1 -GAGAAACCCTCCCCC 1 -CCAACCAACCCAAGG 1 -CCTCCGCCTCCTGGG 1 -TCGGCCTCCCTAAGT 1 -GCGCTGCATTTTTAA 1 -CAGGCACCAAGTGTT 1 -ACATAACCAGCAAAT 1 -TTTTCATAACATTAA 1 -TAAAACAAAAACAAA 1 -ACGCGTGCCCAGCCA 1 -CTTACCCGGGCGACG 1 -AGCGCCCCTCCACGC 1 -TCAAATTACTCATGT 1 -ATATATCTCTCTAGA 1 -TTATTGTACTCTGTT 1 -CTTGGATTATCTTTA 1 -AAATGTTACTCTGTC 1 -AAGAGCGGAAGAGAA 1 -GAAGACCTGATTTTT 1 -AACCCACACACAGGT 1 -CCTGCCTGGAACTCT 1 -AATTCTCTCTCCATT 1 -AATGCATACAAGAGC 1 -ATCTTCCAGAAATGG 1 -AAATTGTATTCAAAT 1 -TGTCGGATGGATGAA 1 -AGATCCTCCCGCCTC 1 -TACCTGAAGCTGCCA 1 -GGGTTATCATGTTAG 1 -AAACGCGTGCCCAGC 1 -GCATTTTTAAGAGCC 1 -TGGCAGAATAGGCTG 1 -AAGTGTTGGGATTAC 1 -TTTGAGGGAAGGCGG 1 -TAAATGCATACAAGA 1 -ACGCCTCCACTTATA 1 -CACCCCTTCCATTTT 1 -CCGCTTCCCCGAGAT 1 -TATGTTGTAAATAAA 1 -GAAGAATAAACCGTG 1 -AAGACCAGTCCTTGC 1 -GGCACACAGAAAGAT 1 -GCTAAGGCCACGGAG 1 -TCCCCCACTCCAGCT 1 -AGAGTGCCTGGGACA 1 -CCTCCCGCCTCAGCC 1 -CTTTGGAGTACCTGA 1 -TTTTTTTTTCTTTTT 1 -TGCCGCCCAGTTTGC 1 -ATCAACCCAAGGTAT 1 -GGGCGCCTAGACGAA 1 -GTGCAGTGTGCAGTG 1 -GATTGGGGAATCTAA 1 -AAAATAGACCTTCAG 1 -GCACACAGAAAGATG 1 -CAAAGCACATAAAGT 1 -ATGTACTAACAAATG 1 -AATACAACAGGGATA 1 -TAAGCTACCACGCCC 1 -CATGTACTAACAAAT 1 -TACATGATCACTAAA 1 -ACAGGCGTAAGCTAC 1 -TCAGTTCATAATAGC 1 -ATTTTCTAAACTGTA 1 -GCTGGAGTGCACTGG 1 -TGTGCAGTGGTGAGA 1 
-TTCTTCAGTAAGTCA 1 -TTCTTTCTTTTCTTT 1 -ACATTTGGTCATCGA 1 -CACCCGGCTAATTTT 1 -AGGTACTTAGAAAGT 1 -ACCAGAGCGGGAGGG 1 -TCCACAGGTGATTGC 1 -CCAGCAAATACCAGT 1 -TCCCTGCTCAACTGC 1 -TGAACTCCCAACTTC 1 -CTTTCCATTATGATC 1 -GAATAGGCTGCTGTT 1 -ATGATATGACTACTC 1 -AGGAAGCCAGTAGGT 1 -CCGTGCCCAACCAAC 1 -CTCCCGTCGCCGTAG 1 -AATAGTCCTCAGGAC 1 -CCACAGGAGAAGGGA 1 -GCTTTATGATATGAC 1 -TTTGCGTTTTAATTA 1 -GCCCTGGACTAGCCC 1 -TAGCCTCAGATACCA 1 -TTTAAAATGACATCT 1 -GGCACGAGCCACCGT 1 -TCTTTAAGAACATTC 1 -TAACCATAGTAGTTA 1 -CGGAAGAGAAACCCT 1 -TGATTCCTGGGACTA 1 -GCCACCTTGACCAGA 1 -AGAGGCACAGTACAT 1 -ATGCCTTTTTTGTTT 1 -AATGGTGGTGATATA 1 -TTCCTACTAGCCTCA 1 -TCAGATTGTTATTTT 1 -CCTTCCCCACTCCCA 1 -AAAAAAAGGAAGACA 1 -AACTTAACTGAAAAA 1 -AGGGTATTTCACTTG 1 -TAGCCTCCAGGCCAG 1 -GGCTGGGTTTGAATT 1 -CAGCTTACAAAAGAA 1 -TGTTGGGATTACAGG 1 -CCTATACCTTCTTGA 1 -TATATCAAAGCAGCT 1 -CATGAGCTAAAAAAC 1 -GCGAACTTAGCGGGC 1 -CCTTTATGTTGTAAA 1 -TAAACTTTGTCCCGA 1 -GTGGATTGGGGAATC 1 -TCTCATGATCAAAAC 1 -TCAGGGATCAGAGCA 1 -CATTAAGGAAAGGCT 1 -TGAGGAATATCGGGA 1 -ATGTCAATAACGGGT 1 -CCCTGTGTAATTAAG 1 -TGGCCAGGCTGGTCT 2 -TGAGTTGGACCCGAT 1 -CCCAACTTCAAGTGA 1 -TTATGATATGACTAC 1 -ACAATCCCAATATGC 1 -GAGATCTCAGCTCAC 1 -CTCATGTTATTGTAC 1 -TTGGAGTACCTGAGG 1 -TTCACTTCTTTCTTT 1 -GCCAAATGAGCTTCC 1 -ACATAGACCCAGAGG 1 -GGCCCGAATGCTGTC 1 -TATAAACTCTATATT 1 -AGGCCACGGAGCGAG 1 -CAAATGGAGTAATGC 1 -GAGAAGGGAACATGA 1 -GCCTAATGTTTCAGA 1 -CGCCATGATAGCTCA 1 -TTCTCAAGGTCAAAA 1 -TTGGTGTCAAGCTAT 1 -AGCGCGAGCACAGCT 1 -CAGGCTTCACCATGT 1 -TTAAGGAAAGGCTAC 1 -ACTACTCATACACAA 1 -CAAATAAGTTAATTA 1 -TCTGATTTTCTAAAC 1 -GTATTTCACTTGGGG 1 -TTGGTTCCTGCTCCT 1 -GTCTCGATCTATGAA 1 -ATAAATTGTATTCAA 1 -ACTCTGGCCAAATGA 1 -ACACAGGTCAGAGGA 1 -GACCTCAAGCCATCC 1 -CGTCCCGGGTGCGCA 1 -CTATACCTTCTTGAG 1 -GTCTATGCATGGCAT 1 -ATACCTGAAGCTGCC 1 -CCGAAAGTAAGAGGC 1 -TCCACAGGAGAAGGG 1 -TTTTTTTTTTTTCTT 1 -ATTCCTCTATACATG 1 -AAATGGAGTAATGCA 1 -CAAAATTAAATGACG 1 -ATATATGTGTATAAA 1 -GGTTAGAAATAAGGC 1 -ACCCATGAATACATT 1 -TTTCAAAATTAAATG 1 -ACACGGCAGGCATAC 1 -ATTTCATGAGCTAAA 1 -GTGGTATCTTCCAGA 1 
-TATCTATGGCGGAAG 1 -TCCAGGCCAGAAAGA 1 -TGGATGAAACCCAGA 1 -CAGCTGCGCTGGGGG 1 -GACAAGTATCAGACA 1 -CAAAGTTCATTTCTC 1 -TCAACTGCAGGGAAA 1 -ACCTGATTTTTTTCA 1 -GGTTAATTTTTGTAT 1 -TAAGTGCTGGGATTA 1 -TCTCCAGTCTAAGGG 1 -CTGGGGGAGCCAGAG 1 -TCTCTGACTTTGTAC 1 -CAAAGTCACATGGTT 1 -GGTCATCGATTTCTC 1 -ACAACCAGGCAAAGA 1 -GCCAGGTTGGGAATA 1 -CCTCTATACATGCCT 1 -GCCAGAGGAAAAGGT 1 -TCTCAGCACTCATAG 1 -TGGGACTACAGGCAC 1 -GCTGGGTTTGAATTT 1 -TATTAAATGCCTCAG 1 -GTAGAGTGCCTGGGA 1 -CCGGCTAATTTTTGT 1 -TCTGGCCAAATGAGC 1 -CTGCTGCTGCTCCCT 1 -CTGGCCAAATGAGCT 1 -GGGGAATCTAATGAA 1 -GCACCAAGTGTTTAC 1 -AACATAACCAGCAAA 1 -GGGAATCTAATGAAG 1 -ACGAGCCTACCCGTC 1 -TTCTTTTAAATACAC 1 -TAGCGCGCGTCCCGG 1 -CCACTCCACAGGAGA 1 -GCAAGTGCTGCTGCT 1 -TAAAATGACATCTAA 1 -CTATGGCAAATGGGA 1 -GGGACTTTCCCGTTT 1 -CAAATTACTCATGTT 1 -TACAAACGTCGCGTG 1 -GTCCCGGGTGCGCAC 1 -GGAAACTACTGGTTC 1 -GGGTTCAAGTGATTC 1 -TATCTTTATTAATGG 1 -CTCAAGAGATCCTCC 1 -GTACGAAAAGACCAC 1 -AGCAATTGCTCTATA 1 -CTGACTGACAAACCT 1 -AGCTAAGGCCACGGA 1 -ACAGCCAAGCATTCT 1 -CTGATTTTCTAAACT 1 -TCTCGTGATGTTTAA 1 -GCCCAACCCCCTCAT 1 -GTGTATTGCCAGGTA 1 -CGCAGAAGGCAGGCG 1 -CGCGAAAGAGCGGAA 1 -TTTTCTTAAAAAAAA 1 -GTGCCACTAATCTGA 1 -AAATGGTTGAGTTGG 1 -AATTACAGCCAAAAG 1 -CACTCTGGCCAAATG 1 -TGTTATTTTTTAAAA 1 -AAAACTTAACTGAAA 1 -AGGCCAGAAAGAGAG 1 -TTTATTTTTTTATTT 2 -GGACTTACAAACAAA 1 -CCCTGTAGGATTCTT 1 -GTACATGATCACTAA 1 -TAAAATTCTTTTAAA 1 -TGTATTGCCAGGTAC 1 -CCTGCTAAAATGTTA 1 -TTTTTTTTCTAGCAG 1 -TCATCATTACATGTT 1 -CGGAAGATAACTGTT 1 -ATCTTGGATTATCTT 1 -AATGAGCTTCCACCT 1 -GAAGAGAAGTGTTAG 1 -GGGAGGCTGTGGGGA 1 -ACTTTCAGCAGCTTA 1 -CCGCCAGCGCGACGC 1 -ACAAAAGAATGTAAG 1 -CTGATTTTTTTCATA 1 -CTACAGGCACACACC 1 -TATATGCCAGACCCC 1 -GGCCACCTTGACCAG 1 -TGGGACGAGCCTACC 1 -TCACATGGTTCACAC 1 -TCTTCAGTAAGTCAA 1 -GGATCAGAGCACAGA 1 -AGGCTTACCCGGGCG 1 -GGAAAAGGTGAGCGC 1 -TGCTGTTTCCTCCCC 1 -TTCCCGTTTTCAGTT 1 -GATTATAGGATGCTA 1 -CTCCACGCGTTCACA 1 -CTTGGTATCTTTCCC 1 -TTCTGCTTTCGATCA 1 -CCACATAGTGAGGGT 1 -AAACCGTGACTTGGT 1 -ATGTTCTTTATTTTT 1 -GGATGACGTGAGTAA 1 -CAACCAGTTACTGGC 1 -CAGTGAAACAAAAAC 1 
-AAGGCGGCAAGATTT 1 -CCTCTCTGACTTTGT 1 -GCAGAAGGCAGGCGT 1 -AAGAAGAGGACAAGT 1 -AGTGGAGAAAAAAAA 1 -AGGCATACTCATCTT 1 -AGAGGTGCTAGGACA 1 -TTTATGCCCACAGTA 1 -AGTGATTCCTGGGAC 1 -GAGTCTCGTGATGTT 1 -CCCACACACAGGTCA 1 -TGAATTTGGGCTCAA 1 -GTTGTAAATAAATTG 1 -CTCCATGATGCTGCT 1 -CTCTGTACGAAAAGA 1 -CAGAAAGAGAGAGTA 1 -CATAGTGAGGGTTAT 1 -ACCAGTAATGGTGGT 1 -TTCCATTATGATCAA 1 -GAACATATTAAATGC 1 -ATAGCCCCCATCAAA 1 -TAAGAACATATTAAA 1 -TTGGGCTGTGACAAA 1 -ACGAAAAGACCACAG 1 -GACAGGCTGGGTTTG 1 -CATCTTTTTCAGTGG 1 -GTGTGGCCCCACATA 1 -CATGGGGAAGTGGGA 1 -TTTTATATGCCAGAC 1 -GCTCAAAAGCCTAAG 1 -AGTGTAGTACAAGAG 1 -TTACAGGCACGAGCC 1 -TCACTAAAAGGCAGC 1 -CCGCAGCAGACAGGC 1 -TTTTTTTTTTTTTCT 1 -ACATTATCCTTTATG 1 -AGTGTTGGGATTACA 1 -CCTCGGCCTCCCTAA 1 -AGGCTGGGTTTGAAT 1 -AAGAGAAGTGTTAGT 1 -GCGCGCGTCCCGGGT 1 -AGAATAGGCTGCTGT 1 -GATAGCTCAAAAGCC 1 -CCAGGTACTTAGAAA 1 -CACGGCGGGCCACCA 1 -TGATATGACTACTCA 1 -TTTGGGATGGAATCT 1 -ACCATGCTGTGCATC 1 -TTGGAAACAACCAGG 1 -GTGATGGGAACAAAT 1 -CCTGACTGACAAACC 1 -GGTGAGCGCGCTTAG 1 -CCATCTGATTTTCTA 1 -CCGTGACTTGGTATC 1 -TACTGGAGAAGTCCA 1 -AAATATCAGATTGTT 1 -GTGAGAGCATCACTG 1 -CAGACCCCTCTCTGA 1 -AATTTTTTAAAAGTG 1 -AACAAAACCCACACA 1 -TTATAGGATGCTAGG 1 -TTCTGTCACTGGAGA 1 -GTTCTTTATTTTTTT 1 -CTGGAGTGCACTGGC 1 -TGACTAGACCATCCA 1 -TCTGGAGGCTCTCAA 1 -GACCCCTCTCTGACT 1 -TAAATATACCTGAAG 1 -AAGAGATCCTCCCGC 1 -CTACCCGTCCCCCAC 1 -GCCACCCCGCCGCTT 1 -GGCATACTCATCTTT 1 -TCATTAAGGAAAGGC 1 -GAGGGTAGGAGAGAC 1 -CGTGACTTGGTATCT 1 -TCCCGCCTCGGCCTC 1 -TCCAAAACTTAACTG 1 -GACCAGGCTTCACCA 1 -GAAGGCAGGCGTTTT 1 -CAGAGGCAATCATCA 1 -TTTCTTAAAATCTCG 1 -TGACTTTTGGAGAGT 1 -GGGAAGTCACGGAGC 1 -ATGACTTTTGGAGAG 1 -AGGGGCAAGTAGCGC 1 -AGAAACAGTACTTTC 1 -GATTGCGCTGCATTT 1 -CCGATAAAATACAAC 1 -ACTTTTAAAATACCA 1 -AGTAGTTATCTATGG 1 -ATACGTGGCAGATGT 1 -CCACCCCGCCGCTTC 1 -AGCCCTTCCTACTAG 1 -AAAAGGCATGCGCTC 1 -CAGGCGTTTTTCTTA 1 -AGAGGAAGGACCAGA 1 -GAGGAAAATAGACCT 1 -CCAGGTTAGAGAGAG 1 -AGAGAAACCCTCCCC 1 -CTCTCAAGGACTTCT 1 -CCAGGCTGGTCTTGA 1 -CAGATTGACACTAAA 1 -GCTGTTTCCTCCCCA 1 -TTCCATTTCCACTCT 1 -CATCAGTATCTCAGC 1 
-GCCTCCTGGGTTCAA 1 -TTAAACGCGTGCCCA 1 -CCACGCCCAACCCCC 1 -ATGTGACAGTGGGAT 1 -CTTGGCCTCCCAAAG 1 -CATTCCATTGCCCAG 1 -TCAGAAACCATGCTG 1 -GAGAAGTCCAGGATT 1 -TGCTGGGATTACAGG 1 -CCCCACGGCGGGCCA 1 -CAACCTCCGCCTCCT 1 -GATAGCCTCCAGGCC 1 -CTTTTAAAATACCAT 1 -GATTACAGGCGTAAG 1 -CTGGTCTTGAACTCC 1 -CCCTGACTGACAAAC 1 -CAAAGGTCTCCCCTG 1 -TTAGCATCCACAGGT 1 -GGCACACACCACCAT 1 -CATTACATGTTTCTT 1 -CCCAAAGTGTTGGGA 1 -TATGATATGACTACT 1 -CGCCAGCGCGACGCC 1 -AAATTAAATGACGCA 1 -TATTAATGGTTTTAC 1 -GTGATGTTTAAGAAG 1 -AAAATAAGATTTTTT 1 -CAGGCATACTCATCT 1 -GGTTTGAATTTGGGC 1 -GGGCTCAACCAGTTA 1 -CCAGGCTGGTCTCAA 1 -CTCCACAGGAGAAGG 1 -GACTCATTCAGGGTA 1 -TTACTCTGTCAATGT 1 -GACGGGATTTCACCA 1 -ACCATAGTAGTTATC 1 -CCACGGAGCGAGACA 1 -AGATGTCAATAACGG 1 -TGTTCCTACCCATGA 1 -CAAAACCCACACACA 1 -CAGAGGGTGCAGAGC 1 -GCTCAAGATCTCTGG 1 -TGAGATGTTCGTTCA 1 -TGATCACTAAAAGGC 1 -GGGAGCCAGAGGCCC 1 -CACAGTAAAACAAAA 1 -CCTCCCGTCGCCGTA 1 -GACCCTCCCGTCGCC 1 -GTACTTTCCAAAATG 1 -CAGAGCACAGATTCA 1 -TCTACACCCATCTGA 1 -GACCCTGTAGGATTC 1 -GAAGCAGAGCCGCAG 1 -TTCTTATAATTTTTT 1 -GTAATCTTTTCTAAG 1 -CTCCGCCTCCTGGGT 1 -CCACACACAGGTCAG 1 -GTGAGCGCGCTTAGT 1 -GCTTACCCGGGCGAC 1 -TTTGGAAATTTTCAA 1 -GAAACAAAACCTAAA 1 -CTATACGTGGCAGAT 1 -TAATTTTTTAAAAGT 1 -GGGTGAATTCAGTGT 1 -GGAGAAGTCCAGGAT 1 -CAGGTGCCACTAATC 1 -CGACGCAGTGCCAGG 1 -TTGAAAATGAAGAGA 1 -GAACATGACCCTGTA 1 -GGCTCTCAAGGACTT 1 -TATGACTTTTGGAGA 1 -TTCATAATAGCCCCC 1 -CCCCGAGATCCAGCC 1 -CAGCTGGGACCACAG 1 -TTTTCAGTGGGGGTG 1 -GTACAAGAGATAGAA 1 -TTAAAATGACATCTA 1 -TCTAAAATGGTTAGA 1 -AACCAGGCAAAGAGC 1 -CCAGAGCGGGAGGGT 1 -TATCTCAGCAGGTGC 1 -GAAACAAAAACATTT 1 -AGTGCTGCTGCTGCT 1 -TAAATATCAGATTGT 1 -GGCTTACCCGGGCGA 1 -CAAAGAGCCAAAGAG 1 -TCAATTCTCTCTCCA 1 -TGCTAAATATACCTG 1 -GGACGAGCCTACCCG 1 -AGCGGGAGGGTAGGA 1 -GTAACCATCTTGGAT 1 -CCCACTCCCAGGCCA 1 -TTGTTATTTTTTAAA 1 -AGTGCTAAATATACC 1 -GAGAGCTCTTTGCAT 1 -AGTAAGTCAACTTCA 1 -AATTAAATGACGCAA 1 -CCTTCAGAGGCAATC 1 -AGGTGCCACTAATCT 1 -CCCAGAGGTGCTAGG 1 -CAGCTCACTGCAACC 1 -GCCTAGACGAAGTCC 1 -TAATTTTTGTTATTA 1 -TGTTATTGTACTCTG 1 -CTTAACTGAAAAATT 1 
-ACAACTTTCAGCAGC 1 -TCCTTGGCACACAGA 1 -AGAAGGAGGAGTACC 1 -AGAGGCCCCGCGAAA 1 -GATCCTCCCGCCTCA 1 -TACATGCCTTTTTTG 1 -CTCAAGGACTTCTGA 1 -TGATAGCTCAAAAGC 1 -AGAGGCAATCATCAT 1 -TCTATGAAAAAGACA 1 -TAAGGGAAGCAGAGC 1 -TAGTACAAGAGATAG 1 -TGGTGTCAAGCTATA 1 -CATGGTTCACACGGC 1 -CTCTGACTTTGTACC 1 -AGAGCCTTTCTCTGG 1 -ACACCACCATGCCTG 1 -CAGCGCGACGCCTCC 1 -GCTTCTAATAATAAG 1 -AAAGTCACATGGTTC 1 -CACCATGCCTGGTTA 1 -GTCCTTGCTGAAAGA 1 -CAAGATACTTTTCAA 1 -GTTTCTTAAAATCTC 1 -GAGACGGGATTTCAC 1 -TTTTTTTCATAACAT 1 -CAGGACAGTGAAACA 1 -AAGAACATATTAAAT 1 -GGTGAATTCAGTGTA 1 -TCTCTAGAAACACCC 1 -CTTTCTTTTTGGTTC 1 -CTTGGAGAAGGGAAG 1 -TGAATTCAGTGTAGT 1 -CACAACTTTCAGCAG 1 -GCCTCAGCCACCGGA 1 -TGGTCTATGCATGGC 1 -CATTATGATCAAATG 1 -TCTCGGTGCCTTAGT 1 -ACAAATGTCTAAAAT 1 -AGGGAAGGCGGCAAG 1 -GAAAGTAAGAGGCAC 1 -CAAACAACAAAACCT 1 -CCAACAAGCCACCTC 1 -TTCAATCTCGATTGC 1 -TCTGTGAGAGGCAGA 1 -TTCTTTTTTGAGAGA 1 -AGTACTGGAGAAGTC 1 -TCCCGTTTTCAGTTT 1 -TGAAAGACAAGTCTG 1 -TTTCTAAGAAGAGGA 1 -GATTTTGTGAGAGCA 1 -GCGAGCACAGCTAAG 1 -TCTGAGGTTCCTAAG 1 -AGAAGGGAACATGAC 1 -TTCTAGCAGTATCTT 1 -CTGGACTAGCCCCAC 1 -CACAGAAAGATGTCA 1 -TCCATTTTTTTTCTT 1 -CGTCACCTGTCTCCA 1 -AATTGTATTCAAATT 1 -CCAGATATATCTCTC 1 -TGGCTTTATGACTTT 1 -TGGTTCAGAAACCAT 1 -TTTCCTCCCCACGGT 1 -TTAAAATCTCGGTGC 1 -GAAACCATGCTGTGC 1 -ACCACAGGGCCCATG 1 -TAAGCTTCCAAAACT 1 -GGACAGTGAAACAAA 1 -GGGCCCATGCCGCCC 1 -ACAAATAAGTTAATT 1 -ATGTCTCGATCTATG 1 -CCACTAATCTGATCT 1 -CATCTTGGAAACAAC 1 -ATAGCAATTGCTCTA 1 -AGTAATGGTGGTGAT 1 -AGCCAAGCATTCTAC 1 -AGCCCCCATCAAATT 1 -GGACTTTCCCGTTTT 1 -GATCTATGAAAAAGA 1 -AGGGTCTCGCTCTGT 1 -CAGATTTCTAGCAGT 1 -TAAGTTAATTATTCG 1 -CTGGTTAATTTTTGT 1 -CGCAGTGCCAGGTTA 1 -CGCTTATCGACGCCC 1 -TGACGCAAAGCACAT 1 -ACCAGGCCACCTTGA 1 -AGAGAGAGTAGCGCG 1 -TTTTCAATTCTCTCT 1 -AGGTGAGGACTATGG 1 -GAAGGACCAGAGCGG 1 -TAAGAAGAGGACAAG 1 -AATGTTTCAGAAACA 1 -AAGGAGAACTTGGAG 1 -AATAGACCTTCAGAG 1 -AAGAGCCTTTCTCTG 1 -CAGATTCATCCTGCC 1 -GCAGTGCCAGGTTAG 1 -ATGCTGTGCATCAGT 1 -CGACCCTCCCGTCGC 1 -TCTGTCACTGGAGAT 1 -TTCCCAACAAGCCAC 1 -AAGTCTGAATGCTCC 1 -CTTTGTACCTAATTT 1 
-AACAAGCCCTTCCTA 1 -GCCCAGCCAATCAGG 1 -CTATAAATAGTCCTC 1 -TAAGTGGATCAACCC 1 -ACAAGAGCTGGCAAA 1 -GCATCACTGTAATCT 1 -GCCCGCCAGCGCGAC 1 -GAGCTCTTTGCATCT 1 -CCCGGGCGACGCCTC 1 -TCTCTTTCTTTTTTT 1 -TCGGCCCGAATGCTG 1 -CTGGGACTACAGGCA 1 -AAACCCAGACACATA 1 -TAGGATTCTTCTTTC 1 -TCTTGGGCTGTGACA 1 -ACCACACCCGGCTAA 1 -CTAGCCAGGTTGGGA 1 -CCCTGCTCAACTGCA 1 -TTCAGTGGGGGTGAA 1 -ATTGTTATTTTTTAA 1 -AAAGATGTCAATAAC 1 -CACGCTGGAGTGCAC 1 -ACTCCTCCTTGTCTG 1 -GCAAGATTTTGTGAG 1 -GCCAGGTTAGAGAGA 1 -TTTAAAATACCATGT 1 -ACGCCCTAAACTTTG 1 -CGGATGGATGAAACC 1 -AACCTTCACTTCTTT 1 -TGGAGAAAAAAAAGG 1 -AATTCTTTTAAATAC 1 -TTGGGGCTAACTTGG 1 -GCCCAGTTTGCTCTG 1 -CAGCCAAGCATTCTA 1 -GCCGCGCCTTTGGGA 1 -CAACTACGGACCTCA 1 -GCCCAGGCTGGAGTG 1 -CGTTTTAATTAGCAT 1 -ATCATTAAGGAAAGG 1 -TTCAGAAACCATGCT 1 -CCCCGCCGCTTCCCC 1 -TCCATTCTCTGCTGG 1 -GAAAGAGCGGAAGAG 1 -TCCAGTCTAAGGGAA 1 -CCCAATTCCATTTCC 1 -ATTCAGTGTAGTACA 1 -GATTGTTTATATCAG 1 -GCGTTTTTCTTAAAA 1 -TGTGGCCCCACATAG 1 -TTATATTAAACGCGT 1 -AGTACTTTCCAAAAT 1 -CGGCCCGAATGCTGT 1 -TCAAGAGGTGGATTG 1 -GACTCACGCTGGATA 1 -TTAAAAAAAAATGCA 1 -CACAGGTCAGAGGAG 1 -TAAACGCGTGCCCAG 1 -GACATCTAACATAAC 1 -GAAGTCACGGAGCGA 1 -TATATCAGATGGGAT 1 -AGGGATCAGAGCACA 1 -TCCAGCTGCGCTGGG 1 -ATGATAGCTCAAAAG 1 -CGCAAAGCACATAAA 1 -GAGGAATATCGGGAA 1 -TTTTTTTCTTATGTT 1 -CAAAGGCCTATACCT 1 -ACTACGGACCTCAAG 1 -GAGGACTATGGCAAA 1 -AGCCAGAAAGTACTG 1 -ATGACGTGAGTAAAC 1 -TCTGCTTTCGATCAT 1 -ACCCCCTCATGTTTT 1 -AAGGAAGACATTAAG 1 -AAAACAAAGAAACAA 1 -TTTTTTAAAAGTGAC 1 -AAAGGCTACTAGCCC 1 -GGAGAAGGGAAGTCA 1 -TCAGCCACCGGAGCA 1 -GGCAGAATAGGCTGC 1 -ACTTTCCCGTTTTCA 1 -CTGCTGCTCCCTGCT 1 -GAGCTACCCAGCAGG 1 -AATAATAAGAACATA 1 -TCTCTCTCCATTCTT 1 -TGTTTCCACCCCTTC 1 -CATCTGTGAGAGGCA 1 -CTTATGTTTATGCCT 1 -GAGCTTCTAATAATA 1 -TTTTTCAGTGGGGGT 1 -ATAGACCCAGAGGTG 1 -CAGGATTATAGGATG 1 -TTTGTATCACAGCCA 1 -CATGCCTGGTTAATT 1 -CTGCCTGGAACTCTC 1 -ACGCAGAAGGCAGGC 1 -TCGCGTGCTGTTTCC 1 -AGAGTTATTTAAGCT 1 -AGAAGAGGACAAGTA 1 -TTATTTTTTGTAGAG 1 -CCTGTCTCCAAGCCA 1 -CAAAAACTTACCTCC 1 -AAATAAGTTAATTAT 1 -CACTTAACTATCTTG 1 -AATGCCTCAGGGATC 1 
-GTTATTTTTTAAAAT 1 -GGTAGGAGAGACTCA 1 -ATTGCTGTAAACTAG 1 -TTTATGATATGACTA 1 -CCGAAAGGGGCAAGT 1 -TCTAATAATAAGAAC 1 -CTCATGTTTTCAAAA 1 -AAAAGGAAGACATTA 1 -TATCTGAGGTTCCTA 1 -AGCTTCCACCTTCCC 1 -TCCCCACGGTGTGGC 1 -TTCCTGCTCCTGCCT 1 -TAAACCTGAATCTTT 1 -CACCCTATCATTAAG 1 -CTAAAATGTTACTCT 1 -TAACCATCTTGGATT 1 -TTTATGACTTTTGGA 1 -CAGTGGTGAGATCTC 1 -ACGAGCCACCACACC 1 -GTAAGCTACCACGCC 1 -GGAAATTTGACTTTC 1 -CTGTCAATGTTCTCC 1 -TGCCTAATGTTTCAG 1 -CTGAAGCTGCCACAA 1 -GAGCACAGCGAGGGC 1 -CCCAAGATACTTTTC 1 -TTTTCTTTTCTCTTT 1 -ACAAAAACATTTTCT 1 -ATAGGCTGCTGTTCC 1 -ATTTTCAGTAGAGAC 1 -CACATAGCAATTCAG 1 -GCCTCCAGGCCAGAA 1 -CTGGAACTCTCTGTT 1 -ATCCATGGGGAAGTG 1 -GGAAACAACCAGGCA 1 -ACTACTGGTTCAGAA 1 -CCAGGATTATAGGAT 1 -AAAAAGGAAGACATT 1 -TGAGAGCATCACTGT 1 -GCCAAGCATTCTACA 1 -CACTGGCGCCATGAT 1 -AACTTGGAGAAGGGA 1 -CATGTTAGAGCTGTC 1 -ACTTATATTAAACGC 1 -AAGTGGGAAGAAAAT 1 -ACGCGTTCACAAACC 1 -AGAGGTGGATTGGGG 1 -AGGCTGGTCTCAACT 1 -CCACCTTCCCAACAA 1 -CCGCAAAAGCCCTGG 1 -TTCTTAGAGTCTCGT 1 -TGGGATTACAGGCGT 1 -ATTGCCTAATGTTTC 1 -GATCTACACCCATCT 1 -CGCTGCATTTTTAAG 1 -GCCCAACCAACCCAA 1 -TTGACTTCAATCTCG 1 -TCACAGCCAAGCATT 1 -TGCTGTTCCTACCCA 1 -CCTGAATCTTTGGAG 1 -AGGCAGGCGTTTTTC 1 -TGGAGGCTCTCAAGG 1 -CTTCTTCATGCCACT 1 -CACTGGAGATTGCGC 1 -TATTTCTGCTGAGGT 1 -CAGATATATCTCTCT 1 -TTCTCTCTCCATTCT 1 -AGCCTCAGATACCAA 1 -GACCTTCAGAGGCAA 1 -ATACACAACTTTCAG 1 -TTAAGTTTTACTTTT 1 -CTCAGGACAGTGAAA 1 -CCAAAAGGCATGCGC 1 -TTCCAGAAATGGTCT 1 -TGAGAGACAGGGTCT 1 -ATCCAGCCAGAAAGT 1 -GCAATTGCTCTATAC 1 -GCGCACCCCCTTCCC 1 -AGCCTACCCGTCCCC 1 -TCATGAGCTAAAAAA 1 -ACTGGCTTTATGACT 1 -AAGAGGTGGATTGGG 1 -CCTCCATTTTGAAGA 1 -GATAACTGTTTTCAA 1 -CCATGCCGCCCAGTT 1 -CTTCAATGTCGGATG 1 -ACAAAGAAACAAAAC 1 -CAAGATTTTGTGAGA 1 -AATTAAGTTTCTTAA 1 -CCTCAACAGTCTTGG 1 -TCTGTTTCCACCCCT 1 -ATGTAAGACTTACCC 1 -ACACACAGGTCAGAG 1 -TTATCATGTTAGAGC 1 -GGGCTGTGACAAAGT 1 -TTTTTTTTCTTATGT 1 -AGGACTTACAAACAA 1 -ATCTCTGGCGTCCTC 1 -ACATTAAGTTTTACT 1 -CCCTCATGTTTTCAA 1 -ATTTTTTTCATAACA 1 -TTTTTCTTAAAAAAA 1 -ATGCCCACAGTAAAA 1 -TGAAGTGTCTAGTAG 1 -TGGCGCCATGATAGC 1 
-CTGACAAACCTTCAC 1 -CAGTTCATAATAGCC 1 -TGCCAGAGGAAAAGG 1 -ACTATCTTGGGCTGT 1 -AGACCCCAAGCGCCC 1 -AGGCCACCTTGACCA 1 -CGAAAGAGCGGAAGA 1 -AAACTACTGGTTCAG 1 -AAAAAAAAATGCACG 1 -CCTTCCATTTTTTTT 1 -GAAATGGTCTATGCA 1 -CCAGACCCCTCTCTG 1 -GTTAATTATTCGAAA 1 -GTCTTGAACTCCCAA 1 -CTGCTCAACTGCAGG 1 -TCAGCTTCAGGAATG 1 -TTAAATACACTTATA 1 -AAAAACAAAACCCAC 1 -TCTTTTTTGAGAGAC 1 -GGGGTGAATTCAGTG 1 -GAGGCCCCGCGAAAG 1 -GACTTTCCATTCTCT 1 -ATATACATTATCCTT 1 -GGCGGCAAGATTTTG 1 -TGTTAAGAGTGTATA 1 -GTGGTGATATATACA 1 -CTTTTTGGTTCCTGC 1 -CTTGACCAGATATAT 1 -TACTCCTCCTTGTCT 1 -GCCCACAGTAAAACA 1 -ATTCTGCTTTCGATC 1 -ATTATGATCAAATGG 1 -AAATGGGACAGAACA 1 -TAGCAGATTTCTAGC 1 -TTGTACCTAATTTCA 1 -CCAGTCTAAGGGAAG 1 -ACGCTGGAGTGCACT 1 -CGCGTGCTGTTTCCT 1 -CCTCATAATTCCTCT 1 -GATTTCTCCCAATTC 1 -ACATGTGATGGGAAC 1 -GATTTCTAGCAGTAT 1 -TCCACTCTGGCCAAA 1 -GGAGAGTTATTTAAG 1 -GACGAGCCTACCCGT 1 -TCACTTCTTTCTTTC 1 -TACTAGCCCCATCAA 1 -ACTTTGGAAATTTTC 1 -TTCAGGGTACATGAT 1 -CAGAGGAAAAGGTGA 1 -AGAACATATTAAATG 1 -TGTCACTGGAGATTG 1 -CGGGAGAGGAAGGAC 1 -AATGTTACTCTGTCA 1 -TCGACGCCCTAAACT 1 -CTTCACTTCTTTCTT 1 -CTGGAGAATCTCACG 1 -TAGAAACACCCTATC 1 -CGGGTAGTTCTTATA 1 -CTTCAATCTCGATTG 1 -GAGCGGGAGGGTAGG 1 -TACCTAATTTCATGA 1 -TAATTCCTCTATACA 1 -CAGTCCTTGCTGAAA 1 -TTTCAAAATAAGATT 1 -ATTAAACGCGTGCCC 1 -GGAAAGGCTACTAGC 1 -GAAAAGGTGAGCGCG 1 -TTTTTGAGAGACAGG 1 -CTCTTTATTTCTGCT 1 -TCCCCCAGACCCCAA 1 -AGAGGAAAAGGTGAG 1 -CACGAGCCACCGTGC 1 -GAAAGACAAGTCTGA 1 -ATGGGGAAGTGGGAA 1 -TAACACATCACTCAA 1 -TAATTTATGCCCACA 1 -ACAAGAGATAGAAAG 1 -GCGGGAGAGGAAGGA 1 -ACAGGAGAAGGGAAC 1 -GTGGGAAGAAAATAT 1 -ATGTTATTATCTGAG 1 -TACGAAAAGACCACA 1 -TGCTGCTCCCTGCTC 1 -TAGGCTTGACTTCAA 1 -ATTATAAACTCTATA 1 -TAGCTCAAAAGCCTA 1 -TAGCCAGGTTGGGAA 1 -AATATACCTGAAGCT 1 -TCCTCTATACATGCC 1 -TCATTACATGTTTCT 1 -GAGACATCTCGGCCC 1 -CCTCAGATACCAATC 1 -TTGCTCTATACGTGG 1 -ACAAGCCCTTCCTAC 1 -AGGGTACATGATCAC 1 -ATAGTCCTCAGGACA 1 -CCGCCGCTTCCCCGA 1 -TTCCATTCTCTGCTG 1 -TGCAGAGCGGGAGAG 1 -CTAAACTTTGTCCCG 1 -GGAAGTCACGGAGCG 1 -CCATTCTTCAGTAAG 1 -ATTTTTTTATTTTTT 2 -CAGTCTTGGTAACCA 1 
-AGACTTACCCCACTT 1 -TTCAGTGCTAAATAT 1 -GTTAGGCTTGACTTC 1 -TATCTTGGGCTGTGA 1 -CTTCATCTGTGAGAG 1 -TGGGAATATATTGCC 1 -CACATAGACCCAGAG 1 -GTTATCTATGGCGGA 1 -AAATAAGGCTGGCAG 1 -AACATTCTGCTTTCG 1 -GACCAGAGCGGGAGG 1 -ATTGTTTAGAGCTAC 1 -CAAGGTCAAAAACTT 1 -TCCGCCTCCTGGGTT 1 -AGACTCACGCTGGAT 1 -CCGTCACCTGTCTCC 1 -CTCAACAGTCTTGGT 1 -AGTGGGGGTGAATTC 1 -CAGGAATGCCCGCCA 1 -ACTTCTTTCTTTCTT 1 -GGGGCAAGTAGCGCG 1 -GCGAGGGCCACAGAG 1 -AGGAGAGACTCACGC 1 -CATAGCAATTCAGGA 1 -ATGGTTGAGTTGGAC 1 -GCTGTAAACTAGCCA 1 -TAATAGCCCCCATCA 1 -ATTCTTTTAAATACA 1 -GATGGGAACAAATAA 1 -CACATTAATATTGCC 1 -CCCAATATGCAGATT 1 -TGTTAGTGCTACTAA 1 -GTTTTACAGCTCCCC 1 -CCTTGACCAGATATA 1 -CGCTGGAGTGCACTG 1 -GCCGCCCAGTTTGCT 1 -ACTAGCCTCAGATAC 1 -GATTCCTGGGACTAC 1 -ACTTATATTCAGGGT 1 -CTAGCCCCACGGCGG 1 -CCTGGACTAGCCCCA 1 -GGGGAGAAGGAGGAG 1 -GCCTACCCGTCCCCC 1 -CCAGACACATAGCAA 1 -TGAAAAATTAATTTA 1 -AGACGAAGTCCACAG 1 -TTTTGTAGAGACCAG 1 -CTTAGAAAGTGCTCA 1 -ATGGCATGTATTACT 1 -AGATTGACACTAAAT 1 -CGATTGCTGCCATTT 1 -TTTCACCATGTTGGC 1 -ATAATTTTTTAAAAG 1 -ATGCCAGACCCCTCT 1 -TAGCCCCACGGCGGG 1 -CTACTCCTCCTTGTC 1 -GTCAAGCTATATCAG 1 -TGGACCCGATAAAAT 1 -TGCCCACGCTGGAGT 1 -TGGGATGGAATCTCA 1 -CTTCTGTCACTGGAG 1 -CAGCCACCGGAGCAG 1 -GAGTGCCTGGGACAT 1 -GCTACTCCTCCTTGT 1 -TCCCGCCTCAGCCAC 1 -CCTCCCCCAGACCCC 1 -GGCATGACTAGACCA 1 -GGAGTGCACTGGCGC 1 -CTCACGCAGAAGGCA 1 -CGCTCTGTTGCCCAC 1 -GGTGTGGCCCCACAT 1 -AAGGCCCGCAGGGAC 1 -AGGCTGTGGGGAGAA 1 -CCCGACCCTCCCGTC 1 -CATGATCACTAAAAG 1 -AGAGCTCTTTGCATC 1 -TATAATTTTTTAAAA 1 -TACCCAGCAGGAACA 1 -TCCATTATGATCAAA 1 -GAAAATATTATAAAC 1 -GAGCCTTTCTCTGGA 1 -TCAGTGCTAAATATA 1 -ATTTTGTGAGAGCAT 1 -CCTGCTCAACTGCAG 1 -CGCCCAACCCCCTCA 1 -CCCGCAGGGACCGTC 1 -ATGTGATGGGAACAA 1 -TTTTTATTTTTTGGG 1 -ACATGACCCTGTAGG 1 -ACTTACCTCCATGAT 1 -CGCGAGCACAGCTAA 1 -CTTCCACCTTCCCAA 1 -AGTACAAGAGATAGA 1 -TTGAAGAATAAACCG 1 -ATTGCCCAGGCTGGA 1 -TTCTAAACTGTATGA 1 -TTAAGAGCCTTTCTC 1 -TTGTTTATATCAGAT 1 -GGAGAGACTCACGCT 1 -CAGGGATCAGAGCAC 1 -TGAAACCCAGACACA 1 -AGTGCCAGGTTAGAG 1 -GCCAGGCTGGTCTCA 1 -CGCCCCTCCACGCGT 1 -CTAAGGGAAGCAGAG 1 
-ACTACAGGCACACAC 1 -ATGAAAAAGACAGTG 1 -ATGCTAGGACAGCAG 1 -TATTTTTTTATTTTT 2 -AGAAAGATGTCAATA 1 -CGATAAAATACAACA 1 -TCCTGCCTTGATCTA 1 -AGAGCACAGATTCAT 1 -TAACTTGGTGTCAAG 1 -TCAGAGGCAATCATC 1 -AAAGCTAAGAGAGCT 1 -CGTGCTGTTTCCTCC 1 -AGCCAAAGAGGAAGC 1 -TCTCACGCAGAAGGC 1 -TCAGCAGGTGCCACT 1 -TCAAGAGATCCTCCC 1 -AGGCATGACTAGACC 1 -CTACTAGCCTCAGAT 1 -TGCCCAGGCTGGAGT 1 -TTGGCCTCCCAAAGT 1 -TGTCCCGACCCTCCC 1 -GGAGAAGGAGGAGTA 1 -CCCACTTAACTATCT 1 -TTCTAGCAGATTTCT 1 -ACAAGGCCCGCAGGG 1 -TGTTACTCTGTCAAT 1 -AAGGAGGAGTACCAG 1 -AGGAATGCCCGCCAG 1 -GACCCCAAGCGCCCC 1 -ACCGTCACCTGTCTC 1 -TGCTCAACTGCAGGG 1 -AGGAGGAGTACCAGG 1 -TAGAGACCAGGCTTC 1 -ACCCCTTCCATTTTT 1 -GCCTGGAACTCTCTG 1 -CTATCATTAAGGAAA 1 -GTGTTGGGATTACAG 1 -TATTGTACTCTGTTT 1 -CGCGACGCCTCCACT 1 -GTTGAGTTGGACCCG 1 -CCACAAAAGCTAGAG 1 -CTTTCCAAAATGAGA 1 -GCACCCCCTTCCCCA 1 -TGGAGTAATGCATGT 1 -TCTTCATCTGTGAGA 1 -GGAAGATAACTGTTT 1 -GACCTGATTTTTTTC 1 -TTGAGAGACAGGGTC 1 -ACTTCAATCTCGATT 1 -ACTGCAGGGAAACTA 1 -TAAACTCTATATTTT 1 -TATTAAACGCGTGCC 1 -GTTAGAGAGAGGGAC 1 -GAAAAATTAATTTAT 1 -AGATACCAATCCAGC 1 -CAACAGTCTTGGTAA 1 -AAAAATTAATTTATG 1 -AAAAAACAAAGAAAC 1 -CCATAGTAGTTATCT 1 -GAGCGGAAGAGAAAC 1 -TCTCTGGAGGCTCTC 1 -AGAAGTCCAGGATTA 1 -GTTTTTTTTCTAGCA 1 -CTTTATGACTTTTGG 1 -GATCAAAACATTCTG 1 -AAACTCTATATTTTT 1 -CCCCTTCCATTTTTT 1 -ACTATGGCAAATGGG 1 -TATACATGCCTTTTT 1 -ATAATAGCCCCCATC 1 -TAATCTGATCTTTAA 1 -GTTCACAAACCTCAG 1 -TTATTATTTTTTGTA 1 -CAGATTGTTTATATC 1 -AGAGGGACTTTCCCG 1 -GAGGACAAGTATCAG 1 -ACTGGTTCAGAAACC 1 -TAAAATACCATGTAC 1 -TCTCCCAATTCCATT 1 -TCAAGCCATCCTCCC 1 -CCCTCCACGCGTTCA 1 -ACCCTCCCGTCGCCG 1 -TCTTATGTTTATGCC 1 -ATGAAGTGTCTAGTA 1 -AGAAAGTACTGGAGA 1 -GAGCCAAAGAGGAAG 1 -TTAGTGTATTGCCAG 1 -CATACACAACTTTCA 1 -CCGTCCCCCACTCCA 1 -CTTATCGACGCCCTA 1 -TTTCAATTCTCTCTC 1 -AACTACTGGTTCAGA 1 -ACCTGTCTCCAAGCC 1 -CTACTGGTTCAGAAA 1 -TGTAAACTAGCCAGG 1 -GTGCTAGGACATGCG 1 -GACTTGGTATCTTTC 1 -TAGCCCCATCAAGAG 1 -TAAGAGCCTTTCTCT 1 -TTTCCACTCTGGCCA 1 -TCCCTGCTAAAATGT 1 -GGAAAATAGACCTTC 1 -GCCATACACCTTCTT 1 -GTCTCGTGATGTTTA 1 -GATCCAGCCCTGGAC 1 
-CTTAAATGGTTGAGT 1 -GTGAGTAAACCTGAA 1 -ATACCAATCCAGCCA 1 -CAAGAGGTGGATTGG 1 -GTGGCAGATGTTATT 1 -CGCTCCCGCAAAAGC 1 -GTGCTGCTGCTGCTC 1 -CCGCCGAAAGGGGCA 1 -ACCTTCAGAGGCAAT 1 -CTGTTTCCACCCCTT 1 -CAAGTCTGAATGCTC 1 -AACCCAAGGTATGTT 2 -GAGCCTACCCGTCCC 1 -ATTACATGTTTCTTG 1 -TGATCTACACCCATC 1 -CCCGCCTCGGCCTCC 1 -TGTGCATCAGTATCT 1 -GCTAAAAAACAAAGA 1 -CTATGCATGGCATGT 1 -CTTCTTTCTTTCTTT 1 -CCTAAATGCATACAA 1 -TCATTGAAAATGAAG 1 -CCCGCCGAAAGGGGC 1 -TCCCAATTCCATTTC 1 -TGTTATTATTTTTTG 1 -TATACGTGGCAGATG 1 -GCCTGCCTTGGCCTC 1 -AGTGGATCAACCCAA 1 -CTTTCTCTGGAGGCT 1 -AGACAGGGTCTCGCT 1 -GCGACGCAGTGCCAG 1 -CATAACATTAAAAGC 1 -TTGTCCCGACCCTCC 1 -AATGAAGAGAAGTGT 1 -GGCCACCAAGGAGAA 1 -TAAGCTCAAGAGATC 1 -CTCCAGTCTAAGGGA 1 -AGACCTGATTTTTTT 1 -CCGGGTGCGCACCCC 1 -TTAGCGGGCGCCTAG 1 -TGGAGAAGTCCAGGA 1 -CTTCACCATGTTGGC 1 -GACAGCAGGACTTAC 1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 36d0c97..0000000 --- a/setup.py +++ /dev/null @@ -1,22 +0,0 @@ -#! /usr/bin/python - -from setuptools import setup - -def readme() : - with open('README.md') as f: - return f.read() - -setup(name='BreaKmer', - version='0.0.7', - description='Structural variation detection tool, designed for targeted sequencing data.', - long_description=readme(), - url='https://github.com/a-bioinformatician/BreaKmer', - author='Ryan Abo', - author_email='ryan.abo@gmail.com', - license='LICENSE', - py_modules=['BreaKmer'], - install_requires=[ - 'pysam >= 0.6', - 'biopython >= 1.62' - ] - )