-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbinary_file2.h
90 lines (83 loc) · 2.47 KB
/
binary_file2.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#ifndef _binary_file2_h
#define _binary_file2_h
#include <cmath>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <istream>
#include <vector>
#include <assert.h>
using namespace std;
unsigned char* read2char(string read, int max_read_length);
char* binary_filename;
string convert(unsigned char *kmers){
int size = (int)kmers[0];
string result = "";
for(int i = 0; i < size; ++i){
int char_index = i / 4 + 1;
int char_shift = 3 - i % 4;
int DNA_val = (kmers[char_index] >> (char_shift * 2)) & 0x3;
result.push_back(DNAVALS[DNA_val]);
}
return result;
}
void set_binary_file(const char *bin_name){
binary_filename = (char*) bin_name;
}
void make_binary_file(const char *filename, int max_read_length, bool fastq){
ifstream infile(filename);
ofstream outfile;
outfile.open(binary_filename, ios::out | ios::binary);
if(fastq)
infile.ignore(1000,'\n');
int number_chars = (int) ceil(max_read_length/4.0);
while(!infile.eof()){
bool skip = false;
string read;
infile >> read;
int size = read.size();
if(fastq)
for(int i=0;i<4;i++)
infile.ignore(1000,'\n');
for(int i=0;i<size;i++)
if(read[i] != 'A' && read[i] != 'C' && read[i] != 'G' && read[i] != 'T' && read[i] != 'a' && read[i] != 'c' && read[i] != 'g' && read[i] != 't')
skip = true;
if(!skip){
unsigned char* data = read2char(read, number_chars);
outfile.write((char*)data, number_chars + 1);
delete data;
}
}
infile.close();
outfile.close();
}
unsigned char* read2char(string read, int number_chars){
unsigned char* return_char = new unsigned char[number_chars + 1];
return_char[0] = read.size();
unsigned char* read_nucleotides = new unsigned char[number_chars*4];
for(int i=0;i<read.size();++i){
read_nucleotides[i] = read[i];
}
for(int i=read.size();i<number_chars*4;++i){
read_nucleotides[i] = 'A';
}
for(int i=0;i<number_chars;++i){
int char_value = 0;
for(int j=0;j<4;++j){
char_value += (mapper::DNAmap.at(read_nucleotides[4*i+j]) << 2*(3-j));
}
return_char[i + 1] = (unsigned char)(char_value);
}
return return_char;
}
unsigned char* find_read(int read, int max_read_length){
int number_chars =(int) ceil(max_read_length/4.0);
char * memblock = new char[number_chars + 1];
ifstream infile(binary_filename, ios::out | ios::binary);
infile.seekg((number_chars + 1) * read);
infile.read(memblock, (streampos) (number_chars + 1));
infile.close();
return (unsigned char*)memblock;
}
#endif