-
Notifications
You must be signed in to change notification settings - Fork 287
/
ParseTools.cpp
175 lines (155 loc) · 3.89 KB
/
ParseTools.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#include "ParseTools.h"
#include <climits>
#include <cctype>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include <sstream>
// Reports whether str looks like a decimal number.
//
// Returns true only when str contains at least one digit and every character
// is a digit, '+', '-', '.', 'e' or 'E'. Hexadecimal and pointer notation are
// not supported.
//
// This is a character-set check, not a grammar check: the position and count
// of signs, decimal points and exponent markers are not validated, so inputs
// such as "1-2" are accepted. The empty string is rejected.
bool isNumeric(const string &str) {
    bool hasDigits = false;
    for (size_t i = 0; i < str.size(); ++i) {
        // The <cctype> classifiers require a value representable as unsigned
        // char; passing a negative plain char (any byte >= 0x80 where char is
        // signed) is undefined behaviour.
        const unsigned char currChar = static_cast<unsigned char>(str[i]);
        if (isdigit(currChar)) {
            hasDigits = true;
            continue;
        }
        const bool isPunctuation = currChar == '-' || currChar == '+' ||
                                   currChar == '.' ||
                                   currChar == 'e' || currChar == 'E';
        if (!isPunctuation) {
            return false;
        }
    }
    return hasDigits;
}
// Reports whether str looks like a decimal integer.
//
// Returns true only when str contains at least one digit and every character
// is a digit, '+', '-', 'e' or 'E'. A decimal point is not allowed, but the
// exponent markers are, so "1e5" is accepted.
//
// This is a character-set check, not a grammar check: the position and count
// of signs and exponent markers are not validated. The empty string is
// rejected.
bool isInteger(const string &str) {
    bool hasDigits = false;
    for (size_t i = 0; i < str.size(); ++i) {
        // The <cctype> classifiers require a value representable as unsigned
        // char; passing a negative plain char (any byte >= 0x80 where char is
        // signed) is undefined behaviour.
        const unsigned char currChar = static_cast<unsigned char>(str[i]);
        if (isdigit(currChar)) {
            hasDigits = true;
            continue;
        }
        const bool isPunctuation = currChar == '-' || currChar == '+' ||
                                   currChar == 'e' || currChar == 'E';
        if (!isPunctuation) {
            return false;
        }
    }
    return hasDigits;
}
// Convenience overload for std::string input: hands the string's
// NUL-terminated buffer and its length to the (const char*, size_t) overload
// and returns whatever that overload produces.
CHRPOS str2chrPos(const string &str) {
    const char *text = str.c_str();
    const size_t textLen = str.size();
    return str2chrPos(text, textLen);
}
// Parses a NUL-terminated string as a CHRPOS value.
//
// Accepted forms:
//   * an optional leading '-' followed by decimal digits, e.g. "123", "-45";
//   * such a prefix immediately followed by 'e' or 'E', in which case the
//     whole string is re-parsed with strtod() and the result is cast to
//     CHRPOS, e.g. "1e6".
// An empty string or a lone "-" yields 0. A leading '+' is not accepted.
//
// On any other trailing character, on a strtod() parse that does not consume
// the whole string, or on a plain digit run too large for a long long, an
// error is printed to stderr and the process terminates with exit status 1.
//
// ulen does not bound the scan: parsing always runs to the terminating NUL,
// so str must be NUL-terminated regardless of the length passed in.
CHRPOS str2chrPos(const char * __restrict str, size_t ulen) {
    (void)ulen;

    const char* cursor = str;
    bool negative = false;
    if (*cursor == '-') {
        negative = true;
        ++cursor;
    }

    long long magnitude = 0;
    bool overflowed = false;
    for (; *cursor >= '0' && *cursor <= '9'; ++cursor) {
        const int digit = *cursor - '0';
        if (overflowed) {
            continue;  // keep scanning so an exponent suffix is still found
        }
        // Signed overflow is undefined behaviour, so test before multiplying:
        // magnitude * 10 + digit fits iff magnitude <= (LLONG_MAX - digit) / 10.
        if (magnitude > (LLONG_MAX - digit) / 10) {
            overflowed = true;
        } else {
            magnitude = magnitude * 10 + digit;
        }
    }

    // Fast path: the entire string was a plain, representable integer.
    if (*cursor == '\0' && !overflowed) {
        return negative ? -magnitude : magnitude;
    }

    // Scientific notation: let strtod() handle the whole string, and accept
    // the result only if nothing is left over after the number.
    if (*cursor == 'e' || *cursor == 'E') {
        char* parseEnd = NULL;
        const CHRPOS parsed = (CHRPOS)strtod(str, &parseEnd);
        if (parseEnd && *parseEnd == 0) {
            return parsed;
        }
    }

    fprintf(stderr, "***** ERROR: illegal number \"%s\". Exiting...\n", str);
    exit(1);
}
// Builds a string with one character per element of vec, in order, where each
// character is the element's int value converted to char.
string vectorIntToStr(const vector<int> &vec) {
    string packed;
    packed.reserve(vec.size());  // final length is known up front
    for (vector<int>::const_iterator it = vec.begin(); it != vec.end(); ++it) {
        packed.push_back(static_cast<char>(*it));
    }
    return packed;
}
// Returns true when line begins with keyword, compared without regard to
// ASCII case. keyword must be given in lowercase. Never reads past the end of
// line.
static bool headerKeywordMatches(const string &line, const char *keyword) {
    const size_t keywordLen = strlen(keyword);
    if (line.size() < keywordLen) {
        return false;
    }
    for (size_t i = 0; i < keywordLen; ++i) {
        char c = line[i];
        if (c >= 'A' && c <= 'Z') {
            c = static_cast<char>(c - 'A' + 'a');
        }
        if (c != keyword[i]) {
            return false;
        }
    }
    return true;
}

// Returns true when the character at index keywordLen is whitespace and the
// character after it is not a digit (end of line counts as "not a digit").
// This separates a column-title row such as "chr<TAB>start" from a data row
// such as "chr<TAB>100".
static bool headerKeywordEndsColumnTitle(const string &line, size_t keywordLen) {
    if (line.size() <= keywordLen) {
        return false;
    }
    // <cctype> classifiers need a value representable as unsigned char.
    if (!isspace(static_cast<unsigned char>(line[keywordLen]))) {
        return false;
    }
    const size_t next = keywordLen + 1;
    return next >= line.size() || !isdigit(static_cast<unsigned char>(line[next]));
}

// Reports whether line is a header line rather than a data record.
//
// A line is a header when it
//   * starts with '>', '!' or '#'; or
//   * starts (case-insensitively) with "chrom" or "chr", followed by a
//     whitespace character and then anything other than a digit; or
//   * starts (case-insensitively) with "browser", "track" or "visibility".
//     Only the prefix is checked; nothing is required after the keyword.
//
// An empty line is not a header. A single portable implementation is used on
// every architecture so that header detection does not depend on the build
// target.
bool isHeaderLine(const string &line) {
    if (line.empty()) {
        return false;
    }
    // Dispatch on the first character so that ordinary data lines are
    // rejected after a single comparison.
    switch (line[0]) {
        case '>':
        case '!':
        case '#':
            return true;
        case 'c':
        case 'C':
            return (headerKeywordMatches(line, "chrom") && headerKeywordEndsColumnTitle(line, 5))
                || (headerKeywordMatches(line, "chr") && headerKeywordEndsColumnTitle(line, 3));
        case 'b':
        case 'B':
            return headerKeywordMatches(line, "browser");
        case 't':
        case 'T':
            return headerKeywordMatches(line, "track");
        case 'v':
        case 'V':
            return headerKeywordMatches(line, "visibility");
        default:
            return false;
    }
}