-
Notifications
You must be signed in to change notification settings - Fork 0
/
multidef.h
131 lines (108 loc) · 4.02 KB
/
multidef.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
Copyright (c) 2003 by Stefan Kurtz and The Institute for
Genomic Research. This is OSI Certified Open Source Software.
Please see the file LICENSE for licensing information and
the file ACKNOWLEDGEMENTS for names of contributors to the
code base.
*/
//\Ignore{
#ifndef MULTIDEF_H
#define MULTIDEF_H
#include <cstdio>
#include <cstdlib>
#include "arraydef.h"
//}
/*
This file defines the datatype \texttt{Multiseq} which stores information
about \(k\) sequences \(T_{0}\), \(\ldots\), \(T_{k-1}\):
\begin{enumerate}
\item
For each \(i\in[0,k-1]\), \texttt{startdesc[i]} stores the index in
\texttt{descspace.spaceUchar}, where a textual description for sequence
\(T_{i}\) starts. A description for sequence \(T_{i}\)
ends with a newline character at index \texttt{startdesc[i+1]-1}.
The description can e.g.\ be the text following the symbol
\texttt{>} in a fasta formatted file.
\item
For each \(i\in[0,k-2]\), \texttt{markpos[i]} is the position of a
\emph{separator character} between sequence \(T_{i}\) and \(T_{i+1}\).
\item
Let \(i\in[0,k-1]\).
If \(i=0\), then \(T_{i}\) is stored in the component \texttt{sequence}
from index \(0\) to index \(\Size{T_{i}}-1\).
If \(i>0\), then \(T_{i}\) is stored in the component \texttt{sequence}
from index \(\texttt{markpos[i-1]+1}\) to index
\(\texttt{markpos[i-1]}+\Size{T_{i}}\).
\item
\texttt{numofsequences} is the number \(k\) of sequences stored.
\item
\texttt{totallength} is the total length of the stored sequences
including the \(k-1\) separator characters.
\end{enumerate}
*/
/*
  The following defines the separator symbol for fasta files, i.e.\ the
  character \texttt{>} which is followed by the textual description
  of a sequence.
*/
#define FASTASEPARATOR '>'
/*
  Accessors for the description of sequence number \texttt{SN} in the
  multiseq pointed to by \texttt{MS}:
  \texttt{DESCRIPTIONSTARTDESC} delivers the index in
  \texttt{descspace.spaceUchar} at which the description starts,
  \texttt{DESCRIPTIONPTR} delivers a pointer to the first character of
  the description, and \texttt{DESCRIPTIONLENGTH} delivers the distance
  from the start of this description to the start of the next one.
  The arguments may be evaluated more than once, so they must be free
  of side effects.
*/
#define DESCRIPTIONSTARTDESC(MS,SN) ((MS)->startdesc[(SN)])
#define DESCRIPTIONPTR(MS,SN) \
        ((MS)->descspace.spaceUchar + (MS)->startdesc[(SN)])
#define DESCRIPTIONLENGTH(MS,SN) \
        ((MS)->startdesc[(SN) + 1] - (MS)->startdesc[(SN)])
/*
  The following macro specifies a default initialization of a structure
  of type \texttt{Showdescinfo}. \texttt{DESC} is a pointer to the
  structure to be initialized; it is evaluated several times and must
  therefore be free of side effects. The assignments are wrapped in a
  do-while(0) loop so that the macro expands to exactly one statement:
  it can then be used as the body of an \texttt{if}, \texttt{else} or
  loop without braces, and all five assignments remain guarded.
  As before, the caller supplies the terminating semicolon.
*/
#define ASSIGNDEFAULTSHOWDESC(DESC)\
        do\
        {\
          (DESC)->defined = True;\
          (DESC)->skipprefix = 0;\
          (DESC)->maxlength = 0;\
          (DESC)->replaceblanks = False;\
          (DESC)->untilfirstblank = False;\
        } while(0)
/*
  The following defines the value which denotes an undefined
  file separator position.
*/
#define UNDEFFILESEP 0
struct Multiseq
{
  ArrayPosition markpos;      // positions of the separator characters
                              // between consecutive sequences
  Uint *startdesc;            // of length numofsequences + 1; start
                              // indices of the descriptions in descspace
  Uint numofsequences;        // the number of sequences
  Uint totallength;           // the total length of all sequences,
                              // including the separator characters
  ArrayCharacters descspace;  // the space for the descriptions
  Uchar *sequence;            // the concatenated sequences
  Uchar *rcsequence;          // NULL or points to
                              // reverse complemented sequences
  Uchar *originalsequence;    // NULL or points to orig. sequence
}; // \Typedef{Multiseq}
/*
  The following type specifies the formatting options which are applied
  when a sequence description is shown.
*/
struct Showdescinfo
{
  bool defined;          // show a description
  bool replaceblanks;    // replace blanks by underscore
  bool untilfirstblank;  // only show sequence until first blank
  Uint skipprefix;       // always skip this number of prefixes
                         // (presumably leading characters; to be confirmed)
  Uint maxlength;        // maximal number of chars of description
                         // to be shown
};
/*
  The following type collects some basic information about a single
  sequence stored in a \texttt{Multiseq}-record.
*/
struct Seqinfo
{
  Uint seqnum;       // the sequence number in multiseq
  Uint seqstartpos;  // the position of the first character
                     // in multiseq.sequence
  Uint seqlength;    // the length of the sequence
  Uint relposition;  // the relative position of the sequence
}; // \Typedef{Seqinfo}
//\Ignore{
#endif
//}