-
Notifications
You must be signed in to change notification settings - Fork 51
/
makeEcModel.m
190 lines (173 loc) · 7.75 KB
/
makeEcModel.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
function model = makeEcModel(model,geckoLight)
% makeEcModel
% Expands a conventional genome-scale model (in RAVEN format) with enzyme
% information and prepares the reactions for integration of enzyme usage
% coefficients. This function contains all the steps that need to be done
% to get a basic ec-model, without incorporating any kcat values or
% constraints yet. This function should only have to be run once for a
% model.
%
% Input:
% model a model in RAVEN format
% geckoLight true if a simplified GECKO light model should be generated.
% Optional, default is false.
%
% Ouput:
% model a model with a model.ec structure where enzyme and kcat
% information are stored. Protein pseudometabolites and their
% draw reactions are added to the model, but their usage is
% not yet implemented (due to absent kcat values at this
% stage).
%
% The function goes through the following steps:
% 1. Remove gene associations from pseudoreactions.
% 2. Invert irreversible backwards reactions.
% 3. Correct 'rev' vector to match lb and ub vectors.
% 4. Convert to irreversible model (splits reversible reactions).
% 5. [Skipped with geckoLight:] Expand model to split reactions with
% 'OR' in grRules (each reaction is then catalyzed by one enzyme
% (complex).
% 6. Sort identifiers (so that split reactions remain close to each
% other, not real function, just makes it tidier.
% 7. Make empty model.ec structure, that will contain enzyme and kcat
% information. One entry per reaction, where isoenzymes have multiple
% entries [not in geckoLight]. This model.ec structure will later be
% populated with kcat values.
% 8. Add enzyme information fields to model.ec structure: MW, sequence.
% 9. Populate model.ec structure (from step 8) with information from
% each reaction.
% 10. [Skipped with geckoLight:] Add proteins as pseudometabolites.
% 11. Add prot_pool pseudometabolite.
% 12. [Skipped with geckoLight:] Add draw reactions for the protein
% pseudometabolites.
% 13. Add protein pool reaction, without upper bound.
%
% Note that while protein pseudometabolites, draw & pool reactions might
% be added to the model, the enzyme usage is not yet incorporated in each
% metabolic reaction, so enzymes will not be used. applyKcatConstraints
% incorporates protein pseudometabolites in reactions as enzyme usages by
% applying the specified kcats as constraints.
if nargin<2
geckoLight=false;
elseif ~islogical(geckoLight)
error('geckoLight should be either true or false')
end
if geckoLight
ec.geckoLight=true;
else
ec.geckoLight=false;
end
[geckoPath, prevDir] = findGECKOroot();
uniprotDB = loadDatabases;
%1: Remove gene rules from pseudoreactions (if any):
for i = 1:length(model.rxns)
if endsWith(model.rxnNames{i},' pseudoreaction')
model.grRules{i} = '';
model.rxnGeneMat(i,:) = zeros(1,length(model.genes));
end
end
%2: Swap direction of reactions that are defined to only carry negative flux
to_swap=model.lb < 0 & model.ub == 0;
model.S(:,to_swap)=-model.S(:,to_swap);
model.ub(to_swap)=-model.lb(to_swap);
model.lb(to_swap)=0;
%Delete blocked rxns (LB = UB = 0). Best not to do, as you cannot unblock
%these reactions later once removed. Better to do this once you run
%analysis.
% to_remove = logical((model.lb == 0).*(model.ub == 0));
% model = removeReactions(model,model.rxns(to_remove),true,true,true);
%3: Correct rev vector: true if LB < 0 & UB > 0, or it is an exchange reaction:
model.rev = false(size(model.rxns));
for i = 1:length(model.rxns)
if (model.lb(i) < 0 && model.ub(i) > 0) || sum(model.S(:,i) ~= 0) == 1
model.rev(i) = true;
end
end
%4: Make irreversible model (appends _REV to reaction IDs to indicate reverse
%reactions)
model=convertToIrrev(model);
%5: Expand model, to separate isoenzymes (appends _EXP_* to reaction IDs to
%indicate duplication)
if ~geckoLight
model=expandModel(model);
end
%6: Sort reactions, so that reversible and isoenzymic reactions are kept near
model=sortIdentifiers(model);
%7: Make ec-extension structure, one for gene-associated reaction.
rxnWithGene = find(sum(model.rxnGeneMat,2));
ec.rxns = model.rxns(rxnWithGene);
emptyCell = cell(numel(rxnWithGene),1);
emptyVect = zeros(numel(rxnWithGene),1);
if isfield(model,'eccodes')
ec.eccodes = model.eccodes(rxnWithGene);
else
ec.eccodes = emptyCell;
%TODO: loading of external ec-code database, possibly from uniprot, which
%should directly be parsed to the model, so that the eccodeDB entries match
%model.rxns. This is problematic, because Uniprot can contain multiple
%eccodes, while eccodes might not exactly match the reactions that are
%catalyzed. Only needed for classic GECKO matching, might just copy how it
%was dealt with there
end
ec.rxnEnzMat = zeros(numel(rxnWithGene),numel(model.genes)); % Non-zeros will indicate the number of subunits
ec.kcat = emptyVect;
ec.source = emptyCell; % Strings, like 'dlkcat', 'manual', 'brenda', etc.
ec.notes = emptyCell; % Additional comments
%8: Gather enzyme information via UniprotDB
[Lia,Locb] = ismember(model.genes,uniprotDB.genes);
ec.genes = model.genes(Lia); %Will often be duplicate of model.genes, but is done here to prevent issues when it is not.
ec.enzymes = uniprotDB.ID(Locb);
ec.mw = uniprotDB.MW(Locb);
ec.sequence = uniprotDB.seq(Locb);
%Additional info
ec.concs = nan(numel(ec.genes),1); % To be filled with proteomics data when available
%TODO: load Uniprot IDs from model annotation instead of from uniprotDB?
%To offer a choice, should then still be matched to a uniprotDB to obtain
%mw and sequence.
% if isfield(model,'geneMiriams')
% uniprotDB = extractMiriam(model.geneMiriams,'uniprot');
% end
%9: Only parse rxns associated to genes
for r=1:numel(rxnWithGene)
ec.rxns(r) = model.rxns(rxnWithGene(r));
rxnGenes = model.genes(find(model.rxnGeneMat(rxnWithGene(r),:)));
[~,locEnz] = ismember(rxnGenes,ec.genes); % Could also parse directly from rxnGeneMat, but some genes might be missing from Uniprot DB
ec.rxnEnzMat(r,locEnz) = 1; %Assume 1 copy per subunit or enzyme, can be modified later
end
%10: Add proteins as pseudometabolites
if ~geckoLight
[proteinMets.mets, uniprotSortId] = sort(ec.enzymes);
proteinMets.mets = strcat('prot_',proteinMets.mets);
proteinMets.metNames = proteinMets.mets;
proteinMets.compartments = 'c';
proteinMets.metMiriams = repmat({struct('name',{{'sbo'}},'value',{{'SBO:0000252'}})},numel(proteinMets.mets),1);
proteinMets.metNotes = repmat({'Enzyme-usage pseudometabolite'},numel(proteinMets.mets),1);
model = addMets(model,proteinMets);
end
%11: Add protein pool pseudometabolite
pool.mets = 'prot_pool';
pool.metNames = pool.mets;
pool.compartments = 'c';
pool.metNotes = 'Enzyme-usage protein pool';
model = addMets(model,pool);
%12: Add protein draw reactions.
if ~geckoLight
drawRxns.rxns = strcat('draw_',proteinMets.mets);
drawRxns.mets = cell(numel(drawRxns.rxns),1);
drawRxns.stoichCoeffs = cell(numel(drawRxns.rxns),1);
for i=1:numel(drawRxns.mets)
drawRxns.mets{i} = {'prot_pool',proteinMets.mets{i}};
drawRxns.stoichCoeffs{i} = [1,-(ec.mw(uniprotSortId(i)))/1000];
end
drawRxns.lb = zeros(numel(drawRxns.rxns),1);
drawRxns.grRules = ec.genes(uniprotSortId);
model = addRxns(model,drawRxns);
end
%13: Add protein pool reaction (with open UB)
poolRxn.rxns = 'prot_pool_exchange';
poolRxn.mets = {'prot_pool'};
poolRxn.stoichCoeffs = {-1};
poolRxn.lb = 0;
model = addRxns(model,poolRxn);
model.ec=ec;
end