Skip to content

Commit abba9da

Browse files
committed
lib: the command 'build' supports additional network files, #58
1 parent e0a363f commit abba9da

File tree

6 files changed

+179
-47
lines changed

6 files changed

+179
-47
lines changed

bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/AdminCliOptionsParser.java

+3
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ public class BuildCommandOptions {
109109
@Parameter(names = {"-o", "--output"}, description = "Output directory where to save the CSV files to import", required = true, arity = 1)
110110
public String output;
111111

112+
@Parameter(names = {"--add-network-file"}, description = "JSON file containing a BioNetDB network", arity = 1)
113+
public List<String> networkFiles;
114+
112115
@Parameter(names = {"--exclude"}, description = "Exclude information separated by comma, e.g.:'XREF_DBNAME:Reactome Database ID Release 63'", arity = 1)
113116
public List<String> exclude;
114117
}

bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/executors/BuildCommandExecutor.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package org.opencb.bionetdb.app.cli.admin.executors;
22

3+
import com.fasterxml.jackson.annotation.JsonInclude;
4+
import com.fasterxml.jackson.databind.MapperFeature;
35
import com.fasterxml.jackson.databind.ObjectMapper;
46
import com.fasterxml.jackson.databind.SerializationFeature;
7+
import htsjdk.samtools.util.StringUtil;
58
import org.apache.commons.collections.CollectionUtils;
69
import org.apache.commons.lang3.StringUtils;
710
import org.opencb.bionetdb.app.cli.CommandExecutor;
@@ -11,12 +14,15 @@
1114
import org.opencb.bionetdb.core.io.SbmlParser;
1215
import org.opencb.bionetdb.core.io.SifParser;
1316
import org.opencb.bionetdb.core.models.network.Network;
17+
import org.opencb.bionetdb.core.models.network.Node;
18+
import org.opencb.bionetdb.core.models.network.Relation;
1419
import org.opencb.bionetdb.lib.BioNetDbManager;
1520
import org.opencb.bionetdb.lib.utils.Builder;
1621
import org.opencb.commons.utils.FileUtils;
1722
import org.opencb.commons.utils.ListUtils;
1823

1924
import java.io.BufferedWriter;
25+
import java.io.File;
2026
import java.io.FileWriter;
2127
import java.io.IOException;
2228
import java.nio.file.Path;
@@ -47,7 +53,7 @@ public void execute() {
4753
FileUtils.checkDirectory(outputPath);
4854

4955
BioNetDbManager manager = new BioNetDbManager(configuration);
50-
manager.build(inputPath, outputPath, buildCommandOptions.exclude);
56+
manager.build(inputPath, outputPath, buildCommandOptions.networkFiles, buildCommandOptions.exclude);
5157
} catch (IOException | BioNetDBException e) {
5258
e.printStackTrace();
5359
}

bionetdb-app/src/test/java/org/opencb/bionetdb/app/BioNetDBMainTest.java

+63-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
package org.opencb.bionetdb.app;
22

3+
import com.fasterxml.jackson.annotation.JsonInclude;
4+
import com.fasterxml.jackson.databind.MapperFeature;
5+
import com.fasterxml.jackson.databind.ObjectMapper;
36
import org.junit.Test;
7+
import org.opencb.bionetdb.core.models.network.Network;
8+
import org.opencb.bionetdb.core.models.network.Node;
9+
import org.opencb.bionetdb.core.models.network.Relation;
410

5-
import static org.junit.Assert.*;
11+
import java.io.File;
12+
import java.io.IOException;
13+
import java.util.ArrayList;
614

715
public class BioNetDBMainTest {
816

@@ -12,4 +20,58 @@ public void createCsvClinicalAnalysis() {
1220
String cmdLine = "~/appl/bionetdb/build/bin/bionetdb.sh create-csv -i " + caPath + "/input/ -o csv/ --clinical-analysis";
1321
}
1422

23+
private void createNetworks() {
24+
long uid = 0;
25+
26+
ObjectMapper mapper = new ObjectMapper();
27+
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
28+
mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
29+
30+
Network network;
31+
Node node1, node2, node3;
32+
Relation relation1, relation2, relation3;
33+
34+
network = new Network("net1", "net1", "Network #1");
35+
network.setNodes(new ArrayList<>());
36+
network.setRelations(new ArrayList<>());
37+
38+
node1 = new Node(uid++, "ENSG00000078808", "SDF4", Node.Type.GENE);
39+
network.getNodes().add(node1);
40+
node2 = new Node(uid++, null, "COCA", Node.Type.DRUG);
41+
network.getNodes().add(node2);
42+
relation1 = new Relation(uid++, "rel1", node1.getUid(), Node.Type.GENE, node2.getUid(), Node.Type.DRUG,
43+
Relation.Type.GENE__DRUG);
44+
network.getRelations().add(relation1);
45+
46+
try {
47+
mapper.writer().writeValue(new File("/tmp/network1.json"), network);
48+
} catch (IOException e) {
49+
e.printStackTrace();
50+
}
51+
52+
53+
network = new Network("net2", "net2", "Network #2");
54+
network.setNodes(new ArrayList<>());
55+
network.setRelations(new ArrayList<>());
56+
57+
node1 = new Node(uid++, "ENSG00000066666", "SDF666", Node.Type.GENE);
58+
network.getNodes().add(node1);
59+
node2 = new Node(uid++, null, "COCA", Node.Type.DRUG);
60+
network.getNodes().add(node2);
61+
node3 = new Node(uid++, "ALCOHOL", "ALCOHOL", Node.Type.DRUG);
62+
network.getNodes().add(node3);
63+
relation2 = new Relation(uid++, "rel2", node1.getUid(), Node.Type.GENE, node2.getUid(), Node.Type.DRUG,
64+
Relation.Type.GENE__DRUG);
65+
network.getRelations().add(relation2);
66+
relation3 = new Relation(uid++, "rel3", node1.getUid(), Node.Type.GENE, node3.getUid(), Node.Type.DRUG,
67+
Relation.Type.GENE__DRUG);
68+
network.getRelations().add(relation3);
69+
70+
try {
71+
mapper.writer().writeValue(new File("/tmp/network2.json"), network);
72+
} catch (IOException e) {
73+
e.printStackTrace();
74+
}
75+
76+
}
1577
}

bionetdb-core/src/main/java/org/opencb/bionetdb/core/models/network/Node.java

+4
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ public static boolean isPhysicalEntity(Node node) {
171171
}
172172
}
173173

174+
/**
 * No-argument constructor: delegates to the five-argument constructor with a UID of -1
 * and every other argument null.
 *
 * NOTE(review): presumably needed so that nodes can be instantiated when a network is
 * read back from JSON -- confirm.
 */
public Node() {
    this(-1L, null, null, null, null);
}
177+
174178
public Node(long uid) {
175179
this(uid, null, null, null, null);
176180
}

bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/BioNetDbManager.java

+7
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,14 @@ public void download(Path outDir) throws IOException {
9797
//-------------------------------------------------------------------------
9898

9999
public void build(Path inputPath, Path outputPath, List<String> exclude) throws IOException {
100+
build(inputPath, outputPath, null, exclude);
101+
}
102+
103+
public void build(Path inputPath, Path outputPath, List<String> networkFiles, List<String> exclude) throws IOException {
100104
Builder builder = new Builder(inputPath, outputPath, parseFilters(exclude));
105+
if (CollectionUtils.isNotEmpty(networkFiles)) {
106+
builder.setAdditionalNeworkFiles(networkFiles);
107+
}
101108
builder.build();
102109
}
103110

bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/utils/Builder.java

+95-45
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.opencb.biodata.models.core.Xref;
1313
import org.opencb.biodata.models.variant.Variant;
1414
import org.opencb.biodata.models.variant.avro.*;
15+
import org.opencb.bionetdb.core.models.network.Network;
1516
import org.opencb.bionetdb.core.models.network.Node;
1617
import org.opencb.bionetdb.core.models.network.Relation;
1718
import org.opencb.bionetdb.lib.db.Neo4jBioPaxBuilder;
@@ -26,6 +27,7 @@
2627
import java.io.IOException;
2728
import java.io.PrintWriter;
2829
import java.nio.file.Path;
30+
import java.nio.file.Paths;
2931
import java.util.*;
3032

3133
public class Builder {
@@ -45,6 +47,8 @@ public class Builder {
4547

4648
public static final Object CLINICAL_VARIANT_FILENAME = "clinical_variants.full.json";
4749

50+
private List<String> additionalNeworkFiles;
51+
4852
private CsvInfo csv;
4953
private Path inputPath;
5054
private Path outputPath;
@@ -53,20 +57,30 @@ public class Builder {
5357

5458
protected static Logger logger;
5559

60+
public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
61+
62+
this.inputPath = inputPath;
63+
this.outputPath = outputPath;
64+
this.filters = filters;
65+
66+
67+
// Prepare CSV object
68+
csv = new CsvInfo(inputPath, outputPath);
69+
70+
// Prepare jackson writer (object to string)
71+
mapper = new ObjectMapper();
72+
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
73+
mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
74+
75+
this.logger = LoggerFactory.getLogger(this.getClass().toString());
76+
}
77+
5678
public void build() throws IOException {
5779
long start;
5880

5981
// Open CSV files
6082
csv.openCSVFiles();
6183

62-
long ensemblGeneBuildTime = 0;
63-
long refSeqGeneBuildTime = 0;
64-
long proteinBuildTime = 0;
65-
long genePanelBuildTime = 0;
66-
long bioPaxBuildTime = 0;
67-
long clinvarBuildTime = 0;
68-
69-
7084
// Check input files
7185
File ensemblGeneFile = new File(inputPath + "/" + ENSEMBL_GENE_FILENAME);
7286
if (!ensemblGeneFile.exists()) {
@@ -105,31 +119,27 @@ public void build() throws IOException {
105119
logger.info("Processing Ensembl genes...");
106120
start = System.currentTimeMillis();
107121
buildGenes(ensemblGeneFile.toPath());
108-
ensemblGeneBuildTime = (System.currentTimeMillis() - start) / 1000;
109-
logger.info("Ensembl gene processing done in {} s", ensemblGeneBuildTime);
122+
logger.info("Ensembl gene processing done in {} s", (System.currentTimeMillis() - start) / 1000);
110123
}
111124

112125
if (refSeqGeneFile.exists()) {
113126
logger.info("Processing RefSeq genes...");
114127
start = System.currentTimeMillis();
115128
buildGenes(refSeqGeneFile.toPath());
116-
refSeqGeneBuildTime = (System.currentTimeMillis() - start) / 1000;
117-
logger.info("RefSeq gene processing done in {} s", refSeqGeneBuildTime);
129+
logger.info("RefSeq gene processing done in {} s", (System.currentTimeMillis() - start) / 1000);
118130
}
119131

120132
// Processing proteins
121133
logger.info("Processing proteins...");
122134
start = System.currentTimeMillis();
123135
buildProteins(proteinFile.toPath());
124-
proteinBuildTime = (System.currentTimeMillis() - start) / 1000;
125-
logger.info("Protein processing done in {} s", proteinBuildTime);
136+
logger.info("Protein processing done in {} s", (System.currentTimeMillis() - start) / 1000);
126137

127138
// Gene panels support
128139
logger.info("Processing gene panels...");
129140
start = System.currentTimeMillis();
130141
buildGenePanels(panelFile.toPath());
131-
genePanelBuildTime = (System.currentTimeMillis() - start) / 1000;
132-
logger.info("Gene panel processing done in {} s", genePanelBuildTime);
142+
logger.info("Gene panel processing done in {} s", (System.currentTimeMillis() - start) / 1000);
133143

134144

135145
// Processing BioPAX file
@@ -138,44 +148,27 @@ public void build() throws IOException {
138148
start = System.currentTimeMillis();
139149
bioPAXImporter.build(networkFile.toPath());
140150
biopaxProcessing.post();
141-
bioPaxBuildTime = (System.currentTimeMillis() - start) / 1000;
151+
logger.info("Processing BioPax/reactome file done in {} s", (System.currentTimeMillis() - start) / 1000);
142152

143153

144154
// Processing clinical variants
145155
logger.info("Processing clinical variants...");
146156
start = System.currentTimeMillis();
147157
buildClinicalVariants(clinicalVariantFile.toPath());
148-
clinvarBuildTime = (System.currentTimeMillis() - start) / 1000;
149-
logger.info("Processing clinical variants done in {} s", clinvarBuildTime);
158+
logger.info("Processing clinical variants done in {} s", (System.currentTimeMillis() - start) / 1000);
159+
160+
// Processing additional networks (only when some were set via setAdditionalNeworkFiles)
if (CollectionUtils.isNotEmpty(additionalNeworkFiles)) {
    for (String additionalNeworkFile : additionalNeworkFiles) {
        logger.info("Processing additional network file {}...", additionalNeworkFile);
        start = System.currentTimeMillis();
        processAdditionalNetwork(additionalNeworkFile);
        // Name the file that was just processed; the previous message was copied from
        // the clinical-variant step and reported the wrong stage
        logger.info("Processing additional network file {} done in {} s", additionalNeworkFile,
                (System.currentTimeMillis() - start) / 1000);
    }
}
150169

151170
// Close CSV files
152171
csv.close();
153-
154-
logger.info("Ensembl gene build time: {} s", ensemblGeneBuildTime);
155-
logger.info("RefSeq gene build time: {} s", refSeqGeneBuildTime);
156-
logger.info("Protein build time: {} s", proteinBuildTime);
157-
logger.info("Gene panel build time: {} s", genePanelBuildTime);
158-
logger.info("BioPAX build time: {} s", bioPaxBuildTime);
159-
logger.info("Clinical variant build time: {} s", clinvarBuildTime);
160-
}
161-
162-
163-
public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
164-
165-
this.inputPath = inputPath;
166-
this.outputPath = outputPath;
167-
this.filters = filters;
168-
169-
170-
// Prepare CSV object
171-
csv = new CsvInfo(inputPath, outputPath);
172-
173-
// Prepare jackson writer (object to string)
174-
mapper = new ObjectMapper();
175-
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
176-
mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
177-
178-
this.logger = LoggerFactory.getLogger(this.getClass().toString());
179172
}
180173

181174
//-------------------------------------------------------------------------
@@ -1018,6 +1011,54 @@ private Node createVariantNode(Variant variant, Long varUid) {
10181011

10191012
return varNode;
10201013
}
1014+
1015+
private void processAdditionalNetwork(String additionalNeworkFilename) throws IOException {
1016+
// Check file
1017+
File addNetworkFile = Paths.get(additionalNeworkFilename).toFile();
1018+
if (!addNetworkFile.exists()) {
1019+
logger.info("Additional network file {} does not exist", additionalNeworkFilename);
1020+
return;
1021+
}
1022+
1023+
ObjectMapper objectMapper = new ObjectMapper();
1024+
Network network = objectMapper.readValue(addNetworkFile, Network.class);
1025+
1026+
Map<Long, Long> nodeUidMap = new HashMap<>();
1027+
1028+
// First, nodes
1029+
if (CollectionUtils.isNotEmpty(network.getNodes())) {
1030+
for (Node node: network.getNodes()) {
1031+
Long uid = csv.getLong(node.getId(), node.getType().name());
1032+
if (uid == null) {
1033+
// Node does not exist in the !
1034+
nodeUidMap.put(node.getUid(), csv.getAndIncUid());
1035+
// Update UID and append node to the CSV file
1036+
node.setUid(nodeUidMap.get(node.getUid()));
1037+
csv.getCsvWriters().get(node.getType().toString()).println(csv.nodeLine(node));
1038+
} else {
1039+
// Node already exists !!
1040+
nodeUidMap.put(node.getUid(), uid);
1041+
}
1042+
}
1043+
}
1044+
1045+
// Second, relations
1046+
if (CollectionUtils.isNotEmpty(network.getRelations())) {
1047+
for (Relation relation: network.getRelations()) {
1048+
relation.setUid(csv.getAndIncUid());
1049+
System.out.println(relation.getType().toString());
1050+
System.out.println(csv.relationLine(nodeUidMap.get(relation.getOrigUid()), nodeUidMap.get(relation.getDestUid())));
1051+
if (csv.getCsvWriters().containsKey(relation.getType().toString())) {
1052+
System.out.println("YYYYEEEEESSSSSSSS");
1053+
}
1054+
1055+
csv.getCsvWriters().get(relation.getType().toString()).println(csv.relationLine(nodeUidMap.get(relation.getOrigUid()),
1056+
nodeUidMap.get(relation.getDestUid())));
1057+
}
1058+
}
1059+
}
1060+
1061+
10211062
//
10221063
// public Long processClinicalAnalysis(ClinicalAnalysis clinicalAnalysis) throws IOException {
10231064
// Node clinicalAnalysisNode = null;
@@ -1710,4 +1751,13 @@ private void createVariantObjectNode(Variant variant, Node variantNode) throws I
17101751
pw = csv.getCsvWriters().get(Relation.Type.VARIANT__VARIANT_OBJECT.toString());
17111752
pw.println(variantNode.getUid() + CsvInfo.SEPARATOR + variantObjectNode.getUid());
17121753
}
1754+
1755+
public List<String> getAdditionalNeworkFiles() {
1756+
return additionalNeworkFiles;
1757+
}
1758+
1759+
public Builder setAdditionalNeworkFiles(List<String> additionalNeworkFiles) {
1760+
this.additionalNeworkFiles = additionalNeworkFiles;
1761+
return this;
1762+
}
17131763
}

0 commit comments

Comments
 (0)