12
12
import org .opencb .biodata .models .core .Xref ;
13
13
import org .opencb .biodata .models .variant .Variant ;
14
14
import org .opencb .biodata .models .variant .avro .*;
15
+ import org .opencb .bionetdb .core .models .network .Network ;
15
16
import org .opencb .bionetdb .core .models .network .Node ;
16
17
import org .opencb .bionetdb .core .models .network .Relation ;
17
18
import org .opencb .bionetdb .lib .db .Neo4jBioPaxBuilder ;
26
27
import java .io .IOException ;
27
28
import java .io .PrintWriter ;
28
29
import java .nio .file .Path ;
30
+ import java .nio .file .Paths ;
29
31
import java .util .*;
30
32
31
33
public class Builder {
@@ -45,6 +47,8 @@ public class Builder {
45
47
46
48
public static final Object CLINICAL_VARIANT_FILENAME = "clinical_variants.full.json" ;
47
49
50
+ private List <String > additionalNeworkFiles ;
51
+
48
52
private CsvInfo csv ;
49
53
private Path inputPath ;
50
54
private Path outputPath ;
@@ -53,20 +57,30 @@ public class Builder {
53
57
54
58
protected static Logger logger ;
55
59
60
/**
 * Creates a builder bound to an input and an output directory.
 *
 * <p>The constructor only wires up collaborators: it stores the paths and filters, creates the
 * {@link CsvInfo} helper for the given paths, and configures the Jackson mapper used to
 * serialize objects to strings. No file is opened here; that happens in {@link #build()}.
 *
 * @param inputPath  directory under which {@link #build()} looks for its input files
 * @param outputPath directory handed to {@link CsvInfo} together with {@code inputPath}
 * @param filters    filters kept on this instance for later use; stored as given
 */
public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
    this.inputPath = inputPath;
    this.outputPath = outputPath;
    this.filters = filters;

    // Prepare CSV object
    csv = new CsvInfo(inputPath, outputPath);

    // Prepare jackson writer (object to string): skip null properties and only serialize
    // getters that have a matching setter
    mapper = new ObjectMapper();
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
    mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);

    // 'logger' is a static field, so it is assigned without 'this'. Passing the Class (rather
    // than getClass().toString(), which yields "class <fqcn>") gives the logger the plain
    // fully-qualified class name.
    logger = LoggerFactory.getLogger(getClass());
}
77
+
56
78
public void build () throws IOException {
57
79
long start ;
58
80
59
81
// Open CSV files
60
82
csv .openCSVFiles ();
61
83
62
- long ensemblGeneBuildTime = 0 ;
63
- long refSeqGeneBuildTime = 0 ;
64
- long proteinBuildTime = 0 ;
65
- long genePanelBuildTime = 0 ;
66
- long bioPaxBuildTime = 0 ;
67
- long clinvarBuildTime = 0 ;
68
-
69
-
70
84
// Check input files
71
85
File ensemblGeneFile = new File (inputPath + "/" + ENSEMBL_GENE_FILENAME );
72
86
if (!ensemblGeneFile .exists ()) {
@@ -105,31 +119,27 @@ public void build() throws IOException {
105
119
logger .info ("Processing Ensembl genes..." );
106
120
start = System .currentTimeMillis ();
107
121
buildGenes (ensemblGeneFile .toPath ());
108
- ensemblGeneBuildTime = (System .currentTimeMillis () - start ) / 1000 ;
109
- logger .info ("Ensembl gene processing done in {} s" , ensemblGeneBuildTime );
122
+ logger .info ("Ensembl gene processing done in {} s" , (System .currentTimeMillis () - start ) / 1000 );
110
123
}
111
124
112
125
if (refSeqGeneFile .exists ()) {
113
126
logger .info ("Processing RefSeq genes..." );
114
127
start = System .currentTimeMillis ();
115
128
buildGenes (refSeqGeneFile .toPath ());
116
- refSeqGeneBuildTime = (System .currentTimeMillis () - start ) / 1000 ;
117
- logger .info ("RefSeq gene processing done in {} s" , refSeqGeneBuildTime );
129
+ logger .info ("RefSeq gene processing done in {} s" , (System .currentTimeMillis () - start ) / 1000 );
118
130
}
119
131
120
132
// Processing proteins
121
133
logger .info ("Processing proteins..." );
122
134
start = System .currentTimeMillis ();
123
135
buildProteins (proteinFile .toPath ());
124
- proteinBuildTime = (System .currentTimeMillis () - start ) / 1000 ;
125
- logger .info ("Protein processing done in {} s" , proteinBuildTime );
136
+ logger .info ("Protein processing done in {} s" , (System .currentTimeMillis () - start ) / 1000 );
126
137
127
138
// Gene panels support
128
139
logger .info ("Processing gene panels..." );
129
140
start = System .currentTimeMillis ();
130
141
buildGenePanels (panelFile .toPath ());
131
- genePanelBuildTime = (System .currentTimeMillis () - start ) / 1000 ;
132
- logger .info ("Gene panel processing done in {} s" , genePanelBuildTime );
142
+ logger .info ("Gene panel processing done in {} s" , (System .currentTimeMillis () - start ) / 1000 );
133
143
134
144
135
145
// Processing BioPAX file
@@ -138,44 +148,27 @@ public void build() throws IOException {
138
148
start = System .currentTimeMillis ();
139
149
bioPAXImporter .build (networkFile .toPath ());
140
150
biopaxProcessing .post ();
141
- bioPaxBuildTime = (System .currentTimeMillis () - start ) / 1000 ;
151
+ logger . info ( "Processing BioPax/reactome file done in {} s" , (System .currentTimeMillis () - start ) / 1000 ) ;
142
152
143
153
144
154
// Processing clinical variants
145
155
logger .info ("Processing clinical variants..." );
146
156
start = System .currentTimeMillis ();
147
157
buildClinicalVariants (clinicalVariantFile .toPath ());
148
- clinvarBuildTime = (System .currentTimeMillis () - start ) / 1000 ;
149
- logger .info ("Processing clinical variants done in {} s" , clinvarBuildTime );
158
+ logger .info ("Processing clinical variants done in {} s" , (System .currentTimeMillis () - start ) / 1000 );
159
+
160
+ // Processing additional networks
161
+ if (CollectionUtils .isNotEmpty (additionalNeworkFiles )) {
162
+ for (String additionalNeworkFile : additionalNeworkFiles ) {
163
+ logger .info ("Processing additional network file {}..." , additionalNeworkFile );
164
+ start = System .currentTimeMillis ();
165
+ processAdditionalNetwork (additionalNeworkFile );
166
+ logger .info ("Processing clinical variants done in {} s" , (System .currentTimeMillis () - start ) / 1000 );
167
+ }
168
+ }
150
169
151
170
// Close CSV files
152
171
csv .close ();
153
-
154
- logger .info ("Ensembl gene build time: {} s" , ensemblGeneBuildTime );
155
- logger .info ("RefSeq gene build time: {} s" , refSeqGeneBuildTime );
156
- logger .info ("Protein build time: {} s" , proteinBuildTime );
157
- logger .info ("Gene panel build time: {} s" , genePanelBuildTime );
158
- logger .info ("BioPAX build time: {} s" , bioPaxBuildTime );
159
- logger .info ("Clinical variant build time: {} s" , clinvarBuildTime );
160
- }
161
-
162
-
163
- public Builder (Path inputPath , Path outputPath , Map <String , Set <String >> filters ) {
164
-
165
- this .inputPath = inputPath ;
166
- this .outputPath = outputPath ;
167
- this .filters = filters ;
168
-
169
-
170
- // Prepare CSV object
171
- csv = new CsvInfo (inputPath , outputPath );
172
-
173
- // Prepare jackson writer (object to string)
174
- mapper = new ObjectMapper ();
175
- mapper .setSerializationInclusion (JsonInclude .Include .NON_NULL );
176
- mapper .configure (MapperFeature .REQUIRE_SETTERS_FOR_GETTERS , true );
177
-
178
- this .logger = LoggerFactory .getLogger (this .getClass ().toString ());
179
172
}
180
173
181
174
//-------------------------------------------------------------------------
@@ -1018,6 +1011,54 @@ private Node createVariantNode(Variant variant, Long varUid) {
1018
1011
1019
1012
return varNode ;
1020
1013
}
1014
+
1015
+ private void processAdditionalNetwork (String additionalNeworkFilename ) throws IOException {
1016
+ // Check file
1017
+ File addNetworkFile = Paths .get (additionalNeworkFilename ).toFile ();
1018
+ if (!addNetworkFile .exists ()) {
1019
+ logger .info ("Additional network file {} does not exist" , additionalNeworkFilename );
1020
+ return ;
1021
+ }
1022
+
1023
+ ObjectMapper objectMapper = new ObjectMapper ();
1024
+ Network network = objectMapper .readValue (addNetworkFile , Network .class );
1025
+
1026
+ Map <Long , Long > nodeUidMap = new HashMap <>();
1027
+
1028
+ // First, nodes
1029
+ if (CollectionUtils .isNotEmpty (network .getNodes ())) {
1030
+ for (Node node : network .getNodes ()) {
1031
+ Long uid = csv .getLong (node .getId (), node .getType ().name ());
1032
+ if (uid == null ) {
1033
+ // Node does not exist in the !
1034
+ nodeUidMap .put (node .getUid (), csv .getAndIncUid ());
1035
+ // Update UID and append node to the CSV file
1036
+ node .setUid (nodeUidMap .get (node .getUid ()));
1037
+ csv .getCsvWriters ().get (node .getType ().toString ()).println (csv .nodeLine (node ));
1038
+ } else {
1039
+ // Node already exists !!
1040
+ nodeUidMap .put (node .getUid (), uid );
1041
+ }
1042
+ }
1043
+ }
1044
+
1045
+ // Second, relations
1046
+ if (CollectionUtils .isNotEmpty (network .getRelations ())) {
1047
+ for (Relation relation : network .getRelations ()) {
1048
+ relation .setUid (csv .getAndIncUid ());
1049
+ System .out .println (relation .getType ().toString ());
1050
+ System .out .println (csv .relationLine (nodeUidMap .get (relation .getOrigUid ()), nodeUidMap .get (relation .getDestUid ())));
1051
+ if (csv .getCsvWriters ().containsKey (relation .getType ().toString ())) {
1052
+ System .out .println ("YYYYEEEEESSSSSSSS" );
1053
+ }
1054
+
1055
+ csv .getCsvWriters ().get (relation .getType ().toString ()).println (csv .relationLine (nodeUidMap .get (relation .getOrigUid ()),
1056
+ nodeUidMap .get (relation .getDestUid ())));
1057
+ }
1058
+ }
1059
+ }
1060
+
1061
+
1021
1062
//
1022
1063
// public Long processClinicalAnalysis(ClinicalAnalysis clinicalAnalysis) throws IOException {
1023
1064
// Node clinicalAnalysisNode = null;
@@ -1710,4 +1751,13 @@ private void createVariantObjectNode(Variant variant, Node variantNode) throws I
1710
1751
pw = csv .getCsvWriters ().get (Relation .Type .VARIANT__VARIANT_OBJECT .toString ());
1711
1752
pw .println (variantNode .getUid () + CsvInfo .SEPARATOR + variantObjectNode .getUid ());
1712
1753
}
1754
+
1755
+ public List <String > getAdditionalNeworkFiles () {
1756
+ return additionalNeworkFiles ;
1757
+ }
1758
+
1759
+ public Builder setAdditionalNeworkFiles (List <String > additionalNeworkFiles ) {
1760
+ this .additionalNeworkFiles = additionalNeworkFiles ;
1761
+ return this ;
1762
+ }
1713
1763
}
0 commit comments