Skip to content

Commit f27707b

Browse files
committed
removed submodule replaced with copied src
1 parent 809e8bb commit f27707b

38 files changed

+5211
-4
lines changed

.gitmodules

-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
[submodule "cppEDM"]
2-
path = cppEDM
3-
url = https://github.com/SugiharaLab/cppEDM

cppEDM

-1
This file was deleted.

cppEDM/lib/.gitignore

Whitespace-only changes.

cppEDM/lib/libEDM.a

2.36 MB
Binary file not shown.

cppEDM/src/AuxFunc.cc

+383
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,383 @@
1+
2+
#include "AuxFunc.h"
3+
#include "DateTime.h"
4+
5+
//---------------------------------------------------------------
6+
// Common code for Simplex and Smap that embeds, extracts
7+
// the target vector and computes neighbors.
8+
//
9+
// NOTE: time column is not returned in the embedding dataBlock.
10+
//
11+
// NOTE: If dataIn is embedded by Embed(), the returned dataBlock
12+
// has tau * (E-1) fewer rows than dataIn. Since dataIn is
13+
// included in the returned DataEmbedNN struct, the first
14+
// tau * (E-1) dataIn rows are deleted from dataIn. The
15+
// target vector is also reduced.
16+
//
17+
// NOTE: If rows are deleted, then the library and prediction
18+
// vectors in Parameters are updated to reflect this.
19+
//---------------------------------------------------------------
20+
DataEmbedNN EmbedNN( DataFrame<double> dataIn,
21+
Parameters &param,
22+
bool checkDataRows )
23+
{
24+
if ( checkDataRows ) {
25+
CheckDataRows( param, dataIn, "EmbedNN" );
26+
}
27+
28+
//----------------------------------------------------------
29+
// Extract or embedd data block
30+
//----------------------------------------------------------
31+
DataFrame<double> dataBlock; // Multivariate or embedded DataFrame
32+
33+
if ( param.embedded ) {
34+
// dataIn is multivariable block, no embedding needed
35+
// Select the specified columns
36+
if ( param.columnNames.size() ) {
37+
dataBlock = dataIn.DataFrameFromColumnNames(param.columnNames);
38+
}
39+
else if ( param.columnIndex.size() ) {
40+
dataBlock = dataIn.DataFrameFromColumnIndex(param.columnIndex);
41+
}
42+
else {
43+
throw std::runtime_error( "EmbedNN(): colNames and "
44+
" colIndex are empty.\n" );
45+
}
46+
}
47+
else {
48+
// embedded = false: Create the embedding block via Embed()
49+
// dataBlock will have tau * (E-1) fewer rows than dataIn
50+
dataBlock = Embed( dataIn, param.E, param.tau,
51+
param.columns_str, param.verbose );
52+
}
53+
54+
//----------------------------------------------------------
55+
// Get target (library) vector
56+
//----------------------------------------------------------
57+
std::valarray<double> target_vec;
58+
if ( param.targetIndex ) {
59+
target_vec = dataIn.Column( param.targetIndex );
60+
}
61+
else if ( param.targetName.size() ) {
62+
target_vec = dataIn.VectorColumnName( param.targetName );
63+
}
64+
else {
65+
// Default to first column
66+
target_vec = dataIn.Column( 0 );
67+
}
68+
69+
//------------------------------------------------------------
70+
// embedded = false: Embed() was called on dataIn
71+
// Remove target, dataIn rows as needed
72+
// Adjust param.library and param.prediction indices
73+
//------------------------------------------------------------
74+
if ( not param.embedded ) {
75+
// If we support negative tau, this will change
76+
// For now, assume only positive tau is allowed
77+
size_t shift = std::max( 0, param.tau * (param.E - 1) );
78+
79+
std::valarray<double> target_vec_embed( dataIn.NRows() - shift );
80+
// Bogus cast to ( std::valarray<double> ) for MSVC
81+
// as it doesn't export its own slice_array applied to []
82+
target_vec_embed = ( std::valarray<double> )
83+
target_vec[ std::slice( shift, target_vec.size() - shift, 1 ) ];
84+
85+
target_vec = target_vec_embed;
86+
87+
DataFrame<double> dataInEmbed( dataIn.NRows() - shift,
88+
dataIn.NColumns(),
89+
dataIn.ColumnNames() );
90+
91+
for ( size_t row = 0; row < dataInEmbed.NRows(); row++ ) {
92+
dataInEmbed.WriteRow( row, dataIn.Row( row + shift ) );
93+
}
94+
95+
// Shift and add time vector if present
96+
if ( dataIn.Time().size() ) {
97+
dataInEmbed.Time() =
98+
std::vector< std::string >( dataIn.Time().size() - shift );
99+
100+
for ( size_t t = shift; t < dataIn.Time().size(); t++ ) {
101+
dataInEmbed.Time()[ t - shift ] = dataIn.Time()[ t ];
102+
}
103+
dataInEmbed.TimeName() = dataIn.TimeName();
104+
}
105+
106+
// dataIn was passed in by value, so copy constructed.
107+
// JP Is it OK to reassign it here, then copy into dataEmbedNN?
108+
dataIn = dataInEmbed;
109+
110+
// Adust param.library and param.prediction vectors of indices
111+
if ( shift > 0 ) {
112+
size_t library_len = param.library.size();
113+
size_t prediction_len = param.prediction.size();
114+
115+
// If 0, 1, ... shift are in library or prediction
116+
// those rows were deleted, delete these elements.
117+
// First, create a vector of indices to delete
118+
std::vector< size_t > deleted_elements( shift, 0 );
119+
std::iota( deleted_elements.begin(), deleted_elements.end(), 0 );
120+
121+
// erase elements of row indices that were deleted
122+
for ( auto element = deleted_elements.begin();
123+
element != deleted_elements.end(); element++ ) {
124+
125+
std::vector< size_t >::iterator it;
126+
it = std::find( param.library.begin(),
127+
param.library.end(), *element );
128+
129+
if ( it != param.library.end() ) {
130+
param.library.erase( it );
131+
}
132+
133+
it = std::find( param.prediction.begin(),
134+
param.prediction.end(), *element );
135+
136+
if ( it != param.prediction.end() ) {
137+
param.prediction.erase( it );
138+
}
139+
}
140+
141+
// Now offset all values by shift so that vectors indices
142+
// in library and prediction refer to the same data rows
143+
// before the deletion/shift.
144+
for ( auto li = param.library.begin();
145+
li != param.library.end(); li++ ) {
146+
*li = *li - shift;
147+
}
148+
for ( auto pi = param.prediction.begin();
149+
pi != param.prediction.end(); pi++ ) {
150+
*pi = *pi - shift;
151+
}
152+
} // if ( shift > 0 )
153+
}
154+
155+
//----------------------------------------------------------
156+
// Nearest neighbors
157+
//----------------------------------------------------------
158+
Neighbors neighbors = FindNeighbors( dataBlock, param );
159+
160+
// Create struct to return the objects
161+
DataEmbedNN dataEmbedNN = DataEmbedNN( dataIn, dataBlock,
162+
target_vec, neighbors );
163+
return dataEmbedNN;
164+
}
165+
166+
//----------------------------------------------------------
167+
// Common code for Simplex and Smap output generation
168+
//----------------------------------------------------------
169+
DataFrame<double> FormatOutput( Parameters param,
170+
std::valarray<double> predictions,
171+
std::valarray<double> const_predictions,
172+
std::valarray<double> target_vec,
173+
std::vector<std::string> time,
174+
std::string timeName )
175+
{
176+
177+
#ifdef DEBUG_ALL
178+
std::cout << "FormatOutput() param.prediction.size: "
179+
<< param.prediction.size() << " >>> ";
180+
for( auto i = 0; i < param.prediction.size(); i++ ) {
181+
std::cout << param.prediction[i] << ",";
182+
} std::cout << std::endl;
183+
std::cout << "FormatOutput() time.size: " << time.size() << " >>> ";
184+
for( auto i = 0; i < time.size(); i++ ) {
185+
std::cout << time[i] << ",";
186+
} std::cout << std::endl;
187+
#endif
188+
189+
//----------------------------------------------------
190+
// Time vector with additional Tp points
191+
//----------------------------------------------------
192+
size_t N_time = time.size();
193+
size_t N_row = predictions.size();
194+
195+
// Populate vector of time strings for output
196+
std::vector<std::string> timeOut( N_row + param.Tp );
197+
198+
if ( N_time ) {
199+
FillTimes( param, time, std::ref( timeOut ) );
200+
}
201+
202+
//----------------------------------------------------
203+
// Observations: add Tp nan at end
204+
//----------------------------------------------------
205+
std::valarray<double> observations( N_row + param.Tp );
206+
std::slice pred_i = std::slice( param.prediction[0], N_row, 1 );
207+
208+
observations[ std::slice( 0, N_row, 1 ) ] =
209+
( std::valarray<double> ) target_vec[ pred_i ];
210+
211+
for ( size_t i = N_row; i < N_row + param.Tp; i++ ) {
212+
observations[ i ] = NAN;
213+
}
214+
215+
//----------------------------------------------------
216+
// Predictions: insert Tp nan at start
217+
//----------------------------------------------------
218+
std::valarray<double> predictionsOut( N_row + param.Tp );
219+
for ( size_t i = 0; i < param.Tp; i++ ) {
220+
predictionsOut[ i ] = NAN;
221+
}
222+
predictionsOut[ std::slice(param.Tp, N_row, 1) ] = predictions;
223+
224+
std::valarray<double> constPredictionsOut( N_row + param.Tp );
225+
if ( param.const_predict ) {
226+
for ( size_t i = 0; i < param.Tp; i++ ) {
227+
constPredictionsOut[ i ] = NAN;
228+
}
229+
constPredictionsOut[ std::slice(param.Tp, N_row, 1) ] =
230+
const_predictions;
231+
}
232+
233+
//----------------------------------------------------
234+
// Create output DataFrame
235+
//----------------------------------------------------
236+
size_t dataFrameColumms = param.const_predict ? 3 : 2;
237+
238+
DataFrame<double> dataFrame( N_row + param.Tp, dataFrameColumms );
239+
240+
if ( param.const_predict ) {
241+
dataFrame.ColumnNames() = { "Observations",
242+
"Predictions", "Const_Predictions" };
243+
}
244+
else {
245+
dataFrame.ColumnNames() = { "Observations", "Predictions" };
246+
}
247+
248+
if ( N_time ) {
249+
dataFrame.TimeName() = timeName;
250+
dataFrame.Time() = timeOut;
251+
}
252+
dataFrame.WriteColumn( 0, observations );
253+
dataFrame.WriteColumn( 1, predictionsOut );
254+
if ( param.const_predict ) {
255+
dataFrame.WriteColumn( 2, constPredictionsOut );
256+
}
257+
258+
#ifdef DEBUG_ALL
259+
std::cout << "FormatOutput() time " << timeOut.size()
260+
<< " pred " << predictionsOut.size()
261+
<< " obs " << observations.size() << std::endl;
262+
std::cout << "FormatOutput() dataFrame -------------------" << std::endl;
263+
std::cout << dataFrame;
264+
#endif
265+
266+
return dataFrame;
267+
}
268+
269+
//----------------------------------------------------------
270+
// Copy strings of time values into timeOut.
271+
// If prediction times exceed times from the data, then
272+
// create new entries for the additional times.
273+
//----------------------------------------------------------
274+
void FillTimes( Parameters param,
275+
std::vector<std::string> time,
276+
std::vector<std::string> &timeOut )
277+
{
278+
size_t N_time = time.size();
279+
size_t N_row = param.prediction.size();
280+
size_t max_pred_i = param.prediction[ N_row - 1 ];
281+
282+
if ( timeOut.size() != N_row + param.Tp ) {
283+
std::stringstream errMsg;
284+
errMsg << "FillTimes(): timeOut vector length " << timeOut.size()
285+
<< " is not equal to the number of predictions + Tp "
286+
<< N_row + param.Tp << std::endl;
287+
throw std::runtime_error( errMsg.str() );
288+
}
289+
290+
// Fill in times guaranteed to be in param.prediction indices
291+
for ( auto i = 0; i < N_row; i++ ) {
292+
timeOut[ i ] = time[ param.prediction[ i ] ];
293+
}
294+
295+
// Now fill in times beyond param.prediction indices
296+
if ( max_pred_i + param.Tp < N_time ) {
297+
// All prediction times are available in time, get the rest
298+
for ( auto i = 0; i < param.Tp; i++ ) {
299+
timeOut[ N_row + i ] = time[ max_pred_i + i + 1 ];
300+
}
301+
}
302+
else {
303+
// Keep track of whether warning of time format already printed
304+
bool time_format_warning_printed = false;
305+
306+
// Tp introduces time values beyond the range of time
307+
for ( auto i = N_row; i < N_row + param.Tp; i++ ) {
308+
std::stringstream tss;
309+
310+
if ( OnlyDigits( time[ max_pred_i ] ) ) {
311+
// Numeric so add Tp
312+
tss << std::stod( time[ max_pred_i ] ) + i - N_row + 1;
313+
}
314+
else {
315+
int time_delta = i - N_row + 1;
316+
//get last two datetimes to compute time diff to add time delta
317+
std::string time_new( time[ max_pred_i ] );
318+
std::string time_old( time[ max_pred_i - 1 ] );
319+
std::string new_time = increment_datetime_str( time_old,
320+
time_new,
321+
time_delta );
322+
323+
if ( new_time.size() ) {
324+
tss << new_time;
325+
}
326+
else {
327+
// Add " +ti" if not a recognized format
328+
// increment_datetime_str() returns ""
329+
tss << time[ max_pred_i ] << " +" << i - N_row + 1;
330+
if ( not time_format_warning_printed ) {
331+
std::stringstream errMsg;
332+
errMsg << "FillTimes(): Input time column format "
333+
<< "not recognized. Appending '+ tp' to last "
334+
<< " available time." << std::endl;
335+
std::cout << errMsg.str();
336+
time_format_warning_printed = true;
337+
}
338+
}
339+
}
340+
341+
timeOut[ i ] = tss.str();
342+
}
343+
}
344+
}
345+
346+
//----------------------------------------------------------
347+
// Validate dataFrameIn rows against lib and pred indices
348+
//----------------------------------------------------------
349+
void CheckDataRows( Parameters param,
350+
DataFrame<double> dataFrameIn,
351+
std::string call )
352+
{
353+
// param.prediction & library have been zero-offset in Validate()
354+
// to convert from user specified data row to array indicies
355+
size_t prediction_max_i = param.prediction[ param.prediction.size() - 1 ];
356+
size_t library_max_i = param.library [ param.library.size() - 1 ];
357+
358+
size_t shift;
359+
if ( param.embedded ) {
360+
shift = 0;
361+
}
362+
else {
363+
shift = std::max( 0, param.tau * (param.E - 1) );
364+
}
365+
366+
if ( dataFrameIn.NRows() <= prediction_max_i + shift ) {
367+
std::stringstream errMsg;
368+
errMsg << "CheckDataRows(): The prediction index + tau(E-1) "
369+
<< prediction_max_i + shift
370+
<< " equals or exceeds the number of data rows "
371+
<< dataFrameIn.NRows();
372+
throw std::runtime_error( errMsg.str() );
373+
}
374+
375+
if ( dataFrameIn.NRows() <= library_max_i + shift ) {
376+
std::stringstream errMsg;
377+
errMsg << "CheckDataRows(): The library index + tau(E-1) "
378+
<< library_max_i + shift
379+
<< " equals or exceeds the number of data rows "
380+
<< dataFrameIn.NRows();
381+
throw std::runtime_error( errMsg.str() );
382+
}
383+
}

0 commit comments

Comments
 (0)