-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathsubmission.py
1222 lines (1053 loc) · 44.4 KB
/
submission.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import datetime
import hashlib
import html
import os

from sqlalchemy import Enum
from sqlalchemy import Float
from sqlalchemy import Column
from sqlalchemy import String
from sqlalchemy import Integer
from sqlalchemy import Boolean
from sqlalchemy import DateTime
from sqlalchemy import ForeignKey
from sqlalchemy import UniqueConstraint
from sqlalchemy import inspect
from sqlalchemy.orm import backref
from sqlalchemy.orm import relationship
from sqlalchemy.ext.hybrid import hybrid_property

from .base import Model
from .event import EventScoreType
from .datatype import NumpyType
# Names exported by a star-import of this module.
__all__ = [
'Submission',
'SubmissionScore',
'SubmissionFile',
'SubmissionFileType',
'SubmissionFileTypeExtension',
'Extension',
'SubmissionScoreOnCVFold',
'SubmissionOnCVFold',
'DetachedSubmissionOnCVFold',
'SubmissionSimilarity',
]
# Database enumerations shared by the models below.
#
# NOTE: an older remark here said that evaluation happens right after
# train/test so that no dedicated 'scored' state is needed; a final 'scored'
# state is nevertheless part of the enumeration.
submission_states = Enum(
    # submitted by user to frontend server
    'new',
    # not used, checking is part of the workflow now
    'checked',
    # not used, checking is part of the workflow now
    'checking_error',
    # training finished normally on the backend server
    'trained',
    # training finished abnormally on the backend server
    'training_error',
    # validation finished normally on the backend server
    'validated',
    # validation finished abnormally on the backend server
    'validating_error',
    # testing finished normally on the backend server
    'tested',
    # testing finished abnormally on the backend server
    'testing_error',
    # training is running normally on the backend server
    'training',
    # frontend server sent submission to backend server
    'sent_to_training',
    # submission scored on the frontend server; final state
    'scored',
    name='submission_states',
)

# The two kinds of submission.
submission_types = Enum('live', 'test', name='submission_types')
def _encode_string(text):
    """Return ``text`` encoded as UTF-8 bytes when it is a ``str``.

    Any object that is not a ``str`` (e.g. ``bytes``) is returned unchanged.
    """
    if not isinstance(text, str):
        return text
    return bytes(text, 'utf-8')
class Submission(Model):
    """Submission table.

    Parameters
    ----------
    name : str
        The submission name.
    event_team : :class:`ramp_database.model.EventTeam`
        The event/team instance.
    session : :class:`sqlalchemy.orm.Session`, default=None
        The session used to query the score types of the event. When
        ``None``, ``EventScoreType.query`` is used instead.

    Attributes
    ----------
    id : int
        The ID of the table row.
    event_team_id : int
        The event/team ID.
    event_team : :class:`ramp_database.model.EventTeam`
        The event/team instance.
    name : str
        The name of the submission.
    hash_ : string
        A hash to identify the submission. It is the SHA-1 hex digest of the
        event name, the team name, and the submission name.
    submission_timestamp : datetime
        The date and time when the submission was added to the database.
    sent_to_training_timestamp : datetime
        The date and time when the submission was sent for training.
    training_timestamp : datetime
        The date and time recorded when the submission entered the
        ``'training'`` state, refreshed each time
        :meth:`set_state_after_training` is called.
    contributivity : float
        The contributivity of the submission.
    historical_contributivity : float
        The historical contributivity.
    type : {'live' or 'test'}
        The type of submission.
    state : str
        The state of the submission. For possible states, see the
        ``submission_states`` enum in this module (top of this file).
    error_msg : str
        The error message of the submission.
    is_valid : bool
        Is it a valid submission.
    is_to_ensemble : bool
        Whether to use the submission for the contributivity score.
    is_in_competition : bool
        Whether the submission is used to participate to the competition.
    notes : str
        Store any note regarding the submission.
    train_time_cv_mean : float
        The mean of the computation time for a fold on the train data.
    valid_time_cv_mean : float
        The mean of the computation time for a fold on the valid data.
    test_time_cv_mean : float
        The mean of the computation time for a fold on the test data.
    train_time_cv_std : float
        The standard deviation of the computation time for a fold on the train
        data.
    valid_time_cv_std : float
        The standard deviation of the computation time for a fold on the valid
        data.
    test_time_cv_std : float
        The standard deviation of the computation time for a fold on the test
        data.
    max_ram : float
        The maximum amount of RAM consumed during training.
    historical_contributivitys : list of \
:class:`ramp_database.model.HistoricalContributivity`
        A back-reference of the historical contributivities for the submission.
    scores : list of :class:`ramp_database.model.SubmissionScore`
        A back-reference of scores for the submission.
    files : list of :class:`ramp_database.model.SubmissionFile`
        A back-reference of files attached to the submission.
    on_cv_folds : list of :class:`ramp_database.model.SubmissionOnCVFold`
        A back-reference of the CV fold for this submission.
    """
    __tablename__ = 'submissions'

    id = Column(Integer, primary_key=True)

    event_team_id = Column(Integer, ForeignKey('event_teams.id'),
                           nullable=False)
    event_team = relationship('EventTeam',
                              backref=backref('submissions',
                                              cascade='all, delete-orphan'))

    name = Column(String(20), nullable=False)
    hash_ = Column(String, nullable=False, index=True, unique=True)
    submission_timestamp = Column(DateTime, nullable=False)
    sent_to_training_timestamp = Column(DateTime)
    training_timestamp = Column(DateTime)  # end of training

    contributivity = Column(Float, default=0.0)
    historical_contributivity = Column(Float, default=0.0)

    type = Column(submission_types, default='live')
    state = Column(String, default='new')
    # TODO: hide absolute path in error
    error_msg = Column(String, default='')
    # user can delete but we keep
    is_valid = Column(Boolean, default=True)
    # We can forget bad models.
    # If false, don't combine and set contributivity to zero
    is_to_ensemble = Column(Boolean, default=True)
    # in competitive events participants can select the submission
    # with which they want to participate in the competition
    is_in_competition = Column(Boolean, default=True)

    notes = Column(String, default='')  # eg, why is it disqualified

    train_time_cv_mean = Column(Float, default=0.0)
    valid_time_cv_mean = Column(Float, default=0.0)
    test_time_cv_mean = Column(Float, default=0.0)
    train_time_cv_std = Column(Float, default=0.0)
    valid_time_cv_std = Column(Float, default=0.0)
    test_time_cv_std = Column(Float, default=0.0)
    # the maximum memory size used when training/testing, in MB
    max_ram = Column(Float, default=0.0)
    # later also ramp_id
    UniqueConstraint(event_team_id, name, name='ts_constraint')

    def __init__(self, name, event_team, session=None):
        self.name = name
        self.event_team = event_team
        # Session the event/team object currently belongs to; the score rows
        # created below are added to it.
        self.session = inspect(event_team).session
        sha_hasher = hashlib.sha1()
        sha_hasher.update(_encode_string(self.event.name))
        sha_hasher.update(_encode_string(self.team.name))
        sha_hasher.update(_encode_string(self.name))
        # We considered using the id, but then it will be given away in the
        # url which is maybe not a good idea.
        self.hash_ = '{}'.format(sha_hasher.hexdigest())
        self.submission_timestamp = datetime.datetime.utcnow()
        if session is None:
            event_score_types = \
                (EventScoreType.query.filter_by(event=event_team.event))
        else:
            event_score_types = (session.query(EventScoreType)
                                        .filter(EventScoreType.event ==
                                                event_team.event)
                                        .all())
        # One SubmissionScore per score type declared for the event.
        for event_score_type in event_score_types:
            submission_score = SubmissionScore(
                submission=self, event_score_type=event_score_type)
            self.session.add(submission_score)
        self.reset()

    def __str__(self):
        return 'Submission({}/{}/{})'.format(
            self.event.name, self.team.name, self.name)

    def __repr__(self):
        return ('Submission(event_name={}, team_name={}, name={}, files={}, '
                'state={}, train_time={})'
                .format(self.event.name, self.team.name, self.name,
                        self.files, self.state, self.train_time_cv_mean))

    @hybrid_property
    def team(self):
        """The team of the associated event/team (``event_team.team``).

        Its ``name`` attribute is what is used for hashing and display.
        """
        return self.event_team.team

    @hybrid_property
    def event(self):
        """:class:`ramp_database.model.Event`: The event associated with the
        submission."""
        return self.event_team.event

    @property
    # This will work only with Flask
    def official_score_function(self):
        """callable: The scoring function."""
        return self.event.official_score_function

    @property
    def official_score_name(self):
        """str: The name of the default score."""
        return self.event.official_score_name

    @property
    def official_score(self):
        """:class:`ramp_database.model.SubmissionScore`: The official score.

        Raises ``KeyError`` when no score of the submission carries the
        official score name.
        """
        score_dict = {score.score_name: score for score in self.scores}
        return score_dict[self.official_score_name]

    @property
    def score_types(self):
        """list of :class:`ramp_database.model.EventScoreType`: All the scores
        used for the submissions."""
        return self.event.score_types

    @property
    def Predictions(self):
        """:class:`rampwf.prediction_types`: The predictions used for the
        problem."""
        return self.event.Predictions

    @hybrid_property
    def is_not_sandbox(self):
        """bool: Whether the submission is not a sandbox."""
        return self.name != self.event.ramp_sandbox_name

    @hybrid_property
    def is_error(self):
        """bool: Whether the state of the submission contains ``'error'``."""
        return 'error' in self.state

    @hybrid_property
    def is_new(self):
        """bool: Whether the submission is still waiting for or undergoing
        training (state ``'new'``, ``'training'`` or ``'sent_to_training'``)
        and is not the sandbox."""
        return (self.state in ['new', 'training', 'sent_to_training'] and
                self.is_not_sandbox)

    @hybrid_property
    def is_public_leaderboard(self):
        """bool: Whether the submission is part of the public leaderboard."""
        return (self.is_not_sandbox and self.is_valid and
                (self.state == 'scored'))

    @hybrid_property
    def is_private_leaderboard(self):
        """bool: Whether the submission is part of the private leaderboard."""
        return (self.is_not_sandbox and self.is_valid and
                (self.state == 'scored'))

    @property
    def path(self):
        """str: The path to the submission."""
        return os.path.join(self.event.path_ramp_submissions, self.basename)

    @property
    def basename(self):
        """str: The base name of the submission (``submission_`` followed by
        the zero-padded, 9-digit ID)."""
        return 'submission_' + '{:09d}'.format(self.id)

    @property
    def module(self):
        """str: Path of the submission as a module."""
        # ``lstrip('./')`` removes every leading '.' and '/' character, not
        # only a literal './' prefix.
        return self.path.lstrip('./').replace('/', '.')

    @property
    def f_names(self):
        """list of str: File names of a submission."""
        return [file.f_name for file in self.files]

    @property
    def link(self):
        """str: Unique link to the first submission file."""
        return self.files[0].link

    @property
    def full_name_with_link(self):
        """str: HTML hyperlink to the first submission file with event, team,
        and submission information.

        The hyperlink forward to the first submission file while the text
        corresponds to the event, team, and submission name. The text parts
        are HTML-escaped.
        """
        # Names are free text; escape them so they cannot inject markup into
        # the page. The submission name is truncated before escaping so that
        # an entity is never cut in half.
        return '<a href={}>{}/{}/{}</a>'.format(
            self.link,
            html.escape(str(self.event.name)),
            html.escape(str(self.team.name)),
            html.escape(self.name[:20]))

    @property
    def name_with_link(self):
        """str: HTML hyperlink to the first submission file with submission
        information.

        The hyperlink forward to the first submission file while the text
        corresponds to submission name (HTML-escaped).
        """
        return '<a href={}>{}</a>'.format(
            self.link, html.escape(self.name[:20]))

    @property
    def state_with_link(self):
        """str: HTML hyperlink to the state file to report error."""
        return '<a href=/{}>{}</a>'.format(
            os.path.join(self.hash_, 'error.txt'), self.state)

    def ordered_scores(self, score_names):
        """Generator yielding :class:`ramp_database.model.SubmissionScore`.

        Ordered according to ``score_names``. Called by
        :func:`ramp_database.tools.leaderboard.get_public_leaderboard` and
        :func:`ramp_database.tools.get_private_leaderboard`, making sure scores
        are listed in the correct column.

        Parameters
        ----------
        score_names : list of str
            Name of the scores.

        Returns
        -------
        scores : generator of \
:class:`ramp_database.model.submission.SubmissionScore`
            Generate a scoring instance. A ``KeyError`` is raised when a
            requested name does not match any score of the submission.
        """
        score_dict = {score.score_name: score for score in self.scores}
        for score_name in score_names:
            yield score_dict[score_name]

    # The ``*_time_cv_mean`` / ``*_time_cv_std`` values used to be properties
    # computing ``np.mean`` / ``np.std`` over the ``train_time``,
    # ``valid_time`` and ``test_time`` of ``self.on_cv_folds``. It was slow
    # because submission_on_folds contain also possibly large predictions.
    # If postgres solves this issue (which can be tested on the mean and std
    # scores on the private leaderboard), the corresponding columns (which are
    # now redundant) can be deleted and those properties restored.

    def _get_cv_folds(self, session=None):
        """Return the CV folds of this submission.

        Parameters
        ----------
        session : :class:`sqlalchemy.orm.Session`, default=None
            When ``None``, the ``on_cv_folds`` back-reference is returned
            as-is. Otherwise the folds are queried through ``session`` and
            returned sorted by ID.
        """
        # NOTE(review): the id-sort is assumed to apply only to the
        # explicit-session query -- confirm against the original layout.
        if session is None:
            return self.on_cv_folds
        queried_folds = (session.query(SubmissionOnCVFold)
                                .filter_by(submission_id=self.id)
                                .all())
        return sorted(queried_folds, key=lambda fold: fold.id)

    def set_state(self, state, session=None):
        """Set the state of the submission and of each CV fold.

        Parameters
        ----------
        state : str
            The state of the new submission.
        session : :class:`sqlalchemy.orm.Session`, default=None
            The session used to fetch the CV folds. When ``None``, the
            ``on_cv_folds`` back-reference is used.
        """
        self.state = state
        if state == "sent_to_training":
            self.sent_to_training_timestamp = datetime.datetime.utcnow()
        elif state == "training":
            self.training_timestamp = datetime.datetime.utcnow()

        for submission_on_cv_fold in self._get_cv_folds(session):
            submission_on_cv_fold.state = state

    def reset(self):
        """Reset the submission to an initial stage.

        The contributivity, state, error, and scores will be reset to initial
        values.
        """
        self.contributivity = 0.0
        self.state = 'new'
        self.error_msg = ''
        for score in self.scores:
            score.valid_score_cv_bag = score.event_score_type.worst
            score.test_score_cv_bag = score.event_score_type.worst
            score.valid_score_cv_bags = None
            score.test_score_cv_bags = None

    def set_error(self, error, error_msg, session=None):
        """Fail the submission as well as the CV folds.

        Parameters
        ----------
        error : str
            The error state of the submission and each fold.
        error_msg : str
            The associated error message for the submission and each fold.
        session : :class:`sqlalchemy.orm.Session`, default=None
            The session used to fetch the CV folds. When ``None``, the
            ``on_cv_folds`` back-reference is used.

        Notes
        -----
        Setting the error will first reset the submission.
        """
        self.reset()
        self.state = error
        self.error_msg = error_msg
        for submission_on_cv_fold in self._get_cv_folds(session):
            submission_on_cv_fold.set_error(error, error_msg)

    # contributivity could be a property but then we could not query on it
    def set_contributivity(self, session=None):
        """Compute the contributivity of a submission.

        Parameters
        ----------
        session : :class:`sqlalchemy.orm.Session`, default=None
            The session used to fetch the CV folds. When ``None``, the
            ``on_cv_folds`` back-reference is used.

        Notes
        -----
        The contributivity is computed only if the submission is public and
        valid and this is not the sandbox submission. Otherwise, or when the
        submission has no CV fold, it is set to 0.
        """
        self.contributivity = 0.0
        if not self.is_public_leaderboard:
            return
        all_cv_folds = self._get_cv_folds(session)
        if not all_cv_folds:
            # Nothing to average over; avoid dividing by zero.
            return
        # we share a unit of 1. among folds
        unit_contributivity = 1. / len(all_cv_folds)
        for submission_on_cv_fold in all_cv_folds:
            self.contributivity += (unit_contributivity *
                                    submission_on_cv_fold.contributivity)

    def set_state_after_training(self, session=None):
        """Set the state of a submission depending of the state of the fold
        after training.

        The submission becomes ``'tested'``, ``'validated'`` or ``'trained'``
        when every fold reached at least that stage. Otherwise the first
        error found among ``'training_error'``, ``'validating_error'`` and
        ``'testing_error'`` (checked in that order) is propagated together
        with the message of the first fold in that state. When none of these
        cases applies the state is left untouched.

        Parameters
        ----------
        session : :class:`sqlalchemy.orm.Session`, default=None
            The session used to fetch the CV folds. When ``None``, the
            ``on_cv_folds`` back-reference is used.
        """
        self.training_timestamp = datetime.datetime.utcnow()
        all_cv_folds = self._get_cv_folds(session)
        states = [submission_on_cv_fold.state
                  for submission_on_cv_fold in all_cv_folds]
        if all(state == 'tested' for state in states):
            self.state = 'tested'
        elif all(state in ('tested', 'validated') for state in states):
            self.state = 'validated'
        elif all(state in ('tested', 'validated', 'trained')
                 for state in states):
            self.state = 'trained'
        else:
            for error_state in ('training_error', 'validating_error',
                                'testing_error'):
                if error_state in states:
                    self.state = error_state
                    first_failed = all_cv_folds[states.index(error_state)]
                    self.error_msg = first_failed.error_msg
                    break
        if 'error' not in self.state:
            self.error_msg = ''
class SubmissionScore(Model):
    """SubmissionScore table.

    One row links a submission to one score type of its event and stores the
    CV-bagged values of that score.

    Attributes
    ----------
    id : int
        The ID of the row table.
    submission_id : int
        The ID of the associated submission.
    submission : :class:`ramp_database.model.Submission`
        The submission instance associated.
    event_score_type_id : int
        The ID of the event/score type associated.
    event_score_type : :class:`ramp_database.model.EventScoreType`
        The event/score type instance associated.
    valid_score_cv_bag : float
        The validation bagged scores.
    test_score_cv_bag : float
        The testing bagged scores.
    valid_score_cv_bags : ndarray
        The partial validation scores for all CV bags.
    test_score_cv_bags : ndarray
        The partial testing scores for all CV bags.
    on_cv_folds : list of :class:`ramp_database.model.SubmissionScoreOnCVFold`
        A back-reference the CV fold associated with the score.
    """
    __tablename__ = 'submission_scores'

    id = Column(Integer, primary_key=True)

    submission_id = Column(
        Integer, ForeignKey('submissions.id'), nullable=False
    )
    submission = relationship(
        'Submission',
        backref=backref('scores', cascade='all, delete-orphan')
    )

    event_score_type_id = Column(
        Integer, ForeignKey('event_score_types.id'), nullable=False
    )
    event_score_type = relationship(
        'EventScoreType', backref=backref('submissions')
    )

    # CV-bagged scores: the first one is computed on the validation (cv)
    # data, the second one on the held-out test data.
    valid_score_cv_bag = Column(Float)
    test_score_cv_bag = Column(Float)
    # Partial bagged scores, kept to observe saturation and overfitting as
    # the number of CV folds grows.
    valid_score_cv_bags = Column(NumpyType)
    test_score_cv_bags = Column(NumpyType)

    @property
    def score_name(self):
        """str: The name of the score."""
        score_type = self.event_score_type
        return score_type.name

    @property
    def score_function(self):
        """callable: The function used to score."""
        score_type = self.event_score_type
        return score_type.score_function

    @property
    def precision(self):
        """int: The numerical precision of the associated score (default
        display precision in number of digits)."""
        score_type = self.event_score_type
        return score_type.precision
# TODO: we should have a SubmissionWorkflowElementType table, describing the
# type of files we are expecting for a given RAMP. Fast unit test should be
# set up there, and each file should be unit tested right after submission.
# Cosmetic: Perhaps mark which file the leaderboard link should point to (right
# now it is set to the first file in the list which is arbitrary).
# We will also have to handle auxiliary files (like CSVs or other classes).
# User interface could have a single submission form with a menu containing
# the file names for a given ramp + an "other" field when users will have to
# name their files
class SubmissionFile(Model):
    """SubmissionFile table.

    Attributes
    ----------
    id : int
        The ID of the table row.
    submission_id : int
        The ID of the associated submission.
    submission : :class:`ramp_database.model.Submission`
        The submission instance associated.
    workflow_element_id : int
        The ID of the associated workflow element.
    workflow_element : :class:`ramp_database.model.WorkflowElement`
        The workflow element associated with the submission.
    submission_file_type_extension_id : int
        The ID of the associated submission file type extension.
    submission_file_type_extension : \
:class:`ramp_database.model.SubmissionFileTypeExtension`
        The associated submission file type extension instance.
    """
    __tablename__ = 'submission_files'

    id = Column(Integer, primary_key=True)
    submission_id = Column(Integer, ForeignKey('submissions.id'),
                           nullable=False)
    submission = relationship('Submission',
                              backref=backref('files',
                                              cascade='all, delete-orphan'))

    workflow_element_id = Column(Integer, ForeignKey('workflow_elements.id'),
                                 nullable=False)
    workflow_element = relationship('WorkflowElement',
                                    backref=backref('submission_files'))

    submission_file_type_extension_id = Column(
        Integer, ForeignKey('submission_file_type_extensions.id'),
        nullable=False
    )
    submission_file_type_extension = relationship(
        'SubmissionFileTypeExtension', backref=backref('submission_files')
    )

    def __repr__(self):
        return ('SubmissionFile(name={}, type={}, extension={}, path={})'
                .format(self.name, self.type, self.extension, self.path))

    @property
    def is_editable(self):
        """bool: Whether the submission file is from an editable format on the
        frontend."""
        return self.workflow_element.is_editable

    @property
    def extension(self):
        """str: The extension of the file format."""
        return self.submission_file_type_extension.extension.name

    @property
    def type(self):
        """str: The workflow type associated with the file."""
        return self.workflow_element.type

    @property
    def name(self):
        """str: The name of the workflow element."""
        return self.workflow_element.name

    @property
    def f_name(self):
        """str: The corresponding file name (``<type>.<extension>``)."""
        return self.type + '.' + self.extension

    @property
    def link(self):
        """str: A unique link to the file. The hash is generated by the
        Submission instance."""
        return '/' + os.path.join(self.submission.hash_, self.f_name)

    @property
    def path(self):
        """str: The path to the file in the deployment directory."""
        return os.path.join(self.submission.path, self.f_name)

    @property
    def name_with_link(self):
        """str: The HTML hyperlink of the name of the submission file."""
        return '<a href="' + self.link + '">' + self.name + '</a>'

    def get_code(self):
        """Get the content of the file.

        Returns
        -------
        code : str
            The text stored at :attr:`path`, decoded as UTF-8.
        """
        # Be explicit about the encoding instead of relying on the locale of
        # the process; UTF-8 is the default encoding of Python source files.
        with open(self.path, encoding='utf-8') as f:
            code = f.read()
        return code

    def set_code(self, code):
        """Set the content of the submission file.

        Parameters
        ----------
        code : str
            The code to write into the submission file. It is written encoded
            as UTF-8 and replaces any previous content.
        """
        with open(self.path, 'w', encoding='utf-8') as f:
            f.write(code)
class SubmissionFileTypeExtension(Model):
    """SubmissionFileTypeExtension table.

    Association table implementing the many-to-many relationship between
    SubmissionFileType and Extension.

    Attributes
    ----------
    id : int
        The ID of the table row.
    type_id : int
        The ID of the submission file type.
    type : :class:`ramp_database.model.SubmissionFileType`
        The submission file type instance.
    extension_id : int
        The ID of the extension.
    extension : :class:`ramp_database.model.Extension`
        The file extension instance.
    submission_files : list of :class:`ramp_database.model.SubmissionFile`
        A back-reference to the submission files related to the type extension.
    """
    __tablename__ = 'submission_file_type_extensions'

    id = Column(Integer, primary_key=True)

    type_id = Column(
        Integer, ForeignKey('submission_file_types.id'), nullable=False
    )
    type = relationship('SubmissionFileType', backref=backref('extensions'))

    extension_id = Column(
        Integer, ForeignKey('extensions.id'), nullable=False
    )
    extension = relationship(
        'Extension', backref=backref('submission_file_types')
    )

    UniqueConstraint(type_id, extension_id, name='we_constraint')

    @property
    def file_type(self):
        """str: The name of the file type."""
        related_type = self.type
        return related_type.name

    @property
    def extension_name(self):
        """str: The name of the file extension."""
        related_extension = self.extension
        return related_extension.name
class SubmissionFileType(Model):
    """SubmissionFileType table.

    Attributes
    ----------
    id : int
        The ID of the table row.
    name : str
        The name of the submission file type (unique).
    is_editable : bool
        Whether or not this type of file is editable on the frontend.
    max_size : int
        The maximum size of this file type.
    extensions : list of \
:class:`ramp_database.model.SubmissionFileTypeExtension`
        A back-reference to the type/extension associations of this file type.
    workflow_element_types : list of \
:class:`ramp_database.model.WorkflowElementType`
        A back-reference to the workflow element type for this submission file
        type.
    """
    __tablename__ = 'submission_file_types'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True, nullable=False)
    is_editable = Column(Boolean, default=True)
    max_size = Column(Integer, default=None)
class Extension(Model):
    """Extension table.

    Attributes
    ----------
    id : int
        The ID of the table row.
    name : str
        The name of the extension (unique).
    submission_file_types : list of \
:class:`ramp_database.model.SubmissionFileTypeExtension`
        A back-reference to the submission file types for this extension.
    """
    __tablename__ = 'extensions'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True, nullable=False)
class SubmissionScoreOnCVFold(Model):
    """SubmissionScoreOnCVFold table.

    Stores the train, validation and test values of one submission score on
    one CV fold.

    Attributes
    ----------
    id : int
        The ID of the table row.
    submission_on_cv_fold_id : int
        The ID of the CV fold.
    submission_on_cv_fold : :class:`ramp_database.model.SubmissionOnCVFold`
        The submission on CV fold instance.
    submission_score_id : int
        The ID of the submission score.
    submission_score : :class:`ramp_database.model.SubmissionScore`
        The submission score instance.
    train_score : float
        The training score on the fold.
    valid_score : float
        The validation score on the fold.
    test_score : float
        The testing score on the fold.
    """
    __tablename__ = 'submission_score_on_cv_folds'

    id = Column(Integer, primary_key=True)

    submission_on_cv_fold_id = Column(Integer,
                                      ForeignKey('submission_on_cv_folds.id'),
                                      nullable=False)
    submission_on_cv_fold = relationship('SubmissionOnCVFold',
                                         backref=backref(
                                             'scores',
                                             cascade='all, delete-orphan'))

    submission_score_id = Column(Integer,
                                 ForeignKey('submission_scores.id'),
                                 nullable=False)
    submission_score = relationship('SubmissionScore',
                                    backref=backref(
                                        'on_cv_folds',
                                        cascade='all, delete-orphan'))

    train_score = Column(Float)
    valid_score = Column(Float)
    test_score = Column(Float)

    UniqueConstraint(submission_on_cv_fold_id, submission_score_id,
                     name='ss_constraint')

    @property
    def name(self):
        """str: The name of the score."""
        score_type = self.event_score_type
        return score_type.name

    @property
    def event_score_type(self):
        """:class:`EventScoreType`: The event/score type instance."""
        parent_score = self.submission_score
        return parent_score.event_score_type

    @property
    def score_function(self):
        """callable: the scoring function."""
        score_type = self.event_score_type
        return score_type.score_function
# TODO: rename submission to workflow and submitted file to workflow_element
# TODO: SubmissionOnCVFold should actually be a workflow element. Saving
# train_pred means that we can input it to the next workflow element
# TODO: implement check
class SubmissionOnCVFold(Model):
"""SubmissionOnCVFold.
Parameters
----------
submission : :class:`ramp_database.model.Submission`
The submission used.
cv_fold : :class:`ramp_database.model.CVFold`
The fold to associate with the submission.
Attributes
----------
id : int
The ID of the table row.
submission_id : int
The ID of the submission.
submission : :class:`ramp_database.model.Submission`
The submission instance.
cv_fold_id : int
The ID of the CV fold.
cv_fold : :class:`ramp_database.model.CVFold`
The CV fold instance.
contributivity : float
The contributivity of the submission.
best : bool
Whether or not the submission is the best.
full_train_y_pred : ndarray
Predictions on the full training set.
test_y_pred : ndarray
Predictions on the testing set.
train_time : float
Computation time for the training set.
valid_time : float
Computation time for the validation set.
test_time : float
Computation time for the testing set.
state : str
State of of the submission on this fold.
error_msg : str
Error message in case of failing submission.
scores : list of :class:`ramp_database.model.SubmissionScoreOnCVFold`
A back-reference on the scores for this fold.
Notes
-----
SubmissionOnCVFold is an instantiation of Submission, to be trained on a
data file and a cv fold. We don't actually store the trained model in the
db (lack of disk and pickling issues), so trained submission is not a
database column. On the other hand, we will store train, valid, and test
predictions. In a sense substituting CPU time for storage.
"""
__tablename__ = 'submission_on_cv_folds'
id = Column(Integer, primary_key=True)
submission_id = Column(Integer, ForeignKey('submissions.id'),
nullable=False)
submission = relationship('Submission',
backref=backref('on_cv_folds',
cascade="all, delete-orphan"))
cv_fold_id = Column(Integer, ForeignKey('cv_folds.id'), nullable=False)
cv_fold = relationship('CVFold',
backref=backref('submissions',
cascade="all, delete-orphan"))
# filled by cv_fold.get_combined_predictions
contributivity = Column(Float, default=0.0)
best = Column(Boolean, default=False)
# prediction on the full training set, including train and valid points
# properties train_predictions and valid_predictions will make the slicing
full_train_y_pred = Column(NumpyType, default=None)
test_y_pred = Column(NumpyType, default=None)
train_time = Column(Float, default=0.0)
valid_time = Column(Float, default=0.0)
test_time = Column(Float, default=0.0)
state = Column(submission_states, default='new')
error_msg = Column(String, default='')
UniqueConstraint(submission_id, cv_fold_id, name='sc_constraint')
def __init__(self, submission, cv_fold):
    """Bind a submission to a CV fold and create its per-fold scores.

    Parameters
    ----------
    submission : :class:`ramp_database.model.Submission`
        The submission used.
    cv_fold : :class:`ramp_database.model.CVFold`
        The fold to associate with the submission.
    """
    self.submission = submission
    self.cv_fold = cv_fold
    # Session the submission currently belongs to.
    self.session = inspect(submission).session
    # One per-fold score row for every score attached to the submission.
    for submission_score in submission.scores:
        self.session.add(
            SubmissionScoreOnCVFold(submission_on_cv_fold=self,
                                    submission_score=submission_score)
        )
    self.reset()
def __repr__(self):
    """Short summary: state, contributivity and ``best`` flag."""
    shown_values = (self.state, self.contributivity, self.best)
    return 'state = {}, c = {}, best = {}'.format(*shown_values)
@hybrid_property
def is_public_leaderboard(self):
    """bool: Whether or not the fold is in the ``'scored'`` state, i.e. ready
    to be on the public leaderboard."""
    current_state = self.state
    return current_state == 'scored'
@hybrid_property
def is_trained(self):
    """bool: Whether or not the submission was trained."""
    # States that can only be reached once training succeeded.
    states_after_training = ('trained', 'validated', 'tested',
                             'validating_error', 'testing_error', 'scored')
    return self.state in states_after_training
@hybrid_property
def is_validated(self):
    """bool: Whether or not the submission was validated."""
    # States that can only be reached once validation succeeded.
    states_after_validation = ('validated', 'tested', 'testing_error',
                               'scored')
    return self.state in states_after_validation
@hybrid_property
def is_tested(self):
    """bool: Whether or not the submission was tested."""
    states_after_testing = ('tested', 'scored')
    return self.state in states_after_testing
@hybrid_property
def is_error(self):
    """bool: Whether or not the submission failed at one of the stage, i.e.
    its state contains ``'error'``."""
    current_state = self.state
    return 'error' in current_state
# The following four functions are converting the stored numpy arrays
# <>_y_pred into Prediction instances
@property
def full_train_predictions(self):
    """:class:`rampwf.prediction_types.Predictions`: Predictions built from
    the whole stored ``full_train_y_pred`` array."""
    predictions_factory = self.submission.Predictions
    return predictions_factory(y_pred=self.full_train_y_pred)
@property
def train_predictions(self):
    """:class:`rampwf.prediction_types.Predictions`: Predictions built from
    the entries of ``full_train_y_pred`` selected by the fold's
    ``train_is``."""
    train_indices = self.cv_fold.train_is
    train_y_pred = self.full_train_y_pred[train_indices]
    return self.submission.Predictions(y_pred=train_y_pred)
@property
def valid_predictions(self):
""":class:`rampwf.prediction_types.Predictions`: Validation