Skip to content

Commit de2790e

Browse files
author
Hernán Morales Durand
committed
Add equality to partitioned data set
Add split train test test method
1 parent 22bf563 commit de2790e

File tree

3 files changed

+96
-5
lines changed

3 files changed

+96
-5
lines changed

src/AI-DataPartitioners-Tests/AIRandomPartitionerTest.class.st

+44-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
11
Class {
22
#name : 'AIRandomPartitionerTest',
33
#superclass : 'TestCase',
4+
#instVars : [
5+
'partitioner',
6+
'df'
7+
],
48
#category : 'AI-DataPartitioners-Tests',
59
#package : 'AI-DataPartitioners-Tests'
610
}
711

12+
{ #category : 'running' }
AIRandomPartitionerTest >> setUp [
	"Build the three-row fixture stored in df: rows named A, B and C, with the columns City, Population and BeenThere."

	| cityRows |
	super setUp.
	cityRows := #( #( 'Barcelona' 1.609 true ) #( 'Dubai' 2.789 true ) #( 'London' 8.788 false ) ).
	df := DataFrame withRows: cityRows.
	df
		rowNames: #( 'A' 'B' 'C' );
		columnNames: #( 'City' 'Population' 'BeenThere' )
]
21+
822
{ #category : 'tests' }
923
AIRandomPartitionerTest >> testSplitDataWithProportionsCase1 [
1024

@@ -68,10 +82,36 @@ AIRandomPartitionerTest >> testSplitDataWithSizesCase2 [
6882
]
6983

7084
{ #category : 'tests' }
AIRandomPartitionerTest >> testSplitTrainTestFromUsingTargetColumnWithProportionsShuffle [
	"Split the df fixture on the BeenThere column with proportions 0.7 / 0.3 and seed 1, then compare the answer with a hand-built partition in which rows A and C are the train part and row B is the test part."

	| expected actual |
	expected := AIPartitionedDataSet new.
	expected xTrain: (DataFrame
			 withRows: #( #( 'Barcelona' 1.609 ) #( 'London' 8.788 ) )
			 rowNames: #( 'A' 'C' )
			 columnNames: #( 'City' 'Population' )).
	expected xTest: (DataFrame
			 withRows: #( #( 'Dubai' 2.789 ) )
			 rowNames: #( 'B' )
			 columnNames: #( 'City' 'Population' )).
	expected yTrain: (DataFrame
			 withRows: #( #( true ) #( false ) )
			 rowNames: #( 'A' 'C' )
			 columnNames: #( 'BeenThere' )).
	expected yTest: (DataFrame
			 withRows: #( #( true ) )
			 rowNames: #( 'B' )
			 columnNames: #( 'BeenThere' )).

	actual := AIRandomPartitioner new
		          splitTrainTestFrom: df
		          usingTargetColumn: #( 'BeenThere' )
		          withProportions: #( 0.7 0.3 )
		          seed: 1.

	"Relies on AIPartitionedDataSet >> = comparing the four parts."
	self assert: actual equals: expected
]

src/AI-DataPartitioners/AIPartitionedDataSet.class.st

+28
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,34 @@ Class {
2424
#package : 'AI-DataPartitioners'
2525
}
2626

27+
{ #category : 'comparing' }
AIPartitionedDataSet >> = aPartitionedDataSet [
	"Answer true when the argument is the receiver itself, or when it is an instance of the same class whose xTest, yTest, xTrain and yTrain are all equal to the receiver's. The comparisons short-circuit in that order."

	^ self == aPartitionedDataSet or: [
		  self class = aPartitionedDataSet class and: [
			  self xTest = aPartitionedDataSet xTest and: [
				  self yTest = aPartitionedDataSet yTest and: [
					  self xTrain = aPartitionedDataSet xTrain and: [
						  self yTrain = aPartitionedDataSet yTrain ] ] ] ] ]
]
44+
45+
{ #category : 'comparing' }
AIPartitionedDataSet >> hash [
	"Answer a hash that combines the species hash with the hashes of the four parts using bitXor:. Defined because #= is redefined, so that equal partitions answer equal hashes."

	| parts |
	parts := {
		         self xTest.
		         self xTrain.
		         self yTest.
		         self yTrain }.
	"XOR is associative and commutative, so folding left to right gives the same value as nesting the sends."
	^ parts
		  inject: self species hash
		  into: [ :combined :each | combined bitXor: each hash ]
]
54+
2755
{ #category : 'accessing' }
2856
AIPartitionedDataSet >> xTest [
2957

src/AI-DataPartitioners/AIRandomPartitioner.class.st

+24-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,30 @@ AIRandomPartitioner >> split: aCollection withSizes: aCollectionOfSizes [
141141
]
142142

143143
{ #category : 'api' }
AIRandomPartitioner >> splitTrainTestFrom: aDataFrame usingTargetColumn: targetCollection withProportions: aTwoElementCollectionOfProportions seed: aNumber [
	"Answer an <AIPartitionedDataSet> built from aDataFrame.

	aDataFrame is split with #split:withProportions:seed:. The first part of the answer supplies the train sets and the second part supplies the test sets.
	In each part, the y set is obtained with #columns: targetCollection and the x set with #columnsAllBut: targetCollection.

	xTrain and yTrain are meant for training and fitting a model.
	xTest and yTest are meant for testing it."

	| parts trainingPart testingPart |
	parts := self
		         split: aDataFrame
		         withProportions: aTwoElementCollectionOfProportions
		         seed: aNumber.
	trainingPart := parts first.
	testingPart := parts second.

	^ AIPartitionedDataSet new
		  xTrain: (trainingPart columnsAllBut: targetCollection);
		  yTrain: (trainingPart columns: targetCollection);
		  xTest: (testingPart columnsAllBut: targetCollection);
		  yTest: (testingPart columns: targetCollection);
		  yourself
]
165+
166+
{ #category : 'api' }
167+
AIRandomPartitioner >> splitTrainTestFrom: aDataFrame usingTargetColumn: targetCollection withProportions: aTwoElementCollectionOfProportions shuffle: aBoolean [
145168
"Answer a <AIPartitionedDataSet>. Split the receiver's data into two sets: train and test.
146169
147170
xTrain and yTrain sets are used for training and fitting the model.

0 commit comments

Comments
 (0)