-
Notifications
You must be signed in to change notification settings - Fork 3
/
train.php
73 lines (51 loc) · 1.98 KB
/
train.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
<?php
include __DIR__ . '/vendor/autoload.php';
use Rubix\ML\Loggers\Screen;
use Rubix\ML\Extractors\CSV;
use Rubix\ML\Extractors\ColumnPicker;
use Rubix\ML\Datasets\Labeled;
use Rubix\ML\Classifiers\NaiveBayes;
use Rubix\ML\Pipeline;
use Rubix\ML\Transformers\IntervalDiscretizer;
use Rubix\ML\Transformers\NumericStringConverter;
use Rubix\ML\CrossValidation\Reports\AggregateReport;
use Rubix\ML\CrossValidation\Reports\ConfusionMatrix;
use Rubix\ML\CrossValidation\Reports\MulticlassBreakdown;
use Rubix\ML\PersistentModel;
use Rubix\ML\Persisters\Filesystem;
// Remove PHP's memory ceiling for this run; the dataset is loaded in full below.
ini_set('memory_limit', '-1');

$log = new Screen();

// ---------------------------------------------------------------------------
// 1. Load the data
// ---------------------------------------------------------------------------
$log->info('Loading data into memory');

// Columns pulled from the CSV, in this order. 'Churn' is deliberately last.
// NOTE(review): presumably Labeled::fromIterator() treats the final column as
// the label - confirm against the Rubix ML docs.
$columns = [
    'Gender', 'SeniorCitizen', 'Partner', 'Dependents', 'MonthsInService', 'Phone',
    'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'TV', 'Movies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
    'MonthlyCharges', 'TotalCharges', 'Region', 'Churn',
];

// Second CSV argument is `true` - presumably "file has a header row"; verify.
$csv = new CSV('dataset.csv', true);
$records = new ColumnPicker($csv, $columns);

$samples = Labeled::fromIterator($records);

// Shuffle, then do a stratified split with ratio 0.8 into a training part and
// a testing part (left side is used for training, right side for evaluation).
$shuffled = $samples->randomize();
[$trainSet, $testSet] = $shuffled->stratifiedSplit(0.8);

// ---------------------------------------------------------------------------
// 2. Build the model: preprocessing pipeline wrapped around Naive Bayes
// ---------------------------------------------------------------------------
// The array maps the two class labels to fixed weights.
// NOTE(review): looks like these are class prior probabilities - confirm.
$classifier = new NaiveBayes([
    'Yes' => 0.1,
    'No' => 0.9,
]);

// Transformers are listed in the order they are handed to the Pipeline.
$transformers = [
    new NumericStringConverter(),
    new IntervalDiscretizer(3, true),
];

$model = new Pipeline($transformers, $classifier);

// ---------------------------------------------------------------------------
// 3. Train and evaluate
// ---------------------------------------------------------------------------
$log->info('Training the model');

$model->train($trainSet);

$log->info('Making predictions');

$predicted = $model->predict($testSet);

// Combine both reports so a single generate() call produces them together.
$reports = new AggregateReport([
    new MulticlassBreakdown(),
    new ConfusionMatrix(),
]);

$results = $reports->generate($predicted, $testSet->labels());

// Print the report to the console, then persist a JSON copy next to it.
echo $results;

$json = $results->toJSON();
$json->saveTo(new Filesystem('report.json'));

$log->info('Report saved as report.json');

// ---------------------------------------------------------------------------
// 4. Optionally persist the trained model
// ---------------------------------------------------------------------------
// Only an answer of exactly "y" / "Y" saves; anything else skips the save.
$answer = strtolower(readline('Save this model? (y|[n]): '));

if ($answer === 'y') {
    $persistent = new PersistentModel($model, new Filesystem('model.rbx'));

    $persistent->save();

    $log->info('Model saved as model.rbx');
}