-
Notifications
You must be signed in to change notification settings - Fork 11
/
CarRental.kt
111 lines (102 loc) · 4.58 KB
/
CarRental.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package lab.mars.rl.problem
import lab.mars.rl.model.impl.mdp.CNSetMDP
import lab.mars.rl.model.impl.mdp.IndexedMDP
import lab.mars.rl.model.impl.mdp.IndexedPossible
import lab.mars.rl.model.null_possible
import lab.mars.rl.util.dimension.cnsetFrom
import lab.mars.rl.util.dimension.x
import lab.mars.rl.util.math.poisson
import org.apache.commons.math3.util.FastMath.*
/**
 * Builds the MDP of a two-location car rental problem.
 *
 * A state is the pair (cars at location 1, cars at location 2), each in `0..max_car`.
 * An action moves cars between the two locations; afterwards the rentals and returns of
 * each location are weighted with the probability tables prepared in the initializer
 * (one table per `mean_for_*` constant).
 *
 * <p>
 * Created on 2017-09-06.
 * </p>
 *
 * @author wumo
 */
object CarRental {
    /** Largest number of cars a single location can hold. */
    const val max_car = 20

    /** Largest number of cars that may be moved between the locations by one action. */
    private const val max_move = 5
    private const val cost_per_car_moved = 2.0
    private const val credit_per_car_rent = 10.0

    /** Capacity of one parking lot; only used by the exercise 4.4 variant. */
    private const val max_car_per_parking_lot = 10

    /** Cost of each additional parking lot; only used by the exercise 4.4 variant. */
    private const val cost_per_parking_lot = 4.0

    private const val mean_for_rent_L1 = 3.0
    private const val mean_for_rent_L2 = 4.0
    private const val mean_for_return_L1 = 3.0
    private const val mean_for_return_L2 = 2.0

    // First index of `prob` / index of `lambda`: which random quantity is meant.
    private const val idx_prob_rent_L1 = 0
    private const val idx_prob_rent_L2 = 1
    private const val idx_prob_return_L1 = 2
    private const val idx_prob_return_L2 = 3
    private const val num_idx_prob = 4

    // Last index of `prob`: value for exactly k, or for "k or more".
    private const val idx_normal = 0
    private const val idx_cumulative = 1

    /** Mean of each random quantity, indexed by the `idx_prob_*` constants. */
    private val lambda = DoubleArray(num_idx_prob)

    /**
     * `prob[q][k][idx_normal]` is `poisson(lambda[q], k)` and `prob[q][k][idx_cumulative]` is
     * one minus the sum of the `idx_normal` entries below `k`.
     */
    private val prob = Array(num_idx_prob) { Array(max_car + 1) { DoubleArray(2) } }

    init {
        lambda[idx_prob_rent_L1] = mean_for_rent_L1
        lambda[idx_prob_rent_L2] = mean_for_rent_L2
        lambda[idx_prob_return_L1] = mean_for_return_L1
        lambda[idx_prob_return_L2] = mean_for_return_L2
        for (L in 0 until num_idx_prob)
            for (k in 0..max_car) {
                // NOTE(review): poisson(mean, k) is assumed to be the Poisson pmf P(X = k) - confirm.
                prob[L][k][idx_normal] = poisson(lambda[L], k)
                // Tail value for "k or more": start at 1 and peel off one exact value per step.
                prob[L][k][idx_cumulative] =
                    if (k < 1) 1.0
                    else prob[L][k - 1][idx_cumulative] - prob[L][k - 1][idx_normal]
            }
    }

    /**
     * Largest number of cars worth moving from a location holding [num_L1] cars to a location
     * holding [num_L2] cars.
     *
     * @return the smaller of what the source can give and what the target can still accept
     */
    private fun max_move(num_L1: Int, num_L2: Int): Int {
        // The source cannot give more cars than it has, nor more than the per-action limit.
        val max_L1_to_L2 = min(num_L1, max_move)
        // The target cannot exceed max_car; moving more than that is useless.
        val accept_L1_to_L2 = min(max_car - num_L2, max_move)
        return min(max_L1_to_L2, accept_L1_to_L2)
    }

    /**
     * Number of parking lots beyond the first one needed for [num_cars] cars.
     *
     * Clamped at zero: an empty location needs no lot at all but must not produce a negative
     * count (which would turn the parking cost into a credit).
     */
    private fun extra_parking_lots(num_cars: Int): Int =
        max(0, ceil(num_cars.toDouble() / max_car_per_parking_lot).toInt() - 1)

    /**
     * Cost charged for one action.
     *
     * @param L1_to_L2 cars moved from location 1 to location 2 (negative means the other direction)
     * @param nL1 cars at location 1 after the move
     * @param nL2 cars at location 2 after the move
     * @param exercise4_4_version when `true`, one car moved from location 1 to location 2 is
     * free and every additional parking lot costs [cost_per_parking_lot]; when `false`, only
     * the per-car moving cost applies
     */
    private fun action_cost(L1_to_L2: Int, nL1: Int, nL2: Int, exercise4_4_version: Boolean): Double {
        if (!exercise4_4_version) return abs(L1_to_L2) * cost_per_car_moved
        val charged_moves = if (L1_to_L2 >= 1) L1_to_L2 - 1 else abs(L1_to_L2)
        val move_cost = charged_moves * cost_per_car_moved
        val parking_cost = (extra_parking_lots(nL1) + extra_parking_lots(nL2)) * cost_per_parking_lot
        return move_cost + parking_cost
    }

    /**
     * Creates the car rental MDP with discount factor 0.9.
     *
     * @param exercise4_4_version selects the cost model, see [action_cost]
     * @return the MDP with the possible outcomes of every action of every state filled in
     */
    fun make(exercise4_4_version: Boolean): IndexedMDP {
        val mdp = CNSetMDP(gamma = 0.9, state_dim = (max_car + 1) x (max_car + 1)) { (L1, L2) ->
            // One action per feasible net transfer: max_L1_to_L2 down to -max_L2_to_L1.
            val max_L1_to_L2 = max_move(L1, L2)
            val max_L2_to_L1 = max_move(L2, L1)
            max_L1_to_L2 + max_L2_to_L1 + 1
        }
        for (s in mdp.states) {
            val (s_1, s_2) = s
            val max_L1_to_L2 = max_move(s_1, s_2)
            for (action in s.actions) {
                // Action index 0 is the largest transfer L1 -> L2; higher indices move fewer
                // cars and eventually move cars in the opposite direction (negative values).
                val idx = action[0]
                val L1_to_L2 = max_L1_to_L2 - idx
                val nL1 = s_1 - L1_to_L2
                val nL2 = s_2 + L1_to_L2
                // Outcomes are keyed by (next L1, next L2, total rentals above the minimum that
                // next state requires); the last dimension is sized to the reachable range.
                val possibles = cnsetFrom(
                    (max_car + 1) x (max_car + 1) x { min(it[0], nL1) + min(it[1], nL2) + 1 }
                ) { null_possible }
                val cost = action_cost(L1_to_L2, nL1, nL2, exercise4_4_version)
                for (rent_L1 in 0..nL1)
                    for (rent_L2 in 0..nL2) {
                        // Renting every available car stands for "that many or more requested",
                        // hence the cumulative entry at the upper bound.
                        val rent_prob =
                            prob[idx_prob_rent_L1][rent_L1][if (rent_L1 < nL1) idx_normal else idx_cumulative] *
                                prob[idx_prob_rent_L2][rent_L2][if (rent_L2 < nL2) idx_normal else idx_cumulative]
                        val total_rent = rent_L1 + rent_L2
                        val reward = total_rent * credit_per_car_rent - cost
                        // Returns are capped so that a location never holds more than max_car.
                        val max_return_L1 = max_car - (nL1 - rent_L1)
                        val max_return_L2 = max_car - (nL2 - rent_L2)
                        for (return_L1 in 0..max_return_L1)
                            for (return_L2 in 0..max_return_L2) {
                                val return_prob =
                                    prob[idx_prob_return_L1][return_L1][if (return_L1 < max_return_L1) idx_normal else idx_cumulative] *
                                        prob[idx_prob_return_L2][return_L2][if (return_L2 < max_return_L2) idx_normal else idx_cumulative]
                                val outcome_prob = rent_prob * return_prob
                                val new_L1 = nL1 - rent_L1 + return_L1
                                val new_L2 = nL2 - rent_L2 + return_L2
                                // Fewest rentals that can lead to (new_L1, new_L2); used as the
                                // offset of the third index.
                                val min_rent = max(0, nL1 - new_L1) + max(0, nL2 - new_L2)
                                val slot = total_rent - min_rent
                                val existing = possibles[new_L1, new_L2, slot]
                                if (existing === null_possible)
                                    possibles[new_L1, new_L2, slot] =
                                        IndexedPossible(mdp.states[new_L1, new_L2], reward, outcome_prob)
                                else
                                    // Same next state and same reward: merge the probability mass.
                                    existing.probability += outcome_prob
                            }
                    }
                action.possibles = possibles
            }
        }
        return mdp
    }
}