[Game] Rename returns to rewards (#1161)

sotetsuk · web-flow · commit f26e9e33c63a · 2024-01-09T16:51:44.000+09:00
diff --git a/pgx/_src/games/__init__.py b/pgx/_src/games/__init__.py
@@ -8,7 +8,7 @@
 
 
 @runtime_checkable
-class TwoPlayerPerfectInfoGame(Protocol[T]):
+class GameProtocol(Protocol[T]):
     def init(self) -> T:
         ...
 
@@ -24,5 +24,5 @@ def legal_action_mask(self, state: T) -> Array:
     def is_terminal(self, state: T) -> Array:
         ...
 
-    def returns(self, state: T) -> Array:
+    def rewards(self, state: T) -> Array:
         ...
diff --git a/pgx/_src/games/connect_four.py b/pgx/_src/games/connect_four.py
@@ -63,7 +63,7 @@ def is_terminal(self, state: GameState) -> Array:
         board2d = state.board.reshape(6, 7)
         return (state.winner >= 0) | jnp.all((board2d >= 0).sum(axis=0) == 6)
 
-    def returns(self, state: GameState) -> Array:
+    def rewards(self, state: GameState) -> Array:
         return jax.lax.select(
             state.winner >= 0,
             jnp.float32([-1, -1]).at[state.winner].set(1),
diff --git a/pgx/_src/games/go.py b/pgx/_src/games/go.py
@@ -126,7 +126,7 @@ def is_terminal(self, state: GameState) -> Array:
         timeover = self.max_termination_steps <= state.step_count
         return two_consecutive_pass | state.is_psk | timeover
 
-    def returns(self, state: GameState) -> Array:
+    def rewards(self, state: GameState) -> Array:
         score = _count_point(state, self.size)
         rewards = jax.lax.select(
             score[0] - self.komi > score[1],
diff --git a/pgx/_src/games/tic_tac_toe.py b/pgx/_src/games/tic_tac_toe.py
@@ -60,7 +60,7 @@ def legal_action_mask(self, state: GameState) -> Array:
     def is_terminal(self, state: GameState) -> Array:
         return (state.winner >= 0) | jnp.all(state.board != -1)
 
-    def returns(self, state: GameState) -> Array:
+    def rewards(self, state: GameState) -> Array:
         return jax.lax.select(
             state.winner >= 0,
             jnp.float32([-1, -1]).at[state.winner].set(1),
diff --git a/pgx/connect_four.py b/pgx/connect_four.py
@@ -57,7 +57,7 @@ def _step(self, state: core.State, action: Array, key) -> State:
         assert isinstance(state, State)
         legal_action_mask = self._game.legal_action_mask(state._x)
         terminated = self._game.is_terminal(state._x)
-        rewards = self._game.returns(state._x)
+        rewards = self._game.rewards(state._x)
         should_flip = state.current_player != state._x.color
         rewards = jax.lax.select(should_flip, jnp.flip(rewards), rewards)
         rewards = jax.lax.select(terminated, rewards, jnp.zeros(2, jnp.float32))
diff --git a/pgx/go.py b/pgx/go.py
@@ -73,7 +73,7 @@ def _step(self, state: core.State, action: Array, key) -> State:
         return state.replace(  # type:ignore
             current_player=state._player_order[x.color],
             legal_action_mask=self._game.legal_action_mask(x),
-            rewards=self._game.returns(x)[state._player_order],
+            rewards=self._game.rewards(x)[state._player_order],
             terminated=self._game.is_terminal(x),
             _x=x,
         )
diff --git a/pgx/tic_tac_toe.py b/pgx/tic_tac_toe.py
@@ -56,7 +56,7 @@ def _step(self, state: core.State, action: Array, key) -> State:
         assert isinstance(state, State)
         legal_action_mask = self._game.legal_action_mask(state._x)
         terminated = self._game.is_terminal(state._x)
-        rewards = self._game.returns(state._x)
+        rewards = self._game.rewards(state._x)
         should_flip = state.current_player != state._x.color
         rewards = jax.lax.select(should_flip, jnp.flip(rewards), rewards)
         rewards = jax.lax.select(terminated, rewards, jnp.zeros(2, jnp.float32))

Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,7 @@ def _step(self, state: core.State, action: Array, key) -> State:`
`73`	`73`	`return state.replace( # type:ignore`
`74`	`74`	`current_player=state._player_order[x.color],`
`75`	`75`	`legal_action_mask=self._game.legal_action_mask(x),`
`76`		`- rewards=self._game.returns(x)[state._player_order],`
	`76`	`+ rewards=self._game.rewards(x)[state._player_order],`
`77`	`77`	`terminated=self._game.is_terminal(x),`
`78`	`78`	`_x=x,`
`79`	`79`	`)`