Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d225d31
LINT: Enable ruff imports for rllib/algorithms
czgdp1807 Sep 19, 2025
32f0857
LINT: Enable ruff imports for rllib/core
czgdp1807 Sep 19, 2025
0215f36
LINT: Enable ruff imports for offline, tests, callbacks and env in rllib
czgdp1807 Sep 19, 2025
db68c00
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Sep 23, 2025
38f0199
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Sep 23, 2025
ff67d7e
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Sep 23, 2025
a076f70
Merge branch 'master' into ruff_rllib_import_3
kamil-kaczmarek Sep 24, 2025
5aa86fa
Merge branch 'master' into ruff_rllib_import_3
kamil-kaczmarek Sep 24, 2025
f09bdee
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Sep 24, 2025
c54f45a
Merge branch 'master' into ruff_rllib_import_3
kamil-kaczmarek Sep 24, 2025
17be443
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Sep 25, 2025
7d7b6f5
Merge branch 'master' into ruff_rllib_import_3
kamil-kaczmarek Sep 25, 2025
184c7d2
Merge main into ruff_rllib_import_5
czgdp1807 Oct 1, 2025
a6af7a4
Apply ruff_imports
czgdp1807 Oct 1, 2025
a83df9b
Merge main into ruff_rllib_import_5
czgdp1807 Oct 2, 2025
27d681b
Merge branch 'master' into ruff_rllib_import_3
kamil-kaczmarek Oct 3, 2025
1e17146
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Oct 4, 2025
3913514
Merge branch 'master' into ruff_rllib_import_5
pseudo-rnd-thoughts Oct 6, 2025
bf4e896
Merge branch 'master' into ruff_rllib_import_5
aslonnie Oct 7, 2025
e22fec6
Merge branch 'master' into ruff_rllib_import_3
kamil-kaczmarek Oct 18, 2025
3bc3e45
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Oct 18, 2025
5484e76
Merge branch 'master' into ruff_rllib_algorithms
kamil-kaczmarek Oct 21, 2025
e474cb4
Merge branch 'master' into ruff_rllib_import_3
Oct 31, 2025
9e3e32f
run pre-commit
Oct 31, 2025
3e0388b
Merge branch 'ruff_rllib_import_3' into ruff_rllib_import_5
Oct 31, 2025
541bc3b
run pre-commit
Oct 31, 2025
6a77971
Merge branch 'ruff_rllib_import_5' into ruff_rllib_algorithms
Oct 31, 2025
f30f2ec
run pre-commit
Oct 31, 2025
9f4be9c
Merge branch 'master' into ruff_rllib_algorithms
Oct 31, 2025
02f6398
remove all redundant ruff isort configures and run pre-commit
Oct 31, 2025
f69992a
Fix isort for replay-buffer
Oct 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,22 @@ afterray = ["psutil", "setproctitle"]
"python/ray/__init__.py" = ["I"]
"python/ray/dag/__init__.py" = ["I"]
"python/ray/air/__init__.py" = ["I"]
"rllib/*" = ["I"]
"rllib/__init__.py" = ["I"]
"rllib/benchmarks/*" = ["I"]
"rllib/connectors/*" = ["I"]
"rllib/evaluation/*" = ["I"]
"rllib/models/*" = ["I"]
"rllib/utils/*" = ["I"]
# "rllib/algorithms/*" = ["I"]
"rllib/core/*" = ["I"]
"rllib/examples/*" = ["I"]
"rllib/offline/*" = ["I"]
"rllib/tests/*" = ["I"]
"rllib/callbacks/*" = ["I"]
"rllib/env/*" = ["I"]
"rllib/execution/*" = ["I"]
"rllib/policy/*" = ["I"]
"rllib/tuned_examples/*" = ["I"]
"release/*" = ["I"]

# TODO(matthewdeng): Remove this line
Expand Down
3 changes: 1 addition & 2 deletions rllib/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig
from ray.rllib.algorithms.impala.impala import (
IMPALA,
IMPALAConfig,
Impala,
IMPALAConfig,
ImpalaConfig,
)
from ray.rllib.algorithms.marwil.marwil import MARWIL, MARWILConfig
from ray.rllib.algorithms.ppo.ppo import PPO, PPOConfig
from ray.rllib.algorithms.sac.sac import SAC, SACConfig


__all__ = [
"Algorithm",
"AlgorithmConfig",
Expand Down
55 changes: 27 additions & 28 deletions rllib/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
from collections import defaultdict
import concurrent
import copy
from datetime import datetime
import functools
import gymnasium as gym
import importlib
import importlib.metadata
import json
import logging
import numpy as np
import os
from packaging import version
import pathlib
import pyarrow.fs
import re
import tempfile
import time
from collections import defaultdict
from datetime import datetime
from typing import (
TYPE_CHECKING,
Any,
Callable,
Collection,
Expand All @@ -27,26 +24,32 @@
Set,
Tuple,
Type,
TYPE_CHECKING,
Union,
)

import gymnasium as gym
import numpy as np
import pyarrow.fs
import tree # pip install dm_tree
from packaging import version

import ray
from ray.tune.result import TRAINING_ITERATION
import ray.cloudpickle as pickle
from ray._common.deprecation import (
DEPRECATED_VALUE,
Deprecated,
deprecation_warning,
)
from ray._common.usage.usage_lib import TagKey, record_extra_usage_tag
from ray.actor import ActorHandle
from ray.tune import Checkpoint
import ray.cloudpickle as pickle
from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
from ray.rllib.algorithms.registry import ALGORITHMS_CLASS_TO_NAME as ALL_ALGORITHMS
from ray.rllib.algorithms.utils import (
AggregatorActor,
_get_env_runner_bundles,
_get_offline_eval_runner_bundles,
_get_learner_bundles,
_get_main_process_bundle,
_get_offline_eval_runner_bundles,
)
from ray.rllib.callbacks.utils import make_callback
from ray.rllib.connectors.agent.obs_preproc import ObsPreprocessorConnector
Expand All @@ -63,11 +66,11 @@
DEFAULT_MODULE_ID,
)
from ray.rllib.core.columns import Columns
from ray.rllib.core.rl_module import validate_module_id
from ray.rllib.core.rl_module.multi_rl_module import (
MultiRLModule,
MultiRLModuleSpec,
)
from ray.rllib.core.rl_module import validate_module_id
from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec
from ray.rllib.env import INPUT_ENV_SPACES
from ray.rllib.env.env_context import EnvContext
Expand All @@ -81,39 +84,34 @@
from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
from ray.rllib.offline import get_dataset_and_shards
from ray.rllib.offline.estimators import (
OffPolicyEstimator,
ImportanceSampling,
WeightedImportanceSampling,
DirectMethod,
DoublyRobust,
ImportanceSampling,
OffPolicyEstimator,
WeightedImportanceSampling,
)
from ray.rllib.offline.offline_evaluator import OfflineEvaluator
from ray.rllib.policy.policy import Policy, PolicySpec
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch
from ray.rllib.utils import deep_update, FilterManager, force_list
from ray.rllib.utils import FilterManager, deep_update, force_list
from ray.rllib.utils.actor_manager import FaultTolerantActorManager, RemoteCallResults
from ray.rllib.utils.annotations import (
DeveloperAPI,
ExperimentalAPI,
OldAPIStack,
override,
OverrideToImplementCustomLogic,
OverrideToImplementCustomLogic_CallToSuperRecommended,
PublicAPI,
override,
)
from ray.rllib.utils.checkpoints import (
Checkpointable,
CHECKPOINT_VERSION,
CHECKPOINT_VERSION_LEARNER_AND_ENV_RUNNER,
Checkpointable,
get_checkpoint_info,
try_import_msgpack,
)
from ray.rllib.utils.debug import update_global_seed_if_necessary
from ray._common.deprecation import (
DEPRECATED_VALUE,
Deprecated,
deprecation_warning,
)
from ray.rllib.utils.error import ERR_MSG_INVALID_ENV_DESCRIPTOR, EnvError
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.from_config import from_config
Expand All @@ -136,9 +134,9 @@
NUM_AGENT_STEPS_TRAINED,
NUM_AGENT_STEPS_TRAINED_LIFETIME,
NUM_ENV_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER,
NUM_ENV_STEPS_SAMPLED_LIFETIME,
NUM_ENV_STEPS_SAMPLED_THIS_ITER,
NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER,
NUM_ENV_STEPS_TRAINED,
NUM_ENV_STEPS_TRAINED_LIFETIME,
NUM_EPISODES,
Expand All @@ -149,19 +147,19 @@
RESTORE_ENV_RUNNERS_TIMER,
RESTORE_EVAL_ENV_RUNNERS_TIMER,
RESTORE_OFFLINE_EVAL_RUNNERS_TIMER,
STEPS_TRAINED_THIS_ITER_COUNTER,
SYNCH_ENV_CONNECTOR_STATES_TIMER,
SYNCH_EVAL_ENV_CONNECTOR_STATES_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER,
TIMERS,
TRAINING_ITERATION_TIMER,
TRAINING_STEP_TIMER,
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO
from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
from ray.rllib.utils.replay_buffers import MultiAgentReplayBuffer, ReplayBuffer
from ray.rllib.utils.runners.runner_group import RunnerGroup
from ray.rllib.utils.serialization import deserialize_type, NOT_SERIALIZABLE
from ray.rllib.utils.serialization import NOT_SERIALIZABLE, deserialize_type
from ray.rllib.utils.spaces import space_utils
from ray.rllib.utils.typing import (
AgentConnectorDataType,
Expand All @@ -184,15 +182,16 @@
TensorType,
)
from ray.train.constants import DEFAULT_STORAGE_PATH
from ray.tune import Checkpoint
from ray.tune.execution.placement_groups import PlacementGroupFactory
from ray.tune.experiment.trial import ExportFormat
from ray.tune.logger import Logger, UnifiedLogger
from ray.tune.registry import ENV_CREATOR, _global_registry
from ray.tune.registry import ENV_CREATOR, _global_registry, get_trainable_cls
from ray.tune.resources import Resources
from ray.tune.result import TRAINING_ITERATION
from ray.tune.trainable import Trainable
from ray.util import log_once
from ray.util.timer import _Timer
from ray.tune.registry import get_trainable_cls

if TYPE_CHECKING:
from ray.rllib.core.learner.learner_group import LearnerGroup
Expand Down
17 changes: 8 additions & 9 deletions rllib/algorithms/algorithm_config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import copy
import dataclasses
from enum import Enum
import logging
import math
import sys
from enum import Enum
from typing import (
TYPE_CHECKING,
Any,
Callable,
Collection,
Expand All @@ -13,7 +14,6 @@
Optional,
Tuple,
Type,
TYPE_CHECKING,
Union,
)

Expand All @@ -22,6 +22,11 @@
from packaging import version

import ray
from ray._common.deprecation import (
DEPRECATED_VALUE,
Deprecated,
deprecation_warning,
)
from ray.rllib.callbacks.callbacks import RLlibCallback
from ray.rllib.connectors.connector_v2 import ConnectorV2
from ray.rllib.core import DEFAULT_MODULE_ID
Expand All @@ -33,7 +38,7 @@
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.env import INPUT_ENV_SPACES, INPUT_ENV_SINGLE_SPACES
from ray.rllib.env import INPUT_ENV_SINGLE_SPACES, INPUT_ENV_SPACES
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.env.wrappers.atari_wrappers import is_atari
from ray.rllib.evaluation.collectors.sample_collector import SampleCollector
Expand All @@ -48,11 +53,6 @@
OldAPIStack,
OverrideToImplementCustomLogic_CallToSuperRecommended,
)
from ray._common.deprecation import (
DEPRECATED_VALUE,
Deprecated,
deprecation_warning,
)
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.from_config import NotProvided, from_config
from ray.rllib.utils.schedules.scheduler import Scheduler
Expand Down Expand Up @@ -83,7 +83,6 @@
from ray.util import log_once
from ray.util.placement_group import PlacementGroup


if TYPE_CHECKING:
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.core.learner import Learner
Expand Down
6 changes: 3 additions & 3 deletions rllib/algorithms/appo/appo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,22 @@
https://arxiv.org/pdf/1912.00167
"""

from typing import Optional, Type
import logging
from typing import Optional, Type

from ray._common.deprecation import DEPRECATED_VALUE, deprecation_warning
from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided
from ray.rllib.algorithms.impala.impala import IMPALA, IMPALAConfig
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.annotations import override
from ray._common.deprecation import DEPRECATED_VALUE, deprecation_warning
from ray.rllib.utils.metrics import (
LAST_TARGET_UPDATE_TS,
LEARNER_STATS_KEY,
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
NUM_TARGET_UPDATES,
)
from ray.rllib.utils.metrics import LEARNER_STATS_KEY

logger = logging.getLogger(__name__)

Expand Down
22 changes: 11 additions & 11 deletions rllib/algorithms/appo/appo_tf_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,37 @@
Keep in sync with changes to VTraceTFPolicy.
"""

import numpy as np
import logging
import gymnasium as gym
from typing import Dict, List, Optional, Type, Union

import gymnasium as gym
import numpy as np

from ray.rllib.algorithms.appo.utils import make_appo_models
from ray.rllib.algorithms.impala import vtrace_tf as vtrace
from ray.rllib.algorithms.impala.impala_tf_policy import (
_make_time_major,
VTraceClipGradients,
VTraceOptimizer,
_make_time_major,
)
from ray.rllib.evaluation.postprocessing import (
Postprocessing,
compute_bootstrap_value,
compute_gae_for_sample_batch,
Postprocessing,
)
from ray.rllib.models.tf.tf_action_dist import Categorical
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.tf_action_dist import Categorical, TFActionDistribution
from ray.rllib.policy.dynamic_tf_policy_v2 import DynamicTFPolicyV2
from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_mixins import (
EntropyCoeffSchedule,
LearningRateSchedule,
KLCoeffMixin,
ValueNetworkMixin,
GradStatsMixin,
KLCoeffMixin,
LearningRateSchedule,
TargetNetworkMixin,
ValueNetworkMixin,
)
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.tf_action_dist import TFActionDistribution
from ray.rllib.utils.annotations import (
override,
)
Expand Down
17 changes: 9 additions & 8 deletions rllib/algorithms/appo/appo_torch_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,38 @@
Keep in sync with changes to VTraceTFPolicy.
"""

import gymnasium as gym
import numpy as np
import logging
from typing import Any, Dict, List, Optional, Type, Union

import gymnasium as gym
import numpy as np

import ray
from ray.rllib.algorithms.appo.utils import make_appo_models
import ray.rllib.algorithms.impala.vtrace_torch as vtrace
from ray.rllib.algorithms.appo.utils import make_appo_models
from ray.rllib.algorithms.impala.impala_torch_policy import (
make_time_major,
VTraceOptimizer,
make_time_major,
)
from ray.rllib.evaluation.postprocessing import (
Postprocessing,
compute_bootstrap_value,
compute_gae_for_sample_batch,
Postprocessing,
)
from ray.rllib.models.action_dist import ActionDistribution
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.torch.torch_action_dist import (
TorchDistributionWrapper,
TorchCategorical,
TorchDistributionWrapper,
)
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.torch_mixins import (
EntropyCoeffSchedule,
LearningRateSchedule,
KLCoeffMixin,
ValueNetworkMixin,
LearningRateSchedule,
TargetNetworkMixin,
ValueNetworkMixin,
)
from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2
from ray.rllib.utils.annotations import override
Expand Down
Loading