Skip to content

Commit 514a9e7

Browse files
authored
Clean up TraCI and Pybullet exceptions (#2128)
* Fix pybullet __del__ improper exception. * Clarify SUMO and TraCI incompatibility error. * Add traci_retries to the engine configuration. * Clarify TraCI connection issues.
1 parent e883442 commit 514a9e7

File tree

5 files changed

+66
-46
lines changed

5 files changed

+66
-46
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@ Copy and pasting the git commit messages is __NOT__ enough.
1313
### Changed
1414
- `VehicleIndex.build_agent_vehicle()` no longer has `filename` and `surface_patches` parameters.
1515
- The following modules have been renamed: `envision.types` -> `envision.etypes`, `smarts.core.utils.logging` -> `smarts.core.utils.core_logging`, `smarts.core.utils.math` -> `smarts.core.utils.core_math`, `smarts.sstudio.types` -> `smarts.sstudio.sstypes`. For compatibility reasons they can still be imported by their original module name.
16+
- Exposed `traffic:traci_retries`/`SMARTS_TRAFFIC_TRACI_RETRIES` to control how many times `SumoTrafficSimulation` retries acquiring a TraCI server when `traci_retries` is not passed to its constructor.
17+
1618
### Deprecated
1719
### Fixed
20+
- `SumoTrafficSimulation` now reports more clearly why it failed to connect to the TraCI server.
21+
- Suppressed an issue where `pybullet_utils.pybullet.BulletClient` would raise an error during cleanup (`__del__`) because it was attempting to catch a non-`BaseException` type.
1822
- Fixed an issue where `AgentInterface.vehicle_type` would not affect agent vehicles when attempting to take over an existing vehicle.
1923
- Fixed a case where newly created agent vehicles would have a constant `"sedan"` size instead of the size of `AgentInterface.vehicle_type`.
2024
- Fixed a case where if vehicles are replaced they would not respect controller and vehicle parameters.

smarts/core/sumo_traffic_simulation.py

+24-18
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from shapely.geometry import Polygon
3131
from shapely.geometry import box as shapely_box
3232

33-
from smarts.core import gen_id
33+
from smarts.core import config, gen_id
3434
from smarts.core.actor import ActorRole, ActorState
3535
from smarts.core.colors import SceneColors
3636
from smarts.core.coordinates import Dimensions, Heading, Pose, RefLinePoint
@@ -74,6 +74,8 @@ class SumoTrafficSimulation(TrafficProvider):
7474
remove_agents_only_mode:
7575
Remove only agent vehicles used by SMARTS and not delete other SUMO
7676
vehicles when the traffic simulation calls to tear-down
77+
traci_retries:
78+
The number of times to retry acquisition of a TraCI server before erroring.
7779
"""
7880

7981
_HAS_DYNAMIC_ATTRIBUTES = True
@@ -86,8 +88,9 @@ def __init__(
8688
sumo_port: Optional[int] = None,
8789
auto_start: bool = True,
8890
allow_reload: bool = True,
89-
debug: bool = True,
91+
debug: bool = False,
9092
remove_agents_only_mode: bool = False,
93+
traci_retries: Optional[int] = None,
9194
):
9295
self._remove_agents_only_mode = remove_agents_only_mode
9396
self._log = logging.getLogger(self.__class__.__name__)
@@ -121,6 +124,7 @@ def __init__(
121124
self._last_vehicle_subscriptions = dict()
122125
self._sim = None
123126
self._handling_error = False
127+
self._traci_retries = traci_retries
124128

125129
# start with the default recovery flags...
126130
self._recovery_flags = super().recovery_flags
@@ -203,15 +207,15 @@ def _initialize_traci_conn(self, num_retries=5):
203207
base_params=self._base_sumo_load_params(),
204208
sumo_binary=sumo_binary,
205209
)
206-
# Ensure there has been enough time for sumo to start
207-
time.sleep(0.05)
210+
208211
try:
209212
while self._traci_conn.viable and not self._traci_conn.connected:
210213
try:
211214
self._traci_conn.connect(
212215
timeout=5,
213216
minimum_traci_version=20,
214217
minimum_sumo_version=(1, 10, 0),
218+
debug=self._debug,
215219
)
216220
except traci.exceptions.FatalTraCIError:
217221
# Could not connect in time just retry connection
@@ -224,30 +228,28 @@ def _initialize_traci_conn(self, num_retries=5):
224228
self._traci_conn.close_traci_and_pipes()
225229
continue
226230
except ConnectionRefusedError:
227-
# Some other process owns the port... sumo did not die just retry
228-
self._traci_conn.close_traci_and_pipes()
231+
# Some other process somehow owns the port... sumo needs to be restarted.
229232
continue
233+
except OSError:
234+
# TraCI or SUMO version are not at the minimum required version.
235+
raise
230236
except KeyboardInterrupt:
231237
self._log.debug("Keyboard interrupted TraCI connection.")
232238
self._traci_conn.close_traci_and_pipes()
233239
raise
234240
break
241+
else:
242+
exception = traci.exceptions.FatalTraCIError(
243+
f"Unable to connect to TraCI server after `{num_retries=}`."
244+
)
245+
self._handle_traci_exception(exception, actors_relinquishable=False)
246+
raise exception
235247

236248
try:
237-
assert self._traci_conn is not None
238249
# It is mandatory to set order when using multiple clients.
239250
self._traci_conn.setOrder(0)
240251
self._traci_conn.getVersion()
241-
except (traci.exceptions.FatalTraCIError, AssertionError) as err:
242-
logging.error(
243-
"""Failed to initialize SUMO
244-
Your scenario might not be configured correctly or
245-
you were trying to initialize many SUMO instances at
246-
once and we were not able to assign unique port
247-
numbers to all SUMO processes.
248-
Check %s for hints""",
249-
self._log_file,
250-
)
252+
except (traci.exceptions.FatalTraCIError, TypeError) as err:
251253
self._handle_traci_exception(err, actors_relinquishable=False)
252254
self.teardown()
253255
raise
@@ -321,7 +323,11 @@ def setup(self, scenario) -> ProviderState:
321323

322324
if restart_sumo:
323325
try:
324-
self._initialize_traci_conn()
326+
engine_config = config()
327+
traci_retries = self._traci_retries or engine_config(
328+
"sumo", "traci_retries", default=5, cast=int
329+
)
330+
self._initialize_traci_conn(num_retries=traci_retries)
325331
except traci.exceptions.FatalTraCIError:
326332
return ProviderState()
327333
elif self._allow_reload:

smarts/core/utils/pybullet.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@ def __init__(self, connection_mode=None):
4949

5050
def __del__(self):
5151
"""Clean up connection if not already done."""
52-
super().__del__()
52+
try:
53+
super().__del__()
54+
except TypeError as error:
55+
# Pybullet 3.2.6 currently attempts to catch an error type that does not exist.
56+
if not error.args[0].contains("BaseException"):
57+
raise
5358

5459
def __getattr__(self, name):
5560
"""Inject the client id into Bullet functions."""

smarts/core/utils/sumo.py

+28-26
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"""Importing this module "redirects" the import to the "real" sumolib. This is available
2121
for convenience and to reduce code duplication as sumolib lives under SUMO_HOME.
2222
"""
23+
from __future__ import annotations
2324

2425
import functools
2526
import inspect
@@ -28,7 +29,7 @@
2829
import os
2930
import subprocess
3031
import sys
31-
from typing import Any, List, Optional
32+
from typing import Any, List, Optional, Tuple
3233

3334
from smarts.core.utils import networking
3435
from smarts.core.utils.core_logging import suppress_output
@@ -86,7 +87,7 @@ def __init__(
8687
self._sumo_proc = None
8788
self._traci_conn = None
8889
self._sumo_port = None
89-
self._sumo_version = ()
90+
self._sumo_version: Tuple[int, ...] = tuple()
9091

9192
if sumo_port is None:
9293
sumo_port = networking.find_free_port()
@@ -115,18 +116,15 @@ def __del__(self) -> None:
115116

116117
def connect(
117118
self,
118-
timeout: float = 5,
119-
minimum_traci_version=20,
120-
minimum_sumo_version=(
121-
1,
122-
10,
123-
0,
124-
),
119+
timeout: float,
120+
minimum_traci_version: int,
121+
minimum_sumo_version: Tuple[int, ...],
122+
debug: bool = False,
125123
):
126124
"""Attempt a connection with the SUMO process."""
127125
traci_conn = None
128126
try:
129-
with suppress_output(stdout=False):
127+
with suppress_output(stderr=not debug, stdout=False):
130128
traci_conn = traci.connect(
131129
self._sumo_port,
132130
numRetries=max(0, int(20 * timeout)),
@@ -142,41 +140,46 @@ def connect(
142140
raise
143141
except ConnectionRefusedError:
144142
logging.error(
145-
"Connection refused. Tried to connect to unpaired TraCI client."
143+
"Connection refused. Tried to connect to an unpaired TraCI client."
146144
)
147145
raise
148146

149147
try:
150148
vers, vers_str = traci_conn.getVersion()
151-
assert (
152-
vers >= minimum_traci_version
153-
), f"TraCI API version must be >= {minimum_traci_version}. Got version ({vers})"
149+
if vers < minimum_traci_version:
150+
raise OSError(
151+
f"TraCI API version must be >= {minimum_traci_version}. Got version ({vers})"
152+
)
154153
self._sumo_version = tuple(
155154
int(v) for v in vers_str.partition(" ")[2].split(".")
156155
) # e.g. "SUMO 1.11.0" -> (1, 11, 0)
157-
assert (
158-
self._sumo_version >= minimum_sumo_version
159-
), f"SUMO version must be >= SUMO {minimum_sumo_version}"
156+
if self._sumo_version < minimum_sumo_version:
157+
raise OSError(f"SUMO version must be >= SUMO {minimum_sumo_version}")
160158
except traci.exceptions.FatalTraCIError as err:
161-
logging.debug("TraCI could not connect in time.")
159+
logging.debug("TraCI disconnected, process may have died.")
162160
# XXX: the error type is changed to TraCIException to make it consistent with the
163161
# process died case of `traci.connect`.
164162
raise traci.exceptions.TraCIException(err)
165-
except AssertionError:
163+
except OSError:
166164
self.close_traci_and_pipes()
167165
raise
168166
self._traci_conn = traci_conn
169167

170168
@property
171-
def connected(self):
169+
def connected(self) -> bool:
172170
"""Check if the connection is still valid."""
173171
return self._sumo_proc is not None and self._traci_conn is not None
174172

175173
@property
176-
def viable(self):
174+
def viable(self) -> bool:
177175
"""If making a connection to the sumo process is still viable."""
178176
return self._sumo_proc is not None and self._sumo_proc.poll() is None
179177

178+
@property
179+
def sumo_version(self) -> Tuple[int, ...]:
180+
"""Get the current SUMO version as a tuple."""
181+
return self._sumo_version
182+
180183
def __getattr__(self, name: str) -> Any:
181184
if not self.connected:
182185
return None
@@ -210,7 +213,7 @@ def __safe_close(conn):
210213
# TraCI connection is already dead.
211214
pass
212215
except AttributeError:
213-
# Socket was destroyed internally by a fatal error somehow.
216+
# Socket was destroyed internally, likely due to an error.
214217
pass
215218

216219
if self._traci_conn:
@@ -231,14 +234,13 @@ def teardown(self):
231234

232235

233236
def _wrap_traci_method(*args, method, sumo_process: TraciConn, **kwargs):
234-
# Argument order must be `*args` first so keyword arguments are required for `method` and `sumo_process`.
237+
# Argument order must be `*args` first so `method` and `sumo_process` are keyword only arguments.
235238
try:
236239
return method(*args, **kwargs)
237240
except traci.exceptions.FatalTraCIError:
238-
# Traci cannot continue
241+
# TraCI cannot continue
239242
sumo_process.close_traci_and_pipes()
240243
raise
241244
except traci.exceptions.TraCIException:
242-
# Case where SUMO can continue
243-
# TAI: consider closing the process even with a non fatal error
245+
# Case where TraCI/SUMO can theoretically continue
244246
raise

smarts/engine.ini

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; For syntax see https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
12
[benchmark]
23
[core]
34
debug = False
@@ -11,4 +12,6 @@ max_pybullet_freq = 240
1112
[resources]
1213
default_agent_vehicle = passenger
1314
[ray]
14-
log_to_driver=False
15+
log_to_driver=False
16+
[traffic]
17+
traci_retries=5

0 commit comments

Comments
 (0)