Skip to content

Commit f65b6ac

Browse files
wuwentaorokam
andauthored
fix(device)!: rollback and socket refresh_status (#307)
Remove some changes and **roll back to the old code process; most of the code changes come from the old midea-local version.** This also fixes the timeout error issue. In addition, there are some ruff errors; I'm sorry, I did not have enough time to fix them today, but this should fix most of the current socket issues. I will try to fix them in the next release; we can quickly publish a release to fix these GitHub issues/bugs. Thanks. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Enhanced connection management with a new connection retry logic. - Introduced a timeout for socket receive operations to improve reliability. - **Bug Fixes** - Improved error handling for various exceptions during connection processes. - **Refactor** - Simplified methods for better clarity and readability, particularly in connection handling and message processing. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Co-authored-by: Lucas Mindêllo de Andrade <[email protected]>
1 parent 73b02a2 commit f65b6ac

File tree

1 file changed

+113
-109
lines changed

1 file changed

+113
-109
lines changed

Diff for: midealocal/device.py

+113-109
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
MIN_AUTH_RESPONSE = 20
3030
MIN_MSG_LENGTH = 56
3131
MIN_V2_FACTUAL_MSG_LENGTH = 6
32+
RESPONSE_TIMEOUT = 12 # main loop socket recv timeout, 12 * 10s = 120s
3233
SOCKET_TIMEOUT = 10 # socket connection default timeout
3334
QUERY_TIMEOUT = 2 # query response in 1s, 0xAC have more queries, set to 2s
3435

@@ -150,7 +151,7 @@ def fetch_v2_message(msg: bytes) -> tuple[list, bytes]:
150151
break
151152
return result, msg
152153

153-
def connect(self, init: bool = False, reconnect: bool = False) -> bool:
154+
def connect(self, check_protocol: bool = False) -> bool:
154155
"""Connect to device."""
155156
connected = False
156157
try:
@@ -168,10 +169,8 @@ def connect(self, init: bool = False, reconnect: bool = False) -> bool:
168169
self.authenticate()
169170
# 1. midea_ac_lan add device verify token with connect and auth
170171
# 2. init connection, check_protocol
171-
# 3. reconnect, skip check_protocol
172-
if reconnect or init:
173-
self.refresh_status(check_protocol=init)
174-
if init:
172+
if check_protocol:
173+
self.refresh_status(check_protocol=check_protocol)
175174
self.get_capabilities()
176175
connected = True
177176
except TimeoutError:
@@ -194,7 +193,9 @@ def connect(self, init: bool = False, reconnect: bool = False) -> bool:
194193
exc_info=e,
195194
)
196195
self._socket = None
197-
self.set_available(connected)
196+
# enable/disable device in init connection
197+
if check_protocol:
198+
self.set_available(connected)
198199
return connected
199200

200201
def authenticate(self) -> None:
@@ -246,19 +247,10 @@ def send_message_v2(self, data: bytes, query: bool = False) -> None:
246247
# raise exception to main loop
247248
raise SocketException
248249
try:
249-
_LOGGER.debug(
250-
"[%s] send_message_v2 with data %s",
251-
self._device_id,
252-
data.hex(),
253-
)
254250
# query msg, set timeout to QUERY_TIMEOUT
255251
if query:
256252
self._socket.settimeout(QUERY_TIMEOUT)
257253
self._socket.send(data)
258-
_LOGGER.debug(
259-
"[%s] send_message_v2 success",
260-
self._device_id,
261-
)
262254
except TimeoutError:
263255
_LOGGER.debug(
264256
"[%s] send_message_v2 timed out",
@@ -307,15 +299,6 @@ def build_send(self, cmd: MessageRequest, query: bool = False) -> None:
307299
_LOGGER.debug("[%s] Sending: %s, query is %s", self._device_id, cmd, query)
308300
msg = PacketBuilder(self._device_id, data).finalize()
309301
self.send_message(msg, query=query)
310-
# after send set command, force refresh_status
311-
if cmd.message_type == MessageType.set:
312-
_LOGGER.debug(
313-
"[%s] Force refresh after set status to: %s",
314-
self._device_id,
315-
cmd,
316-
)
317-
now = time.time()
318-
self._previous_refresh = now - self._refresh_interval
319302

320303
def get_capabilities(self) -> None:
321304
"""Get device capabilities."""
@@ -340,39 +323,33 @@ def refresh_status(self, check_protocol: bool = False) -> None:
340323
# set socket QUERY_TIMEOUT for query msg
341324
# build_send exception should be catch by connect/run
342325
self.build_send(cmd, query=True)
343-
try:
344-
while True:
345-
if not self._socket:
346-
_LOGGER.debug(
347-
"[%s] authenticate failure, device socket is none",
348-
self._device_id,
349-
)
350-
# raise exception to connect/main loop
351-
raise SocketException
352-
msg = self._socket.recv(512)
353-
if len(msg) == 0:
354-
raise OSError("Empty message received.")
355-
result = self.parse_message(msg)
356-
# Prevent infinite loop
357-
if result == MessageResult.SUCCESS:
358-
break
359-
elif result == MessageResult.PADDING: # noqa: RET508
360-
continue
361-
else:
362-
raise ResponseException # noqa: TRY301
363-
# recovery SOCKET_TIMEOUT after recv msg
364-
self._socket.settimeout(SOCKET_TIMEOUT)
365-
# only catch TimoutError for check_protocol
366-
# unexpected exception in recv/settimeout, catch by main loop
367-
except TimeoutError:
368-
_LOGGER.debug(
369-
"[%s] protocol %s, cmd %s, timeout",
370-
self._device_id,
371-
cmd.__class__.__name__,
372-
cmd,
373-
)
374-
# init check_protocol, skip timeout exception
375-
if check_protocol:
326+
# init check_protocol, skip timeout exception
327+
if check_protocol:
328+
try:
329+
while True:
330+
if not self._socket:
331+
_LOGGER.debug(
332+
"[%s] device socket is none",
333+
self._device_id,
334+
)
335+
# raise exception to connect/main loop
336+
raise SocketException
337+
msg = self._socket.recv(512)
338+
if len(msg) == 0:
339+
raise ConnectionResetError("Connection closed by peer.")
340+
result = self.parse_message(msg)
341+
# Prevent infinite loop
342+
if result == MessageResult.SUCCESS:
343+
break
344+
elif result == MessageResult.PADDING: # noqa: RET508
345+
continue
346+
else:
347+
raise ResponseException # noqa: TRY301
348+
# recovery SOCKET_TIMEOUT after recv msg
349+
self._socket.settimeout(SOCKET_TIMEOUT)
350+
# only catch TimoutError for check_protocol
351+
# unexpected exception in recv/settimeout, catch by main loop
352+
except TimeoutError:
376353
error_count += 1
377354
self._unsupported_protocol.append(cmd.__class__.__name__)
378355
_LOGGER.debug(
@@ -381,16 +358,24 @@ def refresh_status(self, check_protocol: bool = False) -> None:
381358
cmd.__class__.__name__,
382359
cmd,
383360
)
384-
# refresh_status, raise timeout exception to main loop
385-
else:
386-
raise
387-
except ResponseException:
388-
# parse msg error
389-
error_count += 1
361+
except ResponseException:
362+
# parse msg error
363+
error_count += 1
364+
_LOGGER.debug(
365+
"[%s] refresh_status ResponseException %s, cmd %s",
366+
self._device_id,
367+
cmd.__class__.__name__,
368+
cmd,
369+
)
390370
else:
371+
_LOGGER.debug(
372+
"[%s] refresh_status with cmd: %s, unsupported protocol, SKIP",
373+
self._device_id,
374+
cmd,
375+
)
391376
error_count += 1
392-
# init check_protocol and all the query failed
393-
if check_protocol and error_count == len(cmds):
377+
# all the query failed
378+
if error_count == len(cmds):
394379
_LOGGER.debug(
395380
"[%s] all the query cmds failed %s, please report bug",
396381
self._device_id,
@@ -556,11 +541,9 @@ def close(self) -> None:
556541
self._is_run = False
557542
self.close_socket()
558543

559-
def close_socket(self, init: bool = False) -> None:
544+
def close_socket(self) -> None:
560545
"""Close socket."""
561-
# init connection, check_protocol
562-
if init:
563-
self._unsupported_protocol = []
546+
self._unsupported_protocol = []
564547
self._buffer = b""
565548
if self._socket:
566549
try:
@@ -577,7 +560,7 @@ def set_ip_address(self, ip_address: str) -> None:
577560
if self._ip_address != ip_address:
578561
_LOGGER.debug("[%s] Update IP address to %s", self._device_id, ip_address)
579562
self._ip_address = ip_address
580-
self.close_socket(init=True)
563+
self.close_socket()
581564

582565
def set_refresh_interval(self, refresh_interval: int) -> None:
583566
"""Set refresh interval."""
@@ -593,93 +576,114 @@ def _check_heartbeat(self, now: float) -> None:
593576
self.send_heartbeat()
594577
self._previous_heartbeat = now
595578

596-
def run(self) -> None:
579+
def _connect_loop(self) -> None:
580+
"""Connect loop until device online."""
581+
# connect loop until online
582+
connection_retries = 0
583+
while self._socket is None:
584+
_LOGGER.debug("[%s] Socket is None, try to connect", self._device_id)
585+
if self.connect(check_protocol=True) is False:
586+
self.close_socket()
587+
connection_retries += 1
588+
# Sleep time with exponential backoff, maximum 600 seconds
589+
sleep_time = min(5 * (2 ** (connection_retries - 1)), 600)
590+
_LOGGER.warning(
591+
"[%s] Unable to connect, sleep %s seconds and retry",
592+
self._device_id,
593+
sleep_time,
594+
)
595+
# sleep and reconnect loop until device online
596+
time.sleep(sleep_time)
597+
598+
def run(self) -> None: # noqa: PLR0915
597599
"""Run loop brief description.
598600
599601
1. first/init connection, self._socket is None
600602
1.1 connect() device loop, pass, enable device
601603
1.2 auth for v3 device, MUST pass for v3 device
602-
1.3 init refresh_status, send all query and check supported protocol
604+
1.3 init refresh_status, send query and check supported protocol
603605
1.3.1 set socket timeout to QUERY_TIMEOUT before send query
604606
1.3.2 get response and add timeout query cmd to not supported
605607
1.3.1 parse recv response/status for supported protocol
606608
1.4 get_capabilities()
607-
2. after socket/device connected, loop for heartbeat/refresh_status
609+
2. after socket/device connected, check for heartbeat/refresh_status
608610
3. job1: check refresh_interval
609611
3.1 socket/device connection should exist
610-
3.2 send only supported query to get response and refresh status
611-
3.3 set socket query timeout and recovery after recv msg
612+
3.2 send only supported query and refresh status in main loop recv
613+
3.3 set socket timeout before socket recv
612614
4. job2: check heartbeat interval
613-
4.1 socket connection should exist
615+
4.1 socket/device connection should exist
614616
4.2 send heartbeat packet to keep alive
615617
616618
scenario/bug fix:
617619
1. while True loop should sleep 0.1 second to prevent cpu usage issue
618620
2. device running and power off become offline, status update
619621
3. device disconnected and power on, become online, status update
622+
4. set command call build_send, main loop recv socket msg and refresh
620623
621624
"""
622625
# service loop
623626
while self._is_run:
624-
# connect loop until online
625-
connection_retries = 0
626-
while self._socket is None:
627-
_LOGGER.debug("[%s] Socket is None, try to connect", self._device_id)
628-
if self.connect(init=True) is False:
629-
self.close_socket(init=True)
630-
connection_retries += 1
631-
# Sleep time with exponential backoff, maximum 600 seconds
632-
sleep_time = min(5 * (2 ** (connection_retries - 1)), 600)
633-
_LOGGER.warning(
634-
"[%s] Unable to connect, sleep %s seconds and retry",
635-
self._device_id,
636-
sleep_time,
637-
)
638-
# sleep and reconnect loop until device online
639-
time.sleep(sleep_time)
640-
connection_retries = 0
627+
# connect loop until device online
628+
self._connect_loop()
629+
# socket recv msg timeout counter
630+
timeout_counter = 0
641631
start = time.time()
642632
self._previous_refresh = self._previous_heartbeat = start
643-
# main loop after connected
633+
# refresh/recv msg loop after connected
644634
while True:
645-
reconnect = False
646635
try:
636+
if not self._socket:
637+
_LOGGER.debug("[%s] Socket is none", self._device_id)
638+
raise SocketException # noqa: TRY301
647639
now = time.time()
640+
# refresh_status only send supported query msg
648641
self._check_refresh(now)
649642
self._check_heartbeat(now)
643+
# set SOCKET_TIMEOUT before recv socket msg
644+
self._socket.settimeout(SOCKET_TIMEOUT)
645+
# refresh status after set/query
646+
msg = self._socket.recv(512)
647+
if len(msg) == 0:
648+
raise ConnectionResetError("Connection closed by peer") # noqa: TRY301
649+
# parse msg and update latest status
650+
result = self.parse_message(msg)
651+
if result == MessageResult.SUCCESS:
652+
timeout_counter = 0
653+
if result == MessageResult.ERROR:
654+
_LOGGER.debug("[%s] Message 'ERROR' received", self._device_id)
655+
self.close_socket()
656+
break
650657
except TimeoutError:
651-
_LOGGER.debug("[%s] Socket timed out", self._device_id)
652-
reconnect = True
658+
timeout_counter += 1
659+
if timeout_counter >= RESPONSE_TIMEOUT:
660+
_LOGGER.debug("[%s] Heartbeat timed out", self._device_id)
661+
self.close_socket()
662+
break
653663
except SocketException: # refresh_status
654664
_LOGGER.debug("[%s] Socket Exception", self._device_id)
655-
reconnect = True
665+
self.close_socket()
666+
break
656667
except NoSupportedProtocol:
657668
_LOGGER.debug("[%s] No Supported protocol", self._device_id)
658669
# ignore and continue loop
659670
continue
660671
except ConnectionResetError: # refresh_status -> build_send exception
661672
_LOGGER.debug("[%s] Connection reset by peer", self._device_id)
662-
reconnect = True
673+
self.close_socket()
674+
break
663675
except OSError: # refresh_status
664676
_LOGGER.debug("[%s] OS error", self._device_id)
665-
reconnect = True
677+
self.close_socket()
678+
break
666679
except Exception as e:
667680
_LOGGER.exception(
668681
"[%s] Unexpected error",
669682
self._device_id,
670683
exc_info=e,
671684
)
672-
reconnect = True
673-
# reconnect socket and try to skip check_protocol
674-
if reconnect:
675685
self.close_socket()
676-
if self.connect(reconnect=True):
677-
# pass, continue while True loop
678-
continue
679-
# device disconnect, break while True loop, start main loop
680686
break
681-
# prevent while True loop cpu 100%
682-
time.sleep(0.1)
683687

684688
def set_attribute(self, attr: str, value: bool | int | str) -> None:
685689
"""Set attribute."""

0 commit comments

Comments
 (0)