Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 208 additions & 0 deletions homeassistant/components/thread/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
"""Diagnostics support for Thread networks.

When triaging Matter and HomeKit issues you often need to check for problems with the Thread network.

This report helps spot and rule out:

* Is the user's border router visible at all?
* Is the border router actually announcing any routes? The user could have a network boundary like
VLANs or WiFi isolation that is blocking the RA packets.
* Alternatively, if the user isn't on HAOS they could have accept_ra_rt_info_max_plen set incorrectly.
* Are there any bogus routes that could be interfering? If routes don't expire they can build up.
When you have 10 routes and only 2 border routers, something has gone wrong.

This does not do any connectivity checks. A user could have all their border routers visible and
still have some Thread accessories that can't be pinged; that is still a Thread problem.
"""

from __future__ import annotations

from typing import Any, TypedDict

from pyroute2 import NDB # pylint: disable=no-name-in-module
from python_otbr_api.tlv_parser import MeshcopTLVType

from homeassistant.components import zeroconf
from homeassistant.config_entries import ConfigEntry
from homeassistant.core import HomeAssistant

from .dataset_store import async_get_store
from .discovery import async_read_zeroconf_cache


class Neighbour(TypedDict):
    """A neighbour cache entry (ip neigh).

    Populated from the records of the pyroute2 NDB ``neighbours`` view; each
    field is copied from the attribute of the same name on that record.
    """

    # The record's ``lladdr`` attribute.
    lladdr: str
    # The record's ``state`` attribute.
    state: int
    # The record's ``probes`` attribute.
    probes: int


class Route(TypedDict):
    """A route table entry (ip -6 route).

    Populated from the records of the pyroute2 NDB ``routes`` view.
    """

    # The record's ``metrics`` attribute.
    metrics: int
    # The record's ``priority`` attribute.
    priority: int
    # True when the record carries an ``nh_gateway``, i.e. it is a "nexthop"
    # route (a single route with many via's) rather than a plain gateway route.
    is_nexthop: bool


class Router(TypedDict):
    """A border router.

    Built from one meshcop record read out of the zeroconf cache.
    """

    # Server name from the discovery data; also the key of this router in
    # ``Network["routers"]``.
    server: str | None
    # Addresses from the discovery data (an empty list when it has none).
    addresses: list[str]
    # Neighbour cache entries whose key is one of ``addresses``.
    neighbours: dict[str, Neighbour]
    # Thread version reported in the discovery data, if any.
    thread_version: str | None
    # Model name reported in the discovery data, if any.
    model: str | None
    # Vendor name reported in the discovery data, if any.
    vendor: str | None
    # Routes whose gateway is one of ``addresses``, keyed by route destination.
    routes: dict[str, Route]


class Network(TypedDict):
    """A thread network.

    Networks are keyed by extended PAN ID in the diagnostics report.
    """

    # Network name, taken from whichever source first introduced the network
    # (the dataset store or the zeroconf discovery data).
    name: str | None
    # Border routers seen for this network, keyed by their server name.
    routers: dict[str, Router]
    # The mesh-local prefix of stored datasets plus the destinations of every
    # route attributed to one of this network's routers.
    prefixes: set[str]
    # Gateways found for one of ``prefixes`` that are not an address of any
    # router in ``routers``.
    unexpected_routers: set[str]


def _get_possible_thread_routes() -> (
    tuple[dict[str, dict[str, Route]], dict[str, set[str]]]
):
    """Collect route table entries that could have been announced by Thread.

    A candidate is an IPv6 route to a /64 prefix that goes through a gateway.
    The caller cross-references the gateways with zeroconf data to confirm
    which of them are known border routers.

    Returns a ``(routes, reverse_routes)`` tuple: ``routes`` maps each gateway
    to its routes keyed by destination, and ``reverse_routes`` maps each
    destination to the set of gateways serving it.
    """
    by_gateway: dict[str, dict[str, Route]] = {}
    by_destination: dict[str, set[str]] = {}

    with NDB() as ndb:
        for entry in ndb.routes:
            # Only IPv6 routes (family 10) to a /64 prefix are of interest
            if entry.family != 10 or entry.dst_len != 64:
                continue

            # The route must have a via, either directly or as a nexthop
            via = entry.gateway or entry.nh_gateway
            if not via:
                continue

            details: Route = {
                "metrics": entry.metrics,
                "priority": entry.priority,
                # NM creates "nexthop" routes - a single route with many via's
                # Kernel creates many routes with a single via
                "is_nexthop": entry.nh_gateway is not None,
            }
            by_gateway.setdefault(via, {})[entry.dst] = details
            by_destination.setdefault(entry.dst, set()).add(via)

    return by_gateway, by_destination


def _get_neighbours() -> dict[str, Neighbour]:
    """Return the neighbour cache, keyed by each record's ``dst``."""
    with NDB() as ndb:
        return {
            entry.dst: {
                "lladdr": entry.lladdr,
                "state": entry.state,
                "probes": entry.probes,
            }
            for entry in ndb.neighbours
        }


async def async_get_config_entry_diagnostics(
    hass: HomeAssistant, entry: ConfigEntry
) -> dict[str, Any]:
    """Return diagnostics for all known thread networks.

    Networks are gathered from the dataset store and from the meshcop records
    in the zeroconf cache, keyed by extended PAN ID. Every border router found
    via zeroconf is annotated with the routes and neighbour cache entries that
    reference one of its addresses. Finally, every gateway that serves one of a
    network's prefixes without being an address of one of that network's
    routers is reported under ``unexpected_routers``.

    Returns a dict with a single ``"networks"`` key.
    """

    networks: dict[str, Network] = {}

    # Start with all networks that HA knows about
    store = await async_get_store(hass)
    for record in store.datasets.values():
        if not record.extended_pan_id:
            continue
        network = networks.setdefault(
            record.extended_pan_id,
            {
                "name": record.network_name,
                "routers": {},
                "prefixes": set(),
                "unexpected_routers": set(),
            },
        )
        if mlp := record.dataset.get(MeshcopTLVType.MESHLOCALPREFIX):
            # NOTE(review): presumably mlp is a hex string; the result has no
            # trailing "::" - confirm it matches the route table's dst format.
            network["prefixes"].add(f"{mlp[0:4]}:{mlp[4:8]}:{mlp[8:12]}:{mlp[12:16]}")

    # Find all routes currently active that might be thread related, so we can
    # match them to border routers as we process the zeroconf data.
    routes, reverse_routes = await hass.async_add_executor_job(
        _get_possible_thread_routes
    )

    # Find all neighbours
    neighbours = await hass.async_add_executor_job(_get_neighbours)

    aiozc = await zeroconf.async_get_async_instance(hass)
    for data in async_read_zeroconf_cache(aiozc):
        if not data.extended_pan_id:
            continue

        network = networks.setdefault(
            data.extended_pan_id,
            {
                "name": data.network_name,
                "routers": {},
                "prefixes": set(),
                "unexpected_routers": set(),
            },
        )

        if not data.server:
            continue

        router = network["routers"][data.server] = {
            "server": data.server,
            "addresses": data.addresses or [],
            "neighbours": {},
            "thread_version": data.thread_version,
            "model": data.model_name,
            "vendor": data.vendor_name,
            "routes": {},
        }

        # For every address this border router has, see if we have seen
        # it in the route table as a via - these are the routes it's
        # announcing via RA
        for address in router["addresses"]:
            if address in routes:
                router["routes"].update(routes[address])

            if address in neighbours:
                router["neighbours"][address] = neighbours[address]

        network["prefixes"].update(router["routes"].keys())

    # Find unexpected via's.
    # Collect all router addresses and then for each prefix, find via's that aren't
    # a known router for that prefix.
    for network in networks.values():
        routers: set[str] = set()

        for router in network["routers"].values():
            routers.update(router["addresses"])

        for prefix in network["prefixes"]:
            if prefix not in reverse_routes:
                continue
            # Accumulate instead of assigning: several prefixes may each have
            # unexpected via's, and assigning would keep only those of the
            # prefix that happened to be iterated last.
            network["unexpected_routers"].update(reverse_routes[prefix] - routers)

    return {
        "networks": networks,
    }
90 changes: 66 additions & 24 deletions homeassistant/components/thread/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from collections.abc import Callable
import dataclasses
import logging
from typing import cast

from zeroconf import ServiceListener, Zeroconf
from zeroconf.asyncio import AsyncZeroconf
from zeroconf import BadTypeInNameException, DNSPointer, ServiceListener, Zeroconf
from zeroconf.asyncio import AsyncServiceInfo, AsyncZeroconf

from homeassistant.components import zeroconf
from homeassistant.core import HomeAssistant
Expand All @@ -19,6 +20,8 @@
"HomeAssistant": "homeassistant",
}
THREAD_TYPE = "_meshcop._udp.local."
CLASS_IN = 1
TYPE_PTR = 12


@dataclasses.dataclass
Expand All @@ -31,6 +34,65 @@ class ThreadRouterDiscoveryData:
network_name: str | None
server: str | None
vendor_name: str | None
addresses: list[str] | None
thread_version: str | None


def async_discovery_data_from_service(
service: AsyncServiceInfo,
) -> ThreadRouterDiscoveryData:
"""Get a ThreadRouterDiscoveryData from an AsyncServiceInfo."""

def try_decode(value: bytes | None) -> str | None:
"""Try decoding UTF-8."""
if value is None:
return None
try:
return value.decode()
except UnicodeDecodeError:
return None
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this base64 encode the value prefixed with encode-error:, so we know it's not None?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only moved what was already there so I could re-use it, so I don't know if there's any reason for the current behaviour. I'm not opposed to changing it - @emontnemery?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok let's do it in another PR then.

Copy link
Copy Markdown
Member Author

@Jc2k Jc2k Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I would like that detail in the diagnostic report, but the same decoder is used by the thread panel and it would be weird there. Probably better to show nothing than show the user garbage? Maybe leave as is for now and we can revisit if we see it in practice? Can always include the raw TXT record (in the diagnostics report only) if we start seeing trash border routers?


ext_pan_id = service.properties.get(b"xp")
network_name = try_decode(service.properties.get(b"nn"))
model_name = try_decode(service.properties.get(b"mn"))
server = service.server
vendor_name = try_decode(service.properties.get(b"vn"))
thread_version = try_decode(service.properties.get(b"tv"))
return ThreadRouterDiscoveryData(
brand=KNOWN_BRANDS.get(vendor_name),
extended_pan_id=ext_pan_id.hex() if ext_pan_id is not None else None,
model_name=model_name,
network_name=network_name,
server=server,
vendor_name=vendor_name,
addresses=service.parsed_addresses(),
thread_version=thread_version,
)


def async_read_zeroconf_cache(aiozc: AsyncZeroconf) -> list[ThreadRouterDiscoveryData]:
    """Build discovery data for every meshcop service held in the zeroconf cache.

    Only cached PTR records for the meshcop service type are considered.
    Services whose name has a bad type, or whose details are not fully cached,
    are skipped.
    """
    zc = aiozc.zeroconf
    discovered: list[ThreadRouterDiscoveryData] = []

    for entry in zc.cache.async_all_by_details(THREAD_TYPE, TYPE_PTR, CLASS_IN):
        pointer = cast(DNSPointer, entry)

        try:
            info = AsyncServiceInfo(THREAD_TYPE, pointer.alias)
        except BadTypeInNameException as ex:
            _LOGGER.debug(
                "Ignoring record with bad type in name: %s: %s", pointer.alias, ex
            )
            continue

        # Services whose data is not fully in the cache are ignored for now
        if info.load_from_cache(zc):
            discovered.append(async_discovery_data_from_service(info))

    return discovered


class ThreadRouterDiscovery:
Expand Down Expand Up @@ -83,35 +145,15 @@ async def _add_update_service(self, type_: str, name: str):
_LOGGER.debug("_add_update_service failed to add %s, %s", type_, name)
return

def try_decode(value: bytes | None) -> str | None:
"""Try decoding UTF-8."""
if value is None:
return None
try:
return value.decode()
except UnicodeDecodeError:
return None

_LOGGER.debug("_add_update_service %s %s", name, service)
# We use the extended mac address as key, bail out if it's missing
try:
extended_mac_address = service.properties[b"xa"].hex()
except (KeyError, UnicodeDecodeError) as err:
_LOGGER.debug("_add_update_service failed to parse service %s", err)
return
ext_pan_id = service.properties.get(b"xp")
network_name = try_decode(service.properties.get(b"nn"))
model_name = try_decode(service.properties.get(b"mn"))
server = service.server
vendor_name = try_decode(service.properties.get(b"vn"))
data = ThreadRouterDiscoveryData(
brand=KNOWN_BRANDS.get(vendor_name),
extended_pan_id=ext_pan_id.hex() if ext_pan_id is not None else None,
model_name=model_name,
network_name=network_name,
server=server,
vendor_name=vendor_name,
)

data = async_discovery_data_from_service(service)
if name in self._known_routers and self._known_routers[name] == (
extended_mac_address,
data,
Expand Down
2 changes: 1 addition & 1 deletion homeassistant/components/thread/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
"documentation": "https://www.home-assistant.io/integrations/thread",
"integration_type": "service",
"iot_class": "local_polling",
"requirements": ["python-otbr-api==1.0.3"],
"requirements": ["python-otbr-api==1.0.3", "pyroute2==0.7.5"],
"zeroconf": ["_meshcop._udp.local."]
}
3 changes: 3 additions & 0 deletions requirements_all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1916,6 +1916,9 @@ pyrisco==0.5.7
# homeassistant.components.rituals_perfume_genie
pyrituals==0.0.6

# homeassistant.components.thread
pyroute2==0.7.5

# homeassistant.components.ruckus_unleashed
pyruckus==0.16

Expand Down
3 changes: 3 additions & 0 deletions requirements_test_all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,9 @@ pyrisco==0.5.7
# homeassistant.components.rituals_perfume_genie
pyrituals==0.0.6

# homeassistant.components.thread
pyroute2==0.7.5

# homeassistant.components.ruckus_unleashed
pyruckus==0.16

Expand Down
3 changes: 2 additions & 1 deletion tests/components/thread/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import pytest

from homeassistant.components import thread
from homeassistant.core import HomeAssistant

from tests.common import MockConfigEntry

CONFIG_ENTRY_DATA = {}


@pytest.fixture(name="thread_config_entry")
async def thread_config_entry_fixture(hass):
async def thread_config_entry_fixture(hass: HomeAssistant):
"""Mock Thread config entry."""
config_entry = MockConfigEntry(
data=CONFIG_ENTRY_DATA,
Expand Down
Loading