|
30 | 30 | KVConnectorMetadata, |
31 | 31 | KVConnectorRole, |
32 | 32 | ) |
33 | | -from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats |
| 33 | +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( |
| 34 | + KVConnectorPromMetrics, |
| 35 | + KVConnectorStats, |
| 36 | + PromMetric, |
| 37 | + PromMetricT, |
| 38 | +) |
34 | 39 | from vllm.distributed.parallel_state import ( |
35 | 40 | get_tensor_model_parallel_rank, |
36 | 41 | get_tensor_model_parallel_world_size, |
@@ -254,6 +259,15 @@ def build_kv_connector_stats( |
254 | 259 | else NixlKVConnectorStats() |
255 | 260 | ) |
256 | 261 |
|
@classmethod
def build_prom_metrics(
    cls,
    metric_types: dict[type[PromMetric], type[PromMetricT]],
    labelnames: list[str],
    per_engine_labelvalues: dict[int, list[str]],
) -> KVConnectorPromMetrics:
    """Build the NIXL-specific Prometheus metrics object.

    All three arguments are forwarded unchanged to ``NixlPromMetrics``.

    Args:
        metric_types: Mapping handed to the metrics base class, keyed by
            ``PromMetric`` type.
        labelnames: Label names attached to every metric created.
        per_engine_labelvalues: Label values keyed by engine index.

    Returns:
        A freshly constructed ``NixlPromMetrics`` instance.
    """
    prom_metrics = NixlPromMetrics(
        metric_types=metric_types,
        labelnames=labelnames,
        per_engine_labelvalues=per_engine_labelvalues,
    )
    return prom_metrics

| 270 | + |
257 | 271 | def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: |
258 | 272 | assert self.connector_worker is not None |
259 | 273 | assert isinstance(self._connector_metadata, NixlConnectorMetadata) |
@@ -1744,3 +1758,124 @@ def reduce(self) -> dict[str, int | float]: |
@property
def num_successful_transfers(self) -> int:
    """Return how many entries are recorded under ``"transfer_duration"``."""
    recorded_durations = self.data["transfer_duration"]
    return len(recorded_durations)
| 1761 | + |
| 1762 | + |
class NixlPromMetrics(KVConnectorPromMetrics):
    """Prometheus histograms and counters for NIXL KV cache transfers.

    Each metric is created once with the shared ``labelnames`` and then
    expanded through ``make_per_engine`` so it can be indexed by engine
    index when samples are recorded in ``observe``.
    """

    def __init__(
        self,
        metric_types: dict[type[PromMetric], type[PromMetricT]],
        labelnames: list[str],
        per_engine_labelvalues: dict[int, list[str]],
    ):
        """Create all NIXL metrics.

        Args:
            metric_types: Forwarded to the base class constructor.
            labelnames: Label names attached to every metric.
            per_engine_labelvalues: Label values keyed by engine index;
                forwarded to the base class constructor.
        """
        super().__init__(metric_types, labelnames, per_engine_labelvalues)

        # Bucket upper bounds for the "*_seconds" histograms.
        xfer_time_buckets = [
            0.001,
            0.005,
            0.01,
            0.025,
            0.05,
            0.075,
            0.1,
            0.2,
            0.3,
            0.5,
            0.75,
            1.0,
            5.0,
        ]
        # The post-time histogram uses the same bounds minus the smallest one.
        post_time_buckets = xfer_time_buckets[1:]
        # Powers of two from 2 KiB (2**11) to 8 GiB (2**33), growing 4x per
        # bucket. The parentheses matter: ``2**10 + i`` would yield
        # 1025, 1027, ... because ``**`` binds tighter than ``+``.
        bytes_buckets = [2 ** (10 + i) for i in range(1, 24, 2)]
        num_descriptors_buckets = [
            10,
            20,
            30,
            50,
            75,
            100,
            200,
            400,
            1000,
            2000,
            4000,
            10000,
            20000,
            50000,
        ]

        nixl_histogram_xfer_time = self._histogram_cls(
            name="vllm:nixl_xfer_time_seconds",
            documentation="Histogram of transfer duration for NIXL KV Cache transfers.",
            buckets=xfer_time_buckets,
            labelnames=labelnames,
        )
        self.nixl_histogram_xfer_time = self.make_per_engine(nixl_histogram_xfer_time)

        nixl_histogram_post_time = self._histogram_cls(
            name="vllm:nixl_post_time_seconds",
            documentation="Histogram of transfer post time for NIXL KV"
            " Cache transfers.",
            buckets=post_time_buckets,
            labelnames=labelnames,
        )
        self.nixl_histogram_post_time = self.make_per_engine(nixl_histogram_post_time)

        nixl_histogram_bytes_transferred = self._histogram_cls(
            name="vllm:nixl_bytes_transferred",
            documentation="Histogram of bytes transferred per NIXL KV Cache transfers.",
            buckets=bytes_buckets,
            labelnames=labelnames,
        )
        self.nixl_histogram_bytes_transferred = self.make_per_engine(
            nixl_histogram_bytes_transferred
        )

        nixl_histogram_num_descriptors = self._histogram_cls(
            name="vllm:nixl_num_descriptors",
            documentation="Histogram of number of descriptors per NIXL"
            " KV Cache transfers.",
            buckets=num_descriptors_buckets,
            labelnames=labelnames,
        )
        self.nixl_histogram_num_descriptors = self.make_per_engine(
            nixl_histogram_num_descriptors
        )

        counter_nixl_num_failed_transfers = self._counter_cls(
            name="vllm:nixl_num_failed_transfers",
            documentation="Number of failed NIXL KV Cache transfers.",
            labelnames=labelnames,
        )
        self.counter_nixl_num_failed_transfers = self.make_per_engine(
            counter_nixl_num_failed_transfers
        )

        counter_nixl_num_failed_notifications = self._counter_cls(
            name="vllm:nixl_num_failed_notifications",
            documentation="Number of failed NIXL KV Cache notifications.",
            labelnames=labelnames,
        )
        self.counter_nixl_num_failed_notifications = self.make_per_engine(
            counter_nixl_num_failed_notifications
        )

    def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
        """Record one batch of transfer stats against a single engine.

        Args:
            transfer_stats_data: Mapping that must contain the keys
                ``transfer_duration``, ``post_duration``,
                ``bytes_transferred``, ``num_descriptors``,
                ``num_failed_transfers`` and ``num_failed_notifications``.
                Each value is iterated and every element is recorded
                individually.
            engine_idx: Index selecting which per-engine metric to update.

        Raises:
            KeyError: If one of the expected keys is missing from
                ``transfer_stats_data``.
        """
        # Pair each metric with its stats key so the two cannot drift apart.
        histogram_sources = (
            (self.nixl_histogram_xfer_time, "transfer_duration"),
            (self.nixl_histogram_post_time, "post_duration"),
            (self.nixl_histogram_bytes_transferred, "bytes_transferred"),
            (self.nixl_histogram_num_descriptors, "num_descriptors"),
        )
        for per_engine_histogram, stats_key in histogram_sources:
            for sample in transfer_stats_data[stats_key]:
                per_engine_histogram[engine_idx].observe(sample)

        counter_sources = (
            (self.counter_nixl_num_failed_transfers, "num_failed_transfers"),
            (
                self.counter_nixl_num_failed_notifications,
                "num_failed_notifications",
            ),
        )
        for per_engine_counter, stats_key in counter_sources:
            for increment in transfer_stats_data[stats_key]:
                per_engine_counter[engine_idx].inc(increment)
0 commit comments