|
23 | 23 | from sphinx.domains import Domain, Index, IndexEntry, ObjType
|
24 | 24 | from sphinx.environment import BuildEnvironment
|
25 | 25 | from sphinx.locale import _, __
|
| 26 | +from sphinx.pycode.parser import Token, TokenProcessor |
26 | 27 | from sphinx.roles import XRefRole
|
27 | 28 | from sphinx.util import logging
|
28 | 29 | from sphinx.util.docfields import Field, GroupedField, TypedField
|
|
39 | 40 | logger = logging.getLogger(__name__)
|
40 | 41 |
|
41 | 42 |
|
42 |
| -# REs for Python signatures |
| 43 | +# REs for Python signatures (supports PEP 695) |
43 | 44 | py_sig_re = re.compile(
|
44 | 45 | r'''^ ([\w.]*\.)? # class name(s)
|
45 | 46 | (\w+) \s* # thing name
|
| 47 | + (?: \[\s*(.*)\s*])? # optional: type parameters list (PEP 695) |
46 | 48 | (?: \(\s*(.*)\s*\) # optional: arguments
|
47 | 49 | (?:\s* -> \s* (.*))? # return annotation
|
48 | 50 | )? $ # and nothing more
|
@@ -257,6 +259,265 @@ def _unparse_pep_604_annotation(node: ast.Subscript) -> list[Node]:
|
257 | 259 | return [type_to_xref(annotation, env)]
|
258 | 260 |
|
259 | 261 |
|
class _TypeParameterListParser(TokenProcessor):
    """Collect the type parameters found in a PEP 695 type parameter list.

    Once :meth:`parse` has run, ``self.tparams`` holds one
    ``(name, kind, default, bound)`` tuple per type parameter.  *kind* is an
    ``inspect.Parameter`` kind; *default* and *bound* are either the
    formatted source text of the expression or ``Parameter.empty``.
    """

    def __init__(self, sig: str) -> None:
        # line breaks are dropped so that the base class is handed the
        # whole signature as a single line
        signature = ''.join(sig.splitlines()).strip()
        super().__init__([signature])
        # Each item is a tuple (name, kind, default, bound) mimicking
        # inspect.Parameter to allow default values on VAR_POSITIONAL
        # or VAR_KEYWORD parameters.
        self.tparams: list[tuple[str, int, Any, Any]] = []

    def fetch_tparam_spec(self) -> list[Token]:
        """Return the tokens making up a bound or a default value.

        Tokens are consumed until a ``:``, ``=`` or ``,`` operator is met
        outside of a bracketed group, or until no token is left.  The
        terminating operator is not part of the returned list but stays
        available as ``self.current`` so the caller can inspect it.
        """
        from token import DEDENT, INDENT, OP

        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            for ldelim, rdelim in [('(', ')'), ('{', '}'), ('[', ']')]:
                if self.current == [OP, ldelim]:
                    # delegate the whole group to fetch_until() so that the
                    # delimiters it contains do not end the specification
                    tokens += self.fetch_until([OP, rdelim])
                    break
            else:
                # only reached when the current token opens no group
                if self.current == INDENT:
                    tokens += self.fetch_until(DEDENT)
                elif self.current.match([OP, ':'], [OP, '='], [OP, ',']):
                    # the delimiter itself does not belong to the spec
                    tokens.pop()
                    break
        return tokens

    def parse(self) -> None:
        """Scan the signature and fill ``self.tparams``.

        Every NAME token met by the main loop starts a type parameter.  A
        preceding ``*`` or ``**`` makes it VAR_POSITIONAL or VAR_KEYWORD, a
        following ``:`` introduces its bound (or constraints) and a
        following ``=`` its default value.

        :raises SyntaxError: if a ``*`` or ``**`` parameter has a bound.
        """
        from token import NAME, OP

        while self.fetch_token():
            if self.current == NAME:
                tpname = self.current.value.strip()
                if self.previous and self.previous.match([OP, '*'], [OP, '**']):
                    if self.previous == [OP, '*']:
                        tpkind = Parameter.VAR_POSITIONAL
                    else:
                        tpkind = Parameter.VAR_KEYWORD
                else:
                    tpkind = Parameter.POSITIONAL_OR_KEYWORD

                tpbound: Any = Parameter.empty
                tpdefault: Any = Parameter.empty

                self.fetch_token()
                if self.current and self.current.match([OP, ':'], [OP, '=']):
                    if self.current == [OP, ':']:
                        tpbound = self._build_identifier(self.fetch_tparam_spec())
                    # not an "elif": fetching the bound may have stopped on
                    # the '=' that introduces the default value
                    if self.current == [OP, '=']:
                        tpdefault = self._build_identifier(self.fetch_tparam_spec())

                if tpkind != Parameter.POSITIONAL_OR_KEYWORD and tpbound != Parameter.empty:
                    raise SyntaxError('type parameter bound or constraint is not allowed '
                                      f'for {tpkind.description} parameters')

                tparam = (tpname, tpkind, tpdefault, tpbound)
                self.tparams.append(tparam)

    def _build_identifier(self, tokens: list[Token]) -> str:
        """Join *tokens* into the text of a bound or default expression.

        Binary operators are surrounded by spaces whereas ``*`` and ``**``
        used as unpack operators are glued to their operand.  An empty
        token list gives an empty string.
        """
        from token import ENDMARKER, NAME, NUMBER, OP, STRING

        idents: list[str] = []

        # Opening brackets at the very beginning are emitted as they are, so
        # that the first token *inside* them is the one checked for being an
        # unpack operator (e.g. the star in "[*Ts]" has no left operand).
        start = 0
        while start < len(tokens) and tokens[start].match(
            [OP, '('], [OP, '['], [OP, '{'],
        ):
            idents.append(tokens[start].value)
            start += 1

        if start < len(tokens):
            head = tokens[start]
            is_unpack_operator = head.match([OP, '*'], [OP, '**'])
            idents.append(self._pformat_token(head, native=is_unpack_operator))
            start += 1

        # two sentinels so that every remaining token gets a full
        # (token, next, next-next) window
        end = Token(ENDMARKER, '', (-1, -1), (-1, -1), '<generated>')
        rest = [*tokens[start:], end, end]

        is_unpack_operator = False
        for token, op, after in zip(rest, rest[1:], rest[2:]):
            idents.append(self._pformat_token(token, native=is_unpack_operator))
            # determine if the next token is an unpack operator depending
            # on the left and right hand side of the operator symbol
            is_unpack_operator = (
                op.match([OP, '*'], [OP, '**']) and not (
                    token.match(NAME, NUMBER, STRING)
                    and after.match(NAME, NUMBER, STRING)
                )
            )

        return ''.join(idents).strip()

    def _pformat_token(self, token: Token, native: bool = False) -> str:
        """Return the text to emit for *token*.

        With *native* set, the raw token value is returned without any
        spacing (used for unpack operators).
        """
        from token import ENDMARKER, NEWLINE, OP

        if native:
            return token.value

        if token.match(NEWLINE, ENDMARKER):
            return ''

        if token.match([OP, ':'], [OP, ','], [OP, '#']):
            return f'{token.value} '

        # Arithmetic operators are allowed because PEP 695 specifies the
        # default type parameter to be *any* expression (so "T1 << T2" is
        # allowed if it makes sense). The caller is responsible to ensure
        # that a multiplication operator ("*") is not to be confused with
        # an unpack operator (which will not be surrounded by spaces).
        #
        # The operators are ordered according to how likely they are to
        # be used and for (possible) future implementations (e.g., "&" for
        # an intersection type).
        if token.match(
            # most likely operators to appear
            [OP, '='], [OP, '|'],
            # type composition (future compatibility)
            [OP, '&'], [OP, '^'], [OP, '<'], [OP, '>'],
            # unlikely type composition
            [OP, '+'], [OP, '-'], [OP, '*'], [OP, '**'],
            # unlikely operators but included for completeness
            [OP, '@'], [OP, '/'], [OP, '//'], [OP, '%'],
            [OP, '<<'], [OP, '>>'], [OP, '>>>'],
            [OP, '<='], [OP, '>='], [OP, '=='], [OP, '!='],
        ):
            return f' {token.value} '

        return token.value
| 462 | + |
| 463 | + |
def _parse_tplist(
    tplist: str, env: BuildEnvironment | None = None,
    multi_line_parameter_list: bool = False,
) -> addnodes.desc_tparameterlist:
    """Parse a list of type parameters according to PEP 695.

    :param tplist: the text found between the square brackets.
    :param env: the build environment, forwarded to ``_parse_annotation``
        when a bound is parsed.
    :param multi_line_parameter_list: stored as is on the returned node
        under the ``'multi_line_parameter_list'`` key.
    :returns: a ``desc_tparameterlist`` node holding one ``desc_parameter``
        child per type parameter.
    :raises SyntaxError: if a type parameter is positional-only or
        keyword-only; the parser may also raise it for a bound on a
        ``*`` or ``**`` parameter.
    """
    tparams = addnodes.desc_tparameterlist(tplist)
    tparams['multi_line_parameter_list'] = multi_line_parameter_list
    # formal parameter names are interpreted as type parameter names and
    # type annotations are interpreted as type parameter bounds
    parser = _TypeParameterListParser(tplist)
    parser.parse()
    for (tpname, tpkind, tpdefault, tpbound) in parser.tparams:
        # no positional-only or keyword-only allowed in a type parameters
        # list; raised explicitly (rather than asserted) so that the check
        # is not stripped when running with "python -O"
        if tpkind in {Parameter.POSITIONAL_ONLY, Parameter.KEYWORD_ONLY}:
            raise SyntaxError('positional-only or keyword-only parameters '
                              'are prohibited in type parameter lists')

        node = addnodes.desc_parameter()
        if tpkind == Parameter.VAR_POSITIONAL:
            node += addnodes.desc_sig_operator('', '*')
        elif tpkind == Parameter.VAR_KEYWORD:
            node += addnodes.desc_sig_operator('', '**')
        node += addnodes.desc_sig_name('', tpname)

        if tpbound is not Parameter.empty:
            type_bound = _parse_annotation(tpbound, env)
            if not type_bound:
                # nothing could be produced for the bound: the whole type
                # parameter is left out of the list
                continue

            node += addnodes.desc_sig_punctuation('', ':')
            node += addnodes.desc_sig_space()

            type_bound_expr = addnodes.desc_sig_name('', '', *type_bound)
            # add delimiters around type bounds written as e.g., "(T1, T2)"
            if tpbound.startswith('(') and tpbound.endswith(')'):
                type_bound_text = type_bound_expr.astext()
                if type_bound_text.startswith('(') and type_bound_text.endswith(')'):
                    # the parsed nodes already carry the parentheses
                    node += type_bound_expr
                else:
                    node += addnodes.desc_sig_punctuation('', '(')
                    node += type_bound_expr
                    node += addnodes.desc_sig_punctuation('', ')')
            else:
                node += type_bound_expr

        if tpdefault is not Parameter.empty:
            # "=" is surrounded by spaces only when it follows a bound or
            # belongs to a "*"/"**" parameter; a plain "T=default" is compact
            if tpbound is not Parameter.empty or tpkind != Parameter.POSITIONAL_OR_KEYWORD:
                node += addnodes.desc_sig_space()
                node += addnodes.desc_sig_operator('', '=')
                node += addnodes.desc_sig_space()
            else:
                node += addnodes.desc_sig_operator('', '=')
            node += nodes.inline('', tpdefault, classes=['default_value'],
                                 support_smartquotes=False)

        tparams += node
    return tparams
| 519 | + |
| 520 | + |
260 | 521 | def _parse_arglist(
|
261 | 522 | arglist: str, env: BuildEnvironment | None = None, multi_line_parameter_list: bool = False,
|
262 | 523 | ) -> addnodes.desc_parameterlist:
|
@@ -514,7 +775,7 @@ def handle_signature(self, sig: str, signode: desc_signature) -> tuple[str, str]
|
514 | 775 | m = py_sig_re.match(sig)
|
515 | 776 | if m is None:
|
516 | 777 | raise ValueError
|
517 |
| - prefix, name, arglist, retann = m.groups() |
| 778 | + prefix, name, tplist, arglist, retann = m.groups() |
518 | 779 |
|
519 | 780 | # determine module and class name (if applicable), as well as full name
|
520 | 781 | modname = self.options.get('module', self.env.ref_context.get('py:module'))
|
@@ -570,6 +831,14 @@ def handle_signature(self, sig: str, signode: desc_signature) -> tuple[str, str]
|
570 | 831 | signode += addnodes.desc_addname(nodetext, nodetext)
|
571 | 832 |
|
572 | 833 | signode += addnodes.desc_name(name, name)
|
| 834 | + |
| 835 | + if tplist: |
| 836 | + try: |
| 837 | + signode += _parse_tplist(tplist, self.env, multi_line_parameter_list) |
| 838 | + except Exception as exc: |
| 839 | + logger.warning("could not parse tplist (%r): %s", tplist, exc, |
| 840 | + location=signode) |
| 841 | + |
573 | 842 | if arglist:
|
574 | 843 | try:
|
575 | 844 | signode += _parse_arglist(arglist, self.env, multi_line_parameter_list)
|
|
0 commit comments