Skip to content

Commit

Permalink
improve ux for displacy when the serve port is in use (#11948)
Browse files Browse the repository at this point in the history
* check port in use and add itself

* check port in use and add itself

* Auto switch to nearest available port.

* Use bind to check port instead of connect_ex.

* Reformat.

* Add auto_select_port argument.

* update docs for displacy.serve

* Update spacy/errors.py

Co-authored-by: Paul O'Leary McCann <[email protected]>

* Update website/docs/api/top-level.md

Co-authored-by: Paul O'Leary McCann <[email protected]>

* Update spacy/errors.py

Co-authored-by: Paul O'Leary McCann <[email protected]>

* Add test using multiprocessing

* fix argument name

* Increase sleep times

Want to rule this out as a cause of test failure

* Don't terminate a process that isn't alive

* Refactor port finding logic

This moves all the port logic into its own util function, which can be
tested without having to background a server directly.

* Use with for the server

This ensures the server is closed correctly.

* Pass in the host when checking port availability

* Shorten argument name

* Update error codes following merge

* Add types for arguments, specify docstrings.

* Add typing for arguments with default value.

* Update docstring to match spaCy format.

* Update docstring to match spaCy format.

* Fix docs

Arg name changed from `auto_select_port` to just `auto_select`.

* Revert "Fix docs"

This reverts commit 356966f.

Co-authored-by: zhiiw <[email protected]>
Co-authored-by: Paul O'Leary McCann <[email protected]>
Co-authored-by: Raphael Mitsch <[email protected]>
  • Loading branch information
4 people authored Jan 10, 2023
1 parent 6d03b04 commit eb8bb35
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 12 deletions.
9 changes: 8 additions & 1 deletion spacy/displacy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..tokens import Doc, Span
from ..errors import Errors, Warnings
from ..util import is_in_jupyter
from ..util import find_available_port


_html = {}
Expand Down Expand Up @@ -82,6 +83,7 @@ def serve(
manual: bool = False,
port: int = 5000,
host: str = "0.0.0.0",
auto_select_port: bool = False,
) -> None:
"""Serve displaCy visualisation.
Expand All @@ -93,15 +95,20 @@ def serve(
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
port (int): Port to serve visualisation.
host (str): Host to serve visualisation.
auto_select_port (bool): Automatically select a port if the specified port is in use.
DOCS: https://spacy.io/api/top-level#displacy.serve
USAGE: https://spacy.io/usage/visualizers
"""
from wsgiref import simple_server

port = find_available_port(port, host, auto_select_port)

if is_in_jupyter():
warnings.warn(Warnings.W011)
render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
render(
docs, style=style, page=page, minify=minify, options=options, manual=manual
)
httpd = simple_server.make_server(host, port, app)
print(f"\nUsing the '{style}' visualizer")
print(f"Serving on http://{host}:{port} ...\n")
Expand Down
5 changes: 5 additions & 0 deletions spacy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ class Warnings(metaclass=ErrorsWithCodes):
"is a Cython extension type.")
W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
"`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")


class Errors(metaclass=ErrorsWithCodes):
Expand Down Expand Up @@ -963,6 +964,10 @@ class Errors(metaclass=ErrorsWithCodes):
"knowledge base, use `InMemoryLookupKB`.")
E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.")
E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}")
E1049 = ("No available port found for displaCy on host {host}. Please specify an available port "
"with `displacy.serve(doc, port)`")
# Raised when the requested displaCy port is busy and automatic selection is
# off. The hint must name the real `displacy.serve` keyword, `auto_select_port`.
E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port)` "
         "or use `auto_select_port=True` to pick an available port automatically.")


# Deprecated model shortcuts, only used in errors and warnings
Expand Down
15 changes: 14 additions & 1 deletion spacy/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from spacy.ml._precomputable_affine import PrecomputableAffine
from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
from spacy.util import dot_to_object, SimpleFrozenList, import_file
from spacy.util import to_ternary_int
from spacy.util import to_ternary_int, find_available_port
from thinc.api import Config, Optimizer, ConfigValidationError
from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps
from thinc.compat import has_cupy_gpu, has_torch_mps_gpu
Expand Down Expand Up @@ -434,3 +434,16 @@ def test_to_ternary_int():
assert to_ternary_int(-10) == -1
assert to_ternary_int("string") == -1
assert to_ternary_int([0, "string"]) == -1


def test_find_available_port():
    """Check port selection both when the port is free and when it is taken."""
    from wsgiref.simple_server import make_server, demo_app

    address = "0.0.0.0"
    requested = 5000

    # With nothing listening, the requested port is returned unchanged.
    assert find_available_port(requested, address) == requested, "Port 5000 isn't free"

    # Hold the port with a real server so the auto-select path has to move on.
    with make_server(address, requested, demo_app):
        with pytest.warns(UserWarning, match="already in use"):
            chosen = find_available_port(requested, address, auto_select=True)
        assert chosen == requested + 1, "Didn't find next port"
48 changes: 48 additions & 0 deletions spacy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import inspect
import pkgutil
import logging
import socket

try:
import cupy.random
Expand Down Expand Up @@ -1736,3 +1737,50 @@ def all_equal(iterable):
(or if the input is an empty sequence), False otherwise."""
g = itertools.groupby(iterable)
return next(g, True) and not next(g, False)


def _is_port_in_use(port: int, host: str = "localhost") -> bool:
    """Probe whether 'host:port' is in use by trying to bind a TCP socket to it.

    port (int): The port to check.
    host (str): The host to check (default "localhost").
    RETURNS (bool): True if binding to 'host:port' fails, False if it succeeds.
    """
    # The context manager closes the probe socket on every exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        try:
            probe.bind((host, port))
        except OSError:  # socket.error is an alias of OSError
            return True
    return False


def find_available_port(start: int, host: str, auto_select: bool = False) -> int:
    """Resolve the port to serve on, beginning with a preferred port.

    A free `start` port is returned as-is. If it is busy, the outcome depends
    on `auto_select`: False raises an error, True walks upwards to the next
    free port and warns about the switch.

    start (int): The port to start looking from.
    host (str): The host to find a port on.
    auto_select (bool): Whether to automatically select a new port if the
        given port is busy (default False).
    RETURNS (int): The port to use.
    RAISES (ValueError): If `start` is busy and `auto_select` is False, or if
        no free port is found up to and including 65535.
    """
    last_port = 65535

    if _is_port_in_use(start, host):
        if not auto_select:
            raise ValueError(Errors.E1050.format(port=start))

        # Walk upwards from `start` until a port can be bound or the range ends.
        candidate = start
        while True:
            busy = _is_port_in_use(candidate, host)
            if not busy or candidate >= last_port:
                break
            candidate += 1

        # Stopping on the last port does not mean it is free, so check again.
        if candidate == last_port and _is_port_in_use(candidate, host):
            raise ValueError(Errors.E1049.format(host=host))

        # Reaching this point means a port other than `start` was chosen.
        warnings.warn(
            Warnings.W124.format(host=host, port=start, serve_port=candidate)
        )
        return candidate

    return start
21 changes: 11 additions & 10 deletions website/docs/api/top-level.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,16 +237,17 @@ browser. Will run a simple web server.
> displacy.serve([doc1, doc2], style="dep")
> ```
| Name | Description |
| --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ |
| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` <Tag variant="new">3.3</Tag>. Defaults to `"dep"`. ~~str~~ |
| `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ |
| `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ |
| `options` | [Visualizer-specific options](#displacy_options), e.g. colors. ~~Dict[str, Any]~~ |
| `manual` | Don't parse `Doc` and instead expect a dict or list of dicts. [See here](/usage/visualizers#manual-usage) for formats and examples. Defaults to `False`. ~~bool~~ |
| `port` | Port to serve visualization. Defaults to `5000`. ~~int~~ |
| `host` | Host to serve visualization. Defaults to `"0.0.0.0"`. ~~str~~ |
| Name | Description |
| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ |
| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` <Tag variant="new">3.3</Tag>. Defaults to `"dep"`. ~~str~~ |
| `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ |
| `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ |
| `options` | [Visualizer-specific options](#displacy_options), e.g. colors. ~~Dict[str, Any]~~ |
| `manual` | Don't parse `Doc` and instead expect a dict or list of dicts. [See here](/usage/visualizers#manual-usage) for formats and examples. Defaults to `False`. ~~bool~~ |
| `port` | Port to serve visualization. Defaults to `5000`. ~~int~~ |
| `host` | Host to serve visualization. Defaults to `"0.0.0.0"`. ~~str~~ |
| `auto_select_port` | If `True`, automatically switch to a different port if the specified port is already in use. Defaults to `False`. ~~bool~~ |
### displacy.render {#displacy.render tag="method" new="2"}
Expand Down

0 comments on commit eb8bb35

Please sign in to comment.