|
3 | 3 |
|
4 | 4 | class IEggStorage(Interface):
|
5 | 5 | """
|
6 |
| - A component that handles storing and retrieving eggs. |
| 6 | + A component to store project eggs. |
7 | 7 | """
|
8 | 8 |
|
9 | 9 | def put(eggfile, project, version):
|
10 |
| - """Store the egg (passed in the file object) under the given project and |
11 |
| - version""" |
| 10 | + """ |
| 11 | + Store the egg (a file object), which represents a ``version`` of the ``project``. |
| 12 | + """ |
12 | 13 |
|
13 | 14 | def get(project, version=None):
|
14 |
| - """Return a tuple (version, file) for the egg matching the specified |
15 |
| - project and version. If version is None, the latest version is |
16 |
| - returned. If no egg is found for the given project/version (None, None) |
17 |
| - should be returned.""" |
| 15 | + """ |
| 16 | + Return ``(version, file)`` for the egg matching the ``project`` and ``version``. |
| 17 | +
|
| 18 | + If ``version`` is ``None``, the latest version and corresponding file are returned. |
| 19 | +
|
| 20 | + If no egg is found, ``(None, None)`` is returned. |
| 21 | +
|
| 22 | + .. tip:: Remember to close the ``file`` when done. |
| 23 | + """ |
18 | 24 |
|
19 | 25 | def list(project):
|
20 |
| - """Return the list of versions which have eggs stored (for the given |
21 |
| - project) in order (the latest version is the currently used).""" |
| 26 | + """ |
| 27 | + Return all versions of the ``project`` in order, with the latest version last. |
| 28 | + """ |
22 | 29 |
|
23 | 30 | def list_projects():
|
24 | 31 | """
|
25 |
| - Return the list of projects from the stored eggs. |
| 32 | + Return all projects in storage. |
26 | 33 |
|
27 | 34 | .. versionadded:: 1.3.0
|
28 | 35 | Move this logic into the interface and its implementations, to allow customization.
|
29 | 36 | """
|
30 | 37 |
|
31 | 38 | def delete(project, version=None):
|
32 |
| - """Delete the egg stored for the given project and version. If should |
33 |
| - also delete the project if no versions are left""" |
| 39 | + """ |
| 40 | + Delete the egg matching the ``project`` and ``version``. Delete the ``project`` if no versions remain. |
| 41 | + """ |
34 | 42 |
|
35 | 43 |
|
36 | 44 | class IPoller(Interface):
|
37 | 45 | """
|
38 |
| - A component that polls for projects that need to run. |
| 46 | + A component that tracks capacity for new jobs, and starts jobs when ready. |
39 | 47 | """
|
40 | 48 |
|
41 | 49 | queues = Attribute(
|
42 | 50 | """
|
43 | 51 | An object (like a ``dict``) with a ``__getitem__`` method that accepts a project's name and returns its
|
44 |
| - :py:interface:`spider queue<scrapyd.interfaces.ISpiderQueue>`. |
| 52 | + :py:interface:`spider queue<scrapyd.interfaces.ISpiderQueue>` of pending jobs. |
45 | 53 | """
|
46 | 54 | )
|
47 | 55 |
|
48 | 56 | def poll():
|
49 |
| - """Called periodically to poll for projects""" |
| 57 | + """ |
| 58 | + Called periodically to start jobs if there's capacity. |
| 59 | + """ |
50 | 60 |
|
51 | 61 | def next():
|
52 |
| - """Return the next message. |
| 62 | + """ |
| 63 | + Return the next pending job. |
53 | 64 |
|
54 |
| - It should return a Deferred which will get fired when there is a new |
55 |
| - project that needs to run, or already fired if there was a project |
56 |
| - waiting to run already. |
| 65 | + It should return a Deferred that will be fired when there's capacity, or already fired if there's capacity. |
57 | 66 |
|
58 |
| - The message is a dict containing (at least): |
| 67 | + The pending job is a ``dict`` containing at least the ``_project`` name, ``_spider`` name and ``_job`` ID. |
| 68 | + The job ID is unique, at least within the project. |
59 | 69 |
|
60 |
| - - the name of the project to be run in the ``_project`` key |
61 |
| - - the name of the spider to be run in the ``_spider`` key |
62 |
| - - a unique identifier for this run in the ``_job`` key |
| 70 | + The pending job is later passed to :meth:`scrapyd.interfaces.IEnvironment.get_environment`. |
63 | 71 |
|
64 |
| - This message will be passed later to :meth:`scrapyd.interfaces.IEnvironment.get_environment`. |
| 72 | + .. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop` |
65 | 73 | """
|
66 | 74 |
|
67 | 75 | def update_projects():
|
68 |
| - """Called when projects may have changed, to refresh the available |
69 |
| - projects, including at initialization""" |
| 76 | + """ |
| 77 | + Called when projects may have changed, to refresh the available projects, including at initialization. |
| 78 | + """ |
70 | 79 |
|
71 | 80 |
|
72 | 81 | class ISpiderQueue(Interface):
|
| 82 | + """ |
| 83 | + A component to store pending jobs. |
| 84 | +
|
| 85 | + The ``dict`` keys used by the chosen ``ISpiderQueue`` implementation must match the chosen: |
| 86 | +
|
| 87 | + - :ref:`launcher` service (which calls :meth:`scrapyd.interfaces.IPoller.next`) |
| 88 | + - :py:interface:`~scrapyd.interfaces.IEnvironment` implementation (see :meth:`scrapyd.interfaces.IPoller.next`) |
| 89 | + - :ref:`webservices<config-services>` that schedule, cancel or list pending jobs |
| 90 | + """ |
| 91 | + |
73 | 92 | def add(name, priority, **spider_args):
|
74 | 93 | """
|
75 |
| - Add a spider to the queue given its name a some spider arguments. |
76 |
| -
|
77 |
| - This method can return a deferred. |
| 94 | + Add a pending job, given the spider ``name``, crawl ``priority`` and keyword arguments, which might include the |
| 95 | + ``_job`` ID, egg ``_version`` and Scrapy ``settings`` depending on the implementation, with keyword arguments |
| 96 | + that are not recognized by the implementation being treated as spider arguments. |
78 | 97 |
|
79 | 98 | .. versionchanged:: 1.3.0
|
80 | 99 | Add the ``priority`` parameter.
|
81 | 100 | """
|
82 | 101 |
|
83 | 102 | def pop():
|
84 |
| - """Pop the next message from the queue. The messages is a dict |
85 |
| - containing a key ``name`` with the spider name and other keys as spider |
86 |
| - attributes. |
87 |
| -
|
88 |
| - This method can return a deferred.""" |
| 103 | + """ |
| 104 | + Pop the next pending job. The pending job is a ``dict`` containing the spider ``name``. Depending on the |
| 105 | + implementation, other keys might include the ``_job`` ID, egg ``_version`` and Scrapy ``settings``, with |
| 106 | + keyword arguments that are not recognized by the receiver being treated as spider arguments. |
| 107 | + """ |
89 | 108 |
|
90 | 109 | def list():
|
91 |
| - """Return a list with the messages in the queue. Each message is a dict |
92 |
| - which must have a ``name`` key (with the spider name), and other optional |
93 |
| - keys that will be used as spider arguments, to create the spider. |
| 110 | + """ |
| 111 | + Return the pending jobs. |
94 | 112 |
|
95 |
| - This method can return a deferred.""" |
| 113 | + .. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop` |
| 114 | + """ |
96 | 115 |
|
97 | 116 | def count():
|
98 |
| - """Return the number of spiders in the queue. |
99 |
| -
|
100 |
| - This method can return a deferred.""" |
| 117 | + """ |
| 118 | + Return the number of pending jobs. |
| 119 | + """ |
101 | 120 |
|
102 | 121 | def remove(func):
|
103 |
| - """Remove all elements from the queue for which func(element) is true, |
104 |
| - and return the number of removed elements. |
| 122 | + """ |
| 123 | + Remove pending jobs for which ``func(job)`` is true, and return the number of removed pending jobs. |
105 | 124 | """
|
106 | 125 |
|
107 | 126 | def clear():
|
108 |
| - """Clear the queue. |
109 |
| -
|
110 |
| - This method can return a deferred.""" |
| 127 | + """ |
| 128 | + Remove all pending jobs. |
| 129 | + """ |
111 | 130 |
|
112 | 131 |
|
113 | 132 | class ISpiderScheduler(Interface):
|
114 | 133 | """
|
115 |
| - A component to schedule spider runs. |
| 134 | + A component to schedule jobs. |
116 | 135 | """
|
117 | 136 |
|
118 | 137 | def schedule(project, spider_name, priority, **spider_args):
|
119 | 138 | """
|
120 |
| - Schedule a spider for the given project. |
| 139 | + Schedule a crawl. |
121 | 140 |
|
122 | 141 | .. versionchanged:: 1.3.0
|
123 | 142 | Add the ``priority`` parameter.
|
124 | 143 | """
|
125 | 144 |
|
126 | 145 | def list_projects():
|
127 |
| - """Return the list of available projects""" |
| 146 | + """ |
| 147 | + Return all projects that can be scheduled. |
| 148 | + """ |
128 | 149 |
|
129 | 150 | def update_projects():
|
130 |
| - """Called when projects may have changed, to refresh the available |
131 |
| - projects, including at initialization""" |
| 151 | + """ |
| 152 | + Called when projects may have changed, to refresh the available projects, including at initialization. |
| 153 | + """ |
132 | 154 |
|
133 | 155 |
|
134 | 156 | class IEnvironment(Interface):
|
135 | 157 | """
|
136 |
| - A component to generate the environment of crawler processes. |
| 158 | + A component to generate the environment of jobs. |
| 159 | +
|
| 160 | + The chosen ``IEnvironment`` implementation must match the chosen :ref:`launcher` service. |
137 | 161 | """
|
138 | 162 |
|
139 | 163 | def get_settings(message):
|
140 | 164 | """
|
141 | 165 | Return the Scrapy settings to use for running the process.
|
142 | 166 |
|
143 |
| - ``message`` is the message received from the :meth:`scrapyd.interfaces.IPoller.next` method. |
| 167 | + Depending on the chosen :ref:`launcher`, this would be one or more of ``LOG_FILE`` or ``FEEDS``. |
144 | 168 |
|
145 | 169 | .. versionadded:: 1.4.2
|
146 | 170 | Support for overriding Scrapy settings via ``SCRAPY_`` environment variables was removed in Scrapy 2.8.
|
| 171 | +
|
| 172 | + :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method |
147 | 173 | """
|
148 | 174 |
|
149 | 175 | def get_environment(message, slot):
|
150 |
| - """Return the environment variables to use for running the process. |
| 176 | + """ |
| 177 | + Return the environment variables to use for running the process. |
151 | 178 |
|
152 |
| - ``message`` is the message received from the :meth:`scrapyd.interfaces.IPoller.next` method. |
153 |
| - ``slot`` is the ``Launcher`` slot where the process will be running. |
| 179 | + Depending on the chosen :ref:`launcher`, this would be one or more of ``SCRAPY_PROJECT``, |
| 180 | + ``SCRAPYD_EGG_VERSION`` or ``SCRAPY_SETTINGS_MODULE``. |
| 181 | +
|
| 182 | + :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method |
| 183 | + :param slot: the :ref:`launcher` slot for tracking the process |
154 | 184 | """
|
155 | 185 |
|
156 | 186 |
|
157 | 187 | class IJobStorage(Interface):
|
158 | 188 | """
|
159 |
| - A component that handles storing and retrieving finished jobs. |
| 189 | + A component to store finished jobs. |
160 | 190 |
|
161 | 191 | .. versionadded:: 1.3.0
|
162 | 192 | """
|
163 | 193 |
|
164 | 194 | def add(job):
|
165 |
| - """Add a finished job in the storage.""" |
| 195 | + """ |
| 196 | + Add a finished job in the storage. |
| 197 | + """ |
166 | 198 |
|
167 | 199 | def list():
|
168 | 200 | """
|
169 |
| - Return a list of the finished jobs. |
| 201 | + Return the finished jobs. |
170 | 202 |
|
171 | 203 | .. seealso:: :meth:`scrapyd.interfaces.IJobStorage.__iter__`
|
172 | 204 | """
|
173 | 205 |
|
174 | 206 | def __len__():
|
175 |
| - """Return a number of the finished jobs.""" |
| 207 | + """ |
| 208 | + Return the number of finished jobs. |
| 209 | + """ |
176 | 210 |
|
177 | 211 | def __iter__():
|
178 | 212 | """
|
179 | 213 | Iterate over the finished jobs in reverse order by ``end_time``.
|
180 | 214 |
|
181 |
| - A job has the attributes ``project``, ``spider``, ``job``, ``start_time`` and ``end_time``. |
| 215 | + A job has the attributes ``project``, ``spider``, ``job``, ``start_time`` and ``end_time`` and may have the |
| 216 | + attributes ``args`` (``scrapy crawl`` CLI arguments) and ``env`` (environment variables). |
182 | 217 | """
|
0 commit comments