ci: Test on Windows and macOS #513

Merged: 15 commits, Jul 16, 2024
41 changes: 10 additions & 31 deletions .github/workflows/tests.yml
@@ -1,31 +1,22 @@
name: Tests
on: [push, pull_request]

jobs:
tests:
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest, windows-latest, ubuntu-latest]
python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: install requirements
run: |
python -m pip install --upgrade pip
pip install -e .[test]
- name: Run unit tests
run: |
pytest scrapyd --cov=./ --cov-report=xml

cache: pip
cache-dependency-path: setup.py
- run: pip install -e .[test]
- run: pytest tests --cov scrapyd
- name: Run integration tests
run: |
printf "[scrapyd]\nusername = hello12345\npassword = 67890world\n" > scrapyd.conf
@@ -34,18 +25,6 @@ jobs:
sleep 1
pytest integration_tests
cat scrapyd.log
kill %%

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: ./coverage/reports/
env_vars: OS,PYTHON
fail_ci_if_error: false
files: ./coverage.xml
flags: unittests
name: codecov-umbrella
# https://github.com/codecov/codecov-action/issues/476
# path_to_write_report: ./coverage/codecov_report.txt
verbose: true
- env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: coveralls --service=github
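The `strategy.matrix` in the workflow above fans the single `tests` job out across operating systems and Python versions, and `fail-fast: false` lets the remaining combinations finish even if one fails. A quick sketch of the expansion (the axis lists mirror the workflow; the `jobs` variable is illustrative only):

```python
from itertools import product

# The matrix expands to the cross product of its axes:
# one CI job per (os, python-version) pair.
oses = ["macos-latest", "windows-latest", "ubuntu-latest"]
python_versions = ["3.8", "3.9", "3.10", "3.11", "3.12"]

jobs = list(product(oses, python_versions))
print(len(jobs))  # 15
```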
2 changes: 1 addition & 1 deletion .gitignore
@@ -16,7 +16,7 @@ venv
/_trial_temp
/dbs
/htmlcov
/scrapyd.tests.test_*
/tests.test_*
/scrapyd.conf
/twistd.pid
/eggs
3 changes: 2 additions & 1 deletion MANIFEST.in
@@ -6,7 +6,8 @@ recursive-include docs *.rst
recursive-include docs *.txt
recursive-include docs Makefile
recursive-include scrapyd *.py
recursive-include scrapyd *.egg
recursive-include tests *.egg
recursive-include tests *.py
recursive-include integration_tests *.py
exclude .pre-commit-config.yaml
exclude .readthedocs.yaml
19 changes: 7 additions & 12 deletions docs/contributing.rst
@@ -5,30 +5,30 @@ Contributing

.. important::

Read through the `Scrapy Contribution Docs`_ for tips relating to writing patches, reporting bugs, and project coding style
Read through the `Scrapy Contribution Docs <http://scrapy.readthedocs.org/en/latest/contributing.html>`__ for tips relating to writing patches, reporting bugs, and project coding style

These docs describe how to set up and contribute to Scrapyd.

Reporting issues & bugs
-----------------------

Issues should be reported to the Scrapyd project `issue tracker`_ on GitHub
Issues should be reported to the Scrapyd project `issue tracker <https://github.com/scrapy/scrapyd/issues>`__ on GitHub.

Tests
-----

Tests are implemented using the `Twisted unit-testing framework`_. Scrapyd uses ``trial`` as the test running application.
Tests are implemented using the `Twisted unit-testing framework <http://twistedmatrix.com/documents/current/core/development/policy/test-standard.html>`__. Scrapyd uses ``trial`` as the test running application.

Running tests
-------------

To run all tests go to the root directory of the Scrapyd source code and run:

``trial scrapyd``
``trial tests``

To run a specific test (say ``tests/test_poller.py``) use:

``trial scrapyd.tests.test_poller``
``trial tests.test_poller``


Writing tests
@@ -38,24 +38,19 @@ All functionality (including new features and bug fixes) should include a test
case to check that it works as expected, so please include tests for your
patches if you want them to get accepted sooner.

Scrapyd uses unit-tests, which are located in the `scrapyd/tests`_ directory.
Scrapyd uses unit-tests, which are located in the `tests <https://github.com/scrapy/scrapyd/tree/master/tests>`__ directory.
Their module name typically resembles the full path of the module they're
testing. For example, the scheduler code is in::

scrapyd.scheduler

And their unit-tests are in::

scrapyd/tests/test_scheduler.py
tests/test_scheduler.py

Installing locally
------------------

To install a locally edited version of Scrapyd onto the system to use and test, inside the project root run:

``pip install -e .``

.. _Twisted unit-testing framework: http://twistedmatrix.com/documents/current/core/development/policy/test-standard.html
.. _scrapyd/tests: https://github.com/scrapy/scrapyd/tree/master/scrapyd/tests
.. _issue tracker: https://github.com/scrapy/scrapyd/issues
.. _Scrapy Contribution Docs: http://scrapy.readthedocs.org/en/latest/contributing.html
33 changes: 22 additions & 11 deletions docs/news.rst
@@ -24,34 +24,45 @@ Documentation
Changed
~~~~~~~

- **BACKWARDS-INCOMPATIBLE CHANGE:** The ``IEggStorage.get()`` interface returns a ``(version, filename)`` tuple, instead of a ``(version, file)`` tuple.
- Drop support for end-of-life Python version 3.7.

Web UI
^^^^^^

- Add a confirmation dialog to the Cancel button.
- Add "Last modified" column to the directory listings of log files and item feeds.
- Drop support for end-of-life Python version 3.7.

API
^^^

- If the ``egg`` parameter to the :ref:`addversion.json` webservice is not a ZIP file, use the error message, "egg is not a ZIP file (if using curl, use egg=@path not egg=path)".
- Clarify some error messages, for example:
- Clarify error messages, for example:

- ``'project' parameter is required`` instead of ``'project'``
- ``project 'myproject' not found`` instead of ``'myproject'``
- ``exception class: message`` instead of ``message``
- ``ValueError: Unknown or corrupt egg`` instead of ``TypeError: 'tuple' object is not an iterator``
- ``'project' parameter is required``, instead of ``'project'``
- ``project 'myproject' not found``, instead of ``'myproject'``
- ``exception class: message``, instead of ``message``
- ``ValueError: Unknown or corrupt egg``, instead of ``TypeError: 'tuple' object is not an iterator``

CLI
^^^

- Correct the usage message and long description, remove all ``twistd`` subcommands, and remove the ``--nodaemon`` and ``--python=`` options, which it overrides.
- Run the ``scrapyd.__main__`` module instead of the ``scrapyd.scripts.scrapyd_run`` module.
- Correct the usage message and long description, remove all ``twistd`` subcommands, and remove the ``--nodaemon`` and ``--python=`` options, which are overridden.
- Run the ``scrapyd.__main__`` module, instead of the ``scrapyd.scripts.scrapyd_run`` module.

Fixed
~~~~~

- The :ref:`schedule.json` webservice sets the ``node_name`` field in error responses.
- The :ref:`cancel.json` webservice now works on Windows.
- The next pending job for all but one project was unreported by the :ref:`daemonstatus.json` and :ref:`listjobs.json` webservices, and was not cancellable by the :ref:`cancel.json` webservice.

Platform support
^^^^^^^^^^^^^^^^

Scrapyd is now tested on macOS and Windows, in addition to Linux.

- The :ref:`cancel.json` webservice now works on Windows, by using SIGBREAK instead of SIGINT or SIGTERM.
- The :ref:`dbs_dir` setting no longer causes an error if it contains a drive letter on Windows.
- The :ref:`jobs_to_keep` setting no longer causes an error if a file to delete can't be deleted (for example, if the file is open on Windows).
- When managing multiple projects, the next pending job for all but one project was unreported by the :ref:`daemonstatus.json` and :ref:`listjobs.json` webservices, and was not cancellable by the :ref:`cancel.json` webservice.

1.4.3 (2023-09-25)
------------------
15 changes: 13 additions & 2 deletions integration_tests/test_webservice.py
@@ -1,3 +1,5 @@
import sys

import pytest
import requests
import scrapy
@@ -9,6 +11,9 @@ def assert_webservice(method, path, expected, **kwargs):
response = req(method, path, **kwargs)
json = response.json()
json.pop("node_name")
if "message" in expected:
if sys.platform == 'win32':
expected["message"] = expected["message"].replace("\n", "\r\n")

assert json == expected

@@ -125,7 +130,10 @@ def test_delversion_nonexistent():
"/delversion.json",
{
"status": "error",
"message": "FileNotFoundError: [Errno 2] No such file or directory: 'eggs/nonexistent/noegg.egg'",
"message": "FileNotFoundError: " + (
"[Errno 2] No such file or directory: 'eggs/nonexistent/noegg.egg'" if sys.platform != 'win32'
else "[WinError 3] The system cannot find the path specified: 'eggs\\\\nonexistent\\\\noegg.egg'"
),
},
data={"project": "nonexistent", "version": "noegg"},
)
@@ -137,7 +145,10 @@ def test_delproject_nonexistent():
"/delproject.json",
{
"status": "error",
"message": "FileNotFoundError: [Errno 2] No such file or directory: 'eggs/nonexistent'",
"message": "FileNotFoundError: " + (
"[Errno 2] No such file or directory: 'eggs/nonexistent'" if sys.platform != 'win32'
else "[WinError 3] The system cannot find the path specified: 'eggs\\\\nonexistent'"
),
},
data={"project": "nonexistent"},
)
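The `sys.platform` checks in the tests above exist because on Windows the expected messages come back with CRLF line endings, so LF-only expectations must be rewritten before comparison. A minimal sketch of that rewrite (`platform_expected` is a hypothetical helper name, not part of the test suite):

```python
import sys

def platform_expected(message):
    # On Windows, Scrapyd's error messages arrive with CRLF line endings,
    # so the LF-only expected strings are rewritten before the assertion.
    if sys.platform == 'win32':
        return message.replace("\n", "\r\n")
    return message

result = platform_expected("first line\nsecond line")
```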
36 changes: 17 additions & 19 deletions scrapyd/eggstorage.py
@@ -1,7 +1,7 @@
import os
import re
import shutil
from glob import glob
from os import listdir, makedirs, path, remove
from shutil import copyfileobj, rmtree

from zope.interface import implementer

@@ -17,42 +17,40 @@ def __init__(self, config):

def put(self, eggfile, project, version):
eggpath = self._eggpath(project, version)
eggdir = path.dirname(eggpath)
if not path.exists(eggdir):
makedirs(eggdir)
eggdir = os.path.dirname(eggpath)
if not os.path.exists(eggdir):
os.makedirs(eggdir)
with open(eggpath, 'wb') as f:
copyfileobj(eggfile, f)
shutil.copyfileobj(eggfile, f)

def get(self, project, version=None):
if version is None:
try:
version = self.list(project)[-1]
except IndexError:
return None, None
return version, open(self._eggpath(project, version), 'rb')
return version, self._eggpath(project, version)

def list(self, project):
eggdir = path.join(self.basedir, project)
versions = [path.splitext(path.basename(x))[0]
for x in glob("%s/*.egg" % eggdir)]
versions = [
os.path.splitext(os.path.basename(path))[0]
for path in glob(os.path.join(self.basedir, project, "*.egg"))
]
return sorted_versions(versions)

def list_projects(self):
projects = []
if path.exists(self.basedir):
projects.extend(d for d in listdir(self.basedir)
if path.isdir('%s/%s' % (self.basedir, d)))
return projects
if os.path.exists(self.basedir):
return [name for name in os.listdir(self.basedir) if os.path.isdir(os.path.join(self.basedir, name))]
return []

def delete(self, project, version=None):
if version is None:
rmtree(path.join(self.basedir, project))
shutil.rmtree(os.path.join(self.basedir, project))
else:
remove(self._eggpath(project, version))
os.remove(self._eggpath(project, version))
if not self.list(project): # remove project if no versions left
self.delete(project)

def _eggpath(self, project, version):
sanitized_version = re.sub(r'[^a-zA-Z0-9_-]', '_', version)
x = path.join(self.basedir, project, "%s.egg" % sanitized_version)
return x
return os.path.join(self.basedir, project, f"{sanitized_version}.egg")
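The `_eggpath()` helper above sanitizes the version string before building a filename; in isolation, the substitution looks like this (`sanitize_version` is a hypothetical name for the expression inside `_eggpath`):

```python
import re

def sanitize_version(version):
    # Mirror _eggpath(): any character outside [a-zA-Z0-9_-] becomes "_",
    # so versions such as "1.0" or "v1.2:beta" yield safe egg filenames.
    return re.sub(r'[^a-zA-Z0-9_-]', '_', version)

print(sanitize_version("1.0"))        # 1_0
print(sanitize_version("v1.2:beta"))  # v1_2_beta
```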
2 changes: 1 addition & 1 deletion scrapyd/interfaces.py
@@ -9,7 +9,7 @@ def put(eggfile, project, version):
version"""

def get(project, version=None):
"""Return a tuple (version, file) with the egg for the specified
"""Return a tuple (version, filename) for the egg matching the specified
project and version. If version is None, the latest version is
returned. If no egg is found for the given project/version (None, None)
should be returned."""
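The updated `IEggStorage.get()` contract — a `(version, filename)` tuple, or `(None, None)` when nothing matches — can be sketched with a throwaway storage class (`DummyEggStorage` is hypothetical, not part of Scrapyd):

```python
import os
import tempfile

class DummyEggStorage:
    """Toy storage honoring the new get() contract: (version, filename)."""

    def __init__(self, basedir):
        self.basedir = basedir

    def put(self, data, project, version):
        projectdir = os.path.join(self.basedir, project)
        os.makedirs(projectdir, exist_ok=True)
        with open(os.path.join(projectdir, f"{version}.egg"), "wb") as f:
            f.write(data)

    def get(self, project, version=None):
        projectdir = os.path.join(self.basedir, project)
        if not os.path.isdir(projectdir):
            return None, None  # no egg for this project/version
        if version is None:
            # Plain lexicographic sort here; Scrapyd uses version-aware sorting.
            version = sorted(os.path.splitext(n)[0] for n in os.listdir(projectdir))[-1]
        return version, os.path.join(projectdir, f"{version}.egg")

with tempfile.TemporaryDirectory() as basedir:
    storage = DummyEggStorage(basedir)
    storage.put(b"egg bytes", "myproject", "r1")
    version, filename = storage.get("myproject")
    # The caller now receives a path to open (or pass along), not an open file.
```

This matches the change in `scrapyd/runner.py`, where `activate_egg()` now receives the path directly instead of `eggfile.name` followed by `eggfile.close()`.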
7 changes: 3 additions & 4 deletions scrapyd/runner.py
@@ -18,10 +18,9 @@ def project_environment(project):
eggstorage = eggstorage_cls(config)

eggversion = os.environ.get('SCRAPYD_EGG_VERSION', None)
version, eggfile = eggstorage.get(project, eggversion)
if eggfile:
activate_egg(eggfile.name)
eggfile.close()
version, eggpath = eggstorage.get(project, eggversion)
if eggpath:
activate_egg(eggpath)

assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded"
yield
Expand Down
7 changes: 5 additions & 2 deletions scrapyd/utils.py
@@ -22,7 +22,10 @@ def render(self, txrequest):
return self.render_object(r, txrequest)

def render_object(self, obj, txrequest):
r = self.json_encoder.encode(obj) + "\n"
if obj is None:
r = ''
else:
r = self.json_encoder.encode(obj) + "\n"
txrequest.setHeader('Content-Type', 'application/json')
txrequest.setHeader('Access-Control-Allow-Origin', '*')
txrequest.setHeader('Access-Control-Allow-Methods', 'GET, POST, PATCH, PUT, DELETE')
@@ -62,7 +65,7 @@ def get_spider_queues(config):

def sqlite_connection_string(config, database):
dbs_dir = config.get('dbs_dir', 'dbs')
if dbs_dir == ':memory:' or urlsplit(dbs_dir).scheme:
if dbs_dir == ':memory:' or (urlsplit(dbs_dir).scheme and not os.path.splitdrive(dbs_dir)[0]):
return dbs_dir
if not os.path.exists(dbs_dir):
os.makedirs(dbs_dir)
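The extra `os.path.splitdrive()` check in `sqlite_connection_string()` exists because a Windows drive letter parses as a URL scheme, which previously made paths like `C:\scrapyd\dbs` look like connection strings. The behavior can be seen with `ntpath` (the Windows flavor of `os.path`, used so the sketch runs on any platform; `looks_like_connection_string` is a hypothetical stand-in):

```python
import ntpath
from urllib.parse import urlsplit

dbs_dir = r"C:\scrapyd\dbs"
# The drive letter parses as a URL scheme, so the old check mistook
# Windows paths for connection strings such as "sqlite:///dbs/x.db".
print(urlsplit(dbs_dir).scheme)       # c
print(ntpath.splitdrive(dbs_dir)[0])  # C:

def looks_like_connection_string(path):
    # A real connection string has a scheme but no drive letter.
    return path == ":memory:" or bool(urlsplit(path).scheme and not ntpath.splitdrive(path)[0])

print(looks_like_connection_string(r"C:\scrapyd\dbs"))     # False
print(looks_like_connection_string("sqlite:///dbs/x.db"))  # True
```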
4 changes: 3 additions & 1 deletion scrapyd/webservice.py
@@ -55,7 +55,6 @@ def render_OPTIONS(self, txrequest):
methods.append('POST')
txrequest.setHeader('Allow', ', '.join(methods))
txrequest.setResponseCode(http.NO_CONTENT)
return b''

def _error(self, message):
return {"node_name": self.root.nodename, "status": "error", "message": message}
@@ -104,6 +103,9 @@ def render_POST(self, txrequest):
args = {k: v[0] for k, v in native_stringify_dict(copy(txrequest.args), keys_only=False).items()}
project = _get_required_param(args, 'project')
jobid = _get_required_param(args, 'job')
# Instead of os.name, use sys.platform, which disambiguates Cygwin, which implements SIGINT not SIGBREAK.
# https://cygwin.com/cygwin-ug-net/kill.html
# https://github.com/scrapy/scrapy/blob/06f9c289d1c92dbb8e41a837b886e5cadb81a061/tests/test_crawler.py#L886
signal = args.get('signal', 'INT' if sys.platform != 'win32' else 'BREAK')
prevstate = None
try:
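The platform-dependent default signal chosen in `cancel.json` above can be exercised directly. SIGBREAK exists only on Windows, so the name is resolved via `getattr` rather than referenced unconditionally:

```python
import signal
import sys

# sys.platform (not os.name) distinguishes Cygwin, which implements
# SIGINT rather than SIGBREAK and so should keep the POSIX default.
default = 'INT' if sys.platform != 'win32' else 'BREAK'
signum = getattr(signal, f'SIG{default}')  # signal.SIGINT or signal.SIGBREAK
```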
4 changes: 0 additions & 4 deletions setup.cfg
@@ -1,10 +1,6 @@
[bdist_wheel]
universal = 1

[coverage:run]
include = scrapyd/*
omit = scrapyd/tests*

[flake8]
max-line-length = 119

1 change: 1 addition & 0 deletions setup.py
@@ -32,6 +32,7 @@
],
extras_require={
'test': [
'coveralls',
'pytest',
'pytest-cov',
'requests',
3 files renamed without changes.
2 changes: 1 addition & 1 deletion scrapyd/tests/mockserver.py → tests/mockserver.py
@@ -25,7 +25,7 @@ def __enter__(self, authentication=None):
"""
command = [
sys.executable, '-m',
"scrapyd.tests.start_mock_app",
"tests.start_mock_app",
get_ephemeral_port()
]
if self.authentication is not None:
7 files renamed without changes.