Skip to content

Commit

Permalink
Optimize the legacy JSON endpoints (pypi#11920)
Browse files Browse the repository at this point in the history
* Add Release.is_prerelease to denormalize prerelease data

* Migrate existing data and switch to using Release.is_prerelease

* Switch legacy JSON endpoints to a better performing query

* Move migration revision forward

* Move classifier ordering into the database

* Update trove-classifiers

* Use the new methods in trove-classifiers

* Fix tests

* reformat

* more test fixes

* Run each migration in its own transaction

* Run 100000 updates per chunk

* reformat

* reformat
  • Loading branch information
dstufft authored and SamirPS committed Aug 30, 2022
1 parent f4c11f0 commit eda0632
Show file tree
Hide file tree
Showing 18 changed files with 399 additions and 153 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ initdb: .state/docker-build-web
docker-compose run --rm web psql -h db -d warehouse -U postgres -c "UPDATE users SET name='Ee Durbin' WHERE username='ewdurbin'"
docker-compose run --rm web python -m warehouse db upgrade head
docker-compose run --rm web python -m warehouse sponsors populate-db
docker-compose run --rm web python -m warehouse classifiers sync
$(MAKE) reindex

reindex: .state/docker-build-web
Expand Down
3 changes: 3 additions & 0 deletions bin/release
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ python -m warehouse db upgrade head

# Insert/upgrade malware checks.
python -m warehouse malware sync-checks

# Insert/upgrade classifiers.
python -m warehouse classifiers sync
6 changes: 3 additions & 3 deletions requirements/main.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1253,9 +1253,9 @@ translationstring==1.4 \
--hash=sha256:5f4dc4d939573db851c8d840551e1a0fb27b946afe3b95aafc22577eed2d6262 \
--hash=sha256:bf947538d76e69ba12ab17283b10355a9ecfbc078e6123443f43f2107f6376f3
# via pyramid
trove-classifiers==2022.6.26 \
--hash=sha256:361d6e85bcea11b90be8b4c3ab4f23ddea0c6ee566ca4a82f5f2e4318d08c1b8 \
--hash=sha256:97be455919ba5d0f715147e7f4e17f64c8d46645d9f1dbac92a68201ca2373d7
trove-classifiers==2022.7.22 \
--hash=sha256:0545d0e62af12c722578c9c99f3391b9ce81fa4fe1fd608d6c029b6a55da6456 \
--hash=sha256:b5d23dbbaf3969c1b8d14e2f56cda4c3b6427c0f9917f367f179f9db393b8721
# via -r requirements/main.in
typeguard==2.13.3 \
--hash=sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4 \
Expand Down
3 changes: 3 additions & 0 deletions tests/common/db/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class Meta:

id = factory.Faker("uuid4", cast_to=None)
name = factory.Faker("pystr", max_chars=12)
normalized_name = factory.LazyAttribute(
lambda o: packaging.utils.canonicalize_name(o.name)
)


class ProjectEventFactory(WarehouseFactory):
Expand Down
64 changes: 64 additions & 0 deletions tests/unit/cli/test_classifiers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pretend

from warehouse import db
from warehouse.classifiers.models import Classifier
from warehouse.cli import classifiers


def test_classifiers_update(db_request, monkeypatch, cli):
    """The sync command inserts missing classifiers and rewrites ordering."""
    # Stand-in engine/config; db.Session is patched to hand back the test's
    # own session so assertions below can observe what the CLI wrote.
    engine = pretend.stub()
    config = pretend.stub(registry={"sqlalchemy.engine": engine})
    session_cls = pretend.call_recorder(lambda bind: db_request.db)
    monkeypatch.setattr(db, "Session", session_cls)

    # Snapshot the classifiers already in the database, in display order.
    cs = [
        c.classifier
        for c in db_request.db.query(Classifier).order_by(Classifier.ordering).all()
    ]

    # Prepend two classifiers so the canonical list disagrees with the DB:
    # "C :: D" should sort first, "A :: B" second.
    monkeypatch.setattr(classifiers, "sorted_classifiers", ["C :: D", "A :: B"] + cs)

    # "A :: B" pre-exists with a stale ordering; "C :: D" does not exist yet.
    db_request.db.add(Classifier(classifier="A :: B", ordering=0))
    assert db_request.db.query(Classifier).filter_by(classifier="C :: D").count() == 0
    cli.invoke(classifiers.sync, obj=config)

    # sync created the missing classifier and placed it first.
    c = db_request.db.query(Classifier).filter_by(classifier="C :: D").one()

    assert c.classifier == "C :: D"
    assert c.ordering == 0

    # The pre-existing classifier was not duplicated; its ordering was fixed.
    c = db_request.db.query(Classifier).filter_by(classifier="A :: B").one()

    assert c.classifier == "A :: B"
    assert c.ordering == 1


def test_classifiers_no_update(db_request, monkeypatch, cli):
    """Syncing an already up-to-date classifier table changes nothing."""
    # Fake the engine/config plumbing; reuse the test session so we can
    # inspect the database state after the CLI command runs.
    fake_engine = pretend.stub()
    cli_config = pretend.stub(registry={"sqlalchemy.engine": fake_engine})
    make_session = pretend.call_recorder(lambda bind: db_request.db)
    monkeypatch.setattr(db, "Session", make_session)

    before = db_request.db.query(Classifier).order_by(Classifier.ordering).all()

    # Pretend the canonical classifier list matches the database exactly.
    current_names = [row.classifier for row in before]
    monkeypatch.setattr(classifiers, "sorted_classifiers", current_names)

    cli.invoke(classifiers.sync, obj=cli_config)

    after = db_request.db.query(Classifier).order_by(Classifier.ordering).all()

    # Nothing should have been added, removed, or reordered.
    assert before == after
71 changes: 0 additions & 71 deletions tests/unit/forklift/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3134,77 +3134,6 @@ def test_upload_succeeds_creates_release(
),
]

def test_upload_succeeds_creates_classifier(
    self, pyramid_config, db_request, metrics, monkeypatch
):
    """A file upload listing a not-yet-stored classifier creates it in the DB.

    NOTE(review): per the diff header this test is being deleted
    (0 additions & 71 deletions) — classifiers are presumably now synced
    from trove-classifiers instead of created at upload time; confirm.
    """
    # Authenticate the request as user id 1.
    pyramid_config.testing_securitypolicy(userid=1)

    # A project owned by a user with a verified email, so upload is allowed.
    user = UserFactory.create()
    EmailFactory.create(user=user)
    project = ProjectFactory.create()
    RoleFactory.create(user=user, project=project)

    # Both classifiers are considered valid; only "AA :: BB" exists in the DB.
    monkeypatch.setattr(legacy, "classifiers", {"AA :: BB", "CC :: DD"})

    db_request.db.add(Classifier(classifier="AA :: BB"))

    filename = "{}-{}.tar.gz".format(project.name, "1.0")

    db_request.user = user
    db_request.user_agent = "warehouse-tests/6.6.6"
    # Minimal metadata-1.2 sdist upload form.
    db_request.POST = MultiDict(
        {
            "metadata_version": "1.2",
            "name": project.name,
            "version": "1.0",
            "summary": "This is my summary!",
            "filetype": "sdist",
            "md5_digest": _TAR_GZ_PKG_MD5,
            "content": pretend.stub(
                filename=filename,
                file=io.BytesIO(_TAR_GZ_PKG_TESTDATA),
                type="application/tar",
            ),
        }
    )
    # Repeatable form fields (classifiers, dependency metadata) are appended
    # as extra key/value pairs on the MultiDict.
    db_request.POST.extend(
        [
            ("classifiers", "AA :: BB"),
            ("classifiers", "CC :: DD"),
            ("requires_dist", "foo"),
            ("requires_dist", "bar (>1.0)"),
            ("project_urls", "Test, https://example.com/"),
            ("requires_external", "Cheese (>1.0)"),
            ("provides", "testing"),
        ]
    )

    # Stub out file storage and metrics so no real services are touched.
    storage_service = pretend.stub(store=lambda path, filepath, meta: None)
    db_request.find_service = lambda svc, name=None, context=None: {
        IFileStorage: storage_service,
        IMetricsService: metrics,
    }.get(svc)

    resp = legacy.file_upload(db_request)

    assert resp.status_code == 200

    # Ensure that a new Classifier has been created
    classifier = (
        db_request.db.query(Classifier)
        .filter(Classifier.classifier == "CC :: DD")
        .one()
    )
    assert classifier.classifier == "CC :: DD"

    # Ensure that the Release has the new classifier
    release = (
        db_request.db.query(Release)
        .filter((Release.project == project) & (Release.version == "1.0"))
        .one()
    )
    assert release.classifiers == ["AA :: BB", "CC :: DD"]

def test_all_valid_classifiers_can_be_created(self, db_request):
for classifier in classifiers:
db_request.db.add(Classifier(classifier=classifier))
Expand Down
60 changes: 44 additions & 16 deletions tests/unit/legacy/api/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def _assert_has_cors_headers(headers):
class TestJSONProject:
def test_normalizing_redirects(self, db_request):
project = ProjectFactory.create()
ReleaseFactory.create(project=project, version="1.0")

name = project.name.lower()
if name == project.normalized_name:
Expand All @@ -52,7 +53,7 @@ def test_normalizing_redirects(self, db_request):
lambda name: "/project/the-redirect/"
)

resp = json.json_project(project, db_request)
resp = json.json_project(db_request)

assert isinstance(resp, HTTPMovedPermanently)
assert resp.headers["Location"] == "/project/the-redirect/"
Expand All @@ -63,7 +64,8 @@ def test_normalizing_redirects(self, db_request):

def test_missing_release(self, db_request):
project = ProjectFactory.create()
resp = json.json_project(project, db_request)
db_request.matchdict = {"name": project.normalized_name}
resp = json.json_project(db_request)
assert isinstance(resp, HTTPNotFound)
_assert_has_cors_headers(resp.headers)

Expand All @@ -81,8 +83,9 @@ def test_with_prereleases(self, monkeypatch, db_request):
lambda request, project, release, *, all_releases: data
)
monkeypatch.setattr(json, "_json_data", json_data)
db_request.matchdict = {"name": project.normalized_name}

rvalue = json.json_project(project, db_request)
rvalue = json.json_project(db_request)

assert rvalue is data
assert json_data.calls == [
Expand All @@ -102,8 +105,9 @@ def test_only_prereleases(self, monkeypatch, db_request):
lambda request, project, release, *, all_releases: data
)
monkeypatch.setattr(json, "_json_data", json_data)
db_request.matchdict = {"name": project.normalized_name}

rvalue = json.json_project(project, db_request)
rvalue = json.json_project(db_request)

assert rvalue is data
assert json_data.calls == [
Expand All @@ -129,8 +133,9 @@ def test_all_releases_yanked(self, monkeypatch, db_request):
lambda request, project, release, *, all_releases: data
)
monkeypatch.setattr(json, "_json_data", json_data)
db_request.matchdict = {"name": project.normalized_name}

rvalue = json.json_project(project, db_request)
rvalue = json.json_project(db_request)

assert rvalue is data
assert json_data.calls == [
Expand All @@ -156,8 +161,9 @@ def test_latest_release_yanked(self, monkeypatch, db_request):
lambda request, project, release, *, all_releases: data
)
monkeypatch.setattr(json, "_json_data", json_data)
db_request.matchdict = {"name": project.normalized_name}

rvalue = json.json_project(project, db_request)
rvalue = json.json_project(db_request)

assert rvalue is data
assert json_data.calls == [
Expand All @@ -184,8 +190,9 @@ def test_all_non_prereleases_yanked(self, monkeypatch, db_request):
lambda request, project, release, *, all_releases: data
)
monkeypatch.setattr(json, "_json_data", json_data)
db_request.matchdict = {"name": project.normalized_name}

rvalue = json.json_project(project, db_request)
rvalue = json.json_project(db_request)

assert rvalue is data
assert json_data.calls == [
Expand Down Expand Up @@ -254,8 +261,9 @@ def test_renders(self, pyramid_config, db_request, db_session):
je = JournalEntryFactory.create(name=project.name, submitted_by=user)

db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
db_request.matchdict = {"name": project.normalized_name}

result = json.json_project(project, db_request)
result = json.json_project(db_request)

assert set(db_request.route_url.calls) == {
pretend.call("packaging.file", path=files[0].path),
Expand Down Expand Up @@ -405,6 +413,7 @@ def test_renders(self, pyramid_config, db_request, db_session):
class TestJSONProjectSlash:
def test_normalizing_redirects(self, db_request):
project = ProjectFactory.create()
ReleaseFactory.create(project=project, version="1.0")

name = project.name.lower()
if name == project.normalized_name:
Expand All @@ -415,7 +424,7 @@ def test_normalizing_redirects(self, db_request):
lambda name: "/project/the-redirect/"
)

resp = json.json_project_slash(project, db_request)
resp = json.json_project_slash(db_request)

assert isinstance(resp, HTTPMovedPermanently)
assert resp.headers["Location"] == "/project/the-redirect/"
Expand All @@ -434,12 +443,12 @@ def test_normalizing_redirects(self, db_request):
if name == release.project.normalized_name:
name = release.project.name.upper()

db_request.matchdict = {"name": name}
db_request.matchdict = {"name": name, "version": "3.0"}
db_request.current_route_path = pretend.call_recorder(
lambda name: "/project/the-redirect/3.0/"
)

resp = json.json_release(release, db_request)
resp = json.json_release(db_request)

assert isinstance(resp, HTTPMovedPermanently)
assert resp.headers["Location"] == "/project/the-redirect/3.0/"
Expand All @@ -448,6 +457,13 @@ def test_normalizing_redirects(self, db_request):
pretend.call(name=release.project.normalized_name)
]

def test_missing_release(self, db_request):
    """Requesting JSON for a nonexistent release 404s, with CORS headers set."""
    project = ProjectFactory.create()
    # The view resolves the release from matchdict (name + version) rather
    # than receiving it as a context argument.
    db_request.matchdict = {"name": project.normalized_name, "version": "3.0"}
    resp = json.json_release(db_request)
    assert isinstance(resp, HTTPNotFound)
    _assert_has_cors_headers(resp.headers)

def test_detail_renders(self, pyramid_config, db_request, db_session):
project = ProjectFactory.create(has_docs=True)
description_content_type = "text/x-rst"
Expand Down Expand Up @@ -510,8 +526,12 @@ def test_detail_renders(self, pyramid_config, db_request, db_session):
je = JournalEntryFactory.create(name=project.name, submitted_by=user)

db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
db_request.matchdict = {
"name": project.normalized_name,
"version": releases[3].canonical_version,
}

result = json.json_release(releases[3], db_request)
result = json.json_release(db_request)

assert set(db_request.route_url.calls) == {
pretend.call("packaging.file", path=files[2].path),
Expand Down Expand Up @@ -597,8 +617,12 @@ def test_minimal_renders(self, pyramid_config, db_request):

url = "/the/fake/url/"
db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
db_request.matchdict = {
"name": project.normalized_name,
"version": release.canonical_version,
}

result = json.json_release(release, db_request)
result = json.json_release(db_request)

assert set(db_request.route_url.calls) == {
pretend.call("packaging.file", path=file.path),
Expand Down Expand Up @@ -679,8 +703,12 @@ def test_vulnerabilities_renders(self, pyramid_config, db_request):

url = "/the/fake/url/"
db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
db_request.matchdict = {
"name": project.normalized_name,
"version": release.canonical_version,
}

result = json.json_release(release, db_request)
result = json.json_release(db_request)

assert result["vulnerabilities"] == [
{
Expand All @@ -704,12 +732,12 @@ def test_normalizing_redirects(self, db_request):
if name == release.project.normalized_name:
name = release.project.name.upper()

db_request.matchdict = {"name": name}
db_request.matchdict = {"name": name, "version": "3.0"}
db_request.current_route_path = pretend.call_recorder(
lambda name: "/project/the-redirect/3.0/"
)

resp = json.json_release_slash(release, db_request)
resp = json.json_release_slash(db_request)

assert isinstance(resp, HTTPMovedPermanently)
assert resp.headers["Location"] == "/project/the-redirect/3.0/"
Expand Down
Loading

0 comments on commit eda0632

Please sign in to comment.