Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/scripts/check-release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh

# Release guard: exit non-zero unless the git tag being released matches the
# version declared in the package's version file.
# Must be run from the repository root (the version file path is relative).

# For a tag push GITHUB_REF has the form "refs/tags/<tag>": keep the third
# "/"-separated field and drop an optional leading "v".
current_tag=$(echo "$GITHUB_REF" | cut -d '/' -f 3 | sed 's/^v//')
# Take everything after the "=" on the `__version__ = "x.y.z"` line and strip
# spaces, double quotes and commas to get the bare version number.
file_tag=$(grep '__version__ =' scraper/src/config/version.py | cut -d '=' -f 2- | tr -d ' ' | tr -d '"' | tr -d ',')
if [ "$current_tag" != "$file_tag" ]; then
  echo "Error: the current tag does not match the version in package file(s)."
  echo "$current_tag vs $file_tag"
  exit 1
fi

echo 'OK'
exit 0
3 changes: 3 additions & 0 deletions .github/workflows/publish-docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Check release validity
run: sh .github/scripts/check-release.sh

- name: Set up QEMU
uses: docker/setup-qemu-action@v2

Expand Down
3 changes: 2 additions & 1 deletion scraper/src/config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
import copy

from .version import qualified_version
from .config_validator import ConfigValidator
from .urls_parser import UrlsParser
from .selectors_parser import SelectorsParser
Expand Down Expand Up @@ -44,7 +45,7 @@ class ConfigLoader:
strict_redirect = True
strip_chars = ".,;:§¶"
use_anchors = False
user_agent = 'Meilisearch docs-scraper'
user_agent = qualified_version()
only_content_level = False
query_rules = []

Expand Down
9 changes: 9 additions & 0 deletions scraper/src/config/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

__version__ = "0.12.9"


def qualified_version() -> str:
    """Return the agent string naming this scraper together with its version."""
    agent = f"Meilisearch DocsScraper (v{__version__})"
    return agent
4 changes: 3 additions & 1 deletion scraper/src/meilisearch_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import meilisearch
from builtins import range

from .config.version import qualified_version

def remove_bad_encoding(value):
    """Replace every HTML-encoded apostrophe (``&#x27;``) in *value* with ``'``.

    Returns a new string; text without the entity is returned unchanged.
    """
    return value.replace('&#x27;', "'")

Expand Down Expand Up @@ -99,7 +101,7 @@ class MeiliSearchHelper:
}

def __init__(self, host_url, api_key, index_uid, custom_settings):
self.meilisearch_client = meilisearch.Client(host_url, api_key)
self.meilisearch_client = meilisearch.Client(host_url, api_key, client_agents=(qualified_version(),))
self.meilisearch_index = self.meilisearch_client.index(index_uid)
self.delete_index()
self.add_settings(MeiliSearchHelper.SETTINGS, custom_settings)
Expand Down
12 changes: 12 additions & 0 deletions tests/config_loader/basic_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# coding: utf-8
import re
from scraper.src.config.version import __version__, qualified_version
from scraper.src.config.config_loader import ConfigLoader
from .abstract import config
import pytest
Expand Down Expand Up @@ -48,3 +50,13 @@ def test_excpetion_when_shadowing_(self):

with pytest.raises(Exception):
ConfigLoader(c)

def test_get_qualified_version(self):
    """ A `user_agent` set to the qualified version must be exposed unchanged by the loaded config """
    c = config({
        'user_agent': qualified_version()
    })

    config_loaded = ConfigLoader(c)

    assert config_loaded.user_agent == f"Meilisearch DocsScraper (v{__version__})"
Empty file.
21 changes: 21 additions & 0 deletions tests/meilisearch_helper/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MASTER_KEY = "masterKey"
BASE_URL = "http://127.0.0.1:7700"

DEFAULT_INDEX = 'index_uid'


def _enqueued_task(task_type):
    """Build an "enqueued" task payload of the given type for DEFAULT_INDEX."""
    return {
        "taskUid": 1,
        "indexUid": DEFAULT_INDEX,
        "status": "enqueued",
        "type": task_type,
        "enqueuedAt": "2023-03-30T13:24:01.789654093Z",
    }


# Shared payload fixtures; they differ only in their "type" field.
DEFAULT_DATA_DELETE = _enqueued_task("indexDeletion")
DEFAULT_DATA_PATCH = _enqueued_task("settingsUpdate")

DEFAULT_ACCEPTED_STATUS = 202
37 changes: 37 additions & 0 deletions tests/meilisearch_helper/meilisearch_helper_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# coding: utf-8
import json
from unittest.mock import patch

import requests

from scraper.src.meilisearch_helper import MeiliSearchHelper
from scraper.src.config.version import __version__
from tests.meilisearch_helper import common


class TestMeilisearchHelper:
    # Stacked `patch` decorators inject their mocks bottom-up: the decorator
    # closest to the function (`requests.patch`) supplies the first argument.
    @patch("requests.delete")
    @patch("requests.patch")
    def test_use_meilisearch_default(self, mock_patch, mock_delete):
        """ Should set the docs-scraper `User-Agent` by default """
        # Given: both HTTP verbs answer with a canned "accepted" task response.
        # NOTE(review): `__name__` is set explicitly, presumably because the
        # client reads it from the HTTP callable - confirm against the client.
        mock_delete.configure_mock(__name__="delete")
        delete_response = requests.models.Response()
        delete_response.status_code = common.DEFAULT_ACCEPTED_STATUS
        delete_response._content = json.dumps(common.DEFAULT_DATA_DELETE).encode('utf-8')
        mock_delete.return_value = delete_response

        mock_patch.configure_mock(__name__="patch")
        patch_response = requests.models.Response()
        patch_response.status_code = common.DEFAULT_ACCEPTED_STATUS
        patch_response._content = json.dumps(common.DEFAULT_DATA_PATCH).encode('utf-8')
        mock_patch.return_value = patch_response

        # When
        actual = MeiliSearchHelper(
            common.BASE_URL,
            common.MASTER_KEY,
            common.DEFAULT_INDEX,
            MeiliSearchHelper.SETTINGS
        )

        # Then
        # NOTE(review): the expected value hard-codes the client agent
        # "Meilisearch Python (v0.27.0)", so it is tied to that client version.
        assert actual.meilisearch_client.http.headers['User-Agent'] == f"Meilisearch Python (v0.27.0);Meilisearch DocsScraper (v{__version__})"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that all these headers are not separated by a white space makes matching harder on our analytics tools no?
It might not be a prefix search strategy though!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question, I have no idea, but I think @brunoocasali will be better able to answer, as these entries will be used through Amplitude.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope @bidoubiwa it is correct like this, they use the ; semicolon as the separator.

More about it here in this comment: meilisearch/integration-guides#150 (comment)

9 changes: 9 additions & 0 deletions tests/meilisearch_helper/version_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# coding: utf-8
import re

from scraper.src.config.version import __version__


class TestInit:
    def test_get_version(self):
        """`__version__` must be one to three dot-separated parts: digits, or `*` as the last part."""
        version_pattern = re.compile(r"^(\d+\.)?(\d+\.)?(\*|\d+)$")
        assert version_pattern.match(__version__)