From 472f44f7790b3fae624f5e57ebfb671f62fb72a3 Mon Sep 17 00:00:00 2001 From: reglim Date: Wed, 30 Nov 2022 13:51:00 +0100 Subject: [PATCH] Feat(docat): Add Search Functionality We use an index database, which we expose via a search api. Tests are also included in this commit. Some Models that were also used internally are now renamed, and the folders used by Docat are created at startup. fixes: #13, #320, #322 --- README.md | 56 +-- doc/getting-started.md | 17 +- docat/.gitignore | 1 + docat/README.md | 7 +- docat/docat/app.py | 235 ++++++++---- docat/docat/models.py | 52 +++ docat/docat/utils.py | 244 ++++++++++++- docat/pyproject.toml | 2 + docat/tests/conftest.py | 62 +++- docat/tests/test_hide_show.py | 11 +- docat/tests/test_index.py | 560 +++++++++++++++++++++++++++++ docat/tests/test_search.py | 656 ++++++++++++++++++++++++++++++++++ docat/tests/test_upload.py | 2 +- docat/tests/test_utils.py | 13 +- 14 files changed, 1782 insertions(+), 136 deletions(-) create mode 100644 docat/docat/models.py create mode 100644 docat/tests/test_index.py create mode 100644 docat/tests/test_search.py diff --git a/README.md b/README.md index 232cf6d59..59756f192 100644 --- a/README.md +++ b/README.md @@ -11,28 +11,12 @@ The simplest way is to build and run the docker container, you can optionally use volumes to persist state: ```sh -# run container in background and persist data (docs, nginx configs and tokens database) +# run container in background and persist data (docs, nginx configs and tokens database as well as the content index) # use 'ghcr.io/docat-org/docat:unstable' to get the latest changes -mkdir -p docat-run/db && touch docat-run/db/db.json +mkdir -p docat-run/ docker run \ --detach \ - --volume $PWD/docat-run/doc:/var/docat/doc/ \ - --volume $PWD/docat-run/db/db.json:/app/docat/db.json \ - --publish 8000:80 \ - ghcr.io/docat-org/docat -``` - -*Alternative:* Mount a dedicated directory to host `db.json` : - -```sh -# run container in background and persist data (docs, nginx configs and tokens database) -# use 'ghcr.io/docat-org/docat:unstable' to get the latest changes -mkdir -p docat-run/db && touch docat-run/db/db.json -docker run \ - --detach \ - --volume $PWD/docat-run/doc:/var/docat/doc/ \ - --volume $PWD/docat-run/db:/var/docat/db/ \ - --env DOCAT_DB_PATH=/var/docat/db/db.json + --volume $PWD/docat-run/doc:/var/docat/ \ --publish 8000:80 \ ghcr.io/docat-org/docat ``` @@ -47,13 +31,13 @@ For local development, first configure and start the backend (inside the `docat/ ```sh # create a folder for local development (uploading docs) -DEV_DOC_PATH="$(mktemp -d)" +DEV_DOCAT_PATH="$(mktemp -d)" # install dependencies poetry install # run the local development version -DOCAT_SERVE_FILES=1 DOCAT_DOC_PATH="$DEV_DOC_PATH" poetry run python -m docat +DOCAT_SERVE_FILES=1 DOCAT_STORAGE_PATH="$DEV_DOCAT_PATH" poetry run python -m docat ``` After this you need to start the frontend (inside the `web/` folder): @@ -116,10 +100,36 @@ It is possible to configure some things after the fact. Supported config options: -* headerHTML +- headerHTML ## Advanced Usage ### Hide Controls -If you would like to send link to a specific version of the documentation without the option to change the version, you can do so by clicking on the `Hide Controls` button. This will hide the control buttons and change the link, which can then be copied as usual. 
+If you would like to send link to a specific version of the documentation without the option to change the version, you can do so by clicking on the `Hide Controls` button. This will hide the control buttons and change the link, which can then be copied as usual. + +### Indexing + +Docat uses indexing for better search performance. The index is automatically updated when you upload, modify or delete a project. However, this means that if you already have existing projects, these need to be initially indexed. There are two ways to do this: + +#### Using an Environment Variable: + +When the **DOCAT_INDEX_FILES** is set, docat forces creation of the index on startup. See [local development](#local-development) for examples. + +> Note: This will increase startup time substantially, depending on how many projects you have. + +#### Using the API: + +You can force the index re-creation using the following request: + +```sh +curl -X POST http://localhost:8000/api/index/update +``` + +Using `docatl`: + +```sh +docatl update-index --host http://localhost:8000 +``` + +Don't worry if it takes some time :) \ No newline at end of file diff --git a/doc/getting-started.md b/doc/getting-started.md index 232aa78c9..ed2ca11d0 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -170,4 +170,19 @@ Using `docatl`: ```sh docatl show awesome-project 0.0.1 --host http://localhost:8000 --api-key -``` \ No newline at end of file +``` +### Force Index Re-creation + +To force the re-creation of the search index, you can use the following command: + +```sh +curl -X POST http://localhost:8000/api/index/update +``` + +Using `docatl`: + +```sh +docatl update-index --host http://localhost:8000 +``` + +Note that this can take some time. \ No newline at end of file diff --git a/docat/.gitignore b/docat/.gitignore index ab628a18a..54433c7a7 100644 --- a/docat/.gitignore +++ b/docat/.gitignore @@ -5,4 +5,5 @@ upload .tox .coverage db.json +index.json .python-version diff --git a/docat/README.md b/docat/README.md index 2f05c29c2..2d72a54bb 100644 --- a/docat/README.md +++ b/docat/README.md @@ -13,15 +13,16 @@ Install the dependencies and run the application: # install dependencies poetry install # run the app -[DOCAT_SERVE_FILES=1] [FLASK_DEBUG=1] [PORT=8888] poetry run python -m docat +[DOCAT_SERVE_FILES=1] [DOCAT_INDEX_FILES=1] [FLASK_DEBUG=1] [PORT=8888] poetry run python -m docat ``` ### Config Options * **DOCAT_SERVE_FILES**: Serve static documentation instead of a nginx (for testing) -* **DOCAT_DOC_PATH**: Upload directory for static files (needs to match nginx config) +* **DOCAT_INDEX_FILES**: Index files on start for searching +* **DOCAT_STORAGE_PATH**: Upload directory for static files (needs to match nginx config) * **FLASK_DEBUG**: Start flask in debug mode ## Usage -See [getting-started.md](../doc/getting-started.md) +See [getting-started.md](../doc/getting-started.md) \ No newline at end of file diff --git a/docat/docat/app.py b/docat/docat/app.py index 6d0dac8f3..5f6b3f502 100644 --- a/docat/docat/app.py +++ b/docat/docat/app.py @@ -10,18 +10,42 @@ import os import secrets import shutil -from dataclasses import dataclass from pathlib import Path from typing import Optional import magic from fastapi import Depends, FastAPI, File, Header, Response, UploadFile, status from fastapi.staticfiles import StaticFiles -from pydantic import BaseModel from starlette.responses import JSONResponse from tinydb import Query, TinyDB -from docat.utils import DB_PATH, UPLOAD_FOLDER, calculate_token, create_symlink, 
extract_archive, remove_docs +from docat.models import ( + ApiResponse, + ClaimResponse, + ProjectDetail, + Projects, + SearchResponse, + SearchResultFile, + SearchResultProject, + SearchResultVersion, + TokenStatus, +) +from docat.utils import ( + DB_PATH, + INDEX_PATH, + UPLOAD_FOLDER, + calculate_token, + create_symlink, + extract_archive, + get_all_projects, + get_project_details, + index_all_projects, + remove_docs, + remove_file_index_from_db, + remove_version_from_version_index, + update_file_index_for_project_version, + update_version_index_for_project, +) #: Holds the FastAPI application app = FastAPI( @@ -31,101 +55,133 @@ docs_url="/api/docs", redoc_url="/api/redoc", ) -#: Holds an instance to the TinyDB -DOCAT_DB_PATH = os.getenv("DOCAT_DB_PATH", DB_PATH) -db = TinyDB(DOCAT_DB_PATH) -#: Holds the static base path where the uploaded documentation artifacts are stored -DOCAT_UPLOAD_FOLDER = Path(os.getenv("DOCAT_DOC_PATH", UPLOAD_FOLDER)) - - -def get_db(): - """Return the cached TinyDB instance.""" - return db - - -@dataclass(frozen=True) -class TokenStatus: - valid: bool - reason: Optional[str] = None - - -class ApiResponse(BaseModel): - message: str - -class ClaimResponse(ApiResponse): - token: str +DOCAT_STORAGE_PATH = Path(os.getenv("DOCAT_STORAGE_PATH") or Path("/var/docat")) +DOCAT_DB_PATH = DOCAT_STORAGE_PATH / DB_PATH +DOCAT_INDEX_PATH = DOCAT_STORAGE_PATH / INDEX_PATH +DOCAT_UPLOAD_FOLDER = DOCAT_STORAGE_PATH / UPLOAD_FOLDER -class ProjectsResponse(BaseModel): - projects: list[str] +@app.on_event("startup") +def startup_create_folders(): + # Create the folders if they don't exist + DOCAT_UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) + DOCAT_DB_PATH.touch() + DOCAT_INDEX_PATH.touch() -class ProjectVersion(BaseModel): - name: str - tags: list[str] +def get_db(): + """Return the cached TinyDB instance.""" + return TinyDB(DOCAT_DB_PATH) -class ProjectDetailResponse(BaseModel): - name: str - versions: list[ProjectVersion] +@app.post("/api/index/update", response_model=ApiResponse, status_code=status.HTTP_200_OK) +@app.post("/api/index/update/", response_model=ApiResponse, status_code=status.HTTP_200_OK) +def update_index(): + index_all_projects(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH) + return ApiResponse(message="Successfully updated search index") -@app.get("/api/projects", response_model=ProjectsResponse, status_code=status.HTTP_200_OK) +@app.get("/api/projects", response_model=Projects, status_code=status.HTTP_200_OK) def get_projects(): if not DOCAT_UPLOAD_FOLDER.exists(): - return ProjectsResponse(projects=[]) - - def has_not_hidden_versions(project): - path = DOCAT_UPLOAD_FOLDER / project - return any( - (path / version).is_dir() and not (path / version / ".hidden").exists() for version in (DOCAT_UPLOAD_FOLDER / project).iterdir() - ) - - return ProjectsResponse( - projects=list( - filter( - has_not_hidden_versions, - [str(project.relative_to(DOCAT_UPLOAD_FOLDER)) for project in DOCAT_UPLOAD_FOLDER.iterdir() if project.is_dir()], - ) - ) - ) + return Projects(projects=[]) + return get_all_projects(DOCAT_UPLOAD_FOLDER) @app.get( "/api/projects/{project}", - response_model=ProjectDetailResponse, + response_model=ProjectDetail, status_code=status.HTTP_200_OK, responses={status.HTTP_404_NOT_FOUND: {"model": ApiResponse}}, ) @app.get( "/api/projects/{project}/", - response_model=ProjectDetailResponse, + response_model=ProjectDetail, status_code=status.HTTP_200_OK, responses={status.HTTP_404_NOT_FOUND: {"model": ApiResponse}}, ) def get_project(project): - docs_folder = 
DOCAT_UPLOAD_FOLDER / project - if not docs_folder.exists(): + details = get_project_details(DOCAT_UPLOAD_FOLDER, project) + + if not details: return JSONResponse(status_code=status.HTTP_404_NOT_FOUND, content={"message": f"Project {project} does not exist"}) - tags = [x for x in docs_folder.iterdir() if x.is_dir() and x.is_symlink()] - - return ProjectDetailResponse( - name=project, - versions=sorted( - [ - ProjectVersion( - name=str(x.relative_to(docs_folder)), - tags=[str(t.relative_to(docs_folder)) for t in tags if t.resolve() == x], - ) - for x in docs_folder.iterdir() - if x.is_dir() and not x.is_symlink() and not (docs_folder / x.name / ".hidden").exists() - ], - key=lambda k: k.name, - reverse=True, - ), - ) + return details + + +@app.get("/api/search", response_model=SearchResponse, status_code=status.HTTP_200_OK) +@app.get("/api/search/", response_model=SearchResponse, status_code=status.HTTP_200_OK) +def search(query: str): + query = query.lower() + found_projects: list[SearchResultProject] = [] + found_versions: list[SearchResultVersion] = [] + found_files: list[SearchResultFile] = [] + + index_db = TinyDB(DOCAT_INDEX_PATH) + project_table = index_db.table("projects") + projects = project_table.all() + all_versions: list[tuple] = [] + + # Collect all projects that contain the query + for project in projects: + name = project.get("name") + versions = project.get("versions") + + if not name or not versions: + continue + + all_versions += ((name, version) for version in versions) + + if query in name.lower(): + project_res = SearchResultProject(name=name) + found_projects.append(project_res) + + # Order by occurences of the query + found_projects = sorted(found_projects, key=lambda x: x.name.count(query), reverse=True) + + # Collect all versions and tags that contain the query + for (project, version) in all_versions: + version_name = version.get("name") + version_tags = version.get("tags") + + if query in version_name.lower(): + version_res = SearchResultVersion(project=project, version=version_name) + found_versions.append(version_res) + + for tag in version_tags: + if query in tag: + tag_res = SearchResultVersion(version=tag, project=project) + found_versions.append(tag_res) + + # Order by occurences of the query + found_versions = sorted(found_versions, key=lambda x: x.version.count(query), reverse=True) + + # Collect all files whose name contains the query or whose content contains the query + files_table = index_db.table("files") + files = files_table.all() + + for file in files: + file_content = file.get("content") + file_path_str = file.get("path") + file_project = file.get("project") + file_project_version = file.get("version") + + if file_content is None or not file_path_str or not file_project or not file_project_version: + continue + + file_path = Path(file_path_str) + + if query in file_path.name.lower(): + file_res = SearchResultFile(project=file_project, version=file_project_version, path=file_path_str) + found_files.append(file_res) + continue # Skip content search if the file name already matches + + if file_path.suffix == ".html" and query in file_content.lower(): + file_res = SearchResultFile(project=file_project, version=file_project_version, path=file_path_str) + found_files.append(file_res) + + return SearchResponse(projects=found_projects, versions=found_versions, files=found_files) @app.post("/api/{project}/icon", response_model=ApiResponse, status_code=status.HTTP_200_OK) @@ -202,6 +258,9 @@ def hide_version( with open(hidden_file, "w") as f: f.close() + 
update_version_index_for_project(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH, project) + remove_file_index_from_db(DOCAT_INDEX_PATH, project, version) + return ApiResponse(message=f"Version {version} is now hidden") @@ -237,6 +296,9 @@ def show_version( os.remove(hidden_file) + update_version_index_for_project(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH, project) + update_file_index_for_project_version(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH, project, version) + return ApiResponse(message=f"Version {version} is now shown") @@ -262,7 +324,7 @@ def upload( if base_path.exists(): token_status = check_token_for_project(db, docat_api_key, project) if token_status.valid: - remove_docs(project, version) + remove_docs(project, version, DOCAT_UPLOAD_FOLDER) else: response.status_code = status.HTTP_401_UNAUTHORIZED return ApiResponse(message=token_status.reason) @@ -276,6 +338,8 @@ def upload( shutil.copyfileobj(file.file, buffer) extract_archive(target_file, base_path) + update_version_index_for_project(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH, project) + update_file_index_for_project_version(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH, project, version) return ApiResponse(message="File successfully uploaded") @@ -290,6 +354,7 @@ def tag(project: str, version: str, new_tag: str, response: Response): return ApiResponse(message=f"Version {version} not found") if create_symlink(version, destination): + update_version_index_for_project(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH, project) return ApiResponse(message=f"Tag {new_tag} -> {version} successfully created") else: response.status_code = status.HTTP_409_CONFLICT @@ -344,8 +409,19 @@ def rename(project: str, new_project_name: str, response: Response, docat_api_ke # update the claim to the new project name Project = Query() - table = db.table("claims") - table.update({"name": new_project_name}, Project.name == project) + claims_table = db.table("claims") + claims_table.update({"name": new_project_name}, Project.name == project) + + # update the version index to the new project name + index_db = TinyDB(DOCAT_INDEX_PATH) + Project = Query() + project_table = index_db.table("projects") + project_table.update({"name": new_project_name}, Project.name == project) + + # update the file index to the new project name + File = Query() + file_table = index_db.table("files") + file_table.update({"project": new_project_name}, File.project == project) os.rename(project_base_path, new_project_base_path) @@ -358,11 +434,13 @@ def rename(project: str, new_project_name: str, response: Response, docat_api_ke def delete(project: str, version: str, response: Response, docat_api_key: str = Header(None), db: TinyDB = Depends(get_db)): token_status = check_token_for_project(db, docat_api_key, project) if token_status.valid: - message = remove_docs(project, version) + message = remove_docs(project, version, DOCAT_UPLOAD_FOLDER) if message: response.status_code = status.HTTP_404_NOT_FOUND return ApiResponse(message=message) else: + remove_version_from_version_index(DOCAT_INDEX_PATH, project, version) + remove_file_index_from_db(DOCAT_INDEX_PATH, project, version) return ApiResponse(message=f"Successfully deleted version '{version}'") else: response.status_code = status.HTTP_401_UNAUTHORIZED @@ -386,4 +464,9 @@ def check_token_for_project(db, token, project) -> TokenStatus: # serve_local_docs for local testing without a nginx if os.environ.get("DOCAT_SERVE_FILES"): + DOCAT_UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) app.mount("/doc", StaticFiles(directory=DOCAT_UPLOAD_FOLDER, html=True), 
name="docs") + +# index local files on start +if os.environ.get("DOCAT_INDEX_FILES"): + index_all_projects(DOCAT_UPLOAD_FOLDER, DOCAT_INDEX_PATH) diff --git a/docat/docat/models.py b/docat/docat/models.py new file mode 100644 index 000000000..fd3cd9347 --- /dev/null +++ b/docat/docat/models.py @@ -0,0 +1,52 @@ +from dataclasses import dataclass + +from pydantic import BaseModel + + +@dataclass(frozen=True) +class TokenStatus: + valid: bool + reason: str | None = None + + +class ApiResponse(BaseModel): + message: str + + +class ClaimResponse(ApiResponse): + token: str + + +class Projects(BaseModel): + projects: list[str] + + +class ProjectVersion(BaseModel): + name: str + tags: list[str] + + +class ProjectDetail(BaseModel): + name: str + versions: list[ProjectVersion] + + +class SearchResultProject(BaseModel): + name: str + + +class SearchResultVersion(BaseModel): + project: str + version: str + + +class SearchResultFile(BaseModel): + project: str + version: str + path: str + + +class SearchResponse(BaseModel): + projects: list[SearchResultProject] + versions: list[SearchResultVersion] + files: list[SearchResultFile] diff --git a/docat/docat/utils.py b/docat/docat/utils.py index 3143113a0..9c2cd0145 100644 --- a/docat/docat/utils.py +++ b/docat/docat/utils.py @@ -7,9 +7,16 @@ from pathlib import Path from zipfile import ZipFile +from bs4 import BeautifulSoup +from bs4.element import Comment +from tinydb import Query, TinyDB + +from docat.models import ProjectDetail, Projects, ProjectVersion + NGINX_CONFIG_PATH = Path("/etc/nginx/locations.d") -UPLOAD_FOLDER = Path("/var/docat/doc") +UPLOAD_FOLDER = "doc" DB_PATH = "db.json" +INDEX_PATH = "index.json" def create_symlink(source, destination): @@ -48,7 +55,7 @@ def extract_archive(target_file, destination): target_file.unlink() # remove the zip file -def remove_docs(project, version): +def remove_docs(project: str, version: str, upload_folder_path: Path): """ Delete documentation @@ -56,7 +63,7 @@ def remove_docs(project, version): project (str): name of the project version (str): project version """ - docs = UPLOAD_FOLDER / project / version + docs = upload_folder_path / project / version if docs.exists(): # remove the requested version # rmtree can not remove a symlink @@ -90,3 +97,234 @@ def calculate_token(password, salt): salt (byte): the salt used for the password """ return hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 100000).hex() + + +def get_all_projects(upload_folder_path: Path) -> Projects: + """ + Returns all projects in the upload folder. + """ + + def has_not_hidden_versions(project): + path = upload_folder_path / project + return any( + (path / version).is_dir() and not (path / version / ".hidden").exists() for version in (upload_folder_path / project).iterdir() + ) + + return Projects( + projects=list( + filter( + has_not_hidden_versions, + [str(project.relative_to(upload_folder_path)) for project in upload_folder_path.iterdir() if project.is_dir()], + ) + ) + ) + + +def get_project_details(upload_folder_path: Path, project_name: str) -> ProjectDetail | None: + """ + Returns all versions and tags for a project. 
+ """ + docs_folder = upload_folder_path / project_name + + if not docs_folder.exists(): + return None + + tags = [x for x in docs_folder.iterdir() if x.is_dir() and x.is_symlink()] + + return ProjectDetail( + name=project_name, + versions=sorted( + [ + ProjectVersion( + name=str(x.relative_to(docs_folder)), + tags=[str(t.relative_to(docs_folder)) for t in tags if t.resolve() == x], + ) + for x in docs_folder.iterdir() + if x.is_dir() and not x.is_symlink() and not (docs_folder / x.name / ".hidden").exists() + ], + key=lambda k: k.name, + reverse=True, + ), + ) + + +def index_all_projects( + upload_folder_path: Path, + index_db_path: Path, +): + """ + This will extract all content from all versions for each project, + and save it into index.json. + """ + # drop existing index + index_db_path.unlink(missing_ok=True) + + all_projects = get_all_projects(upload_folder_path).projects + + for project in all_projects: + update_version_index_for_project(upload_folder_path, index_db_path, project) + update_file_index_for_project(upload_folder_path, index_db_path, project) + + +def update_file_index_for_project(upload_folder_path: Path, index_db_path: Path, project: str): + """ + Rebuilds the file index for all versions of the given project + """ + index_db = TinyDB(index_db_path) + files_table = index_db.table("files") + files_table.remove(Query().project == project) + + project_details = get_project_details(upload_folder_path, project) + + if not project_details: + return + + for version in project_details.versions: + update_file_index_for_project_version(upload_folder_path, index_db_path, project, version.name) + + +def update_file_index_for_project_version(upload_folder_path: Path, index_db_path: Path, project: str, version: str): + """ + Removes existing indexes, and rebuilds it with the name of the contained files, and their content for html files. + """ + docs_folder = upload_folder_path / project / version + + if not docs_folder.exists(): + return + + remove_file_index_from_db(index_db_path, project, version) + + for file in docs_folder.rglob("*"): + if not file.is_file(): + continue + + # save the file path + path = str(file.relative_to(docs_folder)) + content = get_html_content(file) if file.name.endswith(".html") else "" + + insert_file_index_into_db(index_db_path, project, version, path, content) + + +def update_version_index_for_project(upload_folder_path: Path, index_db_path: Path, project: str): + """ + Removes existing version indexes for the given project. + It saves all existing versions and tags to the indexdb. + """ + index_db = TinyDB(index_db_path) + project_table = index_db.table("projects") + Project = Query() + project_table.remove(Project.name == project) + + details = get_project_details(upload_folder_path, project) + + if not details: + return + + for version in details.versions: + insert_version_into_version_index(index_db_path, project, version.name, version.tags) + + +def get_html_content(file_path: Path) -> str: + """ + Returns the content of a html file as a string. 
+ """ + + def html_tag_visible(element): + if element.parent.name in ["style", "script", "head", "title", "meta", "[document]"] or isinstance(element, Comment): + return False + + return True + + file_content = file_path.read_text() + soup = BeautifulSoup(file_content, "html.parser") + text_content = filter(html_tag_visible, soup.findAll(string=True)) + content = " ".join(t.strip() for t in text_content).lower() + return content + + +def insert_file_index_into_db(index_db_path: Path, project: str, version: str, file_path: str, content: str): + """ + Inserts a file index into the index.json. + """ + index_db = TinyDB(index_db_path) + files_table = index_db.table("files") + + files_table.insert({"path": file_path, "content": content, "project": project, "version": version}) + + index_db.close() + + +def remove_file_index_from_db(index_db_path: Path, project: str, version: str): + """ + Removes the file index for the given project version + """ + + index_db = TinyDB(index_db_path) + files_table = index_db.table("files") + + File = Query() + files_table.remove(File.project == project and File.version == version) + + index_db.close() + + +def insert_version_into_version_index(index_db_path: Path, project: str, version: str, tags: list[str]): + """ + Inserts a project index into the index db. + """ + index_db = TinyDB(index_db_path) + projects_table = index_db.table("projects") + Project = Query() + found_projects = projects_table.search(Project.name == project) + + if not found_projects: + # create + projects_table.insert({"name": project, "versions": [{"name": version, "tags": tags}]}) + index_db.close() + return + + existing_versions = found_projects[0].get("versions") + + if not existing_versions: + return # should not happen + + if version in (v.get("name") for v in existing_versions): + # version already exists, remove so we can add it again, updating the tags + existing_versions = list((v for v in existing_versions if v.get("name") != version)) + + existing_versions.append({"name": version, "tags": tags}) + projects_table.update({"versions": existing_versions}, Project.name == project) + index_db.close() + + +def remove_version_from_version_index(index_db_path: Path, project: str, version: str): + """ + Removes a version from the project index in the index db. 
+ """ + index_db = TinyDB(index_db_path) + projects_table = index_db.table("projects") + + Project = Query() + found_projects = projects_table.search(Project.name == project) + + if not found_projects: + return + + found_versions = found_projects[0].get("versions") + + if not found_versions or version not in (v["name"] for v in found_versions): + return + + if len(found_versions) == 1: + projects_table.remove(Project.name == project) # remove project if it has no versions left + return + + version_to_remove = next(v for v in found_versions if v["name"] == version) + + if not version_to_remove: + return # shouldn't happen + + found_versions.remove(version_to_remove) + projects_table.update({"versions": found_versions}, Project.name == project) + + index_db.close() diff --git a/docat/pyproject.toml b/docat/pyproject.toml index f7b79dd85..8f51831e1 100644 --- a/docat/pyproject.toml +++ b/docat/pyproject.toml @@ -12,6 +12,8 @@ fastapi = "^0.86.0" python-multipart = "^0.0.5" uvicorn = "^0.20.0" python-magic = "^0.4.27" +beautifulsoup4 = "^4.11.1" +types-beautifulsoup4 = "^4.11.6" [tool.poetry.dev-dependencies] flake8 = "^6.0.0" diff --git a/docat/tests/conftest.py b/docat/tests/conftest.py index 8104dffd2..d3c946c3e 100644 --- a/docat/tests/conftest.py +++ b/docat/tests/conftest.py @@ -4,25 +4,37 @@ import pytest from fastapi.testclient import TestClient from tinydb import TinyDB -from tinydb.storages import MemoryStorage import docat.app as docat from docat.utils import create_symlink -@pytest.fixture -def client(): +@pytest.fixture(autouse=True) +def setup_docat_paths(): + """ + Set up the temporary paths for the docat app. + """ + temp_dir = tempfile.TemporaryDirectory() - docat.DOCAT_UPLOAD_FOLDER = Path(temp_dir.name) - docat.db = TinyDB(storage=MemoryStorage) - yield TestClient(docat.app) - docat.app.db = None + docat.DOCAT_STORAGE_PATH = Path(temp_dir.name) + docat.DOCAT_DB_PATH = Path(temp_dir.name) / "db.json" + docat.DOCAT_INDEX_PATH = Path(temp_dir.name) / "index.json" + docat.DOCAT_UPLOAD_FOLDER = Path(temp_dir.name) / "doc" + + yield + temp_dir.cleanup() @pytest.fixture -def upload_folder_path(): - return docat.DOCAT_UPLOAD_FOLDER +def client(): + docat.db = TinyDB(docat.DOCAT_DB_PATH) + docat.index_db = TinyDB(docat.DOCAT_INDEX_PATH) + + yield TestClient(docat.app) + + docat.app.db = None + docat.app.index_db = None @pytest.fixture @@ -34,18 +46,34 @@ def client_with_claimed_project(client): @pytest.fixture -def temp_project_version(tmp_path): - docs = tmp_path / "doc" - - docs.mkdir() - +def temp_project_version(): def __create(project, version): - version_docs = docs / project / version + version_docs = docat.DOCAT_UPLOAD_FOLDER / project / version version_docs.mkdir(parents=True) (version_docs / "index.html").touch() - create_symlink(version_docs, docs / project / "latest") + create_symlink(version_docs, docat.DOCAT_UPLOAD_FOLDER / project / "latest") - return docs + return docat.DOCAT_UPLOAD_FOLDER yield __create + + +@pytest.fixture +def index_db_project_table(): + index_db = TinyDB(docat.DOCAT_INDEX_PATH) + projects_table = index_db.table("projects") + + yield projects_table + + index_db.close() + + +@pytest.fixture +def index_db_files_table(): + index_db = TinyDB(docat.DOCAT_INDEX_PATH) + projects_table = index_db.table("files") + + yield projects_table + + index_db.close() diff --git a/docat/tests/test_hide_show.py b/docat/tests/test_hide_show.py index c8fcb04ea..e92ebfc2a 100644 --- a/docat/tests/test_hide_show.py +++ b/docat/tests/test_hide_show.py @@ -1,7 +1,8 @@ import 
io -from pathlib import Path from unittest.mock import patch +import docat.app as docat + def test_hide(client_with_claimed_project): """ @@ -70,11 +71,11 @@ def test_hide_only_version_not_listed_in_projects(client_with_claimed_project): assert project_details_response.json() == {"name": "some-project", "versions": []} -def test_hide_creates_hidden_file(client_with_claimed_project, upload_folder_path): +def test_hide_creates_hidden_file(client_with_claimed_project): """ Tests that the hidden file is created when hiding a version """ - hidden_file_path = Path(upload_folder_path) / "some-project" / "1.0.0" / ".hidden" + hidden_file_path = docat.DOCAT_UPLOAD_FOLDER / "some-project" / "1.0.0" / ".hidden" # create a version create_response = client_with_claimed_project.post( @@ -223,11 +224,11 @@ def test_show(client_with_claimed_project): } -def test_show_deletes_hidden_file(client_with_claimed_project, upload_folder_path): +def test_show_deletes_hidden_file(client_with_claimed_project): """ Tests that the hidden file is deleted when requesting show. """ - hidden_file_path = Path(upload_folder_path) / "some-project" / "1.0.0" / ".hidden" + hidden_file_path = docat.DOCAT_UPLOAD_FOLDER / "some-project" / "1.0.0" / ".hidden" # create a version create_response = client_with_claimed_project.post( diff --git a/docat/tests/test_index.py b/docat/tests/test_index.py new file mode 100644 index 000000000..a61d2984f --- /dev/null +++ b/docat/tests/test_index.py @@ -0,0 +1,560 @@ +import io +import os +import shutil +from unittest.mock import patch + +import docat.app as docat +from docat.utils import ( + index_all_projects, + insert_file_index_into_db, + insert_version_into_version_index, + remove_file_index_from_db, + remove_version_from_version_index, + update_file_index_for_project, + update_file_index_for_project_version, + update_version_index_for_project, +) + + +def test_insert_file_index_into_db(client_with_claimed_project, index_db_files_table): + """ + Tests wether insert_file_index_into_db inserts the correct json into the database. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + version = "1.0.0" + + insert_file_index_into_db(docat.DOCAT_INDEX_PATH, project, version, "index.html", "hello world") + + assert index_db_files_table.all() == [{"path": "index.html", "content": "hello world", "project": project, "version": version}] + + +def test_remove_file_index_from_db(client_with_claimed_project, index_db_files_table): + """ + Tests wether remove_file_index_from_db removes exactly the json insert_file_index_into_db wrote into the database. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + version = "1.0.0" + + insert_file_index_into_db(docat.DOCAT_INDEX_PATH, project, version, "index.html", "hello world") + remove_file_index_from_db(docat.DOCAT_INDEX_PATH, project, version) + + assert index_db_files_table.all() == [] + + +def test_insert_version_into_version_index(client_with_claimed_project, index_db_project_table): + """ + Tests wether insert_version_into_version_index inserts the correct json into the database. + + client_with_claimed_project is needed to create the context with the index db. 
+ """ + project = "some-project" + version = "1.0.0" + tag = "latest" + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [tag]) + + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": version, "tags": [tag]}]}] + + +def test_insert_version_into_version_index_no_duplicates(client_with_claimed_project, index_db_project_table): + """ + Tests wether insert_version_into_version_index doesn't create a new project + or version when the version with the same tags already exists. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + version = "1.0.0" + tag = "latest" + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [tag]) + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [tag]) + + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": version, "tags": [tag]}]}] + + +def test_insert_version_into_version_index_second(client_with_claimed_project, index_db_project_table): + """ + Tests wether insert_version_into_version_index appends the version when the project already exists. + + client_with_claimed_project is needed to create the context with the index db. + """ + + project = "some-project" + versions = ["1.0.0", "2.0.0"] + tags = ["latest", "stable"] + + for version, tag in zip(versions, tags): + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [tag]) + + assert index_db_project_table.all() == [ + {"name": project, "versions": [{"name": versions[0], "tags": [tags[0]]}, {"name": versions[1], "tags": [tags[1]]}]} + ] + + +def test_insert_version_into_version_index_second_with_different_tags(client_with_claimed_project, index_db_project_table): + """ + Tests wether insert_version_into_version_index correctly overwrites tags. + For example, when a version is tagged as "latest" and then as "stable" and "nightly" , the "latest" tag should be removed. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + version = "1.0.0" + old_tags = ["latest"] + new_tags = ["stale", "nightly"] + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [old_tags]) + + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": version, "tags": [old_tags]}]}] + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [new_tags]) + + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": version, "tags": [new_tags]}]}] + + +def test_insert_version_into_version_index_second_with_overlapping_tags(client_with_claimed_project, index_db_project_table): + """ + Tests wether insert_version_into_version_index correctly overwrites tags. + For example, when a version is tagged as "latest" and then as "stable" and "latest", the tags should become "stable" and "latest". + + client_with_claimed_project is needed to create the context with the index db. 
+ """ + project = "some-project" + version = "1.0.0" + old_tags = ["latest"] + new_tags = ["stable", "latest"] + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [old_tags]) + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": version, "tags": [old_tags]}]}] + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [new_tags]) + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": version, "tags": [new_tags]}]}] + + +def test_remove_version_from_version_index(client_with_claimed_project, index_db_project_table): + """ + Tests that only the version given is removed from the database. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + versions = ["1.0.0", "2.0.0"] + tags = ["latest", "stable"] + + for version, tag in zip(versions, tags): + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [tag]) + + assert index_db_project_table.all() == [ + {"name": project, "versions": [{"name": versions[0], "tags": [tags[0]]}, {"name": versions[1], "tags": [tags[1]]}]} + ] + + remove_version_from_version_index(docat.DOCAT_INDEX_PATH, project, versions[1]) + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": versions[0], "tags": [tags[0]]}]}] + + +def test_remove_version_from_version_index_remove_last_version(client_with_claimed_project, index_db_project_table): + """ + Tests wether remove_version_from_version_index removes the whole project from the database if the last version is removed. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + version = "1.0.0" + tag = "latest" + + insert_version_into_version_index(docat.DOCAT_INDEX_PATH, project, version, [tag]) + remove_version_from_version_index(docat.DOCAT_INDEX_PATH, project, version) + + assert index_db_project_table.all() == [] + + +def test_update_version_index_for_project(client_with_claimed_project, index_db_project_table): + """ + Tests wether update_version_index_for_project correctly handles inserting and deleting versions. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + versions = ["1.0.0", "2.0.0"] + + project_folder = docat.DOCAT_UPLOAD_FOLDER / project + + # we need to create the project folders manually, + # since the api already updates the index + for version in versions: + (project_folder / version).mkdir(parents=True) + + with open(project_folder / version / "index.html", "w") as f: + f.write("
<h1>Hello World</h1>
") + + update_version_index_for_project(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project) + assert index_db_project_table.all() == [ + {"name": project, "versions": [{"name": versions[1], "tags": []}, {"name": versions[0], "tags": []}]} + ] + + shutil.rmtree(project_folder / versions[0]) + update_version_index_for_project(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project) + assert index_db_project_table.all() == [{"name": project, "versions": [{"name": versions[1], "tags": []}]}] + + +def test_update_file_index_for_project_version(client_with_claimed_project, index_db_files_table): + """ + Tests wether update_file_index_for_project_version correctly handles inserting and deleting files. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + version = "1.0.0" + files = ["index.html", "style.css"] + + # we need to create the project folders manually, + # since the api already updates the index + (docat.DOCAT_UPLOAD_FOLDER / project / version).mkdir(parents=True) + + for file in files: + with open(docat.DOCAT_UPLOAD_FOLDER / project / version / file, "w") as f: + f.write("
<h1>Hello World</h1>
") + + update_file_index_for_project_version(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project, version) + assert index_db_files_table.all().sort(key=lambda e: e.get("path")) == [ + {"path": files[1], "content": "", "project": project, "version": version}, + {"path": files[0], "content": "hello world", "project": project, "version": version}, + ].sort(key=lambda e: e["path"]) + + os.remove(docat.DOCAT_UPLOAD_FOLDER / project / version / files[0]) + update_file_index_for_project_version(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project, version) + assert index_db_files_table.all() == [ + {"path": files[1], "content": "", "project": project, "version": version}, + ] + + +def test_update_file_index_for_project_version_folder_does_not_exist(client_with_claimed_project): + """ + Tests wether the function just returns when the folder for + the given project / version does not exist. + client_with_claimed_project is needed to create the context with the index db. + """ + project = "non-existing-project" + + with patch("docat.utils.TinyDB") as mock_tinydb: + update_file_index_for_project_version(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project, "1.0.0") + mock_tinydb.assert_not_called() + + +def test_update_file_index_for_project(client_with_claimed_project, index_db_files_table): + """ + Tests wether update_file_index_for_project correctly handles inserting and deleting versions. + + client_with_claimed_project is needed to create the context with the index db. + """ + project = "some-project" + versions = ["1.0.0", "2.0.0"] + + # we need to create the project folders manually, + # since the api already updates the index + for version in versions: + (docat.DOCAT_UPLOAD_FOLDER / project / version).mkdir(parents=True) + + with open(docat.DOCAT_UPLOAD_FOLDER / project / version / "index.html", "w") as f: + f.write("
<h1>Hello World</h1>
") + + update_file_index_for_project(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project) + assert index_db_files_table.all().sort(key=lambda e: e.get("version")) == [ + {"path": "index.html", "content": "hello world", "project": project, "version": versions[1]}, + {"path": "index.html", "content": "hello world", "project": project, "version": versions[0]}, + ].sort(key=lambda e: e["version"]) + + shutil.rmtree(docat.DOCAT_UPLOAD_FOLDER / project / versions[0]) + update_file_index_for_project(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project) + assert index_db_files_table.all() == [{"path": "index.html", "content": "hello world", "project": project, "version": versions[1]}] + + +def test_index_project_with_html_content(client_with_claimed_project): + """ + Tests wether the function creates an index for a given project as expected. + """ + project = "some-project" + version = "1.0.0" + file = "index.html" + content = "Hello World" + + # create a project with a version and a file + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": (file, io.BytesIO(f"
<h1>{content}</h1>
".encode()), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + with patch("docat.utils.insert_file_index_into_db") as mock_insert_file_index_into_db: + update_file_index_for_project_version(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project, version) + + mock_insert_file_index_into_db.assert_called_once_with( + docat.DOCAT_INDEX_PATH, + project, + version, + file, + content.lower(), + ) + + +def test_index_project_non_html(client_with_claimed_project): + """ + Tests wether the function ignores the content of non-html files as expected. + """ + project = "some-project" + version = "1.0.0" + file = "index.txt" + content = "Hello World" + + # create a project with a version and a file + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": (file, io.BytesIO(f"
<h1>{content}</h1>
".encode()), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + with patch("docat.utils.insert_file_index_into_db") as mock_insert_file_index_into_db: + update_file_index_for_project_version(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH, project, version) + mock_insert_file_index_into_db.assert_called_once_with( + docat.DOCAT_INDEX_PATH, + project, + version, + file, + "", + ) + + +def test_index_all_projects_creates_version_and_tag_index(client_with_claimed_project): + """ + Tests wether index_all_projects finds all versions and creates the index accordingly. + """ + project = "some-project" + versions = ["1.0.0", "2.0.0"] + tags = ["latest", "stable"] + + # create a project with two versions + for version in versions: + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + # tag the versions + for (i, version) in enumerate(versions): + tag_project_response = client_with_claimed_project.put(f"/api/{project}/{version}/tags/{tags[i]}") + assert tag_project_response.status_code == 201 + + with patch("docat.utils.insert_version_into_version_index") as mock_insert_version_into_version_index: + index_all_projects(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH) + mock_insert_version_into_version_index.assert_any_call(docat.DOCAT_INDEX_PATH, project, versions[0], [tags[0]]) + mock_insert_version_into_version_index.assert_any_call(docat.DOCAT_INDEX_PATH, project, versions[1], [tags[1]]) + + +def test_index_all_projects_creates_file_and_version_index(client_with_claimed_project): + """ + Tests wether index_all_projects finds all projects and versions and creates the index accordingly. + """ + projects = ["some-project", "another-project"] + versions = ["1.0.0", "2.0.0"] + + # create two projects with two versions each + for project in projects: + for version in versions: + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + with patch("docat.utils.insert_version_into_version_index") as mock_insert_version_into_version_index, patch( + "docat.utils.insert_file_index_into_db" + ) as mock_insert_file_index_into_db: + index_all_projects(docat.DOCAT_UPLOAD_FOLDER, docat.DOCAT_INDEX_PATH) + for project in projects: + for version in versions: + mock_insert_version_into_version_index.assert_any_call(docat.DOCAT_INDEX_PATH, project, version, []) + mock_insert_file_index_into_db.assert_any_call(docat.DOCAT_INDEX_PATH, project, version, "index.html", "hello world") + + +def test_index_all_projects_creates_file_and_version_index_api(client_with_claimed_project): + """ + Tests via the API wether index_all_projects finds all projects and versions and creates the index accordingly. + """ + projects = ["some-project", "another-project"] + versions = ["1.0.0", "2.0.0"] + + # create two projects with two versions each + for project in projects: + for version in versions: + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + with patch("docat.utils.insert_version_into_version_index") as mock_insert_version_into_version_index, patch( + "docat.utils.insert_file_index_into_db" + ) as mock_insert_file_index_into_db: + index_all_projects_response = client_with_claimed_project.post("/api/index/update") + assert index_all_projects_response.status_code == 200 + + for project in projects: + for version in versions: + mock_insert_version_into_version_index.assert_any_call(docat.DOCAT_INDEX_PATH, project, version, []) + mock_insert_file_index_into_db.assert_any_call(docat.DOCAT_INDEX_PATH, project, version, "index.html", "hello world") + + +def test_hide_show_removes_file_index_and_adds_again_only_version(client_with_claimed_project, index_db_files_table): + """ + Tests that the hide function removes the files of the version from the index and that + show adds it again with only one version. + """ + project = "some-project" + version = "1.0.0" + + # create a project with a version + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + # make sure we have the files in the index + assert index_db_files_table.all().sort(key=lambda e: e.get("version")) == [ + {"path": "index.html", "content": "hello world", "project": project, "version": version}, + ].sort(key=lambda e: e["version"]) + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure the files are gone from the index + assert index_db_files_table.all() == [] + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/show", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure it's back + assert index_db_files_table.all().sort(key=lambda e: e.get("version")) == [ + {"path": "index.html", "content": "hello world", "project": project, "version": version}, + ].sort(key=lambda e: e["version"]) + + +def test_hide_show_removes_file_index_and_adds_again(client_with_claimed_project, index_db_files_table): + """ + Tests that the hide function removes the files of the version from the index and that + show adds it again. + """ + project = "some-project" + versions = ["1.0.0", "2.0.0"] + + for version in versions: + # create a project with a version + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + # make sure we have the files in the index + assert index_db_files_table.all().sort(key=lambda e: e.get("version")) == [ + {"path": "index.html", "content": "hello world", "project": project, "version": version[0]}, + {"path": "index.html", "content": "hello world", "project": project, "version": version[1]}, + ].sort(key=lambda e: e["version"]) + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{versions[0]}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure the files are gone from the index + assert index_db_files_table.all().sort(key=lambda e: e.get("version")) == [ + {"path": "index.html", "content": "hello world", "project": project, "version": version[1]}, + ].sort(key=lambda e: e["version"]) + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{versions[0]}/show", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure they're back + assert index_db_files_table.all().sort(key=lambda e: e.get("version")) == [ + {"path": "index.html", "content": "hello world", "project": project, "version": version[0]}, + {"path": "index.html", "content": "hello world", "project": project, "version": version[1]}, + ].sort(key=lambda e: e["version"]) + + +def test_hide_show_removes_project_index_and_adds_again_on_hide_and_show_of_only_version( + client_with_claimed_project, index_db_project_table +): + """ + Tests that the hide function removes the version and project + from the index if the only version gets hidden and that show adds it again. + """ + project = "some-project" + version = "1.0.0" + + # create a project with a version + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + # make sure we have the version in the index + assert index_db_project_table.all().sort(key=lambda e: e.get("name")) == [ + {"name": project, "versions": [version]}, + ].sort(key=lambda e: e["name"]) + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure the version and project is gone from the index + assert index_db_project_table.all() == [] + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/show", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure it's back + assert index_db_project_table.all().sort(key=lambda e: e.get("name")) == [ + {"name": project, "versions": [version]}, + ].sort(key=lambda e: e["name"]) + + +def test_hide_show_removes_version_from_index(client_with_claimed_project, index_db_project_table): + """ + Tests that the hide function removes the version + from the index if it gets hidden and that show adds it again. + """ + project = "some-project" + versions = ["1.0.0", "2.0.0"] + + for version in versions: + # create a project with a version + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + # make sure we have the version in the index + assert index_db_project_table.all().sort(key=lambda e: e.get("name")) == [ + {"name": project, "versions": [{"name": v, "tags": []} for v in versions]}, + ].sort(key=lambda e: e["name"]) + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{versions[0]}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure the version is gone from the index + assert index_db_project_table.all().sort(key=lambda e: e.get("name")) == [ + {"name": project, "versions": [{"name": versions[1], "tags": []}]}, + ].sort(key=lambda e: e["name"]) + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{versions[0]}/show", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + # make sure it's back + assert index_db_project_table.all().sort(key=lambda e: e.get("name")) == [ + {"name": project, "versions": [{"name": v, "tags": []} for v in versions]}, + ].sort(key=lambda e: e["name"]) diff --git a/docat/tests/test_search.py b/docat/tests/test_search.py new file mode 100644 index 000000000..811b3a417 --- /dev/null +++ b/docat/tests/test_search.py @@ -0,0 +1,656 @@ +import io + + +def test_search_finds_project_by_name(client_with_claimed_project): + """ + Search should find a project by name. (Partial match) + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=some") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [{"name": "some-project"}], "versions": [], "files": []} + + +def test_search_finds_project_by_name_full_match(client_with_claimed_project): + """ + Search should find a project by name. (Full match) + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [{"name": "some-project"}], "versions": [], "files": []} + + +def test_search_project_by_name_negative(client_with_claimed_project): + """ + Search should not find a project by an unrelated name. + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=other") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [], "files": []} + + +def test_search_finds_tag(client_with_claimed_project): + """ + Search should find a tag by name. (Partial match) + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"
<h1>Hello World</h1>
"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + create_tag_response = client_with_claimed_project.put("/api/some-project/1.0.0/tags/latest") + assert create_tag_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=lat") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [{"project": "some-project", "version": "latest"}], "files": []} + + +def test_search_finds_tag_full_match(client_with_claimed_project): + """ + Search should find a tag by name. (Full match) + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + create_tag_response = client_with_claimed_project.put("/api/some-project/1.0.0/tags/latest") + assert create_tag_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=latest") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [{"project": "some-project", "version": "latest"}], "files": []} + + +def test_search_finds_tag_negative(client_with_claimed_project): + """ + Search should not find a tag by an unrelated name. + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + create_tag_response = client_with_claimed_project.put("/api/some-project/1.0.0/tags/latest") + assert create_tag_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=other") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [], "files": []} + + +def test_search_finds_version(client_with_claimed_project): + """ + Search should find a version by name. (Partial match) + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [{"project": "some-project", "version": "1.0.0"}], "files": []} + + +def test_search_finds_version_full_match(client_with_claimed_project): + """ + Search should find a version by name. (Full match) + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=1.0.0") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [{"project": "some-project", "version": "1.0.0"}], "files": []} + + +def test_search_finds_version_negative(client_with_claimed_project): + """ + Search should not find a version by an unrelated name. + """ + create_project_response = client_with_claimed_project.post( + "/api/some-project/1.0.0", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=0.1.0") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [], "files": []} + + +def test_search_finds_both_project_and_version(client_with_claimed_project): + """ + Search should find both the version and the project itself, if the names contain the query. + """ + create_project_response = client_with_claimed_project.post( + "/api/test/test", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=test") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [{"name": "test"}], "versions": [{"project": "test", "version": "test"}], "files": []} + + +def test_search_is_case_insensitive(client_with_claimed_project): + """ + Search should find the project even when the case doesn't match. + """ + create_project_response = client_with_claimed_project.post( + "/api/test/1.0.0", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=Test") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [{"name": "test"}], "versions": [], "files": []} + + +def test_index_updated_on_tag(client_with_claimed_project): + """ + The tag should automatically be recongnized by search after creation. + """ + project = "some-project" + version = "1.0.0" + tag = "latest" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=latest") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [], "files": []} + + create_tag_response = client_with_claimed_project.put(f"/api/{project}/{version}/tags/{tag}") + assert create_tag_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=latest") + assert search_response.status_code == 200 + assert search_response.json() == {"projects": [], "versions": [{"project": project, "version": tag}], "files": []} + + +def test_index_updated_on_rename(client_with_claimed_project): + """ + The tag should automatically be recongnized by search after creation. + """ + old_project_name = "some-project" + new_project_name = "my-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{old_project_name}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response_1.status_code == 200 + assert search_response_1.json() == {"projects": [{"name": old_project_name}], "versions": [], "files": []} + + rename_response = client_with_claimed_project.put( + f"/api/{old_project_name}/rename/{new_project_name}", headers={"Docat-Api-Key": "1234"} + ) + assert rename_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=some") + assert search_response_2.status_code == 200 + assert search_response_2.json() == {"projects": [], "versions": [], "files": []} + + search_response_3 = client_with_claimed_project.get("/api/search?query=my") + assert search_response_3.status_code == 200 + assert search_response_3.json() == {"projects": [{"name": new_project_name}], "versions": [], "files": []} + + search_response_4 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_4.status_code == 200 + assert search_response_4.json() == {"projects": [], "versions": [{"project": new_project_name, "version": version}], "files": []} + + search_response_5 = client_with_claimed_project.get("/api/search?query=index") + assert search_response_5.status_code == 200 + assert search_response_5.json() == { + "projects": [], + "versions": [], + "files": [{"project": new_project_name, "version": version, "path": "index.html"}], + } + + +def test_search_updated_on_delete(client_with_claimed_project): + """ + The version and it's files should be removed from the index when deleted + """ + project = "some-project" + version = "1.0.0" + version_to_delete = "1.0.1" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version_to_delete}", + files={"file": ("about.html", io.BytesIO(b"

Other Content

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [], + "versions": [{"project": project, "version": version_to_delete}, {"project": project, "version": version}], + "files": [], + } + search_response_2 = client_with_claimed_project.get("/api/search?query=about.html") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [{"project": project, "version": version_to_delete, "path": "about.html"}], + } + + delete_project_response = client_with_claimed_project.delete(f"/api/{project}/{version_to_delete}", headers={"Docat-Api-Key": "1234"}) + assert delete_project_response.status_code == 200 + search_response_3 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_3.status_code == 200 + assert search_response_3.json() == { + "projects": [], + "versions": [{"project": project, "version": version}], + "files": [], + } + + search_response_4 = client_with_claimed_project.get("/api/search?query=about") + assert search_response_4.status_code == 200 + assert search_response_4.json() == { + "projects": [], + "versions": [], + "files": [], + } + + +def test_search_finds_files_by_name(client_with_claimed_project): + """ + The search should find files by name. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=index") + assert search_response.status_code == 200 + assert search_response.json() == { + "projects": [], + "versions": [], + "files": [{"project": project, "version": version, "path": "index.html"}], + } + + +def test_search_finds_files_by_content_html(client_with_claimed_project): + """ + The search should find html files by content. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=hello%20world") + assert search_response.status_code == 200 + assert search_response.json() == { + "projects": [], + "versions": [], + "files": [{"project": project, "version": version, "path": "index.html"}], + } + + +def test_search_ignores_content_for_non_html_files(client_with_claimed_project): + """ + The search should not find content of non-html files. + (Should be impossible anyways because indexing should already ignore the content.) + """ + + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.txt", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=hello%20world") + assert search_response.status_code == 200 + assert search_response.json() == { + "projects": [], + "versions": [], + "files": [], + } + + +def test_search_ignores_files_of_hidden_versions_by_name(client_with_claimed_project): + """ + After a version was hidden, it's files should not be found by name anymore. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.txt", io.BytesIO(b"Lorem ipsum dolor sit..."), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=index") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [], + "versions": [], + "files": [{"project": project, "version": version, "path": "index.txt"}], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=index") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [], + } + + +def test_search_ignores_files_of_hidden_versions_by_content(client_with_claimed_project): + """ + After a version was hidden, it's files should not be found by html content anymore. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=hello%20world") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [], + "versions": [], + "files": [{"project": project, "version": version, "path": "index.html"}], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=hello%20world") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [], + } + + +def test_search_ignores_project_with_only_hidden_versions(client_with_claimed_project): + """ + The project should not be found when all it's versions are hidden. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [{"name": project}], + "versions": [], + "files": [], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [], + } + + +def test_search_finds_project_with_only_hidden_versions_after_showing(client_with_claimed_project): + """ + The project should be found again when all it's versions are hidden and then shown again. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [{"name": project}], + "versions": [], + "files": [], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/show", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_1 = client_with_claimed_project.get("/api/search?query=some-project") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [{"name": project}], + "versions": [], + "files": [], + } + + +def test_search_ignores_hidden_versions(client_with_claimed_project): + """ + The version should not be found when it's hidden. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [], + "versions": [{"project": project, "version": version}], + "files": [], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=1.0.0") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [], + } + + +def test_search_finds_shown_versions_after_hide(client_with_claimed_project): + """ + The version should be found again after it's hidden and shown again. + """ + project = "some-project" + version = "1.0.0" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": ("index.html", io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response_1 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [], + "versions": [{"project": project, "version": version}], + "files": [], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/hide", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_2 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_2.status_code == 200 + assert search_response_2.json() == { + "projects": [], + "versions": [], + "files": [], + } + + hide_version_response = client_with_claimed_project.post(f"/api/{project}/{version}/show", headers={"Docat-Api-Key": "1234"}) + assert hide_version_response.status_code == 200 + + search_response_1 = client_with_claimed_project.get("/api/search?query=1.0") + assert search_response_1.status_code == 200 + assert search_response_1.json() == { + "projects": [], + "versions": [{"project": project, "version": version}], + "files": [], + } + + +def test_search_project_version_and_file_match(client_with_claimed_project): + """ + Test that the search finds the project, the version and a file with a matching name at the same time. + """ + project = "some-project" + version = "some-version" + file = "some-file.html" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": (file, io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=some") + assert search_response.status_code == 200 + assert search_response.json() == { + "projects": [{"name": project}], + "versions": [{"project": project, "version": version}], + "files": [{"project": project, "version": version, "path": file}], + } + + +def test_search_project_version_content_match(client_with_claimed_project): + """ + Test that the search finds the project, the version and the file with matching content at the same time. + """ + project = "some-project" + version = "some-version" + file = "index.html" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": (file, io.BytesIO(b"

some content

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=some") + assert search_response.status_code == 200 + assert search_response.json() == { + "projects": [{"name": project}], + "versions": [{"project": project, "version": version}], + "files": [{"project": project, "version": version, "path": file}], + } + + +def test_search_file_and_content_match_no_duplicates(client_with_claimed_project): + """ + Test that the search only returns the file once when the file name and the content match. + """ + project = "some-project" + version = "1.0.0" + file = "hello-world.html" + + create_project_response = client_with_claimed_project.post( + f"/api/{project}/{version}", + files={"file": (file, io.BytesIO(b"

Hello World

"), "plain/text")}, + ) + assert create_project_response.status_code == 201 + + search_response = client_with_claimed_project.get("/api/search?query=hello") + assert search_response.status_code == 200 + assert search_response.json() == { + "projects": [], + "versions": [], + "files": [{"project": project, "version": version, "path": file}], + } diff --git a/docat/tests/test_upload.py b/docat/tests/test_upload.py index 86fb200ff..1d5746693 100644 --- a/docat/tests/test_upload.py +++ b/docat/tests/test_upload.py @@ -30,7 +30,7 @@ def test_successfully_override(client_with_claimed_project): assert response.status_code == 201 assert response_data["message"] == "File successfully uploaded" - assert remove_mock.mock_calls == [call("some-project", "1.0.0")] + assert remove_mock.mock_calls == [call("some-project", "1.0.0", docat.DOCAT_UPLOAD_FOLDER)] def test_tags_are_not_overwritten_without_api_key(client_with_claimed_project): diff --git a/docat/tests/test_utils.py b/docat/tests/test_utils.py index 2ae3bd2b1..959ad2e92 100644 --- a/docat/tests/test_utils.py +++ b/docat/tests/test_utils.py @@ -1,6 +1,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch +import docat.app as docat from docat.utils import create_symlink, extract_archive, remove_docs @@ -70,11 +71,10 @@ def test_archive_artifact(): def test_remove_version(temp_project_version): docs = temp_project_version("project", "1.0") - with patch("docat.utils.UPLOAD_FOLDER", docs): - remove_docs("project", "1.0") + remove_docs("project", "1.0", docat.DOCAT_UPLOAD_FOLDER) - assert docs.exists() - assert not (docs / "project").exists() + assert docs.exists() + assert not (docs / "project").exists() def test_remove_symlink_version(temp_project_version): @@ -83,7 +83,6 @@ def test_remove_symlink_version(temp_project_version): symlink_to_latest = docs / project / "latest" assert symlink_to_latest.is_symlink() - with patch("docat.utils.UPLOAD_FOLDER", docs): - remove_docs(project, "latest") + remove_docs(project, "latest", docat.DOCAT_UPLOAD_FOLDER) - assert not symlink_to_latest.exists() + assert not symlink_to_latest.exists()