diff --git a/.flox/env/manifest.lock b/.flox/env/manifest.lock index 95cdb1d12..ff15369b9 100644 --- a/.flox/env/manifest.lock +++ b/.flox/env/manifest.lock @@ -3,9 +3,6 @@ "manifest": { "version": 1, "install": { - "libpqxx": { - "pkg-path": "libpqxx" - }, "mise": { "pkg-path": "mise" }, @@ -15,9 +12,6 @@ "pulumi-python": { "pkg-path": "pulumiPackages.pulumi-python" }, - "pulumictl": { - "pkg-path": "pulumictl" - }, "ruff": { "pkg-path": "ruff" }, @@ -41,130 +35,6 @@ } }, "packages": [ - { - "attr_path": "libpqxx", - "broken": false, - "derivation": "/nix/store/w9lvn6s828mpwq6b3pqv0hizsfzwhgjm-libpqxx-7.10.1.drv", - "description": "C++ library to access PostgreSQL databases", - "install_id": "libpqxx", - "license": "BSD-3-Clause", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "libpqxx-7.10.1", - "pname": "libpqxx", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T04:19:30.069215Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "7.10.1", - "outputs_to_install": [ - "out" - ], - "outputs": { - "dev": "/nix/store/zhxb9605ws89rzy1rh7c43a5q6l2fksd-libpqxx-7.10.1-dev", - "out": "/nix/store/qznww33gb7ym7jnd3i4raxwagd059fir-libpqxx-7.10.1" - }, - "system": "aarch64-darwin", - "group": "toplevel", - "priority": 5 - }, - { - "attr_path": "libpqxx", - "broken": false, - "derivation": "/nix/store/yv2vasrkh611sh8liv2lx361n9bn82ag-libpqxx-7.10.1.drv", - "description": "C++ library to access PostgreSQL databases", - "install_id": "libpqxx", - "license": "BSD-3-Clause", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "libpqxx-7.10.1", - "pname": "libpqxx", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T04:37:23.298189Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "7.10.1", - "outputs_to_install": [ - "out" - ], - "outputs": { - "dev": "/nix/store/4fafh1bc0varik2d6wngd20mn3fwkzmc-libpqxx-7.10.1-dev", - "out": "/nix/store/d727l5xl8khnzrkgd8x7p7hy7v1apn4q-libpqxx-7.10.1" - }, - "system": "aarch64-linux", - "group": "toplevel", - "priority": 5 - }, - { - "attr_path": "libpqxx", - "broken": false, - "derivation": "/nix/store/l8bwf0cl29np6xi1kyvgkvb5jnv4a1w3-libpqxx-7.10.1.drv", - "description": "C++ library to access PostgreSQL databases", - "install_id": "libpqxx", - "license": "BSD-3-Clause", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "libpqxx-7.10.1", - "pname": "libpqxx", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T04:54:30.752025Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "7.10.1", - "outputs_to_install": [ - "out" - ], - "outputs": { - "dev": "/nix/store/72i3bj360rp5kp2rbfkrb9267raqpbxi-libpqxx-7.10.1-dev", - "out": "/nix/store/xlzlkln61251vl4vdhzcgiv03krq4d7s-libpqxx-7.10.1" - }, - "system": "x86_64-darwin", - "group": "toplevel", - "priority": 5 - }, - { - "attr_path": "libpqxx", - "broken": false, - "derivation": "/nix/store/3cnj26fkyk7yvkzbkq9ricckz2x3lg46-libpqxx-7.10.1.drv", - "description": "C++ library to access PostgreSQL databases", - "install_id": "libpqxx", - "license": "BSD-3-Clause", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "libpqxx-7.10.1", - "pname": "libpqxx", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T05:15:58.462220Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "7.10.1", - "outputs_to_install": [ - "out" - ], - "outputs": { - "dev": "/nix/store/n3rb5ag633fj1fpkkb17dc826g3k2yh7-libpqxx-7.10.1-dev", - "out": "/nix/store/dxbnd3lixj1273hdfqpdvyb7v9w4da6m-libpqxx-7.10.1" - }, - "system": "x86_64-linux", - "group": "toplevel", - "priority": 5 - }, { "attr_path": "mise", "broken": false, @@ -525,126 +395,6 @@ "group": "toplevel", "priority": 5 }, - { - "attr_path": "pulumictl", - "broken": false, - "derivation": "/nix/store/kx43jzcfslw28byvs6h5ngsgl432pvvv-pulumictl-0.0.49.drv", - "description": "Swiss Army Knife for Pulumi Development", - "install_id": "pulumictl", - "license": "Apache-2.0", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "pulumictl-0.0.49", - "pname": "pulumictl", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T04:19:37.687142Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "0.0.49", - "outputs_to_install": [ - "out" - ], - "outputs": { - "out": "/nix/store/ny69c9bfkf4w179240ch45injfb2ajqr-pulumictl-0.0.49" - }, - "system": "aarch64-darwin", - "group": "toplevel", - "priority": 5 - }, - { - "attr_path": "pulumictl", - "broken": false, - "derivation": "/nix/store/www9nfncvv7l339n8dks22x5vs5lz1mk-pulumictl-0.0.49.drv", - "description": "Swiss Army Knife for Pulumi Development", - "install_id": "pulumictl", - "license": "Apache-2.0", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "pulumictl-0.0.49", - "pname": "pulumictl", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T04:37:42.118866Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "0.0.49", - "outputs_to_install": [ - "out" - ], - "outputs": { - "out": "/nix/store/xpdh5dijdki4cngh7k7n4rg84i6c28zs-pulumictl-0.0.49" - }, - "system": "aarch64-linux", - "group": "toplevel", - "priority": 5 - }, - { - "attr_path": "pulumictl", - "broken": false, - "derivation": "/nix/store/17wf5x1kk3v5ch5npwhamnix629y07wg-pulumictl-0.0.49.drv", - "description": "Swiss Army Knife for Pulumi Development", - "install_id": "pulumictl", - "license": "Apache-2.0", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "pulumictl-0.0.49", - "pname": "pulumictl", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T04:54:38.447587Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "0.0.49", - "outputs_to_install": [ - "out" - ], - "outputs": { - "out": "/nix/store/6wmig1w7f3vmfrlyg2qzv21bvacj3as8-pulumictl-0.0.49" - }, - "system": "x86_64-darwin", - "group": "toplevel", - "priority": 5 - }, - { - "attr_path": "pulumictl", - "broken": false, - "derivation": "/nix/store/ib7hqxg7xdf5kyh78jqggzdcs97q1224-pulumictl-0.0.49.drv", - "description": "Swiss Army Knife for Pulumi Development", - "install_id": "pulumictl", - "license": "Apache-2.0", - "locked_url": "https://github.com/flox/nixpkgs?rev=979daf34c8cacebcd917d540070b52a3c2b9b16e", - "name": "pulumictl-0.0.49", - "pname": "pulumictl", - "rev": "979daf34c8cacebcd917d540070b52a3c2b9b16e", - "rev_count": 793735, - "rev_date": "2025-05-04T03:14:55Z", - "scrape_date": "2025-05-05T05:16:19.858098Z", - "stabilities": [ - "staging", - "unstable" - ], - "unfree": false, - "version": "0.0.49", - "outputs_to_install": [ - "out" - ], - "outputs": { - "out": "/nix/store/rmh9mjkxijxcc7cvjhsqc9657fbw0yyg-pulumictl-0.0.49" - }, - "system": "x86_64-linux", - "group": "toplevel", - "priority": 5 - }, { "attr_path": "ruff", "broken": false, diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 74efac9df..2faace258 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,17 +8,13 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - # os: [macos-latest] # ubuntu-latest should be included once C++ dependencies are fixed - os: [ubuntu-latest] # ubuntu-latest should be included once C++ dependencies are fixed + # os: [macos-latest] + os: [ubuntu-latest] steps: - name: Checkout code uses: actions/checkout@v4 - name: Install Flox uses: flox/install-flox-action@v2 - # - name: Install libstdc++ - # run: | - # sudo apt-get update - # sudo apt-get install -y lib32stdc++6 - name: Install Python dependencies uses: flox/activate-action@v1 with: diff --git a/.mise.toml b/.mise.toml index 0d6a48d73..a8f86e17d 100644 --- a/.mise.toml +++ b/.mise.toml @@ -52,42 +52,42 @@ uv run coverage xml \ [tasks."python:test:behave"] description = "Run behave end-to-end tests" run = """ -cd application/{{arg(name="application_name")}} +cd application/{{arg(name="service_name")}} uv run behave features/ """ -[tasks."application:build"] +[tasks."application:service:build"] description = "Build the application service" run = """ TIMESTAMP=$(date +%Y%m%d) docker build \ ---file application/{{arg(name="application_name")}}/Dockerfile \ ---tag pocketsizefund/{{arg(name="application_name")}}:latest \ ---tag pocketsizefund/{{arg(name="application_name")}}:${TIMESTAMP} \ +--file application/{{arg(name="service_name")}}/Dockerfile \ +--tag pocketsizefund/{{arg(name="service_name")}}:latest \ +--tag pocketsizefund/{{arg(name="service_name")}}:${TIMESTAMP} \ . """ -[tasks."application:run"] +[tasks."application:service:run"] description = "Run the application service" run = """ docker run \ --env-file .env \ --publish 8080:8080 \ -pocketsizefund/{{arg(name="application_name")}}:latest \ +pocketsizefund/{{arg(name="service_name")}}:latest \ """ -[tasks."application:dev"] +[tasks."application:service:development"] description = "Run the application service locally with hot reloading" run = """ -cd application/{{arg(name="application_name")}} -uv run uvicorn src.{{arg(name="application_name")}}.main:application --reload +cd application/{{arg(name="service_name")}} +uv run uvicorn src.{{arg(name="service_name")}}.main:application --reload """ -[tasks."application:test"] +[tasks."application:service:test"] description = "Run integration tests" run = """ -cd application/{{arg(name="application_name")}} +cd application/{{arg(name="service_name")}} docker-compose up --build --abort-on-container-exit --remove-orphans """ @@ -95,7 +95,7 @@ docker-compose up --build --abort-on-container-exit --remove-orphans depends = ["python:lint"] description = "Run code quality checks" run = """ -yamllint . +yamllint -d "{extends: relaxed, rules: {line-length: {max: 110}}}" . """ [tasks."infrastructure:up"] diff --git a/application/datamanager/features/steps/equity_bars_steps.py b/application/datamanager/features/steps/equity_bars_steps.py index c4b9f3246..5ea39b6f6 100644 --- a/application/datamanager/features/steps/equity_bars_steps.py +++ b/application/datamanager/features/steps/equity_bars_steps.py @@ -1,19 +1,15 @@ -import json import os import sys -from datetime import datetime, timedelta from pathlib import Path -# Add the project root to the Python path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -import httpx import requests from behave import given, when, then @given("I have date ranges") -def step_impl(context): +def step_impl_date_ranges(context): for row in context.table: context.start_date = row["start_date"] context.end_date = row["end_date"] @@ -25,14 +21,14 @@ def step_impl_api_url(context): @when('I send a POST request to "{endpoint}" for date range') -def step_impl(context, endpoint): +def step_impl_post_request(context, endpoint): url = f"{context.api_url}{endpoint}" response = requests.post(url, json={"date": context.start_date}) context.response = response @when('I send a GET request to "{endpoint}" for date range') -def step_imp(context, endpoint): +def step_imp_get_request(context, endpoint): url = f"{context.api_url}{endpoint}" response = requests.get( url, @@ -42,94 +38,12 @@ def step_imp(context, endpoint): @then("the response status code should be {status_code}") -def step_impl(context, status_code): +def step_impl_response_status_code(context, status_code): assert context.response.status_code == int(status_code), ( f"Expected status code {status_code}, got {context.response.status_code}" ) -@then('the response should contain a JSON with "{field1}" and "{field2}" fields') -def step_impl(context, field1, field2): - response_json = context.response.json() - assert field1 in response_json, f"Response JSON missing field: {field1}" - assert field2 in response_json, f"Response JSON missing field: {field2}" - - -@then('I can get that data back from the API for date "{date_str}"') -def step_impl(context, date_str): - expected_file = Path(f"equity_bars_{date_str}.parquet") - assert expected_file.exists(), f"Expected parquet file {expected_file} not found" - - -@given("I have equity bars data for dates") -def step_impl(context): - # Store the table of dates for later use - context.dates = [row["date"] for row in context.table] - - # Generate and store data for each date (mock data for testing) - for date_str in context.dates: - url = f"{context.api_url}/equity-bars" - response = requests.post(url, json={"date": date_str}) - assert response.status_code == 200, f"Failed to set up data for date {date_str}" - - -@given('I have equity bars data for date "{date_str}"') -def step_impl(context, date_str): - url = f"{context.api_url}/equity-bars" - response = requests.post(url, json={"date": date_str}) - assert response.status_code == 200, f"Failed to set up data for date {date_str}" - context.test_date = date_str - - -@when( - 'I send a GET request to "{endpoint}" with date range "{start_date}" to "{end_date}"' -) -def step_impl(context, endpoint, start_date, end_date): - url = f"{context.api_url}{endpoint}" - params = { - "date_range": { - "start": start_date, - "end": end_date, - } - } - response = requests.get(url, params=params) - context.response = response - context.start_date = start_date - context.end_date = end_date - - -@then("the response should contain equity bars data for the date range") -def step_impl(context): - response_json = context.response.json() - assert "data" in response_json, "Response does not contain data field" - - # Check if data is not empty - assert len(response_json["data"]) > 0, "Response data is empty" - - # For a more thorough check, we could verify date ranges of the returned data - # This would depend on the structure of your API response - - -@then("the response should include a metadata section") -def step_impl(context): - response_json = context.response.json() - assert "metadata" in response_json, "Response does not contain metadata field" - - # Check metadata fields - metadata = response_json["metadata"] - assert "start_date" in metadata, "Metadata missing start_date" - assert "end_date" in metadata, "Metadata missing end_date" - assert "count" in metadata, "Metadata missing count" - - # Verify the dates match our request - assert context.start_date in metadata["start_date"], ( - f"Metadata start date mismatch: {metadata['start_date']}" - ) - assert context.end_date in metadata["end_date"], ( - f"Metadata end date mismatch: {metadata['end_date']}" - ) - - @when('I send a DELETE request to "{endpoint}" for date "{date_str}"') def step_impl(context, endpoint, date_str): url = f"{context.api_url}{endpoint}" @@ -139,29 +53,11 @@ def step_impl(context, endpoint, date_str): @then('the equity bars data for "{date_str}" should be deleted') -def step_impl(context, date_str): - # Verify the data no longer exists by trying to fetch it +def step_impl_equity_bars(context, date_str): if os.environ.get("GCP_GCS_BUCKET"): - # In a real test, we'd check that the file no longer exists in GCS - # For testing purposes, we'll just assume it was deleted properly assert True, "GCS bucket deletion check would go here" else: - # For local files, check the file no longer exists expected_file = Path(f"equity_bars_{date_str}.parquet") assert not expected_file.exists(), ( f"Parquet file {expected_file} still exists after deletion" ) - - -@then("the response should confirm successful deletion") -def step_impl(context): - response_json = context.response.json() - assert "status" in response_json, "Response missing status field" - assert response_json["status"] == "success", ( - f"Expected success status, got {response_json['status']}" - ) - assert "date" in response_json, "Response missing date field" - assert "message" in response_json, "Response missing message field" - assert "deleted successfully" in response_json["message"], ( - "Response message does not confirm deletion" - ) diff --git a/application/datamanager/features/steps/health_steps.py b/application/datamanager/features/steps/health_steps.py index 947dc0c7f..14ba44ad0 100644 --- a/application/datamanager/features/steps/health_steps.py +++ b/application/datamanager/features/steps/health_steps.py @@ -1,11 +1,9 @@ -""" -Step definitions for health endpoint feature. -""" from behave import when import requests + @when('I send a GET request to "{endpoint}"') def step_impl(context, endpoint): """Send a GET request to the specified endpoint.""" url = f"{context.api_url}{endpoint}" - context.response = requests.get(url) \ No newline at end of file + context.response = requests.get(url) diff --git a/application/datamanager/src/datamanager/bars.sql b/application/datamanager/src/datamanager/bars.sql deleted file mode 100644 index 8b1378917..000000000 --- a/application/datamanager/src/datamanager/bars.sql +++ /dev/null @@ -1 +0,0 @@ - diff --git a/application/datamanager/src/datamanager/config.py b/application/datamanager/src/datamanager/config.py index 67adc30dc..d57ea181d 100644 --- a/application/datamanager/src/datamanager/config.py +++ b/application/datamanager/src/datamanager/config.py @@ -1,9 +1,7 @@ import os import json from functools import cached_property -from datetime import date, datetime from pydantic import BaseModel, Field, computed_field -from loguru import logger class Polygon(BaseModel): diff --git a/application/datamanager/src/datamanager/main.py b/application/datamanager/src/datamanager/main.py index 755f52ee7..503998e78 100644 --- a/application/datamanager/src/datamanager/main.py +++ b/application/datamanager/src/datamanager/main.py @@ -9,9 +9,9 @@ from datetime import date import httpx import polars as pl -from fastapi import FastAPI, HTTPException, Request, Response, status, Query +from fastapi import FastAPI, Request, Response, status from .config import Settings -from .models import BarsResult, DateRange, SummaryDate +from .models import BarsSummary, SummaryDate from loguru import logger @@ -79,8 +79,6 @@ async def get_equity_bars( bucket=settings.gcp.bucket.name, start_date=start_date, end_date=end_date ) - logger.info(query) - try: data = request.app.state.connection.execute(query).arrow() @@ -109,15 +107,13 @@ async def get_equity_bars( return Response(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) -@application.post("/equity-bars", response_model=BarsResult) -async def fetch_equity_bars(request: Request, summary_date: SummaryDate) -> BarsResult: - logger.info(summary_date) +@application.post("/equity-bars", response_model=BarsSummary) +async def fetch_equity_bars(request: Request, summary_date: SummaryDate) -> BarsSummary: polygon = request.app.state.settings.polygon bucket = request.app.state.settings.gcp.bucket - logger.info(summary_date.date) url = f"{polygon.base_url}{polygon.daily_bars}{summary_date.date.strftime('%Y-%m-%d')}" - logger.info(url) + logger.info(f"polygon_api_endpoint={url}") params = {"adjusted": "true", "apiKey": polygon.api_key} async with httpx.AsyncClient() as client: @@ -139,7 +135,7 @@ async def fetch_equity_bars(request: Request, summary_date: SummaryDate) -> Bars pl.from_epoch("t", time_unit="ms").dt.day().alias("day"), ] ).write_parquet(bucket.daily_bars_path, partition_by=["year", "month", "day"]) - return BarsResult(date=summary_date.date.strftime("%Y-%m-%d"), count=count) + return BarsSummary(date=summary_date.date.strftime("%Y-%m-%d"), count=count) @application.delete("/equity-bars") diff --git a/application/datamanager/src/datamanager/models.py b/application/datamanager/src/datamanager/models.py index e860784b8..e2191ce47 100644 --- a/application/datamanager/src/datamanager/models.py +++ b/application/datamanager/src/datamanager/models.py @@ -35,6 +35,6 @@ def check_end_after_start(cls, end_value, info): return end_value -class BarsResult(BaseModel): +class BarsSummary(BaseModel): date: str count: int diff --git a/application/datamanager/src/datamanager/query.py b/application/datamanager/src/datamanager/query.py deleted file mode 100644 index 1dd0a409d..000000000 --- a/application/datamanager/src/datamanager/query.py +++ /dev/null @@ -1,28 +0,0 @@ -from datetime import timedelta, date -from polars import DataFrame -import duckdb -from pathlib import Path - - -async def list_file_paths( - *, bucket: str, start_date: date, end_date: date -) -> list[Path]: - filepaths = [] - current = start_date - while current <= end_date: - filepaths.append( - f"gs://{bucket}/equity/bars/{current.strftime('%Y-%m-%d')}/data.parquet" - ) - current += timedelta(days=1) - - return filepaths - - -async def query_bars(*, filepaths: list[Path]) -> DataFrame: - query = f""" - SELECT * FROM read_parquet({filepaths}) - """ - try: - return duckdb.sql(query).pl() - except duckdb.duckdb.HTTPException as e: - return diff --git a/infrastructure/Pulumi.yaml b/infrastructure/Pulumi.yaml index 2bb26c685..a139b0c60 100644 --- a/infrastructure/Pulumi.yaml +++ b/infrastructure/Pulumi.yaml @@ -1,3 +1,4 @@ +--- name: pocketsizefund-infrastructure runtime: python description: Pocket Size Fund Infrastructure diff --git a/infrastructure/README.md b/infrastructure/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/infrastructure/cloud_run.py b/infrastructure/cloud_run.py index c5942aa2f..76dcb35de 100644 --- a/infrastructure/cloud_run.py +++ b/infrastructure/cloud_run.py @@ -46,7 +46,7 @@ cloudrun.ServiceTemplateSpecContainerEnvArgs( name="DUCKDB_SECRET", value=duckdb_secret, - ), + ), ], ) ],