diff --git a/applications/equitypricemodel/src/equitypricemodel/server.py b/applications/equitypricemodel/src/equitypricemodel/server.py index 9d3916f2..ccfa27a8 100644 --- a/applications/equitypricemodel/src/equitypricemodel/server.py +++ b/applications/equitypricemodel/src/equitypricemodel/server.py @@ -161,7 +161,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: """Load model artifacts from S3 at startup.""" import shutil # noqa: PLC0415 - bucket = os.environ.get("AWS_S3_MODEL_ARTIFACTS_BUCKET") + bucket = os.environ.get("AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME") artifact_path = os.environ.get("AWS_S3_MODEL_ARTIFACT_PATH", "artifacts/") model_directory = "." diff --git a/infrastructure/Pulumi.production.yaml b/infrastructure/Pulumi.production.yaml index 3f51a72e..e4fa192a 100644 --- a/infrastructure/Pulumi.production.yaml +++ b/infrastructure/Pulumi.production.yaml @@ -1,3 +1,4 @@ +--- # yamllint disable rule:line-length config: aws:region: us-east-1 diff --git a/infrastructure/__main__.py b/infrastructure/__main__.py index ff04a249..47b83bff 100644 --- a/infrastructure/__main__.py +++ b/infrastructure/__main__.py @@ -1164,7 +1164,7 @@ def serialize_secret_config_object( sort_keys=True, ) ), - opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True), + opts=pulumi.ResourceOptions(retain_on_delete=True), tags=tags, ) @@ -1200,7 +1200,7 @@ def serialize_secret_config_object( ) ), managed_policy_arns=[github_actions_infrastructure_policy.arn], - opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True), + opts=pulumi.ResourceOptions(retain_on_delete=True), tags=tags, ) @@ -1384,7 +1384,7 @@ def serialize_secret_config_object( sort_keys=True, ) ), - opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True), + opts=pulumi.ResourceOptions(retain_on_delete=True), tags=tags, ) @@ -1406,7 +1406,7 @@ def serialize_secret_config_object( sort_keys=True, ), managed_policy_arns=[sagemaker_execution_policy.arn], - opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True), + opts=pulumi.ResourceOptions(retain_on_delete=True), tags=tags, ) @@ -1598,7 +1598,7 @@ def serialize_secret_config_object( "value": f"http://datamanager.{args[1]}:8080", }, { - "name": "AWS_S3_MODEL_ARTIFACTS_BUCKET", + "name": "AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME", "value": args[3], }, { @@ -1777,8 +1777,8 @@ def serialize_secret_config_object( pulumi.export( "aws_ecr_equitypricemodel_repository", equitypricemodel_repository.repository_url ) -pulumi.export("aws_s3_data_bucket", data_bucket.bucket) -pulumi.export("aws_s3_model_artifacts_bucket", model_artifacts_bucket.bucket) +pulumi.export("aws_s3_data_bucket_name", data_bucket.bucket) +pulumi.export("aws_s3_model_artifacts_bucket_name", model_artifacts_bucket.bucket) pulumi.export( "aws_ecr_equitypricemodel_trainer_repository", equitypricemodel_trainer_repository.repository_url, diff --git a/maskfile.md b/maskfile.md index e76304ec..5f131108 100644 --- a/maskfile.md +++ b/maskfile.md @@ -186,39 +186,31 @@ cd infrastructure/ echo "Launching infrastructure" -organization_name=$(pulumi org get-default) - -if [ -z "${organization_name}" ]; then +if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then echo "Unable to determine Pulumi organization name - ensure you are logged in" exit 1 fi pulumi stack select ${organization_name}/fund/production --create -if [[ "$BOOTSTRAP" == "true" ]]; then - echo "Importing existing resources into Pulumi state (if they exist)" - - # Import GitHub Actions IAM role if it exists - pulumi import --yes aws:iam/role:Role github_actions_infrastructure_role fund-github-actions-infrastructure-role 2>/dev/null || true +echo "Importing existing resources into Pulumi state (if they exist)" - # Import GitHub Actions IAM policy if it exists (requires ARN lookup) - GITHUB_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-github-actions-infrastructure-policy`].Arn' --output text 2>/dev/null || echo "") - if [ -n "$GITHUB_POLICY_ARN" ]; then - pulumi import --yes aws:iam/policy:Policy github_actions_infrastructure_policy "$GITHUB_POLICY_ARN" 2>/dev/null || true - fi +pulumi import --yes aws:iam/role:Role github_actions_infrastructure_role fund-github-actions-infrastructure-role 2>/dev/null || true - # Import SageMaker execution role if it exists - pulumi import --yes aws:iam/role:Role sagemaker_execution_role fund-sagemaker-execution-role 2>/dev/null || true +GITHUB_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-github-actions-infrastructure-policy`].Arn' --output text 2>/dev/null || echo "") +if [ -n "$GITHUB_POLICY_ARN" ]; then + pulumi import --yes aws:iam/policy:Policy github_actions_infrastructure_policy "$GITHUB_POLICY_ARN" 2>/dev/null || true +fi - # Import SageMaker execution policy if it exists (requires ARN lookup) - SAGEMAKER_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-sagemaker-execution-policy`].Arn' --output text 2>/dev/null || echo "") - if [ -n "$SAGEMAKER_POLICY_ARN" ]; then - pulumi import --yes aws:iam/policy:Policy sagemaker_execution_policy "$SAGEMAKER_POLICY_ARN" 2>/dev/null || true - fi +pulumi import --yes aws:iam/role:Role sagemaker_execution_role fund-sagemaker-execution-role 2>/dev/null || true - echo "Importing resources complete" +SAGEMAKER_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-sagemaker-execution-policy`].Arn' --output text 2>/dev/null || echo "") +if [ -n "$SAGEMAKER_POLICY_ARN" ]; then + pulumi import --yes aws:iam/policy:Policy sagemaker_execution_policy "$SAGEMAKER_POLICY_ARN" 2>/dev/null || true fi +echo "Importing resources complete" + pulumi up --diff --yes if [[ "$BOOTSTRAP" == "true" ]]; then @@ -232,16 +224,19 @@ if [[ "$BOOTSTRAP" == "true" ]]; then role_arn=$(pulumi stack output aws_iam_github_actions_infrastructure_role_arn --stack production) region=$(pulumi config get aws:region --stack production) - artifacts_bucket=$(pulumi stack output aws_s3_model_artifacts_bucket --stack production) + artifacts_bucket=$(pulumi stack output aws_s3_model_artifacts_bucket_name --stack production) + data_bucket=$(pulumi stack output aws_s3_data_bucket_name --stack production) gh secret set AWS_IAM_INFRASTRUCTURE_ROLE_ARN --env pulumi --body "$role_arn" gh secret set AWS_REGION --env pulumi --body "$region" gh secret set AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME --env pulumi --body "$artifacts_bucket" + gh secret set AWS_S3_DATA_BUCKET_NAME --env pulumi --body "$data_bucket" echo "GitHub environment secrets updated successfully" echo " - AWS_IAM_INFRASTRUCTURE_ROLE_ARN" echo " - AWS_REGION" echo " - AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME" + echo " - AWS_S3_DATA_BUCKET_NAME" echo "" echo "Note: PULUMI_ACCESS_TOKEN must be set manually" echo "Generate token at: https://app.pulumi.com/account/tokens" @@ -347,15 +342,17 @@ if [ -z "$base_url" ]; then exit 1 fi +cd "${MASKFILE_DIR}" + case "$application_name" in portfoliomanager) full_url="${base_url}/portfolio" echo "Creating portfolio: $full_url" - http_code=$(curl -X POST "$full_url" \ - -H "Content-Type: application/json" \ - -w "%{http_code}" \ - -s -o /dev/stderr) + http_code=$(curl --request POST "$full_url" \ + --header "Content-Type: application/json" \ + --write-out "%{http_code}" \ + --output /dev/stderr) echo "HTTP Status: $http_code" @@ -367,18 +364,11 @@ case "$application_name" in datamanager) if [ -n "${date_range:-}" ]; then - cd "${MASKFILE_DIR}" uv run python tools/sync_equity_bars_data.py "$base_url" "$date_range" else - current_date=$(date -u +"%Y-%m-%dT00:00:00Z") - full_url="${base_url}/equity-bars" - echo "Syncing equity bars: $full_url" - - curl -X POST "$full_url" \ - -H "Content-Type: application/json" \ - -d "{\"date\": \"$current_date\"}" \ - -w "\nHTTP Status: %{http_code}\n" \ - -s + current_date=$(date -u +"%Y-%m-%d") + date_range_json="{\"start_date\": \"$current_date\", \"end_date\": \"$current_date\"}" + uv run python tools/sync_equity_bars_data.py "$base_url" "$date_range_json" fi ;; @@ -755,7 +745,15 @@ set -euo pipefail echo "Syncing equity data: ${data_type}" cd infrastructure -export AWS_S3_DATA_BUCKET="$(pulumi stack output aws_s3_data_bucket)" + +if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then + echo "Unable to determine Pulumi organization name - ensure you are logged in" + exit 1 +fi + +pulumi stack select ${organization_name}/fund/production + +export AWS_S3_DATA_BUCKET_NAME="$(pulumi stack output aws_s3_data_bucket_name)" cd ../ @@ -791,8 +789,16 @@ set -euo pipefail export APPLICATION_NAME="${application_name}" cd infrastructure -export AWS_S3_DATA_BUCKET="$(pulumi stack output aws_s3_data_bucket)" -export AWS_S3_MODEL_ARTIFACTS_BUCKET="$(pulumi stack output aws_s3_model_artifacts_bucket)" + +if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then + echo "Unable to determine Pulumi organization name - ensure you are logged in" + exit 1 +fi + +pulumi stack select ${organization_name}/fund/production + +export AWS_S3_DATA_BUCKET_NAME="$(pulumi stack output aws_s3_data_bucket_name)" +export AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME="$(pulumi stack output aws_s3_model_artifacts_bucket_name)" export LOOKBACK_DAYS="${LOOKBACK_DAYS:-365}" cd ../ @@ -859,11 +865,19 @@ esac export SAGEMAKER_INSTANCE_TYPE="${instance_type}" cd infrastructure + +if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then + echo "Unable to determine Pulumi organization name - ensure you are logged in" + exit 1 +fi + +pulumi stack select ${organization_name}/fund/production + export AWS_ECR_EQUITY_PRICE_MODEL_TRAINER_IMAGE_ARN="$(pulumi stack output aws_ecr_equitypricemodel_trainer_image)" export AWS_IAM_SAGEMAKER_ROLE_ARN="$(pulumi stack output aws_iam_sagemaker_role_arn)" -export AWS_S3_MODEL_ARTIFACTS_BUCKET="$(pulumi stack output aws_s3_model_artifacts_bucket)" -export AWS_S3_EQUITY_PRICE_MODEL_ARTIFACT_OUTPUT_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET}/artifacts" -export AWS_S3_EQUITY_PRICE_MODEL_TRAINING_DATA_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET}/training" +export AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME="$(pulumi stack output aws_s3_model_artifacts_bucket_name)" +export AWS_S3_EQUITY_PRICE_MODEL_ARTIFACT_OUTPUT_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME}/artifacts" +export AWS_S3_EQUITY_PRICE_MODEL_TRAINING_DATA_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME}/training" cd ../ diff --git a/tools/prepare_training_data.py b/tools/prepare_training_data.py index f68fd321..da4d243f 100644 --- a/tools/prepare_training_data.py +++ b/tools/prepare_training_data.py @@ -249,16 +249,16 @@ def prepare_training_data( if __name__ == "__main__": - data_bucket: str | None = os.getenv("AWS_S3_DATA_BUCKET") - model_artifacts_bucket: str | None = os.getenv("AWS_S3_MODEL_ARTIFACTS_BUCKET") + data_bucket: str | None = os.getenv("AWS_S3_DATA_BUCKET_NAME") + model_artifacts_bucket: str | None = os.getenv("AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME") lookback_days = int(os.getenv("LOOKBACK_DAYS", "365")) if data_bucket is None: - logger.error("AWS_S3_DATA_BUCKET environment variable not set") + logger.error("AWS_S3_DATA_BUCKET_NAME environment variable not set") sys.exit(1) if model_artifacts_bucket is None: - logger.error("AWS_S3_MODEL_ARTIFACTS_BUCKET environment variable not set") + logger.error("AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME environment variable not set") sys.exit(1) end_date = datetime.now(tz=UTC).replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/tools/sync_equity_categories.py b/tools/sync_equity_categories.py index eb1131a5..615754c9 100644 --- a/tools/sync_equity_categories.py +++ b/tools/sync_equity_categories.py @@ -25,7 +25,6 @@ MASSIVE_BASE_URL = os.getenv("MASSIVE_BASE_URL", "https://api.massive.io") -# Massive ticker types: CS (Common Stock), ADRC/ADRP/ADRS (ADR variants) EQUITY_TYPES = {"CS", "ADRC", "ADRP", "ADRS"} @@ -73,18 +72,14 @@ def extract_categories(tickers: list[dict]) -> pl.DataFrame: rows = [] for ticker_data in tickers: ticker = ticker_data.get("ticker", "") - # Skip entries with empty or missing ticker values if not ticker: continue - # Filter for Common Stock and all ADR types if ticker_data.get("type") not in EQUITY_TYPES: continue - # Try to get sector/industry from various fields Massive provides sector = ticker_data.get("sector", "") industry = ticker_data.get("industry", "") - # Some tickers may not have sector/industry if not sector: sector = "NOT AVAILABLE" if not industry: @@ -150,14 +145,14 @@ def sync_equity_categories( if __name__ == "__main__": api_key: str | None = os.getenv("MASSIVE_API_KEY") - bucket_name: str | None = os.getenv("AWS_S3_DATA_BUCKET") + bucket_name: str | None = os.getenv("AWS_S3_DATA_BUCKET_NAME") if api_key is None: logger.error("MASSIVE_API_KEY environment variable not set") sys.exit(1) if bucket_name is None: - logger.error("AWS_S3_DATA_BUCKET environment variable not set") + logger.error("AWS_S3_DATA_BUCKET_NAME environment variable not set") sys.exit(1) try: