Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
"""Load model artifacts from S3 at startup."""
import shutil # noqa: PLC0415

bucket = os.environ.get("AWS_S3_MODEL_ARTIFACTS_BUCKET")
bucket = os.environ.get("AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME")
artifact_path = os.environ.get("AWS_S3_MODEL_ARTIFACT_PATH", "artifacts/")
model_directory = "."

Expand Down
1 change: 1 addition & 0 deletions infrastructure/Pulumi.production.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
# yamllint disable rule:line-length
config:
aws:region: us-east-1
Expand Down
14 changes: 7 additions & 7 deletions infrastructure/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ def serialize_secret_config_object(
sort_keys=True,
)
),
opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True),
opts=pulumi.ResourceOptions(retain_on_delete=True),
Comment thread
forstmeier marked this conversation as resolved.
tags=tags,
)

Expand Down Expand Up @@ -1200,7 +1200,7 @@ def serialize_secret_config_object(
)
),
managed_policy_arns=[github_actions_infrastructure_policy.arn],
opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True),
opts=pulumi.ResourceOptions(retain_on_delete=True),
tags=tags,
)

Expand Down Expand Up @@ -1384,7 +1384,7 @@ def serialize_secret_config_object(
sort_keys=True,
)
),
opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True),
opts=pulumi.ResourceOptions(retain_on_delete=True),
tags=tags,
)

Expand All @@ -1406,7 +1406,7 @@ def serialize_secret_config_object(
sort_keys=True,
),
managed_policy_arns=[sagemaker_execution_policy.arn],
opts=pulumi.ResourceOptions(retain_on_delete=True, protect=True),
opts=pulumi.ResourceOptions(retain_on_delete=True),
tags=tags,
)

Expand Down Expand Up @@ -1598,7 +1598,7 @@ def serialize_secret_config_object(
"value": f"http://datamanager.{args[1]}:8080",
},
{
"name": "AWS_S3_MODEL_ARTIFACTS_BUCKET",
"name": "AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME",
"value": args[3],
},
{
Expand Down Expand Up @@ -1777,8 +1777,8 @@ def serialize_secret_config_object(
pulumi.export(
"aws_ecr_equitypricemodel_repository", equitypricemodel_repository.repository_url
)
pulumi.export("aws_s3_data_bucket", data_bucket.bucket)
pulumi.export("aws_s3_model_artifacts_bucket", model_artifacts_bucket.bucket)
pulumi.export("aws_s3_data_bucket_name", data_bucket.bucket)
pulumi.export("aws_s3_model_artifacts_bucket_name", model_artifacts_bucket.bucket)
pulumi.export(
"aws_ecr_equitypricemodel_trainer_repository",
equitypricemodel_trainer_repository.repository_url,
Expand Down
98 changes: 56 additions & 42 deletions maskfile.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,39 +186,31 @@ cd infrastructure/

echo "Launching infrastructure"

organization_name=$(pulumi org get-default)

if [ -z "${organization_name}" ]; then
if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then
echo "Unable to determine Pulumi organization name - ensure you are logged in"
exit 1
fi

pulumi stack select ${organization_name}/fund/production --create

if [[ "$BOOTSTRAP" == "true" ]]; then
echo "Importing existing resources into Pulumi state (if they exist)"

# Import GitHub Actions IAM role if it exists
pulumi import --yes aws:iam/role:Role github_actions_infrastructure_role fund-github-actions-infrastructure-role 2>/dev/null || true
echo "Importing existing resources into Pulumi state (if they exist)"

# Import GitHub Actions IAM policy if it exists (requires ARN lookup)
GITHUB_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-github-actions-infrastructure-policy`].Arn' --output text 2>/dev/null || echo "")
if [ -n "$GITHUB_POLICY_ARN" ]; then
pulumi import --yes aws:iam/policy:Policy github_actions_infrastructure_policy "$GITHUB_POLICY_ARN" 2>/dev/null || true
fi
pulumi import --yes aws:iam/role:Role github_actions_infrastructure_role fund-github-actions-infrastructure-role 2>/dev/null || true

# Import SageMaker execution role if it exists
pulumi import --yes aws:iam/role:Role sagemaker_execution_role fund-sagemaker-execution-role 2>/dev/null || true
GITHUB_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-github-actions-infrastructure-policy`].Arn' --output text 2>/dev/null || echo "")
if [ -n "$GITHUB_POLICY_ARN" ]; then
pulumi import --yes aws:iam/policy:Policy github_actions_infrastructure_policy "$GITHUB_POLICY_ARN" 2>/dev/null || true
fi

# Import SageMaker execution policy if it exists (requires ARN lookup)
SAGEMAKER_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-sagemaker-execution-policy`].Arn' --output text 2>/dev/null || echo "")
if [ -n "$SAGEMAKER_POLICY_ARN" ]; then
pulumi import --yes aws:iam/policy:Policy sagemaker_execution_policy "$SAGEMAKER_POLICY_ARN" 2>/dev/null || true
fi
pulumi import --yes aws:iam/role:Role sagemaker_execution_role fund-sagemaker-execution-role 2>/dev/null || true

echo "Importing resources complete"
SAGEMAKER_POLICY_ARN=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`fund-sagemaker-execution-policy`].Arn' --output text 2>/dev/null || echo "")
if [ -n "$SAGEMAKER_POLICY_ARN" ]; then
pulumi import --yes aws:iam/policy:Policy sagemaker_execution_policy "$SAGEMAKER_POLICY_ARN" 2>/dev/null || true
fi

echo "Importing resources complete"
Comment thread
forstmeier marked this conversation as resolved.

pulumi up --diff --yes

if [[ "$BOOTSTRAP" == "true" ]]; then
Expand All @@ -232,16 +224,19 @@ if [[ "$BOOTSTRAP" == "true" ]]; then

role_arn=$(pulumi stack output aws_iam_github_actions_infrastructure_role_arn --stack production)
region=$(pulumi config get aws:region --stack production)
artifacts_bucket=$(pulumi stack output aws_s3_model_artifacts_bucket --stack production)
artifacts_bucket=$(pulumi stack output aws_s3_model_artifacts_bucket_name --stack production)
data_bucket=$(pulumi stack output aws_s3_data_bucket_name --stack production)

gh secret set AWS_IAM_INFRASTRUCTURE_ROLE_ARN --env pulumi --body "$role_arn"
gh secret set AWS_REGION --env pulumi --body "$region"
gh secret set AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME --env pulumi --body "$artifacts_bucket"
gh secret set AWS_S3_DATA_BUCKET_NAME --env pulumi --body "$data_bucket"

echo "GitHub environment secrets updated successfully"
echo " - AWS_IAM_INFRASTRUCTURE_ROLE_ARN"
echo " - AWS_REGION"
echo " - AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME"
echo " - AWS_S3_DATA_BUCKET_NAME"
echo ""
echo "Note: PULUMI_ACCESS_TOKEN must be set manually"
echo "Generate token at: https://app.pulumi.com/account/tokens"
Expand Down Expand Up @@ -347,15 +342,17 @@ if [ -z "$base_url" ]; then
exit 1
fi

cd "${MASKFILE_DIR}"

case "$application_name" in
portfoliomanager)
full_url="${base_url}/portfolio"
echo "Creating portfolio: $full_url"

http_code=$(curl -X POST "$full_url" \
-H "Content-Type: application/json" \
-w "%{http_code}" \
-s -o /dev/stderr)
http_code=$(curl --request POST "$full_url" \
Comment thread
forstmeier marked this conversation as resolved.
--header "Content-Type: application/json" \
--write-out "%{http_code}" \
--output /dev/stderr)
Comment thread
forstmeier marked this conversation as resolved.

echo "HTTP Status: $http_code"

Expand All @@ -367,18 +364,11 @@ case "$application_name" in

datamanager)
if [ -n "${date_range:-}" ]; then
cd "${MASKFILE_DIR}"
uv run python tools/sync_equity_bars_data.py "$base_url" "$date_range"
else
current_date=$(date -u +"%Y-%m-%dT00:00:00Z")
full_url="${base_url}/equity-bars"
echo "Syncing equity bars: $full_url"

curl -X POST "$full_url" \
-H "Content-Type: application/json" \
-d "{\"date\": \"$current_date\"}" \
-w "\nHTTP Status: %{http_code}\n" \
-s
current_date=$(date -u +"%Y-%m-%d")
date_range_json="{\"start_date\": \"$current_date\", \"end_date\": \"$current_date\"}"
uv run python tools/sync_equity_bars_data.py "$base_url" "$date_range_json"
fi
;;

Expand Down Expand Up @@ -755,7 +745,15 @@ set -euo pipefail
echo "Syncing equity data: ${data_type}"

cd infrastructure
export AWS_S3_DATA_BUCKET="$(pulumi stack output aws_s3_data_bucket)"

if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then
echo "Unable to determine Pulumi organization name - ensure you are logged in"
exit 1
fi

pulumi stack select ${organization_name}/fund/production

export AWS_S3_DATA_BUCKET_NAME="$(pulumi stack output aws_s3_data_bucket_name)"
Comment thread
forstmeier marked this conversation as resolved.

cd ../

Expand Down Expand Up @@ -791,8 +789,16 @@ set -euo pipefail
export APPLICATION_NAME="${application_name}"

cd infrastructure
export AWS_S3_DATA_BUCKET="$(pulumi stack output aws_s3_data_bucket)"
export AWS_S3_MODEL_ARTIFACTS_BUCKET="$(pulumi stack output aws_s3_model_artifacts_bucket)"

if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then
echo "Unable to determine Pulumi organization name - ensure you are logged in"
exit 1
fi

pulumi stack select ${organization_name}/fund/production

export AWS_S3_DATA_BUCKET_NAME="$(pulumi stack output aws_s3_data_bucket_name)"
export AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME="$(pulumi stack output aws_s3_model_artifacts_bucket_name)"
export LOOKBACK_DAYS="${LOOKBACK_DAYS:-365}"

cd ../
Expand Down Expand Up @@ -859,11 +865,19 @@ esac
export SAGEMAKER_INSTANCE_TYPE="${instance_type}"

cd infrastructure

if ! organization_name=$(pulumi org get-default 2>/dev/null) || [ -z "${organization_name}" ]; then
echo "Unable to determine Pulumi organization name - ensure you are logged in"
exit 1
fi

pulumi stack select ${organization_name}/fund/production

export AWS_ECR_EQUITY_PRICE_MODEL_TRAINER_IMAGE_ARN="$(pulumi stack output aws_ecr_equitypricemodel_trainer_image)"
export AWS_IAM_SAGEMAKER_ROLE_ARN="$(pulumi stack output aws_iam_sagemaker_role_arn)"
export AWS_S3_MODEL_ARTIFACTS_BUCKET="$(pulumi stack output aws_s3_model_artifacts_bucket)"
export AWS_S3_EQUITY_PRICE_MODEL_ARTIFACT_OUTPUT_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET}/artifacts"
export AWS_S3_EQUITY_PRICE_MODEL_TRAINING_DATA_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET}/training"
export AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME="$(pulumi stack output aws_s3_model_artifacts_bucket_name)"
export AWS_S3_EQUITY_PRICE_MODEL_ARTIFACT_OUTPUT_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME}/artifacts"
export AWS_S3_EQUITY_PRICE_MODEL_TRAINING_DATA_PATH="s3://${AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME}/training"

cd ../

Expand Down
8 changes: 4 additions & 4 deletions tools/prepare_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,16 +249,16 @@ def prepare_training_data(


if __name__ == "__main__":
data_bucket: str | None = os.getenv("AWS_S3_DATA_BUCKET")
model_artifacts_bucket: str | None = os.getenv("AWS_S3_MODEL_ARTIFACTS_BUCKET")
data_bucket: str | None = os.getenv("AWS_S3_DATA_BUCKET_NAME")
model_artifacts_bucket: str | None = os.getenv("AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME")
lookback_days = int(os.getenv("LOOKBACK_DAYS", "365"))

if data_bucket is None:
logger.error("AWS_S3_DATA_BUCKET environment variable not set")
logger.error("AWS_S3_DATA_BUCKET_NAME environment variable not set")
sys.exit(1)

if model_artifacts_bucket is None:
logger.error("AWS_S3_MODEL_ARTIFACTS_BUCKET environment variable not set")
logger.error("AWS_S3_MODEL_ARTIFACTS_BUCKET_NAME environment variable not set")
sys.exit(1)

end_date = datetime.now(tz=UTC).replace(hour=0, minute=0, second=0, microsecond=0)
Expand Down
9 changes: 2 additions & 7 deletions tools/sync_equity_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

MASSIVE_BASE_URL = os.getenv("MASSIVE_BASE_URL", "https://api.massive.io")

# Massive ticker types: CS (Common Stock), ADRC/ADRP/ADRS (ADR variants)
EQUITY_TYPES = {"CS", "ADRC", "ADRP", "ADRS"}


Expand Down Expand Up @@ -73,18 +72,14 @@ def extract_categories(tickers: list[dict]) -> pl.DataFrame:
rows = []
for ticker_data in tickers:
ticker = ticker_data.get("ticker", "")
# Skip entries with empty or missing ticker values
if not ticker:
continue
# Filter for Common Stock and all ADR types
if ticker_data.get("type") not in EQUITY_TYPES:
continue

# Try to get sector/industry from various fields Massive provides
sector = ticker_data.get("sector", "")
industry = ticker_data.get("industry", "")

# Some tickers may not have sector/industry
if not sector:
sector = "NOT AVAILABLE"
if not industry:
Expand Down Expand Up @@ -150,14 +145,14 @@ def sync_equity_categories(

if __name__ == "__main__":
api_key: str | None = os.getenv("MASSIVE_API_KEY")
bucket_name: str | None = os.getenv("AWS_S3_DATA_BUCKET")
bucket_name: str | None = os.getenv("AWS_S3_DATA_BUCKET_NAME")

if api_key is None:
logger.error("MASSIVE_API_KEY environment variable not set")
sys.exit(1)

if bucket_name is None:
logger.error("AWS_S3_DATA_BUCKET environment variable not set")
logger.error("AWS_S3_DATA_BUCKET_NAME environment variable not set")
sys.exit(1)

try:
Expand Down