Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion applications/data_manager/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@ RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

ENV CARGO_BUILD_JOBS=4
# Limit parallel jobs to avoid OOM on memory-constrained builder nodes
ENV CARGO_BUILD_JOBS=1
Comment thread
forstmeier marked this conversation as resolved.

# Setting opt-level=1 through ENV makes cargo-chef cook and cargo build compile
# with the same release profile settings, preventing cache invalidation; the
# lower optimization level is an acceptable trade-off for a data manager service
ENV CARGO_PROFILE_RELEASE_OPT_LEVEL=1
Comment thread
forstmeier marked this conversation as resolved.

COPY --from=planner /app/recipe.json recipe.json

Expand Down
11 changes: 8 additions & 3 deletions infrastructure/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,21 +239,21 @@

data_manager_log_group = aws.cloudwatch.LogGroup(
"data_manager_logs",
name="/ecs/fund/data-manager-server",
name="/ecs/fund/applications-data-manager-server",
retention_in_days=7,
tags=tags,
)

portfolio_manager_log_group = aws.cloudwatch.LogGroup(
"portfolio_manager_logs",
name="/ecs/fund/portfolio-manager-server",
name="/ecs/fund/applications-portfolio-manager-server",
retention_in_days=7,
tags=tags,
)

ensemble_manager_log_group = aws.cloudwatch.LogGroup(
"ensemble_manager_logs",
name="/ecs/fund/ensemble-manager-server",
name="/ecs/fund/applications-ensemble-manager-server",
retention_in_days=7,
tags=tags,
)
Expand Down Expand Up @@ -440,6 +440,10 @@
"name": "DISABLE_DISK_CACHE",
"value": "1",
},
{
"name": "AWS_S3_MODEL_ARTIFACT_PATH",
"value": "artifacts/tide/",
},
],
"secrets": [
{
Expand Down Expand Up @@ -559,6 +563,7 @@
task_definition=ensemble_manager_task_definition.arn,
desired_count=1,
launch_type="FARGATE",
health_check_grace_period_seconds=180,
network_configuration=aws.ecs.ServiceNetworkConfigurationArgs(
subnets=[private_subnet_1.id, private_subnet_2.id],
security_groups=[ecs_security_group.id],
Expand Down
58 changes: 41 additions & 17 deletions infrastructure/notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,34 @@
endpoint=notification_email_address,
)

# Access policy attached to the infrastructure alerts SNS topic.
# It contains a single Allow statement letting the service principal
# costalerts.amazonaws.com call SNS:Publish on this topic, conditioned on
# aws:SourceAccount being equal to `account_id` (defined outside this view).
infrastructure_alerts_topic_policy = aws.sns.TopicPolicy(
"infrastructure_alerts_topic_policy",
arn=infrastructure_alerts_topic.arn,
# The policy document is built with the typed IAM helper and passed on via
# its `.json` attribute.
policy=aws.iam.get_policy_document_output(
statements=[
aws.iam.GetPolicyDocumentStatementArgs(
sid="AWSCostAnomalyDetectionSNSPublishingPermissions",
effect="Allow",
actions=["SNS:Publish"],
# Only the named service principal is granted access.
principals=[
aws.iam.GetPolicyDocumentStatementPrincipalArgs(
type="Service",
identifiers=["costalerts.amazonaws.com"],
)
],
# The grant is scoped to this one topic.
resources=[infrastructure_alerts_topic.arn],
# Restrict publishing to requests whose source account matches
# `account_id`.
conditions=[
aws.iam.GetPolicyDocumentStatementConditionArgs(
test="StringEquals",
variable="aws:SourceAccount",
values=[account_id],
)
],
)
]
).json,
)

cost_anomaly_monitor = aws.costexplorer.AnomalyMonitor(
"cost_anomaly_monitor",
name="fund-cost-anomaly-monitor",
Expand All @@ -47,24 +75,20 @@
name="fund-cost-anomaly-subscription",
monitor_arn_lists=[cost_anomaly_monitor.arn],
frequency="IMMEDIATE",
threshold_expression=json.dumps(
{
"Dimensions": {
"Key": "ANOMALY_TOTAL_IMPACT_ABSOLUTE",
"Values": ["25"],
"MatchOptions": ["GREATER_THAN_OR_EQUAL"],
}
}
),
subscribers=pulumi.Output.from_input(budget_alert_email_addresses).apply(
lambda emails: [
aws.costexplorer.AnomalySubscriptionSubscriberArgs(
address=email,
type="EMAIL",
)
for email in emails
]
threshold_expression=aws.costexplorer.AnomalySubscriptionThresholdExpressionArgs(
dimension=aws.costexplorer.AnomalySubscriptionThresholdExpressionDimensionArgs(
key="ANOMALY_TOTAL_IMPACT_ABSOLUTE",
values=["25"],
match_options=["GREATER_THAN_OR_EQUAL"],
)
Comment thread
forstmeier marked this conversation as resolved.
),
subscribers=[
aws.costexplorer.AnomalySubscriptionSubscriberArgs(
address=infrastructure_alerts_topic.arn,
type="SNS",
)
],
Comment thread
forstmeier marked this conversation as resolved.
Comment thread
forstmeier marked this conversation as resolved.
opts=pulumi.ResourceOptions(depends_on=[infrastructure_alerts_topic_policy]),
tags=tags,
)

Expand Down
2 changes: 1 addition & 1 deletion infrastructure/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "Infrastructure management with Pulumi"
requires-python = "==3.12.10"
dependencies = [
"pulumi>=3.189.0",
"pulumi-aws>=7.4.0",
"pulumi-aws>=7.7.0",
"pulumi-command>=1.1.0",
"pulumi-docker>=4.10.0",
"pulumi-tls>=5.2.1",
Expand Down
21 changes: 13 additions & 8 deletions infrastructure/training.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import base64
import json

import pulumi
import pulumi_aws as aws
from config import tags
from config import region, tags
from iam import execution_role, task_role
from networking import ecs_security_group, private_subnet_1, private_subnet_2
from storage import tide_runner_image_uri
Expand Down Expand Up @@ -157,31 +158,35 @@

tide_trainer_task_definition = aws.ecs.TaskDefinition(
"tide_trainer_task_definition",
family="fund-tide-trainer",
family="tide-runner",
requires_compatibilities=["EC2"],
network_mode="awsvpc",
cpu="4096",
memory="14336",
execution_role_arn=execution_role.arn,
task_role_arn=task_role.arn,
container_definitions=tide_runner_image_uri.apply(
lambda image_uri: json.dumps(
container_definitions=pulumi.Output.all(
models_log_group.name,
tide_runner_image_uri,
).apply(
lambda args: json.dumps(
[
{
"name": "prefect",
"image": image_uri,
"image": args[1],
"essential": True,
"resourceRequirements": [{"type": "GPU", "value": "1"}],
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/fund/models",
"awslogs-region": "us-east-1",
"awslogs-group": args[0],
"awslogs-region": region,
"awslogs-stream-prefix": "tide",
Comment thread
forstmeier marked this conversation as resolved.
},
},
}
]
],
sort_keys=True,
)
),
tags=tags,
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading