13 changes: 7 additions & 6 deletions docs/miner.md
@@ -72,6 +72,7 @@ This daily calculation and evaluation framework closely aligns with real-world f
Annualization is used for the Sharpe ratio, Sortino ratio, and risk-adjusted return, with either volatility or returns annualized to better evaluate the long-term value of strategies and to standardize our metrics. Volatility is the standard deviation of returns and is a key factor in the Sharpe and Sortino calculations.

In determining the correct annualization factor, we weight more recent trading days slightly more heavily than older trading days. This should encourage miners to regularly update their strategies and adapt to changing market conditions, continually providing the network with the most relevant signals. The most recent 10 days account for 25% of the total score, the most recent 30 days account for 50%, and the most recent 70 days account for 75%, with a pattern that tapers exponentially over time.
The average daily PnL metric uses a more aggressive recency weighting to encourage frequent trading activity: the most recent 10 days account for 40% of the total score, the most recent 30 days account for 70%, and the most recent 70 days account for 87%, again with weights that taper exponentially over time.
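
As a rough illustration of how such a taper can be constructed (a minimal sketch, assuming an exponential decay with a per-day floor that mirrors the WEIGHTED_AVERAGE_DECAY_* constants changed later in this diff; the exact per-window percentages quoted above also depend on the lookback length and checkpoint cadence):

```python
import numpy as np

def recency_weights(n_days: int, max_weight: float = 1.0,
                    min_weight: float = 0.15, decay_rate: float = 0.075) -> np.ndarray:
    """Illustrative per-day recency weights; index 0 is the most recent day."""
    ages = np.arange(n_days)
    return np.maximum(min_weight, max_weight * np.exp(-decay_rate * ages))

# Share of total weight carried by the most recent k days over an assumed 120-day window.
w_default = recency_weights(120)                  # floor used by most metrics
w_pnl = recency_weights(120, min_weight=0.045)    # lower floor used for average daily PnL
for k in (10, 30, 70):
    print(k, round(w_default[:k].sum() / w_default.sum(), 2),
          round(w_pnl[:k].sum() / w_pnl.sum(), 2))
```

Lowering the floor, as the PnL-specific WEIGHTED_AVERAGE_DECAY_MIN_PNL does, shifts relatively more of the total weight onto recent days.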

Additionally, normalizing by the annual risk-free rate of T-bills further standardizes our metrics and allows us to measure miner performance on a more consistent basis.
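
For reference, the annualized, risk-free-adjusted Sharpe ratio takes the generic form below (a sketch assuming standard definitions; the per-asset-class annualization factor and the T-bill rate source are configured in the validator):

$$
\text{Sharpe} = \frac{\bar{r} \cdot N - r_f}{\sigma \sqrt{N}}
$$

where $\bar{r}$ is the mean daily return, $\sigma$ the daily standard deviation of returns, $N$ the number of trading days in a year for the asset class, and $r_f$ the annual risk-free rate.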

@@ -115,12 +116,12 @@ $$

| Metric | Scoring Weight |
|------------------------|----------------|
| Average Daily PnL | 50% |
| Calmar Ratio | 10% |
| Sharpe Ratio | 10% |
| Omega Ratio | 10% |
| Sortino Ratio | 10% |
| Statistical Confidence | 10% |
| Average Daily PnL | 90% |
| Calmar Ratio | 2% |
| Sharpe Ratio | 2% |
| Omega Ratio | 2% |
| Sortino Ratio | 2% |
| Statistical Confidence | 2% |
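
For intuition only, a minimal sketch of how these weights might combine per-metric scores, assuming each metric has already been normalized to a comparable scale and assuming the repo's standard `ValiConfig` import path; `combined_score_sketch` is hypothetical, and the actual validator pipeline applies its own normalization and penalties before weighting:

```python
from vali_objects.vali_config import ValiConfig  # weights shown in the table above

def combined_score_sketch(pnl: float, calmar: float, sharpe: float,
                          omega: float, sortino: float, confidence: float) -> float:
    """Hypothetical weighted sum of already-normalized per-metric scores."""
    return (ValiConfig.SCORING_PNL_WEIGHT * pnl
            + ValiConfig.SCORING_CALMAR_WEIGHT * calmar
            + ValiConfig.SCORING_SHARPE_WEIGHT * sharpe
            + ValiConfig.SCORING_OMEGA_WEIGHT * omega
            + ValiConfig.SCORING_SORTINO_WEIGHT * sortino
            + ValiConfig.SCORING_STATISTICAL_CONFIDENCE_WEIGHT * confidence)
```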

### Scoring Penalties

2 changes: 1 addition & 1 deletion meta/meta.json
@@ -1,3 +1,3 @@
{
"subnet_version": "6.8.6"
"subnet_version": "6.8.7"
}
44 changes: 41 additions & 3 deletions tests/vali_tests/test_asset_segmentation.py
@@ -441,16 +441,54 @@ def test_days_in_year_matches_subcategory_asset_class(self):
def test_days_in_year_invalid_category_raises_error(self):
"""Test that days_in_year_from_asset_category raises error for invalid category"""
segmentation_machine = AssetSegmentation(self.test_ledgers)

# Create a mock invalid category that doesn't exist in ASSET_CLASS_BREAKDOWN
from unittest.mock import Mock
invalid_category = Mock()
invalid_category.name = "INVALID_CATEGORY"

with self.assertRaises(ValueError) as context:
segmentation_machine.days_in_year_from_asset_category(invalid_category)

self.assertIn("Days in year must be positive", str(context.exception))

def test_aggregate_pnl_single_subcategory(self):
"""Test that PnL is correctly aggregated for a single subcategory"""
default_ledger = PerfLedger()

# Create checkpoints with explicit PnL values for crypto_majors
checkpoints_btc = [
checkpoint_generator(last_update_ms=1000, gain=0.05, loss=-0.02, pnl_gain=100.0, pnl_loss=-20.0, n_updates=1),
checkpoint_generator(last_update_ms=2000, gain=0.03, loss=-0.01, pnl_gain=50.0, pnl_loss=-10.0, n_updates=1)
]
checkpoints_eth = [
checkpoint_generator(last_update_ms=1000, gain=0.04, loss=-0.015, pnl_gain=80.0, pnl_loss=-15.0, n_updates=1),
checkpoint_generator(last_update_ms=2000, gain=0.02, loss=-0.005, pnl_gain=40.0, pnl_loss=-5.0, n_updates=1)
]

btc_ledger = ledger_generator(checkpoints=checkpoints_btc)
eth_ledger = ledger_generator(checkpoints=checkpoints_eth)

sub_ledgers = {
"BTCUSD": btc_ledger,
"ETHUSD": eth_ledger
}

result = AssetSegmentation.aggregate_miner_subledgers(default_ledger, sub_ledgers)

# Verify aggregation at timestamp 1000
cp_1000 = next(cp for cp in result.cps if cp.last_update_ms == 1000)
self.assertEqual(cp_1000.pnl_gain, 180.0) # 100 + 80
self.assertEqual(cp_1000.pnl_loss, -35.0) # -20 + -15
self.assertEqual(cp_1000.gain, 0.09) # 0.05 + 0.04
self.assertEqual(cp_1000.loss, -0.035) # -0.02 + -0.015

# Verify aggregation at timestamp 2000
cp_2000 = next(cp for cp in result.cps if cp.last_update_ms == 2000)
self.assertEqual(cp_2000.pnl_gain, 90.0) # 50 + 40
self.assertEqual(cp_2000.pnl_loss, -15.0) # -10 + -5
self.assertEqual(cp_2000.gain, 0.05) # 0.03 + 0.02
self.assertEqual(cp_2000.loss, -0.015) # -0.01 + -0.005

if __name__ == '__main__':
unittest.main()
23 changes: 20 additions & 3 deletions tests/vali_tests/test_metrics.py
@@ -661,14 +661,31 @@ def test_pnl_score_time_weighted_large_dataset(self):
# First 20 days: positive, last 10 days: negative
pnl_pattern = [50.0] * 20 + [-100.0] * 10
ledger = create_daily_checkpoints_with_pnl(pnl_pattern)

weighted_score = Metrics.pnl_score([], ledger, weighting=True)
unweighted_score = Metrics.pnl_score([], ledger, weighting=False)

# Unweighted: (20*50 + 10*(-100)) / 30 = 0/30 = 0
expected_unweighted = sum(pnl_pattern) / len(pnl_pattern)
self.assertAlmostEqual(unweighted_score, expected_unweighted, places=1)

# Time weighting should make the score more negative due to recent losses
self.assertLess(weighted_score, unweighted_score,
"Recent negative values should dominate with time weighting")

def test_pnl_score_70_percent_weight_in_first_30_days(self):
"""Test that roughly 70% of the weight for PnL metric is in the most recent 30 days"""
# Create a 120-day dataset where:
# - First 90 days have PnL = 0
# - Last 30 days have PnL = 100
# If ~70% of weight is in the last 30 days, weighted average should be ~70
pnl_pattern = [0.0] * 90 + [100.0] * 30
ledger = create_daily_checkpoints_with_pnl(pnl_pattern)

weighted_score = Metrics.pnl_score([], ledger, weighting=True)

# With ~70% weight on the recent 30 days (value=100) and ~30% on the older 90 days (value=0):
# Expected weighted score ≈ 70
# Using a tolerance of ±1 to account for the decay distribution
self.assertAlmostEqual(weighted_score, 70.0, delta=1.0,
msg=f"Expected weighted score ~70 if 70% weight in recent 30 days, got {weighted_score:.2f}")
2 changes: 2 additions & 0 deletions vali_objects/utils/asset_segmentation.py
@@ -110,6 +110,8 @@ def aggregate_miner_subledgers(
existing_checkpoint.loss += checkpoint.loss
existing_checkpoint.spread_fee_loss += checkpoint.spread_fee_loss
existing_checkpoint.carry_fee_loss += checkpoint.carry_fee_loss
existing_checkpoint.pnl_gain += checkpoint.pnl_gain
existing_checkpoint.pnl_loss += checkpoint.pnl_loss

aggregated_dict_ledger[checkpoint.last_update_ms] = existing_checkpoint

18 changes: 11 additions & 7 deletions vali_objects/utils/metrics.py
@@ -27,14 +27,14 @@ def weighted_log_returns(log_returns: list[float]) -> list[float]:
return list(weighted_returns)

@staticmethod
def weighting_distribution(log_returns: Union[list[float], np.ndarray]) -> np.ndarray:
def weighting_distribution(log_returns: Union[list[float], np.ndarray],
max_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX,
min_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN,
decay_rate: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE) -> np.ndarray:
"""
Returns the weighting distribution that decays from max_weight to min_weight
using the configured decay rate
"""
max_weight = ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX
min_weight = ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN
decay_rate = ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE

if len(log_returns) < 1:
return np.ones(0)
@@ -48,14 +48,18 @@ def weighting_distribution(log_returns: Union[list[float], np.ndarray]) -> np.ndarray:
return decay_values[::-1][-len(log_returns):]

@staticmethod
def average(log_returns: Union[list[float], np.ndarray], weighting=False, indices: Union[list[int], None] = None) -> float:
def average(log_returns: Union[list[float], np.ndarray], weighting=False,
indices: Union[list[int], None] = None,
max_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX,
min_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN,
decay_rate: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE) -> float:
"""
Returns the mean of the log returns
"""
if len(log_returns) == 0:
return 0.0

weighting_distribution = Metrics.weighting_distribution(log_returns)
weighting_distribution = Metrics.weighting_distribution(log_returns, max_weight=max_weight, min_weight=min_weight, decay_rate=decay_rate)

if indices is not None and len(indices) != 0:
indices = [i for i in indices if i in range(len(log_returns))]
@@ -432,5 +436,5 @@ def pnl_score(log_returns: list[float], ledger: PerfLedger, bypass_confidence: b
return ValiConfig.PNL_NOCONFIDENCE_VALUE

# Apply time weighting if requested
return Metrics.average(daily_pnl_values, weighting=weighting)
return Metrics.average(daily_pnl_values, weighting=weighting, min_weight=ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN_PNL)

14 changes: 10 additions & 4 deletions vali_objects/utils/price_slippage_model.py
@@ -1,5 +1,6 @@
import math
from collections import defaultdict
from zoneinfo import ZoneInfo

import holidays
import numpy as np
@@ -22,6 +23,7 @@ class PriceSlippageModel:
slippage_estimates: dict = {}
live_price_fetcher: LivePriceFetcher = None
holidays_nyse = None
eastern_tz = ZoneInfo("America/New_York")
is_backtesting = False
fetch_slippage_data = False
recalculate_slippage = False
@@ -111,14 +113,18 @@ def calc_slippage_equities(cls, bid:float, ask:float, order:Order) -> float:
@classmethod
def calc_slippage_forex(cls, bid:float, ask:float, order:Order) -> float:
"""
Using the direct BB+ model as a stand-in for forex
V2: 10 bps slippage daily from 5-6 pm EST, and 5 bps slippage otherwise

V1: Using the direct BB+ model as a stand-in for forex
slippage percentage = 0.433 * spread/mid_price + 0.335 * sqrt(annualized_volatility**2 / 3 / 250) * sqrt(volume / (0.3 * estimated daily volume))
"""
if order.processed_ms > SLIPPAGE_V2_TIME_MS:
if order.trade_pair.subcategory == ForexSubcategory.G1:
return 0.001 # 10 bps
order_datetime = TimeUtil.millis_to_datetime(order.processed_ms).astimezone(cls.eastern_tz)
hour = order_datetime.hour
if 17 <= hour < 18: # Daily 5-6 pm EST. Higher slippage during market open/closing hours
return 0.001 # 10 bps
else:
return 0.0015 # 15 bps
return 0.0005 # 5 bps

order_date = TimeUtil.millis_to_short_date_str(order.processed_ms)
annualized_volatility = cls.features[order_date]["vol"][order.trade_pair.trade_pair_id]
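For reference, a runnable sketch of the V1 BB+ formula quoted in the docstring above (illustrative only; `bb_plus_slippage_sketch` is a hypothetical helper, and the volatility and volume inputs are sourced elsewhere in the model):

```python
import math

def bb_plus_slippage_sketch(bid: float, ask: float, annualized_volatility: float,
                            order_volume: float, est_daily_volume: float) -> float:
    """Slippage percentage per the V1 BB+ formula in the docstring above."""
    mid_price = (bid + ask) / 2
    spread = ask - bid
    # Scale annualized volatility down to the shorter execution horizon
    horizon_vol = math.sqrt(annualized_volatility ** 2 / 3 / 250)
    participation = math.sqrt(order_volume / (0.3 * est_daily_volume))
    return 0.433 * spread / mid_price + 0.335 * horizon_vol * participation
```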
16 changes: 10 additions & 6 deletions vali_objects/vali_config.py
@@ -169,6 +169,10 @@ class ValiConfig:
WEIGHTED_AVERAGE_DECAY_RATE = 0.075
WEIGHTED_AVERAGE_DECAY_MIN = 0.15
WEIGHTED_AVERAGE_DECAY_MAX = 1.0

# Decay min specific for daily average PnL calculations
WEIGHTED_AVERAGE_DECAY_MIN_PNL = 0.045  # Results in the most recent 30 days carrying ~70% of the weight

POSITIONAL_EQUIVALENCE_WINDOW_MS = 1000 * 60 * 60 * 24 # 1 day

SET_WEIGHT_REFRESH_TIME_MS = 60 * 5 * 1000 # 5 minutes
@@ -234,13 +238,13 @@ class ValiConfig:
SHORT_LOOKBACK_WINDOW = 7 * DAILY_CHECKPOINTS

# Scoring weights
SCORING_OMEGA_WEIGHT = 0.1
SCORING_SHARPE_WEIGHT = 0.1
SCORING_SORTINO_WEIGHT = 0.1
SCORING_STATISTICAL_CONFIDENCE_WEIGHT = 0.1
SCORING_CALMAR_WEIGHT = 0.1
SCORING_OMEGA_WEIGHT = 0.02
SCORING_SHARPE_WEIGHT = 0.02
SCORING_SORTINO_WEIGHT = 0.02
SCORING_STATISTICAL_CONFIDENCE_WEIGHT = 0.02
SCORING_CALMAR_WEIGHT = 0.02
SCORING_RETURN_WEIGHT = 0.0
SCORING_PNL_WEIGHT = 0.5
SCORING_PNL_WEIGHT = 0.9

# Scoring hyperparameters
OMEGA_LOSS_MINIMUM = 0.01 # Equivalent to 1% loss