diff --git a/docs/miner.md b/docs/miner.md
index 11bb16647..7fe1860e7 100644
--- a/docs/miner.md
+++ b/docs/miner.md
@@ -72,6 +72,7 @@ This daily calculation and evaluation framework closely aligns with real-world f
 
 Annualization is used for the Sharpe ratio, Sortino ratio, and risk adjusted return with either volatility or returns being annualized to better evaluate the long-term value of strategies and standardize our metrics. Volatility is the standard deviation of returns and is a key factor in the Sharpe and Sortino calculations. In determining the correct annualization factor, we weigh more recent trading days slightly higher than older trading days. This should encourage miners to regularly update their strategies and adapt to changing market conditions, continually providing the network with the most relevant signals. The most recent 10 days account for 25% of the total score, the most recent 30 days account for 50%, and the most recent 70 days account for 75%, with a pattern that tapers exponentially over time.
+The average daily PnL metric uses a more aggressive recency weighting to encourage frequent trading activity. The most recent 10 days account for 40% of the total score, the most recent 30 days account for 70%, and the most recent 70 days account for 87%, again with a pattern that tapers exponentially over time.
 
 Additionally, normalization with annual risk-free rate of T-bills further standardizes our metrics and allows us to measure miner performance on a more consistent basis.
 
@@ -115,12 +116,12 @@ $$
 
 | Metric                 | Scoring Weight |
 |------------------------|----------------|
-| Average Daily PnL      | 50%            |
-| Calmar Ratio           | 10%            |
-| Sharpe Ratio           | 10%            |
-| Omega Ratio            | 10%            |
-| Sortino Ratio          | 10%            |
-| Statistical Confidence | 10%            |
+| Average Daily PnL      | 90%            |
+| Calmar Ratio           | 2%             |
+| Sharpe Ratio           | 2%             |
+| Omega Ratio            | 2%             |
+| Sortino Ratio          | 2%             |
+| Statistical Confidence | 2%             |
 
 ### Scoring Penalties
 
diff --git a/meta/meta.json b/meta/meta.json
index f747dc795..b2740f7a3 100644
--- a/meta/meta.json
+++ b/meta/meta.json
@@ -1,3 +1,3 @@
 {
-    "subnet_version": "6.8.6"
+    "subnet_version": "6.8.7"
 }
diff --git a/tests/vali_tests/test_asset_segmentation.py b/tests/vali_tests/test_asset_segmentation.py
index a24f4cfa0..f92e77424 100644
--- a/tests/vali_tests/test_asset_segmentation.py
+++ b/tests/vali_tests/test_asset_segmentation.py
@@ -441,16 +441,54 @@ def test_days_in_year_matches_subcategory_asset_class(self):
     def test_days_in_year_invalid_category_raises_error(self):
         """Test that days_in_year_from_asset_category raises error for invalid category"""
         segmentation_machine = AssetSegmentation(self.test_ledgers)
-        
+
         # Create a mock invalid category that doesn't exist in ASSET_CLASS_BREAKDOWN
         from unittest.mock import Mock
         invalid_category = Mock()
         invalid_category.name = "INVALID_CATEGORY"
-        
+
         with self.assertRaises(ValueError) as context:
             segmentation_machine.days_in_year_from_asset_category(invalid_category)
-        
+
         self.assertIn("Days in year must be positive", str(context.exception))
 
+    def test_aggregate_pnl_single_subcategory(self):
+        """Test that PnL is correctly aggregated for a single subcategory"""
+        default_ledger = PerfLedger()
+
+        # Create checkpoints with explicit PnL values for crypto_majors
+        checkpoints_btc = [
+            checkpoint_generator(last_update_ms=1000, gain=0.05, loss=-0.02, pnl_gain=100.0, pnl_loss=-20.0, n_updates=1),
+            checkpoint_generator(last_update_ms=2000, gain=0.03, loss=-0.01, pnl_gain=50.0, pnl_loss=-10.0, n_updates=1)
+        ]
+        checkpoints_eth = [
+            checkpoint_generator(last_update_ms=1000, gain=0.04, loss=-0.015, pnl_gain=80.0, pnl_loss=-15.0, n_updates=1),
+            checkpoint_generator(last_update_ms=2000, gain=0.02, loss=-0.005, pnl_gain=40.0, pnl_loss=-5.0, n_updates=1)
+        ]
+
+        btc_ledger = ledger_generator(checkpoints=checkpoints_btc)
+        eth_ledger = ledger_generator(checkpoints=checkpoints_eth)
+
+        sub_ledgers = {
+            "BTCUSD": btc_ledger,
+            "ETHUSD": eth_ledger
+        }
+
+        result = AssetSegmentation.aggregate_miner_subledgers(default_ledger, sub_ledgers)
+
+        # Verify aggregation at timestamp 1000
+        cp_1000 = next(cp for cp in result.cps if cp.last_update_ms == 1000)
+        self.assertEqual(cp_1000.pnl_gain, 180.0)  # 100 + 80
+        self.assertEqual(cp_1000.pnl_loss, -35.0)  # -20 + -15
+        self.assertEqual(cp_1000.gain, 0.09)  # 0.05 + 0.04
+        self.assertEqual(cp_1000.loss, -0.035)  # -0.02 + -0.015
+
+        # Verify aggregation at timestamp 2000
+        cp_2000 = next(cp for cp in result.cps if cp.last_update_ms == 2000)
+        self.assertEqual(cp_2000.pnl_gain, 90.0)  # 50 + 40
+        self.assertEqual(cp_2000.pnl_loss, -15.0)  # -10 + -5
+        self.assertEqual(cp_2000.gain, 0.05)  # 0.03 + 0.02
+        self.assertEqual(cp_2000.loss, -0.015)  # -0.01 + -0.005
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/vali_tests/test_metrics.py b/tests/vali_tests/test_metrics.py
index 8dae0d9b6..7a55048e3 100644
--- a/tests/vali_tests/test_metrics.py
+++ b/tests/vali_tests/test_metrics.py
@@ -661,14 +661,31 @@ def test_pnl_score_time_weighted_large_dataset(self):
         # First 20 days: positive, last 10 days: negative
         pnl_pattern = [50.0] * 20 + [-100.0] * 10
         ledger = create_daily_checkpoints_with_pnl(pnl_pattern)
-        
+
         weighted_score = Metrics.pnl_score([], ledger, weighting=True)
         unweighted_score = Metrics.pnl_score([], ledger, weighting=False)
-        
+
         # Unweighted: (20*50 + 10*(-100)) / 30 = 0/30 = 0
         expected_unweighted = sum(pnl_pattern) / len(pnl_pattern)
         self.assertAlmostEqual(unweighted_score, expected_unweighted, places=1)
-        
+
         # Time weighting should make the score more negative due to recent losses
         self.assertLess(weighted_score, unweighted_score,
                         "Recent negative values should dominate with time weighting")
+
+    def test_pnl_score_70_percent_weight_in_first_30_days(self):
+        """Test that roughly 70% of the weight for PnL metric is in the most recent 30 days"""
+        # Create a 120-day dataset where:
+        # - First 90 days have PnL = 0
+        # - Last 30 days have PnL = 100
+        # If ~70% of weight is in the last 30 days, weighted average should be ~70
+        pnl_pattern = [0.0] * 90 + [100.0] * 30
+        ledger = create_daily_checkpoints_with_pnl(pnl_pattern)
+
+        weighted_score = Metrics.pnl_score([], ledger, weighting=True)
+
+        # With ~70% weight on the recent 30 days (value=100) and ~30% on the older 90 days (value=0):
+        # Expected weighted score ≈ 70
+        # Using a tolerance of ±1 to account for the decay distribution
+        self.assertAlmostEqual(weighted_score, 70.0, delta=1.0,
+                               msg=f"Expected weighted score ~70 if 70% weight in recent 30 days, got {weighted_score:.2f}")
diff --git a/vali_objects/utils/asset_segmentation.py b/vali_objects/utils/asset_segmentation.py
index ec15036a9..ed796a2e5 100644
--- a/vali_objects/utils/asset_segmentation.py
+++ b/vali_objects/utils/asset_segmentation.py
@@ -110,6 +110,8 @@ def aggregate_miner_subledgers(
             existing_checkpoint.loss += checkpoint.loss
             existing_checkpoint.spread_fee_loss += checkpoint.spread_fee_loss
             existing_checkpoint.carry_fee_loss += checkpoint.carry_fee_loss
+            existing_checkpoint.pnl_gain += checkpoint.pnl_gain
+            existing_checkpoint.pnl_loss += checkpoint.pnl_loss
 
             aggregated_dict_ledger[checkpoint.last_update_ms] = existing_checkpoint
 
diff --git a/vali_objects/utils/metrics.py b/vali_objects/utils/metrics.py
index b664cb455..0b9e1f8a9 100644
--- a/vali_objects/utils/metrics.py
+++ b/vali_objects/utils/metrics.py
@@ -27,14 +27,14 @@ def weighted_log_returns(log_returns: list[float]) -> list[float]:
         return list(weighted_returns)
 
     @staticmethod
-    def weighting_distribution(log_returns: Union[list[float], np.ndarray]) -> np.ndarray:
+    def weighting_distribution(log_returns: Union[list[float], np.ndarray],
+                               max_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX,
+                               min_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN,
+                               decay_rate: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE) -> np.ndarray:
         """
         Returns the weighting distribution that decays from max_weight
         to min_weight using the configured decay rate
         """
-        max_weight = ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX
-        min_weight = ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN
-        decay_rate = ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE
 
         if len(log_returns) < 1:
             return np.ones(0)
@@ -48,14 +48,18 @@ def weighting_distribution(log_returns: Union[list[float], np.ndarray]) -> np.nd
         return decay_values[::-1][-len(log_returns):]
 
     @staticmethod
-    def average(log_returns: Union[list[float], np.ndarray], weighting=False, indices: Union[list[int], None] = None) -> float:
+    def average(log_returns: Union[list[float], np.ndarray], weighting=False,
+                indices: Union[list[int], None] = None,
+                max_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX,
+                min_weight: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN,
+                decay_rate: float = ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE) -> float:
         """
         Returns the mean of the log returns
         """
         if len(log_returns) == 0:
             return 0.0
 
-        weighting_distribution = Metrics.weighting_distribution(log_returns)
+        weighting_distribution = Metrics.weighting_distribution(log_returns, max_weight=max_weight, min_weight=min_weight, decay_rate=decay_rate)
 
         if indices is not None and len(indices) != 0:
             indices = [i for i in indices if i in range(len(log_returns))]
@@ -432,5 +436,5 @@ def pnl_score(log_returns: list[float], ledger: PerfLedger, bypass_confidence: b
             return ValiConfig.PNL_NOCONFIDENCE_VALUE
 
         # Apply time weighting if requested
-        return Metrics.average(daily_pnl_values, weighting=weighting)
+        return Metrics.average(daily_pnl_values, weighting=weighting, min_weight=ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN_PNL)
 
diff --git a/vali_objects/utils/price_slippage_model.py b/vali_objects/utils/price_slippage_model.py
index ae15bce90..70812d8d4 100644
--- a/vali_objects/utils/price_slippage_model.py
+++ b/vali_objects/utils/price_slippage_model.py
@@ -1,5 +1,6 @@
 import math
 from collections import defaultdict
+from zoneinfo import ZoneInfo
 
 import holidays
 import numpy as np
@@ -22,6 +23,7 @@ class PriceSlippageModel:
     slippage_estimates: dict = {}
     live_price_fetcher: LivePriceFetcher = None
     holidays_nyse = None
+    eastern_tz = ZoneInfo("America/New_York")
     is_backtesting = False
     fetch_slippage_data = False
     recalculate_slippage = False
@@ -111,14 +113,18 @@ def calc_slippage_equities(cls, bid:float, ask:float, order:Order) -> float:
     @classmethod
     def calc_slippage_forex(cls, bid:float, ask:float, order:Order) -> float:
         """
-        Using the direct BB+ model as a stand-in for forex
+        V2: 10 bps slippage daily from 5-6 pm ET, and 5 bps slippage otherwise
+
+        V1: Using the direct BB+ model as a stand-in for forex
         slippage percentage = 0.433 * spread/mid_price + 0.335 * sqrt(annualized_volatility**2 / 3 / 250) * sqrt(volume / (0.3 * estimated daily volume))
         """
         if order.processed_ms > SLIPPAGE_V2_TIME_MS:
-            if order.trade_pair.subcategory == ForexSubcategory.G1:
-                return 0.001  # 10 bps
+            order_datetime = TimeUtil.millis_to_datetime(order.processed_ms).astimezone(cls.eastern_tz)
+            hour = order_datetime.hour
+            if 17 <= hour < 18:  # Daily 5-6 pm ET. Higher slippage during market open/closing hours
+                return 0.001  # 10 bps
             else:
-                return 0.0015  # 15 bps
+                return 0.0005  # 5 bps
 
         order_date = TimeUtil.millis_to_short_date_str(order.processed_ms)
         annualized_volatility = cls.features[order_date]["vol"][order.trade_pair.trade_pair_id]
diff --git a/vali_objects/vali_config.py b/vali_objects/vali_config.py
index cf9fbc052..62f4166ba 100644
--- a/vali_objects/vali_config.py
+++ b/vali_objects/vali_config.py
@@ -169,6 +169,10 @@ class ValiConfig:
     WEIGHTED_AVERAGE_DECAY_RATE = 0.075
     WEIGHTED_AVERAGE_DECAY_MIN = 0.15
     WEIGHTED_AVERAGE_DECAY_MAX = 1.0
+
+    # Decay min specific for daily average PnL calculations
+    WEIGHTED_AVERAGE_DECAY_MIN_PNL = 0.045  # Results in most recent 30 days having 70% weight
+
     POSITIONAL_EQUIVALENCE_WINDOW_MS = 1000 * 60 * 60 * 24  # 1 day
 
     SET_WEIGHT_REFRESH_TIME_MS = 60 * 5 * 1000  # 5 minutes
@@ -234,13 +238,13 @@ class ValiConfig:
     SHORT_LOOKBACK_WINDOW = 7 * DAILY_CHECKPOINTS
 
     # Scoring weights
-    SCORING_OMEGA_WEIGHT = 0.1
-    SCORING_SHARPE_WEIGHT = 0.1
-    SCORING_SORTINO_WEIGHT = 0.1
-    SCORING_STATISTICAL_CONFIDENCE_WEIGHT = 0.1
-    SCORING_CALMAR_WEIGHT = 0.1
+    SCORING_OMEGA_WEIGHT = 0.02
+    SCORING_SHARPE_WEIGHT = 0.02
+    SCORING_SORTINO_WEIGHT = 0.02
+    SCORING_STATISTICAL_CONFIDENCE_WEIGHT = 0.02
+    SCORING_CALMAR_WEIGHT = 0.02
     SCORING_RETURN_WEIGHT = 0.0
-    SCORING_PNL_WEIGHT = 0.5
+    SCORING_PNL_WEIGHT = 0.9
 
     # Scoring hyperparameters
     OMEGA_LOSS_MINIMUM = 0.01  # Equivalent to 1% loss
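
For intuition on the new decay floor, the sketch below approximates the recency weighting described in docs/miner.md and configured via WEIGHTED_AVERAGE_DECAY_RATE, WEIGHTED_AVERAGE_DECAY_MIN, and the new WEIGHTED_AVERAGE_DECAY_MIN_PNL: an exponential decay from max_weight toward a floor, where a lower floor concentrates more of the total weight on recent days. The decay shape and the 120-day checkpoint window are assumptions for illustration only; the authoritative implementation is Metrics.weighting_distribution, which this diff shows only in part.

```python
import numpy as np

# Constants mirroring ValiConfig; the 120-day lookback is an assumption for
# illustration and is not taken from this diff.
DECAY_RATE = 0.075        # ValiConfig.WEIGHTED_AVERAGE_DECAY_RATE
DECAY_MAX = 1.0           # ValiConfig.WEIGHTED_AVERAGE_DECAY_MAX
DECAY_MIN_DEFAULT = 0.15  # ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN
DECAY_MIN_PNL = 0.045     # ValiConfig.WEIGHTED_AVERAGE_DECAY_MIN_PNL (new)
LOOKBACK_DAYS = 120       # assumed number of daily checkpoints

def decay_weights(n_days: int, min_weight: float) -> np.ndarray:
    """Per-day weights ordered oldest -> newest; the most recent day gets DECAY_MAX."""
    ages = np.arange(n_days)[::-1]                # age 0 = most recent day
    raw = DECAY_MAX * np.exp(-DECAY_RATE * ages)  # exponential decay by age
    return np.maximum(raw, min_weight)            # floor the old tail at min_weight

def recent_weight_fraction(days: int, min_weight: float) -> float:
    """Fraction of total weight carried by the most recent `days` days."""
    w = decay_weights(LOOKBACK_DAYS, min_weight)
    return float(w[-days:].sum() / w.sum())

def weighted_average(daily_values: list[float], min_weight: float) -> float:
    """Decay-weighted mean, analogous in spirit to Metrics.average(..., weighting=True)."""
    w = decay_weights(len(daily_values), min_weight)
    return float(np.average(daily_values, weights=w))

if __name__ == "__main__":
    for label, floor in [("default", DECAY_MIN_DEFAULT), ("pnl", DECAY_MIN_PNL)]:
        fracs = {d: recent_weight_fraction(d, floor) for d in (10, 30, 70)}
        print(label, {d: f"{frac:.0%}" for d, frac in fracs.items()})
    # Step pattern from the new test: 90 zero days followed by 30 days of 100.
    # Under this approximation most of the weight sits on the recent block, so the
    # weighted mean lands in the low-to-mid 70s; the exact ~70 the test asserts
    # depends on the real decay implementation in Metrics.weighting_distribution.
    print(weighted_average([0.0] * 90 + [100.0] * 30, DECAY_MIN_PNL))
```

This also illustrates why only the floor (min_weight) is overridden in pnl_score: keeping the same decay rate but lowering the tail weight is enough to shift the balance of the distribution toward the most recent 30 days without changing how the other annualized metrics are weighted.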