
Commit 0d38cdf

fix linting issues
1 parent eac9f72 commit 0d38cdf

17 files changed: +289 -120 lines changed

lsc_eval/README.md

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ cd lsc_eval && pdm install
 export OPENAI_API_KEY="your-key"
 
 # Run evaluation (Create your own data)
-python runner.py --system-config config/system.yaml --eval-data config/evaluation_data.yaml
+python -m runner --system-config config/system.yaml --eval-data config/evaluation_data.yaml
 ```
 
 ## 📊 Supported Metrics

lsc_eval/pyproject.toml

Lines changed: 4 additions & 1 deletion
@@ -17,7 +17,7 @@ dependencies = [
     "datasets>=2.0.0",
     "matplotlib>=3.5.0",
     "seaborn>=0.11.0",
-    "numpy>=1.21.0"
+    "numpy>=1.23.0"
 ]
 
 [tool.pdm.dev-dependencies]
@@ -46,6 +46,9 @@ build-backend = "pdm.backend"
 [tool.pdm]
 distribution = true
 
+[tool.pdm.build]
+includes = ["lsc_eval/config/*.yaml"]
+
 [tool.ruff]
 # always generate Python 3.11-compatible code.
 target-version = "py311"
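
The new `[tool.pdm.build]` table bundles the YAML files under `lsc_eval/config/` into the built distribution. As a hedged sketch (not part of this commit), an installed copy could then locate a bundled config through the standard library, assuming the files really land inside the `lsc_eval` package as the include pattern suggests:

```python
# Sketch only: assumes the packaged YAML ends up at lsc_eval/config/ inside the
# installed distribution, as the includes pattern above suggests.
from importlib.resources import files

config_text = (files("lsc_eval") / "config" / "system.yaml").read_text(encoding="utf-8")
print(config_text.splitlines()[0])
```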

lsc_eval/runner.py

Lines changed: 11 additions & 5 deletions
@@ -4,7 +4,7 @@
 Simple interface that uses EvaluationEngine as the core controller.
 
 Usage:
-    python runner.py --system-config config/system.yaml --eval-data config/evaluation_data.yaml
+    python -m runner --system-config config/system.yaml --eval-data config/evaluation_data.yaml
 
 Or programmatically:
     from runner import run_evaluation
@@ -48,7 +48,9 @@ def run_evaluation(
     data_validator = DataValidator()
     evaluation_data = data_validator.load_evaluation_data(evaluation_data_path)
 
-    print(f"✅ System config: {system_config.llm_provider}/{system_config.llm_model}")
+    print(
+        f"✅ System config: {system_config.llm_provider}/{system_config.llm_model}"
+    )
     print(f"✅ Evaluation data: {len(evaluation_data)} conversation groups")
 
     # Step 2: Initialize evaluation engine (core controller)
@@ -67,7 +69,9 @@ def run_evaluation(
         system_config=system_config,
     )
 
-    output_handler.generate_reports(results, include_graphs=system_config.include_graphs)
+    output_handler.generate_reports(
+        results, include_graphs=system_config.include_graphs
+    )
 
     print("\n🎉 Evaluation Complete!")
     print(f"📊 {len(results)} evaluations completed")
@@ -81,7 +85,9 @@ def run_evaluation(
     )
 
     if summary["ERROR"] > 0:
-        print(f"⚠️ {summary['ERROR']} evaluations had errors - check detailed report")
+        print(
+            f"⚠️ {summary['ERROR']} evaluations had errors - check detailed report"
+        )
 
     return {
         "TOTAL": summary["TOTAL"],
@@ -96,7 +102,7 @@ def run_evaluation(
         return None
 
 
-def main():
+def main() -> int:
     """Command line interface."""
     parser = argparse.ArgumentParser(description="LSC Evaluation Framework / Tool")
     parser.add_argument(
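
The module docstring also advertises programmatic use via `run_evaluation`. A minimal sketch of that call, assuming the keyword names mirror the CLI flags — `evaluation_data_path` appears in this diff, `system_config_path` is a guess — and relying on the function returning a summary dict on success and `None` on failure, as the hunks above show:

```python
# Hypothetical programmatic invocation; only evaluation_data_path and the
# "TOTAL" key of the returned dict are confirmed by this diff.
from runner import run_evaluation

summary = run_evaluation(
    system_config_path="config/system.yaml",  # assumed parameter name
    evaluation_data_path="config/evaluation_data.yaml",
)
if summary is None:
    raise SystemExit("evaluation failed")
print(f"{summary['TOTAL']} evaluations completed")
```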

lsc_eval/src/lsc_eval/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -3,14 +3,14 @@
 from .core import (
     ConfigLoader,
     DataValidator,
-    SystemConfig,
     EvaluationData,
     EvaluationResult,
+    SystemConfig,
     TurnData,
 )
 from .evaluation_engine import EvaluationEngine
 from .llm_managers.llm_manager import LLMManager
-from .output import OutputHandler
+from .output.output_handler import OutputHandler
 
 __all__ = [
     "SystemConfig",

lsc_eval/src/lsc_eval/core/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -1,6 +1,11 @@
 """Core functionality - Configuration, data validation, and models."""
 
-from .config_loader import ConfigLoader, SystemConfig, setup_environment_variables, validate_metrics
+from .config_loader import (
+    ConfigLoader,
+    SystemConfig,
+    setup_environment_variables,
+    validate_metrics,
+)
 from .data_validator import DataValidator
 from .models import EvaluationData, EvaluationResult, LLMConfig, TurnData
 

lsc_eval/src/lsc_eval/core/config_loader.py

Lines changed: 35 additions & 16 deletions
@@ -2,11 +2,14 @@
 
 import logging
 import os
-from typing import Any, Dict, List
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import yaml
 from pydantic import BaseModel, Field
 
+if TYPE_CHECKING:
+    from .models import EvaluationData
+
 # Global metric mapping sets (populated dynamically from system config)
 TURN_LEVEL_METRICS: set[str] = set()
 CONVERSATION_LEVEL_METRICS: set[str] = set()
@@ -24,7 +27,7 @@
 ]
 
 
-def setup_environment_variables(config_path: str):
+def setup_environment_variables(config_path: str) -> None:
     """Early setup of environment variables from system config. Called before any other imports."""
     try:
         with open(config_path, "r", encoding="utf-8") as f:
@@ -44,18 +47,22 @@ def setup_environment_variables(config_path: str):
     os.environ["LITELLM_LOG_LEVEL"] = "ERROR"
 
 
-def setup_logging(logging_config: Dict[str, Any]):
+def setup_logging(logging_config: Dict[str, Any]) -> logging.Logger:
     """Configure logging for application and packages."""
     # Get logging settings with new structure
     source_level = getattr(logging, logging_config.get("source_level", "INFO").upper())
-    package_level = getattr(logging, logging_config.get("package_level", "WARNING").upper())
+    package_level = getattr(
+        logging, logging_config.get("package_level", "WARNING").upper()
+    )
     log_format = logging_config.get(
         "format", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     )
 
     # Configure root logger for our application
     logging.basicConfig(
-        level=source_level, format=log_format, force=True  # Override any existing configuration
+        level=source_level,
+        format=log_format,
+        force=True,  # Override any existing configuration
     )
 
     # Set logging levels for packages using system configuration
@@ -85,7 +92,9 @@ def setup_logging(logging_config: Dict[str, Any]):
             override_level_obj = getattr(logging, override_level.upper())
             logging.getLogger(package_name).setLevel(override_level_obj)
         except AttributeError:
-            print(f"Warning: Invalid log level '{override_level}' for package '{package_name}'")
+            print(
+                f"Warning: Invalid log level '{override_level}' for package '{package_name}'"
+            )
 
     # Get logger for our application
     logger = logging.getLogger("lsc_eval")
@@ -102,7 +111,7 @@ def setup_logging(logging_config: Dict[str, Any]):
     return logger
 
 
-def populate_metric_mappings(metrics_metadata: Dict[str, Any]):
+def populate_metric_mappings(metrics_metadata: Dict[str, Any]) -> None:
     """Populate global metric mapping sets from system config metadata."""
     TURN_LEVEL_METRICS.clear()
     CONVERSATION_LEVEL_METRICS.clear()
@@ -120,7 +129,9 @@ def populate_metric_mappings(metrics_metadata: Dict[str, Any]):
             CONVERSATION_LEVEL_METRICS.add(metric_name)
 
 
-def validate_metrics(turn_metrics: List[str], conversation_metrics: List[str]) -> List[str]:
+def validate_metrics(
+    turn_metrics: List[str], conversation_metrics: List[str]
+) -> List[str]:
     """Validate that provided metrics are recognized."""
     errors = []
 
@@ -151,7 +162,9 @@ class SystemConfig(BaseModel):
     # Logging Configuration
     logging_source_level: str = Field(default="INFO")
    logging_package_level: str = Field(default="WARNING")
-    logging_format: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    logging_format: str = Field(
+        default="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
     logging_show_timestamps: bool = Field(default=True)
     logging_package_overrides: Dict[str, str] = Field(default_factory=dict)
 
@@ -169,18 +182,22 @@ class SystemConfig(BaseModel):
     visualization_dpi: int = Field(default=300)
 
     # Default metrics metadata from system config
-    default_turn_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
-    default_conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
+    default_turn_metrics_metadata: Dict[str, Dict[str, Any]] = Field(
+        default_factory=dict
+    )
+    default_conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(
+        default_factory=dict
+    )
 
 
 class ConfigLoader:
     """Configuration loader for LSC Evaluation Framework."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         """Initialize Config Loader."""
-        self.system_config = None
-        self.evaluation_data = None
-        self.logger = None
+        self.system_config: Optional[SystemConfig] = None
+        self.evaluation_data: Optional[List[EvaluationData]] = None
+        self.logger: Optional[logging.Logger] = None
 
     def load_system_config(self, config_path: str) -> SystemConfig:
         """Load system configuration from YAML file."""
@@ -229,7 +246,9 @@ def load_system_config(self, config_path: str) -> SystemConfig:
             visualization_dpi=visualization_config.get("dpi", 300),
             # Default metrics metadata from system config
             default_turn_metrics_metadata=metrics_metadata.get("turn_level", {}),
-            default_conversation_metrics_metadata=metrics_metadata.get("conversation_level", {}),
+            default_conversation_metrics_metadata=metrics_metadata.get(
+                "conversation_level", {}
+            ),
         )
 
         self.logger.debug(
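
The notable addition here is the `if TYPE_CHECKING:` guard: `EvaluationData` is only needed for the new `Optional[List[EvaluationData]]` annotation on `ConfigLoader.evaluation_data`, so the import is visible to type checkers but stays out of the runtime import path (avoiding a potential import cycle or an unnecessary runtime import). A generic sketch of the pattern, with placeholder names rather than this repository's modules:

```python
# Generic illustration of the TYPE_CHECKING guard used above; heavy_module and
# Heavy are placeholders, not names from this repository.
from __future__ import annotations  # annotations stay strings at runtime

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen by mypy/pyright only; never executed at runtime, so it cannot pull
    # in an expensive dependency or create an import cycle.
    from heavy_module import Heavy


class Holder:
    def __init__(self) -> None:
        self.item: Optional[Heavy] = None  # resolved by the type checker only
```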

lsc_eval/src/lsc_eval/core/data_validator.py

Lines changed: 7 additions & 7 deletions
@@ -1,20 +1,20 @@
 """Data validation of input data before evaluation."""
 
-from typing import List
+from typing import List, Optional
 
 import yaml
 
-from .models import EvaluationData
 from .config_loader import CONVERSATION_LEVEL_METRICS, TURN_LEVEL_METRICS
+from .models import EvaluationData
 
 
 class DataValidator:
     """Data validator for evaluation data."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         """Initialize validator."""
-        self.validation_errors = []
-        self.evaluation_data = None
+        self.validation_errors: List[str] = []
+        self.evaluation_data: Optional[List[EvaluationData]] = None
 
     def load_evaluation_data(self, data_path: str) -> List[EvaluationData]:
         """Load and validate evaluation data from YAML file."""
@@ -53,7 +53,7 @@ def validate_evaluation_data(self, evaluation_data: List[EvaluationData]) -> bool:
         print("✅ All data validation passed")
         return True
 
-    def _validate_metrics_availability(self, data: EvaluationData):
+    def _validate_metrics_availability(self, data: EvaluationData) -> None:
         """Validate that specified metrics are available/supported."""
         conversation_id = data.conversation_group_id
 
@@ -71,7 +71,7 @@ def _validate_metrics_availability(self, data: EvaluationData):
                     f"Conversation {conversation_id}: Unknown conversation metric '{metric}'"
                 )
 
-    def _validate_metric_requirements(self, data: EvaluationData):
+    def _validate_metric_requirements(self, data: EvaluationData) -> None:
         """Validate that required fields exist for specified metrics."""
         conversation_id = data.conversation_group_id
 
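
Beyond the added annotations, the signatures visible here outline how the validator is used on its own; a minimal sketch based only on what this diff shows, with a placeholder path:

```python
# Standalone use of DataValidator, based on the signatures visible in this
# diff; the YAML path is a placeholder.
from lsc_eval.core import DataValidator

validator = DataValidator()
groups = validator.load_evaluation_data("config/evaluation_data.yaml")
if validator.validate_evaluation_data(groups):
    print(f"{len(groups)} conversation groups passed validation")
```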

lsc_eval/src/lsc_eval/core/models.py

Lines changed: 19 additions & 6 deletions
@@ -56,7 +56,9 @@ class EvaluationData(BaseModel):
 
     # Metric-specific configuration (threshold, weights, etc.)
     turn_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
-    conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
+    conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(
+        default_factory=dict
+    )
 
     # Conversation turns
     turns: List[TurnData]
@@ -83,7 +85,9 @@ def validate_metrics(cls, v: List[str]) -> List[str]:
         """Validate metrics are properly formatted."""
         for metric in v:
             if not metric or ":" not in metric:
-                raise ValueError(f'Metric "{metric}" must be in format "framework:metric_name"')
+                raise ValueError(
+                    f'Metric "{metric}" must be in format "framework:metric_name"'
+                )
         return v
 
     def validate_metric_requirements(self) -> List[str]:
@@ -114,7 +118,10 @@ def validate_metric_requirements(self) -> List[str]:
                         f"TurnData {turn_data.turn_id}: Metric '{metric}' requires contexts"
                     )
 
-                if metric in expected_response_required_metrics and not turn_data.expected_response:
+                if (
+                    metric in expected_response_required_metrics
+                    and not turn_data.expected_response
+                ):
                     errors.append(
                         f"TurnData {turn_data.turn_id}: "
                         f"Metric '{metric}' requires expected_response"
@@ -168,10 +175,16 @@ class LLMConfig(BaseModel):
     model_name: str = Field(..., description="The model name to use")
     api_base: Optional[str] = Field(None, description="Custom API base URL")
     api_key: Optional[str] = Field(None, description="API key for the model")
-    temperature: float = Field(0.0, ge=0.0, le=2.0, description="Temperature for sampling")
-    max_tokens: Optional[int] = Field(None, ge=1, description="Maximum tokens to generate")
+    temperature: float = Field(
+        0.0, ge=0.0, le=2.0, description="Temperature for sampling"
+    )
+    max_tokens: Optional[int] = Field(
+        None, ge=1, description="Maximum tokens to generate"
+    )
     timeout: Optional[int] = Field(None, ge=1, description="Request timeout in seconds")
-    num_retries: int = Field(3, ge=0, description="Number of retries for failed requests")
+    num_retries: int = Field(
+        3, ge=0, description="Number of retries for failed requests"
+    )
 
     @field_validator("model_name")
     @classmethod
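
The reflowed `Field(...)` calls keep their constraints (`ge`/`le` bounds and defaults), so validation behaviour is unchanged. A hedged construction example, assuming `model_name` is the only required field and that its `field_validator` (not shown in this hunk) accepts the value used:

```python
# Sketch of LLMConfig validation; assumes model_name is the only required
# field and that its validator accepts this value (neither is fully visible
# in this hunk).
from pydantic import ValidationError

from lsc_eval.core import LLMConfig

cfg = LLMConfig(model_name="openai/gpt-4o", temperature=0.2, num_retries=1)
print(cfg.max_tokens)  # None by default, per the Field definition above

try:
    LLMConfig(model_name="openai/gpt-4o", temperature=3.0)  # violates le=2.0
except ValidationError as exc:
    print(exc.errors()[0]["loc"])  # ('temperature',)
```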
