
Commit 5b854a6

fix linting issues
1 parent e5fcdee commit 5b854a6

13 files changed: +229 additions, -69 deletions

lsc_eval/README.md

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ cd lsc_eval && pdm install
 export OPENAI_API_KEY="your-key"

 # Run evaluation (Create your own data)
-python runner.py --system-config config/system.yaml --eval-data config/evaluation_data.yaml
+python -m runner --system-config config/system.yaml --eval-data config/evaluation_data.yaml
 ```

 ## 📊 Supported Metrics

lsc_eval/runner.py

Lines changed: 9 additions & 3 deletions
@@ -48,7 +48,9 @@ def run_evaluation(
     data_validator = DataValidator()
     evaluation_data = data_validator.load_evaluation_data(evaluation_data_path)

-    print(f"✅ System config: {system_config.llm_provider}/{system_config.llm_model}")
+    print(
+        f"✅ System config: {system_config.llm_provider}/{system_config.llm_model}"
+    )
     print(f"✅ Evaluation data: {len(evaluation_data)} conversation groups")

     # Step 2: Initialize evaluation engine (core controller)
@@ -67,7 +69,9 @@ def run_evaluation(
         system_config=system_config,
     )

-    output_handler.generate_reports(results, include_graphs=system_config.include_graphs)
+    output_handler.generate_reports(
+        results, include_graphs=system_config.include_graphs
+    )

     print("\n🎉 Evaluation Complete!")
     print(f"📊 {len(results)} evaluations completed")
@@ -81,7 +85,9 @@ def run_evaluation(
     )

     if summary["ERROR"] > 0:
-        print(f"⚠️ {summary['ERROR']} evaluations had errors - check detailed report")
+        print(
+            f"⚠️ {summary['ERROR']} evaluations had errors - check detailed report"
+        )

     return {
         "TOTAL": summary["TOTAL"],

lsc_eval/src/lsc_eval/core/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -1,6 +1,11 @@
 """Core functionality - Configuration, data validation, and models."""

-from .config_loader import ConfigLoader, SystemConfig, setup_environment_variables, validate_metrics
+from .config_loader import (
+    ConfigLoader,
+    SystemConfig,
+    setup_environment_variables,
+    validate_metrics,
+)
 from .data_validator import DataValidator
 from .models import EvaluationData, EvaluationResult, LLMConfig, TurnData

lsc_eval/src/lsc_eval/core/config_loader.py

Lines changed: 24 additions & 8 deletions
@@ -51,14 +51,18 @@ def setup_logging(logging_config: Dict[str, Any]) -> logging.Logger:
     """Configure logging for application and packages."""
     # Get logging settings with new structure
     source_level = getattr(logging, logging_config.get("source_level", "INFO").upper())
-    package_level = getattr(logging, logging_config.get("package_level", "WARNING").upper())
+    package_level = getattr(
+        logging, logging_config.get("package_level", "WARNING").upper()
+    )
     log_format = logging_config.get(
         "format", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     )

     # Configure root logger for our application
     logging.basicConfig(
-        level=source_level, format=log_format, force=True  # Override any existing configuration
+        level=source_level,
+        format=log_format,
+        force=True,  # Override any existing configuration
     )

     # Set logging levels for packages using system configuration
@@ -88,7 +92,9 @@ def setup_logging(logging_config: Dict[str, Any]) -> logging.Logger:
             override_level_obj = getattr(logging, override_level.upper())
             logging.getLogger(package_name).setLevel(override_level_obj)
         except AttributeError:
-            print(f"Warning: Invalid log level '{override_level}' for package '{package_name}'")
+            print(
+                f"Warning: Invalid log level '{override_level}' for package '{package_name}'"
+            )

     # Get logger for our application
     logger = logging.getLogger("lsc_eval")
@@ -123,7 +129,9 @@ def populate_metric_mappings(metrics_metadata: Dict[str, Any]) -> None:
         CONVERSATION_LEVEL_METRICS.add(metric_name)


-def validate_metrics(turn_metrics: List[str], conversation_metrics: List[str]) -> List[str]:
+def validate_metrics(
+    turn_metrics: List[str], conversation_metrics: List[str]
+) -> List[str]:
     """Validate that provided metrics are recognized."""
     errors = []

@@ -154,7 +162,9 @@ class SystemConfig(BaseModel):
     # Logging Configuration
     logging_source_level: str = Field(default="INFO")
     logging_package_level: str = Field(default="WARNING")
-    logging_format: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    logging_format: str = Field(
+        default="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
     logging_show_timestamps: bool = Field(default=True)
     logging_package_overrides: Dict[str, str] = Field(default_factory=dict)

@@ -172,8 +182,12 @@ class SystemConfig(BaseModel):
     visualization_dpi: int = Field(default=300)

     # Default metrics metadata from system config
-    default_turn_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
-    default_conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
+    default_turn_metrics_metadata: Dict[str, Dict[str, Any]] = Field(
+        default_factory=dict
+    )
+    default_conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(
+        default_factory=dict
+    )


 class ConfigLoader:
@@ -232,7 +246,9 @@ def load_system_config(self, config_path: str) -> SystemConfig:
             visualization_dpi=visualization_config.get("dpi", 300),
             # Default metrics metadata from system config
             default_turn_metrics_metadata=metrics_metadata.get("turn_level", {}),
-            default_conversation_metrics_metadata=metrics_metadata.get("conversation_level", {}),
+            default_conversation_metrics_metadata=metrics_metadata.get(
+                "conversation_level", {}
+            ),
         )

         self.logger.debug(
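For context, the hunks above only re-wrap `setup_logging`; the underlying pattern is to resolve level names with `getattr(logging, ...)`, call `basicConfig(force=True)` to replace any prior configuration, and then apply per-package overrides, warning on unknown level names. Below is a minimal self-contained sketch of that pattern; the `package_overrides` config key and the example package names are assumptions for illustration, not the project's actual schema.

```python
# Minimal sketch of the logging-setup pattern reformatted above, not the
# project's actual function. "package_overrides" and the package names used
# for quieting third-party loggers are illustrative assumptions.
import logging
from typing import Any, Dict


def setup_logging_sketch(logging_config: Dict[str, Any]) -> logging.Logger:
    source_level = getattr(logging, logging_config.get("source_level", "INFO").upper())
    package_level = getattr(
        logging, logging_config.get("package_level", "WARNING").upper()
    )
    log_format = logging_config.get(
        "format", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # force=True replaces any logging configuration set up earlier in the process.
    logging.basicConfig(level=source_level, format=log_format, force=True)

    # Quiet noisy third-party packages (example names), then apply overrides.
    for package_name in ("httpx", "litellm"):
        logging.getLogger(package_name).setLevel(package_level)
    for package_name, override_level in logging_config.get("package_overrides", {}).items():
        try:
            logging.getLogger(package_name).setLevel(getattr(logging, override_level.upper()))
        except AttributeError:
            print(f"Warning: Invalid log level '{override_level}' for package '{package_name}'")

    return logging.getLogger("lsc_eval")


logger = setup_logging_sketch({"source_level": "DEBUG", "package_overrides": {"litellm": "ERROR"}})
logger.debug("logging configured")
```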

lsc_eval/src/lsc_eval/core/models.py

Lines changed: 19 additions & 6 deletions
@@ -56,7 +56,9 @@ class EvaluationData(BaseModel):

     # Metric-specific configuration (threshold, weights, etc.)
     turn_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
-    conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
+    conversation_metrics_metadata: Dict[str, Dict[str, Any]] = Field(
+        default_factory=dict
+    )

     # Conversation turns
     turns: List[TurnData]
@@ -83,7 +85,9 @@ def validate_metrics(cls, v: List[str]) -> List[str]:
         """Validate metrics are properly formatted."""
         for metric in v:
             if not metric or ":" not in metric:
-                raise ValueError(f'Metric "{metric}" must be in format "framework:metric_name"')
+                raise ValueError(
+                    f'Metric "{metric}" must be in format "framework:metric_name"'
+                )
         return v

     def validate_metric_requirements(self) -> List[str]:
@@ -114,7 +118,10 @@ def validate_metric_requirements(self) -> List[str]:
                        f"TurnData {turn_data.turn_id}: Metric '{metric}' requires contexts"
                    )

-                if metric in expected_response_required_metrics and not turn_data.expected_response:
+                if (
+                    metric in expected_response_required_metrics
+                    and not turn_data.expected_response
+                ):
                    errors.append(
                        f"TurnData {turn_data.turn_id}: "
                        f"Metric '{metric}' requires expected_response"
@@ -168,10 +175,16 @@ class LLMConfig(BaseModel):
     model_name: str = Field(..., description="The model name to use")
     api_base: Optional[str] = Field(None, description="Custom API base URL")
     api_key: Optional[str] = Field(None, description="API key for the model")
-    temperature: float = Field(0.0, ge=0.0, le=2.0, description="Temperature for sampling")
-    max_tokens: Optional[int] = Field(None, ge=1, description="Maximum tokens to generate")
+    temperature: float = Field(
+        0.0, ge=0.0, le=2.0, description="Temperature for sampling"
+    )
+    max_tokens: Optional[int] = Field(
+        None, ge=1, description="Maximum tokens to generate"
+    )
     timeout: Optional[int] = Field(None, ge=1, description="Request timeout in seconds")
-    num_retries: int = Field(3, ge=0, description="Number of retries for failed requests")
+    num_retries: int = Field(
+        3, ge=0, description="Number of retries for failed requests"
+    )

     @field_validator("model_name")
     @classmethod
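The validator re-wrapped above enforces metric identifiers of the form `framework:metric_name`, where the framework part matches the `ragas`, `deepeval`, and `custom` handlers seen in `evaluation_engine.py` below. Here is a hedged sketch of how such identifiers are typically parsed; the specific metric names and the `split(":", 1)` detail are illustrative assumptions, not taken from this codebase.

```python
# Illustrative only: consuming the "framework:metric_name" format enforced by
# the validator above. The metric names below are examples, not a list of
# metrics this project ships.
from typing import List, Tuple


def parse_metric_identifier(metric: str) -> Tuple[str, str]:
    if not metric or ":" not in metric:
        raise ValueError(f'Metric "{metric}" must be in format "framework:metric_name"')
    framework, metric_name = metric.split(":", 1)
    return framework, metric_name


turn_metrics: List[str] = ["ragas:faithfulness", "deepeval:answer_relevancy", "custom:keyword_match"]
for identifier in turn_metrics:
    print(parse_metric_identifier(identifier))  # e.g. ('ragas', 'faithfulness')
```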

lsc_eval/src/lsc_eval/evaluation_engine.py

Lines changed: 39 additions & 13 deletions
@@ -7,7 +7,13 @@
 import time
 from typing import Any, Dict, List, Optional, Tuple, Union

-from .core import ConfigLoader, DataValidator, EvaluationData, EvaluationResult, TurnData
+from .core import (
+    ConfigLoader,
+    DataValidator,
+    EvaluationData,
+    EvaluationResult,
+    TurnData,
+)
 from .llm_managers.llm_manager import LLMManager
 from .metrics.custom_metrics import CustomMetrics
 from .metrics.deepeval_metrics import DeepEvalMetrics
@@ -29,7 +35,11 @@ def __init__(self, conv_data: EvaluationData, metric_identifier: str):

     @classmethod
     def for_turn(
-        cls, conv_data: EvaluationData, metric_identifier: str, turn_idx: int, turn_data: TurnData
+        cls,
+        conv_data: EvaluationData,
+        metric_identifier: str,
+        turn_idx: int,
+        turn_data: TurnData,
     ) -> "EvaluationRequest":
         """Create request for turn-level evaluation."""
         request = cls(conv_data, metric_identifier)
@@ -74,7 +84,9 @@ def __init__(self, llm_manager: LLMManager):
         self.custom_metrics = CustomMetrics(llm_manager)

         # Metric routing map
-        self.handlers: Dict[str, Union[RagasMetrics, DeepEvalMetrics, CustomMetrics]] = {
+        self.handlers: Dict[
+            str, Union[RagasMetrics, DeepEvalMetrics, CustomMetrics]
+        ] = {
             "ragas": self.ragas_metrics,
             "deepeval": self.deepeval_metrics,
             "custom": self.custom_metrics,
@@ -131,7 +143,9 @@ def validate_data(self, evaluation_data: List[EvaluationData]) -> bool:
         """Validate evaluation data using data validator."""
         return self.data_validator.validate_evaluation_data(evaluation_data)

-    def run_evaluation(self, evaluation_data: List[EvaluationData]) -> List[EvaluationResult]:
+    def run_evaluation(
+        self, evaluation_data: List[EvaluationData]
+    ) -> List[EvaluationResult]:
         """
         Run complete evaluation pipeline.

@@ -177,10 +191,14 @@ def _process_conversation(self, conv_data: EvaluationData) -> None:
             print(f"🗣️ Conversation-level metrics: {conv_data.conversation_metrics}")
             self._evaluate_conversation(conv_data)

-    def _evaluate_turn(self, conv_data: EvaluationData, turn_idx: int, turn_data: TurnData) -> None:
+    def _evaluate_turn(
+        self, conv_data: EvaluationData, turn_idx: int, turn_data: TurnData
+    ) -> None:
         """Evaluate single turn with specified turn metrics."""
         for metric_identifier in conv_data.turn_metrics:
-            request = EvaluationRequest.for_turn(conv_data, metric_identifier, turn_idx, turn_data)
+            request = EvaluationRequest.for_turn(
+                conv_data, metric_identifier, turn_idx, turn_data
+            )
             result = self._evaluate_metric(request)
             if result:
                 self.results.append(result)
@@ -193,7 +211,9 @@ def _evaluate_conversation(self, conv_data: EvaluationData) -> None:
             if result:
                 self.results.append(result)

-    def _evaluate_metric(self, request: EvaluationRequest) -> Optional[EvaluationResult]:
+    def _evaluate_metric(
+        self, request: EvaluationRequest
+    ) -> Optional[EvaluationResult]:
         """
         Evaluate single metric using context.

@@ -213,7 +233,9 @@ def _evaluate_metric(self, request: EvaluationRequest) -> Optional[EvaluationRes
         print(f" {request.metric_identifier} (threshold: {threshold})")

         # Route to metrics manager
-        score, reason = self.metrics_manager.evaluate_metric(framework, metric_name, request)
+        score, reason = self.metrics_manager.evaluate_metric(
+            framework, metric_name, request
+        )

         # Determine result status
         if score is None:
@@ -222,7 +244,9 @@ def _evaluate_metric(self, request: EvaluationRequest) -> Optional[EvaluationRes
         else:
             result_status = self._determine_status(score, threshold)
             status_emoji = (
-                "✅" if result_status == "PASS" else "❌" if result_status == "FAIL" else "⚠️"
+                "✅"
+                if result_status == "PASS"
+                else "❌" if result_status == "FAIL" else "⚠️"
             )
             print(f" {status_emoji} {result_status}: {score:.3f}")

@@ -266,7 +290,9 @@ def _get_effective_threshold(
         """Get effective threshold for metric (conversation-specific or system default)."""
         # Check conversation-specific metadata first
         if is_conversation:
-            metadata = conv_data.conversation_metrics_metadata.get(metric_identifier, {})
+            metadata = conv_data.conversation_metrics_metadata.get(
+                metric_identifier, {}
+            )
         else:
             metadata = conv_data.turn_metrics_metadata.get(metric_identifier, {})

@@ -279,9 +305,9 @@ def _get_effective_threshold(
             return None

         if is_conversation:
-            default_metadata = (system_config.default_conversation_metrics_metadata or {}).get(
-                metric_identifier, {}
-            )
+            default_metadata = (
+                system_config.default_conversation_metrics_metadata or {}
+            ).get(metric_identifier, {})
         else:
             default_metadata = (system_config.default_turn_metrics_metadata or {}).get(
                 metric_identifier, {}
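The `_get_effective_threshold` hunks at the end of this file encode a precedence rule: the conversation's own metrics metadata is consulted first, and only then the defaults from the system config. A minimal sketch of that fallback follows, with plain dicts standing in for the real `EvaluationData`/`SystemConfig` models and the `"threshold"` key assumed from the `(threshold, weights, etc.)` comment in `models.py`.

```python
# Sketch of the threshold-resolution order implied by the hunks above, not the
# engine's actual method. Plain dicts replace the Pydantic models.
from typing import Any, Dict, Optional


def effective_threshold_sketch(
    metric_identifier: str,
    conv_metadata: Dict[str, Dict[str, Any]],
    default_metadata: Dict[str, Dict[str, Any]],
) -> Optional[float]:
    # Conversation-specific metadata wins when it defines a threshold.
    metadata = conv_metadata.get(metric_identifier, {})
    if "threshold" in metadata:
        return metadata["threshold"]
    # Otherwise fall back to the defaults from the system config.
    return (default_metadata or {}).get(metric_identifier, {}).get("threshold")


print(effective_threshold_sketch("ragas:faithfulness", {}, {"ragas:faithfulness": {"threshold": 0.7}}))  # 0.7
```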

lsc_eval/src/lsc_eval/llm_managers/llm_manager.py

Lines changed: 9 additions & 3 deletions
@@ -47,7 +47,9 @@ def __init__(self, config: LLMConfig):
         """Initialize with validated environment and constructed model name."""
         self.config = config
         self.model_name = self._construct_model_name_and_validate()
-        print(f"✅ LLM Manager: {self.config.provider}/{self.config.model} -> {self.model_name}")
+        print(
+            f"✅ LLM Manager: {self.config.provider}/{self.config.model} -> {self.model_name}"
+        )

     def _construct_model_name_and_validate(self) -> str:
         """Construct model name for LiteLLM and validate required environment variables."""
@@ -73,7 +75,9 @@ def _construct_model_name_and_validate(self) -> str:
     def _validate_openai_env(self) -> None:
         """Validate OpenAI environment variables."""
         if not os.environ.get("OPENAI_API_KEY"):
-            raise LLMError("OPENAI_API_KEY environment variable is required for OpenAI provider")
+            raise LLMError(
+                "OPENAI_API_KEY environment variable is required for OpenAI provider"
+            )

     def _validate_azure_env(self) -> None:
         """Validate Azure OpenAI environment variables."""
@@ -85,7 +89,9 @@ def _validate_watsonx_env(self) -> None:
         """Validate Watsonx environment variables."""
         required = ["WATSONX_API_KEY", "WATSONX_API_BASE", "WATSONX_PROJECT_ID"]
         if not all(os.environ.get(var) for var in required):
-            raise LLMError(f"Watsonx provider requires environment variables: {required}")
+            raise LLMError(
+                f"Watsonx provider requires environment variables: {required}"
+            )

     def _validate_anthropic_env(self) -> None:
         """Validate Anthropic environment variables."""

lsc_eval/src/lsc_eval/llm_managers/ragas_llm.py

Lines changed: 8 additions & 2 deletions
@@ -29,7 +29,11 @@ def generate_text(  # pylint: disable=too-many-arguments,too-many-positional-arg
         prompt_text = str(prompt)

         # Use temperature from params unless explicitly overridden
-        temp = temperature if temperature != 1e-08 else self.litellm_params.get("temperature", 0.0)
+        temp = (
+            temperature
+            if temperature != 1e-08
+            else self.litellm_params.get("temperature", 0.0)
+        )

         try:
             response = litellm.completion(
@@ -68,7 +72,9 @@ async def agenerate_text(  # pylint: disable=too-many-arguments,too-many-positio
     ) -> LLMResult:
         """Async generate."""
         temp = temperature if temperature is not None else 1e-08
-        return self.generate_text(prompt, n=n, temperature=temp, stop=stop, callbacks=callbacks)
+        return self.generate_text(
+            prompt, n=n, temperature=temp, stop=stop, callbacks=callbacks
+        )

     def is_finished(self, response: LLMResult) -> bool:
         """Check if response is complete."""
