
Commit e5fcdee

fix linting issues

1 parent eac9f72 commit e5fcdee

12 files changed: +60 / -51 lines

lsc_eval/pyproject.toml

Lines changed: 4 additions & 1 deletion
@@ -17,7 +17,7 @@ dependencies = [
     "datasets>=2.0.0",
     "matplotlib>=3.5.0",
     "seaborn>=0.11.0",
-    "numpy>=1.21.0"
+    "numpy>=1.23.0"
 ]

 [tool.pdm.dev-dependencies]
@@ -46,6 +46,9 @@ build-backend = "pdm.backend"
 [tool.pdm]
 distribution = true

+[tool.pdm.build]
+includes = ["lsc_eval/config/*.yaml"]
+
 [tool.ruff]
 # always generate Python 3.11-compatible code.
 target-version = "py311"
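The new [tool.pdm.build] includes entry tells PDM to package the YAML files under lsc_eval/config/ into the built distribution. How the project then locates those files at runtime is not shown in this commit (runner.py still takes plain --system-config paths); if the YAML ends up importable as package data, one hedged way to read it is importlib.resources, with the package and file names below being assumptions:

from importlib import resources

import yaml

# Assumed layout: a "lsc_eval" package shipping config/system.yaml as package data.
config_file = resources.files("lsc_eval") / "config" / "system.yaml"
config = yaml.safe_load(config_file.read_text(encoding="utf-8"))
print(type(config))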

lsc_eval/runner.py

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@
 Simple interface that uses EvaluationEngine as the core controller.

 Usage:
-    python runner.py --system-config config/system.yaml --eval-data config/evaluation_data.yaml
+    python -m runner --system-config config/system.yaml --eval-data config/evaluation_data.yaml

 Or programmatically:
     from runner import run_evaluation
@@ -96,7 +96,7 @@ def run_evaluation(
     return None


-def main():
+def main() -> int:
     """Command line interface."""
     parser = argparse.ArgumentParser(description="LSC Evaluation Framework / Tool")
     parser.add_argument(
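main() now declares an int return type, the usual convention for a CLI entry point whose exit status is handed back to the shell. Whether runner.py actually wires this up with sys.exit() is not visible in the hunk; the conventional pattern is sketched below with a toy stand-in.

import sys


def main() -> int:
    """Toy stand-in for runner.main(): return 0 on success, non-zero on failure."""
    print("running evaluation ...")
    return 0


if __name__ == "__main__":
    sys.exit(main())  # the returned int becomes the process exit status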

lsc_eval/src/lsc_eval/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -3,14 +3,14 @@
 from .core import (
     ConfigLoader,
     DataValidator,
-    SystemConfig,
     EvaluationData,
     EvaluationResult,
+    SystemConfig,
     TurnData,
 )
 from .evaluation_engine import EvaluationEngine
 from .llm_managers.llm_manager import LLMManager
-from .output import OutputHandler
+from .output.output_handler import OutputHandler

 __all__ = [
     "SystemConfig",

lsc_eval/src/lsc_eval/core/config_loader.py

Lines changed: 11 additions & 8 deletions
@@ -2,11 +2,14 @@

 import logging
 import os
-from typing import Any, Dict, List
+from typing import TYPE_CHECKING, Any, Dict, List, Optional

 import yaml
 from pydantic import BaseModel, Field

+if TYPE_CHECKING:
+    from .models import EvaluationData
+
 # Global metric mapping sets (populated dynamically from system config)
 TURN_LEVEL_METRICS: set[str] = set()
 CONVERSATION_LEVEL_METRICS: set[str] = set()
@@ -24,7 +27,7 @@
 ]


-def setup_environment_variables(config_path: str):
+def setup_environment_variables(config_path: str) -> None:
     """Early setup of environment variables from system config. Called before any other imports."""
     try:
         with open(config_path, "r", encoding="utf-8") as f:
@@ -44,7 +47,7 @@ def setup_environment_variables(config_path: str):
     os.environ["LITELLM_LOG_LEVEL"] = "ERROR"


-def setup_logging(logging_config: Dict[str, Any]):
+def setup_logging(logging_config: Dict[str, Any]) -> logging.Logger:
     """Configure logging for application and packages."""
     # Get logging settings with new structure
     source_level = getattr(logging, logging_config.get("source_level", "INFO").upper())
@@ -102,7 +105,7 @@ def setup_logging(logging_config: Dict[str, Any]):
     return logger


-def populate_metric_mappings(metrics_metadata: Dict[str, Any]):
+def populate_metric_mappings(metrics_metadata: Dict[str, Any]) -> None:
     """Populate global metric mapping sets from system config metadata."""
     TURN_LEVEL_METRICS.clear()
     CONVERSATION_LEVEL_METRICS.clear()
@@ -176,11 +179,11 @@ class SystemConfig(BaseModel):
 class ConfigLoader:
     """Configuration loader for LSC Evaluation Framework."""

-    def __init__(self):
+    def __init__(self) -> None:
         """Initialize Config Loader."""
-        self.system_config = None
-        self.evaluation_data = None
-        self.logger = None
+        self.system_config: Optional[SystemConfig] = None
+        self.evaluation_data: Optional[List[EvaluationData]] = None
+        self.logger: Optional[logging.Logger] = None

     def load_system_config(self, config_path: str) -> SystemConfig:
         """Load system configuration from YAML file."""

lsc_eval/src/lsc_eval/core/data_validator.py

Lines changed: 7 additions & 7 deletions
@@ -1,20 +1,20 @@
 """Data validation of input data before evaluation."""

-from typing import List
+from typing import List, Optional

 import yaml

-from .models import EvaluationData
 from .config_loader import CONVERSATION_LEVEL_METRICS, TURN_LEVEL_METRICS
+from .models import EvaluationData


 class DataValidator:
     """Data validator for evaluation data."""

-    def __init__(self):
+    def __init__(self) -> None:
         """Initialize validator."""
-        self.validation_errors = []
-        self.evaluation_data = None
+        self.validation_errors: List[str] = []
+        self.evaluation_data: Optional[List[EvaluationData]] = None

     def load_evaluation_data(self, data_path: str) -> List[EvaluationData]:
         """Load and validate evaluation data from YAML file."""
@@ -53,7 +53,7 @@ def validate_evaluation_data(self, evaluation_data: List[EvaluationData]) -> boo
         print("✅ All data validation passed")
         return True

-    def _validate_metrics_availability(self, data: EvaluationData):
+    def _validate_metrics_availability(self, data: EvaluationData) -> None:
         """Validate that specified metrics are available/supported."""
         conversation_id = data.conversation_group_id

@@ -71,7 +71,7 @@ def _validate_metrics_availability(self, data: EvaluationData):
                 f"Conversation {conversation_id}: Unknown conversation metric '{metric}'"
             )

-    def _validate_metric_requirements(self, data: EvaluationData):
+    def _validate_metric_requirements(self, data: EvaluationData) -> None:
         """Validate that required fields exist for specified metrics."""
         conversation_id = data.conversation_group_id

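Beyond the annotations, the validator's flow is unchanged: load the YAML, then validate it as a whole. A hypothetical driver based on the signatures visible in this diff (the path is an assumption):

from lsc_eval import DataValidator

validator = DataValidator()
evaluation_data = validator.load_evaluation_data("config/evaluation_data.yaml")
if not validator.validate_evaluation_data(evaluation_data):
    raise SystemExit(f"validation failed: {validator.validation_errors}")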

lsc_eval/src/lsc_eval/evaluation_engine.py

Lines changed: 7 additions & 5 deletions
@@ -5,7 +5,7 @@
 """

 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Tuple, Union

 from .core import ConfigLoader, DataValidator, EvaluationData, EvaluationResult, TurnData
 from .llm_managers.llm_manager import LLMManager
@@ -80,7 +80,9 @@ def __init__(self, llm_manager: LLMManager):
             "custom": self.custom_metrics,
         }

-    def evaluate_metric(self, framework: str, metric_name: str, request: EvaluationRequest):
+    def evaluate_metric(
+        self, framework: str, metric_name: str, request: EvaluationRequest
+    ) -> Tuple[Optional[float], str]:
         """Route evaluation to appropriate handler."""
         if framework in self.handlers:
             # Create shared EvaluationScope
@@ -155,7 +157,7 @@ def run_evaluation(self, evaluation_data: List[EvaluationData]) -> List[Evaluati
         print(f"\n✅ Evaluation complete: {len(self.results)} results generated")
         return self.results

-    def _process_conversation(self, conv_data: EvaluationData):
+    def _process_conversation(self, conv_data: EvaluationData) -> None:
         """Process single conversation - handle turn and conversation level metrics."""
         print(f"\n📋 Evaluating: {conv_data.conversation_group_id}")

@@ -175,15 +177,15 @@ def _process_conversation(self, conv_data: EvaluationData):
             print(f"🗣️ Conversation-level metrics: {conv_data.conversation_metrics}")
             self._evaluate_conversation(conv_data)

-    def _evaluate_turn(self, conv_data: EvaluationData, turn_idx: int, turn_data: TurnData):
+    def _evaluate_turn(self, conv_data: EvaluationData, turn_idx: int, turn_data: TurnData) -> None:
         """Evaluate single turn with specified turn metrics."""
         for metric_identifier in conv_data.turn_metrics:
             request = EvaluationRequest.for_turn(conv_data, metric_identifier, turn_idx, turn_data)
             result = self._evaluate_metric(request)
             if result:
                 self.results.append(result)

-    def _evaluate_conversation(self, conv_data: EvaluationData):
+    def _evaluate_conversation(self, conv_data: EvaluationData) -> None:
         """Evaluate conversation-level metrics."""
         for metric_identifier in conv_data.conversation_metrics:
             request = EvaluationRequest.for_conversation(conv_data, metric_identifier)
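The widened signature makes the contract explicit: evaluate_metric yields a (score, reason) pair where the score may be None when evaluation fails. A self-contained sketch of that contract (the function body below is a stand-in, not the framework's routing logic):

from typing import Optional, Tuple


def evaluate_metric(framework: str, metric_name: str) -> Tuple[Optional[float], str]:
    """Stand-in with the same (score, reason) return shape as the real method."""
    if framework not in {"ragas", "deepeval", "custom"}:
        return None, f"Unknown framework: {framework}"
    return 0.87, f"{metric_name} evaluated"


score, reason = evaluate_metric("ragas", "faithfulness")
print(f"score={score}, reason={reason}")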

lsc_eval/src/lsc_eval/llm_managers/ragas_llm.py

Lines changed: 4 additions & 4 deletions
@@ -19,11 +19,11 @@ def __init__(self, model_name: str, litellm_params: Dict[str, Any]):

     def generate_text(  # pylint: disable=too-many-arguments,too-many-positional-arguments
         self,
-        prompt,
+        prompt: Any,
         n: int = 1,
         temperature: float = 1e-08,
         stop: Optional[List[str]] = None,
-        callbacks=None,
+        callbacks: Optional[Any] = None,
     ) -> LLMResult:
         """Generate text using LiteLLM with provided parameters."""
         prompt_text = str(prompt)
@@ -60,11 +60,11 @@ def generate_text(  # pylint: disable=too-many-arguments,too-many-positional-arg

     async def agenerate_text(  # pylint: disable=too-many-arguments,too-many-positional-arguments
         self,
-        prompt,
+        prompt: Any,
         n: int = 1,
         temperature: Optional[float] = None,
         stop: Optional[List[str]] = None,
-        callbacks=None,
+        callbacks: Optional[Any] = None,
     ) -> LLMResult:
         """Async generate."""
         temp = temperature if temperature is not None else 1e-08

lsc_eval/src/lsc_eval/metrics/custom_metrics.py

Lines changed: 2 additions & 2 deletions
@@ -42,7 +42,7 @@ def __init__(self, llm_manager: LLMManager):
     def evaluate(
         self,
         metric_name: str,
-        conv_data,
+        conv_data: Any,
         scope: EvaluationScope,
     ) -> Tuple[Optional[float], str]:
         """Evaluate a custom metric."""
@@ -192,7 +192,7 @@ def _create_evaluation_prompt(self, params: EvaluationPromptParams) -> str:

     def _evaluate_answer_correctness(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,

lsc_eval/src/lsc_eval/metrics/deepeval_metrics.py

Lines changed: 6 additions & 6 deletions
@@ -37,7 +37,7 @@ def __init__(self, llm_manager: LLMManager):
             "knowledge_retention": self._evaluate_knowledge_retention,
         }

-    def _build_conversational_test_case(self, conv_data) -> ConversationalTestCase:
+    def _build_conversational_test_case(self, conv_data: Any) -> ConversationalTestCase:
         """Build ConversationalTestCase from conversation data."""
         turns = []
         for turn_data in conv_data.turns:
@@ -48,7 +48,7 @@ def _build_conversational_test_case(self, conv_data: Any) -> ConversationalTestCase:

         return ConversationalTestCase(turns=turns)

-    def _evaluate_metric(self, metric, test_case) -> Tuple[float, str]:
+    def _evaluate_metric(self, metric: Any, test_case: Any) -> Tuple[float, str]:
         """Evaluate and get result."""
         metric.measure(test_case)

@@ -62,7 +62,7 @@ def _evaluate_metric(self, metric: Any, test_case: Any) -> Tuple[float, str]:
     def evaluate(
         self,
         metric_name: str,
-        conv_data,
+        conv_data: Any,
         scope: EvaluationScope,
     ) -> Tuple[Optional[float], str]:
         """Evaluate a DeepEval metric."""
@@ -78,7 +78,7 @@ def evaluate(

     def _evaluate_conversation_completeness(
         self,
-        conv_data,
+        conv_data: Any,
         _turn_idx: Optional[int],
         _turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -94,7 +94,7 @@ def _evaluate_conversation_completeness(

     def _evaluate_conversation_relevancy(
         self,
-        conv_data,
+        conv_data: Any,
         _turn_idx: Optional[int],
         _turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -114,7 +114,7 @@ def _evaluate_conversation_relevancy(

     def _evaluate_knowledge_retention(
         self,
-        conv_data,
+        conv_data: Any,
         _turn_idx: Optional[int],
         _turn_data: Optional[TurnData],
         is_conversation: bool,

lsc_eval/src/lsc_eval/metrics/ragas_metrics.py

Lines changed: 8 additions & 8 deletions
@@ -62,7 +62,7 @@ def _extract_turn_data(self, turn_data: Optional[TurnData]) -> Tuple[str, str, L
         return query, response, contexts

     def _evaluate_metric(
-        self, metric_class, dataset_dict, result_key: str, metric_name: str
+        self, metric_class: Any, dataset_dict: Dict[str, Any], result_key: str, metric_name: str
     ) -> Tuple[Optional[float], str]:
         """Evaluate metric with configured LLM."""
         dataset = Dataset.from_dict(dataset_dict)
@@ -78,7 +78,7 @@ def _evaluate_metric(
     def evaluate(
         self,
         metric_name: str,
-        conv_data,
+        conv_data: Any,
         scope: EvaluationScope,
     ) -> Tuple[Optional[float], str]:
         """Evaluate a Ragas metric."""
@@ -108,7 +108,7 @@ def evaluate(

     def _evaluate_response_relevancy(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -127,7 +127,7 @@ def _evaluate_response_relevancy(

     def _evaluate_faithfulness(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -144,7 +144,7 @@ def _evaluate_faithfulness(

     def _evaluate_context_precision_without_reference(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -166,7 +166,7 @@ def _evaluate_context_precision_without_reference(

     def _evaluate_context_precision_with_reference(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -196,7 +196,7 @@ def _evaluate_context_precision_with_reference(

     def _evaluate_context_recall(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,
@@ -223,7 +223,7 @@ def _evaluate_context_recall(

     def _evaluate_context_relevance(
         self,
-        _conv_data,
+        _conv_data: Any,
         _turn_idx: Optional[int],
         turn_data: Optional[TurnData],
         is_conversation: bool,
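The helper still funnels everything through datasets.Dataset.from_dict before handing off to Ragas. The dict below shows the general shape such a single-row dataset takes; the column names are illustrative assumptions, not taken from this commit.

from datasets import Dataset

# One evaluated turn: each column holds a list with a single row.
dataset_dict = {
    "question": ["What does the retriever return?"],
    "answer": ["It returns the top-k matching passages."],
    "contexts": [["The retriever returns the top-k passages ranked by score."]],
}
dataset = Dataset.from_dict(dataset_dict)
print(dataset.column_names, dataset.num_rows)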
